<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic showing only a limited number of lines from the CSV file in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/showing-only-a-limited-number-of-lines-from-the-csv-file/m-p/80289#M36103</link>
    <description>&lt;P&gt;Expected number of lines: 16400&lt;/P&gt;
&lt;P&gt;Only 20 records are shown&lt;/P&gt;
&lt;P&gt;Script&lt;/P&gt;
&lt;DIV&gt;
&lt;DIV&gt;&lt;SPAN&gt;spark.conf.&lt;/SPAN&gt;&lt;SPAN&gt;set&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;"&lt;STRIKE&gt;REDACTED&lt;/STRIKE&gt;"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;"&lt;STRIKE&gt;REDACTED&lt;/STRIKE&gt;"&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;BR /&gt;
&lt;DIV&gt;&lt;SPAN&gt;# File location&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV&gt;file_location = "&lt;STRIKE&gt;REDACTED&lt;/STRIKE&gt;"&lt;/DIV&gt;
&lt;BR /&gt;
&lt;DIV&gt;&lt;SPAN&gt;# Read in the data to dataframe df&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;SPAN&gt;df = spark.read.&lt;/SPAN&gt;&lt;SPAN&gt;format&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"CSV"&lt;/SPAN&gt;&lt;SPAN&gt;).option(&lt;/SPAN&gt;&lt;SPAN&gt;"inferSchema"&lt;/SPAN&gt;&lt;SPAN&gt;, &lt;/SPAN&gt;&lt;SPAN&gt;"true"&lt;/SPAN&gt;&lt;SPAN&gt;).option(&lt;/SPAN&gt;&lt;SPAN&gt;"header"&lt;/SPAN&gt;&lt;SPAN&gt;, &lt;/SPAN&gt;&lt;SPAN&gt;"true"&lt;/SPAN&gt;&lt;SPAN&gt;).option(&lt;/SPAN&gt;&lt;SPAN&gt;"delimiter"&lt;/SPAN&gt;&lt;SPAN&gt;, &lt;/SPAN&gt;&lt;SPAN&gt;","&lt;/SPAN&gt;&lt;SPAN&gt;).load(file_location).show()&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;/DIV&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Wed, 24 Jul 2024 10:48:50 GMT</pubDate>
    <dc:creator>Yyyyy</dc:creator>
    <dc:date>2024-07-24T10:48:50Z</dc:date>
    <item>
      <title>showing only a limited number of lines from the CSV file</title>
      <link>https://community.databricks.com/t5/data-engineering/showing-only-a-limited-number-of-lines-from-the-csv-file/m-p/80289#M36103</link>
      <description>&lt;P&gt;Expected number of lines: 16400&lt;/P&gt;
&lt;P&gt;Only 20 records are shown&lt;/P&gt;
&lt;P&gt;Script&lt;/P&gt;
&lt;DIV&gt;
&lt;DIV&gt;&lt;SPAN&gt;spark.conf.&lt;/SPAN&gt;&lt;SPAN&gt;set&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;"&lt;STRIKE&gt;REDACTED&lt;/STRIKE&gt;"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;"&lt;STRIKE&gt;REDACTED&lt;/STRIKE&gt;"&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;BR /&gt;
&lt;DIV&gt;&lt;SPAN&gt;# File location&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV&gt;file_location = "&lt;STRIKE&gt;REDACTED&lt;/STRIKE&gt;"&lt;/DIV&gt;
&lt;BR /&gt;
&lt;DIV&gt;&lt;SPAN&gt;# Read in the data to dataframe df&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;SPAN&gt;df = spark.read.&lt;/SPAN&gt;&lt;SPAN&gt;format&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"CSV"&lt;/SPAN&gt;&lt;SPAN&gt;).option(&lt;/SPAN&gt;&lt;SPAN&gt;"inferSchema"&lt;/SPAN&gt;&lt;SPAN&gt;, &lt;/SPAN&gt;&lt;SPAN&gt;"true"&lt;/SPAN&gt;&lt;SPAN&gt;).option(&lt;/SPAN&gt;&lt;SPAN&gt;"header"&lt;/SPAN&gt;&lt;SPAN&gt;, &lt;/SPAN&gt;&lt;SPAN&gt;"true"&lt;/SPAN&gt;&lt;SPAN&gt;).option(&lt;/SPAN&gt;&lt;SPAN&gt;"delimiter"&lt;/SPAN&gt;&lt;SPAN&gt;, &lt;/SPAN&gt;&lt;SPAN&gt;","&lt;/SPAN&gt;&lt;SPAN&gt;).load(file_location).show()&lt;/SPAN&gt;&lt;/DIV&gt;
&lt;/DIV&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 24 Jul 2024 10:48:50 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/showing-only-a-limited-number-of-lines-from-the-csv-file/m-p/80289#M36103</guid>
      <dc:creator>Yyyyy</dc:creator>
      <dc:date>2024-07-24T10:48:50Z</dc:date>
    </item>
    <item>
      <title>Re: showing only a limited number of lines from the CSV file</title>
      <link>https://community.databricks.com/t5/data-engineering/showing-only-a-limited-number-of-lines-from-the-csv-file/m-p/80299#M36104</link>
      <description>&lt;P&gt;Hi, the show() method prints only the top 20 rows by default:&amp;nbsp;&lt;CODE class="sig-prename descclassname"&gt;DataFrame.&lt;/CODE&gt;&lt;CODE class="sig-name descname"&gt;show&lt;/CODE&gt;&lt;SPAN class="sig-paren"&gt;(&lt;/SPAN&gt;&lt;EM class="sig-param"&gt;&lt;SPAN class="n"&gt;n&lt;/SPAN&gt;&lt;SPAN class="p"&gt;:&lt;/SPAN&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;SPAN class="n"&gt;int&lt;/SPAN&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;SPAN class="o"&gt;=&lt;/SPAN&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;SPAN class="default_value"&gt;20&lt;/SPAN&gt;&lt;/EM&gt;&lt;SPAN&gt;,&amp;nbsp;&lt;/SPAN&gt;&lt;EM class="sig-param"&gt;&lt;SPAN class="n"&gt;truncate&lt;/SPAN&gt;&lt;SPAN class="p"&gt;:&lt;/SPAN&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;SPAN class="n"&gt;Union&lt;SPAN class="p"&gt;[&lt;/SPAN&gt;bool&lt;SPAN class="p"&gt;,&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/SPAN&gt;int&lt;SPAN class="p"&gt;]&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;SPAN class="o"&gt;=&lt;/SPAN&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;SPAN class="default_value"&gt;True&lt;/SPAN&gt;&lt;/EM&gt;&lt;SPAN&gt;,&amp;nbsp;&lt;/SPAN&gt;&lt;EM class="sig-param"&gt;&lt;SPAN class="n"&gt;vertical&lt;/SPAN&gt;&lt;SPAN class="p"&gt;:&lt;/SPAN&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;SPAN class="n"&gt;bool&lt;/SPAN&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;SPAN class="o"&gt;=&lt;/SPAN&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;SPAN class="default_value"&gt;False&lt;/SPAN&gt;&lt;/EM&gt;&lt;SPAN class="sig-paren"&gt;)&lt;/SPAN&gt; (cf&amp;nbsp;&lt;A href="https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.show.html" target="_blank"&gt;https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.show.html&lt;/A&gt;)&lt;/P&gt;
&lt;P&gt;You can either use show() with a bigger n parameter, or use the Databricks display() command to print the dataframe in a tabular format:&lt;/P&gt;
&lt;LI-CODE lang="python"&gt;df = spark.read.format("CSV").option("inferSchema", "true").option("header", "true").option("delimiter", ",").load(file_location)

display(df)&lt;/LI-CODE&gt;
&lt;P&gt;&lt;A href="https://www.databricks.com/spark/getting-started-with-apache-spark/dataframes#view-the-dataframe" target="_blank"&gt;https://www.databricks.com/spark/getting-started-with-apache-spark/dataframes#view-the-dataframe&lt;/A&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 24 Jul 2024 09:45:03 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/showing-only-a-limited-number-of-lines-from-the-csv-file/m-p/80299#M36104</guid>
      <dc:creator>romy</dc:creator>
      <dc:date>2024-07-24T09:45:03Z</dc:date>
    </item>
    <item>
      <title>Re: showing only a limited number of lines from the CSV file</title>
      <link>https://community.databricks.com/t5/data-engineering/showing-only-a-limited-number-of-lines-from-the-csv-file/m-p/80307#M36105</link>
      <description>&lt;DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp;hi, pls look help me&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;spark.conf.&lt;/SPAN&gt;&lt;SPAN&gt;set&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;"REDACTED"&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp; &amp;nbsp; &lt;/SPAN&gt;&lt;SPAN&gt;"REDACTED"&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;# File location&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;file_location = &lt;/SPAN&gt;&lt;SPAN&gt;"REDACTED"&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;# Read in the data to dataframe df&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;df = spark.read.&lt;/SPAN&gt;&lt;SPAN&gt;format&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"CSV"&lt;/SPAN&gt;&lt;SPAN&gt;).option(&lt;/SPAN&gt;&lt;SPAN&gt;"inferSchema"&lt;/SPAN&gt;&lt;SPAN&gt;, &lt;/SPAN&gt;&lt;SPAN&gt;"true"&lt;/SPAN&gt;&lt;SPAN&gt;).option(&lt;/SPAN&gt;&lt;SPAN&gt;"header"&lt;/SPAN&gt;&lt;SPAN&gt;, &lt;/SPAN&gt;&lt;SPAN&gt;"true"&lt;/SPAN&gt;&lt;SPAN&gt;).option(&lt;/SPAN&gt;&lt;SPAN&gt;"delimiter"&lt;/SPAN&gt;&lt;SPAN&gt;, &lt;/SPAN&gt;&lt;SPAN&gt;","&lt;/SPAN&gt;&lt;SPAN&gt;).load(file_location)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;BR /&gt;&lt;DIV&gt;&lt;SPAN&gt;# Display the dataframe&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;display(df)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;error - Failed to upload command result to DBFS. Error message: Status code: 301 Moved Permanently, Error message: &amp;lt;?xml version="1.0" encoding="UTF-8"?&amp;gt; &amp;lt;Error&amp;gt;&amp;lt;Code&amp;gt;PermanentRedirect&amp;lt;/Code&amp;gt;&amp;lt;Message&amp;gt;The bucket you are attempting to access must be addressed using the specified endpoint. 
Please send all future requests to this endpoint.&amp;lt;/Message&amp;gt;&amp;lt;Endpoint&amp;gt;command-results.s3.amazonaws.com&amp;lt;/Endpoint&amp;gt;&amp;lt;Bucket&amp;gt;command-results&amp;lt;/Bucket&amp;gt;&amp;lt;RequestId&amp;gt;AMNJ84M2CZ0G4MFK&amp;lt;/RequestId&amp;gt;&amp;lt;HostId&amp;gt;rXnbI5MLQZdZmhOfF/SbvNDErLlAqj92hFAxcTi4cwGqo2Qe2E1VIDkMoyAOUpIkBLePYy4+up4=&amp;lt;/HostId&amp;gt;&amp;lt;/Error&amp;gt;&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;Once I try to use display(), I get the above error.&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;</description>
      <pubDate>Wed, 24 Jul 2024 11:14:04 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/showing-only-a-limited-number-of-lines-from-the-csv-file/m-p/80307#M36105</guid>
      <dc:creator>Yyyyy</dc:creator>
      <dc:date>2024-07-24T11:14:04Z</dc:date>
    </item>
    <item>
      <title>Re: showing only a limited number of lines from the CSV file</title>
      <link>https://community.databricks.com/t5/data-engineering/showing-only-a-limited-number-of-lines-from-the-csv-file/m-p/80318#M36106</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/113300"&gt;@Yyyyy&lt;/a&gt;&amp;nbsp;,&lt;BR /&gt;&lt;BR /&gt;You should edit your question and redact the key you're setting in the Spark session.&lt;/P&gt;</description>
      <pubDate>Wed, 24 Jul 2024 10:56:36 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/showing-only-a-limited-number-of-lines-from-the-csv-file/m-p/80318#M36106</guid>
      <dc:creator>szymon_dybczak</dc:creator>
      <dc:date>2024-07-24T10:56:36Z</dc:date>
    </item>
  </channel>
</rss>

