<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: read the csv file as shown in description in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/read-the-csv-file-as-shown-in-description/m-p/24190#M16804</link>
    <description>&lt;PRE&gt;&lt;CODE&gt;from pyspark.sql import functions as F
df = spark.read.option("sep", "|").option("header", "true").csv("/tmp/file.csv")
display(df.groupBy("projectNo").agg(F.expr("collect_list(EmployeeNo)").alias("employees")))&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;</description>
    <pubDate>Thu, 31 Mar 2022 20:06:53 GMT</pubDate>
    <dc:creator>garren_staubli</dc:creator>
    <dc:date>2022-03-31T20:06:53Z</dc:date>
    <item>
      <title>read the csv file as shown in description</title>
      <link>https://community.databricks.com/t5/data-engineering/read-the-csv-file-as-shown-in-description/m-p/24189#M16803</link>
      <description>&lt;P&gt;Project_Details.csv&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;ProjectNo|ProjectName|EmployeeNo&lt;/P&gt;&lt;P&gt;100|analytics|1&lt;/P&gt;&lt;P&gt;100|analytics|2&lt;/P&gt;&lt;P&gt;101|machine learning|3&lt;/P&gt;&lt;P&gt;101|machine learning|1&lt;/P&gt;&lt;P&gt;101|machine learning|4&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;How can I find the employees working on each project, in the form of a list?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Expected output:&lt;/P&gt;&lt;P&gt;ProjectNo|employeeNo&lt;/P&gt;&lt;P&gt;100|[1,2]&lt;/P&gt;&lt;P&gt;101|[3,1,4]&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 30 Mar 2022 18:54:53 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/read-the-csv-file-as-shown-in-description/m-p/24189#M16803</guid>
      <dc:creator>sannycse</dc:creator>
      <dc:date>2022-03-30T18:54:53Z</dc:date>
    </item>
    <item>
      <title>Re: read the csv file as shown in description</title>
      <link>https://community.databricks.com/t5/data-engineering/read-the-csv-file-as-shown-in-description/m-p/24190#M16804</link>
      <description>&lt;PRE&gt;&lt;CODE&gt;from pyspark.sql import functions as F
df = spark.read.option("sep", "|").option("header", "true").csv("/tmp/file.csv")
display(df.groupBy("projectNo").agg(F.expr("collect_list(EmployeeNo)").alias("employees")))&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 31 Mar 2022 20:06:53 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/read-the-csv-file-as-shown-in-description/m-p/24190#M16804</guid>
      <dc:creator>garren_staubli</dc:creator>
      <dc:date>2022-03-31T20:06:53Z</dc:date>
    </item>
    <item>
      <title>Re: read the csv file as shown in description</title>
      <link>https://community.databricks.com/t5/data-engineering/read-the-csv-file-as-shown-in-description/m-p/24192#M16806</link>
      <description>&lt;P&gt;I tried, but that was written in PySpark and I'm unable to convert that code into Spark SQL.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Sat, 02 Apr 2022 16:53:40 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/read-the-csv-file-as-shown-in-description/m-p/24192#M16806</guid>
      <dc:creator>sannycse</dc:creator>
      <dc:date>2022-04-02T16:53:40Z</dc:date>
    </item>
    <item>
      <title>Re: read the csv file as shown in description</title>
      <link>https://community.databricks.com/t5/data-engineering/read-the-csv-file-as-shown-in-description/m-p/24193#M16807</link>
      <description>&lt;P&gt;@SANJEEV BANDRU&#8203;&amp;nbsp;, you can register the DataFrame as a temp view by adding the following to the Python code:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;df.createOrReplaceTempView("employees_csv")&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;Then you can query it with SQL:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;select projectNo, collect_list(EmployeeNo)
from employees_csv
group by projectNo&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Sat, 02 Apr 2022 17:11:26 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/read-the-csv-file-as-shown-in-description/m-p/24193#M16807</guid>
      <dc:creator>merca</dc:creator>
      <dc:date>2022-04-02T17:11:26Z</dc:date>
    </item>
    <item>
      <title>Re: read the csv file as shown in description</title>
      <link>https://community.databricks.com/t5/data-engineering/read-the-csv-file-as-shown-in-description/m-p/24194#M16808</link>
      <description>&lt;P&gt;@SANJEEV BANDRU&#8203;&amp;nbsp; You can simply do this in Spark SQL:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Just change the file path:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;CREATE TEMPORARY VIEW readcsv USING CSV OPTIONS (&lt;/P&gt;&lt;P&gt;&amp;nbsp;path "dbfs:/docs/test.csv",&lt;/P&gt;&lt;P&gt;&amp;nbsp;header "true",&lt;/P&gt;&lt;P&gt;&amp;nbsp;delimiter "|",&lt;/P&gt;&lt;P&gt;&amp;nbsp;mode "FAILFAST"&lt;/P&gt;&lt;P&gt;);&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;select&lt;/P&gt;&lt;P&gt;&amp;nbsp;ProjectNo,&lt;/P&gt;&lt;P&gt;&amp;nbsp;collect_list(EmployeeNo) Employees&lt;/P&gt;&lt;P&gt;from&lt;/P&gt;&lt;P&gt;&amp;nbsp;readcsv&lt;/P&gt;&lt;P&gt;group by&lt;/P&gt;&lt;P&gt;&amp;nbsp;projectNo&lt;/P&gt;</description>
      <pubDate>Wed, 13 Apr 2022 15:56:47 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/read-the-csv-file-as-shown-in-description/m-p/24194#M16808</guid>
      <dc:creator>User16764241763</dc:creator>
      <dc:date>2022-04-13T15:56:47Z</dc:date>
    </item>
  </channel>
</rss>

