<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Best way of loading several csv files in a table in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/best-way-of-loading-several-csv-files-in-a-table/m-p/31531#M22969</link>
    <description>&lt;P&gt;What would be the best way of loading several files like in a single table to be consumed?&lt;/P&gt;&lt;P&gt;&lt;A href="https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-10.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-11.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-12.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-01.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-02.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-03.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-04.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-05.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-06.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-07.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-08.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-09.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-10.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-11.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-12.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-01.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-02.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-03.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-04.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-05.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-06.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-07.csv" 
target="_blank"&gt;https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-10.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-11.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-12.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-01.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-02.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-03.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-04.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-05.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-06.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-07.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-08.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-09.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-10.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-11.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-12.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-01.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-02.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-03.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-04.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-05.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-06.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-07.csv&lt;/A&gt;&lt;/P&gt;</description>
    <pubDate>Mon, 17 Jan 2022 20:24:41 GMT</pubDate>
    <dc:creator>CleverAnjos</dc:creator>
    <dc:date>2022-01-17T20:24:41Z</dc:date>
    <item>
      <title>Best way of loading several csv files in a table</title>
      <link>https://community.databricks.com/t5/data-engineering/best-way-of-loading-several-csv-files-in-a-table/m-p/31531#M22969</link>
      <description>&lt;P&gt;What would be the best way of loading several files like in a single table to be consumed?&lt;/P&gt;&lt;P&gt;&lt;A href="https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-10.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-11.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-12.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-01.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-02.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-03.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-04.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-05.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-06.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-07.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-08.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-09.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-10.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-11.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-12.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-01.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-02.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-03.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-04.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-05.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-06.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-07.csv" 
target="_blank"&gt;https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-10.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-11.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-12.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-01.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-02.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-03.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-04.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-05.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-06.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-07.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-08.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-09.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-10.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-11.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-12.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-01.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-02.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-03.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-04.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-05.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-06.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-07.csv&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 17 Jan 2022 20:24:41 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/best-way-of-loading-several-csv-files-in-a-table/m-p/31531#M22969</guid>
      <dc:creator>CleverAnjos</dc:creator>
      <dc:date>2022-01-17T20:24:41Z</dc:date>
    </item>
    <item>
      <title>Re: Best way of loading several csv files in a table</title>
      <link>https://community.databricks.com/t5/data-engineering/best-way-of-loading-several-csv-files-in-a-table/m-p/31533#M22971</link>
      <description>&lt;P&gt;New York Taxi data from your example is already included in your workspace, as it is a demo dataset.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;It is enough to read the "yellow" folder, and it will read all CSVs from there.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;If you want to save it as a single file, you can do .repartition(1).write.csv(destination_folder)&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="image.png"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/2167iB2A09AAF2739A058/image-size/large?v=v2&amp;amp;px=999" role="button" title="image.png" alt="image.png" /&gt;&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 18 Jan 2022 20:49:35 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/best-way-of-loading-several-csv-files-in-a-table/m-p/31533#M22971</guid>
      <dc:creator>Hubert-Dudek</dc:creator>
      <dc:date>2022-01-18T20:49:35Z</dc:date>
    </item>
    <item>
      <title>Re: Best way of loading several csv files in a table</title>
      <link>https://community.databricks.com/t5/data-engineering/best-way-of-loading-several-csv-files-in-a-table/m-p/31534#M22972</link>
      <description>&lt;P&gt;Great! &lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 18 Jan 2022 20:56:41 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/best-way-of-loading-several-csv-files-in-a-table/m-p/31534#M22972</guid>
      <dc:creator>CleverAnjos</dc:creator>
      <dc:date>2022-01-18T20:56:41Z</dc:date>
    </item>
    <item>
      <title>Re: Best way of loading several csv files in a table</title>
      <link>https://community.databricks.com/t5/data-engineering/best-way-of-loading-several-csv-files-in-a-table/m-p/31535#M22973</link>
      <description>&lt;P&gt;Unfortunately, it seems that nytaxi is outdated. There are no records from 2021 and 2020, and 2019 is incomplete.&lt;/P&gt;&lt;P&gt;+-----------+------------------+&lt;/P&gt;&lt;P&gt;|       2010|         169001154|&lt;/P&gt;&lt;P&gt;|       2011|         176897208|&lt;/P&gt;&lt;P&gt;|       2015|         146112990|&lt;/P&gt;&lt;P&gt;|       2014|         165114361|&lt;/P&gt;&lt;P&gt;|       2013|         173179759|&lt;/P&gt;&lt;P&gt;|       2012|         178544324|&lt;/P&gt;&lt;P&gt;|       2009|         170896987|&lt;/P&gt;&lt;P&gt;|       2016|         131165043|&lt;/P&gt;&lt;P&gt;|       2017|         113496933|&lt;/P&gt;&lt;P&gt;|       2018|         102803387|&lt;/P&gt;&lt;P&gt;|       2041|                 3|&lt;/P&gt;&lt;P&gt;|       2008|               585|&lt;/P&gt;&lt;P&gt;|       2001|                15|&lt;/P&gt;&lt;P&gt;|       2029|                 6|&lt;/P&gt;&lt;P&gt;|       2002|                33|&lt;/P&gt;&lt;P&gt;|       2053|                 2|&lt;/P&gt;&lt;P&gt;|       2003|                23|&lt;/P&gt;&lt;P&gt;|       2020|               438|&lt;/P&gt;&lt;P&gt;|       2019|          84397753|&lt;/P&gt;&lt;P&gt;|       2037|                 1|&lt;/P&gt;&lt;P&gt;+-----------+------------------+&lt;/P&gt;</description>
      <pubDate>Wed, 19 Jan 2022 15:45:36 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/best-way-of-loading-several-csv-files-in-a-table/m-p/31535#M22973</guid>
      <dc:creator>CleverAnjos</dc:creator>
      <dc:date>2022-01-19T15:45:36Z</dc:date>
    </item>
    <item>
      <title>Re: Best way of loading several csv files in a table</title>
      <link>https://community.databricks.com/t5/data-engineering/best-way-of-loading-several-csv-files-in-a-table/m-p/31537#M22975</link>
      <description>&lt;P&gt;Thanks Kaniz, I already have the files. I was discussing the best way to load them.&lt;/P&gt;</description>
      <pubDate>Mon, 31 Jan 2022 12:04:37 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/best-way-of-loading-several-csv-files-in-a-table/m-p/31537#M22975</guid>
      <dc:creator>CleverAnjos</dc:creator>
      <dc:date>2022-01-31T12:04:37Z</dc:date>
    </item>
    <item>
      <title>Re: Best way of loading several csv files in a table</title>
      <link>https://community.databricks.com/t5/data-engineering/best-way-of-loading-several-csv-files-in-a-table/m-p/31539#M22977</link>
      <description>&lt;P&gt;Yes, &lt;/P&gt;&lt;P&gt;1) Downloaded the files using sh from here  &lt;A href="https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-10.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-11.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-12.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-01.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-02.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-03.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-04.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-05.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-06.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-07.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-08.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-09.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-10.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-11.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-12.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-01.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-02.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-03.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-04.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-05.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-06.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-07.csv" 
alt="https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-10.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-11.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2019-12.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-01.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-02.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-03.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-04.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-05.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-06.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-07.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-08.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-09.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-10.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-11.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2020-12.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-01.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-02.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-03.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-04.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-05.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-06.csvhttps://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2021-07.csv" target="_blank"&gt;https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_&amp;lt;year&amp;gt;-&amp;lt;month&amp;gt;.csv&lt;/A&gt; to /mnt&lt;/P&gt;&lt;P&gt;2) Loaded a dataframe with the csv files&lt;/P&gt;&lt;P&gt;3) Stored as a partitioned table&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I don't know if this is the best approach, but it's working.&lt;/P&gt;</description>
      <pubDate>Mon, 31 Jan 2022 13:36:34 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/best-way-of-loading-several-csv-files-in-a-table/m-p/31539#M22977</guid>
      <dc:creator>CleverAnjos</dc:creator>
      <dc:date>2022-01-31T13:36:34Z</dc:date>
    </item>
  </channel>
</rss>

