<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Write in Single CSV file in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/write-in-single-csv-file/m-p/29553#M21276</link>
    <description>&lt;P&gt;Thank you for your time and support. Is there any other effective method to combine the part CSV files into a single CSV file in Databricks?&lt;/P&gt;</description>
    <pubDate>Thu, 06 Oct 2022 05:50:44 GMT</pubDate>
    <dc:creator>Mohit_Kumar_Sut</dc:creator>
    <dc:date>2022-10-06T05:50:44Z</dc:date>
    <item>
      <title>Write in Single CSV file</title>
      <link>https://community.databricks.com/t5/data-engineering/write-in-single-csv-file/m-p/29551#M21274</link>
      <description>&lt;P&gt;We are reading 520 GB of partitioned CSV files, and when we write them to a single CSV file using repartition(1) it takes 25+ hours. Please let us know an optimized way to create a single CSV file so that our process can complete within 5 hours.&lt;/P&gt;</description>
      <pubDate>Mon, 03 Oct 2022 16:54:59 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/write-in-single-csv-file/m-p/29551#M21274</guid>
      <dc:creator>Mohit_Kumar_Sut</dc:creator>
      <dc:date>2022-10-03T16:54:59Z</dc:date>
    </item>
    <item>
      <title>Re: Write in Single CSV file</title>
      <link>https://community.databricks.com/t5/data-engineering/write-in-single-csv-file/m-p/29552#M21275</link>
      <description>&lt;P&gt;If you repartition(1), only one core of your whole cluster works. Please repartition to the number of cores instead (SparkContext.defaultParallelism).&lt;/P&gt;&lt;P&gt;After writing, you will get one file per core, so please use other software to merge the files if you want to have only one (ADF has some excellent options for that in copy).&lt;/P&gt;</description>
      <pubDate>Mon, 03 Oct 2022 20:01:59 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/write-in-single-csv-file/m-p/29552#M21275</guid>
      <dc:creator>Hubert-Dudek</dc:creator>
      <dc:date>2022-10-03T20:01:59Z</dc:date>
    </item>
    <item>
      <title>Re: Write in Single CSV file</title>
      <link>https://community.databricks.com/t5/data-engineering/write-in-single-csv-file/m-p/29553#M21276</link>
      <description>&lt;P&gt;Thank you for your time and support. Is there any other effective method to combine the part CSV files into a single CSV file in Databricks?&lt;/P&gt;</description>
      <pubDate>Thu, 06 Oct 2022 05:50:44 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/write-in-single-csv-file/m-p/29553#M21276</guid>
      <dc:creator>Mohit_Kumar_Sut</dc:creator>
      <dc:date>2022-10-06T05:50:44Z</dc:date>
    </item>
    <item>
      <title>Re: Write in Single CSV file</title>
      <link>https://community.databricks.com/t5/data-engineering/write-in-single-csv-file/m-p/29554#M21277</link>
      <description>&lt;P&gt;The method available in Databricks is the one that you are already using (repartition(1)), and it is slow.&lt;/P&gt;</description>
      <pubDate>Fri, 14 Oct 2022 11:32:53 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/write-in-single-csv-file/m-p/29554#M21277</guid>
      <dc:creator>Hubert-Dudek</dc:creator>
      <dc:date>2022-10-14T11:32:53Z</dc:date>
    </item>
    <item>
      <title>Re: Write in Single CSV file</title>
      <link>https://community.databricks.com/t5/data-engineering/write-in-single-csv-file/m-p/29555#M21278</link>
      <description>&lt;P&gt;You can use coalesce(1) for example:&lt;/P&gt;&lt;P&gt;df.coalesce(1).write.option("header","true").csv("path_to_save_your_CSV")&lt;/P&gt;</description>
      <pubDate>Fri, 28 Oct 2022 22:57:58 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/write-in-single-csv-file/m-p/29555#M21278</guid>
      <dc:creator>jose_gonzalez</dc:creator>
      <dc:date>2022-10-28T22:57:58Z</dc:date>
    </item>
    <item>
      <title>Re: Write in Single CSV file</title>
      <link>https://community.databricks.com/t5/data-engineering/write-in-single-csv-file/m-p/29556#M21279</link>
      <description>&lt;P&gt;Hi @mohit kumar suthar​&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Hope all is well! Just wanted to check in if you were able to resolve your issue and would you be happy to share the solution or &lt;B&gt;mark an answer as best&lt;/B&gt;? Else please let us know if you need more help.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;We'd love to hear from you.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thanks!&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Sat, 12 Nov 2022 06:51:02 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/write-in-single-csv-file/m-p/29556#M21279</guid>
      <dc:creator>Anonymous</dc:creator>
      <dc:date>2022-11-12T06:51:02Z</dc:date>
    </item>
  </channel>
</rss>

