<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Optimizing Writes from Databricks to Snowflake in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/optimizing-writes-from-databricks-to-snowflake/m-p/7986#M3719</link>
    <description>&lt;P&gt;seems slow to me.&lt;/P&gt;&lt;P&gt;Are you sure you do not do any Spark processing?&lt;/P&gt;&lt;P&gt;because if so: a chunk of that 2 minutes is Spark transforming the data.&lt;/P&gt;</description>
    <pubDate>Fri, 10 Mar 2023 12:16:35 GMT</pubDate>
    <dc:creator>-werners-</dc:creator>
    <dc:date>2023-03-10T12:16:35Z</dc:date>
    <item>
      <title>Optimizing Writes from Databricks to Snowflake</title>
      <link>https://community.databricks.com/t5/data-engineering/optimizing-writes-from-databricks-to-snowflake/m-p/7983#M3716</link>
      <description>&lt;P&gt;My job, after doing all the processing in the Databricks layer, writes the final output to Snowflake tables using the df.write API and the Spark Snowflake connector. I often see that even a small dataset (16 partitions and 20k rows in each partition) takes around 2 minutes to write. Is there any way the write can be optimized? &lt;/P&gt;</description>
      <pubDate>Fri, 10 Mar 2023 08:26:51 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/optimizing-writes-from-databricks-to-snowflake/m-p/7983#M3716</guid>
      <dc:creator>pvignesh92</dc:creator>
      <dc:date>2023-03-10T08:26:51Z</dc:date>
    </item>
    <item>
      <title>Re: Optimizing Writes from Databricks to Snowflake</title>
      <link>https://community.databricks.com/t5/data-engineering/optimizing-writes-from-databricks-to-snowflake/m-p/7984#M3717</link>
      <description>&lt;P&gt;AFAIK the Spark connector is already optimized. Can you try changing the partitioning of your dataset? For bulk loading, larger files are better.&lt;/P&gt;</description>
      <pubDate>Fri, 10 Mar 2023 10:52:31 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/optimizing-writes-from-databricks-to-snowflake/m-p/7984#M3717</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2023-03-10T10:52:31Z</dc:date>
    </item>
    <item>
      <title>Re: Optimizing Writes from Databricks to Snowflake</title>
      <link>https://community.databricks.com/t5/data-engineering/optimizing-writes-from-databricks-to-snowflake/m-p/7985#M3718</link>
      <description>&lt;P&gt;Yes. I brought that down to 4 partitions while doing my transformations and then tried that as well. On average, it takes 2 minutes for the write. I'm not sure if that's the expected behavior with a JDBC connection. &lt;/P&gt;</description>
      <pubDate>Fri, 10 Mar 2023 12:12:53 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/optimizing-writes-from-databricks-to-snowflake/m-p/7985#M3718</guid>
      <dc:creator>pvignesh92</dc:creator>
      <dc:date>2023-03-10T12:12:53Z</dc:date>
    </item>
    <item>
      <title>Re: Optimizing Writes from Databricks to Snowflake</title>
      <link>https://community.databricks.com/t5/data-engineering/optimizing-writes-from-databricks-to-snowflake/m-p/7986#M3719</link>
      <description>&lt;P&gt;seems slow to me.&lt;/P&gt;&lt;P&gt;Are you sure you do not do any Spark processing?&lt;/P&gt;&lt;P&gt;because if so: a chunk of that 2 minutes is Spark transforming the data.&lt;/P&gt;</description>
      <pubDate>Fri, 10 Mar 2023 12:16:35 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/optimizing-writes-from-databricks-to-snowflake/m-p/7986#M3719</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2023-03-10T12:16:35Z</dc:date>
    </item>
    <item>
      <title>Re: Optimizing Writes from Databricks to Snowflake</title>
      <link>https://community.databricks.com/t5/data-engineering/optimizing-writes-from-databricks-to-snowflake/m-p/7987#M3720</link>
      <description>&lt;P&gt;Hi @Vigneshraja Palaniraj​&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Hope all is well! &lt;/P&gt;&lt;P&gt;Just wanted to check in if you were able to resolve your issue and would you be happy to share the solution or mark an answer as best? Else please let us know if you need more help.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;We'd love to hear from you.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thanks!&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 03 Apr 2023 11:21:32 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/optimizing-writes-from-databricks-to-snowflake/m-p/7987#M3720</guid>
      <dc:creator>Vartika</dc:creator>
      <dc:date>2023-04-03T11:21:32Z</dc:date>
    </item>
    <item>
      <title>Re: Optimizing Writes from Databricks to Snowflake</title>
      <link>https://community.databricks.com/t5/data-engineering/optimizing-writes-from-databricks-to-snowflake/m-p/7988#M3721</link>
      <description>&lt;P&gt;There are a few options I tried out which gave me better performance.&lt;/P&gt;&lt;OL&gt;&lt;LI&gt;Cache the intermediate or final results so that the DataFrame computation is not repeated while writing. &lt;/LI&gt;&lt;LI&gt;Coalesce the results into a number of partitions equal to 1x or 0.5x your number of cores, and also ensure that each partition is at least 128 MB so that the writes happen efficiently.&lt;/LI&gt;&lt;/OL&gt;</description>
      <pubDate>Mon, 03 Apr 2023 11:32:26 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/optimizing-writes-from-databricks-to-snowflake/m-p/7988#M3721</guid>
      <dc:creator>pvignesh92</dc:creator>
      <dc:date>2023-04-03T11:32:26Z</dc:date>
    </item>
    <item>
      <title>Re: Optimizing Writes from Databricks to Snowflake</title>
      <link>https://community.databricks.com/t5/data-engineering/optimizing-writes-from-databricks-to-snowflake/m-p/7989#M3722</link>
      <description>&lt;P&gt;Thanks @Vartika Nain​&amp;nbsp;for following up. I closed this thread. &lt;/P&gt;</description>
      <pubDate>Mon, 03 Apr 2023 11:32:55 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/optimizing-writes-from-databricks-to-snowflake/m-p/7989#M3722</guid>
      <dc:creator>pvignesh92</dc:creator>
      <dc:date>2023-04-03T11:32:55Z</dc:date>
    </item>
  </channel>
</rss>

