<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic While trying to save the spark dataframe to delta table is taking too long in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/while-trying-to-save-the-spark-dataframe-to-delta-table-is/m-p/4028#M870</link>
    <description>&lt;P&gt;While working on a video analytics task, I need to save image bytes, previously extracted into a Spark DataFrame, to a Delta table. I want to overwrite the same Delta table throughout the whole task, and the size of the input data varies. The write is taking too much time, even after several attempts with compaction. I can't use streaming Delta tables, as I simply want to store the extracted image bytes in the Delta table and then complete the inference task for object detection and other transformations. I have even tried dropping the lengthy data columns, but it did not make any difference.&lt;/P&gt;&lt;P&gt;The configuration of my current cluster is: 1 driver, 16&amp;nbsp;GB memory, 4&amp;nbsp;cores, 11.3.x-gpu-ml-scala2.12, g4dn.xlarge.&lt;/P&gt;&lt;P&gt;11.3.x-gpu-ml-scala2.12&lt;/P&gt;</description>
    <pubDate>Wed, 24 May 2023 12:08:10 GMT</pubDate>
    <dc:creator>Neil</dc:creator>
    <dc:date>2023-05-24T12:08:10Z</dc:date>
    <item>
      <title>While trying to save the spark dataframe to delta table is taking too long</title>
      <link>https://community.databricks.com/t5/data-engineering/while-trying-to-save-the-spark-dataframe-to-delta-table-is/m-p/4028#M870</link>
      <description>&lt;P&gt;While working on a video analytics task, I need to save image bytes, previously extracted into a Spark DataFrame, to a Delta table. I want to overwrite the same Delta table throughout the whole task, and the size of the input data varies. The write is taking too much time, even after several attempts with compaction. I can't use streaming Delta tables, as I simply want to store the extracted image bytes in the Delta table and then complete the inference task for object detection and other transformations. I have even tried dropping the lengthy data columns, but it did not make any difference.&lt;/P&gt;&lt;P&gt;The configuration of my current cluster is: 1 driver, 16&amp;nbsp;GB memory, 4&amp;nbsp;cores, 11.3.x-gpu-ml-scala2.12, g4dn.xlarge.&lt;/P&gt;&lt;P&gt;11.3.x-gpu-ml-scala2.12&lt;/P&gt;</description>
      <pubDate>Wed, 24 May 2023 12:08:10 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/while-trying-to-save-the-spark-dataframe-to-delta-table-is/m-p/4028#M870</guid>
      <dc:creator>Neil</dc:creator>
      <dc:date>2023-05-24T12:08:10Z</dc:date>
    </item>
    <item>
      <title>Re: While trying to save the spark dataframe to delta table is taking too long</title>
      <link>https://community.databricks.com/t5/data-engineering/while-trying-to-save-the-spark-dataframe-to-delta-table-is/m-p/4029#M871</link>
      <description>&lt;P&gt;Can you check the Spark UI to see where the time is spent?&lt;/P&gt;&lt;P&gt;It could be a join, a UDF, ...&lt;/P&gt;</description>
      <pubDate>Thu, 25 May 2023 07:52:58 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/while-trying-to-save-the-spark-dataframe-to-delta-table-is/m-p/4029#M871</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2023-05-25T07:52:58Z</dc:date>
    </item>
  </channel>
</rss>

