<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Databricks write to Azure Data Explorer  writes suddenly become slower in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/databricks-write-to-azure-data-explorer-writes-suddenly-become/m-p/19187#M12838</link>
    <description>&lt;P&gt;It's Azure Data Lake Storage Gen2.&lt;/P&gt;</description>
    <pubDate>Mon, 06 Jun 2022 06:20:51 GMT</pubDate>
    <dc:creator>RengarLee</dc:creator>
    <dc:date>2022-06-06T06:20:51Z</dc:date>
    <item>
      <title>Databricks write to Azure Data Explorer  writes suddenly become slower</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-write-to-azure-data-explorer-writes-suddenly-become/m-p/19182#M12833</link>
      <description>&lt;P&gt;I currently write to Azure Data Explorer using Spark Streaming. One day, writes suddenly became slower. Restarting has no effect.&lt;/P&gt;&lt;P&gt;I have a question about Spark Streaming to Azure Data Explorer.&lt;/P&gt;&lt;P&gt;&lt;B&gt;Q1: What should I do to get performance to recover?&lt;/B&gt;&lt;/P&gt;&lt;P&gt;Figure 1 shows the performance of writing to the current table. &lt;/P&gt;&lt;P&gt;Figure 2 shows the performance of writing to the new table.&lt;/P&gt;&lt;P&gt;Figure 3 shows the performance of writing to the current table, but with a new checkpoint location.&lt;/P&gt;&lt;P&gt;Could the checkpoint location have caused it?&lt;/P&gt;</description>
      <pubDate>Wed, 01 Jun 2022 07:37:41 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-write-to-azure-data-explorer-writes-suddenly-become/m-p/19182#M12833</guid>
      <dc:creator>RengarLee</dc:creator>
      <dc:date>2022-06-01T07:37:41Z</dc:date>
    </item>
    <item>
      <title>Re: Databricks write to Azure Data Explorer  writes suddenly become slower</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-write-to-azure-data-explorer-writes-suddenly-become/m-p/19183#M12834</link>
      <description>&lt;P&gt;if the checkpoint location is in another region or has another 'level' (think premium vs standard storage) that could be the case.&lt;/P&gt;&lt;P&gt;Can you check that?&lt;/P&gt;</description>
      <pubDate>Wed, 01 Jun 2022 12:47:30 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-write-to-azure-data-explorer-writes-suddenly-become/m-p/19183#M12834</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2022-06-01T12:47:30Z</dc:date>
    </item>
    <item>
      <title>Re: Databricks write to Azure Data Explorer  writes suddenly become slower</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-write-to-azure-data-explorer-writes-suddenly-become/m-p/19184#M12835</link>
      <description>&lt;P&gt;Thank you for your answer.&lt;/P&gt;&lt;P&gt;I checked it.&lt;/P&gt;&lt;P&gt;The data source (AdxDF) and the checkpoint location are in the same container; only the path is different.&lt;/P&gt;&lt;P&gt;Azure Data Explorer and the data source are in the same region.&lt;/P&gt;&lt;P&gt;I have a new discovery.&lt;/P&gt;&lt;P&gt;&#8203;If I write to the new table, it's fast at first, and after a few hours of running, it suddenly slows down.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&#8203;I'll add a screenshot&amp;nbsp;later&lt;/P&gt;&lt;P&gt;&#8203;&lt;/P&gt;</description>
      <pubDate>Thu, 02 Jun 2022 07:35:45 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-write-to-azure-data-explorer-writes-suddenly-become/m-p/19184#M12835</guid>
      <dc:creator>RengarLee</dc:creator>
      <dc:date>2022-06-02T07:35:45Z</dc:date>
    </item>
    <item>
      <title>Re: Databricks write to Azure Data Explorer  writes suddenly become slower</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-write-to-azure-data-explorer-writes-suddenly-become/m-p/19186#M12837</link>
      <description>&lt;P&gt;Is it Blob storage or ADLS Storage account where your data and checkpoint files are stored?&lt;/P&gt;</description>
      <pubDate>Mon, 06 Jun 2022 03:36:16 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-write-to-azure-data-explorer-writes-suddenly-become/m-p/19186#M12837</guid>
      <dc:creator>User16764241763</dc:creator>
      <dc:date>2022-06-06T03:36:16Z</dc:date>
    </item>
    <item>
      <title>Re: Databricks write to Azure Data Explorer  writes suddenly become slower</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-write-to-azure-data-explorer-writes-suddenly-become/m-p/19187#M12838</link>
      <description>&lt;P&gt;It's Azure Data Lake Storage Gen2.&lt;/P&gt;</description>
      <pubDate>Mon, 06 Jun 2022 06:20:51 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-write-to-azure-data-explorer-writes-suddenly-become/m-p/19187#M12838</guid>
      <dc:creator>RengarLee</dc:creator>
      <dc:date>2022-06-06T06:20:51Z</dc:date>
    </item>
    <item>
      <title>Re: Databricks write to Azure Data Explorer  writes suddenly become slower</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-write-to-azure-data-explorer-writes-suddenly-become/m-p/19188#M12839</link>
      <description>&lt;P&gt;could it be the stream query that gets slow?  Maybe checkpoint more often?&lt;/P&gt;</description>
      <pubDate>Tue, 07 Jun 2022 11:21:04 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-write-to-azure-data-explorer-writes-suddenly-become/m-p/19188#M12839</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2022-06-07T11:21:04Z</dc:date>
    </item>
    <item>
      <title>Re: Databricks write to Azure Data Explorer  writes suddenly become slower</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-write-to-azure-data-explorer-writes-suddenly-become/m-p/19189#M12840</link>
      <description />
      <pubDate>Wed, 08 Jun 2022 01:42:02 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-write-to-azure-data-explorer-writes-suddenly-become/m-p/19189#M12840</guid>
      <dc:creator>RengarLee</dc:creator>
      <dc:date>2022-06-08T01:42:02Z</dc:date>
    </item>
    <item>
      <title>Re: Databricks write to Azure Data Explorer  writes suddenly become slower</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-write-to-azure-data-explorer-writes-suddenly-become/m-p/19190#M12841</link>
      <description>&lt;P&gt;Do you have any more data to be processed? Can you check the driver logs for any error messages? This sudden drop might point to another issue happening here.&lt;/P&gt;</description>
      <pubDate>Fri, 29 Jul 2022 00:35:42 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-write-to-azure-data-explorer-writes-suddenly-become/m-p/19190#M12841</guid>
      <dc:creator>jose_gonzalez</dc:creator>
      <dc:date>2022-07-29T00:35:42Z</dc:date>
    </item>
    <item>
      <title>Re: Databricks write to Azure Data Explorer  writes suddenly become slower</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-write-to-azure-data-explorer-writes-suddenly-become/m-p/19191#M12842</link>
      <description>&lt;P&gt;I'm so sorry, I just thought the issue wasn't resolved&lt;/P&gt;&lt;P&gt;&lt;B&gt;Solution&lt;/B&gt;&lt;/P&gt;&lt;OL&gt;&lt;LI&gt;Set maxFilesPerTrigger and maxBytesPerTrigger &lt;/LI&gt;&lt;LI&gt;Enable auto optimize&lt;/LI&gt;&lt;/OL&gt;&lt;P&gt;&lt;B&gt;Reason&lt;/B&gt;&lt;/P&gt;&lt;P&gt;       On the first day, it processes larger files and then eventually processes smaller files.&lt;/P&gt;&lt;P&gt;&lt;B&gt;Detailed reason&lt;/B&gt;&lt;/P&gt;&lt;P&gt;&lt;B&gt;       Before performance drops：&lt;/B&gt;&lt;/P&gt;&lt;P&gt;      &lt;span class="lia-inline-image-display-wrapper" image-alt="Before performance drops"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/1816iE2B18DE35E5A3B1E/image-size/large?v=v2&amp;amp;px=999" role="button" title="Before performance drops" alt="Before performance drops" /&gt;&lt;/span&gt;1000 =&amp;nbsp;(endOffset's index - startOffset's index) =&amp;nbsp;（&lt;B&gt;80999- 79999）&lt;/B&gt;&lt;/P&gt;&lt;P&gt;1305389&amp;nbsp;=&amp;nbsp;&lt;B&gt;numInputRows&amp;nbsp;&lt;/B&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;B&gt;avg records per file is 1305389/1000 = 1305.389&lt;/B&gt;&lt;/P&gt;&lt;P&gt;&lt;B&gt;        After performance drops：&lt;span class="lia-inline-image-display-wrapper" image-alt="After"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/1821i6BF02575F18E9E5B/image-size/large?v=v2&amp;amp;px=999" role="button" title="After" alt="After" /&gt;&lt;/span&gt;1000 =&amp;nbsp;(endOffset's index - startOffset's index) =&amp;nbsp;（90999- 89999）&lt;/B&gt;&lt;/P&gt;&lt;P&gt;&lt;B&gt;45644=&amp;nbsp;numInputRows&amp;nbsp;&lt;/B&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;B&gt;avg records per file is 45644/1000 = 45.644&lt;/B&gt;&lt;/P&gt;&lt;P&gt;From the comparison of (1) and (2), it can be seen that the number of files read by each batch before and after the performance drop (23:30) remains unchanged at 1000, but after 23:30 the 
total number of records in those 1000 files becomes smaller. Most likely the files themselves have become smaller, so the total number of records read has decreased. That is, for the first day, it processes larger files and then eventually processes smaller files.&lt;/P&gt;&lt;P&gt;&lt;B&gt;Suggestion：&lt;/B&gt;&lt;/P&gt;&lt;P&gt;&lt;A href="https://docs.microsoft.com/en-gb/azure/databricks/delta/delta-streaming#limit-input-rate" alt="https://docs.microsoft.com/en-gb/azure/databricks/delta/delta-streaming#limit-input-rate" target="_blank"&gt;https://docs.microsoft.com/en-gb/azure/databricks/delta/delta-streaming#limit-input-rate&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;A href="https://docs.microsoft.com/en-us/azure/databricks/delta/optimizations/auto-optimize" alt="https://docs.microsoft.com/en-us/azure/databricks/delta/optimizations/auto-optimize" target="_blank"&gt;https://docs.microsoft.com/en-us/azure/databricks/delta/optimizations/auto-optimize&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&#8203;&lt;/P&gt;&lt;P&gt;&#8203;Finally, a big thank you to the Databricks team and the Microsoft team for their technical support.&lt;/P&gt;</description>
      <pubDate>Tue, 28 Mar 2023 02:18:45 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-write-to-azure-data-explorer-writes-suddenly-become/m-p/19191#M12842</guid>
      <dc:creator>RengarLee</dc:creator>
      <dc:date>2023-03-28T02:18:45Z</dc:date>
    </item>
    <item>
      <title>Re: Databricks write to Azure Data Explorer  writes suddenly become slower</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-write-to-azure-data-explorer-writes-suddenly-become/m-p/19192#M12843</link>
      <description>&lt;P&gt;I'm very sorry for the late reply. This problem has been resolved; the specific reason is in another answer.&lt;/P&gt;</description>
      <pubDate>Tue, 28 Mar 2023 02:22:42 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-write-to-azure-data-explorer-writes-suddenly-become/m-p/19192#M12843</guid>
      <dc:creator>RengarLee</dc:creator>
      <dc:date>2023-03-28T02:22:42Z</dc:date>
    </item>
    <item>
      <title>Re: Databricks write to Azure Data Explorer  writes suddenly become slower</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-write-to-azure-data-explorer-writes-suddenly-become/m-p/19193#M12844</link>
      <description>&lt;P&gt;I'm very sorry for the late reply. This problem has been resolved; the specific reason is in another answer.&lt;/P&gt;</description>
      <pubDate>Tue, 28 Mar 2023 02:22:49 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-write-to-azure-data-explorer-writes-suddenly-become/m-p/19193#M12844</guid>
      <dc:creator>RengarLee</dc:creator>
      <dc:date>2023-03-28T02:22:49Z</dc:date>
    </item>
  </channel>
</rss>

