<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: How limit input rate reading delta table as stream? in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/how-limit-input-rate-reading-delta-table-as-stream/m-p/9015#M4514</link>
    <description>&lt;P&gt;Thanks, you are right! Data was very skewed&lt;/P&gt;</description>
    <pubDate>Mon, 27 Feb 2023 16:52:53 GMT</pubDate>
    <dc:creator>Lulka</dc:creator>
    <dc:date>2023-02-27T16:52:53Z</dc:date>
    <item>
      <title>How limit input rate reading delta table as stream?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-limit-input-rate-reading-delta-table-as-stream/m-p/9011#M4510</link>
      <description>&lt;P&gt;Hello everyone!&lt;/P&gt;&lt;P&gt;I am trying to read a Delta table as a streaming source using Spark, but my micro-batches are unbalanced: one is very small and the others are very large. How can I limit this?&lt;/P&gt;&lt;P&gt;I tried different configurations with maxBytesPerTrigger and maxFilesPerTrigger, but nothing changes; the batch size is always the same.&lt;/P&gt;&lt;P&gt;Are there any ideas?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;df = spark \&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;.readStream \&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;.format("delta") \&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;.load("...")&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;df \&lt;/P&gt;&lt;P&gt;&amp;nbsp;.writeStream \&lt;/P&gt;&lt;P&gt;&amp;nbsp;.outputMode("append") \&lt;/P&gt;&lt;P&gt;&amp;nbsp;.option("checkpointLocation", "...") \&lt;/P&gt;&lt;P&gt;&amp;nbsp;.table("...")&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Kind Regards&lt;/P&gt;</description>
      <pubDate>Tue, 21 Feb 2023 07:55:17 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-limit-input-rate-reading-delta-table-as-stream/m-p/9011#M4510</guid>
      <dc:creator>Lulka</dc:creator>
      <dc:date>2023-02-21T07:55:17Z</dc:date>
    </item>
    <item>
      <title>Re: How limit input rate reading delta table as stream?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-limit-input-rate-reading-delta-table-as-stream/m-p/9012#M4511</link>
      <description>&lt;P&gt;Besides the parameters you mention, I don't know of any others that control the batch size.&lt;/P&gt;&lt;P&gt;Did you check that the Delta table is not horribly skewed?&lt;/P&gt;</description>
      <pubDate>Tue, 21 Feb 2023 12:12:45 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-limit-input-rate-reading-delta-table-as-stream/m-p/9012#M4511</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2023-02-21T12:12:45Z</dc:date>
    </item>
    <item>
      <title>Re: How limit input rate reading delta table as stream?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-limit-input-rate-reading-delta-table-as-stream/m-p/9015#M4514</link>
      <description>&lt;P&gt;Thanks, you are right! Data was very skewed&lt;/P&gt;</description>
      <pubDate>Mon, 27 Feb 2023 16:52:53 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-limit-input-rate-reading-delta-table-as-stream/m-p/9015#M4514</guid>
      <dc:creator>Lulka</dc:creator>
      <dc:date>2023-02-27T16:52:53Z</dc:date>
    </item>
  </channel>
</rss>

