<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: How to limit batch size from Confluent Kafka in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/how-to-limit-batch-size-from-confluent-kafka/m-p/20254#M13646</link>
    <description>&lt;P&gt;Looking at the SQL Job and watching 309 mil rows and 55 hr of run time while stream status is still initializing.  No data has been written to a table which is the end of the process as well.&lt;/P&gt;</description>
    <pubDate>Tue, 29 Nov 2022 22:25:49 GMT</pubDate>
    <dc:creator>AdamRink</dc:creator>
    <dc:date>2022-11-29T22:25:49Z</dc:date>
    <item>
      <title>How to limit batch size from Confluent Kafka</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-limit-batch-size-from-confluent-kafka/m-p/20252#M13644</link>
      <description>&lt;P&gt;I have a large stream of data read from Confluent Kafka, 500+ million rows.  When I initialize the stream I cannot control the batch sizes that are read.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I've tried setting options on the readstream -  maxBytesPerTrigger, maxOffsetsPerTrigger, fetch.max.bytes, max.poll.records&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Configuring spark cluster options  maxRatePerPartition&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Starting with a fresh checkpoint&lt;/P&gt;</description>
      <pubDate>Mon, 28 Nov 2022 14:03:26 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-limit-batch-size-from-confluent-kafka/m-p/20252#M13644</guid>
      <dc:creator>AdamRink</dc:creator>
      <dc:date>2022-11-28T14:03:26Z</dc:date>
    </item>
    <item>
      <title>Re: How to limit batch size from Confluent Kafka</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-limit-batch-size-from-confluent-kafka/m-p/20253#M13645</link>
      <description>&lt;P&gt;Hi @Adam Rink&amp;nbsp;&lt;/P&gt;&lt;P&gt;Just checking for further info on your question. How are you deducing that the batch sizes are more than what you are providing as maxOffsetsPerTrigger?&lt;/P&gt;</description>
      <pubDate>Tue, 29 Nov 2022 18:46:52 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-limit-batch-size-from-confluent-kafka/m-p/20253#M13645</guid>
      <dc:creator>UmaMahesh1</dc:creator>
      <dc:date>2022-11-29T18:46:52Z</dc:date>
    </item>
    <item>
      <title>Re: How to limit batch size from Confluent Kafka</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-limit-batch-size-from-confluent-kafka/m-p/20254#M13646</link>
      <description>&lt;P&gt;Looking at the SQL Job and watching 309 mil rows and 55 hr of run time while stream status is still initializing.  No data has been written to a table which is the end of the process as well.&lt;/P&gt;</description>
      <pubDate>Tue, 29 Nov 2022 22:25:49 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-limit-batch-size-from-confluent-kafka/m-p/20254#M13646</guid>
      <dc:creator>AdamRink</dc:creator>
      <dc:date>2022-11-29T22:25:49Z</dc:date>
    </item>
  </channel>
</rss>

