<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic What is the maximum limit of data that can be broadcasted using broadcast join in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/what-is-the-maximum-limit-of-data-that-can-be-broadcasted-using/m-p/18600#M12373</link>
    <description />
    <pubDate>Fri, 25 Jun 2021 21:51:19 GMT</pubDate>
    <dc:creator>brickster_2018</dc:creator>
    <dc:date>2021-06-25T21:51:19Z</dc:date>
    <item>
      <title>What is the maximum limit of data that can be broadcasted using broadcast join</title>
      <link>https://community.databricks.com/t5/data-engineering/what-is-the-maximum-limit-of-data-that-can-be-broadcasted-using/m-p/18600#M12373</link>
      <description />
      <pubDate>Fri, 25 Jun 2021 21:51:19 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/what-is-the-maximum-limit-of-data-that-can-be-broadcasted-using/m-p/18600#M12373</guid>
      <dc:creator>brickster_2018</dc:creator>
      <dc:date>2021-06-25T21:51:19Z</dc:date>
    </item>
    <item>
      <title>Re: What is the maximum limit of data that can be broadcasted using broadcast join</title>
      <link>https://community.databricks.com/t5/data-engineering/what-is-the-maximum-limit-of-data-that-can-be-broadcasted-using/m-p/18601#M12374</link>
      <description>&lt;P&gt;By default, only 10 MB of data can be broadcast.&lt;/P&gt;&lt;P&gt;spark.sql.autoBroadcastJoinThreshold can be increased up to 8 GB.&lt;/P&gt;&lt;P&gt;There is an upper limit in terms of records as well: we can't broadcast more than 512 million records. So it's either 512 million records or 8 GB, whichever limit is hit first.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 25 Jun 2021 21:53:47 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/what-is-the-maximum-limit-of-data-that-can-be-broadcasted-using/m-p/18601#M12374</guid>
      <dc:creator>brickster_2018</dc:creator>
      <dc:date>2021-06-25T21:53:47Z</dc:date>
    </item>
    <item>
      <title>Re: What is the maximum limit of data that can be broadcasted using broadcast join</title>
      <link>https://community.databricks.com/t5/data-engineering/what-is-the-maximum-limit-of-data-that-can-be-broadcasted-using/m-p/99022#M39890</link>
      <description>&lt;P&gt;Is the limit per "table/dataframe" or for all tables/dataframes put together?&lt;/P&gt;&lt;P&gt;The driver collects the data from all executors (those that hold the respective table or dataframe) and distributes it to all executors. When will the memory be released on both the driver and the executors? Or does it hold on to this memory throughout the pipeline/application?&lt;/P&gt;</description>
      <pubDate>Sat, 16 Nov 2024 14:01:19 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/what-is-the-maximum-limit-of-data-that-can-be-broadcasted-using/m-p/99022#M39890</guid>
      <dc:creator>lchari</dc:creator>
      <dc:date>2024-11-16T14:01:19Z</dc:date>
    </item>
  </channel>
</rss>

