<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Where can we use Broadcast variable? in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/where-can-we-use-broadcast-variable/m-p/14318#M8833</link>
    <description>&lt;P&gt;best situations where we can use broadcast variables ?&lt;/P&gt;</description>
    <pubDate>Wed, 28 Dec 2022 05:53:50 GMT</pubDate>
    <dc:creator>Prototype998</dc:creator>
    <dc:date>2022-12-28T05:53:50Z</dc:date>
    <item>
      <title>Where can we use Broadcast variable?</title>
      <link>https://community.databricks.com/t5/data-engineering/where-can-we-use-broadcast-variable/m-p/14318#M8833</link>
      <description>&lt;P&gt;best situations where we can use broadcast variables ?&lt;/P&gt;</description>
      <pubDate>Wed, 28 Dec 2022 05:53:50 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/where-can-we-use-broadcast-variable/m-p/14318#M8833</guid>
      <dc:creator>Prototype998</dc:creator>
      <dc:date>2022-12-28T05:53:50Z</dc:date>
    </item>
    <item>
      <title>Re: Where can we use Broadcast variable?</title>
      <link>https://community.databricks.com/t5/data-engineering/where-can-we-use-broadcast-variable/m-p/14319#M8834</link>
      <description>&lt;P&gt;Hey @Punit Chauhan​&amp;nbsp;&lt;/P&gt;&lt;P&gt;Broadcast variables (BV) are used in the same way for RDD, DataFrame, and Dataset.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;When you run a Spark RDD or DataFrame job that has broadcast variables defined and used, Spark does the following:&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;Spark breaks the job into stages that have distributed shuffling, and actions are executed within the stage.&lt;/LI&gt;&lt;LI&gt;Later, stages are also broken into tasks.&lt;/LI&gt;&lt;LI&gt;Spark broadcasts the common (reusable) data needed by tasks within each stage.&lt;/LI&gt;&lt;LI&gt;The broadcast data is cached in serialized format and deserialized before executing each task.&lt;/LI&gt;&lt;/UL&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 28 Dec 2022 07:50:56 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/where-can-we-use-broadcast-variable/m-p/14319#M8834</guid>
      <dc:creator>Rishabh-Pandey</dc:creator>
      <dc:date>2022-12-28T07:50:56Z</dc:date>
    </item>
  </channel>
</rss>

