<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Are we using the advantage of "Map &amp; Reduce" ? in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/are-we-using-the-advantage-of-quot-map-reduce-quot/m-p/29184#M20930</link>
    <description>&lt;P&gt;Thank you.&lt;/P&gt;</description>
    <pubDate>Tue, 08 Feb 2022 12:53:08 GMT</pubDate>
    <dc:creator>wyzer</dc:creator>
    <dc:date>2022-02-08T12:53:08Z</dc:date>
    <item>
      <title>Are we using the advantage of "Map &amp; Reduce" ?</title>
      <link>https://community.databricks.com/t5/data-engineering/are-we-using-the-advantage-of-quot-map-reduce-quot/m-p/29181#M20927</link>
      <description>&lt;P&gt;Hello,&lt;/P&gt;&lt;P&gt;We are new on Databricks and we would like to know if our working method are good.&lt;/P&gt;&lt;P&gt;Currently, we are working like this :&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;spark.sql("CREATE TABLE Temp (SELECT avg(***), sum(***) FROM aaa LEFT JOIN bbb WHERE *** &amp;gt;= ***)")&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;With this method, are we using the full capacity of Databricks, like "Map &amp;amp; Reduce" ?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thanks.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 07 Feb 2022 14:06:57 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/are-we-using-the-advantage-of-quot-map-reduce-quot/m-p/29181#M20927</guid>
      <dc:creator>wyzer</dc:creator>
      <dc:date>2022-02-07T14:06:57Z</dc:date>
    </item>
    <item>
      <title>Re: Are we using the advantage of "Map &amp; Reduce" ?</title>
      <link>https://community.databricks.com/t5/data-engineering/are-we-using-the-advantage-of-quot-map-reduce-quot/m-p/29183#M20929</link>
      <description>&lt;P&gt;Spark will handle the map/reduce for you.&lt;/P&gt;&lt;P&gt;So as long as you use Spark provided functions, be it in scala, python or sql (or even R) you will be using distributed processing.&lt;/P&gt;&lt;P&gt;You just care about what you want as a result.&lt;/P&gt;&lt;P&gt;And afterwards when you are more familiar with Spark you can start tuning (f.e. trying to avoid shuffles, other join types etc)&lt;/P&gt;</description>
      <pubDate>Tue, 08 Feb 2022 10:23:30 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/are-we-using-the-advantage-of-quot-map-reduce-quot/m-p/29183#M20929</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2022-02-08T10:23:30Z</dc:date>
    </item>
    <item>
      <title>Re: Are we using the advantage of "Map &amp; Reduce" ?</title>
      <link>https://community.databricks.com/t5/data-engineering/are-we-using-the-advantage-of-quot-map-reduce-quot/m-p/29184#M20930</link>
      <description>&lt;P&gt;Thank you.&lt;/P&gt;</description>
      <pubDate>Tue, 08 Feb 2022 12:53:08 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/are-we-using-the-advantage-of-quot-map-reduce-quot/m-p/29184#M20930</guid>
      <dc:creator>wyzer</dc:creator>
      <dc:date>2022-02-08T12:53:08Z</dc:date>
    </item>
  </channel>
</rss>

