<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic How to take distinct of multiple columns ( &gt; than 2 columns) in pyspark datafarme ? in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/how-to-take-distinct-of-multiple-columns-gt-than-2-columns-in/m-p/28220#M20043</link>
    <description>&lt;P&gt;&lt;/P&gt;
&lt;P&gt;I have 10+ columns and want to take distinct rows by multiple columns into consideration. How to achieve this using pyspark dataframe functions ? &lt;/P&gt; 
&lt;P&gt;&lt;/P&gt;</description>
    <pubDate>Tue, 05 Mar 2019 07:58:17 GMT</pubDate>
    <dc:creator>srchella</dc:creator>
    <dc:date>2019-03-05T07:58:17Z</dc:date>
    <item>
      <title>How to take distinct of multiple columns ( &gt; than 2 columns) in pyspark datafarme ?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-take-distinct-of-multiple-columns-gt-than-2-columns-in/m-p/28220#M20043</link>
      <description>&lt;P&gt;&lt;/P&gt;
&lt;P&gt;I have 10+ columns and want to take distinct rows by multiple columns into consideration. How to achieve this using pyspark dataframe functions ? &lt;/P&gt; 
&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 05 Mar 2019 07:58:17 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-take-distinct-of-multiple-columns-gt-than-2-columns-in/m-p/28220#M20043</guid>
      <dc:creator>srchella</dc:creator>
      <dc:date>2019-03-05T07:58:17Z</dc:date>
    </item>
    <item>
      <title>Re: How to take distinct of multiple columns ( &gt; than 2 columns) in pyspark datafarme ?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-take-distinct-of-multiple-columns-gt-than-2-columns-in/m-p/28221#M20044</link>
      <description>&lt;P&gt;&lt;/P&gt;
&lt;P&gt;You can use dropDuplicates&lt;/P&gt;
&lt;P&gt; &lt;A href="https://spark.apache.org/docs/latest/api/python/pyspark.sql.html?highlight=distinct#pyspark.sql.DataFrame.dropDuplicates" target="test_blank"&gt;https://spark.apache.org/docs/latest/api/python/pyspark.sql.html?highlight=distinct#pyspark.sql.DataFrame.dropDuplicates&lt;/A&gt;&lt;/P&gt; 
&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 28 Mar 2019 15:06:05 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-take-distinct-of-multiple-columns-gt-than-2-columns-in/m-p/28221#M20044</guid>
      <dc:creator>Sandeep</dc:creator>
      <dc:date>2019-03-28T15:06:05Z</dc:date>
    </item>
  </channel>
</rss>

