<?xml version="1.0" encoding="UTF-8"?>
<!-- RSS 2.0 feed containing one channel with a single item (one forum post).
     Of the namespace prefixes declared on the root (content, dc, rdf, taxo),
     only dc is used by elements in this document. -->
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <!-- Channel metadata. The link, description, pubDate, dc:creator and dc:date
         below carry the same values as the single item further down; only the
         title differs. -->
    <title>topic Spark structured streaming in Get Started Discussions</title>
    <link>https://community.databricks.com/t5/get-started-discussions/spark-structured-streaming/m-p/73749#M7491</link>
    <!-- The description is HTML stored as escaped text (tags appear as &lt;P&gt;,
         &lt;BR /&gt;, and the HTML nbsp entity appears as &amp;nbsp;) so the
         document stays well-formed XML. The element holds text content, so
         avoid pretty-printing or re-wrapping it. -->
    <description>&lt;P&gt;&lt;EM&gt;hi,&lt;BR /&gt;&lt;/EM&gt;could someone please help me with this code :-&lt;/P&gt;&lt;P&gt;input parameter df is a spark structured streaming dataframe&lt;BR /&gt;&amp;nbsp;def apply_duplicacy_check(df, duplicate_check_columns):&lt;BR /&gt;&amp;nbsp; &amp;nbsp; if len(duplicate_check_columns) == 0:&lt;BR /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;return None, df&lt;BR /&gt;&lt;BR /&gt;&amp;nbsp; &amp;nbsp; valid_df = df.dropDuplicates(duplicate_check_columns)&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; error_df = df.exceptAll(valid_df)&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; return error_df,valid_df&lt;/P&gt;&lt;P&gt;I am getting this error :-&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;Except on a streaming DataFrame/Dataset on the right is not supported;&lt;BR /&gt;Except All true&lt;BR /&gt;:- Project [page#54781.Name AS division_name#54786, page#54781.ShortName AS short_name#54787, page#54781.ExternalSystemCode AS external_system_code#54788, page#54781.AccountingCode AS division_number#54789, page#54781.ParentDivisionId AS parent_division_id#54790, page#54781.TimeZone AS timezone#54791, page#54781.DivisionType.Id AS division_type_id#54792, page#54781.DivisionType.Name AS division_type_name#54793, sourceExtractDatetime#54773 AS source_extract_datetime#54794, page#54781.Id AS division_id#54795]&lt;BR /&gt;: +- Project [Data#54772, sourceExtractDatetime#54773, page#54781]&lt;BR /&gt;: +- Generate explode(Data#54772.Page), true, [page#54781]&lt;/P&gt;</description>
    <!-- pubDate and dc:date express the same timestamp in two different formats. -->
    <pubDate>Thu, 13 Jun 2024 08:07:12 GMT</pubDate>
    <dc:creator>nileshtiwaari</dc:creator>
    <dc:date>2024-06-13T08:07:12Z</dc:date>
    <item>
      <!-- The only item in this feed. Its link and guid hold the same URL. -->
      <title>Spark structured streaming</title>
      <link>https://community.databricks.com/t5/get-started-discussions/spark-structured-streaming/m-p/73749#M7491</link>
      <!-- Same escaped-HTML payload as the channel description; the same
           caution about not reformatting the text content applies. -->
      <description>&lt;P&gt;&lt;EM&gt;hi,&lt;BR /&gt;&lt;/EM&gt;could someone please help me with this code :-&lt;/P&gt;&lt;P&gt;input parameter df is a spark structured streaming dataframe&lt;BR /&gt;&amp;nbsp;def apply_duplicacy_check(df, duplicate_check_columns):&lt;BR /&gt;&amp;nbsp; &amp;nbsp; if len(duplicate_check_columns) == 0:&lt;BR /&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;return None, df&lt;BR /&gt;&lt;BR /&gt;&amp;nbsp; &amp;nbsp; valid_df = df.dropDuplicates(duplicate_check_columns)&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; error_df = df.exceptAll(valid_df)&lt;/P&gt;&lt;P&gt;&amp;nbsp; &amp;nbsp; return error_df,valid_df&lt;/P&gt;&lt;P&gt;I am getting this error :-&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;Except on a streaming DataFrame/Dataset on the right is not supported;&lt;BR /&gt;Except All true&lt;BR /&gt;:- Project [page#54781.Name AS division_name#54786, page#54781.ShortName AS short_name#54787, page#54781.ExternalSystemCode AS external_system_code#54788, page#54781.AccountingCode AS division_number#54789, page#54781.ParentDivisionId AS parent_division_id#54790, page#54781.TimeZone AS timezone#54791, page#54781.DivisionType.Id AS division_type_id#54792, page#54781.DivisionType.Name AS division_type_name#54793, sourceExtractDatetime#54773 AS source_extract_datetime#54794, page#54781.Id AS division_id#54795]&lt;BR /&gt;: +- Project [Data#54772, sourceExtractDatetime#54773, page#54781]&lt;BR /&gt;: +- Generate explode(Data#54772.Page), true, [page#54781]&lt;/P&gt;</description>
      <pubDate>Thu, 13 Jun 2024 08:07:12 GMT</pubDate>
      <guid>https://community.databricks.com/t5/get-started-discussions/spark-structured-streaming/m-p/73749#M7491</guid>
      <dc:creator>nileshtiwaari</dc:creator>
      <dc:date>2024-06-13T08:07:12Z</dc:date>
    </item>
  </channel>
</rss>

