<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic data=[['x', 20220118, 'FALSE', 3],['x', 20220118, 'TRUE', 97],['x', 20220119, 'FALSE', 1],['x'... in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/data-39-x-39-20220118-39-false-39-3-39-x-39-20220118-39-true-39/m-p/31266#M22748</link>
    <description>&lt;P&gt;data=[['x', 20220118, 'FALSE', 3],['x', 20220118, 'TRUE', 97],['x', 20220119, 'FALSE', 1],['x', 20220119, 'TRUE', 49],['Y', 20220118, 'FALSE', 100],['Y', 20220118, 'TRUE', 900],['Y', 20220119, 'FALSE', 200],['Y', 20220119, 'TRUE', 800]]&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;df=spark.createDataFrame(data, ['source', 'date', 'formattedMessage', 'messageCount'])&lt;/P&gt;&lt;P&gt;df.show()&lt;/P&gt;&lt;P&gt;df.createOrReplaceTempView("source_data")&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;data=[['x', 20220118, 'FALSE', 3],['x', 20220118, 'TRUE', 97],['x', 20220119, 'FALSE', 1],['x', 20220119, 'TRUE', 49],['Y', 20220118, 'FALSE', 100],['Y', 20220118, 'TRUE', 900],['Y', 20220119, 'FALSE', 200],['Y', 20220119, 'TRUE', 800]]&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;df=spark.createDataFrame(data, ['source', 'date', 'formattedMessage', 'messageCount'])&lt;/P&gt;&lt;P&gt;df.show()&lt;/P&gt;&lt;P&gt;df.createOrReplaceTempView("source_data")&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;spark.sql("with count_data as (select source, date, sum(messageCount) as total, count(date) from source_data group By source, date), main_data as (select * from source_data where formattedMessage='FALSE')select cd.source, cd.date, (messageCount/total)*100||'0%' as badMessages from count_data cd inner join main_data md on cd.source=md.source and cd.date=md.date").show()&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;**Note: 0% I used default value, round function can be used for decimal places. &lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
    <pubDate>Fri, 21 Jan 2022 10:40:32 GMT</pubDate>
    <dc:creator>Azure_Data_Eng1</dc:creator>
    <dc:date>2022-01-21T10:40:32Z</dc:date>
    <item>
      <title>data=[['x', 20220118, 'FALSE', 3],['x', 20220118, 'TRUE', 97],['x', 20220119, 'FALSE', 1],['x'...</title>
      <link>https://community.databricks.com/t5/data-engineering/data-39-x-39-20220118-39-false-39-3-39-x-39-20220118-39-true-39/m-p/31266#M22748</link>
      <description>&lt;P&gt;data=[['x', 20220118, 'FALSE', 3],['x', 20220118, 'TRUE', 97],['x', 20220119, 'FALSE', 1],['x', 20220119, 'TRUE', 49],['Y', 20220118, 'FALSE', 100],['Y', 20220118, 'TRUE', 900],['Y', 20220119, 'FALSE', 200],['Y', 20220119, 'TRUE', 800]]&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;df=spark.createDataFrame(data, ['source', 'date', 'formattedMessage', 'messageCount'])&lt;/P&gt;&lt;P&gt;df.show()&lt;/P&gt;&lt;P&gt;df.createOrReplaceTempView("source_data")&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;data=[['x', 20220118, 'FALSE', 3],['x', 20220118, 'TRUE', 97],['x', 20220119, 'FALSE', 1],['x', 20220119, 'TRUE', 49],['Y', 20220118, 'FALSE', 100],['Y', 20220118, 'TRUE', 900],['Y', 20220119, 'FALSE', 200],['Y', 20220119, 'TRUE', 800]]&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;df=spark.createDataFrame(data, ['source', 'date', 'formattedMessage', 'messageCount'])&lt;/P&gt;&lt;P&gt;df.show()&lt;/P&gt;&lt;P&gt;df.createOrReplaceTempView("source_data")&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;spark.sql("with count_data as (select source, date, sum(messageCount) as total, count(date) from source_data group By source, date), main_data as (select * from source_data where formattedMessage='FALSE')select cd.source, cd.date, (messageCount/total)*100||'0%' as badMessages from count_data cd inner join main_data md on cd.source=md.source and cd.date=md.date").show()&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;**Note: 0% I used default value, round function can be used for decimal places. &lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 21 Jan 2022 10:40:32 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/data-39-x-39-20220118-39-false-39-3-39-x-39-20220118-39-true-39/m-p/31266#M22748</guid>
      <dc:creator>Azure_Data_Eng1</dc:creator>
      <dc:date>2022-01-21T10:40:32Z</dc:date>
    </item>
  </channel>
</rss>

