<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Data-quality help: Save Data Profile dbutils.data.summarize(df) to table in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/data-quality-help-save-data-profile-dbutils-data-summarize-df-to/m-p/15597#M9905</link>
    <description>&lt;P&gt;Hi there,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;We would like to create a data quality database that helps us understand how complete our data is. We would like to run a job each day that basically outputs the same table data as dbutils.data.summarize(df) for a given table and saves it to Databricks.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Any ideas on how we could do that?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thanks,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Avkash&lt;/P&gt;</description>
    <pubDate>Tue, 20 Dec 2022 20:05:31 GMT</pubDate>
    <dc:creator>Kash</dc:creator>
    <dc:date>2022-12-20T20:05:31Z</dc:date>
    <item>
      <title>Data-quality help: Save Data Profile dbutils.data.summarize(df) to table</title>
      <link>https://community.databricks.com/t5/data-engineering/data-quality-help-save-data-profile-dbutils-data-summarize-df-to/m-p/15597#M9905</link>
      <description>&lt;P&gt;Hi there,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;We would like to create a data quality database that helps us understand how complete our data is. We would like to run a job each day that basically outputs the same table data as dbutils.data.summarize(df) for a given table and saves it to Databricks.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Any ideas on how we could do that?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thanks,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Avkash&lt;/P&gt;</description>
      <pubDate>Tue, 20 Dec 2022 20:05:31 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/data-quality-help-save-data-profile-dbutils-data-summarize-df-to/m-p/15597#M9905</guid>
      <dc:creator>Kash</dc:creator>
      <dc:date>2022-12-20T20:05:31Z</dc:date>
    </item>
    <item>
      <title>Re: Data-quality help: Save Data Profile dbutils.data.summarize(df) to table</title>
      <link>https://community.databricks.com/t5/data-engineering/data-quality-help-save-data-profile-dbutils-data-summarize-df-to/m-p/15598#M9906</link>
      <description>&lt;P&gt;From what I know, there's no easy way to save dbutils.data.summarize() into a df.&lt;/P&gt;&lt;P&gt;You can still create your own custom Python/PySpark code to profile your data and save the output.&lt;/P&gt;</description>
      <pubDate>Thu, 22 Dec 2022 13:17:20 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/data-quality-help-save-data-profile-dbutils-data-summarize-df-to/m-p/15598#M9906</guid>
      <dc:creator>daniel_sahal</dc:creator>
      <dc:date>2022-12-22T13:17:20Z</dc:date>
    </item>
  </channel>
</rss>

