<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: What the best Framework/Package for data quality in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/what-the-best-framework-package-for-data-quality/m-p/139397#M51189</link>
    <description>&lt;P&gt;Here are a few DQ packages for DLT or LDP that you can try.&lt;/P&gt;&lt;P&gt;&lt;!--  StartFragment   --&gt;&lt;/P&gt;&lt;P class=""&gt;&lt;SPAN class=""&gt;1. Databricks Labs DQX&lt;/SPAN&gt;&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;&lt;SPAN class=""&gt;Purpose-built for Spark and Databricks.&lt;/SPAN&gt;&lt;/LI&gt;&lt;LI&gt;&lt;SPAN class=""&gt;Rule-based checks on &lt;/SPAN&gt;&lt;SPAN class=""&gt;DataFrames&lt;/SPAN&gt;&lt;SPAN class=""&gt; (batch &amp;amp; streaming).&lt;/SPAN&gt;&lt;/LI&gt;&lt;LI&gt;&lt;SPAN class=""&gt;Supports quarantine and profiling.&lt;/SPAN&gt;&lt;/LI&gt;&lt;LI&gt;&lt;SPAN class=""&gt;Lightweight and easy to integrate.&lt;/SPAN&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;P class=""&gt;&lt;SPAN class=""&gt;2. Great Expectations&lt;/SPAN&gt;&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;&lt;SPAN class=""&gt;Popular Python library for data validation.&lt;/SPAN&gt;&lt;/LI&gt;&lt;LI&gt;&lt;SPAN class=""&gt;Works with Spark, Pandas, SQL.&lt;/SPAN&gt;&lt;/LI&gt;&lt;LI&gt;&lt;SPAN class=""&gt;Rich set of expectations and auto-generated documentation.&lt;/SPAN&gt;&lt;/LI&gt;&lt;LI&gt;&lt;SPAN class=""&gt;Best for governance and transparency.&lt;/SPAN&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;P class=""&gt;&lt;SPAN class=""&gt;3. &lt;/SPAN&gt;&lt;SPAN class=""&gt;Cuallee&lt;/SPAN&gt;&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;&lt;SPAN class=""&gt;Lightweight, fast, and &lt;/SPAN&gt;&lt;SPAN class=""&gt;DataFrame&lt;/SPAN&gt;&lt;SPAN class=""&gt;-agnostic.&lt;/SPAN&gt;&lt;/LI&gt;&lt;LI&gt;&lt;SPAN class=""&gt;Supports &lt;/SPAN&gt;&lt;SPAN class=""&gt;PySpark&lt;/SPAN&gt;&lt;SPAN class=""&gt;, Pandas, Polars, &lt;/SPAN&gt;&lt;SPAN class=""&gt;DuckDB&lt;/SPAN&gt;&lt;SPAN class=""&gt;.&lt;/SPAN&gt;&lt;/LI&gt;&lt;LI&gt;&lt;SPAN class=""&gt;50+ built-in checks, minimal setup.&lt;/SPAN&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;P class=""&gt;&lt;SPAN class=""&gt;4. Spark Expectations&lt;/SPAN&gt;&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;&lt;SPAN class=""&gt;Designed for Spark environments.&lt;/SPAN&gt;&lt;/LI&gt;&lt;LI&gt;&lt;SPAN class=""&gt;Uses decorators for defining rules.&lt;/SPAN&gt;&lt;/LI&gt;&lt;LI&gt;&lt;SPAN class=""&gt;Provides error tables and stats for monitoring.&lt;/SPAN&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;P class=""&gt;&lt;SPAN class=""&gt;5. Pandas-DQ&lt;/SPAN&gt;&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;&lt;SPAN class=""&gt;For quick profiling and cleaning in Pandas.&lt;/SPAN&gt;&lt;/LI&gt;&lt;LI&gt;&lt;SPAN class=""&gt;HTML reports, duplicate/missing value checks.&lt;/SPAN&gt;&lt;/LI&gt;&lt;LI&gt;&lt;SPAN class=""&gt;Ideal for small datasets or pre-ingestion checks.&lt;/SPAN&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;P&gt;&lt;!--  EndFragment   --&gt;&lt;/P&gt;</description>
    <pubDate>Mon, 17 Nov 2025 16:09:45 GMT</pubDate>
    <dc:creator>nayan_wylde</dc:creator>
    <dc:date>2025-11-17T16:09:45Z</dc:date>
    <item>
      <title>What the best Framework/Package for data quality</title>
      <link>https://community.databricks.com/t5/data-engineering/what-the-best-framework-package-for-data-quality/m-p/139385#M51183</link>
      <description>&lt;P&gt;Hi everyone,&lt;/P&gt;&lt;P&gt;I’m currently looking for a data-quality solution for my environment. I don’t have DLT tables or a Unity Catalog in place.&lt;/P&gt;&lt;P&gt;In your opinion, what is the best framework or package to implement reliable data-quality checks under these conditions?&lt;/P&gt;&lt;P&gt;Thanks in advance!&lt;/P&gt;</description>
      <pubDate>Mon, 17 Nov 2025 15:41:22 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/what-the-best-framework-package-for-data-quality/m-p/139385#M51183</guid>
      <dc:creator>William_Scardua</dc:creator>
      <dc:date>2025-11-17T15:41:22Z</dc:date>
    </item>
    <item>
      <title>Re: What the best Framework/Package for data quality</title>
      <link>https://community.databricks.com/t5/data-engineering/what-the-best-framework-package-for-data-quality/m-p/139397#M51189</link>
      <description>&lt;P&gt;Here are a few DQ packages for DLT or LDP that you can try.&lt;/P&gt;&lt;P&gt;&lt;!--  StartFragment   --&gt;&lt;/P&gt;&lt;P class=""&gt;&lt;SPAN class=""&gt;1. Databricks Labs DQX&lt;/SPAN&gt;&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;&lt;SPAN class=""&gt;Purpose-built for Spark and Databricks.&lt;/SPAN&gt;&lt;/LI&gt;&lt;LI&gt;&lt;SPAN class=""&gt;Rule-based checks on &lt;/SPAN&gt;&lt;SPAN class=""&gt;DataFrames&lt;/SPAN&gt;&lt;SPAN class=""&gt; (batch &amp;amp; streaming).&lt;/SPAN&gt;&lt;/LI&gt;&lt;LI&gt;&lt;SPAN class=""&gt;Supports quarantine and profiling.&lt;/SPAN&gt;&lt;/LI&gt;&lt;LI&gt;&lt;SPAN class=""&gt;Lightweight and easy to integrate.&lt;/SPAN&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;P class=""&gt;&lt;SPAN class=""&gt;2. Great Expectations&lt;/SPAN&gt;&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;&lt;SPAN class=""&gt;Popular Python library for data validation.&lt;/SPAN&gt;&lt;/LI&gt;&lt;LI&gt;&lt;SPAN class=""&gt;Works with Spark, Pandas, SQL.&lt;/SPAN&gt;&lt;/LI&gt;&lt;LI&gt;&lt;SPAN class=""&gt;Rich set of expectations and auto-generated documentation.&lt;/SPAN&gt;&lt;/LI&gt;&lt;LI&gt;&lt;SPAN class=""&gt;Best for governance and transparency.&lt;/SPAN&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;P class=""&gt;&lt;SPAN class=""&gt;3. &lt;/SPAN&gt;&lt;SPAN class=""&gt;Cuallee&lt;/SPAN&gt;&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;&lt;SPAN class=""&gt;Lightweight, fast, and &lt;/SPAN&gt;&lt;SPAN class=""&gt;DataFrame&lt;/SPAN&gt;&lt;SPAN class=""&gt;-agnostic.&lt;/SPAN&gt;&lt;/LI&gt;&lt;LI&gt;&lt;SPAN class=""&gt;Supports &lt;/SPAN&gt;&lt;SPAN class=""&gt;PySpark&lt;/SPAN&gt;&lt;SPAN class=""&gt;, Pandas, Polars, &lt;/SPAN&gt;&lt;SPAN class=""&gt;DuckDB&lt;/SPAN&gt;&lt;SPAN class=""&gt;.&lt;/SPAN&gt;&lt;/LI&gt;&lt;LI&gt;&lt;SPAN class=""&gt;50+ built-in checks, minimal setup.&lt;/SPAN&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;P class=""&gt;&lt;SPAN class=""&gt;4. Spark Expectations&lt;/SPAN&gt;&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;&lt;SPAN class=""&gt;Designed for Spark environments.&lt;/SPAN&gt;&lt;/LI&gt;&lt;LI&gt;&lt;SPAN class=""&gt;Uses decorators for defining rules.&lt;/SPAN&gt;&lt;/LI&gt;&lt;LI&gt;&lt;SPAN class=""&gt;Provides error tables and stats for monitoring.&lt;/SPAN&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;P class=""&gt;&lt;SPAN class=""&gt;5. Pandas-DQ&lt;/SPAN&gt;&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;&lt;SPAN class=""&gt;For quick profiling and cleaning in Pandas.&lt;/SPAN&gt;&lt;/LI&gt;&lt;LI&gt;&lt;SPAN class=""&gt;HTML reports, duplicate/missing value checks.&lt;/SPAN&gt;&lt;/LI&gt;&lt;LI&gt;&lt;SPAN class=""&gt;Ideal for small datasets or pre-ingestion checks.&lt;/SPAN&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;P&gt;&lt;!--  EndFragment   --&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 17 Nov 2025 16:09:45 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/what-the-best-framework-package-for-data-quality/m-p/139397#M51189</guid>
      <dc:creator>nayan_wylde</dc:creator>
      <dc:date>2025-11-17T16:09:45Z</dc:date>
    </item>
  </channel>
</rss>

