<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: DLT Pipeline &amp; Automatic Liquid Clustering Syntax in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/dlt-pipeline-amp-automatic-liquid-clustering-syntax/m-p/121303#M46414</link>
    <description>&lt;P&gt;You can now use Automatic Liquid Clustering with Python:&lt;/P&gt;&lt;LI-CODE lang="python"&gt;# Enabling Automatic Liquid Clustering on a new table
@dlt.table(cluster_by_auto=True)
def tbl_with_auto():
   return spark.range(5)

# Manually choosing a clustering key initially, followed by automatic clustering
@dlt.table(cluster_by_auto=True, cluster_by=["id"])
def tbl_with_auto_and_initial_hint():
   return spark.range(5)&lt;/LI-CODE&gt;</description>
    <pubDate>Tue, 10 Jun 2025 08:51:29 GMT</pubDate>
    <dc:creator>lucami</dc:creator>
    <dc:date>2025-06-10T08:51:29Z</dc:date>
    <item>
      <title>DLT Pipeline &amp; Automatic Liquid Clustering Syntax</title>
      <link>https://community.databricks.com/t5/data-engineering/dlt-pipeline-amp-automatic-liquid-clustering-syntax/m-p/115388#M45077</link>
      <description>&lt;P&gt;Hi everyone,&lt;/P&gt;&lt;P&gt;I noticed Databricks recently released the automatic liquid clustering feature, which looks very promising. I'm currently implementing a DLT pipeline and would like to leverage this new functionality.&lt;/P&gt;&lt;P&gt;However, I'm having trouble figuring out the correct syntax to integrate automatic liquid clustering within my DLT pipeline. I've tried the following code, but it doesn't seem to be working as expected.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;dlt.create_streaming_table(
        "table_a",
        schema="""   id STRING NOT NULL,
                    description STRING NOT NULL,
                    is_current BOOLEAN NOT NULL,
        """,
        cluster_by=["auto"],
        comment="table a with automatic liquid clustering",
    )&lt;/LI-CODE&gt;&lt;P&gt;Could someone please provide an example of the correct syntax for using automatic liquid clustering within a Databricks DLT pipeline? Any guidance or best practices would be greatly appreciated!&lt;/P&gt;&lt;P&gt;Thanks in advance!&lt;/P&gt;</description>
      <pubDate>Mon, 14 Apr 2025 07:26:26 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/dlt-pipeline-amp-automatic-liquid-clustering-syntax/m-p/115388#M45077</guid>
      <dc:creator>HoussemBL</dc:creator>
      <dc:date>2025-04-14T07:26:26Z</dc:date>
    </item>
    <item>
      <title>Re: DLT Pipeline &amp; Automatic Liquid Clustering Syntax</title>
      <link>https://community.databricks.com/t5/data-engineering/dlt-pipeline-amp-automatic-liquid-clustering-syntax/m-p/115421#M45080</link>
      <description>&lt;P&gt;Hi!&lt;/P&gt;&lt;P&gt;I think it's worth trying the same syntax, as is shown here:&amp;nbsp;&lt;A href="https://docs.databricks.com/aws/en/delta/clustering?language=Python" target="_blank"&gt;https://docs.databricks.com/aws/en/delta/clustering?language=Python&lt;/A&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 14 Apr 2025 14:53:41 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/dlt-pipeline-amp-automatic-liquid-clustering-syntax/m-p/115421#M45080</guid>
      <dc:creator>notwarte</dc:creator>
      <dc:date>2025-04-14T14:53:41Z</dc:date>
    </item>
    <item>
      <title>Re: DLT Pipeline &amp; Automatic Liquid Clustering Syntax</title>
      <link>https://community.databricks.com/t5/data-engineering/dlt-pipeline-amp-automatic-liquid-clustering-syntax/m-p/115426#M45081</link>
      <description>&lt;P&gt;Also: &lt;A href="https://community.databricks.com/t5/community-platform-discussions/cluster-by-auto-pyspark/m-p/115310#M9863" target="_blank"&gt;https://community.databricks.com/t5/community-platform-discussions/cluster-by-auto-pyspark/m-p/115310#M9863&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 14 Apr 2025 15:55:53 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/dlt-pipeline-amp-automatic-liquid-clustering-syntax/m-p/115426#M45081</guid>
      <dc:creator>notwarte</dc:creator>
      <dc:date>2025-04-14T15:55:53Z</dc:date>
    </item>
    <item>
      <title>Re: DLT Pipeline &amp; Automatic Liquid Clustering Syntax</title>
      <link>https://community.databricks.com/t5/data-engineering/dlt-pipeline-amp-automatic-liquid-clustering-syntax/m-p/115484#M45092</link>
      <description>&lt;P&gt;Thanks a lot for your reply&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/156994"&gt;@notwarte&lt;/a&gt;&amp;nbsp;&lt;BR /&gt;I cannot really use the links that you suggest as I am implementing a DLT pipeline. The syntax of&amp;nbsp;&lt;SPAN&gt;DLT Python is different especially when it comes to creating tables.&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 15 Apr 2025 09:13:30 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/dlt-pipeline-amp-automatic-liquid-clustering-syntax/m-p/115484#M45092</guid>
      <dc:creator>HoussemBL</dc:creator>
      <dc:date>2025-04-15T09:13:30Z</dc:date>
    </item>
    <item>
      <title>Re: DLT Pipeline &amp; Automatic Liquid Clustering Syntax</title>
      <link>https://community.databricks.com/t5/data-engineering/dlt-pipeline-amp-automatic-liquid-clustering-syntax/m-p/115672#M45154</link>
      <description>&lt;P&gt;Hey&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/115968"&gt;@HoussemBL&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;You're correct about DLT not supporting Auto LC. You can assign any columns in the cluster_by but if you set it to auto, it will throw an error complaining about auto not being present in the list of columns.&lt;/P&gt;&lt;P&gt;Maybe altering the table to set/reset the LC is the only option left as of now.&lt;/P&gt;&lt;P&gt;Let me know your thoughts.&lt;/P&gt;&lt;P&gt;Cheers!&lt;/P&gt;</description>
      <pubDate>Wed, 16 Apr 2025 16:51:11 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/dlt-pipeline-amp-automatic-liquid-clustering-syntax/m-p/115672#M45154</guid>
      <dc:creator>RiyazAliM</dc:creator>
      <dc:date>2025-04-16T16:51:11Z</dc:date>
    </item>
    <item>
      <title>Re: DLT Pipeline &amp; Automatic Liquid Clustering Syntax</title>
      <link>https://community.databricks.com/t5/data-engineering/dlt-pipeline-amp-automatic-liquid-clustering-syntax/m-p/118430#M45639</link>
      <description>&lt;P&gt;It works with SQL syntax (using CLUSTER BY AUTO), but not with pyspark.&lt;/P&gt;</description>
      <pubDate>Thu, 08 May 2025 09:30:19 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/dlt-pipeline-amp-automatic-liquid-clustering-syntax/m-p/118430#M45639</guid>
      <dc:creator>lucami</dc:creator>
      <dc:date>2025-05-08T09:30:19Z</dc:date>
    </item>
    <item>
      <title>Re: DLT Pipeline &amp; Automatic Liquid Clustering Syntax</title>
      <link>https://community.databricks.com/t5/data-engineering/dlt-pipeline-amp-automatic-liquid-clustering-syntax/m-p/121303#M46414</link>
      <description>&lt;P&gt;You can now use Automatic Liquid Clustering with Python:&lt;/P&gt;&lt;LI-CODE lang="python"&gt;# Enabling Automatic Liquid Clustering on a new table
@dlt.table(cluster_by_auto=True)
def tbl_with_auto():
   return spark.range(5)

# Manually choosing a clustering key initially, followed by automatic clustering
@dlt.table(cluster_by_auto=True, cluster_by=["id"])
def tbl_with_auto_and_initial_hint():
   return spark.range(5)&lt;/LI-CODE&gt;</description>
      <pubDate>Tue, 10 Jun 2025 08:51:29 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/dlt-pipeline-amp-automatic-liquid-clustering-syntax/m-p/121303#M46414</guid>
      <dc:creator>lucami</dc:creator>
      <dc:date>2025-06-10T08:51:29Z</dc:date>
    </item>
    <item>
      <title>Re: DLT Pipeline &amp; Automatic Liquid Clustering Syntax</title>
      <link>https://community.databricks.com/t5/data-engineering/dlt-pipeline-amp-automatic-liquid-clustering-syntax/m-p/121699#M46523</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/136214"&gt;@lucami&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P class=""&gt;Still unfortunately getting an error when attempting to run your code. Here's the specific error message:&lt;/P&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&amp;nbsp;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV class=""&gt;&lt;PRE&gt;org.apache.spark.sql.AnalysisException: [CLUSTER_BY_AUTO_REQUIRES_PREDICTIVE_OPTIMIZATION] &lt;BR /&gt;CLUSTER BY AUTO requires Predictive Optimization to be enabled. &lt;BR /&gt;SQLSTATE: 56038&lt;/PRE&gt;&lt;/DIV&gt;&lt;P class=""&gt;&lt;STRONG&gt;Additional context:&lt;/STRONG&gt;&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;&lt;P class=""&gt;Predictive Optimization is enabled in our Databricks account.&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P class=""&gt;According to the documentation, this feature should be automatically enabled for all workspaces, catalogs, and tables.&lt;/P&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;P class=""&gt;Is there any extra setting that should be added in DLT pipeline definition?&lt;/P&gt;</description>
      <pubDate>Fri, 13 Jun 2025 10:44:52 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/dlt-pipeline-amp-automatic-liquid-clustering-syntax/m-p/121699#M46523</guid>
      <dc:creator>HoussemBL</dc:creator>
      <dc:date>2025-06-13T10:44:52Z</dc:date>
    </item>
    <item>
      <title>Re: DLT Pipeline &amp; Automatic Liquid Clustering Syntax</title>
      <link>https://community.databricks.com/t5/data-engineering/dlt-pipeline-amp-automatic-liquid-clustering-syntax/m-p/121700#M46524</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/115968"&gt;@HoussemBL&lt;/a&gt;, I had the same issue.&amp;nbsp;&lt;SPAN&gt;As far as I know, automatic Liquid Clustering on DLT is in private preview; I would suggest contacting your sales representative to enable it &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 13 Jun 2025 11:30:57 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/dlt-pipeline-amp-automatic-liquid-clustering-syntax/m-p/121700#M46524</guid>
      <dc:creator>lucami</dc:creator>
      <dc:date>2025-06-13T11:30:57Z</dc:date>
    </item>
    <item>
      <title>Re: DLT Pipeline &amp; Automatic Liquid Clustering Syntax</title>
      <link>https://community.databricks.com/t5/data-engineering/dlt-pipeline-amp-automatic-liquid-clustering-syntax/m-p/121712#M46526</link>
      <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/115968"&gt;@HoussemBL&lt;/a&gt;&amp;nbsp;, you can check if PO is enabled for the target catalog in DLT.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 13 Jun 2025 13:03:04 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/dlt-pipeline-amp-automatic-liquid-clustering-syntax/m-p/121712#M46526</guid>
      <dc:creator>nikhilj0421</dc:creator>
      <dc:date>2025-06-13T13:03:04Z</dc:date>
    </item>
    <item>
      <title>Re: DLT Pipeline &amp; Automatic Liquid Clustering Syntax</title>
      <link>https://community.databricks.com/t5/data-engineering/dlt-pipeline-amp-automatic-liquid-clustering-syntax/m-p/121772#M46544</link>
      <description>&lt;P&gt;Same issue here. I have activated PO on the specific schema where the materialized view resides per these instructions&amp;nbsp;&lt;A href="https://docs.databricks.com/aws/en/optimizations/predictive-optimization#check-whether-predictive-optimization-is-enabled" target="_blank" rel="noopener"&gt;https://docs.databricks.com/aws/en/optimizations/predictive-optimization#check-whether-predictive-optimization-is-enabled&lt;/A&gt;&lt;BR /&gt;- Doesn't help with the issue&lt;BR /&gt;&lt;BR /&gt;Problem hypothesis: DLT (newly renamed to lakeflow declarative pipelines) is not creating Unity Catalog Managed Tables, which is a pre-condition for Predictive Optimization, which in turn is a pre-condition for automated liquid clustering.&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;Context:&lt;BR /&gt;- Predictive optimization is enabled on the account and the specific unity catalog schemas used&lt;BR /&gt;- Other tables (non-DLT created) in the schemas are Unity catalog managed (see image) and then unity catalog shows the validation in the UI.See image below:&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Alex006_0-1749898948011.png" style="width: 400px;"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/17523i4D37808E7FB81506/image-size/medium?v=v2&amp;amp;px=400" role="button" title="Alex006_0-1749898948011.png" alt="Alex006_0-1749898948011.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;Proof of PO being activated for the schema&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="Alex006_1-1749899682047.png" style="width: 400px;"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/17524i27DBC7A4626AB229/image-size/medium?v=v2&amp;amp;px=400" role="button" title="Alex006_1-1749899682047.png" alt="Alex006_1-1749899682047.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Question&lt;BR /&gt;- 
Is DLT not capable of creating unity catalog managed tables?&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;</description>
      <pubDate>Sat, 14 Jun 2025 11:15:01 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/dlt-pipeline-amp-automatic-liquid-clustering-syntax/m-p/121772#M46544</guid>
      <dc:creator>Alex006</dc:creator>
      <dc:date>2025-06-14T11:15:01Z</dc:date>
    </item>
    <item>
      <title>Re: DLT Pipeline &amp; Automatic Liquid Clustering Syntax</title>
      <link>https://community.databricks.com/t5/data-engineering/dlt-pipeline-amp-automatic-liquid-clustering-syntax/m-p/129156#M48452</link>
      <description>&lt;P&gt;Is there a resolution to this? I am having the same problem. I can create tables with cluster by auto, but the MVs are failing saying I need to enable PO. This was working yesterday and is working in other environments.&lt;/P&gt;</description>
      <pubDate>Thu, 21 Aug 2025 16:35:47 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/dlt-pipeline-amp-automatic-liquid-clustering-syntax/m-p/129156#M48452</guid>
      <dc:creator>jsturgeon</dc:creator>
      <dc:date>2025-08-21T16:35:47Z</dc:date>
    </item>
  </channel>
</rss>

