<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Liquid Clustering With more than 4 columns in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/liquid-clustering-with-more-than-4-columns/m-p/92893#M38578</link>
    <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/117376"&gt;@filipniziol&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;&lt;P&gt;Good idea. I'll try it and will come back with the result. Thanks!&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Mon, 07 Oct 2024 07:02:58 GMT</pubDate>
    <dc:creator>Erfan</dc:creator>
    <dc:date>2024-10-07T07:02:58Z</dc:date>
    <item>
      <title>Liquid Clustering With more than 4 columns</title>
      <link>https://community.databricks.com/t5/data-engineering/liquid-clustering-with-more-than-4-columns/m-p/92877#M38573</link>
      <description>&lt;P&gt;Hi there,&lt;/P&gt;&lt;P&gt;I’m trying to join a small table (a few million records) with a much larger table (around 1 TB in size, containing a few billion records).&lt;/P&gt;&lt;P&gt;The small table isn’t quite small enough to use Broadcast. Additionally, our join clause involves more than four columns. I attempted to enable Liquid Clustering on the large table, but it only supports up to four columns. I experimented with different combinations of four-column sets for Liquid Clustering, but none of them reduced the join time.&lt;/P&gt;&lt;P&gt;Do you have any recommendations for optimizing a query on a table with Liquid Clustering when the join criteria involve more than four columns?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 07 Oct 2024 02:29:35 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/liquid-clustering-with-more-than-4-columns/m-p/92877#M38573</guid>
      <dc:creator>Erfan</dc:creator>
      <dc:date>2024-10-07T02:29:35Z</dc:date>
    </item>
    <item>
      <title>Re: Liquid Clustering With more than 4 columns</title>
      <link>https://community.databricks.com/t5/data-engineering/liquid-clustering-with-more-than-4-columns/m-p/92892#M38577</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/39264"&gt;@Erfan&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;&lt;P&gt;What you can do is to create an additional column that concatenates the values of multiple columns and then apply Liquid Clustering on that new column.&lt;/P&gt;</description>
      <pubDate>Mon, 07 Oct 2024 06:55:05 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/liquid-clustering-with-more-than-4-columns/m-p/92892#M38577</guid>
      <dc:creator>filipniziol</dc:creator>
      <dc:date>2024-10-07T06:55:05Z</dc:date>
    </item>
    <item>
      <title>Re: Liquid Clustering With more than 4 columns</title>
      <link>https://community.databricks.com/t5/data-engineering/liquid-clustering-with-more-than-4-columns/m-p/92893#M38578</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/117376"&gt;@filipniziol&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;&lt;P&gt;Good idea. I'll try it and will come back with the result. Thanks!&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 07 Oct 2024 07:02:58 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/liquid-clustering-with-more-than-4-columns/m-p/92893#M38578</guid>
      <dc:creator>Erfan</dc:creator>
      <dc:date>2024-10-07T07:02:58Z</dc:date>
    </item>
    <item>
      <title>Re: Liquid Clustering With more than 4 columns</title>
      <link>https://community.databricks.com/t5/data-engineering/liquid-clustering-with-more-than-4-columns/m-p/93207#M38651</link>
      <description>&lt;P&gt;Unfortunately, since I am not the owner of the data, I am not allowed to add an additional column. So I can't test it. But I guess your idea&lt;/P&gt;</description>
      <pubDate>Wed, 09 Oct 2024 01:53:48 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/liquid-clustering-with-more-than-4-columns/m-p/93207#M38651</guid>
      <dc:creator>Erfan</dc:creator>
      <dc:date>2024-10-09T01:53:48Z</dc:date>
    </item>
  </channel>
</rss>

