<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Filtering delta table by CONCAT of a partition column and a non-partition one in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/filtering-delta-table-by-concat-of-a-partition-column-and-a-non/m-p/3829#M738</link>
    <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;I know that filtering a delta table on a partition column is a very powerful time-saving approach, but what if this column appears inside a CONCAT in the where-clause?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I explain my case: I have a delta table with only one partition column, say called col1. I need to query this table through an API request by using a serverless SQL warehouse in Databricks SQL, and for my purpose it is simpler to implement a filter as a CONCAT of col1 together with another column. &lt;/P&gt;&lt;P&gt;Is Spark smart enough to understand that this table is partitioned on one of the two columns, or do I lose the partition info?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thanks&lt;/P&gt;</description>
    <pubDate>Wed, 31 May 2023 08:37:33 GMT</pubDate>
    <dc:creator>darioAnt</dc:creator>
    <dc:date>2023-05-31T08:37:33Z</dc:date>
    <item>
      <title>Filtering delta table by CONCAT of a partition column and a non-partition one</title>
      <link>https://community.databricks.com/t5/data-engineering/filtering-delta-table-by-concat-of-a-partition-column-and-a-non/m-p/3829#M738</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;I know that filtering a delta table on a partition column is a very powerful time-saving approach, but what if this column appears inside a CONCAT in the where-clause?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I explain my case: I have a delta table with only one partition column, say called col1. I need to query this table through an API request by using a serverless SQL warehouse in Databricks SQL, and for my purpose it is simpler to implement a filter as a CONCAT of col1 together with another column. &lt;/P&gt;&lt;P&gt;Is Spark smart enough to understand that this table is partitioned on one of the two columns, or do I lose the partition info?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thanks&lt;/P&gt;</description>
      <pubDate>Wed, 31 May 2023 08:37:33 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/filtering-delta-table-by-concat-of-a-partition-column-and-a-non/m-p/3829#M738</guid>
      <dc:creator>darioAnt</dc:creator>
      <dc:date>2023-05-31T08:37:33Z</dc:date>
    </item>
    <item>
      <title>Re: Filtering delta table by CONCAT of a partition column and a non-partition one</title>
      <link>https://community.databricks.com/t5/data-engineering/filtering-delta-table-by-concat-of-a-partition-column-and-a-non/m-p/3830#M739</link>
      <description>&lt;P&gt;I ran a test myself and the answer is no:&lt;/P&gt;&lt;P&gt;with a CONCAT filter, Spark SQL does not know that I am using a partition-based column, so it scans the entire table. &lt;span class="lia-unicode-emoji" title=":disappointed_face:"&gt;😞&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 31 May 2023 13:21:20 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/filtering-delta-table-by-concat-of-a-partition-column-and-a-non/m-p/3830#M739</guid>
      <dc:creator>darioAnt</dc:creator>
      <dc:date>2023-05-31T13:21:20Z</dc:date>
    </item>
  </channel>
</rss>

