<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Partition filter is skipped when table is used in where condition, why? in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/partition-filter-is-skipped-when-table-is-used-in-where/m-p/12392#M7207</link>
    <description>&lt;P&gt;No hints on partition pruning afaik.&lt;/P&gt;&lt;P&gt;The reason the partitions were not pruned is because the second query generates a completely different plan.&lt;/P&gt;&lt;P&gt;To be able to filter the partitions, a join first has to happen.   And in this case it means the table has to be scanned.&lt;/P&gt;</description>
    <pubDate>Thu, 12 Jan 2023 10:19:33 GMT</pubDate>
    <dc:creator>-werners-</dc:creator>
    <dc:date>2023-01-12T10:19:33Z</dc:date>
    <item>
      <title>Partition filter is skipped when table is used in where condition, why?</title>
      <link>https://community.databricks.com/t5/data-engineering/partition-filter-is-skipped-when-table-is-used-in-where/m-p/12391#M7206</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;maybe someone can help me i do want to run very narrow query &lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;SELECT *
FROM my_table
WHERE snapshot_date IN('2023-01-06', '2023-01-07')
&amp;nbsp;
-- part of the physical plan:
-- Location: PreparedDeltaFileIndex [dbfs:/...]
-- PartitionFilters: [cast(snapshot_date#282634 as string) IN (2023-01-06,2033-01-07)]&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;it is very fast ~1s, in the execution plan i see it is using those provided datasets as arguments for partition filters.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;If I try to provide filter predicate in form of the one column table it does full table scan and it takes 100x longer.&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;%sql
SELECT *
FROM
  my_table
WHERE snapshot_date IN (
    SELECT snapshot_date
    FROM (VLUES('2023-01-06'), ('2023-01-07')) T(snapshot_date)
  )
&amp;nbsp;
-- plan
Location: PreparedDeltaFileIndex [dbfs:/...]
ReadSchema: ...&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt; Is there a way to provide hints for parition filters or something to make sure it does not read whole table?&lt;/P&gt;</description>
      <pubDate>Thu, 12 Jan 2023 09:24:47 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/partition-filter-is-skipped-when-table-is-used-in-where/m-p/12391#M7206</guid>
      <dc:creator>pasiasty2077</dc:creator>
      <dc:date>2023-01-12T09:24:47Z</dc:date>
    </item>
    <item>
      <title>Re: Partition filter is skipped when table is used in where condition, why?</title>
      <link>https://community.databricks.com/t5/data-engineering/partition-filter-is-skipped-when-table-is-used-in-where/m-p/12392#M7207</link>
      <description>&lt;P&gt;No hints on partition pruning afaik.&lt;/P&gt;&lt;P&gt;The reason the partitions were not pruned is because the second query generates a completely different plan.&lt;/P&gt;&lt;P&gt;To be able to filter the partitions, a join first has to happen.   And in this case it means the table has to be scanned.&lt;/P&gt;</description>
      <pubDate>Thu, 12 Jan 2023 10:19:33 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/partition-filter-is-skipped-when-table-is-used-in-where/m-p/12392#M7207</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2023-01-12T10:19:33Z</dc:date>
    </item>
  </channel>
</rss>

