<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: How do I choose which column to partition by? in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/how-do-i-choose-which-column-to-partition-by/m-p/20314#M13700</link>
    <description>&lt;P&gt;The important factors deciding partition columns are:&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;Even distribution of data. &lt;/LI&gt;&lt;LI&gt;Choose the column that is commonly or widely accessed or queried. &lt;/LI&gt;&lt;LI&gt;Do not create multiple levels of partition, as you can end up with a large number of small files. &lt;/LI&gt;&lt;/UL&gt;</description>
    <pubDate>Thu, 24 Jun 2021 23:22:00 GMT</pubDate>
    <dc:creator>brickster_2018</dc:creator>
    <dc:date>2021-06-24T23:22:00Z</dc:date>
    <item>
      <title>How do I choose which column to partition by?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-do-i-choose-which-column-to-partition-by/m-p/20313#M13699</link>
      <description>&lt;P&gt;I am in the process of building my data pipeline, but I am unsure of how to choose which fields in my data I should use for partitioning. What should I be considering when choosing a partitioning strategy?&lt;/P&gt;</description>
      <pubDate>Thu, 24 Jun 2021 22:06:12 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-do-i-choose-which-column-to-partition-by/m-p/20313#M13699</guid>
      <dc:creator>User16826992666</dc:creator>
      <dc:date>2021-06-24T22:06:12Z</dc:date>
    </item>
    <item>
      <title>Re: How do I choose which column to partition by?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-do-i-choose-which-column-to-partition-by/m-p/20314#M13700</link>
      <description>&lt;P&gt;The important factors deciding partition columns are:&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;Even distribution of data. &lt;/LI&gt;&lt;LI&gt;Choose the column that is commonly or widely accessed or queried. &lt;/LI&gt;&lt;LI&gt;Do not create multiple levels of partition, as you can end up with a large number of small files. &lt;/LI&gt;&lt;/UL&gt;</description>
      <pubDate>Thu, 24 Jun 2021 23:22:00 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-do-i-choose-which-column-to-partition-by/m-p/20314#M13700</guid>
      <dc:creator>brickster_2018</dc:creator>
      <dc:date>2021-06-24T23:22:00Z</dc:date>
    </item>
  </channel>
</rss>

