<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Add the creation date of a parquet file into a DataFrame in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/add-the-creation-date-of-a-parquet-file-into-a-dataframe/m-p/21712#M14841</link>
    <description>&lt;P&gt;Thanks @Michail Karamanos​&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Fri, 18 Nov 2022 20:46:00 GMT</pubDate>
    <dc:creator>wyzer</dc:creator>
    <dc:date>2022-11-18T20:46:00Z</dc:date>
    <item>
      <title>Add the creation date of a parquet file into a DataFrame</title>
      <link>https://community.databricks.com/t5/data-engineering/add-the-creation-date-of-a-parquet-file-into-a-dataframe/m-p/21710#M14839</link>
      <description>&lt;P&gt;Currently I load multiple parquet files with this code:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;df = spark.read.parquet("/mnt/dev/bronze/Voucher/*/*")&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;(Inside the Voucher folder, there is one folder per date, each containing one parquet file.)&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;How can I add a column to this DataFrame that contains the creation date of each parquet file?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thanks&lt;/P&gt;</description>
      <pubDate>Fri, 18 Nov 2022 16:25:08 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/add-the-creation-date-of-a-parquet-file-into-a-dataframe/m-p/21710#M14839</guid>
      <dc:creator>wyzer</dc:creator>
      <dc:date>2022-11-18T16:25:08Z</dc:date>
    </item>
    <item>
      <title>Re: Add the creation date of a parquet file into a DataFrame</title>
      <link>https://community.databricks.com/t5/data-engineering/add-the-creation-date-of-a-parquet-file-into-a-dataframe/m-p/21711#M14840</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;You can use the file metadata column: &lt;A href="https://docs.databricks.com/ingestion/file-metadata-column.html" alt="https://docs.databricks.com/ingestion/file-metadata-column.html" target="_blank"&gt;https://docs.databricks.com/ingestion/file-metadata-column.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;This way you can access the &lt;B&gt;file_path&lt;/B&gt;, &lt;B&gt;file_name&lt;/B&gt;, &lt;B&gt;file_size&lt;/B&gt; and &lt;B&gt;file_modification_time&lt;/B&gt; of the data file from the corresponding dataframe row. No need to do it manually!&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I found it useful &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 18 Nov 2022 16:43:59 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/add-the-creation-date-of-a-parquet-file-into-a-dataframe/m-p/21711#M14840</guid>
      <dc:creator>MichailKaramano</dc:creator>
      <dc:date>2022-11-18T16:43:59Z</dc:date>
    </item>
    <item>
      <title>Re: Add the creation date of a parquet file into a DataFrame</title>
      <link>https://community.databricks.com/t5/data-engineering/add-the-creation-date-of-a-parquet-file-into-a-dataframe/m-p/21712#M14841</link>
      <description>&lt;P&gt;Thanks @Michail Karamanos​&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 18 Nov 2022 20:46:00 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/add-the-creation-date-of-a-parquet-file-into-a-dataframe/m-p/21712#M14841</guid>
      <dc:creator>wyzer</dc:creator>
      <dc:date>2022-11-18T20:46:00Z</dc:date>
    </item>
  </channel>
</rss>

