<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Flatfiles ingestion on Bronze layer, 'to schema' or 'not to schemarize'? in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/flatfiles-ingestion-on-bronze-layer-to-schema-or-not-to/m-p/23195#M15978</link>
    <description>&lt;P&gt;Hi all, what is the general guideline for handling flatfiles (XML or JSON with several nested hierarchies and an evolving schema) in the bronze layer?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Should I persist the file content as text in a single column of the Parquet file&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;or&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;should I let Spark infer a schema and have it output a Parquet file with several columns representing the content of the XML/JSON file?&lt;/P&gt;</description>
    <pubDate>Tue, 08 Nov 2022 13:32:52 GMT</pubDate>
    <dc:creator>7effrey</dc:creator>
    <dc:date>2022-11-08T13:32:52Z</dc:date>
    <item>
      <title>Flatfiles ingestion on Bronze layer, 'to schema' or 'not to schemarize'?</title>
      <link>https://community.databricks.com/t5/data-engineering/flatfiles-ingestion-on-bronze-layer-to-schema-or-not-to/m-p/23195#M15978</link>
      <description>&lt;P&gt;Hi all, what is the general guideline for handling flatfiles (XML or JSON with several nested hierarchies and an evolving schema) in the bronze layer?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Should I persist the file content as text in a single column of the Parquet file&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;or&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;should I let Spark infer a schema and have it output a Parquet file with several columns representing the content of the XML/JSON file?&lt;/P&gt;</description>
      <pubDate>Tue, 08 Nov 2022 13:32:52 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/flatfiles-ingestion-on-bronze-layer-to-schema-or-not-to/m-p/23195#M15978</guid>
      <dc:creator>7effrey</dc:creator>
      <dc:date>2022-11-08T13:32:52Z</dc:date>
    </item>
  </channel>
</rss>

