<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Autoloader schema inference in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/autoloader-schema-inference/m-p/32427#M23627</link>
    <description>&lt;P&gt;Do you mean .&lt;I&gt;option("&lt;/I&gt;&lt;B&gt;&lt;I&gt;cloudFiles.schemaLocation&lt;/I&gt;&lt;/B&gt;&lt;I&gt;", "&amp;lt;path_to_checkpoint&amp;gt;")&lt;/I&gt;?&lt;/P&gt;&lt;P&gt;If that's the case, then you can check the following docs: &lt;A href="https://docs.databricks.com/ingestion/auto-loader/options.html" target="_blank"&gt;https://docs.databricks.com/ingestion/auto-loader/options.html&lt;/A&gt;&lt;/P&gt;</description>
    <pubDate>Fri, 09 Sep 2022 23:04:30 GMT</pubDate>
    <dc:creator>jose_gonzalez</dc:creator>
    <dc:date>2022-09-09T23:04:30Z</dc:date>
    <item>
      <title>Autoloader schema inference</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-schema-inference/m-p/32424#M23624</link>
      <description>&lt;P&gt;Hello,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;is it possible to turn off schema inference with AutoLoader? &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thank you,&lt;/P&gt;&lt;P&gt;Cosimo&lt;/P&gt;</description>
      <pubDate>Tue, 06 Sep 2022 20:32:54 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-schema-inference/m-p/32424#M23624</guid>
      <dc:creator>Cosimo_F_</dc:creator>
      <dc:date>2022-09-06T20:32:54Z</dc:date>
    </item>
    <item>
      <title>Re: Autoloader schema inference</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-schema-inference/m-p/32426#M23626</link>
      <description>&lt;P&gt;Thank you for your reply!&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;The documentation mentions passing a schema to AutoLoader but does not explain how. The solution is simply to use the .schema method like so:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;spark.\&lt;/P&gt;&lt;P&gt;&amp;nbsp;readStream.\&lt;/P&gt;&lt;P&gt;&amp;nbsp;schema(&amp;lt;schema&amp;gt;).\&lt;/P&gt;&lt;P&gt;&amp;nbsp;format("cloudFiles").\&lt;/P&gt;&lt;P&gt;load()&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Best,&lt;/P&gt;&lt;P&gt;Cosimo.&lt;/P&gt;</description>
      <pubDate>Thu, 08 Sep 2022 18:35:53 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-schema-inference/m-p/32426#M23626</guid>
      <dc:creator>Cosimo_F_</dc:creator>
      <dc:date>2022-09-08T18:35:53Z</dc:date>
    </item>
    <item>
      <title>Re: Autoloader schema inference</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-schema-inference/m-p/32427#M23627</link>
      <description>&lt;P&gt;Do you mean .&lt;I&gt;option("&lt;/I&gt;&lt;B&gt;&lt;I&gt;cloudFiles.schemaLocation&lt;/I&gt;&lt;/B&gt;&lt;I&gt;", "&amp;lt;path_to_checkpoint&amp;gt;")&lt;/I&gt;?&lt;/P&gt;&lt;P&gt;If that's the case, then you can check the following docs: &lt;A href="https://docs.databricks.com/ingestion/auto-loader/options.html" target="_blank"&gt;https://docs.databricks.com/ingestion/auto-loader/options.html&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 09 Sep 2022 23:04:30 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-schema-inference/m-p/32427#M23627</guid>
      <dc:creator>jose_gonzalez</dc:creator>
      <dc:date>2022-09-09T23:04:30Z</dc:date>
    </item>
    <item>
      <title>Re: Autoloader schema inference</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-schema-inference/m-p/32428#M23628</link>
      <description>&lt;P&gt;Hi Jose,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;No, that's the location of the schema hints (which work together with schema inference). Specifying a schema location does not turn off schema inference as I wanted. In fact schemaLocation is a required option _unless_ the schema is passed explicitly as I showed.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Best,&lt;/P&gt;&lt;P&gt;Cosimo, &lt;/P&gt;</description>
      <pubDate>Sat, 10 Sep 2022 14:01:55 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-schema-inference/m-p/32428#M23628</guid>
      <dc:creator>Cosimo_F_</dc:creator>
      <dc:date>2022-09-10T14:01:55Z</dc:date>
    </item>
    <item>
      <title>Re: Autoloader schema inference</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-schema-inference/m-p/100777#M40422</link>
      <description>&lt;P&gt;&lt;A href="https://docs.databricks.com/en/ingestion/cloud-object-storage/auto-loader/patterns.html#language-python" target="_blank" rel="noopener"&gt;https://docs.databricks.com/en/ingestion/cloud-object-storage/auto-loader/patterns.html#language-python&lt;/A&gt;&lt;/P&gt;&lt;P&gt;You can enforce the schema, or use &lt;SPAN&gt;"cloudFiles.schemaHints"&lt;/SPAN&gt;&lt;SPAN&gt; to override the inference.&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;&lt;SPAN class=""&gt;df&lt;/SPAN&gt; &lt;SPAN class=""&gt;=&lt;/SPAN&gt; (&lt;SPAN class=""&gt;spark&lt;/SPAN&gt;&lt;SPAN class=""&gt;.&lt;/SPAN&gt;&lt;SPAN class=""&gt;readStream&lt;/SPAN&gt;&lt;SPAN class=""&gt;.&lt;/SPAN&gt;&lt;SPAN class=""&gt;format&lt;/SPAN&gt;&lt;SPAN class=""&gt;(&lt;/SPAN&gt;&lt;SPAN class=""&gt;"cloudFiles"&lt;/SPAN&gt;&lt;SPAN class=""&gt;)&lt;/SPAN&gt;
  &lt;SPAN class=""&gt;.&lt;/SPAN&gt;&lt;SPAN class=""&gt;option&lt;/SPAN&gt;&lt;SPAN class=""&gt;(&lt;/SPAN&gt;&lt;SPAN class=""&gt;"cloudFiles.format"&lt;/SPAN&gt;&lt;SPAN class=""&gt;,&lt;/SPAN&gt; &lt;SPAN class=""&gt;"csv"&lt;/SPAN&gt;&lt;SPAN class=""&gt;)&lt;/SPAN&gt;
  &lt;SPAN class=""&gt;.&lt;/SPAN&gt;&lt;SPAN class=""&gt;option&lt;/SPAN&gt;&lt;SPAN class=""&gt;(&lt;/SPAN&gt;&lt;SPAN class=""&gt;"header"&lt;/SPAN&gt;&lt;SPAN class=""&gt;,&lt;/SPAN&gt; &lt;SPAN class=""&gt;"true"&lt;/SPAN&gt;&lt;SPAN class=""&gt;)&lt;/SPAN&gt;
  &lt;SPAN class=""&gt;.&lt;/SPAN&gt;&lt;SPAN class=""&gt;option&lt;/SPAN&gt;&lt;SPAN class=""&gt;(&lt;/SPAN&gt;&lt;SPAN class=""&gt;"rescuedDataColumn"&lt;/SPAN&gt;&lt;SPAN class=""&gt;,&lt;/SPAN&gt; &lt;SPAN class=""&gt;"_rescued_data"&lt;/SPAN&gt;&lt;SPAN class=""&gt;)&lt;/SPAN&gt;  &lt;SPAN class=""&gt;# makes sure that you don't lose data&lt;/SPAN&gt;
  &lt;SPAN class=""&gt;.&lt;/SPAN&gt;&lt;SPAN class=""&gt;schema&lt;/SPAN&gt;&lt;SPAN class=""&gt;(&lt;/SPAN&gt;&lt;SPAN class=""&gt;&amp;lt;&lt;/SPAN&gt;&lt;SPAN class=""&gt;schema&lt;/SPAN&gt;&lt;SPAN class=""&gt;&amp;gt;&lt;/SPAN&gt;&lt;SPAN class=""&gt;)&lt;/SPAN&gt;  &lt;SPAN class=""&gt;# provide a schema here for the files&lt;/SPAN&gt;
  &lt;SPAN class=""&gt;.&lt;/SPAN&gt;&lt;SPAN class=""&gt;load&lt;/SPAN&gt;&lt;SPAN class=""&gt;(&lt;/SPAN&gt;&lt;SPAN class=""&gt;&amp;lt;&lt;/SPAN&gt;&lt;SPAN class=""&gt;path&lt;/SPAN&gt;&lt;SPAN class=""&gt;&amp;gt;&lt;/SPAN&gt;&lt;SPAN class=""&gt;)&lt;/SPAN&gt;)&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 03 Dec 2024 15:26:31 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-schema-inference/m-p/100777#M40422</guid>
      <dc:creator>shivagarg</dc:creator>
      <dc:date>2024-12-03T15:26:31Z</dc:date>
    </item>
  </channel>
</rss>

