<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Unable to load Parquet file using Autoloader. Can someone help? in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/unable-to-load-parquet-file-using-autoloader-can-someone-help/m-p/16703#M10833</link>
    <description>&lt;P&gt;Done !!&lt;/P&gt;</description>
    <pubDate>Mon, 27 Jun 2022 17:49:56 GMT</pubDate>
    <dc:creator>Mayank</dc:creator>
    <dc:date>2022-06-27T17:49:56Z</dc:date>
    <item>
      <title>Unable to load Parquet file using Autoloader. Can someone help?</title>
      <link>https://community.databricks.com/t5/data-engineering/unable-to-load-parquet-file-using-autoloader-can-someone-help/m-p/16697#M10827</link>
      <description>&lt;P&gt;I am trying to load parquet files using Autoloader. Below is the code &lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;def autoload_to_table (data_source, source_format, table_name, checkpoint_path):
    query = (spark.readStream
                  .format('cloudFiles')
                  .option('cloudFiles.format', source_format)
                  .schema("VendorID long,tpep_pickup_datetime timestamp, tpep_dropoff_datetime timestamp, passenger_count long, trip_distance long, RateCodeID long,  Store_and_fwd_flag string,PULocationID int, DOLocationID long, payment_type long, fare_amount long, extra long, mta_tax long,Tip_amount long, tolls_amount long, improvement_surcharge long,  total_amount long, congestion_Surcharge long, airport_fee long ")
                  .option('cloudFiles.schemaLocation', checkpoint_path)
                  .load(data_source)
                  .writeStream
                  .option('checkpointLocation', checkpoint_path)
                  .option('mergeSchema', "true")
                  .table(table_name)
            )
    
    return query

query = autoload_to_table (data_source = "/mnt/landing/nyctaxi",
                           source_format = "parquet",
                           table_name = "yellow_trip_data",
                           checkpoint_path='/tmp/delta/yellowdata/_checkpoints'
                          )&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;However, I run into the following error. I have also attached the IPython notebook.&lt;/P&gt;&lt;P&gt;Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 3011.0 failed 4 times, most recent failure: Lost task 0.3 in stage 3011.0 (TID 11673) (10.139.64.5 executor 0): java.lang.UnsupportedOperationException: org.apache.parquet.column.values.dictionary.PlainValuesDictionary$PlainDoubleDictionary&lt;/P&gt;&lt;P&gt;	at org.apache.parquet.column.Dictionary.decodeToLong(Dictionary.java:49)&lt;/P&gt;&lt;P&gt;	at org.apache.spark.sql.execution.datasources.parquet.ParquetDictionary.decodeToLong(ParquetDictionary.java:54)&lt;/P&gt;</description>
      <pubDate>Sun, 26 Jun 2022 21:54:28 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/unable-to-load-parquet-file-using-autoloader-can-someone-help/m-p/16697#M10827</guid>
      <dc:creator>Mayank</dc:creator>
      <dc:date>2022-06-26T21:54:28Z</dc:date>
    </item>
    <item>
      <title>Re: Unable to load Parquet file using Autoloader. Can someone help?</title>
      <link>https://community.databricks.com/t5/data-engineering/unable-to-load-parquet-file-using-autoloader-can-someone-help/m-p/16698#M10828</link>
      <description>&lt;P&gt;it could be an incompatible schema,&lt;/P&gt;&lt;P&gt;there is a &lt;A href="https://kb.databricks.com/data/wrong-schema-in-files.html" alt="https://kb.databricks.com/data/wrong-schema-in-files.html" target="_blank"&gt;knowledge base article about that&lt;/A&gt;.&lt;/P&gt;</description>
      <pubDate>Mon, 27 Jun 2022 07:19:24 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/unable-to-load-parquet-file-using-autoloader-can-someone-help/m-p/16698#M10828</guid>
      <dc:creator>-werners-</dc:creator>
      <dc:date>2022-06-27T07:19:24Z</dc:date>
    </item>
    <item>
      <title>Re: Unable to load Parquet file using Autoloader. Can someone help?</title>
      <link>https://community.databricks.com/t5/data-engineering/unable-to-load-parquet-file-using-autoloader-can-someone-help/m-p/16699#M10829</link>
      <description>&lt;P&gt;As @Werner Stinckens​&amp;nbsp;said.&lt;/P&gt;&lt;P&gt;Just load your file the normal way (spark.read.parquet ) without specifying schema and then extract DDL.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;schema_json = spark.read.parquet("your_file.parquet").schema.json()
ddl = spark.sparkContext._jvm.org.apache.spark.sql.types.DataType.fromJson(schema_json).toDDL()
print(ddl)&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 27 Jun 2022 14:50:05 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/unable-to-load-parquet-file-using-autoloader-can-someone-help/m-p/16699#M10829</guid>
      <dc:creator>Hubert-Dudek</dc:creator>
      <dc:date>2022-06-27T14:50:05Z</dc:date>
    </item>
    <item>
      <title>Re: Unable to load Parquet file using Autoloader. Can someone help?</title>
      <link>https://community.databricks.com/t5/data-engineering/unable-to-load-parquet-file-using-autoloader-can-someone-help/m-p/16700#M10830</link>
      <description>&lt;P&gt;Smart idea. Let me try this one.  @Hubert Dudek​&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 27 Jun 2022 15:25:32 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/unable-to-load-parquet-file-using-autoloader-can-someone-help/m-p/16700#M10830</guid>
      <dc:creator>Mayank</dc:creator>
      <dc:date>2022-06-27T15:25:32Z</dc:date>
    </item>
    <item>
      <title>Re: Unable to load Parquet file using Autoloader. Can someone help?</title>
      <link>https://community.databricks.com/t5/data-engineering/unable-to-load-parquet-file-using-autoloader-can-someone-help/m-p/16701#M10831</link>
      <description>&lt;P&gt;This ran !!! you are awesome @Hubert Dudek​&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 27 Jun 2022 15:45:01 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/unable-to-load-parquet-file-using-autoloader-can-someone-help/m-p/16701#M10831</guid>
      <dc:creator>Mayank</dc:creator>
      <dc:date>2022-06-27T15:45:01Z</dc:date>
    </item>
    <item>
      <title>Re: Unable to load Parquet file using Autoloader. Can someone help?</title>
      <link>https://community.databricks.com/t5/data-engineering/unable-to-load-parquet-file-using-autoloader-can-someone-help/m-p/16702#M10832</link>
      <description>&lt;P&gt;Hey @Mayank Srivastava​&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Hope you are well!&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;We are happy to know that you were able to resolve your issue. It would be really awesome if you could mark the answer as best. It would be really helpful for the other members too.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Cheers!&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 27 Jun 2022 17:15:21 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/unable-to-load-parquet-file-using-autoloader-can-someone-help/m-p/16702#M10832</guid>
      <dc:creator>Anonymous</dc:creator>
      <dc:date>2022-06-27T17:15:21Z</dc:date>
    </item>
    <item>
      <title>Re: Unable to load Parquet file using Autoloader. Can someone help?</title>
      <link>https://community.databricks.com/t5/data-engineering/unable-to-load-parquet-file-using-autoloader-can-someone-help/m-p/16703#M10833</link>
      <description>&lt;P&gt;Done !!&lt;/P&gt;</description>
      <pubDate>Mon, 27 Jun 2022 17:49:56 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/unable-to-load-parquet-file-using-autoloader-can-someone-help/m-p/16703#M10833</guid>
      <dc:creator>Mayank</dc:creator>
      <dc:date>2022-06-27T17:49:56Z</dc:date>
    </item>
    <item>
      <title>Re: Unable to load Parquet file using Autoloader. Can someone help?</title>
      <link>https://community.databricks.com/t5/data-engineering/unable-to-load-parquet-file-using-autoloader-can-someone-help/m-p/16704#M10834</link>
      <description>&lt;P&gt;Hi again @Mayank Srivastava​&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thank you so much for getting back to us and marking the answer as best.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;We really appreciate your time.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Wish you a great Databricks journey ahead!&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 27 Jun 2022 18:06:59 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/unable-to-load-parquet-file-using-autoloader-can-someone-help/m-p/16704#M10834</guid>
      <dc:creator>Anonymous</dc:creator>
      <dc:date>2022-06-27T18:06:59Z</dc:date>
    </item>
    <item>
      <title>Re: Unable to load Parquet file using Autoloader. Can someone help?</title>
      <link>https://community.databricks.com/t5/data-engineering/unable-to-load-parquet-file-using-autoloader-can-someone-help/m-p/16705#M10835</link>
      <description>&lt;P&gt;Great! Thank you.&lt;/P&gt;</description>
      <pubDate>Tue, 28 Jun 2022 13:50:12 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/unable-to-load-parquet-file-using-autoloader-can-someone-help/m-p/16705#M10835</guid>
      <dc:creator>Hubert-Dudek</dc:creator>
      <dc:date>2022-06-28T13:50:12Z</dc:date>
    </item>
  </channel>
</rss>

