<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Reading data from "dbfs:/mnt/" in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/reading-data-from-quot-dbfs-mnt-quot/m-p/16241#M10447</link>
    <description>&lt;P&gt;Hi community,&lt;/P&gt;&lt;P&gt;I don't know what is happening TBH. &lt;/P&gt;&lt;P&gt;I have a use case where data is written to the location "dbfs:/mnt/...", don't ask me why it's mounted, it's just a side project. I do believe that data is stored in ADLS2.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I've been trying to read the data after it's written but when I try to read data from the folder:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;df = spark.read.format("parquet").load("dbfs:/mnt/table/")
&amp;nbsp;
or
&amp;nbsp;
df = spark.read.format("parquet").load("dbfs:/mnt/table/date=2022-12-16")&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;I get: AnalysisException: Unable to infer schema for Parquet. It must be specified manually.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;when I provide the schema, the count = 0 (zero):&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;df.count()&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;but when I provide full path to the parquet file it works:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;df = spark.read.format("parquet").load("dbfs:/mnt/table/date=2022-12-16/some-spark-file.snappy.parquet")
&amp;nbsp;
df.count()
&amp;nbsp;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;it returns 700 rows.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;any ideas ? &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt;&lt;/P&gt;</description>
    <pubDate>Fri, 16 Dec 2022 21:51:10 GMT</pubDate>
    <dc:creator>Pat</dc:creator>
    <dc:date>2022-12-16T21:51:10Z</dc:date>
    <item>
      <title>Reading data from "dbfs:/mnt/"</title>
      <link>https://community.databricks.com/t5/data-engineering/reading-data-from-quot-dbfs-mnt-quot/m-p/16241#M10447</link>
      <description>&lt;P&gt;Hi community,&lt;/P&gt;&lt;P&gt;I don't know what is happening TBH. &lt;/P&gt;&lt;P&gt;I have a use case where data is written to the location "dbfs:/mnt/...", don't ask me why it's mounted, it's just a side project. I do believe that data is stored in ADLS2.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I've been trying to read the data after it's written but when I try to read data from the folder:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;df = spark.read.format("parquet").load("dbfs:/mnt/table/")
&amp;nbsp;
or
&amp;nbsp;
df = spark.read.format("parquet").load("dbfs:/mnt/table/date=2022-12-16")&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;I get: AnalysisException: Unable to infer schema for Parquet. It must be specified manually.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;when I provide the schema, the count = 0 (zero):&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;df.count()&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;but when I provide full path to the parquet file it works:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;df = spark.read.format("parquet").load("dbfs:/mnt/table/date=2022-12-16/some-spark-file.snappy.parquet")
&amp;nbsp;
df.count()
&amp;nbsp;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;it returns 700 rows.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;any ideas ? &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 16 Dec 2022 21:51:10 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/reading-data-from-quot-dbfs-mnt-quot/m-p/16241#M10447</guid>
      <dc:creator>Pat</dc:creator>
      <dc:date>2022-12-16T21:51:10Z</dc:date>
    </item>
    <item>
      <title>Re: Reading data from "dbfs:/mnt/"</title>
      <link>https://community.databricks.com/t5/data-engineering/reading-data-from-quot-dbfs-mnt-quot/m-p/16242#M10448</link>
      <description>&lt;P&gt;I am still not sure what happened, but I've re-run job on smaller dataset and seems to work, maybe corrupted data ?&lt;/P&gt;</description>
      <pubDate>Fri, 16 Dec 2022 22:57:06 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/reading-data-from-quot-dbfs-mnt-quot/m-p/16242#M10448</guid>
      <dc:creator>Pat</dc:creator>
      <dc:date>2022-12-16T22:57:06Z</dc:date>
    </item>
    <item>
      <title>Re: Reading data from "dbfs:/mnt/"</title>
      <link>https://community.databricks.com/t5/data-engineering/reading-data-from-quot-dbfs-mnt-quot/m-p/16243#M10449</link>
      <description>&lt;P&gt;Yes, maybe the data of a particular partition or file got corrupted and for me, it is working fine for a sample parquet data, I am able to read it without any issues.&lt;/P&gt;</description>
      <pubDate>Sat, 17 Dec 2022 02:22:08 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/reading-data-from-quot-dbfs-mnt-quot/m-p/16243#M10449</guid>
      <dc:creator>Chaitanya_Raju</dc:creator>
      <dc:date>2022-12-17T02:22:08Z</dc:date>
    </item>
    <item>
      <title>Re: Reading data from "dbfs:/mnt/"</title>
      <link>https://community.databricks.com/t5/data-engineering/reading-data-from-quot-dbfs-mnt-quot/m-p/16244#M10450</link>
      <description>&lt;P&gt;this is really interesting never faced this type of situation @Pat Sienkiewicz​&amp;nbsp; can you please share whole code so that we can test and debug this in our system&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thanks&lt;/P&gt;&lt;P&gt;Aviral&lt;/P&gt;</description>
      <pubDate>Sun, 18 Dec 2022 06:08:51 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/reading-data-from-quot-dbfs-mnt-quot/m-p/16244#M10450</guid>
      <dc:creator>Aviral-Bhardwaj</dc:creator>
      <dc:date>2022-12-18T06:08:51Z</dc:date>
    </item>
    <item>
      <title>Re: Reading data from "dbfs:/mnt/"</title>
      <link>https://community.databricks.com/t5/data-engineering/reading-data-from-quot-dbfs-mnt-quot/m-p/16245#M10451</link>
      <description>&lt;P&gt;Hi @Aviral Bhardwaj​&amp;nbsp;,&lt;/P&gt;&lt;P&gt;I will try to re-produce this. I think that at least one of the files is corrupted, but I would expect different error in that case, not long running job that fails with `Unable to infer schema for Parquet. It must be specified manually.`&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 19 Dec 2022 07:35:30 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/reading-data-from-quot-dbfs-mnt-quot/m-p/16245#M10451</guid>
      <dc:creator>Pat</dc:creator>
      <dc:date>2022-12-19T07:35:30Z</dc:date>
    </item>
    <item>
      <title>Re: Reading data from "dbfs:/mnt/"</title>
      <link>https://community.databricks.com/t5/data-engineering/reading-data-from-quot-dbfs-mnt-quot/m-p/16246#M10452</link>
      <description>&lt;P&gt;Thanks for sharing, I hope it will work&lt;/P&gt;</description>
      <pubDate>Tue, 20 Dec 2022 01:45:03 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/reading-data-from-quot-dbfs-mnt-quot/m-p/16246#M10452</guid>
      <dc:creator>Aviral-Bhardwaj</dc:creator>
      <dc:date>2022-12-20T01:45:03Z</dc:date>
    </item>
  </channel>
</rss>

