<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic How do you read an Excel spreadsheet with Databricks in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/how-do-you-read-an-excel-spreadsheet-with-databricks/m-p/25994#M18133</link>
    <description>&lt;P&gt;My cluster has Scala 2.12&lt;/P&gt;&lt;P&gt;I've installed Maven Library com.crealytics:spark-excel_2.12:0.14.0&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I get an error &lt;/P&gt;&lt;P&gt;java.lang.IllegalStateException: Cannot get a STRING value from a NUMERIC cell&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;when trying to execute the following&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;%python&lt;/P&gt;&lt;P&gt;excelFileName="/mnt/dlstor/raw/sales/Budget vols FY 21-22 FY 22-23.xlsx"&lt;/P&gt;&lt;P&gt;excelWorksheetName="'22-23'!A2"&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;isHeaderOn="true"&lt;/P&gt;&lt;P&gt;isInferSchemaOn="true"&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;df = spark.read.format("com.crealytics.spark.excel") \&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;.option("header", isHeaderOn) \&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;.option("inferSchema", isInferSchemaOn) \&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;.option("treatEmptyValuesAsNulls", "true") \&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;.option("dataAddress", excelWorksheetName) \&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;.load(excelFileName)&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;display(df)&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I couldn't find a similar post.  Any suggestions would be gratefully received.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Regards&lt;/P&gt;</description>
    <pubDate>Thu, 10 Mar 2022 11:15:28 GMT</pubDate>
    <dc:creator>LPlates</dc:creator>
    <dc:date>2022-03-10T11:15:28Z</dc:date>
    <item>
      <title>How do you read an Excel spreadsheet with Databricks</title>
      <link>https://community.databricks.com/t5/data-engineering/how-do-you-read-an-excel-spreadsheet-with-databricks/m-p/25994#M18133</link>
      <description>&lt;P&gt;My cluster has Scala 2.12&lt;/P&gt;&lt;P&gt;I've installed Maven Library com.crealytics:spark-excel_2.12:0.14.0&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I get an error &lt;/P&gt;&lt;P&gt;java.lang.IllegalStateException: Cannot get a STRING value from a NUMERIC cell&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;when trying to execute the following&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;%python&lt;/P&gt;&lt;P&gt;excelFileName="/mnt/dlstor/raw/sales/Budget vols FY 21-22 FY 22-23.xlsx"&lt;/P&gt;&lt;P&gt;excelWorksheetName="'22-23'!A2"&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;isHeaderOn="true"&lt;/P&gt;&lt;P&gt;isInferSchemaOn="true"&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;df = spark.read.format("com.crealytics.spark.excel") \&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;.option("header", isHeaderOn) \&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;.option("inferSchema", isInferSchemaOn) \&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;.option("treatEmptyValuesAsNulls", "true") \&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;.option("dataAddress", excelWorksheetName) \&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;.load(excelFileName)&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;display(df)&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I couldn't find a similar post.  Any suggestions would be gratefully received.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Regards&lt;/P&gt;</description>
      <pubDate>Thu, 10 Mar 2022 11:15:28 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-do-you-read-an-excel-spreadsheet-with-databricks/m-p/25994#M18133</guid>
      <dc:creator>LPlates</dc:creator>
      <dc:date>2022-03-10T11:15:28Z</dc:date>
    </item>
    <item>
      <title>Re: How do you read an Excel spreadsheet with Databricks</title>
      <link>https://community.databricks.com/t5/data-engineering/how-do-you-read-an-excel-spreadsheet-with-databricks/m-p/25995#M18134</link>
      <description>&lt;P&gt;Okay, I've 'resolved' my issue&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I changed the isHeaderOn="true" to isHeaderOn="false" and was able to load the dataframe.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 11 Mar 2022 15:39:34 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-do-you-read-an-excel-spreadsheet-with-databricks/m-p/25995#M18134</guid>
      <dc:creator>LPlates</dc:creator>
      <dc:date>2022-03-11T15:39:34Z</dc:date>
    </item>
    <item>
      <title>Re: How do you read an Excel spreadsheet with Databricks</title>
      <link>https://community.databricks.com/t5/data-engineering/how-do-you-read-an-excel-spreadsheet-with-databricks/m-p/25997#M18136</link>
      <description>&lt;P&gt;Another approach that may also help in your case is using Pandas to read the Excel file and then converting the Pandas DataFrame to a PySpark DataFrame &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Sat, 19 Nov 2022 10:12:13 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-do-you-read-an-excel-spreadsheet-with-databricks/m-p/25997#M18136</guid>
      <dc:creator>Anonymous</dc:creator>
      <dc:date>2022-11-19T10:12:13Z</dc:date>
    </item>
  </channel>
</rss>

