<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Error when reading Excel file: "org.apache.poi.ooxml.POIXMLException: Strict OOXML isn't currently supported, please see bug #57699" in Machine Learning</title>
    <link>https://community.databricks.com/t5/machine-learning/error-when-reading-excel-file-quot-org-apache-poi-ooxml/m-p/21573#M1181</link>
    <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I want to read an Excel "xlsx" file. The excel file has several sheets and multi-row header. The original file format was "xlsm" and I changed the extension to "xlsx".  &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I try the following code:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;filepath_xlsx = "dbfs:/FileStore/Sample_Excel/data.xlsx"
&amp;nbsp;
sampleDF_xlsx = (spark.read.format("com.crealytics.spark.excel")
  .option("sheetName", "Sheet1") 
  .option("Header", "true") 
  .option("inferSchema", "false") 
  .option("treatEmptyValuesAsNulls", "false") 
  .load(filepath_xlsx)
            )
&amp;nbsp;
display(sampleDF_xlsx)&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;And I get the error: &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;org.apache.poi.ooxml.POIXMLException: Strict OOXML isn't currently supported, please see bug #57699
&amp;nbsp;
---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
&amp;lt;command-496434324351845&amp;gt; in &amp;lt;cell line: 4&amp;gt;()
      2 
      3 # Read excel file
----&amp;gt; 4 sampleDF_xlsx = (spark.read.format("com.crealytics.spark.excel")
      5   .option("sheetName", "Database BT")
      6   .option("Header", "true")
&amp;nbsp;
/databricks/spark/python/pyspark/instrumentation_utils.py in wrapper(*args, **kwargs)
     46             start = time.perf_counter()
     47             try:
---&amp;gt; 48                 res = func(*args, **kwargs)
     49                 logger.log_success(
     50                     module_name, class_name, function_name, time.perf_counter() - start, signature
&amp;nbsp;
/databricks/spark/python/pyspark/sql/readwriter.py in load(self, path, format, schema, **options)
    175         self.options(**options)
    176         if isinstance(path, str):
--&amp;gt; 177             return self._df(self._jreader.load(path))
    178         elif path is not None:
    179             if type(path) != list:
&amp;nbsp;
/databricks/spark/python/lib/py4j-0.10.9.5-src.zip/py4j/java_gateway.py in __call__(self, *args)
   1319 
   1320         answer = self.gateway_client.send_command(command)
-&amp;gt; 1321         return_value = get_return_value(
   1322             answer, self.gateway_client, self.target_id, self.name)
   1323 
&amp;nbsp;
/databricks/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
    194     def deco(*a: Any, **kw: Any) -&amp;gt; Any:
    195         try:
--&amp;gt; 196             return f(*a, **kw)
    197         except Py4JJavaError as e:
    198             converted = convert_exception(e.java_exception)
&amp;nbsp;
/databricks/spark/python/lib/py4j-0.10.9.5-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
    324             value = OUTPUT_CONVERTER[type](answer[2:], gateway_client)
    325             if answer[1] == REFERENCE_TYPE:
--&amp;gt; 326                 raise Py4JJavaError(
    327                     "An error occurred while calling {0}{1}{2}.\n".
    328                     format(target_id, ".", name), value)
&amp;nbsp;
Py4JJavaError: An error occurred while calling o578.load.
: org.apache.poi.ooxml.POIXMLException: Strict OOXML isn't currently supported, please see bug #57699
	at org.apache.poi.ooxml.POIXMLDocumentPart.getPartFromOPCPackage(POIXMLDocumentPart.java:757)
	at org.apache.poi.ooxml.POIXMLDocumentPart.&amp;lt;init&amp;gt;(POIXMLDocumentPart.java:151)
	at org.apache.poi.ooxml.POIXMLDocumentPart.&amp;lt;init&amp;gt;(POIXMLDocumentPart.java:141)
	at org.apache.poi.ooxml.POIXMLDocument.&amp;lt;init&amp;gt;(POIXMLDocument.java:60)
	at org.apache.poi.xssf.usermodel.XSSFWorkbook.&amp;lt;init&amp;gt;(XSSFWorkbook.java:254)
	at org.apache.poi.xssf.usermodel.XSSFWorkbookFactory.createWorkbook(XSSFWorkbookFactory.java:118)
	at org.apache.poi.xssf.usermodel.XSSFWorkbookFactory.create(XSSFWorkbookFactory.java:98)
	at org.apache.poi.xssf.usermodel.XSSFWorkbookFactory.create(XSSFWorkbookFactory.java:36)
	at org.apache.poi.ss.usermodel.WorkbookFactory.lambda$create$2(WorkbookFactory.java:224)
	at org.apache.poi.ss.usermodel.WorkbookFactory.wp(WorkbookFactory.java:329)
	at org.apache.poi.ss.usermodel.WorkbookFactory.create(WorkbookFactory.java:224)
	at org.apache.poi.ss.usermodel.WorkbookFactory.create(WorkbookFactory.java:185)
	at com.crealytics.spark.excel.DefaultWorkbookReader.$anonfun$openWorkbook$3(WorkbookReader.scala:107)
	at scala.Option.fold(Option.scala:251)
	at com.crealytics.spark.excel.DefaultWorkbookReader.openWorkbook(WorkbookReader.scala:107)
	at com.crealytics.spark.excel.WorkbookReader.withWorkbook(WorkbookReader.scala:34)
	at com.crealytics.spark.excel.WorkbookReader.withWorkbook$(WorkbookReader.scala:33)
	at com.crealytics.spark.excel.DefaultWorkbookReader.withWorkbook(WorkbookReader.scala:92)
	at com.crealytics.spark.excel.ExcelRelation.excerpt$lzycompute(ExcelRelation.scala:48)
	at com.crealytics.spark.excel.ExcelRelation.excerpt(ExcelRelation.scala:48)
	at com.crealytics.spark.excel.ExcelRelation.headerColumns$lzycompute(ExcelRelation.scala:121)
	at com.crealytics.spark.excel.ExcelRelation.headerColumns(ExcelRelation.scala:120)
	at com.crealytics.spark.excel.ExcelRelation.$anonfun$inferSchema$1(ExcelRelation.scala:189)
	at scala.Option.getOrElse(Option.scala:189)
	at com.crealytics.spark.excel.ExcelRelation.inferSchema(ExcelRelation.scala:188)
	at com.crealytics.spark.excel.ExcelRelation.&amp;lt;init&amp;gt;(ExcelRelation.scala:52)
	at com.crealytics.spark.excel.DefaultSource.createRelation(DefaultSource.scala:52)
	at com.crealytics.spark.excel.DefaultSource.createRelation(DefaultSource.scala:29)
	at com.crealytics.spark.excel.DefaultSource.createRelation(DefaultSource.scala:24)
	at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:385)
	at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:368)
	at org.apache.spark.sql.DataFrameReader.$anonfun$load$2(DataFrameReader.scala:324)
	at scala.Option.getOrElse(Option.scala:189)
	at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:324)
	at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:237)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:380)
	at py4j.Gateway.invoke(Gateway.java:306)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:195)
	at py4j.ClientServerConnection.run(ClientServerConnection.java:115)
	at java.lang.Thread.run(Thread.java:750)&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I have installed "com.crealytics:spark-excel_2.12:3.3.1_0.18.5" on the cluster.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I use the same code for another dataset (single-sheet, single-row header, and cleaned dataset), and I don't get any error. &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Any ideas on how to solve this issue? &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
    <pubDate>Sun, 20 Nov 2022 05:51:18 GMT</pubDate>
    <dc:creator>Mado</dc:creator>
    <dc:date>2022-11-20T05:51:18Z</dc:date>
    <item>
      <title>Error when reading Excel file: "org.apache.poi.ooxml.POIXMLException: Strict OOXML isn't currently supported, please see bug #57699"</title>
      <link>https://community.databricks.com/t5/machine-learning/error-when-reading-excel-file-quot-org-apache-poi-ooxml/m-p/21573#M1181</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I want to read an Excel "xlsx" file. The excel file has several sheets and multi-row header. The original file format was "xlsm" and I changed the extension to "xlsx".  &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I try the following code:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;filepath_xlsx = "dbfs:/FileStore/Sample_Excel/data.xlsx"
&amp;nbsp;
sampleDF_xlsx = (spark.read.format("com.crealytics.spark.excel")
  .option("sheetName", "Sheet1") 
  .option("Header", "true") 
  .option("inferSchema", "false") 
  .option("treatEmptyValuesAsNulls", "false") 
  .load(filepath_xlsx)
            )
&amp;nbsp;
display(sampleDF_xlsx)&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;And I get the error: &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;org.apache.poi.ooxml.POIXMLException: Strict OOXML isn't currently supported, please see bug #57699
&amp;nbsp;
---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
&amp;lt;command-496434324351845&amp;gt; in &amp;lt;cell line: 4&amp;gt;()
      2 
      3 # Read excel file
----&amp;gt; 4 sampleDF_xlsx = (spark.read.format("com.crealytics.spark.excel")
      5   .option("sheetName", "Database BT")
      6   .option("Header", "true")
&amp;nbsp;
/databricks/spark/python/pyspark/instrumentation_utils.py in wrapper(*args, **kwargs)
     46             start = time.perf_counter()
     47             try:
---&amp;gt; 48                 res = func(*args, **kwargs)
     49                 logger.log_success(
     50                     module_name, class_name, function_name, time.perf_counter() - start, signature
&amp;nbsp;
/databricks/spark/python/pyspark/sql/readwriter.py in load(self, path, format, schema, **options)
    175         self.options(**options)
    176         if isinstance(path, str):
--&amp;gt; 177             return self._df(self._jreader.load(path))
    178         elif path is not None:
    179             if type(path) != list:
&amp;nbsp;
/databricks/spark/python/lib/py4j-0.10.9.5-src.zip/py4j/java_gateway.py in __call__(self, *args)
   1319 
   1320         answer = self.gateway_client.send_command(command)
-&amp;gt; 1321         return_value = get_return_value(
   1322             answer, self.gateway_client, self.target_id, self.name)
   1323 
&amp;nbsp;
/databricks/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
    194     def deco(*a: Any, **kw: Any) -&amp;gt; Any:
    195         try:
--&amp;gt; 196             return f(*a, **kw)
    197         except Py4JJavaError as e:
    198             converted = convert_exception(e.java_exception)
&amp;nbsp;
/databricks/spark/python/lib/py4j-0.10.9.5-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
    324             value = OUTPUT_CONVERTER[type](answer[2:], gateway_client)
    325             if answer[1] == REFERENCE_TYPE:
--&amp;gt; 326                 raise Py4JJavaError(
    327                     "An error occurred while calling {0}{1}{2}.\n".
    328                     format(target_id, ".", name), value)
&amp;nbsp;
Py4JJavaError: An error occurred while calling o578.load.
: org.apache.poi.ooxml.POIXMLException: Strict OOXML isn't currently supported, please see bug #57699
	at org.apache.poi.ooxml.POIXMLDocumentPart.getPartFromOPCPackage(POIXMLDocumentPart.java:757)
	at org.apache.poi.ooxml.POIXMLDocumentPart.&amp;lt;init&amp;gt;(POIXMLDocumentPart.java:151)
	at org.apache.poi.ooxml.POIXMLDocumentPart.&amp;lt;init&amp;gt;(POIXMLDocumentPart.java:141)
	at org.apache.poi.ooxml.POIXMLDocument.&amp;lt;init&amp;gt;(POIXMLDocument.java:60)
	at org.apache.poi.xssf.usermodel.XSSFWorkbook.&amp;lt;init&amp;gt;(XSSFWorkbook.java:254)
	at org.apache.poi.xssf.usermodel.XSSFWorkbookFactory.createWorkbook(XSSFWorkbookFactory.java:118)
	at org.apache.poi.xssf.usermodel.XSSFWorkbookFactory.create(XSSFWorkbookFactory.java:98)
	at org.apache.poi.xssf.usermodel.XSSFWorkbookFactory.create(XSSFWorkbookFactory.java:36)
	at org.apache.poi.ss.usermodel.WorkbookFactory.lambda$create$2(WorkbookFactory.java:224)
	at org.apache.poi.ss.usermodel.WorkbookFactory.wp(WorkbookFactory.java:329)
	at org.apache.poi.ss.usermodel.WorkbookFactory.create(WorkbookFactory.java:224)
	at org.apache.poi.ss.usermodel.WorkbookFactory.create(WorkbookFactory.java:185)
	at com.crealytics.spark.excel.DefaultWorkbookReader.$anonfun$openWorkbook$3(WorkbookReader.scala:107)
	at scala.Option.fold(Option.scala:251)
	at com.crealytics.spark.excel.DefaultWorkbookReader.openWorkbook(WorkbookReader.scala:107)
	at com.crealytics.spark.excel.WorkbookReader.withWorkbook(WorkbookReader.scala:34)
	at com.crealytics.spark.excel.WorkbookReader.withWorkbook$(WorkbookReader.scala:33)
	at com.crealytics.spark.excel.DefaultWorkbookReader.withWorkbook(WorkbookReader.scala:92)
	at com.crealytics.spark.excel.ExcelRelation.excerpt$lzycompute(ExcelRelation.scala:48)
	at com.crealytics.spark.excel.ExcelRelation.excerpt(ExcelRelation.scala:48)
	at com.crealytics.spark.excel.ExcelRelation.headerColumns$lzycompute(ExcelRelation.scala:121)
	at com.crealytics.spark.excel.ExcelRelation.headerColumns(ExcelRelation.scala:120)
	at com.crealytics.spark.excel.ExcelRelation.$anonfun$inferSchema$1(ExcelRelation.scala:189)
	at scala.Option.getOrElse(Option.scala:189)
	at com.crealytics.spark.excel.ExcelRelation.inferSchema(ExcelRelation.scala:188)
	at com.crealytics.spark.excel.ExcelRelation.&amp;lt;init&amp;gt;(ExcelRelation.scala:52)
	at com.crealytics.spark.excel.DefaultSource.createRelation(DefaultSource.scala:52)
	at com.crealytics.spark.excel.DefaultSource.createRelation(DefaultSource.scala:29)
	at com.crealytics.spark.excel.DefaultSource.createRelation(DefaultSource.scala:24)
	at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:385)
	at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:368)
	at org.apache.spark.sql.DataFrameReader.$anonfun$load$2(DataFrameReader.scala:324)
	at scala.Option.getOrElse(Option.scala:189)
	at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:324)
	at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:237)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:380)
	at py4j.Gateway.invoke(Gateway.java:306)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:195)
	at py4j.ClientServerConnection.run(ClientServerConnection.java:115)
	at java.lang.Thread.run(Thread.java:750)&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I have installed "com.crealytics:spark-excel_2.12:3.3.1_0.18.5" on the cluster.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I use the same code for another dataset (single-sheet, single-row header, and cleaned dataset), and I don't get any error. &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Any ideas on how to solve this issue? &lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Sun, 20 Nov 2022 05:51:18 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/error-when-reading-excel-file-quot-org-apache-poi-ooxml/m-p/21573#M1181</guid>
      <dc:creator>Mado</dc:creator>
      <dc:date>2022-11-20T05:51:18Z</dc:date>
    </item>
    <item>
      <title>Re: Error when reading Excel file: "org.apache.poi.ooxml.POIXMLException: Strict OOXML isn't cu</title>
      <link>https://community.databricks.com/t5/machine-learning/error-when-reading-excel-file-quot-org-apache-poi-ooxml/m-p/114443#M4014</link>
      <description>&lt;P&gt;Copying the data into a new file solved my issue. It was likely an issue related to the file's metadata!&amp;nbsp; &amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 03 Apr 2025 18:23:23 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/error-when-reading-excel-file-quot-org-apache-poi-ooxml/m-p/114443#M4014</guid>
      <dc:creator>Eag_le</dc:creator>
      <dc:date>2025-04-03T18:23:23Z</dc:date>
    </item>
  </channel>
</rss>

