I am trying to read a excel file which has 3 sheets which have integers as there names,
sheet 1 name = 21
sheet 2 name = 24
sheet 3 name = 224
i got this data from a user so I can't change the sheet name, but with spark reading these is an issue.
code -
val sheetName = "provided by user"
val df = spark.read
.format("com.crealytics.spark.excel").
option("header", "true").
option("inferSchema", "false").
option("dataAddress", f"$sheetName").
load("/home/sarveshks/data/xl.xlsx")
df.show(5)
stack -
Exception in thread "main" java.lang.IllegalStateException: Cannot get a STRING value from a NUMERIC cell
at shadeio.poi.xssf.usermodel.XSSFCell.typeMismatch(XSSFCell.java:1035)
at shadeio.poi.xssf.usermodel.XSSFCell.getRichStringCellValue(XSSFCell.java:390)
at shadeio.poi.xssf.usermodel.XSSFCell.getStringCellValue(XSSFCell.java:342)
at com.crealytics.spark.excel.ExcelRelation.colName$1(ExcelRelation.scala:125)
at com.crealytics.spark.excel.ExcelRelation.$anonfun$headerColumns$11(ExcelRelation.scala:128)
at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:285)
at scala.collection.Iterator.foreach(Iterator.scala:943)
at scala.collection.Iterator.foreach$(Iterator.scala:943)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
at scala.collection.IterableLike.foreach(IterableLike.scala:74)
at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
at scala.collection.TraversableLike.map(TraversableLike.scala:285)
at scala.collection.TraversableLike.map$(TraversableLike.scala:278)
at scala.collection.AbstractTraversable.map(Traversable.scala:108)
at com.crealytics.spark.excel.ExcelRelation.$anonfun$headerColumns$1(ExcelRelation.scala:128)
at scala.Option.getOrElse(Option.scala:189)
at com.crealytics.spark.excel.ExcelRelation.headerColumns$lzycompute(ExcelRelation.scala:107)
at com.crealytics.spark.excel.ExcelRelation.headerColumns(ExcelRelation.scala:103)
at com.crealytics.spark.excel.ExcelRelation.$anonfun$inferSchema$1(ExcelRelation.scala:172)
at scala.Option.getOrElse(Option.scala:189)
at com.crealytics.spark.excel.ExcelRelation.inferSchema(ExcelRelation.scala:171)
at com.crealytics.spark.excel.ExcelRelation.<init>(ExcelRelation.scala:36)
at com.crealytics.spark.excel.DefaultSource.createRelation(DefaultSource.scala:36)
at com.crealytics.spark.excel.DefaultSource.createRelation(DefaultSource.scala:13)
at com.crealytics.spark.excel.DefaultSource.createRelation(DefaultSource.scala:8)
at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:339)
at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:279)
at org.apache.spark.sql.DataFrameReader.$anonfun$load$2(DataFrameReader.scala:268)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:268)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:214)
at com.sundogsoftware.spark.excel$.delayedEndpoint$com$sundogsoftware$spark$excel$1(excel.scala:35)
at com.sundogsoftware.spark.excel$delayedInit$body.apply(excel.scala:10)
at scala.Function0.apply$mcV$sp(Function0.scala:39)
at scala.Function0.apply$mcV$sp$(Function0.scala:39)
at scala.runtime.AbstractFunction0.apply$mcV$sp(AbstractFunction0.scala:17)
at scala.App.$anonfun$main$1$adapted(App.scala:80)
at scala.collection.immutable.List.foreach(List.scala:431)
at scala.App.main(App.scala:80)
at scala.App.main$(App.scala:78)
at com.sundogsoftware.spark.excel$.main(excel.scala:10)
at com.sundogsoftware.spark.excel.main(excel.scala)
I know what this error is trying to say, what i want is to read different sheets by there indexes that is
sheet name 21 has index 0
sheet name 24 has index 1
i want to read the sheets by there index not by there names.