<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Databricks pyspark - Find columns in xls file. in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/databricks-pyspark-find-columns-in-xls-file/m-p/32606#M23759</link>
    <description>&lt;P&gt;Hello everyone, every day I extract data into xls files but the column position changes every day. Is there any way to find these columns within the file?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Here's a snippet of my code.&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;df = spark.read.format("com.crealytics.spark.excel")\
  .option("header", "true")\
  .schema(schema)\
  .option("dataAddress", "'releases'!A27:D78") \
  .load("dbfs:/FileStore/tables/invoice_september.xls")
df.display()&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;</description>
    <pubDate>Sat, 03 Sep 2022 15:14:54 GMT</pubDate>
    <dc:creator>weldermartins</dc:creator>
    <dc:date>2022-09-03T15:14:54Z</dc:date>
    <item>
      <title>Databricks pyspark - Find columns in xls file.</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-pyspark-find-columns-in-xls-file/m-p/32606#M23759</link>
      <description>&lt;P&gt;Hello everyone, every day I extract data into xls files but the column position changes every day. Is there any way to find these columns within the file?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Here's a snippet of my code.&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;df = spark.read.format("com.crealytics.spark.excel")\
  .option("header", "true")\
  .schema(schema)\
  .option("dataAddress", "'releases'!A27:D78") \
  .load("dbfs:/FileStore/tables/invoice_september.xls")
df.display()&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Sat, 03 Sep 2022 15:14:54 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-pyspark-find-columns-in-xls-file/m-p/32606#M23759</guid>
      <dc:creator>weldermartins</dc:creator>
      <dc:date>2022-09-03T15:14:54Z</dc:date>
    </item>
    <item>
      <title>Re: Databricks pyspark - Find columns in xls file.</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-pyspark-find-columns-in-xls-file/m-p/32608#M23761</link>
      <description>&lt;P&gt;You can also do df.printSchema() to check. Or even dbutils.fs.head(&amp;lt;file_path&amp;gt;) to check the header's position. Docs: &lt;A href="https://docs.databricks.com/dev-tools/databricks-utils.html" target="_blank"&gt;https://docs.databricks.com/dev-tools/databricks-utils.html&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 09 Sep 2022 23:18:01 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-pyspark-find-columns-in-xls-file/m-p/32608#M23761</guid>
      <dc:creator>jose_gonzalez</dc:creator>
      <dc:date>2022-09-09T23:18:01Z</dc:date>
    </item>
    <item>
      <title>Re: Databricks pyspark - Find columns in xls file.</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-pyspark-find-columns-in-xls-file/m-p/32609#M23762</link>
      <description>&lt;P&gt;Hi @welder martins&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Hope all is well! Just wanted to check in to see whether you were able to resolve your issue. If so, would you be happy to share the solution or mark an answer as best? Otherwise, please let us know if you need more help.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;We'd love to hear from you.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thanks!&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 21 Sep 2022 05:20:47 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-pyspark-find-columns-in-xls-file/m-p/32609#M23762</guid>
      <dc:creator>Vidula</dc:creator>
      <dc:date>2022-09-21T05:20:47Z</dc:date>
    </item>
    <item>
      <title>Re: Databricks pyspark - Find columns in xls file.</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-pyspark-find-columns-in-xls-file/m-p/32610#M23763</link>
      <description>&lt;P&gt;Hello, come shape. Thanks!&lt;/P&gt;</description>
      <pubDate>Thu, 06 Oct 2022 12:07:12 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-pyspark-find-columns-in-xls-file/m-p/32610#M23763</guid>
      <dc:creator>weldermartins</dc:creator>
      <dc:date>2022-10-06T12:07:12Z</dc:date>
    </item>
    <item>
      <title>Re: Databricks pyspark - Find columns in xls file.</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-pyspark-find-columns-in-xls-file/m-p/32607#M23760</link>
      <description>&lt;P&gt;Hi, thanks for reaching out to community.databricks.com.&lt;/P&gt;&lt;P&gt;Please refer to the following link, which shows how to find column names, and let us know if this helps: &lt;A href="https://sparkbyexamples.com/pyspark/pyspark-find-datatype-column-names-of-dataframe/" alt="https://sparkbyexamples.com/pyspark/pyspark-find-datatype-column-names-of-dataframe/" target="_blank"&gt;https://sparkbyexamples.com/pyspark/pyspark-find-datatype-column-names-of-dataframe/&lt;/A&gt; &lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 05 Sep 2022 22:29:37 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-pyspark-find-columns-in-xls-file/m-p/32607#M23760</guid>
      <dc:creator>Debayan</dc:creator>
      <dc:date>2022-09-05T22:29:37Z</dc:date>
    </item>
  </channel>
</rss>

