<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Autoloader Excel Files in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/autoloader-excel-files/m-p/38013#M26542</link>
    <description>&lt;P&gt;What is the workaround ? Is it to transform the excel to a csv file ?&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Thu, 20 Jul 2023 09:19:44 GMT</pubDate>
    <dc:creator>erigaud</dc:creator>
    <dc:date>2023-07-20T09:19:44Z</dc:date>
    <item>
      <title>Autoloader Excel Files</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-excel-files/m-p/37943#M26517</link>
      <description>&lt;P&gt;Hello, I looked at the documentation but could not find what I wanted. Is there a way to load Excel files using an autoloader and if yes, what options should be given to specify format, sheet name etc ?&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thank you friends !&lt;/P&gt;</description>
      <pubDate>Wed, 19 Jul 2023 09:39:42 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-excel-files/m-p/37943#M26517</guid>
      <dc:creator>erigaud</dc:creator>
      <dc:date>2023-07-19T09:39:42Z</dc:date>
    </item>
    <item>
      <title>Re: Autoloader Excel Files</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-excel-files/m-p/37947#M26518</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/84270"&gt;@erigaud&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;You can use&amp;nbsp;&lt;STRONG&gt;spark.read.format("com.crealytics.spark.excel") &lt;/STRONG&gt;while reading Excel files using autoloader. To specify the format, you need to provide&amp;nbsp;&lt;STRONG&gt;com.crealytics.spark.excel&lt;/STRONG&gt;, and to specify the sheet name, you can provide it under options.&amp;nbsp;&lt;/P&gt;&lt;P&gt;Please find the below example code to load Excel files using an autoloader:&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;spark.read.format("com.crealytics.spark.excel") \&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;.option("header", "true") \&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;.option("dataAddress", "'Data - Current'!A1") \&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;.option("treatEmptyValuesAsNulls", "true") \&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;.option("inferSchema", "true") \&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;.load(location)&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;And you can also use pandas to read Excel files:&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;import pandas as pd&lt;/STRONG&gt;&lt;/P&gt;&lt;DIV&gt;&lt;STRONG&gt;pd.read_excel(file ,sheet_name = 0, index_col = 0)&lt;/STRONG&gt;&lt;/DIV&gt;</description>
      <pubDate>Wed, 19 Jul 2023 11:12:34 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-excel-files/m-p/37947#M26518</guid>
      <dc:creator>Vinay_M_R</dc:creator>
      <dc:date>2023-07-19T11:12:34Z</dc:date>
    </item>
    <item>
      <title>Re: Autoloader Excel Files</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-excel-files/m-p/37948#M26519</link>
      <description>&lt;P&gt;Thank you very much, great solution !&lt;/P&gt;</description>
      <pubDate>Wed, 19 Jul 2023 11:23:12 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-excel-files/m-p/37948#M26519</guid>
      <dc:creator>erigaud</dc:creator>
      <dc:date>2023-07-19T11:23:12Z</dc:date>
    </item>
    <item>
      <title>Re: Autoloader Excel Files</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-excel-files/m-p/37953#M26520</link>
      <description>&lt;P&gt;&lt;SPAN&gt;Unfortunately, Databricks autoloader doesn't support Excel file types to incrementally load new files.&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;Link:&lt;/SPAN&gt;&lt;SPAN&gt;&lt;A href="https://docs.databricks.com/ingestion/auto-loader/options.html" target="_self"&gt;https://docs.databricks.com/ingestion/auto-loader/options.html&lt;/A&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;If your Excel file contains a single sheet, then there is a workaround.&lt;/P&gt;</description>
      <pubDate>Wed, 19 Jul 2023 11:43:40 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-excel-files/m-p/37953#M26520</guid>
      <dc:creator>Hemant</dc:creator>
      <dc:date>2023-07-19T11:43:40Z</dc:date>
    </item>
    <item>
      <title>Re: Autoloader Excel Files</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-excel-files/m-p/37962#M26523</link>
      <description>&lt;P&gt;Where is autoloader being used in the above snippet &lt;span class="lia-unicode-emoji" title=":thinking_face:"&gt;🤔&lt;/span&gt;?&lt;/P&gt;</description>
      <pubDate>Wed, 19 Jul 2023 15:31:52 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-excel-files/m-p/37962#M26523</guid>
      <dc:creator>Hemant</dc:creator>
      <dc:date>2023-07-19T15:31:52Z</dc:date>
    </item>
    <item>
      <title>Re: Autoloader Excel Files</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-excel-files/m-p/38013#M26542</link>
      <description>&lt;P&gt;What is the workaround ? Is it to transform the excel to a csv file ?&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 20 Jul 2023 09:19:44 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-excel-files/m-p/38013#M26542</guid>
      <dc:creator>erigaud</dc:creator>
      <dc:date>2023-07-20T09:19:44Z</dc:date>
    </item>
    <item>
      <title>Re: Autoloader Excel Files</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-excel-files/m-p/38075#M26552</link>
      <description>&lt;P&gt;But using autoloader for the conversion into csv.&lt;/P&gt;</description>
      <pubDate>Fri, 21 Jul 2023 03:42:54 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-excel-files/m-p/38075#M26552</guid>
      <dc:creator>Hemant</dc:creator>
      <dc:date>2023-07-21T03:42:54Z</dc:date>
    </item>
    <item>
      <title>Re: Autoloader Excel Files</title>
      <link>https://community.databricks.com/t5/data-engineering/autoloader-excel-files/m-p/38082#M26556</link>
      <description>&lt;P&gt;How would that work exactly ? I specify to the autoloader that the format is csv and it will be able to pick up the excel files, and load the sheet normally, even if the format is .xlsx ?&lt;/P&gt;</description>
      <pubDate>Fri, 21 Jul 2023 06:24:27 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/autoloader-excel-files/m-p/38082#M26556</guid>
      <dc:creator>erigaud</dc:creator>
      <dc:date>2023-07-21T06:24:27Z</dc:date>
    </item>
  </channel>
</rss>

