<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Reading Excel files folder in Community Articles</title>
    <link>https://community.databricks.com/t5/community-articles/reading-excel-files-folder/m-p/90579#M272</link>
    <description>&lt;P&gt;&lt;SPAN&gt;Hi&amp;nbsp;&lt;/SPAN&gt;&lt;A href="https://community.databricks.com/t5/user/viewprofilepage/user-id/109757" target="_blank"&gt;@AhmedAlnaqa&lt;/A&gt;&lt;SPAN&gt;&amp;nbsp;,&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;Can we read from ADLS location too by using abfss ?&lt;/P&gt;&lt;P&gt;Thanks&lt;/P&gt;</description>
    <pubDate>Mon, 16 Sep 2024 13:33:49 GMT</pubDate>
    <dc:creator>maddy08</dc:creator>
    <dc:date>2024-09-16T13:33:49Z</dc:date>
    <item>
      <title>Reading Excel files folder</title>
      <link>https://community.databricks.com/t5/community-articles/reading-excel-files-folder/m-p/77116#M164</link>
      <description>&lt;P&gt;Dear all,&lt;/P&gt;&lt;P&gt;One of the tasks data engineers (DE) often need to do is ingest data from files, for example Excel files.&lt;/P&gt;&lt;P&gt;Thanks to&amp;nbsp;OnerFusion-AI for the thread below, which gives the steps for reading a single file:&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;A href="https://community.databricks.com/t5/get-started-discussions/how-to-import-excel-on-databricks/td-p/44915" target="_blank"&gt;https://community.databricks.com/t5/get-started-discussions/how-to-import-excel-on-databricks/td-p/44915&lt;/A&gt;&lt;/P&gt;&lt;P&gt;In addition, the code below reads all the Excel files in a folder:&lt;/P&gt;&lt;P&gt;Important note:&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;- All files must have the same structure.&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;Steps:&lt;/P&gt;&lt;P&gt;1- Upload the Excel files to a DBFS folder.&lt;/P&gt;&lt;P&gt;2- Use the code below to read each file and combine them into a single CSV file.&lt;/P&gt;&lt;LI-CODE lang="python"&gt;from pyspark.sql import SparkSession

# Get (or create) the SparkSession. The spark-excel package configured below
# supplies the "com.crealytics.spark.excel" data source used to read workbooks.
# NOTE(review): if a session already exists (typical in a notebook),
# getOrCreate() returns that session and this package setting may not take
# effect - confirm the spark-excel library is installed on the cluster.
spark = SparkSession.builder \
    .appName("ReadExcelWithHeader") \
    .config("spark.jars.packages", "com.crealytics:spark-excel_2.12:0.13.5") \
    .getOrCreate()

# Directory containing the Excel files to ingest
excel_dir_path = "/FileStore/tables"

# List every entry in the directory using dbutils.fs.ls
all_files = dbutils.fs.ls(excel_dir_path)

# Keep only the .xlsx files. The extension is compared case-insensitively so
# that files named e.g. "REPORT.XLSX" are not silently skipped.
excel_files = [file.path for file in all_files if file.path.lower().endswith(".xlsx")]

# Accumulator for the combined data; stays None when no Excel file is found
df_combined = None

# Read each workbook (first row as header, column types inferred) and append it
for excel_file in excel_files:
    df = spark.read \
        .format("com.crealytics.spark.excel") \
        .option("header", "true") \
        .option("inferSchema", "true") \
        .load(excel_file)

    # unionByName matches columns by header name. Plain union() matches them by
    # position, so two files with the same headers in a different order would
    # have their values merged into the wrong columns without any error.
    if df_combined is None:
        df_combined = df
    else:
        df_combined = df_combined.unionByName(df)

# Only write output when at least one Excel file was read
if df_combined is not None:
    # Output location for the combined data
    # NOTE(review): Spark normally materialises this path as a directory of
    # part files rather than one flat file - verify downstream readers expect that.
    csv_file_path = "/FileStore/tables/output_file.csv"

    # Save the combined DataFrame as CSV with a header row, replacing any
    # previous output at the same path
    df_combined.write.csv(csv_file_path, header=True, mode='overwrite')

    print(f"Excel files in {excel_dir_path} have been successfully converted to {csv_file_path}")
else:
    print(f"No Excel files found in {excel_dir_path}")

# Stop the Spark session
#spark.stop()&lt;/LI-CODE&gt;&lt;P&gt;Thanks&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 08 Jul 2024 09:58:11 GMT</pubDate>
      <guid>https://community.databricks.com/t5/community-articles/reading-excel-files-folder/m-p/77116#M164</guid>
      <dc:creator>AhmedAlnaqa</dc:creator>
      <dc:date>2024-07-08T09:58:11Z</dc:date>
    </item>
    <item>
      <title>Re: Reading Excel files folder</title>
      <link>https://community.databricks.com/t5/community-articles/reading-excel-files-folder/m-p/77151#M166</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/109757"&gt;@AhmedAlnaqa&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;
&lt;P&gt;Thank you for sharing this. I am sure it will help other community members.&lt;/P&gt;
&lt;P&gt;Thanks,&lt;/P&gt;
&lt;P&gt;Rishabh&lt;/P&gt;</description>
      <pubDate>Mon, 08 Jul 2024 13:30:29 GMT</pubDate>
      <guid>https://community.databricks.com/t5/community-articles/reading-excel-files-folder/m-p/77151#M166</guid>
      <dc:creator>RishabhTiwari07</dc:creator>
      <dc:date>2024-07-08T13:30:29Z</dc:date>
    </item>
    <item>
      <title>Re: Reading Excel files folder</title>
      <link>https://community.databricks.com/t5/community-articles/reading-excel-files-folder/m-p/90579#M272</link>
      <description>&lt;P&gt;&lt;SPAN&gt;Hi&amp;nbsp;&lt;/SPAN&gt;&lt;A href="https://community.databricks.com/t5/user/viewprofilepage/user-id/109757" target="_blank"&gt;@AhmedAlnaqa&lt;/A&gt;&lt;SPAN&gt;&amp;nbsp;,&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;Can we read from ADLS location too by using abfss ?&lt;/P&gt;&lt;P&gt;Thanks&lt;/P&gt;</description>
      <pubDate>Mon, 16 Sep 2024 13:33:49 GMT</pubDate>
      <guid>https://community.databricks.com/t5/community-articles/reading-excel-files-folder/m-p/90579#M272</guid>
      <dc:creator>maddy08</dc:creator>
      <dc:date>2024-09-16T13:33:49Z</dc:date>
    </item>
    <item>
      <title>Re: Reading Excel files folder</title>
      <link>https://community.databricks.com/t5/community-articles/reading-excel-files-folder/m-p/90588#M273</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/121099"&gt;@maddy08&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;&lt;P&gt;You can read from abfss using &lt;STRONG&gt;com.crealytics:spark-excel.&amp;nbsp;&lt;/STRONG&gt;You can refer to the below video as an example:&lt;/P&gt;&lt;P&gt;&lt;A href="https://www.youtube.com/watch?v=4vSbanmUtuI&amp;amp;ab_channel=CloudFitness" target="_blank"&gt;Read excel file in databricks using python and scala #spark (youtube.com)&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 16 Sep 2024 14:08:30 GMT</pubDate>
      <guid>https://community.databricks.com/t5/community-articles/reading-excel-files-folder/m-p/90588#M273</guid>
      <dc:creator>szymon_dybczak</dc:creator>
      <dc:date>2024-09-16T14:08:30Z</dc:date>
    </item>
  </channel>
</rss>

