Hello, yes, of course you need to write the Excel file to the tmp folder first, but then you can move it wherever you want without any problem. In my current project we implemented this method to create the file in the tmp folder and then move it to a specific folder in an Azure Blob container.
def export_to_excel(dbutils, df, file_name, result_path,
                    secondary_df=None, tab_names=("Sheet1", "Sheet2")):
    """Export a Spark DataFrame to one or more .xlsx files and move them.

    Each workbook is first written to the driver's local disk under
    ``/databricks/driver/`` and then moved to ``result_path`` with
    ``dbutils.fs.mv``.

    If ``df`` fits in a single Excel sheet, one file named
    ``{file_name}.xlsx`` is produced. Otherwise the data is split into
    consecutive chunks and written as ``{file_name}_part1.xlsx``,
    ``{file_name}_part2.xlsx``, ...

    Args:
        dbutils: Databricks utilities object; only ``dbutils.fs.mv`` is used.
        df: Spark DataFrame to export (``count``, ``toPandas``,
            ``withColumn``, ``filter`` and ``drop`` are called on it).
        file_name: Base name of the output file, without extension.
        result_path: Destination prefix; the file name is appended to it
            as-is, so it should end with a path separator.
        secondary_df: Optional pandas DataFrame written as a second sheet
            of the single-file workbook. Ignored when the export is split
            into several parts.
        tab_names: Sheet names for the single-file workbook; index 0 is
            used for ``df`` and index 1 for ``secondary_df``.
    """
    import math

    import pandas as pd
    from pyspark.sql import Window
    from pyspark.sql.functions import monotonically_increasing_id, row_number

    # Maximum number of data rows per sheet: Excel's row limit minus the
    # header row.
    excel_limit = 1048576 - 1
    local_dir = "/databricks/driver/"

    num_rows = df.count()

    if num_rows <= excel_limit:
        local_path = f"{local_dir}{file_name}.xlsx"
        # The context manager saves and closes the workbook on exit, so no
        # explicit writer.save() is needed (it was removed in pandas 2.0).
        with pd.ExcelWriter(local_path, engine='xlsxwriter') as writer:
            df.toPandas().to_excel(writer, sheet_name=tab_names[0], index=False)
            if secondary_df is not None:
                secondary_df.to_excel(writer, sheet_name=tab_names[1], index=False)
        print("moving fileName: {0} from src directory: /databricks/driver/ to dst directory: {1} ".format(file_name, result_path))
        dbutils.fs.mv(f"file:{local_path}", f"{result_path}{file_name}.xlsx")
        return

    # Round up so a trailing partial chunk still gets its own file.
    num_files = math.ceil(num_rows / excel_limit)

    # row_number() is 1-based; it gives every row a stable position that
    # the chunks below are cut from.
    # NOTE(review): a window without partitioning is expected to funnel all
    # rows through a single partition - confirm this is acceptable for the
    # data volumes involved.
    indexed = df.withColumn(
        "row_idx",
        row_number().over(Window.orderBy(monotonically_increasing_id())),
    )

    for part in range(1, num_files + 1):
        # Half-open on the left, closed on the right: (lower, upper]. This
        # matches the 1-based row_idx so every chunk holds up to
        # excel_limit rows and no row is skipped.
        lower = excel_limit * (part - 1)
        upper = excel_limit * part
        chunk = (
            indexed
            .filter((indexed.row_idx > lower) & (indexed.row_idx <= upper))
            .drop("row_idx")
            .toPandas()
        )
        part_name = "{0}_part{1}.xlsx".format(file_name, part)
        # len() on the collected chunk avoids a second Spark job just to
        # report the row count.
        print('saving ' + str(len(chunk)) + ' rows in ' + part_name)
        # Write to the driver's local disk using the same absolute path the
        # move below reads from, instead of relying on the working directory.
        chunk.to_excel(f"{local_dir}{part_name}", engine='xlsxwriter', index=False, header=True)
        # Move the file from the cluster disk to the destination.
        print("moving fileName: {0}_part{1} in src directory: /databricks/driver/ to dst directory: {2} ".format(file_name, part, result_path))
        dbutils.fs.mv(f"file:{local_dir}{part_name}", f"{result_path}{part_name}")
PS: You need to install the xlsxwriter Python library to be able to run this method and generate the Excel file.