<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: How can I preserve the data type of the delta tables while writing to Azure blob storage ? in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/how-can-i-preserve-the-data-type-of-the-delta-tables-while/m-p/78145#M35473</link>
    <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/107188"&gt;@Tiwarisk&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;Thank you for reaching out to our community! We're here to help you. &lt;/SPAN&gt;&lt;SPAN&gt;To ensure we provide you with the best support, could you please take a moment to review the response and choose the one that best answers your question? Your feedback not only helps us assist you better but also benefits other community members who may have similar questions in the future. &lt;/SPAN&gt;&lt;SPAN&gt;If you found the answer helpful, consider giving it a kudo. If the response fully addresses your question, please mark it as the accepted solution. This will help us close the thread and ensure your question is resolved.&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;We appreciate your participation and are here to assist you further if you need it!&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;Thanks,&lt;BR /&gt;Rishabh&lt;/SPAN&gt;&lt;/P&gt;</description>
    <pubDate>Wed, 10 Jul 2024 16:16:43 GMT</pubDate>
    <dc:creator>RishabhTiwari07</dc:creator>
    <dc:date>2024-07-10T16:16:43Z</dc:date>
    <item>
      <title>How can I preserve the data type of the delta tables while writing to Azure blob storage ?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-can-i-preserve-the-data-type-of-the-delta-tables-while/m-p/72709#M34589</link>
      <description>&lt;DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;I am writing a file using this but the data type of columns get changed while reading.&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp;df.write.&lt;/SPAN&gt;&lt;SPAN&gt;format&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"com.crealytics.spark.excel"&lt;/SPAN&gt;&lt;SPAN&gt;).&lt;/SPAN&gt;&lt;SPAN&gt;option&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"header"&lt;/SPAN&gt;&lt;SPAN&gt;, &lt;/SPAN&gt;&lt;SPAN&gt;"true"&lt;/SPAN&gt;&lt;SPAN&gt;).&lt;/SPAN&gt;&lt;SPAN&gt;mode&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"overwrite"&lt;/SPAN&gt;&lt;SPAN&gt;).&lt;/SPAN&gt;&lt;SPAN&gt;save&lt;/SPAN&gt;&lt;SPAN&gt;(path)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;Due to this I have to manual change every time as I can't change the datatype of my target. Is there any way I can write my file with the same datatypes?&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV&gt;&lt;DIV&gt;&lt;DIV class=""&gt;[&lt;A class="" href="https://docs.microsoft.com/azure/databricks/error-messages/error-classes#delta_failed_to_merge_fields" target="_blank" rel="noopener noreferrer"&gt;DELTA_FAILED_TO_MERGE_FIELDS&lt;/A&gt;] Failed to merge fields 'quantity_volume' and 'quantity_volume' SQLSTATE: 22005&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;DIV class=""&gt;File &lt;SPAN class=""&gt;&amp;lt;command-788927452842747&amp;gt;, line 6&lt;/SPAN&gt; &lt;SPAN&gt;4&lt;/SPAN&gt; df&lt;SPAN&gt;.&lt;/SPAN&gt;printSchema() &lt;SPAN&gt;5&lt;/SPAN&gt; df&lt;SPAN&gt;.&lt;/SPAN&gt;count() &lt;SPAN class=""&gt;----&amp;gt; 6&lt;/SPAN&gt; df&lt;SPAN&gt;.&lt;/SPAN&gt;write&lt;SPAN&gt;.&lt;/SPAN&gt;mode(&lt;SPAN&gt;"&lt;/SPAN&gt;&lt;SPAN&gt;overwrite&lt;/SPAN&gt;&lt;SPAN&gt;"&lt;/SPAN&gt;)&lt;SPAN&gt;.&lt;/SPAN&gt;saveAsTable(&lt;SPAN&gt;"&lt;/SPAN&gt;&lt;SPAN&gt;cc.pg_vendor_product_id_hierarchy&lt;/SPAN&gt;&lt;SPAN&gt;"&lt;/SPAN&gt;)&lt;/DIV&gt;&lt;DIV 
class=""&gt;&lt;HR /&gt;&lt;/DIV&gt;&lt;DIV class=""&gt;File &lt;SPAN class=""&gt;/databricks/spark/python/pyspark/errors/exceptions/captured.py:230&lt;/SPAN&gt;, in &lt;SPAN class=""&gt;capture_sql_exception.&amp;lt;locals&amp;gt;.deco&lt;/SPAN&gt;&lt;SPAN class=""&gt;(*a, **kw)&lt;/SPAN&gt; &lt;SPAN&gt;226&lt;/SPAN&gt; converted &lt;SPAN&gt;=&lt;/SPAN&gt; convert_exception(e&lt;SPAN&gt;.&lt;/SPAN&gt;java_exception) &lt;SPAN&gt;227&lt;/SPAN&gt; &lt;SPAN class=""&gt;if&lt;/SPAN&gt; &lt;SPAN class=""&gt;not&lt;/SPAN&gt; &lt;SPAN&gt;isinstance&lt;/SPAN&gt;(converted, UnknownException): &lt;SPAN&gt;228&lt;/SPAN&gt; &lt;SPAN&gt;# Hide where the exception came from that shows a non-Pythonic&lt;/SPAN&gt; &lt;SPAN&gt;229&lt;/SPAN&gt; &lt;SPAN&gt;# JVM exception message.&lt;/SPAN&gt; &lt;SPAN class=""&gt;--&amp;gt; 230&lt;/SPAN&gt; &lt;SPAN class=""&gt;raise&lt;/SPAN&gt; converted &lt;SPAN class=""&gt;from&lt;/SPAN&gt; &lt;SPAN&gt;None&lt;/SPAN&gt; &lt;SPAN&gt;231&lt;/SPAN&gt; &lt;SPAN class=""&gt;else&lt;/SPAN&gt;: &lt;SPAN&gt;232&lt;/SPAN&gt; &lt;SPAN class=""&gt;raise&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;/DIV&gt;</description>
      <pubDate>Wed, 12 Jun 2024 04:40:16 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-can-i-preserve-the-data-type-of-the-delta-tables-while/m-p/72709#M34589</guid>
      <dc:creator>Tiwarisk</dc:creator>
      <dc:date>2024-06-12T04:40:16Z</dc:date>
    </item>
    <item>
      <title>Re: How can I preserve the data type of the delta tables while writing to Azure blob storage ?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-can-i-preserve-the-data-type-of-the-delta-tables-while/m-p/72741#M34593</link>
      <description>&lt;P&gt;Hi there&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/107188"&gt;@Tiwarisk&lt;/a&gt;,&lt;BR /&gt;if this is the major issue&amp;nbsp;&lt;/P&gt;&lt;BLOCKQUOTE&gt;&lt;HR /&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/107188"&gt;@Tiwarisk&lt;/a&gt;&amp;nbsp;wrote:&lt;BR /&gt;&lt;DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;I am writing a file using this but the data type of columns get changed while reading.&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;&amp;nbsp;&lt;/BLOCKQUOTE&gt;&lt;P&gt;You can explicitly specify your table schema like this&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DoubleType

schema = StructType([
    StructField("column1", StringType(), True),
    StructField("column2", IntegerType(), True),
    StructField("column3", DoubleType(), True)
])&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Then you can read the Excel file like this&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;# Read the Excel file with the specified schema
df = (spark.read
  .format("com.crealytics.spark.excel")
  .option("header", "true")
  .schema(schema)  # Specify the schema here
  .load(path))&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;After this, when you write, it won't cause trouble. When writing data to an Excel file using the `com.crealytics.spark.excel` format, you might encounter issues where the data types of the columns are altered. This happens because the Excel format doesn't natively support all Spark data types, and the conversion might not be perfect.&lt;/P&gt;&lt;BLOCKQUOTE&gt;&lt;HR /&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/107188"&gt;@Tiwarisk&lt;/a&gt;&amp;nbsp;wrote:&lt;DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;&amp;nbsp;df.write.&lt;/SPAN&gt;&lt;SPAN&gt;format&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"com.crealytics.spark.excel"&lt;/SPAN&gt;&lt;SPAN&gt;).&lt;/SPAN&gt;&lt;SPAN&gt;option&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"header"&lt;/SPAN&gt;&lt;SPAN&gt;, &lt;/SPAN&gt;&lt;SPAN&gt;"true"&lt;/SPAN&gt;&lt;SPAN&gt;).&lt;/SPAN&gt;&lt;SPAN&gt;mode&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;"overwrite"&lt;/SPAN&gt;&lt;SPAN&gt;).&lt;/SPAN&gt;&lt;SPAN&gt;save&lt;/SPAN&gt;&lt;SPAN&gt;(path)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;/DIV&gt;&lt;HR /&gt;&lt;/BLOCKQUOTE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 12 Jun 2024 07:05:14 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-can-i-preserve-the-data-type-of-the-delta-tables-while/m-p/72741#M34593</guid>
      <dc:creator>ashraf1395</dc:creator>
      <dc:date>2024-06-12T07:05:14Z</dc:date>
    </item>
    <item>
      <title>Re: How can I preserve the data type of the delta tables while writing to Azure blob storage ?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-can-i-preserve-the-data-type-of-the-delta-tables-while/m-p/72745#M34594</link>
      <description>&lt;P&gt;I checked the library you are using to write to Excel and it seems there is a new version available that has improved data type handling.&lt;/P&gt;&lt;P&gt;&lt;EM&gt;&lt;A href="https://github.com/crealytics/spark-excel" target="_blank" rel="noopener"&gt;https://github.com/crealytics/spark-excel&lt;/A&gt;&lt;/EM&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;&lt;EM&gt;Spark-Excel V2 with data source API V2.0+, which supports loading from multiple files, corrupted record handling and some improvement on handling data types. See below for further details&lt;/EM&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;P&gt;&lt;EM&gt;To use V2 implementation, just change your .format from&amp;nbsp;.format("com.crealytics.spark.excel")&amp;nbsp;to&amp;nbsp;.format("excel").&lt;/EM&gt;&lt;BR /&gt;&lt;BR /&gt;Check the github readme for details. If your dataframe has the same datatypes as the Excel table, I'm hoping this gives you some more luck.&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 12 Jun 2024 07:22:59 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-can-i-preserve-the-data-type-of-the-delta-tables-while/m-p/72745#M34594</guid>
      <dc:creator>jacovangelder</dc:creator>
      <dc:date>2024-06-12T07:22:59Z</dc:date>
    </item>
    <item>
      <title>Re: How can I preserve the data type of the delta tables while writing to Azure blob storage ?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-can-i-preserve-the-data-type-of-the-delta-tables-while/m-p/72757#M34598</link>
      <description>&lt;P&gt;Do you need to write the data again in excel format ? Do you need it in that format ? If yes, while reading the excel file back, are you inferring the schema of the file ?&lt;/P&gt;</description>
      <pubDate>Wed, 12 Jun 2024 10:22:28 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-can-i-preserve-the-data-type-of-the-delta-tables-while/m-p/72757#M34598</guid>
      <dc:creator>UmaMahesh1</dc:creator>
      <dc:date>2024-06-12T10:22:28Z</dc:date>
    </item>
    <item>
      <title>Re: How can I preserve the data type of the delta tables while writing to Azure blob storage ?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-can-i-preserve-the-data-type-of-the-delta-tables-while/m-p/73722#M34656</link>
      <description>&lt;P&gt;yes inferschema is true&lt;/P&gt;</description>
      <pubDate>Thu, 13 Jun 2024 05:03:24 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-can-i-preserve-the-data-type-of-the-delta-tables-while/m-p/73722#M34656</guid>
      <dc:creator>Tiwarisk</dc:creator>
      <dc:date>2024-06-13T05:03:24Z</dc:date>
    </item>
    <item>
      <title>Re: How can I preserve the data type of the delta tables while writing to Azure blob storage ?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-can-i-preserve-the-data-type-of-the-delta-tables-while/m-p/78145#M35473</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/107188"&gt;@Tiwarisk&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;Thank you for reaching out to our community! We're here to help you. &lt;/SPAN&gt;&lt;SPAN&gt;To ensure we provide you with the best support, could you please take a moment to review the response and choose the one that best answers your question? Your feedback not only helps us assist you better but also benefits other community members who may have similar questions in the future. &lt;/SPAN&gt;&lt;SPAN&gt;If you found the answer helpful, consider giving it a kudo. If the response fully addresses your question, please mark it as the accepted solution. This will help us close the thread and ensure your question is resolved.&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;We appreciate your participation and are here to assist you further if you need it!&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;Thanks,&lt;BR /&gt;Rishabh&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 10 Jul 2024 16:16:43 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-can-i-preserve-the-data-type-of-the-delta-tables-while/m-p/78145#M35473</guid>
      <dc:creator>RishabhTiwari07</dc:creator>
      <dc:date>2024-07-10T16:16:43Z</dc:date>
    </item>
  </channel>
</rss>

