<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Pyspark Merge parquet and delta file in Machine Learning</title>
    <link>https://community.databricks.com/t5/machine-learning/pyspark-merge-parquet-and-delta-file/m-p/2829#M19</link>
    <description>&lt;P&gt;Is it possible to use merge command when source file is parquet and destination file is delta? Or must both files be delta files? &lt;/P&gt;&lt;P&gt;Currently, I'm using this code and I transform parquet into delta and it works. But I want to avoid this transformation.&lt;/P&gt;&lt;P&gt;Thanks&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;from delta.tables import *
&amp;nbsp;
deltaTablePeople = DeltaTable.forPath(spark, 'abfss://destination-delta')
deltaTablePeopleUpdates = DeltaTable.forPath(spark, 'abfss://source-parquet')
&amp;nbsp;
dfUpdates = deltaTablePeopleUpdates.toDF()
&amp;nbsp;
deltaTablePeople.alias('people') \
  .merge(
    dfUpdates.alias('updates'),
    'people.id = updates.id'
  ) \
  .whenMatchedUpdate(set =...&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;</description>
    <pubDate>Tue, 20 Jun 2023 09:02:48 GMT</pubDate>
    <dc:creator>alesventus</dc:creator>
    <dc:date>2023-06-20T09:02:48Z</dc:date>
    <item>
      <title>Pyspark Merge parquet and delta file</title>
      <link>https://community.databricks.com/t5/machine-learning/pyspark-merge-parquet-and-delta-file/m-p/2829#M19</link>
      <description>&lt;P&gt;Is it possible to use merge command when source file is parquet and destination file is delta? Or must both files be delta files? &lt;/P&gt;&lt;P&gt;Currently, I'm using this code and I transform parquet into delta and it works. But I want to avoid this transformation.&lt;/P&gt;&lt;P&gt;Thanks&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;from delta.tables import *
&amp;nbsp;
deltaTablePeople = DeltaTable.forPath(spark, 'abfss://destination-delta')
deltaTablePeopleUpdates = DeltaTable.forPath(spark, 'abfss://source-parquet')
&amp;nbsp;
dfUpdates = deltaTablePeopleUpdates.toDF()
&amp;nbsp;
deltaTablePeople.alias('people') \
  .merge(
    dfUpdates.alias('updates'),
    'people.id = updates.id'
  ) \
  .whenMatchedUpdate(set =...&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 20 Jun 2023 09:02:48 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/pyspark-merge-parquet-and-delta-file/m-p/2829#M19</guid>
      <dc:creator>alesventus</dc:creator>
      <dc:date>2023-06-20T09:02:48Z</dc:date>
    </item>
    <item>
      <title>Re: Pyspark Merge parquet and delta file</title>
      <link>https://community.databricks.com/t5/machine-learning/pyspark-merge-parquet-and-delta-file/m-p/2831#M21</link>
      <description>&lt;P&gt;Hi @Ales ventus&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;We haven't heard from you since the last response from @Kaniz Fatma&amp;nbsp;, and I was checking back to see if her suggestions helped you.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Or else, If you have any solution, please share it with the community, as it can be helpful to others.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Also, Please don't forget to click on the "Select As Best" button whenever the information provided helps resolve your question.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 21 Jun 2023 03:16:32 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/pyspark-merge-parquet-and-delta-file/m-p/2831#M21</guid>
      <dc:creator>Anonymous</dc:creator>
      <dc:date>2023-06-21T03:16:32Z</dc:date>
    </item>
  </channel>
</rss>

