<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Generate sh2 hashkey while loading files to delta table in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/generate-sh2-hashkey-while-loading-files-to-delta-table/m-p/13611#M8256</link>
    <description>&lt;P&gt;I have files in Azure Data Lake. I am using Auto Loader to read the incremental files.&lt;/P&gt;&lt;P&gt;The files don't have a primary key, so I want to generate a hash key from some of the columns and use it as the primary key to apply changes.&lt;/P&gt;&lt;P&gt;I want the hash key column to be appended when I load my initial file,&lt;/P&gt;&lt;P&gt;and the hash key also needs to be appended for the micro-batches.&lt;/P&gt;&lt;P&gt;But when I use sha2 to generate the hash key, I get an error.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Input file 1:&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="image.png"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/923i1EB8692C53DEDC08/image-size/large?v=v2&amp;amp;px=999" role="button" title="image.png" alt="image.png" /&gt;&lt;/span&gt;inputpath = 'abfss://***@***.dfs.core.windows.net/test/'&lt;/P&gt;&lt;P&gt;df = spark.readStream.format("cloudFiles").option("cloudFiles.format","csv").option("cloudFiles.schemaEvolutionMode","rescue").option("cloudFIles.schemaLocation", checkpoint_path).load(inputpath)&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;df.withColumns("Hashkey",sha2(concat_ws(",",df['id'],df['product_Name'],df['Location'],df['offer_code']),256))&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Getting:&lt;/P&gt;&lt;P&gt;AssertionError:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
    <pubDate>Thu, 05 Jan 2023 05:23:52 GMT</pubDate>
    <dc:creator>Vijaykumarj</dc:creator>
    <dc:date>2023-01-05T05:23:52Z</dc:date>
    <item>
      <title>Generate sh2 hashkey while loading files to delta table</title>
      <link>https://community.databricks.com/t5/data-engineering/generate-sh2-hashkey-while-loading-files-to-delta-table/m-p/13611#M8256</link>
      <description>&lt;P&gt;I have files in Azure Data Lake. I am using Auto Loader to read the incremental files.&lt;/P&gt;&lt;P&gt;The files don't have a primary key, so I want to generate a hash key from some of the columns and use it as the primary key to apply changes.&lt;/P&gt;&lt;P&gt;I want the hash key column to be appended when I load my initial file,&lt;/P&gt;&lt;P&gt;and the hash key also needs to be appended for the micro-batches.&lt;/P&gt;&lt;P&gt;But when I use sha2 to generate the hash key, I get an error.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Input file 1:&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="image.png"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/923i1EB8692C53DEDC08/image-size/large?v=v2&amp;amp;px=999" role="button" title="image.png" alt="image.png" /&gt;&lt;/span&gt;inputpath = 'abfss://***@***.dfs.core.windows.net/test/'&lt;/P&gt;&lt;P&gt;df = spark.readStream.format("cloudFiles").option("cloudFiles.format","csv").option("cloudFiles.schemaEvolutionMode","rescue").option("cloudFIles.schemaLocation", checkpoint_path).load(inputpath)&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;df.withColumns("Hashkey",sha2(concat_ws(",",df['id'],df['product_Name'],df['Location'],df['offer_code']),256))&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Getting:&lt;/P&gt;&lt;P&gt;AssertionError:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 05 Jan 2023 05:23:52 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/generate-sh2-hashkey-while-loading-files-to-delta-table/m-p/13611#M8256</guid>
      <dc:creator>Vijaykumarj</dc:creator>
      <dc:date>2023-01-05T05:23:52Z</dc:date>
    </item>
    <item>
      <title>Re: Generate sh2 hashkey while loading files to delta table</title>
      <link>https://community.databricks.com/t5/data-engineering/generate-sh2-hashkey-while-loading-files-to-delta-table/m-p/13612#M8257</link>
      <description>&lt;P&gt;Can you copy the whole error?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I bet that it should be withColumn, not withColumns (remove the s).&lt;/P&gt;</description>
      <pubDate>Thu, 05 Jan 2023 10:15:46 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/generate-sh2-hashkey-while-loading-files-to-delta-table/m-p/13612#M8257</guid>
      <dc:creator>Hubert-Dudek</dc:creator>
      <dc:date>2023-01-05T10:15:46Z</dc:date>
    </item>
    <item>
      <title>Re: Generate sh2 hashkey while loading files to delta table</title>
      <link>https://community.databricks.com/t5/data-engineering/generate-sh2-hashkey-while-loading-files-to-delta-table/m-p/13614#M8259</link>
      <description>&lt;P&gt;&lt;/P&gt;&lt;P&gt;Sorry for the delay in responding; see the error below.&lt;span class="lia-inline-image-display-wrapper" image-alt="image"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/938i3B9C56913BE810A2/image-size/large?v=v2&amp;amp;px=999" role="button" title="image" alt="image" /&gt;&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 06 Jan 2023 02:15:00 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/generate-sh2-hashkey-while-loading-files-to-delta-table/m-p/13614#M8259</guid>
      <dc:creator>Vijaykumarj</dc:creator>
      <dc:date>2023-01-06T02:15:00Z</dc:date>
    </item>
    <item>
      <title>Re: Generate sh2 hashkey while loading files to delta table</title>
      <link>https://community.databricks.com/t5/data-engineering/generate-sh2-hashkey-while-loading-files-to-delta-table/m-p/13615#M8260</link>
      <description>&lt;P&gt;Try withColumn. withColumns takes a single dict that maps column names to column expressions and adds or replaces several columns at once. withColumn creates one new column with the name you give it.&lt;/P&gt;</description>
      <pubDate>Fri, 06 Jan 2023 06:04:52 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/generate-sh2-hashkey-while-loading-files-to-delta-table/m-p/13615#M8260</guid>
      <dc:creator>Jfoxyyc</dc:creator>
      <dc:date>2023-01-06T06:04:52Z</dc:date>
    </item>
    <item>
      <title>Re: Generate sh2 hashkey while loading files to delta table</title>
      <link>https://community.databricks.com/t5/data-engineering/generate-sh2-hashkey-while-loading-files-to-delta-table/m-p/13613#M8258</link>
      <description>&lt;P&gt;Hi, could you please provide the error code?&lt;/P&gt;</description>
      <pubDate>Thu, 05 Jan 2023 18:43:13 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/generate-sh2-hashkey-while-loading-files-to-delta-table/m-p/13613#M8258</guid>
      <dc:creator>Debayan</dc:creator>
      <dc:date>2023-01-05T18:43:13Z</dc:date>
    </item>
  </channel>
</rss>

