<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: You can use apache hudi in databricks without a problem: - in cluster settings, install Maven library org.apache.hudi:hudi-spark3.3-bundle_2.12:0.13.0... in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/you-can-use-apache-hudi-in-databricks-without-a-problem-in/m-p/6936#M2934</link>
    <description>&lt;P&gt;I tried installing the library and configuring the Spark configs, restarted the cluster, and then ran the create command in a notebook, but it gives me an error stating:&lt;/P&gt;&lt;P&gt;java.io.FileNotFoundException: No such file or directory: s3://incred-databricks-data/hudi_dms_data/hudi_cow_pt_tbl&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;My command in a Python notebook:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;%sql
create table hudi_cow_pt_tbl (
id bigint,
name string,
ts bigint,
dt string,
hh string
) using hudi
tblproperties (
type = 'cow',
primaryKey = 'id',
preCombineField = 'ts'
)
partitioned by (dt, hh)
location 's3://incred-databricks-data/hudi_dms_data/hudi_cow_pt_tbl';&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;This also doesn't work and gives the error: ModuleNotFoundError: No module named 'org.apache.hudi'&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;import org.apache.hudi.DataSourceReadOptions._
import org.apache.hudi.DataSourceWriteOptions._
import org.apache.hudi.config.HoodieWriteConfig._&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="Library"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/487iEC6E2F6451048AC5/image-size/large?v=v2&amp;amp;px=999" role="button" title="Library" alt="Library" /&gt;&lt;/span&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="configs"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/460iB7708A5FCEF9562C/image-size/large?v=v2&amp;amp;px=999" role="button" title="configs" alt="configs" /&gt;&lt;/span&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="12.2 LTS"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/467i5A20E8DC60ABBDE0/image-size/large?v=v2&amp;amp;px=999" role="button" title="12.2 LTS" alt="12.2 LTS" /&gt;&lt;/span&gt;&lt;/P&gt;</description>
    <pubDate>Tue, 16 May 2023 06:34:31 GMT</pubDate>
    <dc:creator>ros</dc:creator>
    <dc:date>2023-05-16T06:34:31Z</dc:date>
    <item>
      <title>You can use apache hudi in databricks without a problem: - in cluster settings, install Maven library org.apache.hudi:hudi-spark3.3-bundle_2.12:0.13.0...</title>
      <link>https://community.databricks.com/t5/data-engineering/you-can-use-apache-hudi-in-databricks-without-a-problem-in/m-p/6934#M2932</link>
      <description>&lt;P&gt;You can use apache &lt;A href="https://www.linkedin.com/feed/hashtag/?keywords=hudi&amp;amp;highlightedUpdateUrns=urn%3Ali%3Aactivity%3A7046177021705510913" alt="https://www.linkedin.com/feed/hashtag/?keywords=hudi&amp;amp;highlightedUpdateUrns=urn%3Ali%3Aactivity%3A7046177021705510913" target="_blank"&gt;hudi&lt;/A&gt; in &lt;A href="https://www.linkedin.com/feed/hashtag/?keywords=databricks&amp;amp;highlightedUpdateUrns=urn%3Ali%3Aactivity%3A7046177021705510913" alt="https://www.linkedin.com/feed/hashtag/?keywords=databricks&amp;amp;highlightedUpdateUrns=urn%3Ali%3Aactivity%3A7046177021705510913" target="_blank"&gt;databricks&lt;/A&gt; without a problem:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;- in cluster settings, install Maven library org.apache.hudi:hudi-spark3.3-bundle_2.12:0.13.0 for Databricks 12.2 LTS&lt;/P&gt;&lt;P&gt;- in cluster spark config, add three lines:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;spark.sql.extensions org.apache.spark.sql.hudi.HoodieSparkSessionExtension
spark.sql.catalog.spark_catalog org.apache.spark.sql.hudi.catalog.HoodieCatalog
spark.serializer org.apache.spark.serializer.KryoSerializer&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;Happy streaming with hudi!&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="hudi"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/468iD33017AEAFDBBC85/image-size/large?v=v2&amp;amp;px=999" role="button" title="hudi" alt="hudi" /&gt;&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 28 Mar 2023 11:46:13 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/you-can-use-apache-hudi-in-databricks-without-a-problem-in/m-p/6934#M2932</guid>
      <dc:creator>Hubert-Dudek</dc:creator>
      <dc:date>2023-03-28T11:46:13Z</dc:date>
    </item>
    <item>
      <title>Re: You can use apache hudi in databricks without a problem: - in cluster settings, install Maven library org.apache.hudi:hudi-spark3.3-bundle_2.12:0.13.0...</title>
      <link>https://community.databricks.com/t5/data-engineering/you-can-use-apache-hudi-in-databricks-without-a-problem-in/m-p/6935#M2933</link>
      <description>&lt;P&gt;Thanks @Hubert Dudek. Is there any documentation available comparing the Hudi and Delta Lake table formats?&lt;/P&gt;</description>
      <pubDate>Tue, 28 Mar 2023 16:04:05 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/you-can-use-apache-hudi-in-databricks-without-a-problem-in/m-p/6935#M2933</guid>
      <dc:creator>pvignesh92</dc:creator>
      <dc:date>2023-03-28T16:04:05Z</dc:date>
    </item>
    <item>
      <title>Re: You can use apache hudi in databricks without a problem: - in cluster settings, install Maven library org.apache.hudi:hudi-spark3.3-bundle_2.12:0.13.0...</title>
      <link>https://community.databricks.com/t5/data-engineering/you-can-use-apache-hudi-in-databricks-without-a-problem-in/m-p/6936#M2934</link>
      <description>&lt;P&gt;I tried installing the library and configuring the Spark configs, restarted the cluster, and then ran the create command in a notebook, but it gives me an error stating:&lt;/P&gt;&lt;P&gt;java.io.FileNotFoundException: No such file or directory: s3://incred-databricks-data/hudi_dms_data/hudi_cow_pt_tbl&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;My command in a Python notebook:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;%sql
create table hudi_cow_pt_tbl (
id bigint,
name string,
ts bigint,
dt string,
hh string
) using hudi
tblproperties (
type = 'cow',
primaryKey = 'id',
preCombineField = 'ts'
)
partitioned by (dt, hh)
location 's3://incred-databricks-data/hudi_dms_data/hudi_cow_pt_tbl';&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;This also doesn't work and gives the error: ModuleNotFoundError: No module named 'org.apache.hudi'&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;import org.apache.hudi.DataSourceReadOptions._
import org.apache.hudi.DataSourceWriteOptions._
import org.apache.hudi.config.HoodieWriteConfig._&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="Library"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/487iEC6E2F6451048AC5/image-size/large?v=v2&amp;amp;px=999" role="button" title="Library" alt="Library" /&gt;&lt;/span&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="configs"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/460iB7708A5FCEF9562C/image-size/large?v=v2&amp;amp;px=999" role="button" title="configs" alt="configs" /&gt;&lt;/span&gt;&lt;span class="lia-inline-image-display-wrapper" image-alt="12.2 LTS"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/467i5A20E8DC60ABBDE0/image-size/large?v=v2&amp;amp;px=999" role="button" title="12.2 LTS" alt="12.2 LTS" /&gt;&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 16 May 2023 06:34:31 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/you-can-use-apache-hudi-in-databricks-without-a-problem-in/m-p/6936#M2934</guid>
      <dc:creator>ros</dc:creator>
      <dc:date>2023-05-16T06:34:31Z</dc:date>
    </item>
  </channel>
</rss>

