<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Need help to insert huge data into cosmos db from azure data lake storage using databricks in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/need-help-to-insert-huge-data-into-cosmos-db-from-azure-data/m-p/13360#M8059</link>
    <description>&lt;P&gt;You have probably found a solution, but for others that end up here I got dramatic improvements using the Mongo connector to CosmosDB: &lt;A href="https://www.mongodb.com/docs/spark-connector/current/write-to-mongodb/" target="_blank"&gt;https://www.mongodb.com/docs/spark-connector/current/write-to-mongodb/&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
    <pubDate>Sun, 12 Feb 2023 03:06:03 GMT</pubDate>
    <dc:creator>SteveMeckstroth</dc:creator>
    <dc:date>2023-02-12T03:06:03Z</dc:date>
    <item>
      <title>Need help to insert huge data into cosmos db from azure data lake storage using databricks</title>
      <link>https://community.databricks.com/t5/data-engineering/need-help-to-insert-huge-data-into-cosmos-db-from-azure-data/m-p/13357#M8056</link>
      <description>&lt;P&gt;I am trying to insert 6GB of data into cosmos db using OLTP Connector&lt;/P&gt;&lt;P&gt;Container RU's:40000&lt;/P&gt;&lt;P&gt;Cluster Config:&lt;span class="lia-inline-image-display-wrapper" image-alt="image.png"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/1688iEE7A23AFC339B650/image-size/large?v=v2&amp;amp;px=999" role="button" title="image.png" alt="image.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;cfg = { 
  "spark.cosmos.accountEndpoint" : cosmosdbendpoint,
  "spark.cosmos.accountKey" : cosmosdbmasterkey,
  "spark.cosmos.database" : cosmosdatabase,
  "spark.cosmos.container" : cosmosdbcontainer,
}
&amp;nbsp;
spark.conf.set("spark.sql.catalog.cosmosCatalog", "com.azure.cosmos.spark.CosmosCatalog")
spark.conf.set("spark.sql.catalog.cosmosCatalog.spark.cosmos.accountEndpoint", cosmosdbendpoint)
spark.conf.set("spark.sql.catalog.cosmosCatalog.spark.cosmos.accountKey", cosmosdbmasterkey)
spark.conf.set("spark.cosmos.write.bulk.enabled", "true")
&amp;nbsp;
json_df.write.format("cosmos.oltp").options(**cfg).mode("APPEND").save()&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;It is taking around 3hrs for me to load into cosmos db&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;1.Is increasing RU's is the only approach to decrease the execution time&lt;/P&gt;&lt;P&gt;2.Other than OLTP connector, do we have any ways to insert bulk data within less time&lt;/P&gt;&lt;P&gt;3.How to calculate RU's based on data size&lt;/P&gt;</description>
      <pubDate>Fri, 15 Jul 2022 08:30:11 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/need-help-to-insert-huge-data-into-cosmos-db-from-azure-data/m-p/13357#M8056</guid>
      <dc:creator>manasa</dc:creator>
      <dc:date>2022-07-15T08:30:11Z</dc:date>
    </item>
    <item>
      <title>Re: Need help to insert huge data into cosmos db from azure data lake storage using databricks</title>
      <link>https://community.databricks.com/t5/data-engineering/need-help-to-insert-huge-data-into-cosmos-db-from-azure-data/m-p/13359#M8058</link>
      <description>&lt;P&gt;Hi @Kaniz Fatma​&amp;nbsp;, my problem is not with the resources. I tried everything mentioned in the article but I need to insert bulk data in less time (definitely not 3hrs for 6GB of data). So, I am looking for an optimized way.&lt;/P&gt;</description>
      <pubDate>Fri, 15 Jul 2022 12:45:11 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/need-help-to-insert-huge-data-into-cosmos-db-from-azure-data/m-p/13359#M8058</guid>
      <dc:creator>manasa</dc:creator>
      <dc:date>2022-07-15T12:45:11Z</dc:date>
    </item>
    <item>
      <title>Re: Need help to insert huge data into cosmos db from azure data lake storage using databricks</title>
      <link>https://community.databricks.com/t5/data-engineering/need-help-to-insert-huge-data-into-cosmos-db-from-azure-data/m-p/13360#M8059</link>
      <description>&lt;P&gt;You have probably found a solution, but for others that end up here I got dramatic improvements using the Mongo connector to CosmosDB: &lt;A href="https://www.mongodb.com/docs/spark-connector/current/write-to-mongodb/" target="_blank"&gt;https://www.mongodb.com/docs/spark-connector/current/write-to-mongodb/&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Sun, 12 Feb 2023 03:06:03 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/need-help-to-insert-huge-data-into-cosmos-db-from-azure-data/m-p/13360#M8059</guid>
      <dc:creator>SteveMeckstroth</dc:creator>
      <dc:date>2023-02-12T03:06:03Z</dc:date>
    </item>
    <item>
      <title>Re: Need help to insert huge data into cosmos db from azure data lake storage using databricks</title>
      <link>https://community.databricks.com/t5/data-engineering/need-help-to-insert-huge-data-into-cosmos-db-from-azure-data/m-p/13361#M8060</link>
      <description>&lt;P&gt;Did anyone find a solution for this? I’m also using a similar cluster and RUs, and data ingestion is taking a lot of time….&lt;/P&gt;</description>
      <pubDate>Wed, 22 Feb 2023 21:31:08 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/need-help-to-insert-huge-data-into-cosmos-db-from-azure-data/m-p/13361#M8060</guid>
      <dc:creator>ImAbhishekTomar</dc:creator>
      <dc:date>2023-02-22T21:31:08Z</dc:date>
    </item>
  </channel>
</rss>

