<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Generate embeddings for 50 million rows in dataframe in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/generate-embeddings-for-50-million-rows-in-dataframe/m-p/138616#M50978</link>
    <description>&lt;P&gt;The easiest and most reliable way to generate embeddings for millions of rows is to let &lt;STRONG&gt;Databricks Vector Search&lt;/STRONG&gt; compute them automatically during synchronization from a Delta table.&lt;BR /&gt;Vector Search can &lt;STRONG&gt;generate embeddings for you&lt;/STRONG&gt;, keep them updated when new records are inserted or updated, and handle batching, scaling, and retries behind the scenes.&lt;/P&gt;&lt;P&gt;You don’t have to manually loop over rows or call a model serving endpoint; Vector Search handles that for you.&lt;/P&gt;&lt;P&gt;&lt;A href="https://learn.microsoft.com/en-us/azure/databricks/generative-ai/create-query-vector-search" target="_blank" rel="noopener"&gt;https://learn.microsoft.com/en-us/azure/databricks/generative-ai/create-query-vector-search&lt;/A&gt;&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;&lt;P&gt;Handles &lt;STRONG&gt;full backfill&lt;/STRONG&gt; (5M+ rows) efficiently&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P&gt;Supports &lt;STRONG&gt;incremental updates&lt;/STRONG&gt; automatically via Delta change data&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P&gt;No manual code or loops required&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P&gt;Fully managed and &lt;STRONG&gt;Unity Catalog–governed&lt;/STRONG&gt;&lt;/P&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Tue, 11 Nov 2025 16:08:42 GMT</pubDate>
    <dc:creator>bianca_unifeye</dc:creator>
    <dc:date>2025-11-11T16:08:42Z</dc:date>
    <item>
      <title>Generate embeddings for 50 million rows in dataframe</title>
      <link>https://community.databricks.com/t5/data-engineering/generate-embeddings-for-50-million-rows-in-dataframe/m-p/138594#M50975</link>
      <description>&lt;P&gt;Hello All,&lt;/P&gt;&lt;P&gt;I have a dataframe with 5 million rows, and before we can set up a vector search endpoint against the index, we want to generate an embeddings column for each of those rows. Please suggest an optimal way to do this.&lt;/P&gt;&lt;P&gt;We are in the development phase, so we need to do a full load, but later we will need to do the same for incremental loads.&lt;/P&gt;&lt;P&gt;Thanks &amp;amp; Regards,&lt;/P&gt;&lt;P&gt;Vikram&lt;/P&gt;</description>
      <pubDate>Tue, 11 Nov 2025 13:22:56 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/generate-embeddings-for-50-million-rows-in-dataframe/m-p/138594#M50975</guid>
      <dc:creator>vikram_p</dc:creator>
      <dc:date>2025-11-11T13:22:56Z</dc:date>
    </item>
    <item>
      <title>Re: Generate embeddings for 50 million rows in dataframe</title>
      <link>https://community.databricks.com/t5/data-engineering/generate-embeddings-for-50-million-rows-in-dataframe/m-p/138616#M50978</link>
      <description>&lt;P&gt;The easiest and most reliable way to generate embeddings for millions of rows is to let &lt;STRONG&gt;Databricks Vector Search&lt;/STRONG&gt; compute them automatically during synchronization from a Delta table.&lt;BR /&gt;Vector Search can &lt;STRONG&gt;generate embeddings for you&lt;/STRONG&gt;, keep them updated when new records are inserted or updated, and handle batching, scaling, and retries behind the scenes.&lt;/P&gt;&lt;P&gt;You don’t have to manually loop over rows or call a model serving endpoint; Vector Search handles that for you.&lt;/P&gt;&lt;P&gt;&lt;A href="https://learn.microsoft.com/en-us/azure/databricks/generative-ai/create-query-vector-search" target="_blank" rel="noopener"&gt;https://learn.microsoft.com/en-us/azure/databricks/generative-ai/create-query-vector-search&lt;/A&gt;&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;&lt;P&gt;Handles &lt;STRONG&gt;full backfill&lt;/STRONG&gt; (5M+ rows) efficiently&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P&gt;Supports &lt;STRONG&gt;incremental updates&lt;/STRONG&gt; automatically via Delta change data&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P&gt;No manual code or loops required&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P&gt;Fully managed and &lt;STRONG&gt;Unity Catalog–governed&lt;/STRONG&gt;&lt;/P&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 11 Nov 2025 16:08:42 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/generate-embeddings-for-50-million-rows-in-dataframe/m-p/138616#M50978</guid>
      <dc:creator>bianca_unifeye</dc:creator>
      <dc:date>2025-11-11T16:08:42Z</dc:date>
    </item>
  </channel>
</rss>

