<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Generate embeddings from third party API in Delta Live Tables in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/generate-embeddings-from-third-party-api-in-delta-live-tables/m-p/71628#M34359</link>
    <description>&lt;P class=""&gt;Hi&lt;/P&gt;&lt;P class=""&gt;We currently have a Delta Live Tables flow through which textual data is flowing. As a final enrichment step we would also want to generate embeddings using a third party API provider (probably &lt;A href="https://www.voyageai.com/" target="_blank" rel="noopener"&gt;&lt;SPAN class=""&gt;Voyage.AI&lt;/SPAN&gt;&lt;/A&gt;). They support batch embedding which would greatly help speed this up (but we would still hit rate limits).&lt;/P&gt;&lt;P class=""&gt;Is it best practice to include this as a UDF in the DLT flow? Or should these be generated in another step?&lt;/P&gt;&lt;P class=""&gt;Thanks&lt;/P&gt;&lt;P class=""&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Tue, 04 Jun 2024 14:58:29 GMT</pubDate>
    <dc:creator>LucasBelpaire</dc:creator>
    <dc:date>2024-06-04T14:58:29Z</dc:date>
    <item>
      <title>Generate embeddings from third party API in Delta Live Tables</title>
      <link>https://community.databricks.com/t5/data-engineering/generate-embeddings-from-third-party-api-in-delta-live-tables/m-p/71628#M34359</link>
      <description>&lt;P class=""&gt;Hi&lt;/P&gt;&lt;P class=""&gt;We currently have a Delta Live Tables flow through which textual data is flowing. As a final enrichment step we would also want to generate embeddings using a third party API provider (probably &lt;A href="https://www.voyageai.com/" target="_blank" rel="noopener"&gt;&lt;SPAN class=""&gt;Voyage.AI&lt;/SPAN&gt;&lt;/A&gt;). They support batch embedding which would greatly help speed this up (but we would still hit rate limits).&lt;/P&gt;&lt;P class=""&gt;Is it best practice to include this as a UDF in the DLT flow? Or should these be generated in another step?&lt;/P&gt;&lt;P class=""&gt;Thanks&lt;/P&gt;&lt;P class=""&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 04 Jun 2024 14:58:29 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/generate-embeddings-from-third-party-api-in-delta-live-tables/m-p/71628#M34359</guid>
      <dc:creator>LucasBelpaire</dc:creator>
      <dc:date>2024-06-04T14:58:29Z</dc:date>
    </item>
  </channel>
</rss>

