<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Multi-agent chatbot Optimization in Generative AI</title>
    <link>https://community.databricks.com/t5/generative-ai/multi-agent-chatbot-optimization/m-p/139961#M1437</link>
    <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/197729"&gt;@Saurabh2406&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;
&lt;P&gt;This sounds like a fairly advanced use case - are you in touch with your account team at Databricks? They would be able to provide you with more detailed guidance on this use case. They could also get you connected with internal specialists.&lt;/P&gt;
&lt;P&gt;In the meantime, you should take a look at these resources to help:&lt;/P&gt;
&lt;UL&gt;
&lt;LI&gt;&lt;A href="https://docs.databricks.com/aws/en/mlflow3/genai/tracing/integrations/langgraph" target="_self"&gt;Tracing LangGraph&lt;/A&gt;&lt;/LI&gt;
&lt;LI&gt;&lt;A href="https://docs.databricks.com/aws/en/vector-search/vector-search-retrieval-quality" target="_self"&gt;Vector Search Retrieval Quality Guide&lt;/A&gt;&lt;/LI&gt;
&lt;LI&gt;&lt;A href="https://docs.databricks.com/aws/en/ai-gateway/configure-ai-gateway-endpoints" target="_self"&gt;Consider enabling AI Gateway&lt;/A&gt; for your external model endpoints to enable traffic policies, payload logging, and rate limiting
&lt;UL&gt;
&lt;LI&gt;Other general recommendations:
&lt;UL&gt;
&lt;LI&gt;Prefer streaming responses for chat/completions to shave tail latency&lt;/LI&gt;
&lt;LI&gt;Batch inference: for large volumes, use SQL/Python ai_query when invoking Databricks‑hosted models (including Gemini 2.5 Pro/Flash) to process data at scale with automatic backend capacity management&lt;/LI&gt;
&lt;/UL&gt;
&lt;/LI&gt;
&lt;/UL&gt;
&lt;/LI&gt;
&lt;LI&gt;Use &lt;A href="https://docs.databricks.com/aws/en/mlflow3/genai/tracing#want-to-get-started-with-tracing" target="_self"&gt;MLflow trace timelines&lt;/A&gt; to find slow spans&lt;/LI&gt;
&lt;/UL&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Fri, 21 Nov 2025 18:40:59 GMT</pubDate>
    <dc:creator>stbjelcevic</dc:creator>
    <dc:date>2025-11-21T18:40:59Z</dc:date>
    <item>
      <title>Multi-agent chatbot Optimization</title>
      <link>https://community.databricks.com/t5/generative-ai/multi-agent-chatbot-optimization/m-p/139787#M1434</link>
      <description>&lt;P&gt;We have developed a multi-agent chatbot using &lt;STRONG&gt;LangGraph&lt;/STRONG&gt; within the &lt;STRONG&gt;Databricks&lt;/STRONG&gt; environment. The solution is functional, but we are facing challenges related to performance observability and end-to-end optimization.&lt;/P&gt;&lt;P&gt;We need guidance in the following areas:&lt;/P&gt;&lt;OL&gt;&lt;LI&gt;&lt;P&gt;&lt;STRONG&gt;Tracing and Logging Enablement&lt;/STRONG&gt;&lt;BR /&gt;How to implement effective distributed tracing and structured logging across LangGraph agents, Databricks components, and external model calls to identify bottlenecks.&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P&gt;&lt;STRONG&gt;Vector Index Optimization&lt;/STRONG&gt;&lt;BR /&gt;Best practices for optimizing our vector index (index type selection, parameters, retrieval tuning) to improve retrieval accuracy and reduce latency.&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P&gt;&lt;STRONG&gt;Gemini External Model API Optimization&lt;/STRONG&gt;&lt;BR /&gt;Recommendations on improving performance and cost efficiency of Gemini API calls, including batching, streaming, prompt optimization, and retry patterns.&lt;/P&gt;&lt;/LI&gt;&lt;LI&gt;&lt;P&gt;&lt;STRONG&gt;Response Latency Analysis &amp;amp; Architecture Review&lt;/STRONG&gt;&lt;BR /&gt;We are experiencing higher-than-expected response latency. We need help validating whether our current architecture and implementation approach is optimal, and identifying improvements if not.&lt;/P&gt;&lt;/LI&gt;&lt;/OL&gt;&lt;P&gt;Looking for expert insights, recommended configurations, code samples, or architectural guidance to help us tune the system for &lt;STRONG&gt;lower latency&lt;/STRONG&gt;, &lt;STRONG&gt;better observability&lt;/STRONG&gt;, and &lt;STRONG&gt;more efficient multi-agent performance&lt;/STRONG&gt;.&lt;/P&gt;</description>
      <pubDate>Thu, 20 Nov 2025 10:00:59 GMT</pubDate>
      <guid>https://community.databricks.com/t5/generative-ai/multi-agent-chatbot-optimization/m-p/139787#M1434</guid>
      <dc:creator>Saurabh2406</dc:creator>
      <dc:date>2025-11-20T10:00:59Z</dc:date>
    </item>
    <item>
      <title>Re: Multi-agent chatbot Optimization</title>
      <link>https://community.databricks.com/t5/generative-ai/multi-agent-chatbot-optimization/m-p/139961#M1437</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/197729"&gt;@Saurabh2406&lt;/a&gt;&amp;nbsp;,&lt;/P&gt;
&lt;P&gt;This sounds like a fairly advanced use case - are you in touch with your account team at Databricks? They would be able to provide you with more detailed guidance on this use case. They could also get you connected with internal specialists.&lt;/P&gt;
&lt;P&gt;In the meantime, you should take a look at these resources to help:&lt;/P&gt;
&lt;UL&gt;
&lt;LI&gt;&lt;A href="https://docs.databricks.com/aws/en/mlflow3/genai/tracing/integrations/langgraph" target="_self"&gt;Tracing LangGraph&lt;/A&gt;&lt;/LI&gt;
&lt;LI&gt;&lt;A href="https://docs.databricks.com/aws/en/vector-search/vector-search-retrieval-quality" target="_self"&gt;Vector Search Retrieval Quality Guide&lt;/A&gt;&lt;/LI&gt;
&lt;LI&gt;&lt;A href="https://docs.databricks.com/aws/en/ai-gateway/configure-ai-gateway-endpoints" target="_self"&gt;Consider enabling AI Gateway&lt;/A&gt; for your external model endpoints to enable traffic policies, payload logging, and rate limiting
&lt;UL&gt;
&lt;LI&gt;Other general recommendations:
&lt;UL&gt;
&lt;LI&gt;Prefer streaming responses for chat/completions to shave tail latency&lt;/LI&gt;
&lt;LI&gt;Batch inference: for large volumes, use SQL/Python ai_query when invoking Databricks‑hosted models (including Gemini 2.5 Pro/Flash) to process data at scale with automatic backend capacity management&lt;/LI&gt;
&lt;/UL&gt;
&lt;/LI&gt;
&lt;/UL&gt;
&lt;/LI&gt;
&lt;LI&gt;Use &lt;A href="https://docs.databricks.com/aws/en/mlflow3/genai/tracing#want-to-get-started-with-tracing" target="_self"&gt;MLflow trace timelines&lt;/A&gt; to find slow spans&lt;/LI&gt;
&lt;/UL&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 21 Nov 2025 18:40:59 GMT</pubDate>
      <guid>https://community.databricks.com/t5/generative-ai/multi-agent-chatbot-optimization/m-p/139961#M1437</guid>
      <dc:creator>stbjelcevic</dc:creator>
      <dc:date>2025-11-21T18:40:59Z</dc:date>
    </item>
  </channel>
</rss>

