<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Help with hybrid search function in Generative AI</title>
    <link>https://community.databricks.com/t5/generative-ai/help-with-hybrid-search-function/m-p/140003#M1439</link>
    <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/133487"&gt;@the_peterlandis&lt;/a&gt;, Yes, currently vector_search SQL function doesn't provide pre filter support. However, if you must implement the UC function for this, you can do it something like below using Python code with filters.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;DIV&gt;&lt;LI-CODE lang="python"&gt;%sql
CREATE OR REPLACE FUNCTION kaushal.kaushal.vector_similarity_search(
  query_text STRING,
  filter_id INT,
  num_results INT
)
RETURNS STRING
LANGUAGE PYTHON
COMMENT "Vector similarity search using authenticated client"
ENVIRONMENT (
  dependencies = '["databricks-vectorsearch", "databricks-sdk"]',
  environment_version = 'None'
)
AS $$
import json
import os

# Get credentials from Databricks secrets
# You'll need to set these up first
def get_secret(scope, key):
    from databricks.sdk import WorkspaceClient
    w = WorkspaceClient()
    return w.secrets.get_secret(scope=scope, key=key).value

# Alternative: if dbutils is available in UDF context
# token = dbutils.secrets.get(scope="your-scope", key="databricks-token")
# host = dbutils.secrets.get(scope="your-scope", key="databricks-host")

# Set environment variables for authentication
os.environ['DATABRICKS_HOST'] = get_secret("your-scope", "databricks-host")
os.environ['DATABRICKS_TOKEN'] = get_secret("your-scope", "databricks-token")

from databricks.vector_search.client import VectorSearchClient

# Initialize client - should now pick up environment variables
client = VectorSearchClient()
index = client.get_index(
    endpoint_name="vector-search-demo-endpoint-kaushal",
    index_name="kaushal.kaushal.my_text_data_index"
)

results = index.similarity_search(
    query_text=query_text,
    columns=["id", "content"],
    filters={"id": [filter_id]},
    num_results=num_results
)

return json.dumps(results.get('result', {}).get('data_array', []))
$$;&lt;/LI-CODE&gt;
&lt;P&gt;&lt;BR /&gt;Then run your UC function with SQL, and you should get the expected results.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="KaushalVachhani_0-1763803856179.png" style="width: 999px;"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/21900i6F5156D34CBB71F9/image-size/large?v=v2&amp;amp;px=999" role="button" title="KaushalVachhani_0-1763803856179.png" alt="KaushalVachhani_0-1763803856179.png" /&gt;&lt;/span&gt;&lt;/DIV&gt;</description>
    <pubDate>Sat, 22 Nov 2025 09:33:49 GMT</pubDate>
    <dc:creator>KaushalVachhani</dc:creator>
    <dc:date>2025-11-22T09:33:49Z</dc:date>
    <item>
      <title>Help with hybrid search function</title>
      <link>https://community.databricks.com/t5/generative-ai/help-with-hybrid-search-function/m-p/139942#M1435</link>
      <description>&lt;P&gt;I need help writing a filter.&amp;nbsp; I want to pre-filter a vector index before performing a hybrid search and create this as a function.&amp;nbsp; Below is a simple example of searching for products for a given customer.&amp;nbsp; A prefilter is key as this provides authorizations for searching a vector index before applying the top k which reduces the vector space searching as a prefilter before the search.&amp;nbsp; I am not seeing any filter capability like how you would call the API.&lt;BR /&gt;Example of the search API with prefilter&lt;BR /&gt;&lt;BR /&gt;results = index.similarity_search(&lt;BR /&gt;&amp;nbsp; &amp;nbsp;query_text=question,&lt;BR /&gt;&amp;nbsp; &amp;nbsp;query_type = "HYBRID",&lt;BR /&gt;&amp;nbsp; &amp;nbsp;columns=["content", "product", "product_description", "product_id", "purchase_date"],&lt;BR /&gt;&amp;nbsp; &amp;nbsp;filters="{"customer_emai":&lt;SPAN&gt;customer_email&lt;/SPAN&gt;},&lt;BR /&gt;&amp;nbsp; &amp;nbsp;num_results=5&lt;BR /&gt;)&lt;/P&gt;&lt;P&gt;Below is the SQL Function I need help on&lt;/P&gt;&lt;DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;%sql&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;CREATE OR REPLACE&lt;/SPAN&gt; &lt;SPAN&gt;FUNCTION&lt;/SPAN&gt; &lt;SPAN&gt;get_customer_products&lt;/SPAN&gt;&lt;SPAN&gt;(&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;search_query &lt;/SPAN&gt;&lt;SPAN&gt;string&lt;/SPAN&gt; &lt;SPAN&gt;comment&lt;/SPAN&gt; &lt;SPAN&gt;'Search query'&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;customer_email &lt;/SPAN&gt;&lt;SPAN&gt;string&lt;/SPAN&gt; &lt;SPAN&gt;comment&lt;/SPAN&gt; &lt;SPAN&gt;'Customer email'&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;RETURNS&lt;/SPAN&gt; &lt;SPAN&gt;TABLE&lt;/SPAN&gt;&lt;SPAN&gt; (&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;product&amp;nbsp;&lt;/SPAN&gt;&lt;SPAN&gt;STRING&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;BR /&gt;product_description STRING,&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;product_id &lt;/SPAN&gt;&lt;SPAN&gt;int&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;purchase_date_timestamp &lt;/SPAN&gt;&lt;SPAN&gt;timestamp&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;COMMENT&lt;/SPAN&gt; &lt;SPAN&gt;'Retrieve the customer products'&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;LANGUAGE&lt;/SPAN&gt; &lt;SPAN&gt;SQL&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;RETURN&lt;/SPAN&gt;&lt;SPAN&gt; (&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;SELECT&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;content,&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;product_id,&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;purchase_date_timestamp&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;from&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;vector_search(&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;index&lt;/SPAN&gt;&lt;SPAN&gt;=&amp;gt;&lt;/SPAN&gt; &lt;SPAN&gt;'gold.my_store.products'&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;query&lt;/SPAN&gt; &lt;SPAN&gt;=&amp;gt;&lt;/SPAN&gt;&lt;SPAN&gt; search_query,&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;query_type &lt;/SPAN&gt;&lt;SPAN&gt;=&amp;gt;&lt;/SPAN&gt; &lt;SPAN&gt;'HYBRID'&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;filter&lt;/SPAN&gt; &lt;SPAN&gt;=&amp;gt;&lt;/SPAN&gt; &lt;SPAN&gt;'{"customer_email": "'&lt;/SPAN&gt; &lt;SPAN&gt;||&lt;/SPAN&gt;&lt;SPAN&gt; customer_email &lt;/SPAN&gt;&lt;SPAN&gt;||&lt;/SPAN&gt; &lt;SPAN&gt;'"}'&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;num_results &lt;/SPAN&gt;&lt;SPAN&gt;=&amp;gt;&lt;/SPAN&gt; &lt;SPAN&gt;5&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN&gt;)&lt;/SPAN&gt;&lt;/DIV&gt;&lt;/DIV&gt;</description>
      <pubDate>Fri, 21 Nov 2025 15:49:58 GMT</pubDate>
      <guid>https://community.databricks.com/t5/generative-ai/help-with-hybrid-search-function/m-p/139942#M1435</guid>
      <dc:creator>the_peterlandis</dc:creator>
      <dc:date>2025-11-21T15:49:58Z</dc:date>
    </item>
    <item>
      <title>Re: Help with hybrid search function</title>
      <link>https://community.databricks.com/t5/generative-ai/help-with-hybrid-search-function/m-p/139963#M1438</link>
      <description>&lt;P&gt;&lt;SPAN&gt;Based on this&amp;nbsp;&lt;/SPAN&gt;&lt;A class="" href="https://docs.databricks.com/aws/en/sql/language-manual/functions/vector_search#limitations" target="_blank" rel="noopener noreferrer"&gt;documentation&lt;/A&gt;&lt;SPAN&gt;, it says, it indicate sql function&amp;nbsp;&lt;/SPAN&gt;VECTOR_SEARCH&lt;SPAN&gt;&amp;nbsp;cannot apply &lt;STRONG&gt;pre filter&amp;nbsp;&lt;/STRONG&gt;which prefilter is a fundamental capability for vector search.&amp;nbsp; Just very surprised this is not supported.&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Fri, 21 Nov 2025 18:58:48 GMT</pubDate>
      <guid>https://community.databricks.com/t5/generative-ai/help-with-hybrid-search-function/m-p/139963#M1438</guid>
      <dc:creator>the_peterlandis</dc:creator>
      <dc:date>2025-11-21T18:58:48Z</dc:date>
    </item>
    <item>
      <title>Re: Help with hybrid search function</title>
      <link>https://community.databricks.com/t5/generative-ai/help-with-hybrid-search-function/m-p/140003#M1439</link>
      <description>&lt;P&gt;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/133487"&gt;@the_peterlandis&lt;/a&gt;, Yes, currently vector_search SQL function doesn't provide pre filter support. However, if you must implement the UC function for this, you can do it something like below using Python code with filters.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;DIV&gt;&lt;LI-CODE lang="python"&gt;%sql
CREATE OR REPLACE FUNCTION kaushal.kaushal.vector_similarity_search(
  query_text STRING,
  filter_id INT,
  num_results INT
)
RETURNS STRING
LANGUAGE PYTHON
COMMENT "Vector similarity search using authenticated client"
ENVIRONMENT (
  dependencies = '["databricks-vectorsearch", "databricks-sdk"]',
  environment_version = 'None'
)
AS $$
import json
import os

# Get credentials from Databricks secrets
# You'll need to set these up first
def get_secret(scope, key):
    from databricks.sdk import WorkspaceClient
    w = WorkspaceClient()
    return w.secrets.get_secret(scope=scope, key=key).value

# Alternative: if dbutils is available in UDF context
# token = dbutils.secrets.get(scope="your-scope", key="databricks-token")
# host = dbutils.secrets.get(scope="your-scope", key="databricks-host")

# Set environment variables for authentication
os.environ['DATABRICKS_HOST'] = get_secret("your-scope", "databricks-host")
os.environ['DATABRICKS_TOKEN'] = get_secret("your-scope", "databricks-token")

from databricks.vector_search.client import VectorSearchClient

# Initialize client - should now pick up environment variables
client = VectorSearchClient()
index = client.get_index(
    endpoint_name="vector-search-demo-endpoint-kaushal",
    index_name="kaushal.kaushal.my_text_data_index"
)

results = index.similarity_search(
    query_text=query_text,
    columns=["id", "content"],
    filters={"id": [filter_id]},
    num_results=num_results
)

return json.dumps(results.get('result', {}).get('data_array', []))
$$;&lt;/LI-CODE&gt;
&lt;P&gt;&lt;BR /&gt;Then run your UC function with SQL, and you should get the expected results.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="KaushalVachhani_0-1763803856179.png" style="width: 999px;"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/21900i6F5156D34CBB71F9/image-size/large?v=v2&amp;amp;px=999" role="button" title="KaushalVachhani_0-1763803856179.png" alt="KaushalVachhani_0-1763803856179.png" /&gt;&lt;/span&gt;&lt;/DIV&gt;</description>
      <pubDate>Sat, 22 Nov 2025 09:33:49 GMT</pubDate>
      <guid>https://community.databricks.com/t5/generative-ai/help-with-hybrid-search-function/m-p/140003#M1439</guid>
      <dc:creator>KaushalVachhani</dc:creator>
      <dc:date>2025-11-22T09:33:49Z</dc:date>
    </item>
  </channel>
</rss>

