<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Can't query Legacy Serving Endpoint in Machine Learning</title>
    <link>https://community.databricks.com/t5/machine-learning/can-t-query-legacy-serving-endpoint/m-p/89879#M3665</link>
    <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;I was able to deploy an endpoint using legacy serving (It's the only option we have to deploy endpoints in DB). Now I am having trouble querying the endpoint itself. When I try to query it I get the following error:&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="semsim_0-1726245119742.png" style="width: 400px;"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/11174iC8A3A98FEC1E1661/image-size/medium?v=v2&amp;amp;px=400" role="button" title="semsim_0-1726245119742.png" alt="semsim_0-1726245119742.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;Here is the code I am using to query the endpoint:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;import os
import requests
import numpy as np
import pandas as pd
import json


token = user_token

def create_tf_serving_json(data):
  return {'inputs': {name: data[name].tolist() for name in data.keys()} if isinstance(data, dict) else data.tolist()}

def score_model(dataset):
  url = 'url_to_model'
  headers = {'Authorization': f'Bearer {token}', 'Content-Type': 'application/json'}
  ds_dict = {"dataframe_split": dataset.to_dict(orient='split')} if isinstance(dataset, pd.DataFrame) else create_tf_serving_json(dataset)
  data_json = json.dumps(ds_dict, allow_nan=True)
  response = requests.request(method='POST', headers=headers, url=url, data=data_json)
  if response.status_code != 200:
    raise Exception(f'Request failed with status {response.status_code}, {response.text}')
  return response.json()

# Scoring a model that accepts pandas DataFrames
data =  pd.DataFrame([{
  "sepal_length": 5.1,
  "sepal_width": 3.5,
  "petal_length": 1.4,
  "petal_width": 0.2
}])
score_model(data) #MODEL_VERSION_URI, DATABRICKS_API_TOKEN, 


# Scoring a model that accepts tensors
#data = np.asarray([[5.1, 3.5, 1.4, 0.2]])
#score_model(MODEL_VERSION_URI, DATABRICKS_API_TOKEN, data)&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Fri, 13 Sep 2024 16:32:45 GMT</pubDate>
    <dc:creator>semsim</dc:creator>
    <dc:date>2024-09-13T16:32:45Z</dc:date>
    <item>
      <title>Can't query Legacy Serving Endpoint</title>
      <link>https://community.databricks.com/t5/machine-learning/can-t-query-legacy-serving-endpoint/m-p/89879#M3665</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;I was able to deploy an endpoint using legacy serving (It's the only option we have to deploy endpoints in DB). Now I am having trouble querying the endpoint itself. When I try to query it I get the following error:&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="semsim_0-1726245119742.png" style="width: 400px;"&gt;&lt;img src="https://community.databricks.com/t5/image/serverpage/image-id/11174iC8A3A98FEC1E1661/image-size/medium?v=v2&amp;amp;px=400" role="button" title="semsim_0-1726245119742.png" alt="semsim_0-1726245119742.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;Here is the code I am using to query the endpoint:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;import os
import requests
import numpy as np
import pandas as pd
import json


token = user_token

def create_tf_serving_json(data):
  return {'inputs': {name: data[name].tolist() for name in data.keys()} if isinstance(data, dict) else data.tolist()}

def score_model(dataset):
  url = 'url_to_model'
  headers = {'Authorization': f'Bearer {token}', 'Content-Type': 'application/json'}
  ds_dict = {"dataframe_split": dataset.to_dict(orient='split')} if isinstance(dataset, pd.DataFrame) else create_tf_serving_json(dataset)
  data_json = json.dumps(ds_dict, allow_nan=True)
  response = requests.request(method='POST', headers=headers, url=url, data=data_json)
  if response.status_code != 200:
    raise Exception(f'Request failed with status {response.status_code}, {response.text}')
  return response.json()

# Scoring a model that accepts pandas DataFrames
data =  pd.DataFrame([{
  "sepal_length": 5.1,
  "sepal_width": 3.5,
  "petal_length": 1.4,
  "petal_width": 0.2
}])
score_model(data) #MODEL_VERSION_URI, DATABRICKS_API_TOKEN, 


# Scoring a model that accepts tensors
#data = np.asarray([[5.1, 3.5, 1.4, 0.2]])
#score_model(MODEL_VERSION_URI, DATABRICKS_API_TOKEN, data)&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 13 Sep 2024 16:32:45 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/can-t-query-legacy-serving-endpoint/m-p/89879#M3665</guid>
      <dc:creator>semsim</dc:creator>
      <dc:date>2024-09-13T16:32:45Z</dc:date>
    </item>
    <item>
      <title>Re: Can't query Legacy Serving Endpoint</title>
      <link>https://community.databricks.com/t5/machine-learning/can-t-query-legacy-serving-endpoint/m-p/136535#M4385</link>
      <description>&lt;P&gt;Hey&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/104721"&gt;@semsim&lt;/a&gt;&amp;nbsp;, sorry for the delayed response.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;DIV class="paragraph"&gt;Thanks for the screenshot—this pinpoints the problem.&lt;/DIV&gt;
&lt;DIV class="paragraph"&gt;&amp;nbsp;&lt;/DIV&gt;
&lt;H3 class="paragraph"&gt;Root cause from the error&lt;/H3&gt;
&lt;UL&gt;
&lt;LI class="paragraph"&gt;Your model’s predict path is trying to create or write to &lt;STRONG&gt;/Workspace/Shared&lt;/STRONG&gt;, and the serving container does not permit that filesystem location. The stack trace ends with PermissionError: [Errno 1] Operation not permitted: '/Workspace/Shared'.&lt;/LI&gt;
&lt;/UL&gt;
&lt;HR /&gt;
&lt;H3 class="paragraph"&gt;How to fix it in your model code&lt;/H3&gt;
&lt;DIV class="paragraph"&gt;Serving predict functions must be side‑effect free (no writes to workspace paths). Update your model to avoid writing under /Workspace and use ephemeral or supported storage instead:&lt;/DIV&gt;
&lt;UL&gt;
&lt;LI&gt;Remove any os.makedirs(...) or file writes to /Workspace/… inside predict(). Use &lt;STRONG&gt;tempfile&lt;/STRONG&gt; and write under &lt;STRONG&gt;/tmp&lt;/STRONG&gt; or &lt;STRONG&gt;/local_disk0/tmp&lt;/STRONG&gt; for ephemeral files. Example:&lt;/LI&gt;
&lt;/UL&gt;
&lt;PRE&gt;&lt;CODE class="markdown-code-python"&gt;import tempfile, os

def predict(self, context, model_input):
    tmpdir = tempfile.mkdtemp(dir="/local_disk0/tmp")  # or dir=None for /tmp
    out_path = os.path.join(tmpdir, "artifact.bin")
    with open(out_path, "wb") as f:
        f.write(b"...")  # if absolutely needed
    # ...perform inference without persisting to /Workspace
    return outputs&lt;/CODE&gt;&lt;/PRE&gt;
&lt;UL&gt;
&lt;LI&gt;
&lt;DIV class="paragraph"&gt;If you need to load static assets (tokenizers, feature maps, etc.), bundle them as &lt;STRONG&gt;MLflow model artifacts&lt;/STRONG&gt; and read them relative to the model directory, not from /Workspace. Avoid writes during inference.&lt;/DIV&gt;
&lt;/LI&gt;
&lt;LI&gt;
&lt;DIV class="paragraph"&gt;If persistence is actually required (for logs or results), write to external storage or databases the endpoint is authorized to access; do not write inside /Workspace from serving. Keep inference pure; log elsewhere asynchronously.&lt;/DIV&gt;
&lt;/LI&gt;
&lt;/UL&gt;
&lt;HR /&gt;
&lt;H3 class="paragraph"&gt;Client request adjustments (legacy serving)&lt;/H3&gt;
&lt;DIV class="paragraph"&gt;Your client code is mostly fine—make these tweaks to avoid common request issues:&lt;/DIV&gt;
&lt;UL&gt;
&lt;LI&gt;
&lt;DIV class="paragraph"&gt;Use the legacy invocations URL format:&lt;BR /&gt;https://&amp;lt;workspace-host&amp;gt;/model/&amp;lt;registered-model-name&amp;gt;/&amp;lt;version-or-stage&amp;gt;/invocations.&lt;/DIV&gt;
&lt;/LI&gt;
&lt;LI&gt;
&lt;DIV class="paragraph"&gt;Send the payload with requests.post(..., json=payload) rather than pre-dumping to data=.... Keep Content-Type: application/json and Authorization: Bearer &amp;lt;token&amp;gt;.&lt;/DIV&gt;
&lt;/LI&gt;
&lt;LI&gt;
&lt;DIV class="paragraph"&gt;Match the scoring protocol to your MLflow version:
&lt;UL&gt;
&lt;LI&gt;For MLflow 2.x models, send a top-level &lt;STRONG&gt;"dataframe_split"&lt;/STRONG&gt; for pandas, or &lt;STRONG&gt;"inputs" / "instances"&lt;/STRONG&gt; for tensors.&lt;/LI&gt;
&lt;LI&gt;If the model was logged with MLflow 1.x, older formats like &lt;STRONG&gt;"dataframe_records"&lt;/STRONG&gt; may be required; protocol mismatches can yield BAD_REQUEST. The Serving tab “Query endpoint” shows the expected format for your exact model.&lt;/LI&gt;
&lt;/UL&gt;
&lt;/DIV&gt;
&lt;/LI&gt;
&lt;/UL&gt;
&lt;DIV class="paragraph"&gt;Here’s a corrected minimal example:&lt;/DIV&gt;
&lt;PRE&gt;&lt;CODE class="markdown-code-python"&gt;import os
import requests
import numpy as np
import pandas as pd

token = os.getenv("DATABRICKS_API_TOKEN") or "dapi_..."
model_uri = "https://&amp;lt;workspace-host&amp;gt;/model/&amp;lt;registered-model-name&amp;gt;/&amp;lt;Production-or-version&amp;gt;/invocations"

headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}

def score_dataframe(df: pd.DataFrame):
    payload = {"dataframe_split": df.to_dict(orient="split")}
    resp = requests.post(model_uri, headers=headers, json=payload)
    if resp.status_code != 200:
        raise Exception(f"Request failed: {resp.status_code}, {resp.text}")
    return resp.json()

def score_tensor(arr: np.ndarray):
    payload = {"inputs": arr.tolist()}  # or {"instances": arr.tolist()}
    resp = requests.post(model_uri, headers=headers, json=payload)
    if resp.status_code != 200:
        raise Exception(f"Request failed: {resp.status_code}, {resp.text}")
    return resp.json()&lt;/CODE&gt;&lt;/PRE&gt;
&lt;HR /&gt;
&lt;H3 class="paragraph"&gt;Quick validation&lt;/H3&gt;
&lt;UL&gt;
&lt;LI class="paragraph"&gt;Use the model’s Serving tab “Query endpoint” in the UI to copy the exact URL and sample request payload; this confirms both the path and protocol your model expects.&lt;/LI&gt;
&lt;/UL&gt;
&lt;DIV class="paragraph"&gt;&amp;nbsp;&lt;/DIV&gt;
&lt;DIV class="paragraph"&gt;Hope this helps, Louis.&lt;/DIV&gt;</description>
      <pubDate>Wed, 29 Oct 2025 10:35:57 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/can-t-query-legacy-serving-endpoint/m-p/136535#M4385</guid>
      <dc:creator>Louis_Frolio</dc:creator>
      <dc:date>2025-10-29T10:35:57Z</dc:date>
    </item>
  </channel>
</rss>

