<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Ai Query Prompt Token and Completion token in Get Started Discussions</title>
    <link>https://community.databricks.com/t5/get-started-discussions/ai-query-prompt-token-and-completition-token/m-p/129863#M10620</link>
    <description>&lt;P&gt;Hi&lt;/P&gt;&lt;P&gt;I would like to know how I can get the completion token and prompt token counts when using Ai_Query.&lt;/P&gt;&lt;P&gt;Thanks&lt;/P&gt;</description>
    <pubDate>Wed, 27 Aug 2025 04:27:37 GMT</pubDate>
    <dc:creator>Andreyai</dc:creator>
    <dc:date>2025-08-27T04:27:37Z</dc:date>
    <item>
      <title>Ai Query Prompt Token and Completion token</title>
      <link>https://community.databricks.com/t5/get-started-discussions/ai-query-prompt-token-and-completition-token/m-p/129863#M10620</link>
      <description>&lt;P&gt;Hi&lt;/P&gt;&lt;P&gt;I would like to know how I can get the completion token and prompt token counts when using Ai_Query.&lt;/P&gt;&lt;P&gt;Thanks&lt;/P&gt;</description>
      <pubDate>Wed, 27 Aug 2025 04:27:37 GMT</pubDate>
      <guid>https://community.databricks.com/t5/get-started-discussions/ai-query-prompt-token-and-completition-token/m-p/129863#M10620</guid>
      <dc:creator>Andreyai</dc:creator>
      <dc:date>2025-08-27T04:27:37Z</dc:date>
    </item>
    <item>
      <title>Re: Ai Query Prompt Token and Completion token</title>
      <link>https://community.databricks.com/t5/get-started-discussions/ai-query-prompt-token-and-completition-token/m-p/129867#M10621</link>
      <description>&lt;P&gt;Hello&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/181223"&gt;@Andreyai&lt;/a&gt;&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;good day!!&lt;BR /&gt;&lt;BR /&gt;For ai_query, Databricks provides documentation:&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;A href="https://docs.databricks.com/aws/en/sql/language-manual/functions/ai_query" target="_blank" rel="noopener"&gt;https://docs.databricks.com/aws/en/sql/language-manual/functions/ai_query&lt;/A&gt;&amp;nbsp;I am 100% sure you will get better insights from the documentation.&amp;nbsp;&lt;/P&gt;&lt;P&gt;But I have something for you from the internet:&lt;/P&gt;&lt;DIV&gt;&lt;SPAN class=""&gt;&lt;SPAN class=""&gt;&lt;SPAN class=""&gt;&lt;SPAN class=""&gt;Estimating Token Counts (Without Running the Query)&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;SPAN class=""&gt;&lt;SPAN class=""&gt;&lt;SPAN class=""&gt;&amp;nbsp;You can use a tokenizer to approximate prompt and completion tokens based on your input text and expected output. 
&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/DIV&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;DIV&gt;&lt;SPAN class=""&gt;&lt;SPAN class=""&gt;&lt;SPAN class=""&gt;For Databricks foundation models like DBRX or Meta Llama series, use the &lt;/SPAN&gt;&lt;SPAN class=""&gt;cl100k_base&lt;/SPAN&gt;&lt;SPAN class=""&gt; encoding from OpenAI's &lt;/SPAN&gt;&lt;SPAN class=""&gt;tiktoken&lt;/SPAN&gt;&lt;SPAN class=""&gt; library (it's compatible).&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;UL&gt;&lt;LI&gt;&lt;SPAN class=""&gt;&lt;SPAN class=""&gt;Install &lt;/SPAN&gt;&lt;SPAN class=""&gt;tiktoken&lt;/SPAN&gt;&lt;SPAN class=""&gt; in a Databricks notebook (via &lt;/SPAN&gt;&lt;SPAN class=""&gt;%pip install tiktoken&lt;/SPAN&gt;&lt;SPAN class=""&gt;).&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;/LI&gt;&lt;LI&gt;&lt;SPAN class=""&gt;&lt;SPAN class=""&gt;Example Python code to estimate:&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;DIV class=""&gt;&lt;SPAN class=""&gt;&lt;SPAN class=""&gt;python&lt;/SPAN&gt;&lt;/SPAN&gt;&lt;DIV class=""&gt;&amp;nbsp;&lt;/DIV&gt;&lt;/DIV&gt;&lt;PRE&gt;&lt;SPAN class=""&gt;import&lt;/SPAN&gt;&lt;SPAN&gt; tiktoken&lt;/SPAN&gt;
&lt;SPAN class=""&gt;def&lt;/SPAN&gt; &lt;SPAN class=""&gt;count_tokens&lt;/SPAN&gt;&lt;SPAN class=""&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;text&lt;/SPAN&gt;&lt;SPAN class=""&gt;:&lt;/SPAN&gt; &lt;SPAN class=""&gt;str&lt;/SPAN&gt;&lt;SPAN class=""&gt;,&lt;/SPAN&gt;&lt;SPAN&gt; encoding_name&lt;/SPAN&gt;&lt;SPAN class=""&gt;:&lt;/SPAN&gt; &lt;SPAN class=""&gt;str&lt;/SPAN&gt; &lt;SPAN class=""&gt;=&lt;/SPAN&gt; &lt;SPAN class=""&gt;"cl100k_base"&lt;/SPAN&gt;&lt;SPAN class=""&gt;)&lt;/SPAN&gt; &lt;SPAN class=""&gt;-&lt;/SPAN&gt;&lt;SPAN class=""&gt;&amp;gt;&lt;/SPAN&gt; &lt;SPAN class=""&gt;int&lt;/SPAN&gt;&lt;SPAN class=""&gt;:&lt;/SPAN&gt;
&lt;SPAN&gt;    encoding &lt;/SPAN&gt;&lt;SPAN class=""&gt;=&lt;/SPAN&gt;&lt;SPAN&gt; tiktoken&lt;/SPAN&gt;&lt;SPAN class=""&gt;.&lt;/SPAN&gt;&lt;SPAN&gt;get_encoding&lt;/SPAN&gt;&lt;SPAN class=""&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;encoding_name&lt;/SPAN&gt;&lt;SPAN class=""&gt;)&lt;/SPAN&gt;
    &lt;SPAN class=""&gt;return&lt;/SPAN&gt; &lt;SPAN class=""&gt;len&lt;/SPAN&gt;&lt;SPAN class=""&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;encoding&lt;/SPAN&gt;&lt;SPAN class=""&gt;.&lt;/SPAN&gt;&lt;SPAN&gt;encode&lt;/SPAN&gt;&lt;SPAN class=""&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;text&lt;/SPAN&gt;&lt;SPAN class=""&gt;)&lt;/SPAN&gt;&lt;SPAN class=""&gt;)&lt;/SPAN&gt;

&lt;SPAN class=""&gt;# Example usage&lt;/SPAN&gt;
&lt;SPAN&gt;prompt &lt;/SPAN&gt;&lt;SPAN class=""&gt;=&lt;/SPAN&gt; &lt;SPAN class=""&gt;"Your prompt text here"&lt;/SPAN&gt;  &lt;SPAN class=""&gt;# Replace with your actual prompt&lt;/SPAN&gt;
&lt;SPAN&gt;estimated_prompt_tokens &lt;/SPAN&gt;&lt;SPAN class=""&gt;=&lt;/SPAN&gt;&lt;SPAN&gt; count_tokens&lt;/SPAN&gt;&lt;SPAN class=""&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;prompt&lt;/SPAN&gt;&lt;SPAN class=""&gt;)&lt;/SPAN&gt;
&lt;SPAN class=""&gt;print&lt;/SPAN&gt;&lt;SPAN class=""&gt;(&lt;/SPAN&gt;&lt;SPAN class=""&gt;f"Estimated prompt tokens: &lt;/SPAN&gt;&lt;SPAN class=""&gt;{&lt;/SPAN&gt;&lt;SPAN class=""&gt;estimated_prompt_tokens&lt;/SPAN&gt;&lt;SPAN class=""&gt;}&lt;/SPAN&gt;&lt;SPAN class=""&gt;"&lt;/SPAN&gt;&lt;SPAN class=""&gt;)&lt;/SPAN&gt;

&lt;SPAN class=""&gt;# For completion, estimate based on expected output length (e.g., max_tokens param)&lt;/SPAN&gt;
&lt;SPAN&gt;example_completion &lt;/SPAN&gt;&lt;SPAN class=""&gt;=&lt;/SPAN&gt; &lt;SPAN class=""&gt;"Sample generated response"&lt;/SPAN&gt;  &lt;SPAN class=""&gt;# Simulate or use a sample&lt;/SPAN&gt;
&lt;SPAN&gt;estimated_completion_tokens &lt;/SPAN&gt;&lt;SPAN class=""&gt;=&lt;/SPAN&gt;&lt;SPAN&gt; count_tokens&lt;/SPAN&gt;&lt;SPAN class=""&gt;(&lt;/SPAN&gt;&lt;SPAN&gt;example_completion&lt;/SPAN&gt;&lt;SPAN class=""&gt;)&lt;/SPAN&gt;
&lt;SPAN class=""&gt;print&lt;/SPAN&gt;&lt;SPAN class=""&gt;(&lt;/SPAN&gt;&lt;SPAN class=""&gt;f"Estimated completion tokens: &lt;/SPAN&gt;&lt;SPAN class=""&gt;{&lt;/SPAN&gt;&lt;SPAN class=""&gt;estimated_completion_tokens&lt;/SPAN&gt;&lt;SPAN class=""&gt;}&lt;/SPAN&gt;&lt;SPAN class=""&gt;"&lt;/SPAN&gt;&lt;SPAN class=""&gt;)&lt;/SPAN&gt;&lt;/PRE&gt;&lt;/DIV&gt;&lt;/DIV&gt;&lt;/LI&gt;&lt;/UL&gt;&lt;/DIV&gt;</description>
      <pubDate>Wed, 27 Aug 2025 08:32:13 GMT</pubDate>
      <guid>https://community.databricks.com/t5/get-started-discussions/ai-query-prompt-token-and-completition-token/m-p/129867#M10621</guid>
      <dc:creator>Khaja_Zaffer</dc:creator>
      <dc:date>2025-08-27T08:32:13Z</dc:date>
    </item>
    <item>
      <title>Re: Ai Query Prompt Token and Completion token</title>
      <link>https://community.databricks.com/t5/get-started-discussions/ai-query-prompt-token-and-completition-token/m-p/129871#M10622</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thank you for your response.&lt;BR /&gt;But I was expecting a response from ai_query that includes the usage information, like when you use a completion.create call on OpenAI. Is it possible? That is, each call would return both a response and the usage.&lt;/P&gt;&lt;P&gt;In my case I have a set of images. For each image, I call Ai_Query with a prompt consisting of text with commands and the image, and it returns a description of the image. With that, I would like to get the token count so I can infer the cost of the operation. I am using Llama 4 Maverick and Claude 3.7 Sonnet.&lt;/P&gt;&lt;P&gt;OpenAI link:&amp;nbsp;&lt;A href="https://platform.openai.com/docs/api-reference/chat/list" target="_blank" rel="noopener"&gt;https://platform.openai.com/docs/api-reference/chat/list&lt;/A&gt;&lt;/P&gt;&lt;P&gt;Thanks&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 27 Aug 2025 09:41:15 GMT</pubDate>
      <guid>https://community.databricks.com/t5/get-started-discussions/ai-query-prompt-token-and-completition-token/m-p/129871#M10622</guid>
      <dc:creator>Andreyai</dc:creator>
      <dc:date>2025-08-27T09:41:15Z</dc:date>
    </item>
    <item>
      <title>Re: Ai Query Prompt Token and Completion token</title>
      <link>https://community.databricks.com/t5/get-started-discussions/ai-query-prompt-token-and-completition-token/m-p/133759#M10776</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/181223"&gt;@Andreyai&lt;/a&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;The batch inference requests hit a model serving endpoint; as long as inference tables and usage tracking are enabled on that endpoint, the requests will get logged regardless of how they were submitted to the endpoint. &lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;See the schemas for the endpoint usage and inference tables, which include both input token and output token information.&lt;/P&gt;
&lt;P&gt;&lt;A href="https://docs.databricks.com/aws/en/ai-gateway/inference-tables#query-and-analyze-results-in-the-inference-table" target="_blank"&gt;https://docs.databricks.com/aws/en/ai-gateway/inference-tables#query-and-analyze-results-in-the-inference-table&lt;/A&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;A href="https://docs.databricks.com/aws/en/ai-gateway/configure-ai-gateway-endpoints#systemservingendpoint_usage-usage-tracking-table-schema" target="_blank"&gt;https://docs.databricks.com/aws/en/ai-gateway/configure-ai-gateway-endpoints#systemservingendpoint_usage-usage-tracking-table-schema&lt;/A&gt;&lt;/P&gt;
&lt;P&gt;&lt;A href="https://docs.databricks.com/aws/en/ai-gateway/inference-tables#ai-gateway-enabled-inference-table-schema" target="_blank"&gt;https://docs.databricks.com/aws/en/ai-gateway/inference-tables#ai-gateway-enabled-inference-table-schema&lt;/A&gt;&lt;/P&gt;
&lt;P&gt;Hope this helps.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sat, 04 Oct 2025 04:05:02 GMT</pubDate>
      <guid>https://community.databricks.com/t5/get-started-discussions/ai-query-prompt-token-and-completition-token/m-p/133759#M10776</guid>
      <dc:creator>Krishna_S</dc:creator>
      <dc:date>2025-10-04T04:05:02Z</dc:date>
    </item>
  </channel>
</rss>

