<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Endpoint creation without scale-to-zero in Machine Learning</title>
    <link>https://community.databricks.com/t5/machine-learning/endpoint-creation-without-scale-to-zero/m-p/98970#M3788</link>
    <description>&lt;P&gt;Can you try with the following:&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;
&lt;LI-CODE lang="python"&gt;from mlflow.deployments import get_deploy_client

client = get_deploy_client("databricks")

endpoint = client.create_endpoint(
    name="llama3_1_8b_instruct",
    config={
        "served_entities": [
            {
                "name": "llama3_1_8b_instruct-entity",
                "entity_name": "system.ai.meta_llama_v3_1_8b_instruct",
                "entity_version": "2",
                "workload_size": "Small",
                "scale_to_zero_enabled": False
            }
        ],
        "traffic_config": {
            "routes": [
                {
                    "served_model_name": "llama3_1_8b_instruct-entity",
                    "traffic_percentage": 100
                }
            ]
        }
    }
)&lt;/LI-CODE&gt;</description>
    <pubDate>Fri, 15 Nov 2024 16:16:18 GMT</pubDate>
    <dc:creator>Walter_C</dc:creator>
    <dc:date>2024-11-15T16:16:18Z</dc:date>
    <item>
      <title>Endpoint creation without scale-to-zero</title>
      <link>https://community.databricks.com/t5/machine-learning/endpoint-creation-without-scale-to-zero/m-p/98865#M3787</link>
      <description>&lt;P&gt;&lt;SPAN&gt;Hi, I've got a question about deploying an endpoint for Llama 3.1 8b. The following code should create the endpoint &lt;I&gt;without&lt;/I&gt; scale-to-zero. The endpoint is being created, but &lt;I&gt;with &lt;/I&gt;scale-to-zero, although scale_to_zero_enabled is set to False. Instead of a boolean, I have also tried to pass the value as a string (both upper and lower case), but unfortunately this does not change the result. What do I have to change so that scale-to-zero is really deactivated?&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;cl = mlflow.deployments.get_deploy_client("databricks")
cl.create_endpoint(
   name="llama3_1_8b_instruct",
   config={
       "served_entities": [
           {
               "entity_name": "system.ai.meta_llama_v3_1_8b_instruct",
               "entity_version": "2",
               "max_provisioned_throughput": 12000,
               "scale_to_zero_enabled": False,
           }
       ],
       "traffic_config": {
           "routes": [
               {
                   "served_model_name": "meta_llama_v3_1_8b_instruct-2",
                   "traffic_percentage": "100",
               }
           ]
       },
   },
)&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 15 Nov 2024 06:43:02 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/endpoint-creation-without-scale-to-zero/m-p/98865#M3787</guid>
      <dc:creator>damselfly20</dc:creator>
      <dc:date>2024-11-15T06:43:02Z</dc:date>
    </item>
    <item>
      <title>Re: Endpoint creation without scale-to-zero</title>
      <link>https://community.databricks.com/t5/machine-learning/endpoint-creation-without-scale-to-zero/m-p/98970#M3788</link>
      <description>&lt;P&gt;Can you try with the following:&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;
&lt;LI-CODE lang="python"&gt;from mlflow.deployments import get_deploy_client

client = get_deploy_client("databricks")

endpoint = client.create_endpoint(
    name="llama3_1_8b_instruct",
    config={
        "served_entities": [
            {
                "name": "llama3_1_8b_instruct-entity",
                "entity_name": "system.ai.meta_llama_v3_1_8b_instruct",
                "entity_version": "2",
                "workload_size": "Small",
                "scale_to_zero_enabled": False
            }
        ],
        "traffic_config": {
            "routes": [
                {
                    "served_model_name": "llama3_1_8b_instruct-entity",
                    "traffic_percentage": 100
                }
            ]
        }
    }
)&lt;/LI-CODE&gt;</description>
      <pubDate>Fri, 15 Nov 2024 16:16:18 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/endpoint-creation-without-scale-to-zero/m-p/98970#M3788</guid>
      <dc:creator>Walter_C</dc:creator>
      <dc:date>2024-11-15T16:16:18Z</dc:date>
    </item>
    <item>
      <title>Re: Endpoint creation without scale-to-zero</title>
      <link>https://community.databricks.com/t5/machine-learning/endpoint-creation-without-scale-to-zero/m-p/99103#M3792</link>
      <description>&lt;P&gt;Thanks for the reply&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/88823"&gt;@Walter_C&lt;/a&gt;. This didn't quite work, since it used a CPU and didn't consider the&amp;nbsp;&lt;FONT face="courier new,courier" size="2"&gt;max_provisioned_throughput&lt;/FONT&gt;, but I finally got it to work like this:&lt;/P&gt;&lt;DIV&gt;&amp;nbsp;&lt;/DIV&gt;&lt;LI-CODE lang="python"&gt;from mlflow.deployments import get_deploy_client

client = get_deploy_client("databricks")

endpoint = client.create_endpoint(
    name="llama3_1_8b_instruct-test",
    config={
        "served_entities": [
            {
                "name": "llama3_1_8b_instruct-entity",
                "entity_name": "system.ai.meta_llama_v3_1_8b_instruct",
                "entity_version": "2",
                "scale_to_zero_enabled": "false",
                "min_provisioned_throughput": 12000,
                "max_provisioned_throughput": 12000
            }
        ],
        "traffic_config": {
            "routes": [
                {
                    "served_model_name": "llama3_1_8b_instruct-entity",
                    "traffic_percentage": 100
                }
            ]
        }
    }
)&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 18 Nov 2024 09:57:03 GMT</pubDate>
      <guid>https://community.databricks.com/t5/machine-learning/endpoint-creation-without-scale-to-zero/m-p/99103#M3792</guid>
      <dc:creator>damselfly20</dc:creator>
      <dc:date>2024-11-18T09:57:03Z</dc:date>
    </item>
  </channel>
</rss>

