Walter_C
Databricks Employee
Databricks Employee

Can you try with the following:

from mlflow.deployments import get_deploy_client

# Deployment client bound to the "databricks" target.
client = get_deploy_client("databricks")

# The served entity's name is referenced again by the traffic route below,
# so it is defined once to keep the two in sync.
served_entity_name = "llama3_1_8b_instruct-entity"

# Which registered model/version to serve and how to size it.
served_entity = {
    "name": served_entity_name,
    "entity_name": "system.ai.meta_llama_v3_1_8b_instruct",
    "entity_version": "2",
    "workload_size": "Small",
    "scale_to_zero_enabled": False,
}

# Single route sending all traffic (100%) to the entity defined above.
full_traffic_route = {
    "served_model_name": served_entity_name,
    "traffic_percentage": 100,
}

endpoint_config = {
    "served_entities": [served_entity],
    "traffic_config": {"routes": [full_traffic_route]},
}

# Create the serving endpoint; the client's response is kept in `endpoint`.
endpoint = client.create_endpoint(
    name="llama3_1_8b_instruct",
    config=endpoint_config,
)