binnisbj
New Contributor II

 

# Declares one model serving endpoint (resource key `embed_serving`) whose
# config serves a single entity.
# NOTE(review): the layout looks like a Databricks Asset Bundle `resources`
# section - confirm against the consuming tool.
resources:
  model_serving_endpoints:
    embed_serving:
      name: embedding-gpu-m
      config:
        served_entities:
          - name: embedding-gpu-m
            # Entity to serve. The version is quoted so it is read as a
            # string rather than an integer.
            entity_name: system.ai.bge_large_en_v1_5
            entity_version: '3'
            # Workload settings.
            workload_type: GPU_MEDIUM
            workload_size: Small
            # Scaling settings.
            # NOTE(review): workload_size/workload_type are set together with
            # min_provisioned_throughput and no max_provisioned_throughput;
            # verify the serving API accepts this combination - TODO confirm.
            scale_to_zero_enabled: true
            min_provisioned_throughput: 0