Data Engineering
Join discussions on data engineering best practices, architectures, and optimization strategies within the Databricks Community. Exchange insights and solutions with fellow data engineers.
Facing issues with Databricks Asset Bundles: all jobs are deployed to every target instead of only the defined target

umahesb3
New Contributor
I am facing an issue with Databricks Asset Bundles: all jobs are being deployed to every target instead of only the target they are defined for. Below are the files I am using (the job resource YAML files and the databricks.yml file). I am on Databricks CLI v0.240.0 and started from the databricks bundle init default-python template. Can you please help resolve this issue? It is a show-stopper for my use case.
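For reference, the deploy commands I run look roughly like this (a sketch of my workflow, using the targets defined in the config below):

databricks bundle validate -t dev
databricks bundle deploy -t dev
databricks bundle deploy -t dev_ca

Whichever target I deploy to, all of the jobs end up in it, not just the ones I intended for that target.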
Example 1:
#asset_bundles_job1.yaml
resources:
  jobs:
    asset_bundles_job1:
      name: asset_bundles_job1
      description: >+
        Extracts data from APIs.
      health:
        rules:
          - metric: RUN_DURATION_SECONDS
            op: GREATER_THAN
            value: 3600
      schedule:
        quartz_cron_expression: 0 0/15 * * * ?
        timezone_id: UTC
        pause_status: ${var.job_status}
      max_concurrent_runs: 1
      tasks:
        - task_key: task1
          job_cluster_key: '${var.job_cluster_name}'
          notebook_task:
            notebook_path: ../src/script1.py
            base_parameters:
              configFile: '${var.config_file}'
              config_region: '${var.config_region}'
          max_retries: '${var.max_retries}'
          min_retry_interval_millis: '${var.intv_seconds}'
        - task_key: task2
          job_cluster_key: '${var.job_cluster_name}'
          notebook_task:
            notebook_path: ../src/script2.py
            base_parameters:
              configFile: '${var.config_file}'
              config_region: '${var.config_region}'
          max_retries: '${var.max_retries}'
          min_retry_interval_millis: '${var.intv_seconds}'
  
      job_clusters:
        - job_cluster_key: '${var.job_cluster_name}'
          new_cluster:
            spark_version: 15.4.x-scala2.12
            spark_conf:
              spark.databricks.repl.allowedLanguages: 'sql,python,r,scala'
              spark.databricks.delta.retentionDurationCheck.enabled: false
              spark.databricks.hive.metastore.glueCatalog.enabled: true
              spark.hadoop.fs.s3a.acl.default: BucketOwnerFullControl
              spark.hadoop.hive.metastore.glue.catalogid: '${var.glue_catalog}'
            aws_attributes:
              first_on_demand: 1
              availability: SPOT_WITH_FALLBACK
              zone_id: auto
              instance_profile_arn: '${var.instance_profilearn}'
              spot_bid_price_percent: 100
              ebs_volume_count: 0
            node_type_id: '${var.node_type}'
            driver_node_type_id: '${var.node_type}'
            spark_env_vars:
              PYSPARK_PYTHON: /databricks/python3/bin/python3
            enable_elastic_disk: true
            data_security_mode: NONE
            runtime_engine: PHOTON
            autoscale:
              min_workers: 1
              max_workers: '${var.max_workers_instance}'
      queue:
        enabled: false
 
#asset_bundles_job2.yaml
resources:
  jobs:
    asset_bundles_job2:
      name: asset_bundles_job2
      description: >+
        Extracts data from APIs.
      health:
        rules:
          - metric: RUN_DURATION_SECONDS
            op: GREATER_THAN
            value: 3600
      schedule:
        quartz_cron_expression: 0 0/15 * * * ?
        timezone_id: UTC
        pause_status: ${var.job_status}
      max_concurrent_runs: 1
      tasks:
        - task_key: task1
          job_cluster_key: '${var.job_cluster_name}'
          notebook_task:
            notebook_path: ../src/script1.py
            base_parameters:
              configFile: '${var.config_file}'
              config_region: '${var.config_region}'
          max_retries: '${var.max_retries}'
          min_retry_interval_millis: '${var.intv_seconds}'
        - task_key: task2
          job_cluster_key: '${var.job_cluster_name}'
          notebook_task:
            notebook_path: ../src/script2.py
            base_parameters:
              configFile: '${var.config_file}'
              config_region: '${var.config_region}'
          max_retries: '${var.max_retries}'
          min_retry_interval_millis: '${var.intv_seconds}'
  
      job_clusters:
        - job_cluster_key: '${var.job_cluster_name}'
          new_cluster:
            spark_version: 15.4.x-scala2.12
            spark_conf:
              spark.databricks.repl.allowedLanguages: 'sql,python,r,scala'
              spark.databricks.delta.retentionDurationCheck.enabled: false
              spark.databricks.hive.metastore.glueCatalog.enabled: true
              spark.hadoop.fs.s3a.acl.default: BucketOwnerFullControl
              spark.hadoop.hive.metastore.glue.catalogid: '${var.glue_catalog}'
            aws_attributes:
              first_on_demand: 1
              availability: SPOT_WITH_FALLBACK
              zone_id: auto
              instance_profile_arn: '${var.instance_profilearn}'
              spot_bid_price_percent: 100
              ebs_volume_count: 0
            node_type_id: '${var.node_type}'
            driver_node_type_id: '${var.node_type}'
            spark_env_vars:
              PYSPARK_PYTHON: /databricks/python3/bin/python3
            enable_elastic_disk: true
            data_security_mode: NONE
            runtime_engine: PHOTON
            autoscale:
              min_workers: 1
              max_workers: '${var.max_workers_instance}'
      queue:
        enabled: false
 
#asset_bundles_job3.yaml
resources:
  jobs:
    asset_bundles_job3:
      name: asset_bundles_job3
      description: >+
        Extracts data from APIs.
      health:
        rules:
          - metric: RUN_DURATION_SECONDS
            op: GREATER_THAN
            value: 3600
      schedule:
        quartz_cron_expression: 0 0/15 * * * ?
        timezone_id: UTC
        pause_status: ${var.job_status}
      max_concurrent_runs: 1
      tasks:
        - task_key: task1
          job_cluster_key: '${var.job_cluster_name}'
          notebook_task:
            notebook_path: ../src/script1.py
            base_parameters:
              configFile: '${var.config_file}'
              config_region: '${var.config_region}'
          max_retries: '${var.max_retries}'
          min_retry_interval_millis: '${var.intv_seconds}'
        - task_key: task2
          job_cluster_key: '${var.job_cluster_name}'
          notebook_task:
            notebook_path: ../src/script2.py
            base_parameters:
              configFile: '${var.config_file}'
              config_region: '${var.config_region}'
          max_retries: '${var.max_retries}'
          min_retry_interval_millis: '${var.intv_seconds}'
  
      job_clusters:
        - job_cluster_key: '${var.job_cluster_name}'
          new_cluster:
            spark_version: 15.4.x-scala2.12
            spark_conf:
              spark.databricks.repl.allowedLanguages: 'sql,python,r,scala'
              spark.databricks.delta.retentionDurationCheck.enabled: false
              spark.databricks.hive.metastore.glueCatalog.enabled: true
              spark.hadoop.fs.s3a.acl.default: BucketOwnerFullControl
              spark.hadoop.hive.metastore.glue.catalogid: '${var.glue_catalog}'
            aws_attributes:
              first_on_demand: 1
              availability: SPOT_WITH_FALLBACK
              zone_id: auto
              instance_profile_arn: '${var.instance_profilearn}'
              spot_bid_price_percent: 100
              ebs_volume_count: 0
            node_type_id: '${var.node_type}'
            driver_node_type_id: '${var.node_type}'
            spark_env_vars:
              PYSPARK_PYTHON: /databricks/python3/bin/python3
            enable_elastic_disk: true
            data_security_mode: NONE
            runtime_engine: PHOTON
            autoscale:
              min_workers: 1
              max_workers: '${var.max_workers_instance}'
      queue:
        enabled: false
 
#databricks.yaml
bundle:
  name: mulitple_jobs
include:
  - resources/*.yml
variables:
  config_file:
    description: Config file for the respective environment
    default: ../../../resources/config/dit.ini
  config_file_a:
    description: Config file for the respective environment
    default: ../../../resources/config/a_dit.ini
  config_file_b:
    description: Config file for the respective environment
    default: ../../../resources/config/b_dit.ini
  config_region:
    description: config region
    default: regiona
  glue_catalog:
    description: Glue Catalogid Details
  instance_profilearn:
    description: Instance Profile Arn Details
  instance_profilearn_a:
    description: Instance Profile Arn Details
  max_workers_instance:
    description: Max workers
    default: 2
  node_type:
    description: Ec2 Instance Type
    default: r5d.xlarge
  job_cluster_name:
    description: Name of the Job Cluster
    default: job_cluster
  max_retries:
    description: Max retries for the task
    default: 1
  intv_seconds:
    description: Retry interval millis
    default: 15000
  job_status:
    description: Determines whether the jobs should be paused or unpaused per environment
    default: PAUSED
 
targets:
  dev:
    variables:
      config_file: ../../../resources/config/a_fit.ini
      config_file_b: ../../../resources/config/b_dit.ini
      glue_catalog: '123456'
      instance_profilearn: >-
        arn:aws:iam::123456:instance-profile/Databricks-role
      instance_profilearn_wisely: >-
        arn:aws:iam::123456:instance-profile/Databricks-role
      max_workers_instance: 2
      node_type: i3.2xlarge
      job_cluster_name: '${bundle.name}_Job'
      job_status: PAUSED
    mode: development
    default: true
    workspace:
      root_path: >-
        /Users/test1@gmail.com/.bundle/${bundle.name}/${bundle.target}
    run_as:
      user_name: test1@gmail.com
  
    resources:
      jobs:
        asset_bundles_job1:
          permissions:
            - user_name: test1@gmail.com
              level: CAN_MANAGE
          email_notifications:
            on_failure: [test1@gmail.com]
            on_duration_warning_threshold_exceeded: [test1@gmail.com]
            no_alert_for_skipped_runs: true
        asset_bundles_job2:
          permissions:
            - user_name: test1@gmail.com
              level: CAN_MANAGE
          email_notifications:
            on_failure: [test1@gmail.com]
            on_duration_warning_threshold_exceeded: [test1@gmail.com]
            no_alert_for_skipped_runs: true

  dev_ca:
    variables:
      config_file: ../../../resources/config/a_fit.ini
      config_file_b: ../../../resources/config/b_dit.ini
      glue_catalog: '123456'
      instance_profilearn: >-
        arn:aws:iam::123456:instance-profile/Databricks-role
      instance_profilearn_wisely: >-
        arn:aws:iam::123456:instance-profile/Databricks-role
      max_workers_instance: 2
      node_type: i3.2xlarge
      job_cluster_name: '${bundle.name}_Job'
      job_status: PAUSED
    mode: development
    default: true
    workspace:
      root_path: >-
        /Users/test1@gmail.com/.bundle/${bundle.name}/${bundle.target}
    run_as:
      user_name: test1@gmail.com
    resources:
      jobs:
        asset_bundles_job2:
          permissions:
            - user_name: test1@gmail.com
              level: CAN_MANAGE
          email_notifications:
            on_failure: [test1@gmail.com]
            on_duration_warning_threshold_exceeded: [test1@gmail.com]
            no_alert_for_skipped_runs: true
Example 2:
#asset_bundles_job1.yaml
resources:
  jobs:
    asset_bundles_job1:
      name: asset_bundles_job1
      description: >+
        Extracts data from APIs.
      health:
        rules:
          - metric: RUN_DURATION_SECONDS
            op: GREATER_THAN
            value: 3600
      schedule:
        quartz_cron_expression: 0 0/15 * * * ?
        timezone_id: UTC
        pause_status: ${var.job_status}
      max_concurrent_runs: 1
      tasks:
        - task_key: task1
          job_cluster_key: '${var.job_cluster_name}'
          notebook_task:
            notebook_path: ../src/script1.py
            base_parameters:
              configFile: '${var.config_file}'
              config_region: '${var.config_region}'
          max_retries: '${var.max_retries}'
          min_retry_interval_millis: '${var.intv_seconds}'
        - task_key: task2
          job_cluster_key: '${var.job_cluster_name}'
          notebook_task:
            notebook_path: ../src/script2.py
            base_parameters:
              configFile: '${var.config_file}'
              config_region: '${var.config_region}'
          max_retries: '${var.max_retries}'
          min_retry_interval_millis: '${var.intv_seconds}'
  
      job_clusters:
        - job_cluster_key: '${var.job_cluster_name}'
          new_cluster:
            spark_version: 15.4.x-scala2.12
            spark_conf:
              spark.databricks.repl.allowedLanguages: 'sql,python,r,scala'
              spark.databricks.delta.retentionDurationCheck.enabled: false
              spark.databricks.hive.metastore.glueCatalog.enabled: true
              spark.hadoop.fs.s3a.acl.default: BucketOwnerFullControl
              spark.hadoop.hive.metastore.glue.catalogid: '${var.glue_catalog}'
            aws_attributes:
              first_on_demand: 1
              availability: SPOT_WITH_FALLBACK
              zone_id: auto
              instance_profile_arn: '${var.instance_profilearn}'
              spot_bid_price_percent: 100
              ebs_volume_count: 0
            node_type_id: '${var.node_type}'
            driver_node_type_id: '${var.node_type}'
            spark_env_vars:
              PYSPARK_PYTHON: /databricks/python3/bin/python3
            enable_elastic_disk: true
            data_security_mode: NONE
            runtime_engine: PHOTON
            autoscale:
              min_workers: 1
              max_workers: '${var.max_workers_instance}'
      queue:
        enabled: false
 
#asset_bundles_job2.yaml
resources:
  jobs:
    asset_bundles_job2:
      name: asset_bundles_job2
      description: >+
        Extracts data from APIs.
      health:
        rules:
          - metric: RUN_DURATION_SECONDS
            op: GREATER_THAN
            value: 3600
      schedule:
        quartz_cron_expression: 0 0/15 * * * ?
        timezone_id: UTC
        pause_status: ${var.job_status}
      max_concurrent_runs: 1
      tasks:
        - task_key: task1
          job_cluster_key: '${var.job_cluster_name}'
          notebook_task:
            notebook_path: ../src/script1.py
            base_parameters:
              configFile: '${var.config_file}'
              config_region: '${var.config_region}'
          max_retries: '${var.max_retries}'
          min_retry_interval_millis: '${var.intv_seconds}'
        - task_key: task2
          job_cluster_key: '${var.job_cluster_name}'
          notebook_task:
            notebook_path: ../src/script2.py
            base_parameters:
              configFile: '${var.config_file}'
              config_region: '${var.config_region}'
          max_retries: '${var.max_retries}'
          min_retry_interval_millis: '${var.intv_seconds}'
  
      job_clusters:
        - job_cluster_key: '${var.job_cluster_name}'
          new_cluster:
            spark_version: 15.4.x-scala2.12
            spark_conf:
              spark.databricks.repl.allowedLanguages: 'sql,python,r,scala'
              spark.databricks.delta.retentionDurationCheck.enabled: false
              spark.databricks.hive.metastore.glueCatalog.enabled: true
              spark.hadoop.fs.s3a.acl.default: BucketOwnerFullControl
              spark.hadoop.hive.metastore.glue.catalogid: '${var.glue_catalog}'
            aws_attributes:
              first_on_demand: 1
              availability: SPOT_WITH_FALLBACK
              zone_id: auto
              instance_profile_arn: '${var.instance_profilearn}'
              spot_bid_price_percent: 100
              ebs_volume_count: 0
            node_type_id: '${var.node_type}'
            driver_node_type_id: '${var.node_type}'
            spark_env_vars:
              PYSPARK_PYTHON: /databricks/python3/bin/python3
            enable_elastic_disk: true
            data_security_mode: NONE
            runtime_engine: PHOTON
            autoscale:
              min_workers: 1
              max_workers: '${var.max_workers_instance}'
      queue:
        enabled: false
 
#asset_bundles_job3.yaml
resources:
  jobs:
    asset_bundles_job3:
      name: asset_bundles_job3
      description: >+
        Extracts data from APIs.
      health:
        rules:
          - metric: RUN_DURATION_SECONDS
            op: GREATER_THAN
            value: 3600
      schedule:
        quartz_cron_expression: 0 0/15 * * * ?
        timezone_id: UTC
        pause_status: ${var.job_status}
      max_concurrent_runs: 1
      tasks:
        - task_key: task1
          job_cluster_key: '${var.job_cluster_name}'
          notebook_task:
            notebook_path: ../src/script1.py
            base_parameters:
              configFile: '${var.config_file}'
              config_region: '${var.config_region}'
          max_retries: '${var.max_retries}'
          min_retry_interval_millis: '${var.intv_seconds}'
        - task_key: task2
          job_cluster_key: '${var.job_cluster_name}'
          notebook_task:
            notebook_path: ../src/script2.py
            base_parameters:
              configFile: '${var.config_file}'
              config_region: '${var.config_region}'
          max_retries: '${var.max_retries}'
          min_retry_interval_millis: '${var.intv_seconds}'
  
      job_clusters:
        - job_cluster_key: '${var.job_cluster_name}'
          new_cluster:
            spark_version: 15.4.x-scala2.12
            spark_conf:
              spark.databricks.repl.allowedLanguages: 'sql,python,r,scala'
              spark.databricks.delta.retentionDurationCheck.enabled: false
              spark.databricks.hive.metastore.glueCatalog.enabled: true
              spark.hadoop.fs.s3a.acl.default: BucketOwnerFullControl
              spark.hadoop.hive.metastore.glue.catalogid: '${var.glue_catalog}'
            aws_attributes:
              first_on_demand: 1
              availability: SPOT_WITH_FALLBACK
              zone_id: auto
              instance_profile_arn: '${var.instance_profilearn}'
              spot_bid_price_percent: 100
              ebs_volume_count: 0
            node_type_id: '${var.node_type}'
            driver_node_type_id: '${var.node_type}'
            spark_env_vars:
              PYSPARK_PYTHON: /databricks/python3/bin/python3
            enable_elastic_disk: true
            data_security_mode: NONE
            runtime_engine: PHOTON
            autoscale:
              min_workers: 1
              max_workers: '${var.max_workers_instance}'
      queue:
        enabled: false
 
#databricks.yaml
bundle:
  name: mulitple_jobs
include:
  - resources/asset_bundles_job1.yml
  - resources/asset_bundles_job2.yml
  - resources/asset_bundles_job3.yml
variables:
  config_file:
    description: Config file for the respective environment
    default: ../../../resources/config/dit.ini
  config_file_a:
    description: Config file for the respective environment
    default: ../../../resources/config/a_dit.ini
  config_file_b:
    description: Config file for the respective environment
    default: ../../../resources/config/b_dit.ini
  config_region:
    description: config region
    default: regiona
  glue_catalog:
    description: Glue Catalogid Details
  instance_profilearn:
    description: Instance Profile Arn Details
  instance_profilearn_a:
    description: Instance Profile Arn Details
  max_workers_instance:
    description: Max workers
    default: 2
  node_type:
    description: Ec2 Instance Type
    default: r5d.xlarge
  job_cluster_name:
    description: Name of the Job Cluster
    default: job_cluster
  max_retries:
    description: Max retries for the task
    default: 1
  intv_seconds:
    description: Retry interval millis
    default: 15000
  job_status:
    description: Determines whether the jobs should be paused or unpaused per environment
    default: PAUSED
 
targets:
  dev:
    variables:
      config_file: ../../../resources/config/a_fit.ini
      config_file_b: ../../../resources/config/b_dit.ini
      glue_catalog: '123456'
      instance_profilearn: >-
        arn:aws:iam::123456:instance-profile/Databricks-role
      instance_profilearn_wisely: >-
        arn:aws:iam::123456:instance-profile/Databricks-role
      max_workers_instance: 2
      node_type: i3.2xlarge
      job_cluster_name: '${bundle.name}_Job'
      job_status: PAUSED
    mode: development
    default: true
    workspace:
      root_path: >-
        /Users/test1@gmail.com/.bundle/${bundle.name}/${bundle.target}
    run_as:
      user_name: test1@gmail.com
 
  dev_ca:
    variables:
      config_file: ../../../resources/config/a_fit.ini
      config_file_b: ../../../resources/config/b_dit.ini
      glue_catalog: '123456'
      instance_profilearn: >-
        arn:aws:iam::123456:instance-profile/Databricks-role
      instance_profilearn_wisely: >-
        arn:aws:iam::123456:instance-profile/Databricks-role
      max_workers_instance: 2
      node_type: i3.2xlarge
      job_cluster_name: '${bundle.name}_Job'
      job_status: PAUSED
    mode: development
    default: true
    workspace:
      root_path: >-
        /Users/test1@gmail.com/.bundle/${bundle.name}/${bundle.target}
    run_as:
      user_name: test1@gmail.com
 
  resources:
    jobs:
      asset_bundles_job1:
        deployments: [dev]
        permissions:
          - user_name: test1@gmail.com
            level: CAN_MANAGE
        email_notifications:
          on_failure: [test1@gmail.com]
          on_duration_warning_threshold_exceeded: [test1@gmail.com]
          no_alert_for_skipped_runs: true
 
      asset_bundles_job2:
        deployments: [dev]
        permissions:
          - user_name: test1@gmail.com
            level: CAN_MANAGE
        email_notifications:
          on_failure: [test1@gmail.com]
          on_duration_warning_threshold_exceeded: [test1@gmail.com]
          no_alert_for_skipped_runs: true
 
      asset_bundles_job2:
        deployments: [dev_ca]
        permissions:
          - user_name: test1@gmail.com
            level: CAN_MANAGE
        email_notifications:
          on_failure: [test1@gmail.com]
          on_duration_warning_threshold_exceeded: [test1@gmail.com]
          no_alert_for_skipped_runs: true