I am facing an issue with Databricks Asset Bundles: all jobs are being deployed to every target instead of only the target they are defined for. Below are the resource YAML files and the databricks.yml I am using. I am on Databricks CLI v0.240.0 and started from the `databricks bundle init` default-python template. Can you please help resolve this? It is a showstopper for my use case.
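To be clear about the intent: a job that belongs to only one target should be created only when that target is deployed. A minimal sketch of what I am after (placeholder names, not my real config) would be declaring the job under that target's own resources block:

# sketch only - hypothetical bundle and job names
bundle:
  name: example_bundle

targets:
  dev:
    default: true
  dev_ca:
    resources:
      jobs:
        ca_only_job:          # intent: this job should exist only in dev_ca
          name: ca_only_job
          tasks:
            - task_key: main
              notebook_task:
                notebook_path: ./src/main.py

My understanding is that a job declared like this should only appear when running databricks bundle deploy -t dev_ca, but with my actual configs below every job lands in every target.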
Example 1:
# resources/asset_bundles_job1.yml
resources:
  jobs:
    asset_bundles_job1:
      name: asset_bundles_job1
      description: Extracts data from APIs.
      health:
        rules:
          - metric: RUN_DURATION_SECONDS
            op: GREATER_THAN
            value: 3600
      schedule:
        quartz_cron_expression: 0 0/15 * * * ?
        timezone_id: UTC
        pause_status: ${var.job_status}
      max_concurrent_runs: 1
      tasks:
        - task_key: task1
          job_cluster_key: '${var.job_cluster_name}'
          notebook_task:
            notebook_path: ../src/script1.py
            base_parameters:
              configFile: '${var.config_file}'
              config_region: '${var.config_region}'
          max_retries: '${var.max_retries}'
          min_retry_interval_millis: '${var.intv_seconds}'
        - task_key: task2
          job_cluster_key: '${var.job_cluster_name}'
          notebook_task:
            notebook_path: ../src/script2.py
            base_parameters:
              configFile: '${var.config_file}'
              config_region: '${var.config_region}'
          max_retries: '${var.max_retries}'
          min_retry_interval_millis: '${var.intv_seconds}'
      job_clusters:
        - job_cluster_key: '${var.job_cluster_name}'
          new_cluster:
            spark_version: 15.4.x-scala2.12
            spark_conf:
              spark.databricks.repl.allowedLanguages: 'sql,python,r,scala'
              spark.databricks.delta.retentionDurationCheck.enabled: false
              spark.databricks.hive.metastore.glueCatalog.enabled: true
              spark.hadoop.fs.s3a.acl.default: BucketOwnerFullControl
              spark.hadoop.hive.metastore.glue.catalogid: '${var.glue_catalog}'
            aws_attributes:
              first_on_demand: 1
              availability: SPOT_WITH_FALLBACK
              zone_id: auto
              instance_profile_arn: '${var.instance_profilearn}'
              spot_bid_price_percent: 100
              ebs_volume_count: 0
            node_type_id: '${var.node_type}'
            driver_node_type_id: '${var.node_type}'
            spark_env_vars:
              PYSPARK_PYTHON: /databricks/python3/bin/python3
            enable_elastic_disk: true
            data_security_mode: NONE
            runtime_engine: PHOTON
            autoscale:
              min_workers: 1
              max_workers: '${var.max_workers_instance}'
      queue:
        enabled: false
# resources/asset_bundles_job2.yml
resources:
  jobs:
    asset_bundles_job2:
      name: asset_bundles_job2
      description: Extracts data from APIs.
      health:
        rules:
          - metric: RUN_DURATION_SECONDS
            op: GREATER_THAN
            value: 3600
      schedule:
        quartz_cron_expression: 0 0/15 * * * ?
        timezone_id: UTC
        pause_status: ${var.job_status}
      max_concurrent_runs: 1
      tasks:
        - task_key: task1
          job_cluster_key: '${var.job_cluster_name}'
          notebook_task:
            notebook_path: ../src/script1.py
            base_parameters:
              configFile: '${var.config_file}'
              config_region: '${var.config_region}'
          max_retries: '${var.max_retries}'
          min_retry_interval_millis: '${var.intv_seconds}'
        - task_key: task2
          job_cluster_key: '${var.job_cluster_name}'
          notebook_task:
            notebook_path: ../src/script2.py
            base_parameters:
              configFile: '${var.config_file}'
              config_region: '${var.config_region}'
          max_retries: '${var.max_retries}'
          min_retry_interval_millis: '${var.intv_seconds}'
      job_clusters:
        - job_cluster_key: '${var.job_cluster_name}'
          new_cluster:
            spark_version: 15.4.x-scala2.12
            spark_conf:
              spark.databricks.repl.allowedLanguages: 'sql,python,r,scala'
              spark.databricks.delta.retentionDurationCheck.enabled: false
              spark.databricks.hive.metastore.glueCatalog.enabled: true
              spark.hadoop.fs.s3a.acl.default: BucketOwnerFullControl
              spark.hadoop.hive.metastore.glue.catalogid: '${var.glue_catalog}'
            aws_attributes:
              first_on_demand: 1
              availability: SPOT_WITH_FALLBACK
              zone_id: auto
              instance_profile_arn: '${var.instance_profilearn}'
              spot_bid_price_percent: 100
              ebs_volume_count: 0
            node_type_id: '${var.node_type}'
            driver_node_type_id: '${var.node_type}'
            spark_env_vars:
              PYSPARK_PYTHON: /databricks/python3/bin/python3
            enable_elastic_disk: true
            data_security_mode: NONE
            runtime_engine: PHOTON
            autoscale:
              min_workers: 1
              max_workers: '${var.max_workers_instance}'
      queue:
        enabled: false
# resources/asset_bundles_job3.yml
resources:
  jobs:
    asset_bundles_job3:
      name: asset_bundles_job3
      description: Extracts data from APIs.
      health:
        rules:
          - metric: RUN_DURATION_SECONDS
            op: GREATER_THAN
            value: 3600
      schedule:
        quartz_cron_expression: 0 0/15 * * * ?
        timezone_id: UTC
        pause_status: ${var.job_status}
      max_concurrent_runs: 1
      tasks:
        - task_key: task1
          job_cluster_key: '${var.job_cluster_name}'
          notebook_task:
            notebook_path: ../src/script1.py
            base_parameters:
              configFile: '${var.config_file}'
              config_region: '${var.config_region}'
          max_retries: '${var.max_retries}'
          min_retry_interval_millis: '${var.intv_seconds}'
        - task_key: task2
          job_cluster_key: '${var.job_cluster_name}'
          notebook_task:
            notebook_path: ../src/script2.py
            base_parameters:
              configFile: '${var.config_file}'
              config_region: '${var.config_region}'
          max_retries: '${var.max_retries}'
          min_retry_interval_millis: '${var.intv_seconds}'
      job_clusters:
        - job_cluster_key: '${var.job_cluster_name}'
          new_cluster:
            spark_version: 15.4.x-scala2.12
            spark_conf:
              spark.databricks.repl.allowedLanguages: 'sql,python,r,scala'
              spark.databricks.delta.retentionDurationCheck.enabled: false
              spark.databricks.hive.metastore.glueCatalog.enabled: true
              spark.hadoop.fs.s3a.acl.default: BucketOwnerFullControl
              spark.hadoop.hive.metastore.glue.catalogid: '${var.glue_catalog}'
            aws_attributes:
              first_on_demand: 1
              availability: SPOT_WITH_FALLBACK
              zone_id: auto
              instance_profile_arn: '${var.instance_profilearn}'
              spot_bid_price_percent: 100
              ebs_volume_count: 0
            node_type_id: '${var.node_type}'
            driver_node_type_id: '${var.node_type}'
            spark_env_vars:
              PYSPARK_PYTHON: /databricks/python3/bin/python3
            enable_elastic_disk: true
            data_security_mode: NONE
            runtime_engine: PHOTON
            autoscale:
              min_workers: 1
              max_workers: '${var.max_workers_instance}'
      queue:
        enabled: false
# databricks.yml
bundle:
  name: multiple_jobs

include:
  - resources/*.yml

variables:
  config_file:
    description: Config file for the respective environment
    default: ../../../resources/config/dit.ini
  config_file_a:
    description: Config file for the respective environment
    default: ../../../resources/config/a_dit.ini
  config_file_b:
    description: Config file for the respective environment
    default: ../../../resources/config/b_dit.ini
  config_region:
    description: Config region
    default: regiona
  glue_catalog:
    description: Glue Catalog ID details
  instance_profilearn:
    description: Instance profile ARN details
  instance_profilearn_a:
    description: Instance profile ARN details
  max_workers_instance:
    description: Max workers
    default: 2
  node_type:
    description: EC2 instance type
    default: r5d.xlarge
  job_cluster_name:
    description: Name of the job cluster
    default: job_cluster
  max_retries:
    description: Max retries for the task
    default: 1
  intv_seconds:
    description: Retry interval in milliseconds
    default: 15000
  job_status:
    description: Determines whether the jobs are paused or unpaused per environment
    default: PAUSED

targets:
  dev:
    variables:
      config_file: ../../../resources/config/a_fit.ini
      config_file_b: ../../../resources/config/b_dit.ini
      glue_catalog: '123456'
      instance_profilearn: arn:aws:iam::123456:instance-profile/Databricks-role
      instance_profilearn_a: arn:aws:iam::123456:instance-profile/Databricks-role
      max_workers_instance: 2
      node_type: i3.2xlarge
      job_cluster_name: '${bundle.name}_Job'
      job_status: PAUSED
    mode: development
    default: true
    workspace:
      root_path: /Users/test1@gmail.com/.bundle/${bundle.name}/${bundle.target}
    run_as:
      user_name: test1@gmail.com
    resources:
      jobs:
        asset_bundles_job1:
          permissions:
            - user_name: test1@gmail.com
              level: CAN_MANAGE
          email_notifications:
            on_failure: [test1@gmail.com]
            on_duration_warning_threshold_exceeded: [test1@gmail.com]
            no_alert_for_skipped_runs: true
        asset_bundles_job2:
          permissions:
            - user_name: test1@gmail.com
              level: CAN_MANAGE
          email_notifications:
            on_failure: [test1@gmail.com]
            on_duration_warning_threshold_exceeded: [test1@gmail.com]
            no_alert_for_skipped_runs: true
  dev_ca:
    variables:
      config_file: ../../../resources/config/a_fit.ini
      config_file_b: ../../../resources/config/b_dit.ini
      glue_catalog: '123456'
      instance_profilearn: arn:aws:iam::123456:instance-profile/Databricks-role
      instance_profilearn_a: arn:aws:iam::123456:instance-profile/Databricks-role
      max_workers_instance: 2
      node_type: i3.2xlarge
      job_cluster_name: '${bundle.name}_Job'
      job_status: PAUSED
    mode: development
    workspace:
      root_path: /Users/test1@gmail.com/.bundle/${bundle.name}/${bundle.target}
    run_as:
      user_name: test1@gmail.com
    resources:
      jobs:
        asset_bundles_job2:
          permissions:
            - user_name: test1@gmail.com
              level: CAN_MANAGE
          email_notifications:
            on_failure: [test1@gmail.com]
            on_duration_warning_threshold_exceeded: [test1@gmail.com]
            no_alert_for_skipped_runs: true
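As far as I can tell from the docs, everything matched by the top-level include is deployed to every target, and a targets.<name>.resources block only overrides properties of those resources rather than filtering which targets receive them. A condensed sketch of that merge behavior as I understand it (hypothetical job name):

# sketch: target-level blocks merge onto top-level resources, they do not filter them
resources:
  jobs:
    job_a:            # declared at the top level => deployed to every target
      name: job_a
targets:
  dev:
    default: true
    resources:
      jobs:
        job_a:        # merged into job_a only when deploying dev; job_a still exists in all targets
          permissions:
            - user_name: test1@gmail.com
              level: CAN_MANAGE
  dev_ca: {}

If that reading is right, it would explain why asset_bundles_job3 is deployed to dev even though the dev target only mentions job1 and job2.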
Example 2:
The three resource files (resources/asset_bundles_job1.yml, resources/asset_bundles_job2.yml, resources/asset_bundles_job3.yml) are identical to Example 1; only databricks.yml differs:
# databricks.yml
bundle:
  name: multiple_jobs

include:
  - resources/asset_bundles_job1.yml
  - resources/asset_bundles_job2.yml
  - resources/asset_bundles_job3.yml

variables:
  config_file:
    description: Config file for the respective environment
    default: ../../../resources/config/dit.ini
  config_file_a:
    description: Config file for the respective environment
    default: ../../../resources/config/a_dit.ini
  config_file_b:
    description: Config file for the respective environment
    default: ../../../resources/config/b_dit.ini
  config_region:
    description: Config region
    default: regiona
  glue_catalog:
    description: Glue Catalog ID details
  instance_profilearn:
    description: Instance profile ARN details
  instance_profilearn_a:
    description: Instance profile ARN details
  max_workers_instance:
    description: Max workers
    default: 2
  node_type:
    description: EC2 instance type
    default: r5d.xlarge
  job_cluster_name:
    description: Name of the job cluster
    default: job_cluster
  max_retries:
    description: Max retries for the task
    default: 1
  intv_seconds:
    description: Retry interval in milliseconds
    default: 15000
  job_status:
    description: Determines whether the jobs are paused or unpaused per environment
    default: PAUSED

targets:
  dev:
    variables:
      config_file: ../../../resources/config/a_fit.ini
      config_file_b: ../../../resources/config/b_dit.ini
      glue_catalog: '123456'
      instance_profilearn: arn:aws:iam::123456:instance-profile/Databricks-role
      instance_profilearn_a: arn:aws:iam::123456:instance-profile/Databricks-role
      max_workers_instance: 2
      node_type: i3.2xlarge
      job_cluster_name: '${bundle.name}_Job'
      job_status: PAUSED
    mode: development
    default: true
    workspace:
      root_path: /Users/test1@gmail.com/.bundle/${bundle.name}/${bundle.target}
    run_as:
      user_name: test1@gmail.com
  dev_ca:
    variables:
      config_file: ../../../resources/config/a_fit.ini
      config_file_b: ../../../resources/config/b_dit.ini
      glue_catalog: '123456'
      instance_profilearn: arn:aws:iam::123456:instance-profile/Databricks-role
      instance_profilearn_a: arn:aws:iam::123456:instance-profile/Databricks-role
      max_workers_instance: 2
      node_type: i3.2xlarge
      job_cluster_name: '${bundle.name}_Job'
      job_status: PAUSED
    mode: development
    workspace:
      root_path: /Users/test1@gmail.com/.bundle/${bundle.name}/${bundle.target}
    run_as:
      user_name: test1@gmail.com
    resources:
      jobs:
        asset_bundles_job1:
          deployments: [dev]
          permissions:
            - user_name: test1@gmail.com
              level: CAN_MANAGE
          email_notifications:
            on_failure: [test1@gmail.com]
            on_duration_warning_threshold_exceeded: [test1@gmail.com]
            no_alert_for_skipped_runs: true
        asset_bundles_job2:
          deployments: [dev]
          permissions:
            - user_name: test1@gmail.com
              level: CAN_MANAGE
          email_notifications:
            on_failure: [test1@gmail.com]
            on_duration_warning_threshold_exceeded: [test1@gmail.com]
            no_alert_for_skipped_runs: true
        asset_bundles_job2:
          deployments: [dev_ca]
          permissions:
            - user_name: test1@gmail.com
              level: CAN_MANAGE
          email_notifications:
            on_failure: [test1@gmail.com]
            on_duration_warning_threshold_exceeded: [test1@gmail.com]
            no_alert_for_skipped_runs: true
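In Example 2 I added a deployments: key per job, but I could not find such a field anywhere in the bundle or job schema, so I suspect it is simply not a recognized setting (note also that asset_bundles_job2 appears twice under dev_ca above, which is a duplicate YAML key). If the only supported way to restrict a job to a target is to declare it under that target instead of in the shared include, I assume the layout would look roughly like this (sketch, full job bodies elided):

# sketch: declare each job only under the target(s) that should receive it
targets:
  dev:
    default: true
    resources:
      jobs:
        asset_bundles_job1:
          name: asset_bundles_job1
          # full tasks/job_clusters definition here
        asset_bundles_job2:
          name: asset_bundles_job2
          # full tasks/job_clusters definition here
  dev_ca:
    resources:
      jobs:
        asset_bundles_job2:
          name: asset_bundles_job2
          # full tasks/job_clusters definition here

and then deploying with databricks bundle deploy -t dev or -t dev_ca. Is that the recommended pattern, or is there a way to keep the job definitions in resources/*.yml and still control which targets they deploy to?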