Hi @Daniel_dlh,
you can try using a YAML anchor — have a look at this example:
Before (broken — a merge key cannot take a bundle variable reference):
# databricks.yml
variables:
  my_cluster:
    description: "Base cluster configuration"
    default:
      spark_version: "15.4.x-scala2.12"
      node_type_id: "Standard_DS3_v2"
      num_workers: 1

# my_jobs.yml
resources:
  jobs:
    sample_etl_job:
      job_clusters:
        - job_cluster_key: etl_cluster
          new_cluster:
            <<: ${var.my_cluster}  # ERROR: map merge requires map or sequence of maps
            custom_tags:
              environment: ${bundle.target}
With a YAML anchor (anchor and jobs in the same file):
# Define base cluster as YAML anchor
definitions:
  base_cluster: &base_cluster
    spark_version: "15.4.x-scala2.12"
    node_type_id: "Standard_DS3_v2"
    num_workers: 1
    spark_conf:
      spark.databricks.cluster.profile: "serverless"
      spark.master: "local[*, 4]"

resources:
  jobs:
    sample_etl_job:
      name: sample_etl_job
      job_clusters:
        - job_cluster_key: etl_cluster
          new_cluster:
            # Merge key copies the anchored mapping; explicit keys below override it
            <<: *base_cluster
            custom_tags:
              environment: ${bundle.target}
              project: dlt_telco
      tasks:
        - task_key: etl_task
          job_cluster_key: etl_cluster
          spark_python_task:
            python_file: ../src/sample_etl.py
      schedule:
        quartz_cron_expression: "0 0 1 * * ?"
        timezone_id: "UTC"
      max_concurrent_runs: 1
      timeout_seconds: 3600
Or, referencing the variable's fields explicitly (works across files):
# Serverless Jobs Configuration
resources:
  jobs:
    sample_etl_job:
      name: sample_etl_job
      job_clusters:
        - job_cluster_key: etl_cluster
          new_cluster:
            # Reference each field of the complex variable individually
            spark_version: ${var.my_cluster.spark_version}
            node_type_id: ${var.my_cluster.node_type_id}
            num_workers: ${var.my_cluster.num_workers}
            spark_conf: ${var.my_cluster.spark_conf}
            custom_tags:
              environment: ${bundle.target}
              project: dlt_telco
      tasks:
        - task_key: etl_task
          job_cluster_key: etl_cluster
          spark_python_task:
            python_file: ../src/sample_etl.py
      schedule:
        quartz_cron_expression: "0 0 1 * * ?"
        timezone_id: "UTC"
      max_concurrent_runs: 1
      timeout_seconds: 3600
The key concept: YAML anchors only work within a single file (aliases cannot cross `---` document or file boundaries), so if you want to share an anchor across multiple jobs, you must put the anchor definition and all the jobs that use it in the same file.