<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Move whole workflow from Dev to Prod in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/move-whole-workflow-from-dev-to-prod/m-p/10992#M6033</link>
    <description>&lt;P&gt;&lt;B&gt;Option 1:&lt;/B&gt;&lt;/P&gt;&lt;P&gt;You can use Terraform with Azure DevOps to automate the deployments:&lt;/P&gt;&lt;P&gt;&lt;A href="https://www.databricks.com/blog/2022/12/5/databricks-workflows-through-terraform.html" target="_blank"&gt;https://www.databricks.com/blog/2022/12/5/databricks-workflows-through-terraform.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;B&gt;Option 2:&lt;/B&gt;&lt;/P&gt;&lt;P&gt;You can use the Databricks CLI to automate the deployments:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt; &lt;A href="https://docs.databricks.com/dev-tools/cli/jobs-cli.html" alt="https://docs.databricks.com/dev-tools/cli/jobs-cli.html" target="_blank"&gt;Jobs CLI | Databricks on AWS&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
    <pubDate>Mon, 23 Jan 2023 11:06:34 GMT</pubDate>
    <dc:creator>SRK</dc:creator>
    <dc:date>2023-01-23T11:06:34Z</dc:date>
    <item>
      <title>Move whole workflow from Dev to Prod</title>
      <link>https://community.databricks.com/t5/data-engineering/move-whole-workflow-from-dev-to-prod/m-p/10989#M6030</link>
      <description>&lt;P&gt;I have a workflow created in Dev, now I want to move the whole thing to prod and schedule it. The workflow has multiple notebooks, dependent libraries, parameters and such. How to move the whole thing to prod, instead of moving each notebooks and recreate the workflow again in PROD? Or, what are the other options here? Thanks in advance! &lt;/P&gt;</description>
      <pubDate>Sat, 21 Jan 2023 13:21:59 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/move-whole-workflow-from-dev-to-prod/m-p/10989#M6030</guid>
      <dc:creator>KKo</dc:creator>
      <dc:date>2023-01-21T13:21:59Z</dc:date>
    </item>
    <item>
      <title>Re: Move whole workflow from Dev to Prod</title>
      <link>https://community.databricks.com/t5/data-engineering/move-whole-workflow-from-dev-to-prod/m-p/10990#M6031</link>
      <description>&lt;P&gt;databricks CLI will be helpful here, if the Prod is in Different Shard, if you don't know this then you have to do it manually by downloading DBC files to other shards.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Sat, 21 Jan 2023 14:25:43 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/move-whole-workflow-from-dev-to-prod/m-p/10990#M6031</guid>
      <dc:creator>Aviral-Bhardwaj</dc:creator>
      <dc:date>2023-01-21T14:25:43Z</dc:date>
    </item>
    <item>
      <title>Re: Move whole workflow from Dev to Prod</title>
      <link>https://community.databricks.com/t5/data-engineering/move-whole-workflow-from-dev-to-prod/m-p/10991#M6032</link>
      <description>&lt;P&gt;@Kris Koirala​&amp;nbsp;&lt;/P&gt;&lt;P&gt;You'll need CI/CD pipelines to do that programmatically. You can use Terraform/AzureARM/Bicep or any other tool that you (or you're planning to) use.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;For example - Azure + Azure DevOps:&lt;/P&gt;&lt;P&gt;&lt;A href="https://learn.microsoft.com/en-us/azure/databricks/dev-tools/ci-cd/ci-cd-azure-devops" target="_blank"&gt;https://learn.microsoft.com/en-us/azure/databricks/dev-tools/ci-cd/ci-cd-azure-devops&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 23 Jan 2023 09:36:33 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/move-whole-workflow-from-dev-to-prod/m-p/10991#M6032</guid>
      <dc:creator>daniel_sahal</dc:creator>
      <dc:date>2023-01-23T09:36:33Z</dc:date>
    </item>
    <item>
      <title>Re: Move whole workflow from Dev to Prod</title>
      <link>https://community.databricks.com/t5/data-engineering/move-whole-workflow-from-dev-to-prod/m-p/10992#M6033</link>
      <description>&lt;P&gt;&lt;B&gt;Option 1:&lt;/B&gt;&lt;/P&gt;&lt;P&gt;You can use Terraform with Azure DevOps to automate the deployments:&lt;/P&gt;&lt;P&gt;&lt;A href="https://www.databricks.com/blog/2022/12/5/databricks-workflows-through-terraform.html" target="_blank"&gt;https://www.databricks.com/blog/2022/12/5/databricks-workflows-through-terraform.html&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;B&gt;Option 2:&lt;/B&gt;&lt;/P&gt;&lt;P&gt;You can use the Databricks CLI to automate the deployments:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt; &lt;A href="https://docs.databricks.com/dev-tools/cli/jobs-cli.html" alt="https://docs.databricks.com/dev-tools/cli/jobs-cli.html" target="_blank"&gt;Jobs CLI | Databricks on AWS&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 23 Jan 2023 11:06:34 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/move-whole-workflow-from-dev-to-prod/m-p/10992#M6033</guid>
      <dc:creator>SRK</dc:creator>
      <dc:date>2023-01-23T11:06:34Z</dc:date>
    </item>
    <item>
      <title>Re: Move whole workflow from Dev to Prod</title>
      <link>https://community.databricks.com/t5/data-engineering/move-whole-workflow-from-dev-to-prod/m-p/10993#M6034</link>
      <description>&lt;P&gt;Alternatively, you can just click the three dots options in workflow and choose "view JSON" and save JSON. Then use it in the Rest API call to create new workflow/job using that JSON (but usually some part needs to be removed)&lt;/P&gt;</description>
      <pubDate>Mon, 23 Jan 2023 13:46:42 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/move-whole-workflow-from-dev-to-prod/m-p/10993#M6034</guid>
      <dc:creator>Hubert-Dudek</dc:creator>
      <dc:date>2023-01-23T13:46:42Z</dc:date>
    </item>
    <item>
      <title>Re: Move whole workflow from Dev to Prod</title>
      <link>https://community.databricks.com/t5/data-engineering/move-whole-workflow-from-dev-to-prod/m-p/70707#M34132</link>
      <description>&lt;P&gt;I ended up creating a python script to just do the export, here is the code below.&lt;BR /&gt;It will match on Job name, if it matches it will update otherwise it will import.&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;LI-CODE lang="python"&gt;import requests

source_token = ''
source_instance = 'adb-000000000000000.00.azuredatabricks.net'
target_token = ''
target_instance = 'adb-000000000000000.00.azuredatabricks.net'

target_cluster_id = "0000-000000-x00x0xxx"
target_owner_email = 'produseremail'


# Headers for API requests
headers_source = {
    'Authorization': f'Bearer {source_token}',
    'Content-Type': 'application/json'
}

headers_target = {
    'Authorization': f'Bearer {target_token}',
    'Content-Type': 'application/json'
}

# Function to list jobs in the environment
def list_jobs(instance, headers):
    response = requests.get(f'https://{instance}/api/2.0/jobs/list', headers=headers)
    response.raise_for_status()
    return response.json().get('jobs', [])

# Function to get job configuration
def get_job_config(instance, headers, job_id):
    response = requests.get(f'https://{instance}/api/2.0/jobs/get?job_id={job_id}', headers=headers)
    response.raise_for_status()
    return response.json()

# Function to create job in the target environment
def create_job(instance, headers, job_config):
    response = requests.post(f'https://{instance}/api/2.0/jobs/create', headers=headers, json=job_config)
    response.raise_for_status()
    return response.json()

# Function to update job in the target environment
def update_job(instance, headers, job_id, job_config):
    response = requests.post(f'https://{instance}/api/2.0/jobs/update?job_id={job_id}', headers=headers, json=job_config)
    response.raise_for_status()
    return response.json()

# Function to filter jobs by name
def filter_jobs_by_name(jobs, name):
    return [job for job in jobs if job['settings']['name'] == name]

# Function to export a job
def export_job(job, target_jobs, target_cluster_id, display_job_config = False) -&amp;gt; str:
    error_message = ''
    try:
        job_id = job['job_id']
        job_name = job['settings']['name']

        print(f"\nExporting job: {job_name}") 

        # Get job configuration from the source environment
        job_config = get_job_config(source_instance, headers_source, job_id)
       
        # Check if the job already exists in the target environment
        target_job = filter_jobs_by_name(target_jobs, job_name)

        # Prepare the job configuration
        job_config.pop('job_id', None)
        job_config['creator_user_name'] = target_owner_email
        job_config['run_as_user_name'] = target_owner_email

        job_settings = job_config.get('settings', {})
        tasks = job_settings.pop('tasks', [])

        # Remove the settings
        job_config.pop('settings', None)

        # Copy settings contents to root level
        for key, value in job_settings.items():
            job_config[key] = value

        # Update cluster ID in tasks
        for task in tasks:
            if 'existing_cluster_id' in task:
                task['existing_cluster_id'] = target_cluster_id
        # Add tasks to the root level
        job_config['tasks'] = tasks

        if target_job:
            target_job_id = target_job[0]['job_id']
            update_job(target_instance, headers_target, target_job_id, job_config)
        else:
            create_job(target_instance, headers_target, job_config)
    except Exception as e:
        error_message = f"Job with name '{job_name}' failed to export. Error: {e}"
        print(error_message)
    finally:
        print(f"Finished processing: {job_name}")    
        if display_job_config:
            print(f"\nOriginal job: {job_config}\n")
            print(f"Modified job: {job_config}\n") 
        return error_message


# Function to export all jobs
def export_all_jobs(display_job_config = False):

    source_jobs = list_jobs(source_instance, headers_source)
    target_jobs = list_jobs(target_instance, headers_target)

    for job in source_jobs:
      export_job(job, target_jobs, target_cluster_id, display_job_config)
    

# Function to export jobs by name
def export_jobs_by_name(job_name):
    source_jobs = list_jobs(source_instance, headers_source)
    target_jobs = list_jobs(target_instance, headers_target)

    filtered_jobs = filter_jobs_by_name(source_jobs, job_name)

    if not filtered_jobs:
        print(f"No jobs found with name '{job_name}' in the source environment.")
        return

    for job in filtered_jobs:
        export_job(job, target_jobs, target_cluster_id)
                
    print(f"Jobs with name '{job_name}' have been processed.")


export_all_jobs()
#export_jobs_by_name("job-name")&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 27 May 2024 11:20:18 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/move-whole-workflow-from-dev-to-prod/m-p/70707#M34132</guid>
      <dc:creator>mkassa</dc:creator>
      <dc:date>2024-05-27T11:20:18Z</dc:date>
    </item>
  </channel>
</rss>

