<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic How to deploy to Databricks Assets Bundle from Azure DevOps using Service Principal? in Administration &amp; Architecture</title>
    <link>https://community.databricks.com/t5/administration-architecture/how-to-deploy-to-databricks-assets-bundle-from-azure-devops/m-p/93897#M2053</link>
    <description>&lt;P&gt;I have a CI/CD process that after a Pull Request (PR) to main it deploys to staging.&lt;/P&gt;&lt;P&gt;It works using a Personal Access Token using Azure Pipelines.&lt;/P&gt;&lt;P&gt;From local, deploying using Service Principal works (&lt;A href="https://community.databricks.com/t5/administration-architecture/use-a-service-principal-token-instead-of-personal-access-token/td-p/91629" target="_blank"&gt;https://community.databricks.com/t5/administration-architecture/use-a-service-principal-token-instead-of-personal-access-token/td-p/91629&lt;/A&gt;).&lt;/P&gt;&lt;P&gt;But I want to deploy from Azure Pipelines using the Service Principal. How can I do that?&lt;/P&gt;&lt;P&gt;If it helps, I have the local Azure Pipelines yaml.&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;    jobs:
      - job: onMainPullRequestJob
        workspace:
          clean: all
        steps:
          - task: UsePythonVersion@0
            displayName: Set up Python 3.10
            inputs:
              versionSpec: '3.10'

          - script:  curl -sSL https://install.python-poetry.org | python - --version 1.8.3
            displayName: Install Poetry

          - script: poetry config http-basic.$(ARTIFACT-FEED) $(USERNAME-FEED) $(System.AccessToken)
            displayName: Configure credentials to Feed

          - script: poetry install --with dev,test
            displayName: Install dependencies

          - script: poetry run pre-commit run --all-files
            displayName:  Run pre-commit check

          - script: poetry run pytest tests/unit -s -vvv
            displayName: Run unit tests

          - script: curl -fsSL https://raw.githubusercontent.com/databricks/setup-cli/main/install.sh | sh
            displayName: Install Databricks CLI

          - script: |
              databricks bundle deploy --target staging
            env:
              DATABRICKS_HOST: $(DATABRICKS-HOST)
              DATABRICKS_TOKEN: $(DATABRICKS-TOKEN)
            displayName: Deploy the job

          - script: |
              databricks bundle run --target staging dab_job
            env:
              DATABRICKS_HOST: $(DATABRICKS-HOST)
              DATABRICKS_TOKEN: $(DATABRICKS-TOKEN)
            displayName: Launch workflow&lt;/LI-CODE&gt;</description>
    <pubDate>Mon, 14 Oct 2024 12:55:49 GMT</pubDate>
    <dc:creator>PabloCSD</dc:creator>
    <dc:date>2024-10-14T12:55:49Z</dc:date>
    <item>
      <title>How to deploy to Databricks Assets Bundle from Azure DevOps using Service Principal?</title>
      <link>https://community.databricks.com/t5/administration-architecture/how-to-deploy-to-databricks-assets-bundle-from-azure-devops/m-p/93897#M2053</link>
      <description>&lt;P&gt;I have a CI/CD process that after a Pull Request (PR) to main it deploys to staging.&lt;/P&gt;&lt;P&gt;It works using a Personal Access Token using Azure Pipelines.&lt;/P&gt;&lt;P&gt;From local, deploying using Service Principal works (&lt;A href="https://community.databricks.com/t5/administration-architecture/use-a-service-principal-token-instead-of-personal-access-token/td-p/91629" target="_blank"&gt;https://community.databricks.com/t5/administration-architecture/use-a-service-principal-token-instead-of-personal-access-token/td-p/91629&lt;/A&gt;).&lt;/P&gt;&lt;P&gt;But I want to deploy from Azure Pipelines using the Service Principal. How can I do that?&lt;/P&gt;&lt;P&gt;If it helps, I have the local Azure Pipelines yaml.&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;    jobs:
      - job: onMainPullRequestJob
        workspace:
          clean: all
        steps:
          - task: UsePythonVersion@0
            displayName: Set up Python 3.10
            inputs:
              versionSpec: '3.10'

          - script:  curl -sSL https://install.python-poetry.org | python - --version 1.8.3
            displayName: Install Poetry

          - script: poetry config http-basic.$(ARTIFACT-FEED) $(USERNAME-FEED) $(System.AccessToken)
            displayName: Configure credentials to Feed

          - script: poetry install --with dev,test
            displayName: Install dependencies

          - script: poetry run pre-commit run --all-files
            displayName:  Run pre-commit check

          - script: poetry run pytest tests/unit -s -vvv
            displayName: Run unit tests

          - script: curl -fsSL https://raw.githubusercontent.com/databricks/setup-cli/main/install.sh | sh
            displayName: Install Databricks CLI

          - script: |
              databricks bundle deploy --target staging
            env:
              DATABRICKS_HOST: $(DATABRICKS-HOST)
              DATABRICKS_TOKEN: $(DATABRICKS-TOKEN)
            displayName: Deploy the job

          - script: |
              databricks bundle run --target staging dab_job
            env:
              DATABRICKS_HOST: $(DATABRICKS-HOST)
              DATABRICKS_TOKEN: $(DATABRICKS-TOKEN)
            displayName: Launch workflow&lt;/LI-CODE&gt;</description>
      <pubDate>Mon, 14 Oct 2024 12:55:49 GMT</pubDate>
      <guid>https://community.databricks.com/t5/administration-architecture/how-to-deploy-to-databricks-assets-bundle-from-azure-devops/m-p/93897#M2053</guid>
      <dc:creator>PabloCSD</dc:creator>
      <dc:date>2024-10-14T12:55:49Z</dc:date>
    </item>
    <item>
      <title>Re: How to deploy to Databricks Assets Bundle from Azure DevOps using Service Principal?</title>
      <link>https://community.databricks.com/t5/administration-architecture/how-to-deploy-to-databricks-assets-bundle-from-azure-devops/m-p/94199#M2079</link>
      <description>&lt;P&gt;I needed to deploy a job using CI/CD Azure Pipelines &lt;STRONG&gt;without using the OAuth&lt;/STRONG&gt;, &lt;EM&gt;this is the way&lt;/EM&gt;:&lt;/P&gt;&lt;P&gt;First you need to have configured the Service Principal, for that you need to generate it in your workspace with this you will have:&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;A host: Which is your workspace url which follows this pattern: &lt;A href="https://adb-XXXXXXXXXXXXXXXX.YY.azuredatabricks.net/" target="_blank" rel="noopener"&gt;https://adb-XXXXXXXXXXXXXXXX.YY.azuredatabricks.net/&lt;/A&gt;&lt;/LI&gt;&lt;LI&gt;A client_id: Which will be generated when you generate a secret&lt;/LI&gt;&lt;LI&gt;A client_secret: Which will be generated when you generate a secret&lt;/LI&gt;&lt;/UL&gt;&lt;P&gt;Now you need to do some pre-requisites:&lt;/P&gt;&lt;OL&gt;&lt;LI&gt;Have an Azure Pipeline with your project with its corresponding azure-pipeline.yaml for CI/CD&lt;/LI&gt;&lt;LI&gt;Configure to the pipeline a variable for the HOST, the CLIENT_SECRET and the CLIENT_ID (in the below they are like these: YOUR-SERVICE-PRINCIPAL-CLIENT-ID, YOUR-SERVICE-PRINCIPAL-SECRET and YOUR-DATABRICKS-HOST).&lt;/LI&gt;&lt;/OL&gt;&lt;P&gt;Having this configured I show an azure-pipelines.yml file with the configurations I have:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;pool: Azure Pipelines

trigger: none

pr:
  autoCancel: true
  branches:
    include:
      - main

stages:
  - stage: onDevPullRequest
    # Similar to onMainPullRequest
    ...

  - stage: onMainPullRequest
    # This stage is triggered when a PR is created into main
    # For instance, if you create a PR from feature/1.0.0 to main, this stage will be triggered
    # This stage will be skipped if the PR is created from release/* to main
    condition: |
      and(
        not(startsWith(variables['System.PullRequest.SourceBranch'], 'refs/heads/release')),
        startsWith(variables['System.PullRequest.TargetBranch'], 'refs/heads/main')
      )
    jobs:
      - job: onMainPullRequestJob
        workspace:
          clean: all
        steps:
          - task: UsePythonVersion@0
            displayName: Set up Python 3.10
            inputs:
              versionSpec: '3.10'

          - script:  curl -sSL https://install.python-poetry.org | python - --version 1.8.3
            displayName: Install Poetry

          - script: poetry config http-basic.$(ARTIFACT-FEED) $(USERNAME-FEED) $(System.AccessToken)
            displayName: Configure credential with the artifact feed

          - script: poetry install --with dev,test
            displayName: Install dependencies

          - script: poetry run pre-commit run --all-files
            displayName:  Run pre-commit check

          - script: poetry run pytest tests/unit -s -vvv
            displayName: Run unit tests

          - bash: |
              # Install Databricks CLI
              curl -fsSL https://raw.githubusercontent.com/databricks/setup-cli/main/install.sh | sh

              # Verify installation
              databricks --version

              # Create databricks config file
              file="~/.databrickscfg"

              # If exists, remove it
              if [ -f "$file" ] ; then
                  rm "$file"
              fi

              # Define the profile
              echo "[YOUR_WORKSPACE_PROFILE]" &amp;gt;&amp;gt; ~/.databrickscfg
              echo "host = $(YOUR-DATABRICKS-HOST)" &amp;gt;&amp;gt; ~/.databrickscfg
              echo "token = $(YOUR-SERVICE-PRINCIPAL-SECRET)" &amp;gt;&amp;gt; ~/.databrickscfg

              # Show the file
              cat ~/.databrickscfg

              # Export token
              export DATABRICKS_TOKEN_SP=$(curl --request POST \
                --url https://adb-XXXXXXXXXXXXXXXX.YY.azuredatabricks.net/oidc/v1/token \
                --user "$(YOUR-SERVICE-PRINCIPAL-CLIENT-ID):$(YOUR-SERVICE-PRINCIPAL-SECRET)" \
                --data 'grant_type=client_credentials&amp;amp;scope=all-apis' | jq -r '.access_token'
                )

              # Export for the usage of the next task
              echo "##vso[task.setvariable variable=DATABRICKS_TOKEN_SP]$DATABRICKS_TOKEN_SP"
            displayName: Install Databricks CLI and create config file

          - script: |
              databricks bundle deploy --target staging
            env:
              DATABRICKS_HOST: $(DATABRICKS-HOST)
              DATABRICKS_TOKEN: $(DATABRICKS_TOKEN_SP)
            displayName: Deploy the job

          - script: |
              databricks bundle run --target staging dab_your_workflow
            env:
              DATABRICKS_HOST: $(DATABRICKS-HOST)
              DATABRICKS_TOKEN: $(DATABRICKS_TOKEN_SP)
            displayName: Launch workflow

  - stage: onRelease
    # Similar to onMainPullRequest
    ...&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Steps:&lt;/P&gt;&lt;OL&gt;&lt;LI&gt;Install the Databricks CLI&lt;/LI&gt;&lt;LI&gt;Generate a Databricks Configuration File for the profile you are deploying: ~/.databrickscfg&lt;/LI&gt;&lt;LI&gt;Generate a temporary token for deploying the job&lt;/LI&gt;&lt;LI&gt;Export the variable for using it in the deploying task&lt;/LI&gt;&lt;LI&gt;Deploy&lt;/LI&gt;&lt;/OL&gt;&lt;P&gt;This way we ensure that we use the same environment variable "DATABRICKS_TOKEN_SP" in the next task. Also, we don't have to use OAuth for configuring the CI/CD, which is a great step for user-independent CI/CD processes.&lt;/P&gt;&lt;P&gt;If you need a template for the databricks.yml that was used, here it is:&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;bundle:
  name: dab_your_workflow

# Declare to Databricks Assets Bundles that this is a Python project
# This is the interaction with the "pyproject.toml" file
artifacts:
  default:
    type: whl
    build: poetry build
    path: .

resources:
  jobs:
    dab_your_workflow:
      name: dab_your_workflow
      tasks:
        - task_key: your_workflow_task
          job_cluster_key: ${bundle.target}-${bundle.name}-job-cluster
          python_wheel_task:
             package_name: dab_your_workflow
             entry_point: your_workflow_entry_point
             parameters:
               - --conf-file
               - "/Workspace${workspace.root_path}/files/conf/tasks/your_workflow_task_config.yml"
               - --env
               - ${bundle.target}
          libraries:
            - whl: ./dist/*.whl

targets:
  dev:
    # Similar to Staging
    ...

  prod:
    # Similar to Staging
    ...

  staging:
    mode: production
    workspace:
      host: https://adb-XXXXXXXXXXXXXXXX.YY.azuredatabricks.net/
    run_as:
      service_principal_name: AAAAAAAA-BBBB-CCCC-DDDD-EEEEEEEEEEEE
    resources:
      jobs:
        dab_your_workflow:
          job_clusters:
            - job_cluster_key: ${bundle.target}-${bundle.name}-job-cluster
              new_cluster:
                num_workers: 2
                spark_version: "14.3.x-cpu-ml-scala2.12"  # Specify the Spark version
                spark_conf:
                  # You can specify the Spark configuration
                node_type_id: Standard_F8 # Specify the node type
                spark_env_vars:
                  # You can specify the Spark environment variables for example:
                  PIP_EXTRA_INDEX_URL:  "{{secrets/kv-your-key-vault/your-url-for-pip-extra-index-url}}"&lt;/LI-CODE&gt;&lt;P&gt;I believe that there are ways to "improve this bonsai answer", so if you have any suggestions for improving it, please leave a comment.&lt;/P&gt;</description>
      <pubDate>Tue, 15 Oct 2024 21:08:14 GMT</pubDate>
      <guid>https://community.databricks.com/t5/administration-architecture/how-to-deploy-to-databricks-assets-bundle-from-azure-devops/m-p/94199#M2079</guid>
      <dc:creator>PabloCSD</dc:creator>
      <dc:date>2024-10-15T21:08:14Z</dc:date>
    </item>
  </channel>
</rss>

