Have you read about the following approach before?
Repository Structure Options
1. Monorepo with Multiple Bundles
repo-root/
├── .github/
│   └── workflows/
│       ├── bundle-ci.yml
│       └── bundle-deploy.yml
├── bundles/
│   ├── data-engineering/
│   │   ├── databricks.yml
│   │   ├── src/
│   │   └── tests/
│   ├── ml-workflows/
│   │   ├── databricks.yml
│   │   ├── src/
│   │   └── tests/
│   └── analytics/
│       ├── databricks.yml
│       ├── src/
│       └── tests/
└── scripts/
    └── detect-changed-bundles.sh
2. Bundle-per-Directory with Shared Resources
repo-root/
├── .github/workflows/
├── shared/
│   ├── libraries/
│   └── configurations/
├── bundle-1/
│   └── databricks.yml
├── bundle-2/
│   └── databricks.yml
└── bundle-3/
    └── databricks.yml
Dynamic Bundle Detection Strategy
The key challenge you're facing is detecting which bundles changed in a PR. Here's a
practical approach:
GitHub Actions Workflow with Dynamic Detection
# Validates (on pull requests and pushes) and deploys (on pushes only) the
# Databricks bundles under bundles/ that contain changed files.
#
# Flow:
#   1. detect-changed-bundles  - diffs against the base commit and emits a JSON
#                                array of changed bundle directory names.
#   2. validate-bundles        - runs `databricks bundle validate` per bundle.
#   3. deploy-bundles          - runs `databricks bundle deploy` per bundle.
name: Deploy Databricks Bundles

on:
  pull_request:
    branches: [main, develop]
  push:
    branches: [main, develop]

# Least privilege: every job only needs to read the repository contents.
permissions:
  contents: read

jobs:
  detect-changed-bundles:
    runs-on: ubuntu-latest
    outputs:
      # JSON array of bundle directory names, e.g. ["analytics","ml-workflows"]
      bundles: ${{ steps.set-bundles.outputs.bundles }}
      # 'true' / 'false' string used to skip the matrix jobs when nothing changed
      has-changes: ${{ steps.set-bundles.outputs.has-changes }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Full history so the base commit exists for git diff
      - name: Detect changed bundles
        id: set-bundles
        # Event data is handed over through env instead of being interpolated
        # into the script text, so it can never be parsed as shell code.
        env:
          EVENT_NAME: ${{ github.event_name }}
          PR_BASE_SHA: ${{ github.event.pull_request.base.sha }}
          PUSH_BEFORE_SHA: ${{ github.event.before }}
        run: |
          set -euo pipefail

          # Pick the commit to compare against.
          if [ "$EVENT_NAME" = "pull_request" ]; then
            BASE_REF="$PR_BASE_SHA"
          else
            # `before` is the ref's previous tip, so every commit of a
            # multi-commit push is covered (HEAD~1 would only see the last one).
            BASE_REF="$PUSH_BEFORE_SHA"
          fi

          # A newly created branch reports an all-zero SHA and a force push can
          # orphan the previous tip; fall back to the parent of HEAD then.
          if ! git cat-file -e "${BASE_REF}^{commit}" 2>/dev/null; then
            BASE_REF="HEAD~1"
          fi

          # Compute the diff once instead of once per bundle.
          CHANGED_FILES=$(git diff --name-only "$BASE_REF" HEAD)

          # A bundle is any directory under bundles/ containing databricks.yml;
          # it counts as changed when a changed path lies inside it.
          CHANGED_BUNDLES=()
          while IFS= read -r bundle_dir; do
            if grep -q "^${bundle_dir}/" <<< "$CHANGED_FILES"; then
              CHANGED_BUNDLES+=("$(basename "$bundle_dir")")
            fi
          done < <(find bundles -name "databricks.yml" -exec dirname {} \; | sort -u)

          # Emit a JSON array for the matrix strategy. jq does the quoting, so
          # unusual directory names cannot produce malformed JSON.
          if [ "${#CHANGED_BUNDLES[@]}" -gt 0 ]; then
            BUNDLES_JSON=$(printf '%s\n' "${CHANGED_BUNDLES[@]}" | jq -R . | jq -sc .)
            echo "bundles=$BUNDLES_JSON" >> "$GITHUB_OUTPUT"
            echo "has-changes=true" >> "$GITHUB_OUTPUT"
          else
            BUNDLES_JSON="[]"
            echo "bundles=[]" >> "$GITHUB_OUTPUT"
            echo "has-changes=false" >> "$GITHUB_OUTPUT"
          fi
          echo "Changed bundles: $BUNDLES_JSON"

  validate-bundles:
    needs: detect-changed-bundles
    # An empty matrix is an error, so skip the job when nothing changed.
    if: needs.detect-changed-bundles.outputs.has-changes == 'true'
    runs-on: ubuntu-latest
    strategy:
      matrix:
        bundle: ${{ fromJson(needs.detect-changed-bundles.outputs.bundles) }}
    steps:
      - uses: actions/checkout@v4
      - name: Setup Databricks CLI
        # NOTE(review): @main is a moving branch; consider pinning to a release
        # tag or commit SHA so CLI upgrades are deliberate.
        uses: databricks/setup-cli@main
      - name: Validate bundle
        # working-directory keeps the (PR-controlled) bundle name out of the
        # shell script, where `cd bundles/<name>` could be abused for injection.
        working-directory: bundles/${{ matrix.bundle }}
        run: databricks bundle validate -t dev
        # NOTE(review): secrets are not exposed to pull requests from forks, so
        # this step cannot authenticate for those PRs - confirm that is intended.
        env:
          DATABRICKS_HOST: ${{ secrets.DATABRICKS_HOST }}
          DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }}

  deploy-bundles:
    needs: [detect-changed-bundles, validate-bundles]
    # Deploy only for pushes (never for pull requests) and only if a bundle changed.
    # NOTE(review): pushes to `develop` also match and deploy with `-t prod`,
    # while validation above only checks the `dev` target - confirm whether the
    # target should depend on the branch.
    if: >-
      github.event_name == 'push' &&
      needs.detect-changed-bundles.outputs.has-changes == 'true'
    runs-on: ubuntu-latest
    strategy:
      matrix:
        bundle: ${{ fromJson(needs.detect-changed-bundles.outputs.bundles) }}
    steps:
      - uses: actions/checkout@v4
      - name: Setup Databricks CLI
        # NOTE(review): @main is a moving branch; consider pinning to a release
        # tag or commit SHA so CLI upgrades are deliberate.
        uses: databricks/setup-cli@main
      - name: Deploy bundle
        # See validate-bundles: the bundle name stays out of the script text.
        working-directory: bundles/${{ matrix.bundle }}
        run: databricks bundle deploy -t prod
        env:
          DATABRICKS_HOST: ${{ secrets.DATABRICKS_HOST }}
          DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }}
Alternative Approaches
1. Path Filters Approach
# A `paths` filter decides whether the WHOLE workflow runs, and
# `github.event.pull_request.changed_files` is only a count of changed files
# (not a list of paths), so it cannot be used to pick a job per bundle.
# Use one workflow file per bundle instead, each with its own path filter.

# .github/workflows/deploy-data-engineering.yml
on:
  pull_request:
    paths:
      - 'bundles/data-engineering/**'
jobs:
  deploy-data-engineering:
    # No job-level `if:` needed: the trigger already restricts this workflow
    # to pull requests touching bundles/data-engineering/.
    # ... deployment steps
---
# .github/workflows/deploy-ml-workflows.yml
on:
  pull_request:
    paths:
      - 'bundles/ml-workflows/**'
jobs:
  deploy-ml-workflows:
    # ... deployment steps
2. Bundle Configuration File
Create a bundles-config.json:
{
  "bundles": [
    {
      "name": "data-engineering",
      "path": "bundles/data-engineering",
      "target": "prod"
    },
    {
      "name": "ml-workflows",
      "path": "bundles/ml-workflows",
      "target": "prod"
    }
  ]
}
3. Labels-Based Deployment (for PRs)
# Label-gated deployment: the job runs only when one of the pull request's
# labels is named exactly `deploy:bundle-name`.
jobs:
  deploy:
    # `labels.*.name` collects the name of every label into an array, and
    # contains() then checks that array for the exact label.
    if: ${{ contains(github.event.pull_request.labels.*.name, 'deploy:bundle-name') }}
    # ... deployment steps