- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
07-01-2024 01:36 PM
Hello
I have written a Python script that uses the Databricks REST API. I am trying to clone or update an Azure DevOps repository inside Databricks using an Azure service principal. I am able to retrieve the credential_id for the service principal I am using, but every time I try to clone or update the repo I get an authentication error. I do not understand how to use the credential_id to authenticate when cloning the repo. Here's the script:
import getopt
import json
import sys
import requests
def create_git_credentials(host_name, token, devops_pat):
    """Return the ID of a Git credential for the caller, creating one if needed.

    The existing credentials are listed first. If at least one is returned,
    the ID of the first entry is returned unchanged. Otherwise a new Azure
    DevOps credential is created from ``devops_pat`` and its ID is returned.

    Args:
        host_name: Workspace base URL; it is concatenated directly with
            ``api/2.0/...`` so it must end with ``/``.
        token: Bearer token sent in the ``Authorization`` header.
        devops_pat: Azure DevOps personal access token stored in the new
            credential when one has to be created.

    Returns:
        The ``credential_id`` value taken from the API response.

    Raises:
        ValueError: If either the list or the create request does not
            return HTTP 200.
    """
    url = f"{host_name}api/2.0/git-credentials"
    headers = {"Authorization": "Bearer " + token, "Content-Type": "application/json"}

    # Listing is a plain GET; it takes no request body.
    response = requests.request("GET", url, headers=headers)
    # Check the status before parsing so a non-JSON error body cannot mask
    # the real failure with a decode error.
    if response.status_code != 200:
        raise ValueError(f"Error listing git credentials: {response.text}")
    listing = response.json()
    print('RESPONSE:', listing)

    # The key may be absent when nothing is configured, so do not index blindly.
    existing = listing.get("credentials") or []
    if existing:
        return existing[0]["credential_id"]

    # Nothing registered yet: create the credential with the supplied PAT.
    payload = json.dumps({
        "git_username": "testuser",
        "git_provider": "azureDevOpsServices",
        "personal_access_token": devops_pat,
    })
    response = requests.request("POST", url, headers=headers, data=payload)
    if response.status_code != 200:
        raise ValueError(f"Error creating git credentials: {response.text}")
    created = response.json()
    print('RESPONSE:', created)
    return created["credential_id"]
def checkIfRepoExists(host_name, token, repo_path_in_dbks):
    """Look up a workspace repo by its path and return its ID.

    The repo listing is fetched page by page, following ``next_page_token``
    for as long as the response carries one, and the search stops at the
    first entry whose ``path`` equals ``repo_path_in_dbks``.

    Args:
        host_name: Workspace base URL; it is concatenated directly with
            ``api/2.0/...`` so it must end with ``/``.
        token: Bearer token sent in the ``Authorization`` header.
        repo_path_in_dbks: Repo path to search for.

    Returns:
        The ``id`` of the first matching repo, or ``0`` when none matches.

    Raises:
        ValueError: If a listing request does not return HTTP 200.
    """
    url = f"{host_name}api/2.0/repos"
    headers = {"Authorization": "Bearer " + token, "Content-Type": "application/json"}
    params = None  # the first page is requested without a page token

    while True:
        response = requests.request("GET", url, headers=headers, params=params)
        if response.status_code != 200:
            raise ValueError(f"Error listing repos: {response.text}")
        page = json.loads(response.text)

        # "repos" may be missing from a page, so default to an empty list.
        for repo in page.get("repos", []):
            if repo.get("path") == repo_path_in_dbks:
                return repo["id"]

        next_page_token = page.get("next_page_token")
        if not next_page_token:
            # Last page reached without a match; 0 is the "not found" marker.
            return 0
        params = {"next_page_token": next_page_token}
def createParentDirectoryIfNotExists(host_name, token, repo_path_in_dbks):
    """Create the parent workspace directory of a repo path when it is missing.

    The parent path (everything before the last ``/``) is listed first. Only
    when that listing reports ``RESOURCE_DOES_NOT_EXIST`` is a ``mkdirs``
    request sent for it.

    Args:
        host_name: Workspace base URL; it is concatenated directly with
            ``api/2.0/...`` so it must end with ``/``.
        token: Bearer token sent in the ``Authorization`` header.
        repo_path_in_dbks: Repo path whose parent directory is checked.

    Raises:
        ValueError: If the ``mkdirs`` request does not return HTTP 200.
    """
    # Everything before the final "/" (empty string when there is no "/").
    parent_path = repo_path_in_dbks.rpartition("/")[0]
    headers = {"Authorization": "Bearer " + token, "Content-Type": "application/json"}

    listing = requests.request(
        "GET",
        f"{host_name}api/2.0/workspace/list",
        headers=headers,
        params={"path": parent_path},
    )
    listing_body = json.loads(listing.text)

    # Any outcome other than "does not exist" leaves the workspace untouched.
    if 'error_code' not in listing_body or listing_body['error_code'] != 'RESOURCE_DOES_NOT_EXIST':
        return

    created = requests.request(
        "POST",
        f"{host_name}api/2.0/workspace/mkdirs",
        headers=headers,
        data=json.dumps({"path": parent_path}),
    )
    if created.status_code != 200:
        raise ValueError(f"Failed to create parent directory {parent_path}: {created.text}")
def updateRepo(host_name, token, repoId, branch):
    """Send a PATCH request that sets ``branch`` on an existing repo.

    Args:
        host_name: Workspace base URL; it is concatenated directly with
            ``api/2.0/...`` so it must end with ``/``.
        token: Bearer token sent in the ``Authorization`` header.
        repoId: ID of the repo, appended to the request URL.
        branch: Branch name sent in the request body.

    Returns:
        The raw response text when the request returns HTTP 200.

    Raises:
        ValueError: With the response text for any other status code.
    """
    http_verb = "PATCH"
    endpoint = f"{host_name}api/2.0/repos/{str(repoId)}"
    body = json.dumps({"branch": branch})

    # Echo the outgoing request for troubleshooting.
    print("payload is\n" + body)
    print(http_verb + "-Request: " + endpoint + "\n" + "payload:\n" + body)

    reply = requests.request(
        http_verb,
        endpoint,
        headers={"Authorization": "Bearer " + token, "Content-Type": "application/json"},
        data=body,
    )
    if reply.status_code != 200:
        raise ValueError(reply.text)
    return reply.text
def cloneRepo(host_name, token, git_url, repo_path_in_dbks, branch):
    """Send a POST request that creates a repo at ``repo_path_in_dbks``.

    Args:
        host_name: Workspace base URL; it is concatenated directly with
            ``api/2.0/...`` so it must end with ``/``.
        token: Bearer token sent in the ``Authorization`` header.
        git_url: URL of the remote Git repository.
        repo_path_in_dbks: Workspace path sent as ``path`` in the body.
        branch: Branch name sent in the request body.

    Returns:
        The raw response text when the request returns HTTP 200.

    Raises:
        ValueError: With the response text for any other status code.
    """
    method = "POST"
    url = f"{host_name}api/2.0/repos"
    headers = {"Authorization": "Bearer " + token, "Content-Type": "application/json"}
    payload = json.dumps({
        "url": git_url,
        "provider": "azureDevOpsServices",
        "path": repo_path_in_dbks,
        "branch": branch,
    })
    print("payload is\n" + payload)
    print(method + "-Request: " + url + "\n" + "payload:\n" + payload)

    response = requests.request(method, url, headers=headers, data=payload)

    # The body is printed for debugging only. A non-JSON error body must not
    # raise here and hide the status/text reported below.
    try:
        response_body = response.json()
    except ValueError:
        response_body = response.text
    print("RESPONSE:", response_body)

    if response.status_code != 200:
        raise ValueError(response.text)
    return response.text
def main():
    """Parse the command line, then update the repo or clone it if absent.

    All six options are required and each takes a value, in short or long
    form: ``-h/--host_name``, ``-t/--token``, ``-g/--git_url``,
    ``-b/--branch``, ``-p/--repo_path_in_dbks`` and ``-d/--devops_pat``.
    Note that ``-h`` is the host name here, not a help flag.

    Exits with status 2 after printing the usage line when an option is
    unknown, lacks its value, or is missing.
    """
    usage = (
        "create_update_dbks_repo.py"
        + " -h <host_name>"
        + " -t <token>"
        + " -g <git_url>"
        + " -b <branch>"
        + " -p <repo_path_in_dbks>"
        + " -d <devops_pat>"
    )
    # Short and long spellings of each option mapped to one canonical name.
    flag_to_name = {
        "-h": "host_name", "--host_name": "host_name",
        "-t": "token", "--token": "token",
        "-g": "git_url", "--git_url": "git_url",
        "-b": "branch", "--branch": "branch",
        "-p": "repo_path_in_dbks", "--repo_path_in_dbks": "repo_path_in_dbks",
        "-d": "devops_pat", "--devops_pat": "devops_pat",
    }
    required = ["host_name", "token", "git_url", "branch", "repo_path_in_dbks", "devops_pat"]

    try:
        # A trailing ":" marks a short option that takes a value; the letters
        # match the flags advertised in the usage line.
        opts, _ = getopt.getopt(
            sys.argv[1:],
            "h:t:g:b:p:d:",
            ["host_name=", "token=", "git_url=", "branch=", "repo_path_in_dbks=", "devops_pat="],
        )
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)

    # When an option is repeated, the last occurrence wins.
    values = {flag_to_name[opt]: arg for opt, arg in opts}
    missing = [name for name in required if not values.get(name)]
    if missing:
        print("Missing required option(s): " + ", ".join(missing))
        print(usage)
        sys.exit(2)

    host_name = values["host_name"]
    token = values["token"]
    git_url = values["git_url"]
    branch = values["branch"]
    repo_path_in_dbks = values["repo_path_in_dbks"]
    devops_pat = values["devops_pat"]

    # The request helpers append "api/2.0/..." directly to the host name.
    if not host_name.endswith("/"):
        host_name += "/"

    print("-h is " + host_name)
    print("-g is " + git_url)
    print("-b is " + branch)
    print("-p is " + repo_path_in_dbks)
    # Never echo the personal access token into logs.
    print("-d is <redacted>")

    create_git_credentials(host_name, token, devops_pat)
    repoName = git_url.split("/")[-1]
    repoId = checkIfRepoExists(host_name, token, repo_path_in_dbks)
    if repoId != 0:
        updateRepo(host_name, token, repoId, branch)
        print("DBKS Repo: " + repoName + " updated successfully.")
    else:
        createParentDirectoryIfNotExists(host_name, token, repo_path_in_dbks)
        cloneRepo(host_name, token, git_url, repo_path_in_dbks, branch)
        print("DBKS Repo: " + repoName + " cloned successfully.")
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Is there a way to use the DevOps PAT directly for authentication, without calling the git-credentials endpoint?
Accepted Solutions
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
09-27-2024 12:28 PM
Hello from the Databricks Git PM:
We have a section in the documentation for setting up Git credentials for a SP. The important step is to use the OBO token for the SP when you call the git credential API. https://docs.databricks.com/en/repos/ci-cd-techniques-with-repos.html#use-a-service-principal-with-d...
Let me know if this helps!
Nicole
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
09-27-2024 12:28 PM
Hello from the Databricks Git PM:
We have a section in the documentation for setting up Git credentials for a SP. The important step is to use the OBO token for the SP when you call the git credential API. https://docs.databricks.com/en/repos/ci-cd-techniques-with-repos.html#use-a-service-principal-with-d...
Let me know if this helps!
Nicole
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
2 weeks ago
@nicole_lu_PM So sorry for coming back to this issue after such a long time. I looked into it, and it seems that the OBO token concept applies only when Databricks is used with AWS as the cloud provider. For Azure, most of the comments I found are about using an AAD token. We tried both approaches and it still does not seem to work. If it is okay with you or someone from your team, I can contact our Solutions Architect to loop your team in on this issue so that we can share more details and investigate further.
Looking forward to hearing from you.
Regards,
Rachit Ahuja

