a week ago
Good day,
We have a customer who wants to query an Azure SQL Database from a Databricks notebook. We have configured a connection and a catalog for him, and he is able to query the Azure SQL Database. Now he has a new request: he also wants to write data back to the Azure SQL Database, but that does not seem to be supported by federated queries.
What is currently the best practice for writing data back to an Azure SQL Database from a Databricks notebook?
Kind Regards,
Marco
a week ago
Yes, @Marco37, you are right: currently, federated queries in Databricks only support reading data from external sources like Azure SQL Database; writing data back is not supported through that connection.
One practice I'm aware of for writing data from a Databricks notebook to an Azure SQL Database is to use a JDBC connection. This allows you to write a Spark DataFrame directly into an Azure SQL table.
Another option is to write the data to ADLS (Azure Data Lake Storage) and then use ADF (Azure Data Factory) to load it into the Azure SQL DB.
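To make the JDBC option concrete, here is a minimal sketch. The server, database, target table, and secret scope/key names are placeholders for illustration; in practice you would replace them with your own and pull credentials from a Databricks secret scope rather than hard-coding them:

# Build a JDBC URL for Azure SQL Database (server and database are placeholders)
jdbc_url = (
    "jdbc:sqlserver://<your-server>.database.windows.net:1433;"
    "database=<your-database>;encrypt=true;loginTimeout=30"
)

(
    df.write.format("jdbc")
    .option("url", jdbc_url)
    .option("dbtable", "dbo.MyTargetTable")  # hypothetical target table
    # Hypothetical secret scope/keys; avoid hard-coding credentials
    .option("user", dbutils.secrets.get("my-scope", "sql-user"))
    .option("password", dbutils.secrets.get("my-scope", "sql-password"))
    .mode("append")  # or "overwrite", depending on the use case
    .save()
)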
Hopefully, this helps you in your case.
Thanks!
@Sat_8
Monday
import os

from pyspark.sql import DataFrame


def PersistRemoteSQLTableFromDF(
    df: DataFrame,
    databaseName: str,
    tableName: str,
    mode: str = "overwrite",
    schemaName: str = "",
    tableLock: bool = True,
) -> None:
    """
    Persist a DataFrame into a remote SQL table using the SQL Server Spark connector.
    Improvements:
    - input validation and clearer errors
    - tolerant handling of mode casing
    - normalized schema handling
    - ensures tableLock is passed as the string the connector expects
    ARGS:
    - df (DataFrame): dataframe to persist
    - databaseName (str): database name (non-empty)
    - tableName (str): table name (non-empty)
    - mode (str): 'overwrite', 'append', 'ignore', 'error', 'errorifexists', 'default'
    - schemaName (str): optional schema name (no trailing dot required)
    - tableLock (bool): if True, enables tableLock for better bulk-write performance
    RAISES:
    - TypeError, ValueError on invalid args
    """
    # Basic type checks
    if not isinstance(df, DataFrame):
        raise TypeError("Argument Error - 'df' must be a pyspark.sql.DataFrame")
    if not isinstance(databaseName, str) or not isinstance(tableName, str):
        raise TypeError(
            "Argument Error - 'databaseName' and 'tableName' must be strings"
        )
    # Normalize and validate string args
    mode = (mode or "").lower()
    allowed_modes = {
        "overwrite",
        "append",
        "ignore",
        "error",
        "errorifexists",
        "default",
    }
    if mode not in allowed_modes:
        raise ValueError(
            f"Argument Error - Mode '{mode}' is not valid. Accepted save modes: {sorted(allowed_modes)}"
        )
    databaseName = databaseName.strip()
    tableName = tableName.strip()
    if not databaseName or not tableName:
        raise ValueError(
            "Argument Error - 'databaseName' and 'tableName' must be non-empty"
        )
    # Normalize schema name (remove trailing dots) before building the qualified table name
    schemaName = (schemaName or "").strip().rstrip(".")
    dbtable = f"{schemaName + '.' if schemaName else ''}{tableName}"
    # The connector expects the tableLock option as the string "true"/"false"
    tableLock_opt = "true" if bool(tableLock) else "false"
    try:
        (
            df.write.format("sqlserver")
            .mode(mode)
            .option("host", os.getenv("DEFAULT_SQL_SERVER_NAME"))
            .option("port", "1433")
            .option("database", databaseName)
            .option("dbtable", dbtable)
            .option("tableLock", tableLock_opt)
            .option("user", os.getenv("DEFAULT_SQL_SERVER_USERNAME"))
            .option("password", os.getenv("DEFAULT_SQL_SERVER_PASSWORD"))
            .save()
        )
    except Exception as exc:
        # Surface clearer context while preserving the original exception
        raise RuntimeError(
            f"Failed to persist DataFrame to remote table '{dbtable}' in database '{databaseName}': {exc}"
        ) from exc
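For reference, a hypothetical call might look like this, assuming the DEFAULT_SQL_SERVER_NAME, DEFAULT_SQL_SERVER_USERNAME, and DEFAULT_SQL_SERVER_PASSWORD environment variables are already set on the cluster; the database, schema, table, and column names below are made up:

# Example usage (sketch): append a small DataFrame to dbo.Customers
sample_df = spark.createDataFrame(
    [(1, "Alice"), (2, "Bob")],
    ["CustomerId", "CustomerName"],
)
PersistRemoteSQLTableFromDF(
    sample_df,
    databaseName="SalesDb",
    tableName="Customers",
    mode="append",
    schemaName="dbo",
)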
Monday
Thanks Sat_8 🙂