I executed a spark-submit job through the Databricks CLI with the following job configuration.
{
"job_id": 123,
"creator_user_name": "******",
"run_as_user_name": "******",
"run_as_owner": true,
"settings": {
"name": "44aa-8447-c123aad310",
"email_notifications": {},
"max_concurrent_runs": 1,
"tasks": [
{
"task_key": "4aa-8447-c90aad310",
"spark_submit_task": {
"parameters": [
"--driver-memory 3G",
"--executor-memory 3G",
"--conf",
"spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version=2",
"--conf",
"spark.speculation=false",
"--conf",
"spark.sql.parquet.fs.optimized.committer.optimization-enabled=true",
"--conf",
"spark.executorEnv.JAVA_HOME=/usr/lib/jvm/jdk-11.0.1",
"--conf",
"spark.executor.instances=3",
"--conf",
"spark.network.timeout=600s",
"--conf",
"spark.yarn.appMasterEnv.JAVA_HOME=/usr/lib/jvm/jdk-11.0.1",
"--conf",
"spark.driver.maxResultSize=1g",
"--conf",
"spark.yarn.maxAppAttempts=1",
"--jars",
"/home/hadoop/somejar.jar,/home/hadoop/somejar2.jar",
"--class",
"we.databricks.some.path.ER",
"/home/hadoop/some-jar-SNAPSHOT.jar",
"'******'"
]
},
"new_cluster": {
"spark_version": "10.4.x-scala2.12",
"spark_conf": {
"spark.databricks.delta.preview.enabled": "true",
"spark.hadoop.fs.azure.account.key": "******"
},
"node_type_id": "Standard_DS3_v2",
"custom_tags": {
"application": "******",
"name": "******",
"environment": "******",
"owner": "******",
"CURRENT_VERSION": "1.20.0-ab6303d9d"
},
"cluster_log_conf": {
"dbfs": {
"destination": "******"
}
},
"spark_env_vars": {
"ENVIRONMENT": "******",
"AZURE_ACCOUNT_KEY": "******",
"AZURE_ACCOUNT_NAME": "******",
"PYSPARK_PYTHON": "/databricks/python3/bin/python3",
"JNAME": "zulu11-ca-amd64",
"AZURE_CONTAINER_NAME": "******"
},
"enable_elastic_disk": true,
"init_scripts": [
{
"abfss": {
"destination": "******"
}
}
],
"num_workers": 3
},
"timeout_seconds": 0
}
],
"format": "MULTI_TASK"
},
"created_time": 1662096418457
}
But this gives an error in spark-submit: `Unrecognized option: --executor-memory 3G`. (Each spark-submit option and its value must be a separate element of the `parameters` array — e.g. `"--executor-memory", "3G"` — because each array element is passed as a single argv token; `"--executor-memory 3G"` as one string is treated as a single unrecognized option.)