<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Notebook runs with error when run as a job in Get Started Discussions</title>
    <link>https://community.databricks.com/t5/get-started-discussions/notebook-runs-with-error-when-run-as-a-job/m-p/41529#M807</link>
    <description>&lt;P&gt;I am using a notebook to copy over my database on a schedule (I had no success connecting through the Data Explorer UI). When I run the notebook on its own, it works. When I run it as a scheduled job, I get this error.&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;org.apache.spark.SparkSQLException: Unsupported type ARRAY.
---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
File &amp;lt;command-3916833176743968&amp;gt;:13
      1 tables = [
      2     "daily_aggregations",
      3     "journal_entries",
   (...)
      9     "users" 
     10 ]
     12 for table in tables:
---&amp;gt; 13     remote_table = (spark.read
     14         .format("jdbc")
     15         .option("driver", driver)
     16         .option("url", url)
     17         .option("dbtable", table)
     18         .option("user", user)
     19         .option("password", password)
     20         .load()
     21     )
     22     remote_table.write.mode("overwrite").saveAsTable(table)&lt;/LI-CODE&gt;&lt;P&gt;Any ideas why it only fails when run as a job?&lt;/P&gt;&lt;P&gt;This is my code.&lt;/P&gt;&lt;LI-CODE lang="python"&gt;driver = "org.postgresql.Driver"

user = dbutils.secrets.get("leaflet-database", "user")
password = dbutils.secrets.get("leaflet-database", "password")
host = dbutils.secrets.get("leaflet-database", "host")
port = dbutils.secrets.get("leaflet-database", "port")
name = dbutils.secrets.get("leaflet-database", "name")

url = f"jdbc:postgresql://{host}:{port}/{name}"

tables = [
    "daily_aggregations",
    "journal_entries",
    "plant_actions",
    "plant_classifications",
    "plants",
    "readings",
    "sensors",
    "users" 
]

for table in tables:
    remote_table = (spark.read
        .format("jdbc")
        .option("driver", driver)
        .option("url", url)
        .option("dbtable", table)
        .option("user", user)
        .option("password", password)
        .load()
    )
    remote_table.write.mode("overwrite").saveAsTable(table)&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;This is the job definition JSON.&lt;/P&gt;&lt;LI-CODE lang="javascript"&gt;{
    "run_as": {
        "user_name": "jesse@krado.co"
    },
    "name": "migrate_leaflet_database",
    "email_notifications": {
        "no_alert_for_skipped_runs": false
    },
    "webhook_notifications": {},
    "timeout_seconds": 0,
    "schedule": {
        "quartz_cron_expression": "11 0 5 * * ?",
        "timezone_id": "America/Boise",
        "pause_status": "UNPAUSED"
    },
    "max_concurrent_runs": 1,
    "tasks": [
        {
            "task_key": "migrate_leaflet_database",
            "run_if": "ALL_SUCCESS",
            "notebook_task": {
                "notebook_path": "/Users/jesse@krado.co/leaflet-database-migration",
                "source": "WORKSPACE"
            },
            "job_cluster_key": "Job_cluster",
            "timeout_seconds": 0,
            "email_notifications": {
                "on_success": [
                    "jesse@krado.co"
                ],
                "on_failure": [
                    "jesse@krado.co"
                ]
            },
            "notification_settings": {
                "no_alert_for_skipped_runs": true,
                "no_alert_for_canceled_runs": true,
                "alert_on_last_attempt": true
            }
        }
    ],
    "job_clusters": [
        {
            "job_cluster_key": "Job_cluster",
            "new_cluster": {
                "cluster_name": "",
                "spark_version": "12.2.x-scala2.12",
                "aws_attributes": {
                    "first_on_demand": 1,
                    "availability": "SPOT_WITH_FALLBACK",
                    "zone_id": "us-east-1f",
                    "spot_bid_price_percent": 100,
                    "ebs_volume_count": 0
                },
                "node_type_id": "i3.xlarge",
                "spark_env_vars": {
                    "PYSPARK_PYTHON": "/databricks/python3/bin/python3"
                },
                "enable_elastic_disk": false,
                "data_security_mode": "SINGLE_USER",
                "runtime_engine": "STANDARD",
                "num_workers": 8
            }
        }
    ],
    "format": "MULTI_TASK"
}&lt;/LI-CODE&gt;</description>
    <pubDate>Fri, 25 Aug 2023 17:03:59 GMT</pubDate>
    <dc:creator>jlmontie</dc:creator>
    <dc:date>2023-08-25T17:03:59Z</dc:date>
    <item>
      <title>Notebook runs with error when run as a job</title>
      <link>https://community.databricks.com/t5/get-started-discussions/notebook-runs-with-error-when-run-as-a-job/m-p/41529#M807</link>
      <description>&lt;P&gt;I am using a notebook to copy over my database on a schedule (I had no success connecting through the Data Explorer UI). When I run the notebook on its own, it works. When I run it as a scheduled job, I get this error.&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;org.apache.spark.SparkSQLException: Unsupported type ARRAY.
---------------------------------------------------------------------------
Py4JJavaError                             Traceback (most recent call last)
File &amp;lt;command-3916833176743968&amp;gt;:13
      1 tables = [
      2     "daily_aggregations",
      3     "journal_entries",
   (...)
      9     "users" 
     10 ]
     12 for table in tables:
---&amp;gt; 13     remote_table = (spark.read
     14         .format("jdbc")
     15         .option("driver", driver)
     16         .option("url", url)
     17         .option("dbtable", table)
     18         .option("user", user)
     19         .option("password", password)
     20         .load()
     21     )
     22     remote_table.write.mode("overwrite").saveAsTable(table)&lt;/LI-CODE&gt;&lt;P&gt;Any ideas why it only fails when run as a job?&lt;/P&gt;&lt;P&gt;This is my code.&lt;/P&gt;&lt;LI-CODE lang="python"&gt;driver = "org.postgresql.Driver"

user = dbutils.secrets.get("leaflet-database", "user")
password = dbutils.secrets.get("leaflet-database", "password")
host = dbutils.secrets.get("leaflet-database", "host")
port = dbutils.secrets.get("leaflet-database", "port")
name = dbutils.secrets.get("leaflet-database", "name")

url = f"jdbc:postgresql://{host}:{port}/{name}"

tables = [
    "daily_aggregations",
    "journal_entries",
    "plant_actions",
    "plant_classifications",
    "plants",
    "readings",
    "sensors",
    "users" 
]

for table in tables:
    remote_table = (spark.read
        .format("jdbc")
        .option("driver", driver)
        .option("url", url)
        .option("dbtable", table)
        .option("user", user)
        .option("password", password)
        .load()
    )
    remote_table.write.mode("overwrite").saveAsTable(table)&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;This is the job definition JSON.&lt;/P&gt;&lt;LI-CODE lang="javascript"&gt;{
    "run_as": {
        "user_name": "jesse@krado.co"
    },
    "name": "migrate_leaflet_database",
    "email_notifications": {
        "no_alert_for_skipped_runs": false
    },
    "webhook_notifications": {},
    "timeout_seconds": 0,
    "schedule": {
        "quartz_cron_expression": "11 0 5 * * ?",
        "timezone_id": "America/Boise",
        "pause_status": "UNPAUSED"
    },
    "max_concurrent_runs": 1,
    "tasks": [
        {
            "task_key": "migrate_leaflet_database",
            "run_if": "ALL_SUCCESS",
            "notebook_task": {
                "notebook_path": "/Users/jesse@krado.co/leaflet-database-migration",
                "source": "WORKSPACE"
            },
            "job_cluster_key": "Job_cluster",
            "timeout_seconds": 0,
            "email_notifications": {
                "on_success": [
                    "jesse@krado.co"
                ],
                "on_failure": [
                    "jesse@krado.co"
                ]
            },
            "notification_settings": {
                "no_alert_for_skipped_runs": true,
                "no_alert_for_canceled_runs": true,
                "alert_on_last_attempt": true
            }
        }
    ],
    "job_clusters": [
        {
            "job_cluster_key": "Job_cluster",
            "new_cluster": {
                "cluster_name": "",
                "spark_version": "12.2.x-scala2.12",
                "aws_attributes": {
                    "first_on_demand": 1,
                    "availability": "SPOT_WITH_FALLBACK",
                    "zone_id": "us-east-1f",
                    "spot_bid_price_percent": 100,
                    "ebs_volume_count": 0
                },
                "node_type_id": "i3.xlarge",
                "spark_env_vars": {
                    "PYSPARK_PYTHON": "/databricks/python3/bin/python3"
                },
                "enable_elastic_disk": false,
                "data_security_mode": "SINGLE_USER",
                "runtime_engine": "STANDARD",
                "num_workers": 8
            }
        }
    ],
    "format": "MULTI_TASK"
}&lt;/LI-CODE&gt;</description>
      <pubDate>Fri, 25 Aug 2023 17:03:59 GMT</pubDate>
      <guid>https://community.databricks.com/t5/get-started-discussions/notebook-runs-with-error-when-run-as-a-job/m-p/41529#M807</guid>
      <dc:creator>jlmontie</dc:creator>
      <dc:date>2023-08-25T17:03:59Z</dc:date>
    </item>
    <item>
      <title>Re: Notebook runs with error when run as a job</title>
      <link>https://community.databricks.com/t5/get-started-discussions/notebook-runs-with-error-when-run-as-a-job/m-p/44274#M1000</link>
      <description>&lt;P&gt;Hi, the error output you posted is minimal. Could you please post the whole error, if possible?&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;Also, please tag&amp;nbsp;@Debayan&amp;nbsp;in your next response so that I am notified. Thank you!&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 11 Sep 2023 06:48:58 GMT</pubDate>
      <guid>https://community.databricks.com/t5/get-started-discussions/notebook-runs-with-error-when-run-as-a-job/m-p/44274#M1000</guid>
      <dc:creator>Debayan</dc:creator>
      <dc:date>2023-09-11T06:48:58Z</dc:date>
    </item>
  </channel>
</rss>

