<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Databricks cluster pools with init scripts in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/databricks-cluster-pools-with-init-scripts/m-p/97644#M39518</link>
    <description>&lt;P&gt;Are you still facing issues with the job run submit API endpoint?&lt;/P&gt;</description>
    <pubDate>Mon, 04 Nov 2024 21:55:23 GMT</pubDate>
    <dc:creator>Walter_C</dc:creator>
    <dc:date>2024-11-04T21:55:23Z</dc:date>
    <item>
      <title>Databricks cluster pools with init scripts</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-cluster-pools-with-init-scripts/m-p/92854#M38560</link>
      <description>&lt;P&gt;Ability to submit a single job with cluster pools and init scripts&lt;/P&gt;&lt;P&gt;for the following payload:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="javascript"&gt;{
    "run_name": "A multitask job run",
    "timeout_seconds": 86400,
    "tasks": [
        {
            "task_key": "task_1",
            "depends_on": [],
            "notebook_task": {
                "notebook_path": "/Workspace/Users/johndoe/task_1",
                "source": "WORKSPACE"
            },
            "new_cluster": {
                "spark_version": "15.3.x-scala2.12",
                "instance_pool_id": "0926-080838-lute60-pool-91skna4w",
                "driver_instance_pool_id": "0926-080838-lute60-pool-91skna4w",
                "num_workers": 1,
                "init_scripts": [
                    {
                        "s3": {
                            "destination": "s3://bucket_name/init_scripts/install_utils.sh"
                        }
                    }
                ]
            }
        }
    ]
}&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;this endpoint&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;/api/2.1/jobs/runs/submit&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;runs the job &lt;STRONG&gt;without&lt;/STRONG&gt; passing the init scripts.&lt;/P&gt;&lt;P&gt;While if I send the same payload to&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;/api/2.1/jobs/create&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;it creates a job that uses both cluster pools and the init scripts.&lt;/P&gt;&lt;P&gt;I'm using the Airflow operators such as&amp;nbsp;DatabricksSubmitRunOperator (or&amp;nbsp;DatabricksNotebookOperator),&lt;BR /&gt;which both invoke the submit endpoint, so if I want to use cluster pools, the init scripts suddenly don't apply.&lt;/P&gt;&lt;P&gt;Please let me know why this behavior is happening. Is it on purpose? A known limitation?&lt;/P&gt;&lt;P&gt;Thank you&lt;/P&gt;</description>
      <pubDate>Sat, 05 Oct 2024 19:59:54 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-cluster-pools-with-init-scripts/m-p/92854#M38560</guid>
      <dc:creator>radix</dc:creator>
      <dc:date>2024-10-05T19:59:54Z</dc:date>
    </item>
    <item>
      <title>Re: Databricks cluster pools with init scripts</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-cluster-pools-with-init-scripts/m-p/97644#M39518</link>
      <description>&lt;P&gt;Are you still facing issues with the job run submit API endpoint?&lt;/P&gt;</description>
      <pubDate>Mon, 04 Nov 2024 21:55:23 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-cluster-pools-with-init-scripts/m-p/97644#M39518</guid>
      <dc:creator>Walter_C</dc:creator>
      <dc:date>2024-11-04T21:55:23Z</dc:date>
    </item>
  </channel>
</rss>

