<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Specifying cluster on running a job in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/specifying-cluster-on-running-a-job/m-p/6460#M2592</link>
    <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Let's say that I am starting jobs with different parameters at a certain time each day in the following manner:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;response = requests.post(
"https://%s/api/2.0/jobs/run-now" % (DOMAIN),
headers={"Authorization": "Bearer %s" % TOKEN}, json={
            "job_id": job_id,
            "notebook_params": {
                "country_name": str(country_id),
            }
        })
&amp;nbsp;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I was wondering how I could go about specifying a specific cluster size for a run of a workflow? And how do you specify that the cluster should be shared among the tasks in the workflow? This could be interesting when you have one country_id for which a bigger cluster is needed compared to all other countries and other similar use-cases.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thanks in advance.&lt;/P&gt;</description>
    <pubDate>Tue, 04 Apr 2023 08:11:36 GMT</pubDate>
    <dc:creator>Tjadi</dc:creator>
    <dc:date>2023-04-04T08:11:36Z</dc:date>
    <item>
      <title>Specifying cluster on running a job</title>
      <link>https://community.databricks.com/t5/data-engineering/specifying-cluster-on-running-a-job/m-p/6460#M2592</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Let's say that I am starting jobs with different parameters at a certain time each day in the following manner:&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;response = requests.post(
"https://%s/api/2.0/jobs/run-now" % (DOMAIN),
headers={"Authorization": "Bearer %s" % TOKEN}, json={
            "job_id": job_id,
            "notebook_params": {
                "country_name": str(country_id),
            }
        })
&amp;nbsp;&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;I was wondering how I could go about specifying a specific cluster size for a run of a workflow? And how do you specify that the cluster should be shared among the tasks in the workflow? This could be interesting when you have one country_id for which a bigger cluster is needed compared to all other countries and other similar use-cases.&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Thanks in advance.&lt;/P&gt;</description>
      <pubDate>Tue, 04 Apr 2023 08:11:36 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/specifying-cluster-on-running-a-job/m-p/6460#M2592</guid>
      <dc:creator>Tjadi</dc:creator>
      <dc:date>2023-04-04T08:11:36Z</dc:date>
    </item>
    <item>
      <title>Re: Specifying cluster on running a job</title>
      <link>https://community.databricks.com/t5/data-engineering/specifying-cluster-on-running-a-job/m-p/6461#M2593</link>
      <description>&lt;P&gt;@Tjadi Peeters&amp;nbsp;You can select the Autoscaling/Enhanced Scaling option in workflows, which will scale based on workload.&lt;/P&gt;</description>
      <pubDate>Tue, 04 Apr 2023 16:13:42 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/specifying-cluster-on-running-a-job/m-p/6461#M2593</guid>
      <dc:creator>karthik_p</dc:creator>
      <dc:date>2023-04-04T16:13:42Z</dc:date>
    </item>
    <item>
      <title>Re: Specifying cluster on running a job</title>
      <link>https://community.databricks.com/t5/data-engineering/specifying-cluster-on-running-a-job/m-p/6462#M2594</link>
      <description>&lt;P&gt;Thanks for your reply. The autoscaling functionality I am aware of only scales the number of workers - or is there another one? I am looking to start jobs with different types of workers (i.e. one of the jobs starts with an m5d.2xlarge while the other has an m5d.4xlarge).&lt;/P&gt;</description>
      <pubDate>Tue, 04 Apr 2023 16:42:09 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/specifying-cluster-on-running-a-job/m-p/6462#M2594</guid>
      <dc:creator>Tjadi</dc:creator>
      <dc:date>2023-04-04T16:42:09Z</dc:date>
    </item>
  </channel>
</rss>

