<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Databricks Job is slower. in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/databricks-job-is-slower/m-p/16751#M10872</link>
    <description>&lt;P&gt;Hello, I have a Databricks question. A Dataframe job that writes in an s3 bucket usually takes 8 minutes to finish, but now it takes from 8 to 9 hours to complete. Does anybody have some clues about this behavior?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;the data frame size is about 300 or 400 records&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;it is a simple query in a delta table:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;val results = spark
.table("table")
.filter()
.filter(by_date)
.drop(some_columns)
.select(a_struct_field)
.withColumn("image", image) 
&amp;nbsp;
listofString.foreach { mystring =&amp;gt;
  println(s"start writing .json to S3 for ${results}")
  results
  .filter($"struct.field.result" === results)
  .coalesce(1)
  .write
  .mode(SaveMode.Overwrite)
  .json(s"${filePath}/temp_${results}")
  println(s"complete writing .json to S3 for ${results}")
}&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;Thanks in advance&lt;/P&gt;</description>
    <pubDate>Fri, 24 Jun 2022 18:18:52 GMT</pubDate>
    <dc:creator>Raymond_Garcia</dc:creator>
    <dc:date>2022-06-24T18:18:52Z</dc:date>
    <item>
      <title>Databricks Job is slower.</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-job-is-slower/m-p/16751#M10872</link>
      <description>&lt;P&gt;Hello, I have a Databricks question. A Dataframe job that writes in an s3 bucket usually takes 8 minutes to finish, but now it takes from 8 to 9 hours to complete. Does anybody have some clues about this behavior?&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;the data frame size is about 300 or 400 records&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;it is a simple query in a delta table:&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;PRE&gt;&lt;CODE&gt;val results = spark
.table("table")
.filter()
.filter(by_date)
.drop(some_columns)
.select(a_struct_field)
.withColumn("image", image) 
&amp;nbsp;
listofString.foreach { mystring =&amp;gt;
  println(s"start writing .json to S3 for ${results}")
  results
  .filter($"struct.field.result" === results)
  .coalesce(1)
  .write
  .mode(SaveMode.Overwrite)
  .json(s"${filePath}/temp_${results}")
  println(s"complete writing .json to S3 for ${results}")
}&lt;/CODE&gt;&lt;/PRE&gt;&lt;P&gt;Thanks in advance&lt;/P&gt;</description>
      <pubDate>Fri, 24 Jun 2022 18:18:52 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-job-is-slower/m-p/16751#M10872</guid>
      <dc:creator>Raymond_Garcia</dc:creator>
      <dc:date>2022-06-24T18:18:52Z</dc:date>
    </item>
    <item>
      <title>Re: Databricks Job is slower.</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-job-is-slower/m-p/16753#M10874</link>
      <description>&lt;P&gt;Hi thanks, I will check them out, and I will let you know. &lt;span class="lia-unicode-emoji" title=":slightly_smiling_face:"&gt;🙂&lt;/span&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 28 Jun 2022 21:35:20 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-job-is-slower/m-p/16753#M10874</guid>
      <dc:creator>Raymond_Garcia</dc:creator>
      <dc:date>2022-06-28T21:35:20Z</dc:date>
    </item>
    <item>
      <title>Re: Databricks Job is slower.</title>
      <link>https://community.databricks.com/t5/data-engineering/databricks-job-is-slower/m-p/16754#M10875</link>
      <description>&lt;P&gt;Hello, I was able to reduce the time significantly. I used the OPTIMIZE keyword before starting processing.&lt;/P&gt;&lt;P&gt;Thanks!&lt;/P&gt;</description>
      <pubDate>Wed, 29 Jun 2022 15:27:18 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/databricks-job-is-slower/m-p/16754#M10875</guid>
      <dc:creator>Raymond_Garcia</dc:creator>
      <dc:date>2022-06-29T15:27:18Z</dc:date>
    </item>
  </channel>
</rss>

