<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic applyInPandas function hangs in runtime 13.3 LTS ML and above in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/applyinpandas-function-hangs-in-runtime-13-3-lts-ml-and-above/m-p/56795#M30653</link>
    <description>&lt;P&gt;applyInPandas hangs during execution on Databricks Runtime 13.3 LTS ML and above, while the same workload completes on 12.2 LTS ML with the same cluster configuration. The first post below contains the executor thread dump and a reproduction script.&lt;/P&gt;</description>
    <pubDate>Tue, 09 Jan 2024 16:34:03 GMT</pubDate>
    <dc:creator>DmitriyLamzin</dc:creator>
    <dc:date>2024-01-09T16:34:03Z</dc:date>
    <item>
      <title>applyInPandas function hangs in runtime 13.3 LTS ML and above</title>
      <link>https://community.databricks.com/t5/data-engineering/applyinpandas-function-hangs-in-runtime-13-3-lts-ml-and-above/m-p/56795#M30653</link>
      <description>&lt;P&gt;Hello, I recently tried to upgrade my runtime environment to 13.3 LTS ML and found that it breaks my workload during applyInPandas.&lt;/P&gt;&lt;P&gt;My job started to hang during the applyInPandas execution. The thread dump shows that it hangs on direct memory allocation:&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;sun.misc.Unsafe.setMemory(Native Method)
sun.misc.Unsafe.setMemory(Unsafe.java:529)
org.apache.spark.unsafe.Platform.allocateMemory(Platform.java:202)
org.apache.spark.unsafe.Platform.allocateDirectBuffer(Platform.java:237)
org.apache.spark.util.DirectByteBufferOutputStream.grow(DirectByteBufferOutputStream.scala:62)
org.apache.spark.util.DirectByteBufferOutputStream.ensureCapacity(DirectByteBufferOutputStream.scala:49)
org.apache.spark.util.DirectByteBufferOutputStream.write(DirectByteBufferOutputStream.scala:44)
java.io.DataOutputStream.write(DataOutputStream.java:107) =&amp;gt; holding Monitor(java.io.DataOutputStream@1991477395})
java.nio.channels.Channels$WritableByteChannelImpl.write(Channels.java:458) =&amp;gt; holding Monitor(java.lang.Object@2018869193})
org.apache.arrow.vector.ipc.WriteChannel.write(WriteChannel.java:112)
org.apache.arrow.vector.ipc.WriteChannel.write(WriteChannel.java:135)
org.apache.arrow.vector.ipc.message.MessageSerializer.writeBatchBuffers(MessageSerializer.java:303)
org.apache.arrow.vector.ipc.message.MessageSerializer.serialize(MessageSerializer.java:276)
org.apache.arrow.vector.ipc.ArrowWriter.writeRecordBatch(ArrowWriter.java:136)
org.apache.arrow.vector.ipc.ArrowWriter.writeBatch(ArrowWriter.java:122)
org.apache.spark.sql.execution.python.BasicPythonArrowInput.writeNextInputToArrowStream(PythonArrowInput.scala:149)
org.apache.spark.sql.execution.python.BasicPythonArrowInput.writeNextInputToArrowStream$(PythonArrowInput.scala:134)
org.apache.spark.sql.execution.python.ArrowPythonRunner.writeNextInputToArrowStream(ArrowPythonRunner.scala:30)
org.apache.spark.sql.execution.python.PythonArrowInput$ArrowWriter.writeNextInputToStream(PythonArrowInput.scala:123)
org.apache.spark.api.python.BasePythonRunner$ReaderInputStream.writeAdditionalInputToPythonWorker(PythonRunner.scala:928)
org.apache.spark.api.python.BasePythonRunner$ReaderInputStream.read(PythonRunner.scala:851)
java.io.BufferedInputStream.fill(BufferedInputStream.java:246)
java.io.BufferedInputStream.read(BufferedInputStream.java:265) =&amp;gt; holding Monitor(java.io.BufferedInputStream@1972989904})
java.io.DataInputStream.readInt(DataInputStream.java:387)
org.apache.spark.sql.execution.python.PythonArrowOutput$$anon$1.read(PythonArrowOutput.scala:104)
org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:635)
org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:491)
scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
org.apache.spark.sql.execution.datasources.FileFormatDataWriter.writeWithIterator(FileFormatDataWriter.scala:91)
org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$executeTask$2(FileFormatWriter.scala:531)
org.apache.spark.sql.execution.datasources.FileFormatWriter$$$Lambda$2268/313061404.apply(Unknown Source)
org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1743)
org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:538)
org.apache.spark.sql.execution.datasources.WriteFilesExec.$anonfun$doExecuteWrite$1(WriteFiles.scala:116)
org.apache.spark.sql.execution.datasources.WriteFilesExec$$Lambda$2117/703248354.apply(Unknown Source)
org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:931)
org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:931)
org.apache.spark.rdd.RDD$$Lambda$2113/847512910.apply(Unknown Source)
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:60)
org.apache.spark.rdd.RDD.$anonfun$computeOrReadCheckpoint$1(RDD.scala:407)
org.apache.spark.rdd.RDD$$Lambda$1350/1516776629.apply(Unknown Source)
com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:404)
org.apache.spark.rdd.RDD.iterator(RDD.scala:371)
org.apache.spark.scheduler.ResultTask.$anonfun$runTask$3(ResultTask.scala:82)
org.apache.spark.scheduler.ResultTask$$Lambda$2058/1782952762.apply(Unknown Source)
com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
org.apache.spark.scheduler.ResultTask.$anonfun$runTask$1(ResultTask.scala:82)
org.apache.spark.scheduler.ResultTask$$Lambda$2055/2060074874.apply(Unknown Source)
com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:62)
org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:196)
org.apache.spark.scheduler.Task.doRunTask(Task.scala:181)
org.apache.spark.scheduler.Task.$anonfun$run$5(Task.scala:146)
org.apache.spark.scheduler.Task$$Lambda$1135/1245833457.apply(Unknown Source)
com.databricks.unity.EmptyHandle$.runWithAndClose(UCSHandle.scala:125)
org.apache.spark.scheduler.Task.$anonfun$run$1(Task.scala:146)
org.apache.spark.scheduler.Task$$Lambda$1117/2113811715.apply(Unknown Source)
com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
org.apache.spark.scheduler.Task.run(Task.scala:99)
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$8(Executor.scala:897)
org.apache.spark.executor.Executor$TaskRunner$$Lambda$1115/949204975.apply(Unknown Source)
org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1709)
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:900)
org.apache.spark.executor.Executor$TaskRunner$$Lambda$1071/753832407.apply$mcV$sp(Unknown Source)
scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:795)
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
java.lang.Thread.run(Thread.java:750)

&lt;/LI-CODE&gt;&lt;P&gt;The thing is that the class&lt;/P&gt;&lt;DIV&gt;&lt;PRE&gt;DirectByteBufferOutputStream&lt;/PRE&gt;&lt;/DIV&gt;&lt;P&gt;was supposed to be introduced only in Spark 4.0.0 (&lt;A href="https://github.com/apache/spark/commit/8aaff55839493e80e3ce376f928c04aa8f31d18c#diff-de88f98356725bde7b3d1f4ec63f1406f651bc70a4a1980887168e6f37e98016" target="_self"&gt;SPARK-44705&lt;/A&gt;), and it corresponds to significant changes in PythonRunner.&lt;/P&gt;&lt;P&gt;It looks like there is a problem with allocating a relatively large amount of direct memory.&lt;/P&gt;&lt;P&gt;Here are the steps to reproduce the issue:&lt;/P&gt;&lt;P&gt;driver: r5d.large&lt;/P&gt;&lt;P&gt;executor: r5d.xlarge&lt;/P&gt;&lt;LI-CODE lang="python"&gt;from random import random
import pyspark.sql.functions as F
from pyspark.sql import SparkSession
import pandas as pd
import time
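
# The sleep in the UDF below simulates long-running per-group work; while the
# Python worker is busy, the JVM side presumably keeps writing Arrow input
# batches to it (the DirectByteBufferOutputStream growth in the thread dump).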

def long_running_pandas_udf(pdf: pd.DataFrame) -> pd.DataFrame:
  time.sleep(random() * 20)
  print("printing for simulating logging from python function")
  return pdf


def test_df():
    data = []
    # create a big table of data; we need to make it relatively heavy.
    dict_ = {f"some_field{i}": f"{random()}" for i in range(36)}
    for i in range(100_000):
        dict_1 = dict(dict_)
        dict_1["group_key"] = '0'
        data.append(dict_1)
        dict_2 = dict(dict_)
        dict_2["group_key"] = '1'
        data.append(dict_2)
    df = spark.createDataFrame(data)
    return df
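
# Size check: 100,000 iterations x 2 groups = 200,000 rows of 37 string columns;
# the self-unions below multiply that by 2^4 = 16, i.e. roughly 3.2 million rows.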

# `spark` is predefined in Databricks notebooks; getOrCreate keeps the snippet self-contained.
spark = SparkSession.builder.getOrCreate()
some_path = "/tmp/apply_in_pandas_repro"  # placeholder output path

df = test_df()

# increase the size of the final dataset even more
ndf = df
for i in range(4):
  ndf = ndf.unionAll(ndf)

result = ndf.groupBy("group_key").applyInPandas(long_running_pandas_udf, schema=df.schema)
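# A possible mitigation to experiment with (an assumption, not a verified fix
# for this hang): reduce how many rows go into each Arrow batch that Spark
# serializes to the Python worker, so each direct-buffer allocation stays
# smaller. spark.sql.execution.arrow.maxRecordsPerBatch is a standard Spark
# SQL conf (default 10000). Uncomment to try:
# spark.conf.set("spark.sql.execution.arrow.maxRecordsPerBatch", "2000")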

result.write.mode("overwrite").parquet(some_path)&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;This code hangs with the thread dump above.&amp;nbsp;&lt;/P&gt;&lt;P&gt;I'll also include screenshots of memory consumption, etc.&lt;/P&gt;&lt;P&gt;Note that this code finishes successfully with the same cluster config on&amp;nbsp;runtime 12.2 LTS ML.&lt;/P&gt;&lt;P&gt;So there are two concerns:&lt;/P&gt;&lt;P&gt;1. looks like runtimes contain patched versions of spark. These patches are poorly tested.&lt;/P&gt;&lt;P&gt;2. This workload will pass if I significantly increase node sizes, but it is meaningless if the job succeeds on the same cluster with the previous version of the runtime&lt;/P&gt;</description>
      <pubDate>Tue, 09 Jan 2024 16:34:03 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/applyinpandas-function-hangs-in-runtime-13-3-lts-ml-and-above/m-p/56795#M30653</guid>
      <dc:creator>DmitriyLamzin</dc:creator>
      <dc:date>2024-01-09T16:34:03Z</dc:date>
    </item>
    <item>
      <title>Re: applyInPandas function hangs in runtime 13.3 LTS ML and above</title>
      <link>https://community.databricks.com/t5/data-engineering/applyinpandas-function-hangs-in-runtime-13-3-lts-ml-and-above/m-p/57943#M30963</link>
      <description>&lt;P&gt;Same post:&amp;nbsp;&lt;A href="https://community.databricks.com/t5/data-engineering/applyinpandas-hangs-on-runtime-13-3-lts-ml-and-above/td-p/56796" target="_blank"&gt;https://community.databricks.com/t5/data-engineering/applyinpandas-hangs-on-runtime-13-3-lts-ml-and-above/td-p/56796&lt;/A&gt;. Could you please share your expectation here?&lt;/P&gt;</description>
      <pubDate>Sat, 20 Jan 2024 07:12:01 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/applyinpandas-function-hangs-in-runtime-13-3-lts-ml-and-above/m-p/57943#M30963</guid>
      <dc:creator>Debayan</dc:creator>
      <dc:date>2024-01-20T07:12:01Z</dc:date>
    </item>
    <item>
      <title>Re: applyInPandas function hangs in runtime 13.3 LTS ML and above</title>
      <link>https://community.databricks.com/t5/data-engineering/applyinpandas-function-hangs-in-runtime-13-3-lts-ml-and-above/m-p/98081#M39603</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.databricks.com/t5/user/viewprofilepage/user-id/26078"&gt;@Debayan&lt;/a&gt;,&amp;nbsp;the link just redirects to the same thread, doesn't it? Is there any update on this issue?&lt;/P&gt;&lt;P&gt;We are seeing a similar issue with jobs hanging when using mapInPandas.&lt;/P&gt;</description>
      <pubDate>Thu, 07 Nov 2024 13:34:44 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/applyinpandas-function-hangs-in-runtime-13-3-lts-ml-and-above/m-p/98081#M39603</guid>
      <dc:creator>Marcin_Milewski</dc:creator>
      <dc:date>2024-11-07T13:34:44Z</dc:date>
    </item>
  </channel>
</rss>

