<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic How to resolve our of memory error? in Data Engineering</title>
    <link>https://community.databricks.com/t5/data-engineering/how-to-resolve-our-of-memory-error/m-p/27246#M19123</link>
    <description>&lt;P&gt;&lt;/P&gt;
&lt;P&gt;I have a data bricks notebook hosted on Azure. I am having this problem when doing INNER JOIN. I tried creating a much higher cluster configuration but it still making outOfMemoryError.&lt;/P&gt;
&lt;P&gt;org.apache.spark.memory.SparkOutOfMemoryError: Unable to acquire 262144 bytes of memory, got 65536&lt;/P&gt;
&lt;PRE&gt;&lt;CODE&gt;    at org.apache.spark.memory.MemoryConsumer.throwOom(MemoryConsumer.java:157)
    at org.apache.spark.memory.MemoryConsumer.allocateArray(MemoryConsumer.java:98)
    at org.apache.spark.unsafe.map.BytesToBytesMap.allocate(BytesToBytesMap.java:812)
    at org.apache.spark.unsafe.map.BytesToBytesMap.&amp;lt;init&amp;gt;(BytesToBytesMap.java:204)
    at org.apache.spark.unsafe.map.BytesToBytesMap.&amp;lt;init&amp;gt;(BytesToBytesMap.java:219)
    at org.apache.spark.sql.execution.UnsafeFixedWidthAggregationMap.&amp;lt;init&amp;gt;(UnsafeFixedWidthAggregationMap.java:101)
    at org.apache.spark.sql.execution.aggregate.HashAggregateExec.createHashMap(HashAggregateExec.scala:569)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage944.sort_addToSorter_0$(Unknown Source)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage944.processNext(Unknown Source)
    at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
    at org.apache.spark.sql.execution.WholeStageCodegenExec$anonfun$13$anon$1.hasNext(WholeStageCodegenExec.scala:640)
    at org.apache.spark.sql.execution.RowIteratorFromScala.advanceNext(RowIterator.scala:83)
    at org.apache.spark.sql.execution.joins.SortMergeJoinScanner.advancedStreamed(SortMergeJoinExec.scala:916)
    at org.apache.spark.sql.execution.joins.SortMergeJoinScanner.findNextOuterJoinRows(SortMergeJoinExec.scala:875)
    at org.apache.spark.sql.execution.joins.OneSideOuterIterator.advanceStream(SortMergeJoinExec.scala:1048)
    at org.apache.spark.sql.execution.joins.OneSideOuterIterator.advanceNext(SortMergeJoinExec.scala:1084)
    at org.apache.spark.sql.execution.RowIteratorToScala.hasNext(RowIterator.scala:68)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage975.agg_doAggregateWithKeys_0$(Unknown Source)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage975.sort_addToSorter_0$(Unknown Source)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage975.processNext(Unknown Source)
    at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
    at org.apache.spark.sql.execution.WholeStageCodegenExec$anonfun$13$anon$1.hasNext(WholeStageCodegenExec.scala:640)
    at org.apache.spark.sql.execution.RowIteratorFromScala.advanceNext(RowIterator.scala:83)
    at org.apache.spark.sql.execution.joins.SortMergeJoinScanner.advancedBufferedToRowWithNullFreeJoinKey(SortMergeJoinExec.scala:933)
    at org.apache.spark.sql.execution.joins.SortMergeJoinScanner.&amp;lt;init&amp;gt;(SortMergeJoinExec.scala:803)
    at org.apache.spark.sql.execution.joins.SortMergeJoinExec$anonfun$doExecute$1.apply(SortMergeJoinExec.scala:223)
    at org.apache.spark.sql.execution.joins.SortMergeJoinExec$anonfun$doExecute$1.apply(SortMergeJoinExec.scala:158)
    at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:101)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:353)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:317)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:60)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:353)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:317)
    at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:101)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:353)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:317)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:60)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:353)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:317)
    at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:101)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:353)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:317)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:60)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:353)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:317)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:60)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:353)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:317)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:60)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:353)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:317)
    at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:101)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:353)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:317)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:60)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:353)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:317)
    at org.apache.spark.sql.execution.joins.UnsafeCartesianRDD.compute(CartesianProductExec.scala:52)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:353)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:317)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:60)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:353)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:317)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
    at org.apache.spark.scheduler.Task.doRunTask(Task.scala:140)
    at org.apache.spark.scheduler.Task.run(Task.scala:113)
    at org.apache.spark.executor.Executor$TaskRunner$anonfun$13.apply(Executor.scala:533)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1541)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:539)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)&lt;/CODE&gt;&lt;/PRE&gt; 
&lt;P&gt;&lt;/P&gt;</description>
    <pubDate>Mon, 23 Mar 2020 22:23:16 GMT</pubDate>
    <dc:creator>Juan_MiguelTrin</dc:creator>
    <dc:date>2020-03-23T22:23:16Z</dc:date>
    <item>
      <title>How to resolve our of memory error?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-resolve-our-of-memory-error/m-p/27246#M19123</link>
      <description>&lt;P&gt;&lt;/P&gt;
&lt;P&gt;I have a data bricks notebook hosted on Azure. I am having this problem when doing INNER JOIN. I tried creating a much higher cluster configuration but it still making outOfMemoryError.&lt;/P&gt;
&lt;P&gt;org.apache.spark.memory.SparkOutOfMemoryError: Unable to acquire 262144 bytes of memory, got 65536&lt;/P&gt;
&lt;PRE&gt;&lt;CODE&gt;    at org.apache.spark.memory.MemoryConsumer.throwOom(MemoryConsumer.java:157)
    at org.apache.spark.memory.MemoryConsumer.allocateArray(MemoryConsumer.java:98)
    at org.apache.spark.unsafe.map.BytesToBytesMap.allocate(BytesToBytesMap.java:812)
    at org.apache.spark.unsafe.map.BytesToBytesMap.&amp;lt;init&amp;gt;(BytesToBytesMap.java:204)
    at org.apache.spark.unsafe.map.BytesToBytesMap.&amp;lt;init&amp;gt;(BytesToBytesMap.java:219)
    at org.apache.spark.sql.execution.UnsafeFixedWidthAggregationMap.&amp;lt;init&amp;gt;(UnsafeFixedWidthAggregationMap.java:101)
    at org.apache.spark.sql.execution.aggregate.HashAggregateExec.createHashMap(HashAggregateExec.scala:569)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage944.sort_addToSorter_0$(Unknown Source)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage944.processNext(Unknown Source)
    at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
    at org.apache.spark.sql.execution.WholeStageCodegenExec$anonfun$13$anon$1.hasNext(WholeStageCodegenExec.scala:640)
    at org.apache.spark.sql.execution.RowIteratorFromScala.advanceNext(RowIterator.scala:83)
    at org.apache.spark.sql.execution.joins.SortMergeJoinScanner.advancedStreamed(SortMergeJoinExec.scala:916)
    at org.apache.spark.sql.execution.joins.SortMergeJoinScanner.findNextOuterJoinRows(SortMergeJoinExec.scala:875)
    at org.apache.spark.sql.execution.joins.OneSideOuterIterator.advanceStream(SortMergeJoinExec.scala:1048)
    at org.apache.spark.sql.execution.joins.OneSideOuterIterator.advanceNext(SortMergeJoinExec.scala:1084)
    at org.apache.spark.sql.execution.RowIteratorToScala.hasNext(RowIterator.scala:68)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage975.agg_doAggregateWithKeys_0$(Unknown Source)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage975.sort_addToSorter_0$(Unknown Source)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage975.processNext(Unknown Source)
    at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
    at org.apache.spark.sql.execution.WholeStageCodegenExec$anonfun$13$anon$1.hasNext(WholeStageCodegenExec.scala:640)
    at org.apache.spark.sql.execution.RowIteratorFromScala.advanceNext(RowIterator.scala:83)
    at org.apache.spark.sql.execution.joins.SortMergeJoinScanner.advancedBufferedToRowWithNullFreeJoinKey(SortMergeJoinExec.scala:933)
    at org.apache.spark.sql.execution.joins.SortMergeJoinScanner.&amp;lt;init&amp;gt;(SortMergeJoinExec.scala:803)
    at org.apache.spark.sql.execution.joins.SortMergeJoinExec$anonfun$doExecute$1.apply(SortMergeJoinExec.scala:223)
    at org.apache.spark.sql.execution.joins.SortMergeJoinExec$anonfun$doExecute$1.apply(SortMergeJoinExec.scala:158)
    at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:101)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:353)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:317)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:60)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:353)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:317)
    at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:101)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:353)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:317)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:60)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:353)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:317)
    at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:101)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:353)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:317)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:60)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:353)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:317)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:60)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:353)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:317)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:60)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:353)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:317)
    at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:101)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:353)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:317)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:60)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:353)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:317)
    at org.apache.spark.sql.execution.joins.UnsafeCartesianRDD.compute(CartesianProductExec.scala:52)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:353)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:317)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:60)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:353)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:317)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
    at org.apache.spark.scheduler.Task.doRunTask(Task.scala:140)
    at org.apache.spark.scheduler.Task.run(Task.scala:113)
    at org.apache.spark.executor.Executor$TaskRunner$anonfun$13.apply(Executor.scala:533)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1541)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:539)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)&lt;/CODE&gt;&lt;/PRE&gt; 
&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 23 Mar 2020 22:23:16 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-resolve-our-of-memory-error/m-p/27246#M19123</guid>
      <dc:creator>Juan_MiguelTrin</dc:creator>
      <dc:date>2020-03-23T22:23:16Z</dc:date>
    </item>
    <item>
      <title>Re: How to resolve our of memory error?</title>
      <link>https://community.databricks.com/t5/data-engineering/how-to-resolve-our-of-memory-error/m-p/27247#M19124</link>
      <description>&lt;P&gt;&lt;/P&gt;&lt;P&gt;Hi @Juan Miguel Trinidad,&lt;/P&gt;&lt;P&gt;&lt;/P&gt;can you please the below suggestions,&lt;P&gt;&lt;/P&gt;&lt;A href="http://apache-spark-developers-list.1001551.n3.nabble.com/java-lang-OutOfMemoryError-Unable-to-acquire-bytes-of-memory-td16773.html" target="_blank"&gt;http://apache-spark-developers-list.1001551.n3.nabble.com/java-lang-OutOfMemoryError-Unable-to-acquire-bytes-of-memory-td16773.html&lt;/A&gt;</description>
      <pubDate>Mon, 30 Mar 2020 20:30:56 GMT</pubDate>
      <guid>https://community.databricks.com/t5/data-engineering/how-to-resolve-our-of-memory-error/m-p/27247#M19124</guid>
      <dc:creator>shyam_9</dc:creator>
      <dc:date>2020-03-30T20:30:56Z</dc:date>
    </item>
  </channel>
</rss>

