__max
New Contributor III

Hello,

Just in case, here is an example of the solution proposed above:

import org.apache.spark.sql.functions._
import org.apache.spark.sql.expressions._
import org.apache.spark.sql.types._
// Sample input: each row pairs a string key with an array of integer pairs.
val data = Seq(
  "A" -> Seq((3, 4), (5, 6), (7, 10)),
  "B" -> Seq((-1, 1))
).toDS()
data.printSchema()
root
 |-- _1: string (nullable = true)
 |-- _2: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- _1: integer (nullable = false)
 |    |    |-- _2: integer (nullable = false)
/**
 * Keeps only the pairs whose first component is strictly positive.
 *
 * The array-of-struct column reaches the function as a `Seq[Row]`, so the two
 * integer fields of every element are read by position.
 *
 * The column is declared `nullable = true` and its elements `containsNull = true`,
 * so both cases are guarded instead of failing with a NullPointerException:
 * a null array yields an empty result and null elements are skipped.
 *
 * @param s array elements as rows with two integer fields; may be null
 * @return the `(first, second)` pairs with `first > 0`, in their original order
 */
def fun(s: Seq[Row]): Seq[(Int, Int)] = {
  Option(s).getOrElse(Seq.empty[Row]).collect {
    // Single pass: the guard does the filtering, the right-hand side the mapping.
    case tuple if tuple != null && tuple.getInt(0) > 0 =>
      (tuple.getInt(0), tuple.getInt(1))
  }
}
// Wrap the Scala function `fun` as a Spark UDF so it can be applied to a column.
val funUdf = udf(fun _)
// Show the original array next to its filtered version; `false` turns off cell truncation.
// Columns use the `$"..."` interpolator rather than symbol literals ('_1), which are
// deprecated in Scala 2.13 and no longer available in Scala 3.
data.select($"_1", $"_2", funUdf($"_2").as("filtered")).show(false)
+---+----------------------+----------------------+
|_1 |_2                    |filtered              |
+---+----------------------+----------------------+
|A  |[[3,4], [5,6], [7,10]]|[[3,4], [5,6], [7,10]]|
|B  |[[-1,1]]              |[]                    |
+---+----------------------+----------------------+

Best regards,

Maxim Gekk