Options
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
10-18-2017 05:40 PM
Hello,
Just in case, here is an example of the solution proposed above:
import org.apache.spark.sql.functions._
import org.apache.spark.sql.expressions._
import org.apache.spark.sql.types._
// Sample input: each record pairs a string key with a list of (Int, Int) tuples.
// "A" holds only tuples with a positive first component; "B" holds a single tuple
// whose first component is negative.
val data = Seq(
  "A" -> Seq(3 -> 4, 5 -> 6, 7 -> 10),
  "B" -> Seq(-1 -> 1)
).toDS()
// Print the inferred schema to the console.
data.printSchema()
root
|-- _1: string (nullable = true)
|-- _2: array (nullable = true)
| |-- element: struct (containsNull = true)
| | |-- _1: integer (nullable = false)
| | |-- _2: integer (nullable = false)
/**
 * Keeps only the pairs whose first component is strictly positive.
 *
 * Each input [[Row]] is read positionally: field 0 is the first integer of the
 * pair and field 1 is the second.
 *
 * @param s the rows to filter; may be `null`, and may contain `null` elements
 * @return the surviving pairs as `(field 0, field 1)` tuples in their original
 *         order, or `null` when `s` itself is `null`
 */
def fun(s: Seq[Row]): Seq[(Int, Int)] = {
  // `Option(...)` turns a null input into None; `orNull` hands the null back,
  // so a null array yields a null result instead of a NullPointerException.
  Option(s).map { rows =>
    rows.collect {
      // Null elements are skipped rather than dereferenced.
      case row if row != null && row.getInt(0) > 0 => (row.getInt(0), row.getInt(1))
    }
  }.orNull
}
// Wrap `fun` as a user-defined function so it can be applied to a column.
val funUdf = udf((rows: Seq[Row]) => fun(rows))
// Display both original columns next to the filtered array, without truncating cell contents.
data.select(col("_1"), col("_2"), funUdf(col("_2")).as("filtered")).show(truncate = false)
+---+----------------------+----------------------+
|_1 |_2 |filtered |
+---+----------------------+----------------------+
|A |[[3,4], [5,6], [7,10]]|[[3,4], [5,6], [7,10]]|
|B |[[-1,1]] |[] |
+---+----------------------+----------------------+
Best regards,
Maxim Gekk