Функцию `explode` можно использовать, чтобы развернуть столбец `information` (массив массивов) в отдельные строки — по одному вложенному массиву на строку, — а затем отфильтровать новый столбец по значению `matchID`:
// Sample data: a label, the matchID to look for, and an array of numeric
// arrays in "information".
// NOTE(review): relies on `spark.implicits._` and
// `org.apache.spark.sql.functions._` being in scope (as in spark-shell) — confirm.
val df = Seq(
  ("First", 123, Array(Array(1.2, 4.5, 837), Array(1.4, 4.8, 123), Array(4.1, 4.7, 143))),
  ("First", 234, Array(Array(4.8, 8.9, 234), Array(1.1, 4.2, 321), Array(3.9, 5.7, 521))),
  ("Second", 345, Array(Array(7.7, 8.1, 457), Array(4.5, 4.9, 345), Array(1.9, 2.8, 776)))
).toDF("which_one", "matchID", "information")

// Position (1-based, as expected by element_at) of the value to extract from
// the matching inner array: 1 for "First", 2 for "Second", 3 for anything else.
val indexColumn = when($"which_one" === "First", 1)
  .when($"which_one" === "Second", 2)
  .otherwise(3)

// One output row per inner array of "information", plus the index to pick.
val exploded = df
  .withColumn("exploded", explode($"information"))
  .withColumn("indexColumn", indexColumn)
exploded.show(false)

// Keep only the inner array that contains the row's matchID, pull out the
// element at indexColumn, and drop the helper columns. The trailing show()
// prints the final table.
exploded
  .where(expr("array_contains(exploded, matchID )"))
  .withColumn("res", expr("element_at(exploded,indexColumn)"))
  .drop("exploded", "indexColumn")
  .show(false)
Вывод:
+---------+-------+---------------------------------------------------------+-----------------+-----------+
|which_one|matchID|information |exploded |indexColumn|
+---------+-------+---------------------------------------------------------+-----------------+-----------+
|First |123 |[[1.2, 4.5, 837.0], [1.4, 4.8, 123.0], [4.1, 4.7, 143.0]]|[1.2, 4.5, 837.0]|1 |
|First |123 |[[1.2, 4.5, 837.0], [1.4, 4.8, 123.0], [4.1, 4.7, 143.0]]|[1.4, 4.8, 123.0]|1 |
|First |123 |[[1.2, 4.5, 837.0], [1.4, 4.8, 123.0], [4.1, 4.7, 143.0]]|[4.1, 4.7, 143.0]|1 |
|First |234 |[[4.8, 8.9, 234.0], [1.1, 4.2, 321.0], [3.9, 5.7, 521.0]]|[4.8, 8.9, 234.0]|1 |
|First |234 |[[4.8, 8.9, 234.0], [1.1, 4.2, 321.0], [3.9, 5.7, 521.0]]|[1.1, 4.2, 321.0]|1 |
|First |234 |[[4.8, 8.9, 234.0], [1.1, 4.2, 321.0], [3.9, 5.7, 521.0]]|[3.9, 5.7, 521.0]|1 |
|Second |345 |[[7.7, 8.1, 457.0], [4.5, 4.9, 345.0], [1.9, 2.8, 776.0]]|[7.7, 8.1, 457.0]|2 |
|Second |345 |[[7.7, 8.1, 457.0], [4.5, 4.9, 345.0], [1.9, 2.8, 776.0]]|[4.5, 4.9, 345.0]|2 |
|Second |345 |[[7.7, 8.1, 457.0], [4.5, 4.9, 345.0], [1.9, 2.8, 776.0]]|[1.9, 2.8, 776.0]|2 |
+---------+-------+---------------------------------------------------------+-----------------+-----------+
+---------+-------+---------------------------------------------------------+---+
|which_one|matchID|information |res|
+---------+-------+---------------------------------------------------------+---+
|First |123 |[[1.2, 4.5, 837.0], [1.4, 4.8, 123.0], [4.1, 4.7, 143.0]]|1.4|
|First |234 |[[4.8, 8.9, 234.0], [1.1, 4.2, 321.0], [3.9, 5.7, 521.0]]|4.8|
|Second |345 |[[7.7, 8.1, 457.0], [4.5, 4.9, 345.0], [1.9, 2.8, 776.0]]|4.9|
+---------+-------+---------------------------------------------------------+---+