You can group struct(Buyer_name, Buyer_state) and struct(CoBuyer_name, CoBuyer_state) into an array column, which is then expanded with explode, as shown below:
import org.apache.spark.sql.functions._
import spark.implicits._

val df = Seq(
  ("Bob", "CA", "Joe", "CA", 20, "010119"),
  ("Stacy", "IL", "Jamie", "IL", 50, "020419")
).toDF("Buyer_name", "Buyer_state", "CoBuyer_name", "CoBuyer_state", "Price", "Date")

df.
  withColumn("Buyers", array(               // pack both (name, state) pairs into an array of structs
    struct($"Buyer_name".as("_1"), $"Buyer_state".as("_2")),
    struct($"CoBuyer_name".as("_1"), $"CoBuyer_state".as("_2"))
  )).
  withColumn("Buyer", explode($"Buyers")).  // one output row per struct in the array
  select(
    $"Buyer._1".as("Buyer_name"), $"Buyer._2".as("Buyer_state"), $"Price", $"Date"
  ).show
// +----------+-----------+-----+------+
// |Buyer_name|Buyer_state|Price| Date|
// +----------+-----------+-----+------+
// | Bob| CA| 20|010119|
// | Joe| CA| 20|010119|
// | Stacy| IL| 50|020419|
// | Jamie| IL| 50|020419|
// +----------+-----------+-----+------+
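
If you'd rather avoid the tuple-style _1/_2 field names, here is a minimal variant (a sketch assuming the same df as above; the field names "name" and "state" are just illustrative choices) that names the struct fields up front so the final select can reference them directly:

df.
  withColumn("Buyers", array(
    struct($"Buyer_name".as("name"), $"Buyer_state".as("state")),
    struct($"CoBuyer_name".as("name"), $"CoBuyer_state".as("state"))
  )).
  select(explode($"Buyers").as("Buyer"), $"Price", $"Date").  // explode used as a generator inside select
  select($"Buyer.name".as("Buyer_name"), $"Buyer.state".as("Buyer_state"), $"Price", $"Date").
  show
// Should produce the same four rows as the output above.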