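Для этого случая сгруппируйте данные с помощью groupBy,
а затем примените к каждому столбцу агрегатную функцию first(col, ignoreNulls = true):
она возвращает первое значение в группе, отличное от null.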
// Sample input: one row per (CONTR, COD, TYPCOD). Each row fills only the
// amount/date pair that matches its TYPCOD (RMA_* or FCB_*); the other pair is
// absent. Absent cells are modelled as None instead of a cast raw null; Spark
// encodes Option[String] as a nullable string column, so they show up as null.
// toDF needs the SparkSession implicits in scope (e.g. `import spark.implicits._`).
val df = {
  val absent = Option.empty[String]
  Seq(
    ("0004", "4433", "2006-11-04", "RMA", "150.0", Some("150.0"), Some("2006-11-04"), absent, absent),
    ("0004", "4433", "2012-05-14", "FCB", "300.0", absent, absent, Some("300.0"), Some("2012-05-14")),
    ("0004", "1122", "2011-10-17", "RMA", "100.0", Some("100.0"), Some("2011-10-17"), absent, absent),
    ("0004", "1122", "2015-12-05", "FCB", "500.0", absent, absent, Some("500.0"), Some("2015-12-05"))
  ).toDF("CONTR", "COD", "DATE", "TYPCOD", "Amount", "RMA_AMOUNT", "DATE_RMA", "FCB_AMOUNT", "DATE_FCB")
}
//+-----+----+----------+------+------+----------+----------+----------+----------+
//|CONTR| COD|      DATE|TYPCOD|Amount|RMA_AMOUNT|  DATE_RMA|FCB_AMOUNT|  DATE_FCB|
//+-----+----+----------+------+------+----------+----------+----------+----------+
//| 0004|4433|2006-11-04|   RMA| 150.0|     150.0|2006-11-04|      null|      null|
//| 0004|4433|2012-05-14|   FCB| 300.0|      null|      null|     300.0|2012-05-14|
//| 0004|1122|2011-10-17|   RMA| 100.0|     100.0|2011-10-17|      null|      null|
//| 0004|1122|2015-12-05|   FCB| 500.0|      null|      null|     500.0|2015-12-05|
//+-----+----+----------+------+------+----------+----------+----------+----------+
// Collapse the RMA and FCB rows of every (CONTR, COD) pair into a single row.
// first(..., ignoreNulls = true) returns the first non-null value it sees in
// the group, so each column picks up the one row that carries a value for it.
df.groupBy("CONTR", "COD").agg(
  first(col("RMA_AMOUNT"), ignoreNulls = true).alias("RMA_AMOUNT"),
  first(col("DATE_RMA"), ignoreNulls = true).alias("DATE_RMA"),
  first(col("FCB_AMOUNT"), ignoreNulls = true).alias("FCB_AMOUNT"),
  first(col("DATE_FCB"), ignoreNulls = true).alias("DATE_FCB")
).show()
//+-----+----+----------+----------+----------+----------+
//|CONTR| COD|RMA_AMOUNT|  DATE_RMA|FCB_AMOUNT|  DATE_FCB|
//+-----+----+----------+----------+----------+----------+
//| 0004|4433|     150.0|2006-11-04|     300.0|2012-05-14|
//| 0004|1122|     100.0|2011-10-17|     500.0|2015-12-05|
//+-----+----+----------+----------+----------+----------+
// If you also want to keep the TYPCOD and DATE values of the merged rows,
// gather them per group with collect_list and join them into one
// comma-separated string. The aggregated column keeps the source column name
// (TYPCOD, DATE).
// NOTE: Spark does not guarantee the order of collect_list results after a
// shuffle, so the order inside the joined strings may vary between runs.
df.groupBy("CONTR", "COD").agg(
  concat_ws(",", collect_list(col("TYPCOD"))).alias("TYPCOD"),
  concat_ws(",", collect_list(col("DATE"))).alias("DATE"),
  first(col("RMA_AMOUNT"), ignoreNulls = true).alias("RMA_AMOUNT"),
  first(col("DATE_RMA"), ignoreNulls = true).alias("DATE_RMA"),
  first(col("FCB_AMOUNT"), ignoreNulls = true).alias("FCB_AMOUNT"),
  first(col("DATE_FCB"), ignoreNulls = true).alias("DATE_FCB")
).show(truncate = false)
//+-----+----+-------+---------------------+----------+----------+----------+----------+
//|CONTR|COD |TYPCOD |DATE                 |RMA_AMOUNT|DATE_RMA  |FCB_AMOUNT|DATE_FCB  |
//+-----+----+-------+---------------------+----------+----------+----------+----------+
//|0004 |4433|RMA,FCB|2006-11-04,2012-05-14|150.0     |2006-11-04|300.0     |2012-05-14|
//|0004 |1122|RMA,FCB|2011-10-17,2015-12-05|100.0     |2011-10-17|500.0     |2015-12-05|
//+-----+----+-------+---------------------+----------+----------+----------+----------+