Вы можете использовать оконные функции (Window), чтобы сохранить неагрегированные столбцы и при этом показывать агрегат — например, сумму — в каждой строке.
Посмотрите фрагмент кода ниже — возможно, он вам поможет:
import org.apache.spark.sql.expressions.Window
// Example: attach a per-country sum to every input row with a window function,
// producing one output row per (input row, aggregated column) pair.
//
// Assumes a spark-shell / notebook session in which a SparkSession named `spark`
// exists and `spark.implicits._` is in scope (required for `Seq(...).toDF`).
import org.apache.spark.sql.functions.{col, lit, quarter, sum, year}

// Value columns that arrive as strings and are summed per country.
val aggregateColumns = Seq("item1_value", "item2_value", "item3_value")

// Raw sample rows; the three item values are still strings at this point.
val rawDF = Seq(
  (2010, "2018-11-24", 71285, "USA", "0.9192019", "0.1992019", "0.9955999"),
  (2010, "2017-08-24", 71286, "USA", "0.9292018", "0.2992019", "0.99662018"),
  (2010, "2019-02-24", 71287, "USA", "0.9392017", "0.3992019", "0.99772000")
).toDF("seq_id", "load_date", "company_id", "country_code", "item1_value", "item2_value", "item3_value")

// Cast every value column to double by folding over the column list, so the list of
// columns is stated once. The string form of `cast` avoids needing
// `org.apache.spark.sql.types._`, which spark-shell does not import automatically.
// `year` / `quarter` already yield integer columns, so no extra cast is applied.
val df = aggregateColumns
  .foldLeft(rawDF)((acc, c) => acc.withColumn(c, col(c).cast("double")))
  .withColumn("fiscal_year", year(col("load_date")))
  .withColumn("fiscal_quarter", quarter(col("load_date")))

// Window covering all rows that share the same country code.
val byCountry = Window.partitionBy(col("country_code"))

// One DataFrame per aggregated column: each row is tagged with the column name and
// carries that column's sum over its country partition.
val aggDFs = aggregateColumns.map { c =>
  df.withColumn("col_name", lit(c))
    .withColumn("sum_country", sum(col(c)).over(byCountry))
}

// All per-column frames share one schema, so a positional union is safe here.
val combinedDF = aggDFs.reduce(_ union _)

combinedDF
  .select("seq_id", "load_date", "company_id", "country_code", "col_name", "sum_country")
  .distinct()
  .show(100, truncate = false)
Вывод будет таким:
+------+----------+----------+------------+-----------+------------------+
|seq_id|load_date |company_id|country_code|col_name |sum_country |
+------+----------+----------+------------+-----------+------------------+
|2010 |2019-02-24|71287 |USA |item1_value|2.7876054 |
|2010 |2018-11-24|71285 |USA |item1_value|2.7876054 |
|2010 |2017-08-24|71286 |USA |item1_value|2.7876054 |
|2010 |2018-11-24|71285 |USA |item2_value|0.8976057000000001|
|2010 |2019-02-24|71287 |USA |item2_value|0.8976057000000001|
|2010 |2017-08-24|71286 |USA |item2_value|0.8976057000000001|
|2010 |2019-02-24|71287 |USA |item3_value|2.9899400800000002|
|2010 |2018-11-24|71285 |USA |item3_value|2.9899400800000002|
|2010 |2017-08-24|71286 |USA |item3_value|2.9899400800000002|
+------+----------+----------+------------+-----------+------------------+