Вот более чистая версия кода.
Я добавил методы для `first` и `last`, потому что это просто стандартные операции `head` и `last` над списком.
Остальные (`lead`, `lag`, `window`) можно реализовать с помощью `sliding(2)` или `sliding(window)` на `List`, обрабатывая на каждом шаге соответствующий элемент (и аккуратно учитывая граничные случаи на обоих концах списка).
/** A single employee record.
  *
  * @param empid  numeric employee id
  * @param name   employee name
  * @param age    age; used together with `dept` as a grouping and sort key
  * @param dept   department name; the key for the per-department aggregates
  * @param salary salary amount that the aggregates sum, max and min over
  */
case class Employee(empid: Int, name: String, age: Int, dept: String, salary: Int)
// Sample data set: every aggregate and the final report are derived from this list.
val empData: List[Employee] = List(
  Employee(1, "Ajay", 25, "Technical", 35000),
  Employee(3, "Chandan", 22, "Technical", 30000),
  Employee(4, "Arjun", 30, "Management", 54000),
  Employee(2, "Arun", 28, "Sales", 9000),
  Employee(8, "Anmol", 28, "Sales", 15000),
  Employee(9, "Vivek", 20, "Management", 8000),
  Employee(10, "Nikhil", 20, "Sales", 7000),
  Employee(5, "Rahul", 30, "Management", 60000),
  Employee(6, "Ganesh", 32, "Sales", 35000),
  Employee(7, "Vishal", 32, "Technical", 40000),
  Employee(11, "Anmol", 25, "Sales", 15000),
  Employee(12, "Vivek", 25, "Management", 8000),
  Employee(13, "Nikhil", 30, "Technical", 7000),
)
// Employees bucketed by department name.
val byDept: Map[String, List[Employee]] =
  empData.groupBy(emp => emp.dept)

// Employees bucketed by the (department, age) pair.
val byDeptAge: Map[(String, Int), List[Employee]] =
  empData.groupBy { case Employee(_, _, age, dept, _) => (dept, age) }
// Total salary per department.
val empSum: Map[String, Int] =
  byDept.transform((_, members) => members.foldLeft(0)(_ + _.salary))

// Number of employees per department.
val empCount: Map[String, Int] =
  byDept.transform((_, members) => members.size)

// Highest salary per department.
val empMax: Map[String, Int] =
  byDept.transform((_, members) => members.iterator.map(_.salary).max)

// Lowest salary per department.
val empMin: Map[String, Int] =
  byDept.transform((_, members) => members.iterator.map(_.salary).min)

// Identity lookup: each department name maps to itself.
val empDeptName: Map[String, String] =
  empData.iterator.map(_.dept).map(dept => dept -> dept).toMap

// Total salary per (department, age) partition.
val sumPartitionByDeptAge: Map[(String, Int), Int] =
  byDeptAge.transform((_, members) => members.foldLeft(0)(_ + _.salary))
// First employee of each department's group, in the order groupBy collected them
// (presumably the original list order — confirm if that ordering matters).
// `head` cannot fail here: groupBy never yields an empty group.
val empDeptFirst: Map[String, Employee] =
  byDept.transform((_, members) => members.head)

// Last employee of each department's group; `last` is safe for the same reason.
val empDeptLast: Map[String, Employee] =
  byDept.transform((_, members) => members.last)
// Report: one tuple per employee, ordered by (dept, age), combining the employee's own
// fields with the department-level and (dept, age)-level aggregates, printed one per line.
empData.sortBy(emp => (emp.dept, emp.age))
  .map { emp =>
    (
      emp.empid,
      emp.name,
      emp.age,
      emp.salary,
      // The fallback must be a String: an Int default here would widen this field to Any.
      // Every dept in empData is a key of empDeptName, so the fallback is never actually used.
      empDeptName.getOrElse(emp.dept, emp.dept),
      empMax.getOrElse(emp.dept, 0),
      empMin.getOrElse(emp.dept, 0),
      empSum.getOrElse(emp.dept, 0),
      sumPartitionByDeptAge.getOrElse((emp.dept, emp.age), 0)
    )
  }
  .foreach(println)