package spark
import org.apache.spark.sql.SparkSession
// Case class at top level: nesting case classes inside an App body can trip up
// Spark's encoder derivation in some Scala versions. Field renamed from the
// original `data` to `date` (the values are numeric day serials like 43831).
case class Sale(
  corp: String,
  product: String,
  date: Long,
  group: String,
  sales: Long,
  market: String
)

object DataFramesExcept extends App {

  val spark = SparkSession.builder()
    .master("local")
    .appName("DataFrame-example")
    .getOrCreate()

  import spark.implicits._
  val df = Seq(
    Sale("A", "Eli", 43831, "A", 100, "I"),
    Sale("A", "Eli", 43831, "B", 100, "I"),
    Sale("A", "Sut", 43831, "A", 80, "I"),
    Sale("A", "Api", 43831, "C", 50, "C or D"),
    Sale("A", "Api", 43831, "D", 50, "C or D"),
    Sale("B", "Konkurent2", 43831, "C", 40, "C or D")
  ).toDF()

  val dfE = Seq(
    Sale("Z", "Eli", 43833, "A", 100, "M"),
    Sale("Z", "Eli", 43833, "B", 100, "M"),
    Sale("Z", "Sut", 43833, "A", 80, "M"),
    Sale("Z", "Api", 43833, "C", 50, "M"),
    Sale("Z", "Api", 43833, "D", 50, "M"),
    Sale("Z", "Konkurent2", 43831, "C", 40, "M")
  ).toDF()
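
  // For reference, except also works on whole rows: this returns every
  // distinct row of df with no exact match in dfE (here all six rows,
  // since corp and market differ on every row).
  df.except(dfE).show()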
  val cols = df.columns.toList
  cols.foreach { c =>
    // except keeps the values of column c that appear in df but not in dfE,
    // so an empty result means dfE covers every value of that column.
    // Dataset.isEmpty (Spark 2.4+) avoids the .rdd.isEmpty() detour.
    if (!df.select(c).except(dfE.select(c)).isEmpty) {
      println(s"$c is not empty")
    } else {
      println(s"$c is empty")
    }
  }
// result
// corp is not empty
// product is empty
// date is empty
// group is empty
// sales is empty
// market is not empty
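
  // A one-expression alternative (sketch, assuming Spark 2.4+ for
  // Dataset.isEmpty): collect the names of the columns whose df values are
  // not fully covered by dfE, instead of printing inside the loop.
  val differingCols = cols.filter(c => !df.select(c).except(dfE.select(c)).isEmpty)
  println(s"columns with values missing from dfE: ${differingCols.mkString(", ")}")
  // prints: columns with values missing from dfE: corp, market

  // Release the local SparkSession.
  spark.stop()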
}