Посмотрите это решение на основе DataFrame.
// Sample input: three rows with two string columns, Date and Key.
scala> val df = Seq(
| ("20181001","10"),
| ("20181002","40"),
| ("20181003","50")).toDF("Date","Key")
df: org.apache.spark.sql.DataFrame = [Date: string, Key: string]
// Add a "gencond" column holding one "(Date=<Date> and Key=<Key>)" string per row.
scala> val df2 = df.withColumn("gencond",concat(lit("(Date="), 'Date, lit(" and Key=") ,'Key,lit(")")))
df2: org.apache.spark.sql.DataFrame = [Date: string, Key: string ... 1 more field]
// Collect every gencond value into a single array; show(false) prints it without truncation.
// NOTE(review): collect_list gives no ordering guarantee after a shuffle - confirm if order matters.
scala> df2.agg(collect_list('gencond)).show(false)
+------------------------------------------------------------------------------------+
|collect_list(gencond) |
+------------------------------------------------------------------------------------+
|[(Date=20181001 and Key=10), (Date=20181002 and Key=40), (Date=20181003 and Key=50)]|
+------------------------------------------------------------------------------------+
EDIT1
Вы можете прочитать данные из parquet-файлов и просто переименовать столбцы, как в этом решении. На последнем этапе снова подставьте исходные имена столбцов из заголовка parquet. Посмотрите пример ниже.
// Stand-in for a DataFrame read from parquet, keeping its original column names.
scala> val df = Seq(("101","Jack"),("103","wright")).toDF("id","name") // Original names from parquet
df: org.apache.spark.sql.DataFrame = [id: string, name: string]
// Rename the two columns positionally to the placeholder names Date/Key.
scala> val df2= df.select("*").toDF("Date","Key") // replace it with Date/Key as we used in this question
df2: org.apache.spark.sql.DataFrame = [Date: string, Key: string]
// Build one "(Date=<Date> and Key=<Key>)" condition string per row.
scala> val df3 = df2.withColumn("gencond",concat(lit("(Date="), 'Date, lit(" and Key=") ,'Key,lit(")")))
df3: org.apache.spark.sql.DataFrame = [Date: string, Key: string ... 1 more field]
// Aggregate all condition strings into a single array column named "list".
scala> val df4=df3.agg(collect_list('gencond).as("list"))
df4: org.apache.spark.sql.DataFrame = [list: array<string>]
// Join the array elements into one string separated by " or ".
scala> df4.select(concat_ws(" or ",'list)).show(false)
+----------------------------------------------------+
|concat_ws( or , list) |
+----------------------------------------------------+
|(Date=101 and Key=Jack) or (Date=103 and Key=wright)|
+----------------------------------------------------+
// Capture the original column names (by position) from the un-renamed DataFrame.
scala> val a = df.columns(0)
a: String = id
scala> val b = df.columns(1)
b: String = name
// Swap the placeholders back: "Date" -> a first, then "Key" -> b on the result of the first replace.
// NOTE(review): both replaces run over the whole joined string, so "Date"/"Key" occurring inside
// data values (or "Key" occurring inside the name held in `a`) would be rewritten as well.
scala> df4.select(concat_ws(" or ",'list).as("new1")).select(regexp_replace('new1,"Date",a).as("colx")).select(regexp_replace('colx,"Key",b).as("colxy")).show(false)
+--------------------------------------------------+
|colxy |
+--------------------------------------------------+
|(id=101 and name=Jack) or (id=103 and name=wright)|
+--------------------------------------------------+
scala>