Spark: how can a subquery be used as a select column?
Caused by: java.lang.RuntimeException: Couldn't find count(DISTINCT orderid)#69L in [storeid#4, combox_pid#6, pid#7, count(1)#61L]
The query I want to run is: select b.pid, (select count(distinct orderid) from a where a.pid = b.pid) as order_num, count(1) from b group by b.pid
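In Spark SQL, a correlated scalar subquery in the select list of a query that itself has a group by tends to trip the analyzer exactly like this. One alternative (an untested sketch, not the code from the question) is to compute the distinct order count per pid in a derived table and join it back instead of using a subquery; `tab_tmp` here is the view registered in the code below:

```scala
// sketch: join against a pre-aggregated distinct count instead of a correlated subquery
spark.sql(
  """
    |select b.storeid, b.combox_pid, b.pid, c.order_num, count(1) as num
    |  from tab_tmp as b
    |  join (select pid, count(distinct orderid) as order_num from tab_tmp group by pid) as c
    |    on b.pid = c.pid
    | group by b.storeid, b.combox_pid, b.pid, c.order_num
  """.stripMargin).show()
```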
```scala
import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.types._
import org.apache.spark.sql.{Row, SparkSession}

object OrderCountTset {
  Logger.getRootLogger.setLevel(Level.WARN)

  def main(args: Array[String]): Unit = {
    // sample rows: storeid, orderid, combox_pid, pid
    val data = Seq(
      Row("a", "100", "200", "300"),
      Row("a", "100", "200", "300"),
      Row("a", "101", "201", "300"),
      Row("a", "101", "200", "3001"),
      Row("a", "102", "200", "300"),
      Row("a", "103", "201", "300")
    )
    val schema = new StructType()
      .add("storeid", StringType)
      .add("orderid", StringType)
      .add("combox_pid", StringType)
      .add("pid", StringType)

    val spark = SparkSession.builder().master("local[*]").getOrCreate()
    val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema)
    df.show()
    df.createOrReplaceTempView("tab_tmp")
spark.sql("select storeid,orderid,combox_pid,pid,count(distinct orderid ),count(1) from tab_tmp group by storeid,orderid,combox_pid,pid").show()
    // the correlated scalar subquery combined with the outer group by fails with:
    // java.lang.RuntimeException: Couldn't find count(DISTINCT orderid)#69L in [...]
    // spark.sql(
    //   """
    //     |select b.storeid, b.combox_pid, b.pid,
    //     |       (select count(distinct a.orderid) from tab_tmp as a where a.pid = b.pid) as order_num,
    //     |       count(1)
    //     |  from tab_tmp as b group by b.storeid, b.combox_pid, b.pid
    //   """.stripMargin).show()
    // workaround: first materialize the grouped counts into a second view ...
    spark.sql(
      """
        |select storeid, combox_pid, pid, count(1) as num
        |  from tab_tmp group by storeid, combox_pid, pid
      """.stripMargin).createOrReplaceTempView("tab_tmp_2")

    // ... then attach the correlated subquery to that view, whose outer query
    // no longer contains a group by
    spark.sql(
      """
        |select b.storeid, b.combox_pid, b.pid, num,
        |       (select count(distinct a.orderid) from tab_tmp as a where a.pid = b.pid) as order_num
        |  from tab_tmp_2 as b
      """.stripMargin).show()
  }
}
```