У меня есть датафрейм из БД
val listvaluesDF = spark.sqlContext.read.format("jdbc").option("driver" , "com.microsoft.sqlserver.jdbc.SQLServerDriver").option("url", "jdbc:sqlserver://azure.cloud.acme.com:14481;databaseName=dbadmin3").option("dbtable", "(select distinct [key], value, internal from dbadmin3.V_LIST_VALUES where internal in ('year', 'wmt0SBU', 'wmt0Department', 'wmt0DeptCategory', 'wmt0DotcomOnly', 'wmt0WalmartWeek', 'wmt0SetWeek', 'wmt0Event', 'wmt1Qtr','seasonType')) tmp").option("user", "aaa").option("password", "xxx").load()
, разделенный на несколько фреймов данных
listvaluesDF.createOrReplaceTempView("listvaluesDF")
var dfYear = spark.sql("select key, value from listvaluesDF where internal = 'year'")
var dfSBU = spark.sql("select key, value from listvaluesDF where internal = 'wmt0SBU'")
var dfDept = spark.sql("select key, value from listvaluesDF where internal = 'wmt0Department'")
var dfDeptCategory = spark.sql("select key, value from listvaluesDF where internal = 'wmt0DeptCategory'")
var dfDotcom = spark.sql("select key, value from listvaluesDF where internal = 'wmt0DotcomOnly'")
var dfWalmartWeek = spark.sql("select key, value from listvaluesDF where internal = 'wmt0WalmartWeek'")
var dfSetWeek = spark.sql("select key, value from listvaluesDF where internal = 'wmt0SetWeek'")
var dfEvent = spark.sql("select key, value from listvaluesDF where internal = 'wmt0Event'")
var dfQtr = spark.sql("select key, value from listvaluesDF where internal = 'wmt1Qtr'")
var dfseasonType = spark.sql("select key, value from listvaluesDF where internal = 'seasonType'")
, и я выполняю множественное левое соединение с Main DF, как
val seasonFinalDF = seasonsDF.alias("seasonsDF").join(paletteDF.alias("primaryPalette"), seasonsDF("primaryPalette") === paletteDF("id"), "left_outer").join(flextypeDF.alias("SBU"), seasonsDF("hierarchy") === flextypeDF("key"), "left_outer").join(dfYear.alias("fiscalYearEnding"), seasonsDF("fiscalYearEnding") === dfYear("key"), "left_outer").join(dfSBU.alias("SBU"), seasonsDF("SBU") === dfSBU("key"), "left_outer").join(dfDept.alias("department"), seasonsDF("department") === dfDept("key"), "left_outer").join(dfDeptCategory.alias("dept_Category"), seasonsDF("dept_Category") === dfDeptCategory("key"), "left_outer").join(dfDotcom.alias("dotcomOnly"), seasonsDF("dotcomOnly") === dfDotcom("key"), "left_outer").join(dfseasonType.alias("type"), seasonsDF("type") === dfseasonType("key"), "left_outer").join(dfWalmartWeek.alias("walmartWeek"), seasonsDF("walmartWeek") === dfWalmartWeek("key"), "left_outer").join(dfSetWeek.alias("setWeek"), seasonsDF("setWeek") === dfSetWeek("key"), "left_outer").join(dfEvent.alias("event"), seasonsDF("event") === dfEvent("key"), "left_outer").join(dfQtr.alias("quarter"), seasonsDF("quarter") === dfQtr("key"), "left_outer").select("seasonsDF.seasonMasterID","seasonsDF.seasonName","fiscalYearEnding.value","SBU.value","department.value","dept_Category.value","dotcomOnly.value","seasonsDF.active","type.value","seasonsDF.createdDate","seasonsDF.createdBy","seasonsDF.updatedDate","seasonsDF.modifiedBy","seasonsDF.seasonId","seasonsDF.flexID","primaryPalette.paletteName","walmartWeek.value","setWeek.value","event.value","quarter.value","hierarchy.DisplayName").toDF("seasonMasterID","seasonName","fiscalYearEnding","SBU","department","dept_Category","dotcomOnly","active","type","createdDate","createdBy","updatedDate","modifiedBy","seasonId","flexID","primaryPalette","walmartWeek","setWeek","event","quarter","hierarchy")
и, наконец, получим результат = DF, как
scala> seasonFinalDF.printSchema
root
|-- seasonMasterID: long (nullable = true)
|-- seasonName: string (nullable = true)
|-- fiscalYearEnding: string (nullable = true)
|-- SBU: string (nullable = true)
|-- department: string (nullable = true)
|-- dept_Category: string (nullable = true)
|-- dotcomOnly: string (nullable = true)
|-- active: integer (nullable = true)
|-- type: string (nullable = true)
|-- createdDate: timestamp (nullable = true)
|-- createdBy: long (nullable = true)
|-- updatedDate: timestamp (nullable = true)
|-- modifiedBy: long (nullable = true)
|-- seasonId: long (nullable = true)
|-- flexID: string (nullable = true)
|-- primaryPalette: string (nullable = true)
|-- walmartWeek: string (nullable = true)
|-- setWeek: string (nullable = true)
|-- event: string (nullable = true)
|-- quarter: string (nullable = true)
Теперь, в этом случае все левые объединения становятся нулевыми, кроме первого, когда я делаю объяснение, они присоединяются к одному и тому же sql несколько раз
Я не уверен, где и как все идет не так, может ли кто-нибудь помочь найти правильный путь к левому соединению