Вот что я использовал
Идея состоит в том, чтобы создать псевдоколонку pseudo_name, которая равна NULL для строк из таблицы A и содержит name для строк из таблицы B, а затем выбрать ненулевое значение в качестве result_name.
Выполненный SQL-запрос:
// Query that yields one row per (id, p) over the union of tables A and B,
// taking the name from B when B has a row for that key and from A otherwise.
//
// pseudo_name is NULL for rows coming from A and equals name for rows coming from B.
// max() skips NULLs, so max(pseudo_name) is B's name when B contributes a row;
// when it is NULL (key exists only in A), nvl falls back to max(name), i.e. A's name.
//
// The aggregation must happen BEFORE nvl: max(nvl(pseudo_name, name)) would compare
// A's and B's names as strings and return whichever sorts last, not B's.
val resultSql =
  """select sub.id,
    |       sub.p,
    |       nvl(max(sub.pseudo_name), max(sub.name)) as result_name
    |from (
    |  select a.id, a.name, a.p, null as pseudo_name from a
    |  union all
    |  select b.id, b.name, b.p, b.name as pseudo_name from b
    |) sub
    |group by 1, 2
    |order by 1""".stripMargin
Полный код:
// End-to-end example: for each (id, p) take the name from table B when B has that key,
// otherwise keep the name from table A.
// NOTE(review): toDF on a Seq presumably needs spark-shell or `import spark.implicits._` in scope — confirm.

// id = 3 is present in both tables, so it demonstrates the "B overrides A" case.
val table1 = Seq((1, "abc", "A"), (2, "bcd", "A"), (3, "first_table_value", "A")).toDF("id", "name", "p")
val table2 = Seq((3, "second_table_value", "A"), (4, "ddd", "A"), (5, "eee", "A")).toDF("id", "name", "p")

// Register both DataFrames as temp views so the SQL below can reference them as a / b.
table1.createOrReplaceTempView("A")
table2.createOrReplaceTempView("B")

// pseudo_name is NULL for rows from A and equals name for rows from B.
// max() skips NULLs, so max(pseudo_name) is B's name when present; otherwise nvl
// falls back to max(name), which is then A's name. Aggregating before nvl (instead of
// max(nvl(...))) keeps the result independent of how the two names compare as strings.
val resultSql =
  """select sub.id,
    |       sub.p,
    |       nvl(max(sub.pseudo_name), max(sub.name)) as result_name
    |from (
    |  select a.id, a.name, a.p, null as pseudo_name from a
    |  union all
    |  select b.id, b.name, b.p, b.name as pseudo_name from b
    |) sub
    |group by 1, 2
    |order by 1""".stripMargin

// truncate = false so long values such as "second_table_value" are printed in full.
spark.sql(resultSql).show(false)
Результат:
+---+---+------------------+
|id |p  |result_name       |
+---+---+------------------+
|1  |A  |abc               |
|2  |A  |bcd               |
|3  |A  |second_table_value|
|4  |A  |ddd               |
|5  |A  |eee               |
+---+---+------------------+