# Build two small sample tables sharing the schema
# (BATCH_ID, SRC_SYS_ID, ACT_IND), then display every row of TableB plus the
# rows of TableA whose BATCH_ID does not occur in TableB, ordered by BATCH_ID.
# `spark` is expected to be an existing SparkSession provided by the caller /
# notebook environment.
valuesA = [
    (100, 'SYS1', 'N'),
    (101, 'SYS2', 'N'),
    (102, 'SYS3', 'N'),
    (103, 'SYS4', 'Y'),
]
TableA = spark.createDataFrame(valuesA, ['BATCH_ID', 'SRC_SYS_ID', 'ACT_IND'])

valuesB = [
    (99, 'SYS0', 'N'),
    (100, 'SYS1', 'N'),
    (101, 'SYS2', 'N'),
    (102, 'SYS3', 'Y'),
]
TableB = spark.createDataFrame(valuesB, ['BATCH_ID', 'SRC_SYS_ID', 'ACT_IND'])

ta = TableA.alias('ta')
tb = TableB.alias('tb')

# Rows of TableA whose BATCH_ID has no match in TableB.
# A left-anti join keeps this work distributed: the previous approach
# collected the differing ids to the driver and passed them back through
# `isin`, which costs an extra job and embeds every id in the query plan.
# NOTE: a NULL BATCH_ID never matches in a join, so such TableA rows are kept
# here (the `isin` filter dropped them). The sample data contains no NULLs.
only_in_a = (
    ta.join(tb.select('BATCH_ID'), on='BATCH_ID', how='left_anti')
    # `union` matches columns by position, so align with TableB's order.
    .select(*tb.columns)
)

tb.union(only_in_a).orderBy('BATCH_ID').show()