Вы используете `filter` неправильно.
Посмотрите на фрагмент ниже, чтобы понять, как его применять:
import sparkSession.implicits._
// Sample data; the ("y", 30, 20) row is listed twice.
val dataframe = Seq(
  ("x", 20, 10),
  ("y", 30, 20),
  ("y", 30, 20),
  ("z", 10, 20)
).toDF("source", "target", "weight")

// Keep only the rows whose weight is strictly greater than 10.
val result1 = dataframe.filter($"weight" > 10)

// Same filter, followed by removal of fully identical rows.
val result2 = result1.dropDuplicates()

// Print the original, the filtered, and the filtered + de-duplicated frames, in that order.
Seq(dataframe, result1, result2).foreach(_.show())
Вывод:
+------+------+------+
|source|target|weight|
+------+------+------+
|     x|    20|    10|
|     y|    30|    20|
|     y|    30|    20|
|     z|    10|    20|
+------+------+------+
+------+------+------+
|source|target|weight|
+------+------+------+
|     y|    30|    20|
|     y|    30|    20|
|     z|    10|    20|
+------+------+------+
+------+------+------+
|source|target|weight|
+------+------+------+
|     y|    30|    20|
|     z|    10|    20|
+------+------+------+