В Apache Spark нет метода display.
Вместо него можно использовать show:
scala> import org.apache.spark.ml.feature.{OneHotEncoder, StringIndexer}
import org.apache.spark.ml.feature.{OneHotEncoder, StringIndexer}
scala> val df = spark.createDataFrame(Seq((0, "a"),(1, "b"),(2, "c"),(3, "a"),(4, "a"),(5, "c") )).toDF("id", "category")
df: org.apache.spark.sql.DataFrame = [id: int, category: string]
scala> val indexer = new StringIndexer().setInputCol("category").setOutputCol("categoryIndex").fit(df)
indexer: org.apache.spark.ml.feature.StringIndexerModel = strIdx_b1524bc6ee99
scala> val indexed = indexer.transform(df)
indexed: org.apache.spark.sql.DataFrame = [id: int, category: string ... 1 more field]
scala> val encoder = new OneHotEncoder().setInputCol("categoryIndex").setOutputCol("categoryVec")
encoder: org.apache.spark.ml.feature.OneHotEncoder = oneHot_2c4e3e2a369b
scala> val encoded = encoder.transform(indexed)
encoded: org.apache.spark.sql.DataFrame = [id: int, category: string ... 2 more fields]
scala> encoded.show()
+---+--------+-------------+-------------+
| id|category|categoryIndex|  categoryVec|
+---+--------+-------------+-------------+
|  0|       a|          0.0|(2,[0],[1.0])|
|  1|       b|          2.0|    (2,[],[])|
|  2|       c|          1.0|(2,[1],[1.0])|
|  3|       a|          0.0|(2,[0],[1.0])|
|  4|       a|          0.0|(2,[0],[1.0])|
|  5|       c|          1.0|(2,[1],[1.0])|
+---+--------+-------------+-------------+