Я пытаюсь применить алгоритм kmeans.
val dfJoin_products_items = df_products.join(df_items, "product_id")
val weightFreight = spark.sql("SELECT cast(product_weight_g as double) weight, cast(freight_value as double) freight FROM global_temp.products_items")
case class Rows(weight:Double, freight:Double)
val rows = weightFreight.as[Rows]
val assembler = new VectorAssembler().setInputCols(Array("weight", "freight")).setOutputCol("features")
val data = assembler.transform(rows)
val kmeans = new KMeans().setK(4)
val model = kmeans.fit(data)
scala> dfJoin_products_items.printSchema
|-- product_id: string (nullable = true)
|-- product_category_name: string (nullable = true)
|-- product_name_lenght: string (nullable = true)
|-- product_description_lenght: string (nullable = true)
|-- product_photos_qty: string (nullable = true)
|-- product_weight_g: string (nullable = true)
|-- product_length_cm: string (nullable = true)
|-- product_height_cm: string (nullable = true)
|-- product_width_cm: string (nullable = true)
|-- order_id: string (nullable = true)
|-- order_item_id: string (nullable = true)
|-- seller_id: string (nullable = true)
|-- shipping_limit_date: string (nullable = true)
|-- price: string (nullable = true)
|-- freight_value: string (nullable = true)
scala> weightFreight.printSchema
|-- weight: double (nullable = true)
|-- freight: double (nullable = true)
2019-02-03 20:51:41 WARN BlockManager:66 - Putting block rdd_126_1 failed due to exception org.apache.spark.SparkException: Failed to execute user defined function($anonfun$4: (struct<weight:double,freight:double>) => struct<type:tinyint,size:int,indices:array<int>,values:array<double>>).
2019-02-03 20:51:41 WARN BlockManager:66 - Block rdd_126_1 could not be removed as it was not found on disk or in memory
2019-02-03 20:51:41 WARN BlockManager:66 - Putting block rdd_126_2 failed due to exception org.apache.spark.SparkException: Failed to execute user defined function($anonfun$4: (struct<weight:double,freight:double>) => struct<type:tinyint,size:int,indices:array<int>,values:array<double>>).
2019-02-03 20:51:41 ERROR Executor:91 - Exception in task 1.0 in stage 16.0 (TID 23)
org.apache.spark.SparkException: Failed to execute user defined function($anonfun$4: (struct<weight:double,freight:double>) => struct<type:tinyint,size:int,indices:array<int>,values:array<double>>)
Я не понимаю эту ошибку, кто-нибудь может объяснить мне, пожалуйста?
Большое спасибо!
ОБНОВЛЕНИЕ 1: Полная трассировка стека
Трассировка стека огромна, так что вы можете найти еездесь: https://pastebin.com/PhmZPtDk