Как преобразовать многопараметрическую функцию в udf в Spark? - PullRequest
0 голосов
/ 29 ноября 2018

У меня есть следующая функция на Scala:

/**
 * Placeholder for the function that should be turned into a Spark UDF.
 *
 * Declared with `def` rather than `val`: a `val` cannot carry a parameter
 * list, so the `val func1(...)` form does not parse.
 *
 * @param field1 first string input
 * @param field2 second string input
 * @param field3 boolean input
 * @return a pair of integers
 */
def func1(field1: String, field2: String, field3: Boolean): (Int, Int) = {
  // function implementation
  ??? // stub so the snippet type-checks until the real body is supplied
}

Как преобразовать вышеуказанную функцию в udf в Spark Scala?

Обновление :

Основываясь на предложенном ниже ответе, я использовал функцию udf() следующим образом, добавив несколько дополнительных параметров:

// Wraps an anonymous function of 12 String parameters in Spark's `udf`.
// The lambda ignores its arguments and returns the constant pair (-1, 0.0).
//
// NOTE: the Scala-function overloads of `udf` listed in the compiler error
// below stop at 10 parameters (A1..A10), so this 12-parameter lambda matches
// none of them; that is what triggers the "overloaded method value udf ...
// cannot be applied" error.
// NOTE(review): a common workaround is to pack the inputs into a single
// array/struct column and take one argument — confirm against the Spark
// version in use.
def funcudf = udf((
    field1: String,
    field2: String,
    field3: String,
    field4: String,
    field5: String,
    field6: String,
    field7: String,
    field8: String,
    field9: String,
    field10: String,
    field11: String,
    field12: String
) => { 
    (-1, 0.0)
})

Но получаю следующую ошибку:

<console>:23: error: overloaded method value udf with alternatives:
  (f: AnyRef,dataType: org.apache.spark.sql.types.DataType)org.apache.spark.sql.expressions.UserDefinedFunction <and>
  (f: org.apache.spark.sql.api.java.UDF10[_, _, _, _, _, _, _, _, _, _, _],returnType: org.apache.spark.sql.types.DataType)org.apache.spark.sql.expressions.UserDefinedFunction <and>
  (f: org.apache.spark.sql.api.java.UDF9[_, _, _, _, _, _, _, _, _, _],returnType: org.apache.spark.sql.types.DataType)org.apache.spark.sql.expressions.UserDefinedFunction <and>
  (f: org.apache.spark.sql.api.java.UDF8[_, _, _, _, _, _, _, _, _],returnType: org.apache.spark.sql.types.DataType)org.apache.spark.sql.expressions.UserDefinedFunction <and>
  (f: org.apache.spark.sql.api.java.UDF7[_, _, _, _, _, _, _, _],returnType: org.apache.spark.sql.types.DataType)org.apache.spark.sql.expressions.UserDefinedFunction <and>
  (f: org.apache.spark.sql.api.java.UDF6[_, _, _, _, _, _, _],returnType: org.apache.spark.sql.types.DataType)org.apache.spark.sql.expressions.UserDefinedFunction <and>
  (f: org.apache.spark.sql.api.java.UDF5[_, _, _, _, _, _],returnType: org.apache.spark.sql.types.DataType)org.apache.spark.sql.expressions.UserDefinedFunction <and>
  (f: org.apache.spark.sql.api.java.UDF4[_, _, _, _, _],returnType: org.apache.spark.sql.types.DataType)org.apache.spark.sql.expressions.UserDefinedFunction <and>
  (f: org.apache.spark.sql.api.java.UDF3[_, _, _, _],returnType: org.apache.spark.sql.types.DataType)org.apache.spark.sql.expressions.UserDefinedFunction <and>
  (f: org.apache.spark.sql.api.java.UDF2[_, _, _],returnType: org.apache.spark.sql.types.DataType)org.apache.spark.sql.expressions.UserDefinedFunction <and>
  (f: org.apache.spark.sql.api.java.UDF1[_, _],returnType: org.apache.spark.sql.types.DataType)org.apache.spark.sql.expressions.UserDefinedFunction <and>
  (f: org.apache.spark.sql.api.java.UDF0[_],returnType: org.apache.spark.sql.types.DataType)org.apache.spark.sql.expressions.UserDefinedFunction <and>
  [RT, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10](f: (A1, A2, A3, A4, A5, A6, A7, A8, A9, A10) => RT)(implicit evidence$57: reflect.runtime.universe.TypeTag[RT], implicit evidence$58: reflect.runtime.universe.TypeTag[A1], implicit evidence$59: reflect.runtime.universe.TypeTag[A2], implicit evidence$60: reflect.runtime.universe.TypeTag[A3], implicit evidence$61: reflect.runtime.universe.TypeTag[A4], implicit evidence$62: reflect.runtime.universe.TypeTag[A5], implicit evidence$63: reflect.runtime.universe.TypeTag[A6], implicit evidence$64: reflect.runtime.universe.TypeTag[A7], implicit evidence$65: reflect.runtime.universe.TypeTag[A8], implicit evidence$66: reflect.runtime.universe.TypeTag[A9], implicit evidence$67: reflect.runtime.universe.TypeTag[A10])org.apache.spark.sql.expressions.UserDefinedFunction <and>
  [RT, A1, A2, A3, A4, A5, A6, A7, A8, A9](f: (A1, A2, A3, A4, A5, A6, A7, A8, A9) => RT)(implicit evidence$47: reflect.runtime.universe.TypeTag[RT], implicit evidence$48: reflect.runtime.universe.TypeTag[A1], implicit evidence$49: reflect.runtime.universe.TypeTag[A2], implicit evidence$50: reflect.runtime.universe.TypeTag[A3], implicit evidence$51: reflect.runtime.universe.TypeTag[A4], implicit evidence$52: reflect.runtime.universe.TypeTag[A5], implicit evidence$53: reflect.runtime.universe.TypeTag[A6], implicit evidence$54: reflect.runtime.universe.TypeTag[A7], implicit evidence$55: reflect.runtime.universe.TypeTag[A8], implicit evidence$56: reflect.runtime.universe.TypeTag[A9])org.apache.spark.sql.expressions.UserDefinedFunction <and>
  [RT, A1, A2, A3, A4, A5, A6, A7, A8](f: (A1, A2, A3, A4, A5, A6, A7, A8) => RT)(implicit evidence$38: reflect.runtime.universe.TypeTag[RT], implicit evidence$39: reflect.runtime.universe.TypeTag[A1], implicit evidence$40: reflect.runtime.universe.TypeTag[A2], implicit evidence$41: reflect.runtime.universe.TypeTag[A3], implicit evidence$42: reflect.runtime.universe.TypeTag[A4], implicit evidence$43: reflect.runtime.universe.TypeTag[A5], implicit evidence$44: reflect.runtime.universe.TypeTag[A6], implicit evidence$45: reflect.runtime.universe.TypeTag[A7], implicit evidence$46: reflect.runtime.universe.TypeTag[A8])org.apache.spark.sql.expressions.UserDefinedFunction <and>
  [RT, A1, A2, A3, A4, A5, A6, A7](f: (A1, A2, A3, A4, A5, A6, A7) => RT)(implicit evidence$30: reflect.runtime.universe.TypeTag[RT], implicit evidence$31: reflect.runtime.universe.TypeTag[A1], implicit evidence$32: reflect.runtime.universe.TypeTag[A2], implicit evidence$33: reflect.runtime.universe.TypeTag[A3], implicit evidence$34: reflect.runtime.universe.TypeTag[A4], implicit evidence$35: reflect.runtime.universe.TypeTag[A5], implicit evidence$36: reflect.runtime.universe.TypeTag[A6], implicit evidence$37: reflect.runtime.universe.TypeTag[A7])org.apache.spark.sql.expressions.UserDefinedFunction <and>
  [RT, A1, A2, A3, A4, A5, A6](f: (A1, A2, A3, A4, A5, A6) => RT)(implicit evidence$23: reflect.runtime.universe.TypeTag[RT], implicit evidence$24: reflect.runtime.universe.TypeTag[A1], implicit evidence$25: reflect.runtime.universe.TypeTag[A2], implicit evidence$26: reflect.runtime.universe.TypeTag[A3], implicit evidence$27: reflect.runtime.universe.TypeTag[A4], implicit evidence$28: reflect.runtime.universe.TypeTag[A5], implicit evidence$29: reflect.runtime.universe.TypeTag[A6])org.apache.spark.sql.expressions.UserDefinedFunction <and>
  [RT, A1, A2, A3, A4, A5](f: (A1, A2, A3, A4, A5) => RT)(implicit evidence$17: reflect.runtime.universe.TypeTag[RT], implicit evidence$18: reflect.runtime.universe.TypeTag[A1], implicit evidence$19: reflect.runtime.universe.TypeTag[A2], implicit evidence$20: reflect.runtime.universe.TypeTag[A3], implicit evidence$21: reflect.runtime.universe.TypeTag[A4], implicit evidence$22: reflect.runtime.universe.TypeTag[A5])org.apache.spark.sql.expressions.UserDefinedFunction <and>
  [RT, A1, A2, A3, A4](f: (A1, A2, A3, A4) => RT)(implicit evidence$12: reflect.runtime.universe.TypeTag[RT], implicit evidence$13: reflect.runtime.universe.TypeTag[A1], implicit evidence$14: reflect.runtime.universe.TypeTag[A2], implicit evidence$15: reflect.runtime.universe.TypeTag[A3], implicit evidence$16: reflect.runtime.universe.TypeTag[A4])org.apache.spark.sql.expressions.UserDefinedFunction <and>
  [RT, A1, A2, A3](f: (A1, A2, A3) => RT)(implicit evidence$8: reflect.runtime.universe.TypeTag[RT], implicit evidence$9: reflect.runtime.universe.TypeTag[A1], implicit evidence$10: reflect.runtime.universe.TypeTag[A2], implicit evidence$11: reflect.runtime.universe.TypeTag[A3])org.apache.spark.sql.expressions.UserDefinedFunction <and>
  [RT, A1, A2](f: (A1, A2) => RT)(implicit evidence$5: reflect.runtime.universe.TypeTag[RT], implicit evidence$6: reflect.runtime.universe.TypeTag[A1], implicit evidence$7: reflect.runtime.universe.TypeTag[A2])org.apache.spark.sql.expressions.UserDefinedFunction <and>
  [RT, A1](f: A1 => RT)(implicit evidence$3: reflect.runtime.universe.TypeTag[RT], implicit evidence$4: reflect.runtime.universe.TypeTag[A1])org.apache.spark.sql.expressions.UserDefinedFunction <and>
  [RT](f: () => RT)(implicit evidence$2: reflect.runtime.universe.TypeTag[RT])org.apache.spark.sql.expressions.UserDefinedFunction
 cannot be applied to ((String, String, String, String, String, String, String, String, String, String, String, String) => (Int, Double))
       def funcudf = udf((
                     ^

1 Ответ

0 голосов
/ 29 ноября 2018

Вы можете просто использовать функцию udf и передать ей func1, например: udf(func1 _). Дополнительную документацию по этому вопросу можно найти здесь

Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...