What should I do if I want to perform some transformations on a Spark DataFrame, and the transformations need to work like the pandas DataFrame.apply() method?
import org.apache.spark.sql.SparkSession

object SparkSQLTransform {
  def main(args: Array[String]): Unit = {
    val sparkSession = SparkSession
      .builder()
      .master("local")
      .getOrCreate()

    val peopleDF = sparkSession.read.json("resources/people.json")
    peopleDF.show()
    /*
    +---+-------+------+
    |age|   name|   sex|
    +---+-------+------+
    | 25|Michael|female|
    | 30|   Andy|female|
    | 19| Justin|  male|
    | 39|  Alice|female|
    +---+-------+------+
    */
    // Is there a common Spark operation, similar to pandas.DataFrame.apply(),
    // that would produce the result below?
    /*
    +---+-------+------+
    |age|   name|   sex|
    +---+-------+------+
    | 25|Michael|     0|
    | 30|   Andy|     0|
    | 19| Justin|     1|
    | 39|  Alice|     0|
    +---+-------+------+
    */
  }
}
The content of people.json:
{"name":"Michael", "age":25, "sex": "female"}
{"name":"Andy", "age":30, "sex":"female"}
{"name":"Justin", "age":19, "sex": "male"}
{"name":"Alice", "age": 39, "sex": "female"}