Use the from_unixtime and date_format functions: from_unixtime converts epoch seconds to a timestamp string, and date_format reshapes a date/timestamp value into whatever pattern you supply.
scala> val df = Seq(("1","296","5.0","1147880044","null"),("1","306","3.5","1147868817","null")).toDF("userId","movieId","rating","ts","ratingtimestamp")
df: org.apache.spark.sql.DataFrame = [userId: string, movieId: string ... 3 more fields]
scala> df.show(false)
+------+-------+------+----------+---------------+
|userId|movieId|rating|ts        |ratingtimestamp|
+------+-------+------+----------+---------------+
|1     |296    |5.0   |1147880044|null           |
|1     |306    |3.5   |1147868817|null           |
+------+-------+------+----------+---------------+
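First convert the epoch seconds with from_unixtime, which produces a string in the default yyyy-MM-dd HH:mm:ss format, then reshape it with date_format: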
scala> df.withColumn("ratingtimestamp",date_format(from_unixtime($"ts"),"MM/dd/yyyy HH:mm:ss")).show(false)
+------+-------+------+----------+-------------------+
|userId|movieId|rating|ts        |ratingtimestamp    |
+------+-------+------+----------+-------------------+
|1     |296    |5.0   |1147880044|05/17/2006 21:04:04|
|1     |306    |3.5   |1147868817|05/17/2006 17:56:57|
+------+-------+------+----------+-------------------+
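Alternatively, from_unixtime itself accepts a format pattern, so the same result comes from a single call: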
scala> df.withColumn("ratingtimestamp",from_unixtime($"ts","MM/dd/yyyy HH:mm:ss")).show(false)
+------+-------+------+----------+-------------------+
|userId|movieId|rating|ts        |ratingtimestamp    |
+------+-------+------+----------+-------------------+
|1     |296    |5.0   |1147880044|05/17/2006 21:04:04|
|1     |306    |3.5   |1147868817|05/17/2006 17:56:57|
+------+-------+------+----------+-------------------+
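Two caveats. First, from_unixtime renders the instant in the session time zone (spark.sql.session.timeZone), so the wall-clock values shown above can differ between environments. Second, both variants leave ratingtimestamp as a string column; if you need a real TimestampType column (for range filters, sorting, window functions), a minimal sketch, assuming Spark 2.2+ where to_timestamp is available:

scala> // parse the default from_unixtime output back into a TimestampType column
scala> df.withColumn("ratingtimestamp", to_timestamp(from_unixtime($"ts"))).show(false)

Here to_timestamp parses the default yyyy-MM-dd HH:mm:ss string back into a timestamp, so downstream operations see a true timestamp rather than formatted text.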