Вам необходимо привести столбец к типу timestamp (метке времени), а затем вычислить разницу. Посмотрите пример:
scala> val df = Seq(("1/01/2017 12:01:00 AM","1/1/2017 12:05:00 AM")).toDF("time1","time2")
df: org.apache.spark.sql.DataFrame = [time1: string, time2: string]
scala> val df2 = df.withColumn("time1",to_timestamp('time1,"d/MM/yyyy hh:mm:ss a")).withColumn("time2",to_timestamp('time2,"d/MM/yyyy hh:mm:ss a"))
df2: org.apache.spark.sql.DataFrame = [time1: timestamp, time2: timestamp]
scala> df2.printSchema
root
|-- time1: timestamp (nullable = true)
|-- time2: timestamp (nullable = true)
scala> df2.withColumn("diff_sec",unix_timestamp('time2)-unix_timestamp('time1)).withColumn("diff_min",'diff_sec/60).show(false)
+-------------------+-------------------+--------+--------+
|time1              |time2              |diff_sec|diff_min|
+-------------------+-------------------+--------+--------+
|2017-01-01 00:01:00|2017-01-01 00:05:00|240     |4.0     |
+-------------------+-------------------+--------+--------+
scala>
Обновление 1:
scala> val df = Seq(("1/01/2017 12:01:00 AM"),("1/1/2017 12:05:00 AM")).toDF("timex")
df: org.apache.spark.sql.DataFrame = [timex: string]
scala> val df2 = df.withColumn("timex",to_timestamp('timex,"d/MM/yyyy hh:mm:ss a"))
df2: org.apache.spark.sql.DataFrame = [timex: timestamp]
scala> df2.show
+-------------------+
|              timex|
+-------------------+
|2017-01-01 00:01:00|
|2017-01-01 00:05:00|
+-------------------+
scala> val df3 = df2.alias("t1").join(df2.alias("t2"), $"t1.timex" =!= $"t2.timex", "leftOuter").toDF("time1","time2")
df3: org.apache.spark.sql.DataFrame = [time1: timestamp, time2: timestamp]
scala> df3.withColumn("diff_sec",unix_timestamp('time2)-unix_timestamp('time1)).withColumn("diff_min",'diff_sec/60).show(false)
+-------------------+-------------------+--------+--------+
|time1              |time2              |diff_sec|diff_min|
+-------------------+-------------------+--------+--------+
|2017-01-01 00:01:00|2017-01-01 00:05:00|240     |4.0     |
|2017-01-01 00:05:00|2017-01-01 00:01:00|-240    |-4.0    |
+-------------------+-------------------+--------+--------+
scala> df3.withColumn("diff_sec",unix_timestamp('time2)-unix_timestamp('time1)).withColumn("diff_min",'diff_sec/60).show(1,false)
+-------------------+-------------------+--------+--------+
|time1              |time2              |diff_sec|diff_min|
+-------------------+-------------------+--------+--------+
|2017-01-01 00:01:00|2017-01-01 00:05:00|240     |4.0     |
+-------------------+-------------------+--------+--------+
only showing top 1 row
scala>