If you are in spark-shell, you do not need to create a new SQLContext with
val sqlContext = new org.apache.spark.sql.SQLContext(sc)
Instead, you can use the built-in spark session directly:
scala> import spark.implicits._
scala> val ratings_raw = sc.textFile("./ml-1m/ratings.dat")
ratings_raw: org.apache.spark.rdd.RDD[String] = ./ml-1m/ratings.dat MapPartitionsRDD[1] at textFile at <console>:38
scala> case class Rating(userId: Int, movieId: Int, rating: Float)
defined class Rating
scala> val ratings = ratings_raw.map(x => x.split("::")).map(r => Rating(r(0).toInt, r(1).toInt, r(2).toFloat)).toDF().na.drop()
ratings: org.apache.spark.sql.DataFrame = [userId: int, movieId: int ... 1 more field]
scala> ratings
res3: org.apache.spark.sql.DataFrame = [userId: int, movieId: int ... 1 more field]
scala> ratings.printSchema
root
|-- userId: integer (nullable = false)
|-- movieId: integer (nullable = false)
|-- rating: float (nullable = false)
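
For comparison, here is a minimal sketch of the same pipeline written entirely against the spark session (assuming Spark 2.x, where spark.read.textFile returns a Dataset[String]); the file path and the Rating case class are the same as in the transcript above:

import spark.implicits._

// Same case class as above.
case class Rating(userId: Int, movieId: Int, rating: Float)

// ratings.dat lines look like userId::movieId::rating::timestamp;
// the timestamp field is simply not used here.
val ratings = spark.read
  .textFile("./ml-1m/ratings.dat")                          // Dataset[String]
  .map(_.split("::"))                                       // Dataset[Array[String]]
  .map(r => Rating(r(0).toInt, r(1).toInt, r(2).toFloat))   // Dataset[Rating]
  .toDF()
  .na.drop()                                                // drop null rows, as in the RDD version

ratings.printSchema()

The resulting schema should match the one printed above; ratings.show(5) is a quick way to check the parsed rows.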