import spark.implicits._
// Build a small four-column DataFrame of strings from comma-separated
// literals, then print its rows and its schema.
val df = Seq(
  "Table,EXTERNAL,hive,name1",
  "Table,EXTERNAL,hive,name2",
  "Table,EXTERNAL,hive,name3"
).map { line =>
  // Every literal has exactly four comma-separated fields; any other
  // field count would fail this pattern with a MatchError.
  val Array(table, tableType, db, name) = line.split(",")
  (table, tableType, db, name)
}.toDF("table", "type", "db", "name")

df.show()
df.printSchema()
который печатает:
+-----+--------+----+-----+
|table|    type|  db| name|
+-----+--------+----+-----+
|Table|EXTERNAL|hive|name1|
|Table|EXTERNAL|hive|name2|
|Table|EXTERNAL|hive|name3|
+-----+--------+----+-----+
root
|-- table: string (nullable = true)
|-- type: string (nullable = true)
|-- db: string (nullable = true)
|-- name: string (nullable = true)