Попробуйте это:
// Two sample records in a brace-delimited "key:value" text format;
// note that the fields appear in a different order in each record.
val rdd = sc.parallelize(
  Seq(
    "{CurrentDate:05.24.2008,Employeeid:90786532432,Division:TX_VG}",
    "{Division:NW_VG,CurrentDate:01.18.2006,Employeeid:907806532432}"
  )
)
// Maps every raw record to a (Division, Employeeid) pair of strings.
val rdd2 = rdd.map { record =>
  // Returns the text that follows `key`, up to (not including) the nearest
  // ',' or '}' delimiter, or to the end of the record when neither follows.
  // Yields "" when `key` does not occur in the record at all, instead of
  // slicing from a bogus offset derived from indexOf's -1 result.
  // NOTE(review): the key is matched as a plain substring, so a longer key
  // ending in the same text (e.g. "SubDivision:") would also match.
  val fieldValue: String => String = { key =>
    val keyPos = record.indexOf(key)
    if (keyPos < 0) ""
    else record.drop(keyPos + key.length).takeWhile(ch => ch != ',' && ch != '}')
  }

  (fieldValue("Division:"), fieldValue("Employeeid:"))
}
// Action: evaluates the RDD and returns its (Division, Employeeid) pairs as an Array.
rdd2.collect()
возвращает:
res52: Array[(String, String)] = Array((TX_VG,90786532432), (NW_VG,907806532432))