Я выполняю приведенный ниже код для анализа данных с использованием Spark, при выполнении кода я получаю исключение Nullpointer. Существует условие if (uPopulation != null && !uPopulation.isEmpty()
) для фильтрации пустых данных, но все еще получающее исключение NullPointer. Может ли кто-нибудь из вас помочь мне здесь?
package com.anil.wb
import org.apache.spark.sql.SparkSession
import java.lang.Long
object WorldBankDataAnalysis {
def main(args: Array[String]) {
System.setProperty("hadoop.home.dir", "D:\\BigData\\Hadoop_setups\\hadoop-
2.5.0-cdh5.3.2")
System.setProperty("spark.sql.warehouse.dir",
"file:/D:/BigData/Spark_setups/spark-2.0.2-bin-hadoop2.6/spark-warehouse")
val spark = SparkSession.builder.appName("UrbanPopulation").master("local").getOrCreate()
val data = spark.read.csv("D:\\WorldBankAnalysis\\World_Bank_Indicators.csv").rdd
val result = data.map { line =>
val uPopulation = line.getString(10).replaceAll(",", "")
var uPopNum = 0L
if(uPopulation.length() > 0){
uPopNum = Long.parseLong(uPopulation)
}
(uPopNum, line.getString(0))
}
.sortByKey(false)
//spark.sparkContext.parallelize(Seq(result)).saveAsTextFile(args(1))
result.foreach { println }
spark.stop
}
}
Пример данных:
Afghanistan,7/1/2000,0,,0,,151,11,8,"25,950,816","5,527,524",51,45,45,45,48,50,2
Afghanistan,7/1/2001,0,,0,0,150,11,9,"26,697,430","5,771,984",50,46,45,46,48,50,2,"2,461,666,315",92
Afghanistan,7/1/2002,0,,"25,000",0,150,22,7,"27,465,525","6,025,936",49,46,46,46,48,50,2,"4,338,907,579",158
Afghanistan,7/1/2003,0,,"200,000",0,151,25,8,"28,255,719","6,289,723",48,46,46,46,48,50,2,"4,766,127,272",169
Afghanistan,7/1/2004,0,,"600,000",0,150,30,9,"29,068,646","6,563,700",47,46,46,46,48,50,2,"5,704,202,651",196
Afghanistan,7/1/2005,0,,"1,200,000",1,151,33,9,"29,904,962","6,848,236",47,47,47,47,48,50,2,"6,814,753,581",228
Afghanistan,7/1/2006,0,11,"2,520,366",2,151,24,7,"30,751,661","7,158,987",46,47,47,47,48,50,2,"7,721,931,671",251
Afghanistan,7/1/2007,0,18,"4,668,096",2,150,29,7,"31,622,333","7,481,844",45,47,47,47,47,50,2,"9,707,373,721",307
Afghanistan,7/1/2008,0,19,"7,898,909",2,150,32,7,"32,517,656","7,817,245",45,48,47,48,47,51,2,"11,940,296,131",367
Afghanistan,7/1/2009,0,21,"12,000,000",3,149,34,8,"33,438,329","8,165,640",44,48,48,48,47,51,2,"14,213,670,485",425
Afghanistan,7/1/2010,0,,"13,000,000",4,149,38,8,"34,385,068","8,527,497",44,48,48,48,46,51,2,"17,243,112,604",501
Выводится в виде 0-го и 10-го строковых значений:
8527497 Afghanistan
8165640 Afghanistan
7817245 Afghanistan
5527524 Afghanistan
Примечание. В некоторых странах значение 10-й строки пустое.