ArrayIndexOutOfBoundsException: 0 — предупреждение при использовании sqlContext.read.json при использовании pyspark 1.6 - PullRequest
0 голосов
/ 20 декабря 2018

Меня всегда приветствует предупреждение ArrayIndexOutOfBoundsException: 0 при чтении объекта json в DataFrame, хотя, похоже, это не влияет на результат. Просто интересно, я что-то здесь не так делаю? (pyspark версии 1.6.0).

Заранее благодарим за помощь.

>>> d = {"id": 1, "json":[{"col1":1, "col2":2}, {"col1":2, "col2":4}]}
>>> x = sqlContext.read.json(sc.parallelize([d,d,d,d,d]))
>>> x.printSchema()
root
 |-- id: long (nullable = true)
 |-- json: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- col1: long (nullable = true)
 |    |    |-- col2: long (nullable = true)

>>> x.show()
18/12/20 13:25:43 WARN util.ExecutionListenerManager: Error executing query execution listener
java.lang.ArrayIndexOutOfBoundsException: 0
        at org.apache.spark.sql.query.analysis.QueryAnalysis$$anonfun$getInputMetadata$2.apply(QueryAnalysis.scala:121)
        at org.apache.spark.sql.query.analysis.QueryAnalysis$$anonfun$getInputMetadata$2.apply(QueryAnalysis.scala:108)
        at scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:111)
        at scala.collection.immutable.List.foreach(List.scala:318)
        at scala.collection.generic.TraversableForwarder$class.foreach(TraversableForwarder.scala:32)
        at scala.collection.mutable.ListBuffer.foreach(ListBuffer.scala:45)
        at org.apache.spark.sql.util.ExecutionListenerManager.org$apache$spark$sql$util$ExecutionListenerManager$$withErrorHandling(QueryExecutionListener.scala:119)
        at org.apache.spark.sql.util.ExecutionListenerManager$$anonfun$onSuccess$1.apply$mcV$sp(QueryExecutionListener.scala:99)
        at org.apache.spark.sql.util.ExecutionListenerManager$$anonfun$onSuccess$1.apply(QueryExecutionListener.scala:99)
        at org.apache.spark.sql.util.ExecutionListenerManager$$anonfun$onSuccess$1.apply(QueryExecutionListener.scala:99)
        at org.apache.spark.sql.util.ExecutionListenerManager.readLock(QueryExecutionListener.scala:132)
        at org.apache.spark.sql.util.ExecutionListenerManager.onSuccess(QueryExecutionListener.scala:98)
        at org.apache.spark.sql.DataFrame.withCallback(DataFrame.scala:2116)
        at org.apache.spark.sql.DataFrame.head(DataFrame.scala:1389)
        at org.apache.spark.sql.DataFrame.take(DataFrame.scala:1471)
        at org.apache.spark.sql.DataFrame.showString(DataFrame.scala:184)
        at sun.reflect.GeneratedMethodAccessor35.invoke(Unknown Source)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:231)
        at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:381)
        at py4j.Gateway.invoke(Gateway.java:259)
        at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:133)
        at py4j.commands.CallCommand.execute(CallCommand.java:79)
        at py4j.GatewayConnection.run(GatewayConnection.java:209)
        at java.lang.Thread.run(Thread.java:745)
+---+--------------+
| id|          json|
+---+--------------+
|  1|[[1,2], [2,4]]|
|  1|[[1,2], [2,4]]|
|  1|[[1,2], [2,4]]|
|  1|[[1,2], [2,4]]|
|  1|[[1,2], [2,4]]|
+---+--------------+
...