Spark выдаёт ошибку "No plan for HiveTableRelation" - PullRequest
0 голосов
/ 03 июля 2019

Мы настраиваем YARN для совместной работы с Hue. В блокноте Hue мы пытаемся прочитать таблицу Hive с помощью hiveContext.sql, как показано ниже.

from pyspark import SparkContext, HiveContext, SparkConf

sqlContext = HiveContext(sc)
liang = sqlContext.sql('select * from olympics.olympics');

Получаем ошибку:

Traceback (most recent call last): File "/opt/cloudera/parcels/CDH-6.2.0-1.cdh6.2.0.p0.967373/lib/spark/python/lib/pyspark.zip/pyspark/sql/dataframe.py", 
line 533, in collect sock_info = self._jdf.collectToPython() File "/opt/cloudera/parcels/CDH-6.2.0-1.cdh6.2.0.p0.967373/lib/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py",
line 1257, in __call__ answer, self.gateway_client, self.target_id, self.name) File "/opt/cloudera/parcels/CDH-6.2.0-1.cdh6.2.0.p0.967373/lib/spark/python/lib/pyspark.zip/pyspark/sql/utils.py", 
line 63, in deco return f(*a, **kw) File "/opt/cloudera/parcels/CDH-6.2.0-1.cdh6.2.0.p0.967373/lib/spark/python/lib/py4j-0.10.7-src.zip/py4j/protocol.py", 

line 328, in get_return_value format(target_id, ".", name), value) Py4JJavaError: 
An error occurred while calling o934.collectToPython. :
java.lang.AssertionError: assertion failed: No plan for HiveTableRelation `olympics`.`olympics`, org.apache.hadoop.hive.ql.io.orc.OrcSerde, [city#185, edition#186, sport#187, sub_sport#188, athlete#189, country#190, gender#191, event#192, event_gender#193, medal#194] at scala.Predef$.assert(Predef.scala:170) at 
org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93) 
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:78) 
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:75) 
at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157) 
at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157) 
at scala.collection.Iterator$class.foreach(Iterator.scala:891) 
at scala.collection.AbstractIterator.foreach(Iterator.scala:1334) 
at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157) 
at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1334) 
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:75) 
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:67) 
at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:435) 
at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:441) 
at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93) 
at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:72) 
at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:68) 
at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:77) 
at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:77)
 at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3359) 
at org.apache.spark.sql.Dataset.collectToPython(Dataset.scala:3254)
 at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) 
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) 
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
 at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244) 
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
 at py4j.Gateway.invoke(Gateway.java:282) 
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132) 
at py4j.commands.CallCommand.execute(CallCommand.java:79) 
at py4j.GatewayConnection.run(GatewayConnection.java:238) at java.lang.Thread.run(Thread.java:748)



Но при этом мы можем использовать hiveContext для записи таблицы в Hive, и эту таблицу можно прочитать запросом SELECT в сниппете Hive.

...