java.lang.AssertionError: утверждение не выполнено: нет плана для HiveTableRelation - PullRequest
0 голосов
/ 28 августа 2018

Я пытаюсь запустить sql-запрос hive в приложении spark scala и получаю следующую ошибку «Нет плана для HiveTableRelation», когда приложение выполняет запрос к таблице, хранящейся на s3. Вот твой код:

package com.testapp.data

import org.apache.log4j.{Logger, Level}
import com.amazonaws.auth.{AWSCredentials, BasicSessionCredentials, DefaultAWSCredentialsProviderChain}
import play.api.libs.json.{JsObject, JsString, JsValue, Json}
import org.apache.spark._
import org.apache.spark.SparkContext._
import org.apache.spark.sql.SparkSession
import org.jets3t.service.S3Service
import scala.sys.process._
import java.io.File;
import org.apache.spark.sql.hive.HiveContext;

object TestEnrich {

  def main(args: Array[String]) {
    Logger.getRootLogger.setLevel(Level.INFO);
    val controllerLogger = Logger.getLogger(this.getClass)

    val dt = args(0);
    val tm = args(1);
    println(s"enrich request $dt, $tm")
    val sparkConfig = new SparkConf()
      .setAppName("enricher")
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .registerKryoClasses {
        Array(
          classOf[AWSCredentials],
          classOf[BasicSessionCredentials],
          classOf[DefaultAWSCredentialsProviderChain]
        )
      }

    val sparkContext = SparkContext.getOrCreate(sparkConfig)
    val spark = SparkSession.builder.config(sparkContext.getConf).enableHiveSupport().getOrCreate()

    spark.sqlContext.setConf("spark.sql.caseSensitive", "true")

    spark.sqlContext.setConf("javax.jdo.option.ConnectionURL", "xxxx")
    spark.sqlContext.setConf("javax.jdo.option.ConnectionDriverName", "com.mysql.jdbc.Driver")
    spark.sqlContext.setConf("javax.jdo.option.ConnectionUserName", "xxxx")
    spark.sqlContext.setConf("javax.jdo.option.ConnectionPassword", "xxxx")

    spark.sparkContext.hadoopConfiguration.set("fs.s3.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
    spark.sparkContext.hadoopConfiguration.set("fs.s3.access.key", "xxxx");
    spark.sparkContext.hadoopConfiguration.set("fs.s3.secret.key", "xxxx");

    SparkSession.setDefaultSession(spark)
    SparkSession.clearActiveSession()

    import spark.sql;
    import spark.implicits._;

    val hiveContext = new org.apache.spark.sql.hive.HiveContext(spark.sparkContext);
    import hiveContext._
    hiveContext.sql("show databases").show();
    hiveContext.sql("use production");
    hiveContext.sql("show tables").show();
    // error is thrown in next line
    val data = hiveContext.sql(s"select * from raw_by_ts_events_nrt where dt='$dt' and tm='$tm' limit 5");
    data.show();

    sparkContext.stop()
  }
}

вот инструкция создания таблицы:

CREATE TABLE hive.production.raw_by_ts_events_nrt (
   ts bigint,
   batchts bigint,
   eventid varchar,
   userid varchar,
   ...
   dt varchar,
   tm varchar
)
WITH (
   external_location = 's3a://pb-prod-raw-by-ts-events-nrt/',
   format = 'ORC',
   partitioned_by = ARRAY['dt','tm']
)

и вот журнал:

18/08/28 06:50:05 INFO SessionState: Created local directory: /tmp/8efbfe25-22d0-43f2-8c65-9d4d27b1cb97_resources
18/08/28 06:50:05 INFO SessionState: Created HDFS directory: /tmp/hive/hadoop/8efbfe25-22d0-43f2-8c65-9d4d27b1cb97
18/08/28 06:50:05 INFO SessionState: Created local directory: /tmp/hadoop/8efbfe25-22d0-43f2-8c65-9d4d27b1cb97
18/08/28 06:50:05 INFO SessionState: Created HDFS directory: /tmp/hive/hadoop/8efbfe25-22d0-43f2-8c65-9d4d27b1cb97/_tmp_space.db
18/08/28 06:50:05 INFO HiveClientImpl: Warehouse location for Hive client (version 1.2.2) is hdfs:///user/spark/warehouse
18/08/28 06:50:06 INFO CodeGenerator: Code generated in 258.835732 ms
18/08/28 06:50:06 INFO CodeGenerator: Code generated in 15.359587 ms
+------------+
|databaseName|
+------------+
|     default|
|  production|
+------------+

18/08/28 06:50:06 INFO CodeGenerator: Code generated in 11.998794 ms
++
||
++
++

18/08/28 06:50:06 INFO CodeGenerator: Code generated in 22.778824 ms
18/08/28 06:50:06 INFO CodeGenerator: Code generated in 16.995158 ms
+----------+--------------------+-----------+
|  database|           tableName|isTemporary|
+----------+--------------------+-----------+
|production|raw_by_ts_events_nrt|      false|
+----------+--------------------+-----------+

18/08/28 06:50:06 INFO ContextCleaner: Cleaned accumulator 1
18/08/28 06:50:06 INFO ContextCleaner: Cleaned accumulator 2
18/08/28 06:50:06 INFO ContextCleaner: Cleaned accumulator 0
18/08/28 06:50:07 WARN Utils: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.debug.maxToStringFields' in SparkEnv.conf.
Exception in thread "main" java.lang.AssertionError: assertion failed: No plan for HiveTableRelation `production`.`raw_by_ts_events_nrt`, org.apache.hadoop.hive.ql.io.orc.OrcSerde, [ts#26L, batchts#27L, eventid#28, userid#29, eventname#30, pageloaduid#31, deltatime#32, adaction#33, adduration#34, aderrordescription#35, adispreload#36, admoduleisloaded#37, adnetwork#38, adplacement#39, adplayer#40, adprogress#41, adrejectreason#42, adtag#43, adtargeting#44, adtype#45, aduuid#46, articlecanonicalurl#47, articleformat#48, articleid#49, ... 113 more fields], [dt#163, tm#164]

        at scala.Predef$.assert(Predef.scala:170)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:78)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:75)
        at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
        at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
        at scala.collection.Iterator$class.foreach(Iterator.scala:893)
        at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
        at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157)
        at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1336)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:75)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:67)
        at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
        at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:78)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:75)
        at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
        at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
        at scala.collection.Iterator$class.foreach(Iterator.scala:893)
        at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
        at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157)
        at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1336)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:75)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:67)
        at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
        at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:78)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:75)
        at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
        at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
        at scala.collection.Iterator$class.foreach(Iterator.scala:893)
        at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
        at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157)
        at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1336)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:75)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:67)
        at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
        at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:78)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:75)
        at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
        at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
        at scala.collection.Iterator$class.foreach(Iterator.scala:893)
        at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
        at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157)
        at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1336)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:75)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:67)
        at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
        at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:78)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2$$anonfun$apply$2.apply(QueryPlanner.scala:75)
        at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
        at scala.collection.TraversableOnce$$anonfun$foldLeft$1.apply(TraversableOnce.scala:157)
        at scala.collection.Iterator$class.foreach(Iterator.scala:893)
        at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
        at scala.collection.TraversableOnce$class.foldLeft(TraversableOnce.scala:157)
        at scala.collection.AbstractIterator.foldLeft(Iterator.scala:1336)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:75)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$2.apply(QueryPlanner.scala:67)
        at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
        at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
        at org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:93)
        at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:72)
        at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:68)
        at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:77)
        at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:77)
        at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3249)
        at org.apache.spark.sql.Dataset.head(Dataset.scala:2484)
        at org.apache.spark.sql.Dataset.take(Dataset.scala:2698)
        at org.apache.spark.sql.Dataset.showString(Dataset.scala:254)
        at org.apache.spark.sql.Dataset.show(Dataset.scala:723)
        at org.apache.spark.sql.Dataset.show(Dataset.scala:682)
        at org.apache.spark.sql.Dataset.show(Dataset.scala:691)
        at com.playbuzz.data.TestEnrich$.main(TestEnrich.scala:90)
        at com.playbuzz.data.TestEnrich.main(TestEnrich.scala)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
        at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:894)
        at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:198)
        at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:228)
        at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:137)
        at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)

как вы можете видеть, он успешно выполняет все предыдущие операторы и завершается неудачно при выборе. Любая помощь будет высоко оценена.

Ответы [ 2 ]

0 голосов
/ 13 мая 2019

Если вы используете spark-submit, вы должны включить spark.sql.catalogImplementation=hive config. как это:

spark-submit --master yarn --deploy-mode cluster --conf spark.sql.catalogImplementation=hive yourApplication.jar
0 голосов
/ 28 января 2019

Я думаю, что это ошибка, связанная с таблицей ORC из Блог Hortonworks

Пожалуйста, проверьте это две ссылки

  1. Переполнение стека http://stackoverflow.com/questions/38740862/not-able-to-fetch-result-from-hive-transaction-enabled-table-through-spark-sql

  2. SPARK-18355 : Spark SQL не может прочитать данные из таблицы кустов ORC, к которой добавлен новый столбец

  3. Spark-18497 : ForeachSink завершается с ошибкой «утверждение не выполнено: план EventTimeWatermark отсутствует»

Если причина № 2 или № 3, очевидного обходного пути сейчас нет. Вам нужно обновить, чтобы получить исправление. На # 1 есть некоторые подробности о том, как избежать этой проблемы

...