Ошибка выполнения при записи в управляемую таблицу Hive 3 через HWC - PullRequest
1 голос
/ 04 марта 2020

Я разрабатываю тестовое приложение на Spark, которое читает внешнюю таблицу Hive, выполняет некоторые преобразования и записывает данные в управляемую таблицу Hive, используя Hive Warehouse Connector, чтобы проверить взаимодействие между Spark и Hive 3.

Приложение успешно прочитало таблицу через HWC, но когда оно начинает вставку, происходит сбой со следующей ошибкой

llap.HiveWarehouseDataSourceWriter - HiveWarehouseDataSourceWriter: com.hortonworks.spark.sql.hive.llap.HiveWarehouseDataSourceWriter@6b6ddc37, msg:Committed File /tmp/20200304150515-003d4c58-e824-40fa-bac6-12ea92a36464/20200304150515-003d4c58-e824-40fa-bac6-12ea92a36464_39_0
2020-03-04 15:06:30.064 [main] INFO  llap.HiveWarehouseDataSourceWriter - Handling write: database:von_onogoro, table:mtm_reconciliation, savemode: Overwrite, tableExists:true, createTable:false, loadData:true
2020-03-04 15:06:30.072 [main] INFO  llap.HiveWarehouseDataSourceWriter - Load data query: LOAD DATA INPATH '/tmp/20200304150515-003d4c58-e824-40fa-bac6-12ea92a36464' OVERWRITE  INTO TABLE von_onogoro.mtm_reconciliation
2020-03-04 15:06:30.472 [main] INFO  llap.HiveWarehouseDataSourceWriter - Commit job 20200304150515-003d4c58-e824-40fa-bac6-12ea92a36464
2020-03-04 15:06:30.475 [main] ERROR llap.HiveWarehouseDataSourceWriter - Aborted DataWriter job 20200304150515-003d4c58-e824-40fa-bac6-12ea92a36464
2020-03-04 15:06:30.481 [main] ERROR processing.SpringTaskProcessor - Erreur fatale
org.apache.spark.SparkException: Writing job aborted.
        at org.apache.spark.sql.execution.datasources.v2.WriteToDataSourceV2Exec.doExecute(WriteToDataSourceV2.scala:112) ~[spark-sql_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
        at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131) ~[spark-sql_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
        at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127) ~[spark-sql_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
        at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155) ~[spark-sql_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
        at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) ~[spark-core_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
        at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152) ~[spark-sql_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
        at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127) ~[spark-sql_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
        at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:80) ~[spark-sql_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
        at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:80) ~[spark-sql_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
        at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:664) ~[spark-sql_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
        at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:664) ~[spark-sql_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
        at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:77) ~[spark-sql_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
        at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:664) ~[spark-sql_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
        at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:256) ~[spark-sql_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
        at com.test.sparkHive.computation.reconciliation.mtm.dao.MtmReconciliationDao.saveIntoHive(MtmReconciliationDao.scala:85) ~[onogoro-computation_2.11-1.12.2-SNAPSHOT.jar:?]
        at com.test.sparkHive.computation.reconciliation.mtm.MtmReconciliationFeeder.computeAndFeedMtmReconciliation(MtmReconciliationFeeder.scala:122) ~[onogoro-computation_2.11-1.12.2-SNAPSHOT.jar:?]
        at com.test.sparkHive.reconciliation.mtm.MtmReconciliationProcessor.run(MtmReconciliationProcessor.java:46) ~[onogoro-1.12.3-SNAPSHOT.jar:?]
        at com.sparkHive.common.processing.SpringTaskProcessor.start(SpringTaskProcessor.java:154) [common-econometrics-core-3.15.29.jar:?]
        at com.sparkHive.common.processing.SpringTaskProcessor.start(SpringTaskProcessor.java:118) [common-econometrics-core-3.15.29.jar:?]
        at com.test.sparkHive.reconciliation.mtm.MtmReconciliationProcessor.main(MtmReconciliationProcessor.java:40) [onogoro-1.12.3-SNAPSHOT.jar:?]
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[?:1.8.0_222]
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[?:1.8.0_222]
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:1.8.0_222]
        at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_222]
        at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52) [spark-core_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
        at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:900) [spark-core_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
        at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:192) [spark-core_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
        at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:217) [spark-core_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
        at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:137) [spark-core_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
        at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) [spark-core_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
Caused by: java.lang.RuntimeException: java.sql.SQLException: Error while processing statement: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.MoveTask. org.apache.hadoop.hive.ql.metadata.HiveException: Load Data failed for hdfs://hdfs-prince/tmp/20200304150515-003d4c58-e824-40fa-bac6-12ea92a36464/20200304150515-003d4c58-e824-40fa-bac6-12ea92a36464_0_0 as the file is not owned by hive and load data is also not ran as hive
        at com.hortonworks.spark.sql.hive.llap.HiveWarehouseDataSourceWriter.commit(HiveWarehouseDataSourceWriter.java:172) ~[hive-warehouse-connector-assembly-1.0.0.3.1.5.0-152.jar:1.0.0.3.1.5.0-152]
        at org.apache.spark.sql.execution.datasources.v2.WriteToDataSourceV2Exec.doExecute(WriteToDataSourceV2.scala:91) ~[spark-sql_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
        ... 29 more
Caused by: java.sql.SQLException: Error while processing statement: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.MoveTask. org.apache.hadoop.hive.ql.metadata.HiveException: Load Data failed for hdfs://hdfs-prince/tmp/20200304150515-003d4c58-e824-40fa-bac6-12ea92a36464/20200304150515-003d4c58-e824-40fa-bac6-12ea92a36464_0_0 as the file is not owned by hive and load data is also not ran as hive
        at shadehive.org.apache.hive.jdbc.HiveStatement.waitForOperationToComplete(HiveStatement.java:401) ~[hive-warehouse-connector-assembly-1.0.0.3.1.5.0-152.jar:1.0.0.3.1.5.0-152]
        at shadehive.org.apache.hive.jdbc.HiveStatement.execute(HiveStatement.java:266) ~[hive-warehouse-connector-assembly-1.0.0.3.1.5.0-152.jar:1.0.0.3.1.5.0-152]
        at shadehive.org.apache.hive.jdbc.HivePreparedStatement.execute(HivePreparedStatement.java:101) ~[hive-warehouse-connector-assembly-1.0.0.3.1.5.0-152.jar:1.0.0.3.1.5.0-152]
        at org.apache.commons.dbcp2.DelegatingPreparedStatement.execute(DelegatingPreparedStatement.java:94) ~[hive-warehouse-connector-assembly-1.0.0.3.1.5.0-152.jar:1.0.0.3.1.5.0-152]
        at org.apache.commons.dbcp2.DelegatingPreparedStatement.execute(DelegatingPreparedStatement.java:94) ~[hive-warehouse-connector-assembly-1.0.0.3.1.5.0-152.jar:1.0.0.3.1.5.0-152]
        at com.hortonworks.spark.sql.hive.llap.JDBCWrapper.executeUpdate(HS2JDBCWrapper.scala:356) ~[hive-warehouse-connector-assembly-1.0.0.3.1.5.0-152.jar:1.0.0.3.1.5.0-152]
        at com.hortonworks.spark.sql.hive.llap.DefaultJDBCWrapper.executeUpdate(HS2JDBCWrapper.scala) ~[hive-warehouse-connector-assembly-1.0.0.3.1.5.0-152.jar:1.0.0.3.1.5.0-152]
        at com.hortonworks.spark.sql.hive.llap.HiveWarehouseDataSourceWriter.handleWriteWithSaveMode(HiveWarehouseDataSourceWriter.java:276) ~[hive-warehouse-connector-assembly-1.0.0.3.1.5.0-152.jar:1.0.0.3.1.5.0-152]
        at com.hortonworks.spark.sql.hive.llap.HiveWarehouseDataSourceWriter.commit(HiveWarehouseDataSourceWriter.java:170) ~[hive-warehouse-connector-assembly-1.0.0.3.1.5.0-152.jar:1.0.0.3.1.5.0-152]
        at org.apache.spark.sql.execution.datasources.v2.WriteToDataSourceV2Exec.doExecute(WriteToDataSourceV2.scala:91) ~[spark-sql_2.11-2.3.2.3.1.5.0-152.jar:2.3.2.3.1.5.0-152]
        ... 29 more
org.apache.spark.SparkException: Writing job aborted.
        at org.apache.spark.sql.execution.datasources.v2.WriteToDataSourceV2Exec.doExecute(WriteToDataSourceV2.scala:112)
        at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
        at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:127)
        at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:155)
        at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
        at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152)
        at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127)
        at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:80)
        at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:80)
        at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:664)
        at org.apache.spark.sql.DataFrameWriter$$anonfun$runCommand$1.apply(DataFrameWriter.scala:664)
        at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:77)
        at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:664)
        at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:256)
        at com.test.sparkHive.computation.reconciliation.mtm.dao.MtmReconciliationDao.saveIntoHive(MtmReconciliationDao.scala:85)
        at com.test.sparkHive.computation.reconciliation.mtm.MtmReconciliationFeeder.computeAndFeedMtmReconciliation(MtmReconciliationFeeder.scala:122)
        at com.test.sparkHive.reconciliation.mtm.MtmReconciliationProcessor.run(MtmReconciliationProcessor.java:46)
        at com.sparkHive.common.processing.SpringTaskProcessor.start(SpringTaskProcessor.java:154)
        at com.sparkHive.common.processing.SpringTaskProcessor.start(SpringTaskProcessor.java:118)
        at com.test.sparkHive.reconciliation.mtm.MtmReconciliationProcessor.main(MtmReconciliationProcessor.java:40)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
        at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:900)
        at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:192)
        at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:217)
        at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:137)
        at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.RuntimeException: java.sql.SQLException: Error while processing statement: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.MoveTask. org.apache.hadoop.hive.ql.metadata.HiveException: Load Data failed for hdfs://hdfs-prince/tmp/20200304150515-003d4c58-e824-40fa-bac6-12ea92a36464/20200304150515-003d4c58-e824-40fa-bac6-12ea92a36464_0_0 as the file is not owned by hive and load data is also not ran as hive
        at com.hortonworks.spark.sql.hive.llap.HiveWarehouseDataSourceWriter.commit(HiveWarehouseDataSourceWriter.java:172)
        at org.apache.spark.sql.execution.datasources.v2.WriteToDataSourceV2Exec.doExecute(WriteToDataSourceV2.scala:91)
        ... 29 more

Код для записи в Hive 3:

// Build an HWC session bound to the existing SparkSession.
// NOTE(review): hiveHwc is never referenced below — presumably the builder's
// side effects initialize the connector; confirm whether it is actually needed
// for the format-based write path.
val hiveHwc=
com.hortonworks.spark.sql.hive.llap.HiveWarehouseBuilder.session(spark).build()
  // Write the DataFrame into the managed Hive table via the HWC DataSource.
  // The failing LOAD DATA step in the stack trace is issued by this save():
  // HWC stages files under /tmp and asks HiveServer2 to move them, which
  // requires the staged files to be readable/owned appropriately by hive.
  reconciliatinDF.write.format("com.hortonworks.spark.sql.hive.llap.HiveWarehouseConnector")
              .option("database", "von_onogoro")        // target Hive database
                .option("table", "mtm_reconciliation")  // target managed table
                    .mode(SaveMode.Overwrite)           // replaces existing table contents
                      .save()

1 Ответ

0 голосов
/ 18 апреля 2020

@ Prince: эта ошибка связана с правами доступа:

файл не принадлежит пользователю hive, и загрузка данных (load data) также не выполняется от имени hive

Если вы решите проблему с правами доступа, ваш запрос выполнится успешно. Если у вас HDP, вы можете использовать Ranger с плагином Hive, чтобы создать политику, дающую вашему пользователю Spark необходимые права доступа к Hive.

...