Как выполнить запрос к представлению в Hive? - PullRequest
0 голосов
/ 15 января 2019

Мы выполняем простой SELECT из представления (которое содержит много данных) и получаем ошибку нехватки памяти «GC overhead limit exceeded» (превышен предел накладных расходов GC). Нам нужно, чтобы этот запрос выполнялся, иначе отчёт, построенный поверх этого представления, не сможет работать. Запрос выполняется на Tez.

Запрос выполняется более 4 часов и завершается с ошибкой. Есть ли способ успешно выполнить этот запрос, например, задав какие-либо параметры?

Запрос

select * from inc_cts.v_report_pub_view;

Сообщение об ошибке -

    TaskAttempt 0 failed, info=
» Error: Failure while running task:java.lang.RuntimeException: java.lang.RuntimeException: Reduce operator initialization failed
  at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:173)
  at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:139)
  at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:347)
  at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:194)
  at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:185)
  at java.security.AccessController.doPrivileged(Native Method)
  at javax.security.auth.Subject.doAs(Subject.java:422)
  at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1724)
  at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.callInternal(TezTaskRunner.java:185)
  at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.callInternal(TezTaskRunner.java:181)
  at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
  at java.util.concurrent.FutureTask.run(FutureTask.java:266)
  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
  at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.RuntimeException: Reduce operator initialization failed
  at org.apache.hadoop.hive.ql.exec.tez.ReduceRecordProcessor.init(ReduceRecordProcessor.java:204)
  at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:149)
  ... 14 more
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: java.util.concurrent.ExecutionException: java.lang.OutOfMemoryError: Java heap space
  at org.apache.hadoop.hive.ql.exec.Operator.completeInitialization(Operator.java:389)
  at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:379)
  at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:482)
  at org.apache.hadoop.hive.ql.exec.Operator.initializeChildren(Operator.java:439)
  at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:376)
  at org.apache.hadoop.hive.ql.exec.tez.ReduceRecordProcessor.init(ReduceRecordProcessor.java:182)
  ... 15 more
Caused by: java.util.concurrent.ExecutionException: java.lang.OutOfMemoryError: Java heap space
  at java.util.concurrent.FutureTask.report(FutureTask.java:122)
  at java.util.concurrent.FutureTask.get(FutureTask.java:192)
  at org.apache.hadoop.hive.ql.exec.Operator.completeInitialization(Operator.java:387)
  ... 20 more
Caused by: java.lang.OutOfMemoryError: Java heap space
  at org.apache.hadoop.hive.ql.exec.persistence.FlatRowContainer.listRealloc(FlatRowContainer.java:259)
  at org.apache.hadoop.hive.ql.exec.persistence.FlatRowContainer.add(FlatRowContainer.java:86)
  at org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper.putRow(HashMapWrapper.java:133)
  at org.apache.hadoop.hive.ql.exec.tez.HashTableLoader.load(HashTableLoader.java:211)
  at org.apache.hadoop.hive.ql.exec.MapJoinOperator.loadHashTable(MapJoinOperator.java:310)
  at org.apache.hadoop.hive.ql.exec.MapJoinOperator$1.call(MapJoinOperator.java:179)
  at org.apache.hadoop.hive.ql.exec.MapJoinOperator$1.call(MapJoinOperator.java:175)
  at org.apache.hadoop.hive.ql.exec.tez.ObjectCache.retrieve(ObjectCache.java:75)
  at org.apache.hadoop.hive.ql.exec.tez.ObjectCache$1.call(ObjectCache.java:92)
  ... 4 more

TaskAttempt 1 killed
TaskAttempt 2 killed
TaskAttempt 3 failed, info=
» Error: Failure while running task:java.lang.RuntimeException: java.lang.RuntimeException: Reduce operator initialization failed
  at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:173)
  at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:139)
  at or
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: java.util.concurrent.ExecutionException: java.lang.OutOfMemoryError: GC overhead limit exceeded
  at org.apache.hadoop.hive.ql.exec.Operator.completeInitialization(Operator.java:389)
  at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:379)
  at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:482)
  at org.apache.hadoop.hive.ql.exec.Operator.initializeChildren(Operator.java:439)
  at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:376)
  at org.apache.hadoop.hive.ql.exec.tez.ReduceRecordProcessor.init(ReduceRecordProcessor.java:182)
  ... 15 more
Caused by: java.util.concurrent.ExecutionException: java.lang.OutOfMemoryError: GC overhead limit exceeded
  at java.util.concurrent.FutureTask.report(FutureTask.java:122)
  at java.util.concurrent.FutureTask.get(FutureTask.java:192)
  at org.apache.hadoop.hive.ql.exec.Operator.completeInitialization(Operator.java:387)
  ... 20 more
Caused by: java.lang.OutOfMemoryError: GC overhead limit exceeded
  at org.apache.hadoop.hive.ql.exec.persistence.FlatRowContainer.listRealloc(FlatRowContainer.java:259)
  at org.apache.hadoop.hive.ql.exec.persistence.FlatRowContainer.add(FlatRowContainer.java:86)
  at org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper.putRow(HashMapWrapper.java:133)
  at org.apache.hadoop.hive.ql.exec.tez.HashTableLoader.load(HashTableLoader.java:211)
  at org.apache.hadoop.hive.ql.exec.MapJoinOperator.loadHashTable(MapJoinOperator.java:310)
  at org.apache.hadoop.hive.ql.exec.MapJoinOperator$1.call(MapJoinOperator.java:179)
  at org.apache.hadoop.hive.ql.exec.MapJoinOperator$1.call(MapJoinOperator.java:175)
  at org.apache.hadoop.hive.ql.exec.tez.ObjectCache.retrieve(ObjectCache.java:75)
  at org.apache.hadoop.hive.ql.exec.tez.ObjectCache$1.call(ObjectCache.java:92)
  ... 4 more

TaskAttempt 4 killed
TaskAttempt 5 failed, info=
» Error: Failure while running task:java.lang.RuntimeException: java.lang.RuntimeException: Reduce operator initialization failed
  at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:173)
  at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:139)
  at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:347)
  at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:194)
  at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:185)
  at java.security.AccessController.doPrivileged(Native Method)
  at javax.security.auth.Subject.doAs(Subject.java:422)
  at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1724)
  at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.callInternal(TezTaskRunner.java:185)
  at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.callInternal(TezTaskRunner.java:181)
  at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
  at java.util.concurrent.FutureTask.run(FutureTask.java:266)
  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
  at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.RuntimeException: Reduce operator initialization failed
  at org.apache.hadoop.hive.ql.exec.tez.ReduceRecordProcessor.init(ReduceRecordProcessor.java:204)
  at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:149)
  ... 14 more
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: java.util.concurrent.ExecutionException: java.lang.OutOfMemoryError: GC overhead limit exceeded
  at org.apache.hadoop.hive.ql.exec.Operator.completeInitialization(Operator.java:389)
  at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:379)
  at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:482)
  at org.apache.hadoop.hive.ql.exec.Operator.initializeChildren(Operator.java:439)
  at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:376)
  at org.apache.hadoop.hive.ql.exec.tez.ReduceRecordProcessor.init(ReduceRecordProcessor.java:182)
  ... 15 more
Caused by: java.util.concurrent.ExecutionException: java.lang.OutOfMemoryError: GC overhead limit exceeded
  at java.util.concurrent.FutureTask.report(FutureTask.java:122)
  at java.util.concurrent.FutureTask.get(FutureTask.java:192)
  at org.apache.hadoop.hive.ql.exec.Operator.completeInitialization(Operator.java:387)
  ... 20 more
Caused by: java.lang.OutOfMemoryError: GC overhead limit exceeded
  at org.apache.hadoop.hive.ql.exec.persistence.FlatRowContainer.listRealloc(FlatRowContainer.java:259)
  at org.apache.hadoop.hive.ql.exec.persistence.FlatRowContainer.add(FlatRowContainer.java:86)
  at org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper.putRow(HashMapWrapper.java:133)
  at org.apache.hadoop.hive.ql.exec.tez.HashTableLoader.load(HashTableLoader.java:211)
  at org.apache.hadoop.hive.ql.exec.MapJoinOperator.loadHashTable(MapJoinOperator.java:310)
  at org.apache.hadoop.hive.ql.exec.MapJoinOperator$1.call(MapJoinOperator.java:179)
  at org.apache.hadoop.hive.ql.exec.MapJoinOperator$1.call(MapJoinOperator.java:175)
  at org.apache.hadoop.hive.ql.exec.tez.ObjectCache.retrieve(ObjectCache.java:75)
  at org.apache.hadoop.hive.ql.exec.tez.ObjectCache$1.call(ObjectCache.java:92)
  ... 4 more

TaskAttempt 6 failed, info=
» Error: Failure while running task:java.lang.RuntimeException: java.lang.RuntimeException: Reduce operator initialization failed
  at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:173)
  at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:139)
  at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:347)
  at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:194)
  at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable$1.run(TezTaskRunner.java:185)
  at java.security.AccessController.doPrivileged(Native Method)
  at javax.security.auth.Subject.doAs(Subject.java:422)
  at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1724)
  at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.callInternal(TezTaskRunner.java:185)
  at org.apache.tez.runtime.task.TezTaskRunner$TaskRunnerCallable.callInternal(TezTaskRunner.java:181)
  at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
  at java.util.concurrent.FutureTask.run(FutureTask.java:266)
  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
  at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.RuntimeException: Reduce operator initialization failed
  at org.apache.hadoop.hive.ql.exec.tez.ReduceRecordProcessor.init(ReduceRecordProcessor.java:204)
  at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:149)
  ... 14 more
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: java.util.concurrent.ExecutionException: java.lang.OutOfMemoryError: GC overhead limit exceeded
  at org.apache.hadoop.hive.ql.exec.Operator.completeInitialization(Operator.java:389)
  at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:379)
  at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:482)
  at org.apache.hadoop.hive.ql.exec.Operator.initializeChildren(Operator.java:439)
  at org.apache.hadoop.hive.ql.exec.Operator.initialize(Operator.java:376)
  at org.apache.hadoop.hive.ql.exec.tez.ReduceRecordProcessor.init(ReduceRecordProcessor.java:182)
  ... 15 more
Caused by: java.util.concurrent.ExecutionException: java.lang.OutOfMemoryError: GC overhead limit exceeded
  at java.util.concurrent.FutureTask.report(FutureTask.java:122)
  at java.util.concurrent.FutureTask.get(FutureTask.java:192)
  at org.apache.hadoop.hive.ql.exec.Operator.completeInitialization(Operator.java:387)
  ... 20 more
Caused by: java.lang.OutOfMemoryError: GC overhead limit exceeded
  at org.apache.hadoop.hive.ql.exec.persistence.FlatRowContainer.listRealloc(FlatRowContainer.java:259)
  at org.apache.hadoop.hive.ql.exec.persistence.FlatRowContainer.add(FlatRowContainer.java:86)
  at org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper.putRow(HashMapWrapper.java:133)
  at org.apache.hadoop.hive.ql.exec.tez.HashTableLoader.load(HashTableLoader.java:211)
  at org.apache.hadoop.hive.ql.exec.MapJoinOperator.loadHashTable(MapJoinOperator.java:310)
  at org.apache.hadoop.hive.ql.exec.MapJoinOperator$1.call(MapJoinOperator.java:179)
  at org.apache.hadoop.hive.ql.exec.MapJoinOperator$1.call(MapJoinOperator.java:175)
  at org.apache.hadoop.hive.ql.exec.tez.ObjectCache.retrieve(ObjectCache.java:75)
  at org.apache.hadoop.hive.ql.exec.tez.ObjectCache$1.call(ObjectCache.java:92)
  ... 4 more

1 Ответ

0 голосов
/ 15 января 2019

Согласно журналу, возникает исключение OutOfMemoryError: GC overhead limit exceeded при загрузке хеш-таблицы для MapJoin (HashTableLoader).

Проверьте текущие настройки и соответственно увеличьте их:

set hive.tez.container.size=4096; --value is in MB
set hive.auto.convert.join.noconditionaltask.size=1436549120; --value is in bytes (~1370 MB); recommended one third of container size

Попробуйте использовать оптимизированную для памяти хеш-таблицу:

set hive.mapjoin.optimized.hashtable=true;
set hive.mapjoin.optimized.hashtable.wbsize=10485760; --Default Value (10 * 1024 * 1024)
--Optimized hashtable uses a chain of buffers to store data. This is one buffer size.

Наконец, если ничего не помогает, вы можете отключить mapjoin:

set hive.auto.convert.join=false;
...