Hive Metastore мертв, но файл pid существует - PullRequest
0 голосов
/ 11 февраля 2019

Я настроил виртуальную машину Cloudera QuickStart в VirtualBox с сетевым адаптером в режиме моста. Все службы в хорошем состоянии. Но когда я запускаю службу metastore (sudo service hive-metastore restart), она сначала показывает статус OK, но через несколько секунд ее состояние отображается как FAILED вместе с сообщением «Hive Metastore мертв, а файл pid существует». То же самое происходит и со службой hiveserver2. Мой анализ приведен ниже.

  1. Я удалил оба файла pid из папки /var/run/hive и перезапустил scm-сервер / агент, а также службы hive-metastore и hiveserver2. Но файл pid создается снова, и выдается то же сообщение об ошибке.

  2. Я могу подключиться как к Hive CLI, так и к beeline. Я также могу подключиться через веб-интерфейс Hue и успешно выполнить запрос.

Имя хоста: quickstart.cloudera

имя пользователя: hive

pwd:mypassword

база данных: metastore

Я могу подключиться через mysqld.

Все hive-site.xml настроены, как указано ниже.

PATH1: /usr/lib/hive

PATH2: /etc/hive/conf

PATH3: /usr/lib/hive/conf

PATH4: /etc/hive/conf.dist

PATH5: /etc/hive/conf.cloudera.hive

<?xml version="1.0" encoding="UTF-8"?>
<!--Autogenerated by Cloudera Manager-->
<!--
  hive-site.xml as posted: one flat list of name/value properties.
  Grouping comments below are for readers only; property order is unchanged.
-->
<configuration>

  <!-- Metastore endpoint (Thrift URI). -->
  <property>
    <name>hive.metastore.uris</name>
    <value>thrift://localhost:9083</value>
  </property>

  <!--
    Metastore backing database: JDBC connection settings.
    NOTE(review): these database/DataNucleus properties look hand-added to the
    generated file; Cloudera Manager may overwrite them. Confirm.
  -->
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://localhost/metastore?createDatabaseIfNotExist=true</value>
    <description>the URL of the MySQL database</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>hive</value>
  </property>
  <!-- NOTE(review): the database password is stored here in plain text. -->
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>mypassword</value>
  </property>

  <!-- DataNucleus schema handling and metastore schema verification. -->
  <property>
    <name>datanucleus.autoCreateSchema</name>
    <value>false</value>
  </property>
  <property>
    <name>datanucleus.fixedDatastore</name>
    <value>true</value>
  </property>
  <property>
    <name>datanucleus.autoStartMechanism</name>
    <value>SchemaTable</value>
  </property>
  <property>
    <name>hive.metastore.schema.verification</name>
    <value>true</value>
  </property>

  <!-- Metastore client timeout and warehouse location. -->
  <property>
    <name>hive.metastore.client.socket.timeout</name>
    <value>300</value>
  </property>
  <property>
    <name>hive.metastore.warehouse.dir</name>
    <value>/user/hive/warehouse</value>
  </property>
  <property>
    <name>hive.warehouse.subdir.inherit.perms</name>
    <value>true</value>
  </property>

  <!-- Join conversion settings. -->
  <property>
    <name>hive.auto.convert.join</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.auto.convert.join.noconditionaltask.size</name>
    <value>20971520</value>
  </property>
  <property>
    <name>hive.optimize.bucketmapjoin.sortedmerge</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.smbjoin.cache.rows</name>
    <value>10000</value>
  </property>

  <!-- HiveServer2 operation logging. -->
  <property>
    <name>hive.server2.logging.operation.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.server2.logging.operation.log.location</name>
    <value>/var/log/hive/operation_logs</value>
  </property>

  <!-- Reducer and file-copy limits. -->
  <property>
    <name>mapred.reduce.tasks</name>
    <value>-1</value>
  </property>
  <property>
    <name>hive.exec.reducers.bytes.per.reducer</name>
    <value>67108864</value>
  </property>
  <property>
    <name>hive.exec.copyfile.maxsize</name>
    <value>33554432</value>
  </property>
  <property>
    <name>hive.exec.reducers.max</name>
    <value>1099</value>
  </property>

  <!-- Vectorization, statistics, merge and optimizer settings. -->
  <property>
    <name>hive.vectorized.groupby.checkinterval</name>
    <value>4096</value>
  </property>
  <property>
    <name>hive.vectorized.groupby.flush.percent</name>
    <value>0.1</value>
  </property>
  <property>
    <name>hive.compute.query.using.stats</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.vectorized.execution.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.vectorized.execution.reduce.enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.merge.mapfiles</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.merge.mapredfiles</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.cbo.enable</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.fetch.task.conversion</name>
    <value>minimal</value>
  </property>
  <property>
    <name>hive.fetch.task.conversion.threshold</name>
    <value>268435456</value>
  </property>
  <property>
    <name>hive.limit.pushdown.memory.usage</name>
    <value>0.1</value>
  </property>
  <property>
    <name>hive.merge.sparkfiles</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.merge.smallfiles.avgsize</name>
    <value>16777216</value>
  </property>
  <property>
    <name>hive.merge.size.per.task</name>
    <value>268435456</value>
  </property>
  <property>
    <name>hive.optimize.reducededuplication</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.optimize.reducededuplication.min.reducer</name>
    <value>4</value>
  </property>
  <property>
    <name>hive.map.aggr</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.map.aggr.hash.percentmemory</name>
    <value>0.5</value>
  </property>
  <property>
    <name>hive.optimize.sort.dynamic.partition</name>
    <value>false</value>
  </property>

  <!-- Execution engine selection ("mr" here) and Spark settings. -->
  <property>
    <name>hive.execution.engine</name>
    <value>mr</value>
  </property>
  <property>
    <name>spark.executor.memory</name>
    <value>52428800</value>
  </property>
  <property>
    <name>spark.driver.memory</name>
    <value>52428800</value>
  </property>
  <property>
    <name>spark.executor.cores</name>
    <value>1</value>
  </property>
  <property>
    <name>spark.yarn.driver.memoryOverhead</name>
    <value>64</value>
  </property>
  <property>
    <name>spark.yarn.executor.memoryOverhead</name>
    <value>64</value>
  </property>
  <property>
    <name>spark.dynamicAllocation.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>spark.dynamicAllocation.initialExecutors</name>
    <value>1</value>
  </property>
  <property>
    <name>spark.dynamicAllocation.minExecutors</name>
    <value>1</value>
  </property>
  <property>
    <name>spark.dynamicAllocation.maxExecutors</name>
    <value>2147483647</value>
  </property>

  <!-- Metastore UGI, concurrency support and ZooKeeper settings. -->
  <property>
    <name>hive.metastore.execute.setugi</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.support.concurrency</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.zookeeper.quorum</name>
    <value>quickstart.cloudera</value>
  </property>
  <property>
    <name>hive.zookeeper.client.port</name>
    <value>2181</value>
  </property>
  <property>
    <name>hive.zookeeper.namespace</name>
    <value>hive_zookeeper_namespace_hive</value>
  </property>
  <property>
    <name>hive.cluster.delegation.token.store.class</name>
    <value>org.apache.hadoop.hive.thrift.MemoryTokenStore</value>
  </property>

  <!-- HiveServer2 impersonation and SSL flags; Spark shuffle service flag. -->
  <property>
    <name>hive.server2.enable.doAs</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.server2.use.SSL</name>
    <value>false</value>
  </property>
  <property>
    <name>spark.shuffle.service.enabled</name>
    <value>true</value>
  </property>

</configuration>

Я увеличил объем памяти кучи Java до 1 ГБ и 150 единиц памяти Java в конфигурации Hive.

Я проверил журналы hive-metastore и hiveserver2. Там написано, что порт уже используется; я завершил процесс с этим PID и перезапустил службу Hive. Но та же ошибка повторяется снова и снова.

Я также пытаюсь записать данные из Spark в таблицы Hive, но он не соединяется с URI метастора; причина, кажется, та же, то есть службы hive-metastore и hiveserver2 не работают.

Я боролся с этим последние 2 дня и пробовал все виды решений, найденных в Интернете, но безрезультатно. Пожалуйста, помогите мне с этим.

ЛОГИ:

hive-metastore logs:

2019-02-11 06:13:48,685 ERROR [main]: metastore.HiveMetaStore (HiveMetaStore.java:main(6650)) - Metastore Thrift Server threw an exception...
org.apache.thrift.transport.TTransportException: Could not create ServerSocket on address 0.0.0.0/0.0.0.0:9083.
    at org.apache.thrift.transport.TServerSocket.<init>(TServerSocket.java:109)
    at org.apache.thrift.transport.TServerSocket.<init>(TServerSocket.java:91)
    at org.apache.thrift.transport.TServerSocket.<init>(TServerSocket.java:87)
    at org.apache.hadoop.hive.common.auth.HiveAuthUtils.getServerSocket(HiveAuthUtils.java:87)
    at org.apache.hadoop.hive.metastore.HiveMetaStore.startMetaStore(HiveMetaStore.java:6762)
    at org.apache.hadoop.hive.metastore.HiveMetaStore.main(HiveMetaStore.java:6646)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
    at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
Caused by: java.net.BindException: Address already in use (Bind failed)
    at java.net.PlainSocketImpl.socketBind(Native Method)
    at java.net.AbstractPlainSocketImpl.bind(AbstractPlainSocketImpl.java:387)
    at java.net.ServerSocket.bind(ServerSocket.java:375)
    at org.apache.thrift.transport.TServerSocket.<init>(TServerSocket.java:106)
    ... 11 more

2. hiveserver2 logs:


    at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
2019-02-11 06:15:17,985 WARN  [main]: server.HiveServer2 (HiveServer2.java:startHiveServer2(581)) - Error starting HiveServer2 on attempt 1, will retry in 60000ms
org.apache.hive.service.ServiceException: java.net.BindException: Address already in use
    at org.apache.hive.service.server.HiveServer2.start(HiveServer2.java:455)
    at org.apache.hive.service.server.HiveServer2.startHiveServer2(HiveServer2.java:543)
    at org.apache.hive.service.server.HiveServer2.access$700(HiveServer2.java:89)
    at org.apache.hive.service.server.HiveServer2$StartOptionExecutor.execute(HiveServer2.java:793)
    at org.apache.hive.service.server.HiveServer2.main(HiveServer2.java:666)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
    at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
Caused by: java.net.BindException: Address already in use
    at sun.nio.ch.Net.bind0(Native Method)
    at sun.nio.ch.Net.bind(Net.java:433)
    at sun.nio.ch.Net.bind(Net.java:425)
    at sun.nio.ch.ServerSocketChannelImpl.bind(ServerSocketChannelImpl.java:223)
    at sun.nio.ch.ServerSocketAdaptor.bind(ServerSocketAdaptor.java:74)
    at org.eclipse.jetty.server.nio.SelectChannelConnector.open(SelectChannelConnector.java:173)
    at org.eclipse.jetty.server.AbstractConnector.doStart(AbstractConnector.java:311)
    at org.eclipse.jetty.server.nio.SelectChannelConnector.doStart(SelectChannelConnector.java:251)
    at org.eclipse.jetty.util.component.AbstractLifeCycle.start(AbstractLifeCycle.java:59)
    at org.eclipse.jetty.server.Server.doStart(Server.java:272)
    at org.eclipse.jetty.util.component.AbstractLifeCycle.start(AbstractLifeCycle.java:59)
    at org.apache.hive.http.HttpServer.start(HttpServer.java:186)
    at org.apache.hive.service.server.HiveServer2.start(HiveServer2.java:451)
    ... 10 more

3. /var/log/hive/hadoop-cmf.quiickstart.cloudera.log.out


nd=1549906983997 duration=23 from=org.apache.hadoop.hive.ql.Driver>
2019-02-11 09:43:04,080 WARN  org.apache.hadoop.security.UserGroupInformation: [HiveServer2-Handler-Pool: Thread-37]: PriviledgedActionException as:hive (auth:SIMPLE) cause:org.apache.hive.service.cli.HiveSQLException: Couldn't find log associated with operation handle: OperationHandle [opType=EXECUTE_STATEMENT, getHandleIdentifier()=4a31b8b2-cb65-48db-8051-68da932c648d]
2019-02-11 09:43:04,080 WARN  org.apache.hive.service.cli.thrift.ThriftCLIService: [HiveServer2-Handler-Pool: Thread-37]: Error fetching results:
org.apache.hive.service.cli.HiveSQLException: Couldn't find log associated with operation handle: OperationHandle [opType=EXECUTE_STATEMENT, getHandleIdentifier()=4a31b8b2-cb65-48db-8051-68da932c648d]
    at org.apache.hive.service.cli.operation.OperationManager.getOperationLogRowSet(OperationManager.java:310)
    at org.apache.hive.service.cli.session.HiveSessionImpl.fetchResults(HiveSessionImpl.java:771)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:78)
    at org.apache.hive.service.cli.session.HiveSessionProxy.access$000(HiveSessionProxy.java:36)
    at org.apache.hive.service.cli.session.HiveSessionProxy$1.run(HiveSessionProxy.java:63)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:422)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1917)
    at org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:59)
    at com.sun.proxy.$Proxy21.fetchResults(Unknown Source)
    at org.apache.hive.service.cli.CLIService.fetchResults(CLIService.java:462)
    at org.apache.hive.service.cli.thrift.ThriftCLIService.FetchResults(ThriftCLIService.java:694)
    at org.apache.hive.service.cli.thrift.TCLIService$Processor$FetchResults.getResult(TCLIService.java:1553)
    at org.apache.hive.service.cli.thrift.TCLIService$Processor$FetchResults.getResult(TCLIService.java:1538)
    at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39)
    at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39)
    at org.apache.hive.service.auth.TSetIpAddressProcessor.process(TSetIpAddressProcessor.java:56)
    at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:286)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
2019-02-11 09:43:04,092 INFO  org.apache.hadoop.conf.Configuration.deprecation: [HiveServer2-Handler-Pool: Thread-37]: mapred.input.dir is deprecated. Instead, use mapreduce.input.fileinputformat.inputdir
2019-02-11 09:43:04,107 INFO  org.apache.hadoop.mapred.FileInputFormat: [HiveServer2-Handler-Pool: Thread-37]: Total input paths to process : 1
2019-02-11 09:43:04,184 WARN  org.apache.hadoop.security.UserGroupInformation: [HiveServer2-Handler-Pool: Thread-37]: PriviledgedActionException as:hive (auth:SIMPLE) cause:org.apache.hive.service.cli.HiveSQLException: Couldn't find log associated with operation handle: OperationHandle [opType=EXECUTE_STATEMENT, getHandleIdentifier()=4a31b8b2-cb65-48db-8051-68da932c648d]
2019-02-11 09:43:04,184 WARN  org.apache.hive.service.cli.thrift.ThriftCLIService: [HiveServer2-Handler-Pool: Thread-37]: Error fetching results:
org.apache.hive.service.cli.HiveSQLException: Couldn't find log associated with operation handle: OperationHandle [opType=EXECUTE_STATEMENT, getHandleIdentifier()=4a31b8b2-cb65-48db-8051-68da932c648d]
    at org.apache.hive.service.cli.operation.OperationManager.getOperationLogRowSet(OperationManager.java:310)
    at org.apache.hive.service.cli.session.HiveSessionImpl.fetchResults(HiveSessionImpl.java:771)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:78)
    at org.apache.hive.service.cli.session.HiveSessionProxy.access$000(HiveSessionProxy.java:36)
    at org.apache.hive.service.cli.session.HiveSessionProxy$1.run(HiveSessionProxy.java:63)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:422)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1917)
    at org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:59)
    at com.sun.proxy.$Proxy21.fetchResults(Unknown Source)
    at org.apache.hive.service.cli.CLIService.fetchResults(CLIService.java:462)
    at org.apache.hive.service.cli.thrift.ThriftCLIService.FetchResults(ThriftCLIService.java:694)
    at org.apache.hive.service.cli.thrift.TCLIService$Processor$FetchResults.getResult(TCLIService.java:1553)
    at org.apache.hive.service.cli.thrift.TCLIService$Processor$FetchResults.getResult(TCLIService.java:1538)
    at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39)
    at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39)
    at org.apache.hive.service.auth.TSetIpAddressProcessor.process(TSetIpAddressProcessor.java:56)
    at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:286)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
2019-02-11 09:43:04,185 INFO  org.apache.hive.service.cli.operation.OperationManager: [HiveServer2-Handler-Pool: Thread-37]: Closing operation: OperationHandle [opType=EXECUTE_STATEMENT, getHandleIdentifier()=4a31b8b2-cb65-48db-8051-68da932c648d]
2019-02-11 09:43:04,185 INFO  org.apache.hadoop.hive.ql.exec.ListSinkOperator: [HiveServer2-Handler-Pool: Thread-37]: 0 finished. closing...
2019-02-11 09:43:04,185 INFO  org.apache.hadoop.hive.ql.exec.ListSinkOperator: [HiveServer2-Handler-Pool: Thread-37]: 0 Close done


[cloudera@quickstart hive]$ sudo lsof -i :10002 -S
COMMAND  PID USER   FD   TYPE  DEVICE SIZE/OFF NODE NAME
java    7071 hive  390u  IPv4 1159430      0t0  TCP *:documentum (LISTEN)


[cloudera@quickstart hive]$ sudo lsof -i :10000 -S
COMMAND   PID     USER   FD   TYPE  DEVICE SIZE/OFF NODE NAME
java     7071     hive  382u  IPv4 1215796      0t0  TCP quickstart.cloudera:ndmp->quickstart.cloudera:37459 (ESTABLISHED)
java     7071     hive  392u  IPv4 1159420      0t0  TCP *:ndmp (LISTEN)
java    19405 cloudera  429u  IPv4 1215792      0t0  TCP quickstart.cloudera:37459->quickstart.cloudera:ndmp (ESTABLISHED)


[cloudera@quickstart hive]$ sudo lsof -i :9083 -S
COMMAND  PID USER   FD   TYPE  DEVICE SIZE/OFF NODE NAME
java    7071 hive  397u  IPv4 1284068      0t0  TCP quickstart.cloudera:45466->quickstart.cloudera:9083 (ESTABLISHED)
java    7418 hive  391u  IPv4 1157031      0t0  TCP *:9083 (LISTEN)
java    7418 hive  392u  IPv4 1284070      0t0  TCP quickstart.cloudera:9083->quickstart.cloudera:45466 (ESTABLISHED)

Please help.
...