После запуска hbase в распределенной экосистеме oop система зависла, и ее пришлось перезагрузить, чтобы стабилизировать машину - PullRequest
0 голосов
/ 08 марта 2020

Я установил oop в распределенном режиме в виртуальных машинах Ubuntu EC2. Он работает нормально с 2 датодами и отдельным наменодом. Я установил hbase на машину с namenode, и через несколько минут после запуска hbase вся система зависла. Мне пришлось перезагрузить экземпляр EC2, чтобы система снова стала стабильной.

Имеет oop Версия: 2.10.0

Версия HBase: 2.2.3

hbase-site. xml

    <configuration>
    <property>
        <name>hbase.rootdir</name>
        <value>hdfs://namenode:9000/hbase</value>
    </property>

    <property>
                <name>hbase.zookeper.property.dataDir</name>
        <value>/home/ubuntu/zookeeper</value>
    </property>

    <property>
                <name>hbase.cluster.distributed</name>
                <value>true</value>
    </property>
    <property>
        <name>hbase.unsafe.stream.capability.enforce</name>
        <value>false</value>
    </property>
    <property>
            <name>hbase.zookeeper.property.clientPort</name>
            <value>2181</value>
    </property>
    <property>
            <name>hbase.zookeeper.quorum</name>
        <value>namenode</value>
    </property>
    <property>
            <name>hbase.tmp.dir</name>
        <value>/home/ubuntu/hbase/tmp</value>
            <description>Temporary directory on the local filesystem.</description>
    </property>
</configuration>

hdfs-site. xml

<configuration>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>/home/ubuntu/hadoop/data/nameNode</value>
        </property>

        <property>
        <name>dfs.datanode.data.dir</name>
        <value>/home/ubuntu/hadoop/data/dataNode</value>
        </property>

        <property>
            <name>dfs.replication</name>
            <value>2</value>
        </property>
</configuration>

core-site. xml

<configuration>
    <property>
        <name>fs.default.name</name>
        <value>hdfs://namenode:9000</value>
    </property>
</configuration>

Главный файл журнала hbase

org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.fs.PathIsNotEmptyDirectoryException): `/hbase/WALs/namenode,16020,1583642429033-splitting is non empty': Directory is not empty
    at org.apache.hadoop.hdfs.server.namenode.FSDirDeleteOp.delete(FSDirDeleteOp.java:115)
    at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.delete(FSNamesystem.java:2873)
    at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.delete(NameNodeRpcServer.java:1101)
    at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.delete(ClientNamenodeProtocolServerSideTranslatorPB.java:648)
    at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
    at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:507)
    at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1034)
    at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:994)
    at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:922)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:422)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1893)
    at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2833)

    at org.apache.hadoop.ipc.Client.getRpcResponse(Client.java:1489)
    at org.apache.hadoop.ipc.Client.call(Client.java:1435)
    at org.apache.hadoop.ipc.Client.call(Client.java:1345)
    at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:227)
    at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:116)
    at com.sun.proxy.$Proxy18.delete(Unknown Source)
    at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.delete(ClientNamenodeProtocolTranslatorPB.java:568)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:409)
    at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeMethod(RetryInvocationHandler.java:163)
    at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invoke(RetryInvocationHandler.java:155)
    at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeOnce(RetryInvocationHandler.java:95)
    at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:346)
    at com.sun.proxy.$Proxy19.delete(Unknown Source)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.hadoop.hbase.fs.HFileSystem$1.invoke(HFileSystem.java:372)
    at com.sun.proxy.$Proxy20.delete(Unknown Source)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.hadoop.hbase.fs.HFileSystem$1.invoke(HFileSystem.java:372)
    at com.sun.proxy.$Proxy20.delete(Unknown Source)
    at org.apache.hadoop.hdfs.DFSClient.delete(DFSClient.java:1591)
    at org.apache.hadoop.hdfs.DistributedFileSystem$17.doCall(DistributedFileSystem.java:798)
    at org.apache.hadoop.hdfs.DistributedFileSystem$17.doCall(DistributedFileSystem.java:795)
    at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
    at org.apache.hadoop.hdfs.DistributedFileSystem.delete(DistributedFileSystem.java:795)
    at org.apache.hadoop.hbase.master.SplitLogManager.splitLogDistributed(SplitLogManager.java:278)
    at org.apache.hadoop.hbase.master.MasterWalManager.splitLog(MasterWalManager.java:349)
    at org.apache.hadoop.hbase.master.MasterWalManager.splitLog(MasterWalManager.java:334)
    at org.apache.hadoop.hbase.master.MasterWalManager.splitLog(MasterWalManager.java:271)
    at org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure.splitLogs(ServerCrashProcedure.java:312)
    at org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure.executeFromState(ServerCrashProcedure.java:197)
    at org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure.executeFromState(ServerCrashProcedure.java:64)
    at org.apache.hadoop.hbase.procedure2.StateMachineProcedure.execute(StateMachineProcedure.java:194)
    at org.apache.hadoop.hbase.procedure2.Procedure.doExecute(Procedure.java:962)
    at org.apache.hadoop.hbase.procedure2.ProcedureExecutor.execProcedure(ProcedureExecutor.java:1662)
    at org.apache.hadoop.hbase.procedure2.ProcedureExecutor.executeProcedure(ProcedureExecutor.java:1409)
    at org.apache.hadoop.hbase.procedure2.ProcedureExecutor.access$1100(ProcedureExecutor.java:78)
    at org.apache.hadoop.hbase.procedure2.ProcedureExecutor$WorkerThread.run(ProcedureExecutor.java:1979)
java.io.EOFException: Unexpected EOF while trying to read response from server
    at org.apache.hadoop.hdfs.protocolPB.PBHelperClient.vintPrefixed(PBHelperClient.java:402)
    at org.apache.hadoop.hdfs.protocol.datatransfer.PipelineAck.readFields(PipelineAck.java:213)
    at org.apache.hadoop.hdfs.DataStreamer$ResponseProcessor.run(DataStreamer.java:1073)
2020-03-08 05:13:29,508 INFO  [main-SendThread(namenode:2181)] zookeeper.ClientCnxn: Client session timed out, have not heard from server in 66257ms for sessionid 0x170b88ad87c0000, closing socket connection and attempting reconnect

hbase zookeeper log

EndOfStreamException: Unable to read additional data from client sessionid 0x170b884854f0002, likely client has closed socket
    at org.apache.zookeeper.server.NIOServerCnxn.doIO(NIOServerCnxn.java:239)
    at org.apache.zookeeper.server.NIOServerCnxnFactory.run(NIOServerCnxnFactory.java:203)
    at java.lang.Thread.run(Thread.java:748)
2020-03-08 05:02:00,008 INFO  [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181] server.NIOServerCnxn: Closed socket connection for client /172.31.14.122:53892 which had sessionid 0x170b884854f0002
2020-03-08 05:02:00,013 INFO  [ProcessThread(sid:0 cport:2181):] server.PrepRequestProcessor: Processed session termination for sessionid: 0x170b884854f0005
2020-03-08 05:02:00,017 WARN  [NIOServerCxn.Factory:0.0.0.0/0.0.0.0:2181] server.NIOServerCnxn: caught end of stream exception
EndOfStreamException: Unable to read additional data from client sessionid 0x170b884854f0005, likely client has closed socket
    at org.apache.zookeeper.server.NIOServerCnxn.doIO(NIOServerCnxn.java:239)
    at org.apache.zookeeper.server.NIOServerCnxnFactory.run(NIOServerCnxnFactory.java:203)
    at java.lang.Thread.run(Thread.java:748)

Файлы журналов усекаются, копируются только исключения.

...