Я добавил новый узел в свой существующий одиночный узел кластер кассандры. Он имеет около 48 ГБ данных. За это отвечает только одно пространство ключей, и оно имеет коэффициент репликации , равный '2' (я изменил его после добавления нового узла). Я пытаюсь запустить перестроение nodetool на новом узле, чтобы данные могли передаваться на него с начального узла. Поток завершился после передачи 36 ГБ данных, и узел вышел из строя. Поэтому я повторил процесс, но поток продолжает сбой после передачи некоторых данных (12-25 ГБ). Это заканчивается следующей ошибкой.
error: Error while rebuilding node: Stream failed
-- StackTrace --
java.lang.RuntimeException: Error while rebuilding node: Stream failed
at org.apache.cassandra.service.StorageService.rebuild(StorageService.java:1319)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at sun.reflect.misc.Trampoline.invoke(MethodUtil.java:71)
at sun.reflect.GeneratedMethodAccessor3.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at sun.reflect.misc.MethodUtil.invoke(MethodUtil.java:275)
at com.sun.jmx.mbeanserver.StandardMBeanIntrospector.invokeM2(StandardMBeanIntrospector.java:112)
at com.sun.jmx.mbeanserver.StandardMBeanIntrospector.invokeM2(StandardMBeanIntrospector.java:46)
at com.sun.jmx.mbeanserver.MBeanIntrospector.invokeM(MBeanIntrospector.java:237)
at com.sun.jmx.mbeanserver.PerInterface.invoke(PerInterface.java:138)
at com.sun.jmx.mbeanserver.MBeanSupport.invoke(MBeanSupport.java:252)
at com.sun.jmx.interceptor.DefaultMBeanServerInterceptor.invoke(DefaultMBeanServerInterceptor.java:819)
at com.sun.jmx.mbeanserver.JmxMBeanServer.invoke(JmxMBeanServer.java:801)
at javax.management.remote.rmi.RMIConnectionImpl.doOperation(RMIConnectionImpl.java:1468)
at javax.management.remote.rmi.RMIConnectionImpl.access$300(RMIConnectionImpl.java:76)
at javax.management.remote.rmi.RMIConnectionImpl$PrivilegedOperation.run(RMIConnectionImpl.java:1309)
at javax.management.remote.rmi.RMIConnectionImpl.doPrivilegedOperation(RMIConnectionImpl.java:1401)
at javax.management.remote.rmi.RMIConnectionImpl.invoke(RMIConnectionImpl.java:829)
at sun.reflect.GeneratedMethodAccessor6.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at sun.rmi.server.UnicastServerRef.dispatch(UnicastServerRef.java:357)
at sun.rmi.transport.Transport$1.run(Transport.java:200)
at sun.rmi.transport.Transport$1.run(Transport.java:197)
at java.security.AccessController.doPrivileged(Native Method)
at sun.rmi.transport.Transport.serviceCall(Transport.java:196)
at sun.rmi.transport.tcp.TCPTransport.handleMessages(TCPTransport.java:573)
at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run0(TCPTransport.java:834)
at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.lambda$run$0(TCPTransport.java:688)
at java.security.AccessController.doPrivileged(Native Method)
at sun.rmi.transport.tcp.TCPTransport$ConnectionHandler.run(TCPTransport.java:687)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
PS Я убедился, что для streaming_socket_timeout_in_ms установлено значение не менее 24 часов. Пожалуйста, помогите мне, ребята.
Спасибо.
Обновление: - Я запустил пересборка nodetool keyspace_name вместо перестройка nodetool , и это закончилось этимСнова ошибка.
WARN [StreamReceiveTask:9] 2019-10-23 11:14:41,522 StreamResultFuture.java:214 - [Stream #b9b051b0-f580-11e9-92dd-9765711f899a] Stream failed
ERROR [RMI TCP Connection(12)-10.128.1.3] 2019-10-23 11:14:42,316 StorageService.java:1318 - Error while rebuilding node
org.apache.cassandra.streaming.StreamException: Stream failed
at org.apache.cassandra.streaming.StreamResultFuture.maybeComplete(StreamResultFuture.java:215) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.streaming.StreamResultFuture.handleSessionComplete(StreamResultFuture.java:191) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.streaming.StreamSession.closeSession(StreamSession.java:481) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.streaming.StreamSession.onError(StreamSession.java:571) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.streaming.StreamReceiveTask$OnCompletionRunnable.run(StreamReceiveTask.java:281) ~[apache-cassandra-3.11.4.jar:3.11.4]
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) ~[na:1.8.0_222]
at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[na:1.8.0_222]
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[na:1.8.0_222]
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[na:1.8.0_222]
at org.apache.cassandra.concurrent.NamedThreadFactory.lambda$threadLocalDeallocator$0(NamedThreadFactory.java:81) ~[apache-cassandra-3.11.4.jar:3.11.4]
at java.lang.Thread.run(Thread.java:748) ~[na:1.8.0_222]
INFO [Service Thread] 2019-10-23 11:14:43,223 GCInspector.java:284 - ConcurrentMarkSweep GC in 310ms. CMS Old Gen: 2391324840 -> 639245216; Code Cache: 38320192 -> 38627904; Compressed Class Space: 554$
ERROR [STREAM-IN-/10.128.1.1:7000] 2019-10-23 11:14:48,769 StreamSession.java:593 - [Stream #b9b051b0-f580-11e9-92dd-9765711f899a] Streaming error occurred on session with peer 10.128.1.1
java.lang.RuntimeException: Outgoing stream handler has been closed
at org.apache.cassandra.streaming.ConnectionHandler.sendMessage(ConnectionHandler.java:143) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.streaming.StreamSession.receive(StreamSession.java:655) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.streaming.StreamSession.messageReceived(StreamSession.java:523) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.streaming.ConnectionHandler$IncomingMessageHandler.run(ConnectionHandler.java:317) ~[apache-cassandra-3.11.4.jar:3.11.4]
at java.lang.Thread.run(Thread.java:748) [na:1.8.0_222]
Обновление 2: - Я попытался снова перестроить nodetool на новом узле. Поток снова завершается ошибкой после передачи около 95% данных. Это журнал потокового узла
INFO [STREAM-INIT-/10.128.1.3:56486] 2019-10-23 11:16:03,497 StreamResultFuture.java:116 - [Stream #80136bd0-f586-11e9-92dd-9765711f899a ID#0] Creating new streaming plan for Rebuild
INFO [STREAM-INIT-/10.128.1.3:56486] 2019-10-23 11:16:03,498 StreamResultFuture.java:123 - [Stream #80136bd0-f586-11e9-92dd-9765711f899a, ID#0] Received streaming plan for Rebuild
INFO [STREAM-INIT-/10.128.1.3:56488] 2019-10-23 11:16:03,498 StreamResultFuture.java:123 - [Stream #80136bd0-f586-11e9-92dd-9765711f899a, ID#0] Received streaming plan for Rebuild
INFO [STREAM-IN-/10.128.1.3:56488] 2019-10-23 11:16:03,600 StreamResultFuture.java:173 - [Stream #80136bd0-f586-11e9-92dd-9765711f899a ID#0] Prepare completed. Receiving 0 files(0.000KiB), sending 133 f$
INFO [Service Thread] 2019-10-23 11:19:14,472 GCInspector.java:284 - ParNew GC in 517ms. CMS Old Gen: 104131728 -> 121315352; Par Eden Space: 1342177280 -> 0; Par Survivor Space: 67963984 -> 61263088
ERROR [STREAM-IN-/10.128.1.3:56488] 2019-10-23 11:56:43,902 StreamSession.java:706 - [Stream #80136bd0-f586-11e9-92dd-9765711f899a] Remote peer 10.128.1.3 failed stream session.
INFO [IndexSummaryManager:1] 2019-10-23 11:58:32,284 IndexSummaryRedistribution.java:77 - Redistributing index summaries
INFO [STREAM-IN-/10.128.1.3:56488] 2019-10-23 11:59:38,687 StreamResultFuture.java:187 - [Stream #80136bd0-f586-11e9-92dd-9765711f899a] Session with /10.128.1.3 is complete
ERROR [STREAM-OUT-/10.128.1.3:56486] 2019-10-23 11:59:38,688 StreamSession.java:593 - [Stream #80136bd0-f586-11e9-92dd-9765711f899a] Streaming error occurred on session with peer 10.128.1.3
java.lang.RuntimeException: Transfer of file /var/lib/cassandra/data/thingsboard/ts_kv_cf-53b7bf3096ec11e99154356269723c5c/md-583-big-Data.db already completed or aborted (perhaps session failed?).
at org.apache.cassandra.streaming.messages.OutgoingFileMessage.startTransfer(OutgoingFileMessage.java:119) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.streaming.messages.OutgoingFileMessage$1.serialize(OutgoingFileMessage.java:49) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.streaming.messages.OutgoingFileMessage$1.serialize(OutgoingFileMessage.java:41) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.streaming.messages.StreamMessage.serialize(StreamMessage.java:50) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.streaming.ConnectionHandler$OutgoingMessageHandler.sendMessage(ConnectionHandler.java:408) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.streaming.ConnectionHandler$OutgoingMessageHandler.run(ConnectionHandler.java:380) ~[apache-cassandra-3.11.4.jar:3.11.4]
at java.lang.Thread.run(Thread.java:748) [na:1.8.0_222]
WARN [STREAM-IN-/10.128.1.3:56488] 2019-10-23 11:59:38,688 StreamResultFuture.java:214 - [Stream #80136bd0-f586-11e9-92dd-9765711f899a] Stream failed
INFO [STREAM-INIT-/10.128.1.3:56674] 2019-10-23 12:03:24,860 StreamResultFuture.java:116 - [Stream #1da92910-f58d-11e9-92dd-9765711f899a ID#0] Creating new streaming plan for Rebuild
INFO [STREAM-INIT-/10.128.1.3:56674] 2019-10-23 12:03:24,861 StreamResultFuture.java:123 - [Stream #1da92910-f58d-11e9-92dd-9765711f899a, ID#0] Received streaming plan for Rebuild
INFO [STREAM-INIT-/10.128.1.3:56676] 2019-10-23 12:03:24,861 StreamResultFuture.java:123 - [Stream #1da92910-f58d-11e9-92dd-9765711f899a, ID#0] Received streaming plan for Rebuild
INFO [STREAM-IN-/10.128.1.3:56676] 2019-10-23 12:03:24,950 StreamResultFuture.java:173 - [Stream #1da92910-f58d-11e9-92dd-9765711f899a ID#0] Prepare completed. Receiving 0 files(0.000KiB), sending 133 f$
INFO [Service Thread] 2019-10-23 12:04:18,160 GCInspector.java:284 - ParNew GC in 307ms. CMS Old Gen: 124972984 -> 125070416; Par Eden Space: 1342177280 -> 0; Par Survivor Space: 61042328 -> 82423296
INFO [GossipStage:1] 2019-10-23 12:27:39,200 Gossiper.java:1026 - InetAddress /10.128.1.3 is now DOWN
INFO [HANDSHAKE-/10.128.1.3] 2019-10-23 12:27:39,424 OutboundTcpConnection.java:561 - Handshaking version with /10.128.1.3
ERROR [STREAM-IN-/10.128.1.3:56676] 2019-10-23 12:27:45,107 StreamSession.java:593 - [Stream #1da92910-f58d-11e9-92dd-9765711f899a] Streaming error occurred on session with peer 10.128.1.3
java.net.SocketException: End-of-stream reached
at org.apache.cassandra.streaming.messages.StreamMessage.deserialize(StreamMessage.java:71) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.streaming.ConnectionHandler$IncomingMessageHandler.run(ConnectionHandler.java:311) ~[apache-cassandra-3.11.4.jar:3.11.4]
at java.lang.Thread.run(Thread.java:748) [na:1.8.0_222]
INFO [STREAM-IN-/10.128.1.3:56676] 2019-10-23 12:27:45,108 StreamResultFuture.java:187 - [Stream #1da92910-f58d-11e9-92dd-9765711f899a] Session with /10.128.1.3 is complete
ERROR [STREAM-OUT-/10.128.1.3:56674] 2019-10-23 12:27:45,108 StreamSession.java:593 - [Stream #1da92910-f58d-11e9-92dd-9765711f899a] Streaming error occurred on session with peer 10.128.1.3
org.apache.cassandra.io.FSReadError: java.io.IOException: Broken pipe
at org.apache.cassandra.io.util.ChannelProxy.transferTo(ChannelProxy.java:145) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.streaming.compress.CompressedStreamWriter.lambda$write$0(CompressedStreamWriter.java:85) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.io.util.BufferedDataOutputStreamPlus.applyToChannel(BufferedDataOutputStreamPlus.java:350) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.streaming.compress.CompressedStreamWriter.write(CompressedStreamWriter.java:85) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.streaming.messages.OutgoingFileMessage.serialize(OutgoingFileMessage.java:101) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.streaming.messages.OutgoingFileMessage$1.serialize(OutgoingFileMessage.java:52) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.streaming.messages.OutgoingFileMessage$1.serialize(OutgoingFileMessage.java:41) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.streaming.messages.StreamMessage.serialize(StreamMessage.java:50) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.streaming.ConnectionHandler$OutgoingMessageHandler.sendMessage(ConnectionHandler.java:408) ~[apache-cassandra-3.11.4.jar:3.11.4]
at org.apache.cassandra.streaming.ConnectionHandler$OutgoingMessageHandler.run(ConnectionHandler.java:380) ~[apache-cassandra-3.11.4.jar:3.11.4]
at java.lang.Thread.run(Thread.java:748) [na:1.8.0_222]
Caused by: java.io.IOException: Broken pipe
at sun.nio.ch.FileChannelImpl.transferTo0(Native Method) ~[na:1.8.0_222]
at sun.nio.ch.FileChannelImpl.transferToDirectlyInternal(FileChannelImpl.java:428) ~[na:1.8.0_222]
at sun.nio.ch.FileChannelImpl.transferToDirectly(FileChannelImpl.java:493) ~[na:1.8.0_222]
at sun.nio.ch.FileChannelImpl.transferTo(FileChannelImpl.java:605) ~[na:1.8.0_222]
at org.apache.cassandra.io.util.ChannelProxy.transferTo(ChannelProxy.java:141) ~[apache-cassandra-3.11.4.jar:3.11.4]
... 10 common frames omitted
WARN [STREAM-IN-/10.128.1.3:56676] 2019-10-23 12:27:45,108 StreamResultFuture.java:214 - [Stream #1da92910-f58d-11e9-92dd-9765711f899a] Stream failed
INFO [RMI TCP Connection(124)-10.128.1.1] 2019-10-23 12:28:19,854 Gossiper.java:525 - Removing host: 0e8ad28d-6cc2-46df-8d3f-f346d464db40
INFO [RMI TCP Connection(124)-10.128.1.1] 2019-10-23 12:28:19,854 Gossiper.java:526 - Sleeping for 30000ms to ensure /10.128.1.3 does not change
INFO [RMI TCP Connection(124)-10.128.1.1] 2019-10-23 12:28:49,854 Gossiper.java:533 - Advertising removal for /10.128.1.3
INFO [RMI TCP Connection(124)-10.128.1.1] 2019-10-23 12:28:50,245 StreamResultFuture.java:90 - [Stream #aae08f50-f590-11e9-9934-850cf6bcace3] Executing streaming plan for Restore replica count
INFO [MiscStage:1] 2019-10-23 12:28:50,247 StorageService.java:4459 - Received unexpected REPLICATION_FINISHED message from /10.128.1.1. Was this node recently a removal coordinator?
INFO [RMI TCP Connection(124)-10.128.1.1] 2019-10-23 12:28:50,248 StorageService.java:2584 - Removing tokens [-9135980046459212380, -9100471967410923634, -9097242662756219549, -8974765285872613713, -895$
INFO [RMI TCP Connection(124)-10.128.1.1] 2019-10-23 12:28:50,317 Gossiper.java:557 - Completing removal of /10.128.1.3
INFO [HANDSHAKE-/10.128.1.3] 2019-10-23 12:31:35,019 OutboundTcpConnection.java:561 - Handshaking version with /10.128.1.3
Я совершенно не понимаю, почему он выходит из строя. Кто-нибудь может указать мне в правильном направлении? Я удостоверился, что нет проблемы с брандмауэром, и я не использую SSL для связи между узлами.