В докеризованной среде работник Spark не может разрешить хост приложения - PullRequest
0 голосов
/ 23 мая 2018

Мы работаем с Spark как микросервис Docker.Мое искровое приложение может отправлять задачи на рабочий узел, но рабочий узел не может подключиться к приложению, выбрасывая исключение UnknowHostException.В основном рабочий узел пытается связаться с приложением с containerId (658e5d214a60), который не разрешается в IP-адрес контейнера.

Работает, когда я запускаю эти службы с файлами yaml для docker-compose на локальной машине Linux, но не работает в контейнере AWS EC2.

Exception in thread "main" java.lang.reflect.UndeclaredThrowableException
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1713)
    at org.apache.spark.deploy.SparkHadoopUtil.runAsSparkUser(SparkHadoopUtil.scala:64)
    at org.apache.spark.executor.CoarseGrainedExecutorBackend$.run(CoarseGrainedExecutorBackend.scala:188)
    at org.apache.spark.executor.CoarseGrainedExecutorBackend$.main(CoarseGrainedExecutorBackend.scala:293)
    at org.apache.spark.executor.CoarseGrainedExecutorBackend.main(CoarseGrainedExecutorBackend.scala)
Caused by: org.apache.spark.SparkException: Exception thrown in awaitResult:
    at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:205)
    at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:75)
    at org.apache.spark.rpc.RpcEnv.setupEndpointRefByURI(RpcEnv.scala:101)
    at org.apache.spark.executor.CoarseGrainedExecutorBackend$$anonfun$run$1.apply$mcV$sp(CoarseGrainedExecutorBackend.scala:201)
    at org.apache.spark.deploy.SparkHadoopUtil$$anon$2.run(SparkHadoopUtil.scala:65)
    at org.apache.spark.deploy.SparkHadoopUtil$$anon$2.run(SparkHadoopUtil.scala:64)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:422)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1698)
    ... 4 more
Caused by: java.io.IOException: Failed to connect to 658e5d214a60:36335
    at org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:245)
    at org.apache.spark.network.client.TransportClientFactory.createClient(TransportClientFactory.java:187)
    at org.apache.spark.rpc.netty.NettyRpcEnv.createClient(NettyRpcEnv.scala:198)
    at org.apache.spark.rpc.netty.Outbox$$anon$1.call(Outbox.scala:194)
    at org.apache.spark.rpc.netty.Outbox$$anon$1.call(Outbox.scala:190)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:745)
Caused by: java.net.UnknownHostException: 658e5d214a60
    at java.net.InetAddress.getAllByName0(InetAddress.java:1259)
    at java.net.InetAddress.getAllByName(InetAddress.java:1171)
    at java.net.InetAddress.getAllByName(InetAddress.java:1105)
    at java.net.InetAddress.getByName(InetAddress.java:1055)
    at io.netty.util.internal.SocketUtils$8.run(SocketUtils.java:146)
    at io.netty.util.internal.SocketUtils$8.run(SocketUtils.java:143)
    at java.security.AccessController.doPrivileged(Native Method)
    at io.netty.util.internal.SocketUtils.addressByName(SocketUtils.java:143)
    at io.netty.resolver.DefaultNameResolver.doResolve(DefaultNameResolver.java:43)
    at io.netty.resolver.SimpleNameResolver.resolve(SimpleNameResolver.java:63)
    at io.netty.resolver.SimpleNameResolver.resolve(SimpleNameResolver.java:55)
    at io.netty.resolver.InetSocketAddressResolver.doResolve(InetSocketAddressResolver.java:57)
    at io.netty.resolver.InetSocketAddressResolver.doResolve(InetSocketAddressResolver.java:32)
    at io.netty.resolver.AbstractAddressResolver.resolve(AbstractAddressResolver.java:108)
    at io.netty.bootstrap.Bootstrap.doResolveAndConnect0(Bootstrap.java:208)
    at io.netty.bootstrap.Bootstrap.access$000(Bootstrap.java:49)
    at io.netty.bootstrap.Bootstrap$1.operationComplete(Bootstrap.java:188)
    at io.netty.bootstrap.Bootstrap$1.operationComplete(Bootstrap.java:174)
    at io.netty.util.concurrent.DefaultPromise.notifyListener0(DefaultPromise.java:507)
    at io.netty.util.concurrent.DefaultPromise.notifyListenersNow(DefaultPromise.java:481)
    at io.netty.util.concurrent.DefaultPromise.notifyListeners(DefaultPromise.java:420)
    at io.netty.util.concurrent.DefaultPromise.trySuccess(DefaultPromise.java:104)
    at io.netty.channel.DefaultChannelPromise.trySuccess(DefaultChannelPromise.java:82)
    at io.netty.channel.AbstractChannel$AbstractUnsafe.safeSetSuccess(AbstractChannel.java:978)
    at io.netty.channel.AbstractChannel$AbstractUnsafe.register0(AbstractChannel.java:512)
    at io.netty.channel.AbstractChannel$AbstractUnsafe.access$200(AbstractChannel.java:423)
    at io.netty.channel.AbstractChannel$AbstractUnsafe$1.run(AbstractChannel.java:482)
    at io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:163)
    at io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:403)
    at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:463)
    at io.netty.util.concurrent.SingleThreadEventExecutor$5.run(SingleThreadEventExecutor.java:858)
    at io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:138)
    ... 1 more
  • Версия Spark 2.3.0
  • Docker версия 1.12.6

Поскольку порт контейнера является динамическим, мы не можем отобразить его на хост, а затем использовать.

...