I have been playing with my MapReduce jobs on my local PC in pseudo-distributed mode and recently decided to move one of them to AWS. However, I get a timeout shortly after the job starts:
[hadoop@ip-172-31-36-184 ~]$ hadoop jar TweetsReconciliation.jar MapReduceProgram batch1 output
20/03/19 05:17:46 INFO client.RMProxy: Connecting to ResourceManager at ip-172-31-36-184.ec2.internal/172.31.36.184:8032
20/03/19 05:17:46 WARN mapreduce.JobResourceUploader: Hadoop command-line option parsing not performed. Implement the Tool interface and execute your application with ToolRunner to remedy this.
20/03/19 05:17:46 INFO input.FileInputFormat: Total input files to process : 3
20/03/19 05:18:51 INFO hdfs.DataStreamer: Exception in createBlockOutputStream
java.io.IOException: Got error, status=ERROR, status message , ack with firstBadLink as 172.31.42.32:50010
at org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferProtoUtil.checkBlockOpStatus(DataTransferProtoUtil.java:121)
at org.apache.hadoop.hdfs.DataStreamer.createBlockOutputStream(DataStreamer.java:1744)
at org.apache.hadoop.hdfs.DataStreamer.nextBlockOutputStream(DataStreamer.java:1648)
at org.apache.hadoop.hdfs.DataStreamer.run(DataStreamer.java:704)
20/03/19 05:18:51 WARN hdfs.DataStreamer: Abandoning BP-550493184-172.31.36.184-1584587201039:blk_1073743277_2453
20/03/19 05:18:51 WARN hdfs.DataStreamer: Excluding datanode DatanodeInfoWithStorage[172.31.42.32:50010,DS-7fb2702c-0ca8-452b-a2e5-fcae7da94245,DISK]
20/03/19 05:18:51 WARN hdfs.DataStreamer: Slow waitForAckedSeqno took 65079ms (threshold=30000ms)
20/03/19 05:18:51 INFO mapreduce.JobSubmitter: number of splits:3
20/03/19 05:18:51 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1584587248590_0007
20/03/19 05:18:51 INFO impl.YarnClientImpl: Submitted application application_1584587248590_0007
20/03/19 05:18:52 INFO mapreduce.Job: The url to track the job: http://ip-172-31-36-184.ec2.internal:20888/proxy/application_1584587248590_0007/
20/03/19 05:18:52 INFO mapreduce.Job: Running job: job_1584587248590_0007
20/03/19 05:26:53 INFO mapreduce.Job: Job job_1584587248590_0007 running in uber mode : false
20/03/19 05:26:53 INFO mapreduce.Job: map 0% reduce 0%
20/03/19 05:26:53 INFO mapreduce.Job: Job job_1584587248590_0007 failed with state FAILED due to: Application application_1584587248590_0007 failed 2 times due to AM Container for appattempt_1584587248590_0007_000002 exited with exitCode: -1000
Failing this attempt.Diagnostics: Call From ip-172-31-36-152/172.31.36.152 to ip-172-31-36-184.ec2.internal:8020 failed on socket timeout exception: org.apache.hadoop.net.ConnectTimeoutException: 20000 millis timeout while waiting for channel to be ready for connect. ch : java.nio.channels.SocketChannel[connection-pending remote=ip-172-31-36-184.ec2.internal/172.31.36.184:8020]; For more details see: http://wiki.apache.org/hadoop/SocketTimeout
org.apache.hadoop.net.ConnectTimeoutException: Call From ip-172-31-36-152/172.31.36.152 to ip-172-31-36-184.ec2.internal:8020 failed on socket timeout exception: org.apache.hadoop.net.ConnectTimeoutException: 20000 millis timeout while waiting for channel to be ready for connect. ch : java.nio.channels.SocketChannel[connection-pending remote=ip-172-31-36-184.ec2.internal/172.31.36.184:8020]; For more details see: http://wiki.apache.org/hadoop/SocketTimeout
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.hadoop.net.NetUtils.wrapWithMessage(NetUtils.java:801)
at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:751)
at org.apache.hadoop.ipc.Client.getRpcResponse(Client.java:1493)
at org.apache.hadoop.ipc.Client.call(Client.java:1435)
at org.apache.hadoop.ipc.Client.call(Client.java:1345)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:227)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:116)
at com.sun.proxy.$Proxy78.getFileInfo(Unknown Source)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.getFileInfo(ClientNamenodeProtocolTranslatorPB.java:796)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:409)
at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeMethod(RetryInvocationHandler.java:163)
at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invoke(RetryInvocationHandler.java:155)
at org.apache.hadoop.io.retry.RetryInvocationHandler$Call.invokeOnce(RetryInvocationHandler.java:95)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:346)
at com.sun.proxy.$Proxy79.getFileInfo(Unknown Source)
at org.apache.hadoop.hdfs.DFSClient.getFileInfo(DFSClient.java:1649)
at org.apache.hadoop.hdfs.DistributedFileSystem$27.doCall(DistributedFileSystem.java:1440)
at org.apache.hadoop.hdfs.DistributedFileSystem$27.doCall(DistributedFileSystem.java:1437)
at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:1452)
at org.apache.hadoop.yarn.util.FSDownload.copy(FSDownload.java:253)
at org.apache.hadoop.yarn.util.FSDownload.access$000(FSDownload.java:63)
at org.apache.hadoop.yarn.util.FSDownload$2.run(FSDownload.java:361)
at org.apache.hadoop.yarn.util.FSDownload$2.run(FSDownload.java:359)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1844)
at org.apache.hadoop.yarn.util.FSDownload.call(FSDownload.java:359)
at org.apache.hadoop.yarn.util.FSDownload.call(FSDownload.java:62)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.hadoop.net.ConnectTimeoutException: 20000 millis timeout while waiting for channel to be ready for connect. ch : java.nio.channels.SocketChannel[connection-pending remote=ip-172-31-36-184.ec2.internal/172.31.36.184:8020]
Given the first error, I suspect some problem with the output streams. I looked into the datanode logs and saw something like this in one of them:
2020-03-19 04:57:22,514 ERROR org.apache.hadoop.hdfs.server.datanode.DataNode (DataXceiver for client DFSClient_NONMAPREDUCE_-1489188075_1 at /172.31.36.184:59346 [Receiving block BP-550493184-172.31.36.184-1584587201039:blk_1073743270_2446]): ip-172-31-36-152.ec2.internal:50010:DataXceiver error processing WRITE_BLOCK operation src: /172.31.36.184:59346 dst: /172.31.36.152:50010
org.apache.hadoop.net.ConnectTimeoutException: 65000 millis timeout while waiting for channel to be ready for connect. ch : java.nio.channels.SocketChannel[connection-pending remote=/172.31.42.32:50010]
at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:534)
at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:495)
at org.apache.hadoop.hdfs.server.datanode.DataXceiver.writeBlock(DataXceiver.java:754)
at org.apache.hadoop.hdfs.protocol.datatransfer.Receiver.opWriteBlock(Receiver.java:166)
at org.apache.hadoop.hdfs.protocol.datatransfer.Receiver.processOp(Receiver.java:103)
at org.apache.hadoop.hdfs.server.datanode.DataXceiver.run(DataXceiver.java:288)
at java.lang.Thread.run(Thread.java:748)
And in another datanode log:
2020-03-19 03:05:05,288 INFO org.apache.hadoop.ipc.CallQueueManager (main): Using callQueue: class java.util.concurrent.LinkedBlockingQueue queueCapacity: 1000 scheduler: class org.apache.hadoop.ipc.DefaultRpcScheduler
2020-03-19 03:05:05,298 INFO org.apache.hadoop.ipc.Server (Socket Reader #1 for port 50020): Starting Socket Reader #1 for port 50020
2020-03-19 03:05:05,340 INFO org.apache.hadoop.hdfs.server.datanode.DataNode (main): Opened IPC server at /0.0.0.0:50020
2020-03-19 03:05:05,348 INFO org.apache.hadoop.hdfs.server.datanode.DataNode (main): Refresh request received for nameservices: null
2020-03-19 03:05:05,354 INFO org.apache.hadoop.hdfs.server.datanode.DataNode (main): Starting BPOfferServices for nameservices: <default>
2020-03-19 03:05:05,361 INFO org.apache.hadoop.hdfs.server.datanode.DataNode (Thread-20): Block pool <registering> (Datanode Uuid unassigned) service to ip-172-31-36-184.ec2.internal/172.31.36.184:8020 starting to offer service
2020-03-19 03:05:05,386 INFO org.apache.hadoop.ipc.Server (IPC Server Responder): IPC Server Responder: starting
2020-03-19 03:05:05,389 INFO org.apache.hadoop.ipc.Server (IPC Server listener on 50020): IPC Server listener on 50020: starting
2020-03-19 03:05:06,422 INFO org.apache.hadoop.ipc.Client (Thread-20): Retrying connect to server: ip-172-31-36-184.ec2.internal/172.31.36.184:8020. Already tried 0 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1000 MILLISECONDS)
2020-03-19 03:05:07,423 INFO org.apache.hadoop.ipc.Client (Thread-20): Retrying connect to server: ip-172-31-36-184.ec2.internal/172.31.36.184:8020. Already tried 1 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1000 MILLISECONDS)
2020-03-19 03:05:08,424 INFO org.apache.hadoop.ipc.Client (Thread-20): Retrying connect to server: ip-172-31-36-184.ec2.internal/172.31.36.184:8020. Already tried 2 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1000 MILLISECONDS)
In the last log there were 10 retry attempts, followed by a ConnectException. I don't have much experience with AWS, so I'm wondering what I could have done wrong. I had no problems like this on my local PC.
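All of these failures look like plain TCP connect timeouts between the private IPs (port 8020 to the namenode, port 50010 between datanodes), so one thing I intend to verify is raw reachability between the nodes. A minimal check from one of the core nodes would be something like this (just a sketch; the hosts and ports are taken from the logs above):
for target in ip-172-31-36-184.ec2.internal:8020 172.31.42.32:50010 172.31.36.152:50010; do
  host=${target%%:*}; port=${target##*:}
  # plain bash TCP probe, no extra tools needed
  if timeout 5 bash -c "cat < /dev/null > /dev/tcp/$host/$port"; then
    echo "$target reachable"
  else
    echo "$target NOT reachable"
  fi
done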
As for the setup, I have one master node and two core nodes, all set up as m5.xlarge instances.
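For reference, to confirm whether both core nodes' datanodes actually registered with the namenode, the standard report run on the master should list them:
hdfs dfsadmin -report   # lists live/dead datanodes as seen by the namenode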
core-site.xml:
<configuration>
<property>
<!-- URI of NN. Fully qualified. No IP.-->
<name>fs.defaultFS</name>
<value>hdfs://ip-172-31-36-184.ec2.internal:8020</value>
</property>
<property>
<name>hadoop.security.authentication</name>
<value>simple</value>
</property>
<property>
<name>hadoop.security.auth_to_local</name>
<value>
RULE:[1:$1@$0](.*@)s/@.*///L
RULE:[2:$1@$0](.*@)s/@.*///L
DEFAULT
</value>
</property>
<!--Without this executing hadoop jobs on cross_realm
clusters will fail on TOKEN_DELEGATION_ERROR
More info: https://issues.apache.org/jira/browse/MAPREDUCE-6565 -->
<property>
<name>hadoop.security.token.service.use_ip</name>
<value>true</value>
</property>
<property>
<name>hadoop.proxyuser.hive.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hive.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.httpfs.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.httpfs.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hue.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hue.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.livy.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.livy.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.oozie.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.oozie.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.presto.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.presto.groups</name>
<value>*</value>
</property>
<property>
<name>io.compression.codecs</name>
<value>org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.SnappyCodec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec</value>
</property>
<property>
<name>io.compression.codec.lzo.class</name>
<value>com.hadoop.compression.lzo.LzoCodec</value>
</property>
<property>
<name>fs.s3.buffer.dir</name>
<value>/mnt/s3,/mnt1/s3</value>
<final>true</final>
</property>
<property>
<name>fs.s3.impl</name>
<value>com.amazon.ws.emr.hadoop.fs.EmrFileSystem</value>
</property>
<property>
<name>fs.s3n.impl</name>
<value>com.amazon.ws.emr.hadoop.fs.EmrFileSystem</value>
</property>
<property>
<name>hadoop.security.key.provider.path</name>
<value>kms://http@ip-172-31-36-184.ec2.internal:9700/kms</value>
</property>
<property>
<name>ipc.client.connect.max.retries.on.timeouts</name>
<value>5</value>
</property>
<property>
<name>hadoop.security.key.default.bitlength</name>
<value>256</value>
</property>
<property>
<name>hadoop.http.filter.initializers</name>
<value>org.apache.hadoop.security.HttpCrossOriginFilterInitializer</value>
</property>
<property>
<name>hadoop.proxyuser.hadoop.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/mnt/var/lib/hadoop/tmp</value>
</property>
<property>
<name>hadoop.proxyuser.hadoop.hosts</name>
<value>*</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>65536</value>
</property>
<property>
<name>fs.AbstractFileSystem.s3.impl</name>
<value>org.apache.hadoop.fs.s3.EMRFSDelegate</value>
</property>
<property>
<name>fs.s3bfs.impl</name>
<value>org.apache.hadoop.fs.s3.S3FileSystem</value>
</property>
</configuration>
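fs.defaultFS points at the master's internal DNS name on port 8020, which is exactly the address in the timeouts above. To rule out a config mismatch, I can check that every node resolves the same value (standard command):
hdfs getconf -confKey fs.defaultFS
# expected on every node: hdfs://ip-172-31-36-184.ec2.internal:8020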
hdfs-site.xml:
<configuration>
<!-- non HA -->
<property>
<name>dfs.namenode.rpc-address</name>
<value>ip-172-31-36-184.ec2.internal:8020</value>
</property>
<property>
<name>dfs.namenode.http-address</name>
<value>ip-172-31-36-184.ec2.internal:50070</value>
</property>
<property>
<name>dfs.namenode.https-address</name>
<value>ip-172-31-36-184.ec2.internal:50470</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:///mnt/hdfs,file:///mnt1/hdfs</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:///mnt/namenode,file:///mnt1/namenode</value>
</property>
<property>
<name>dfs.permissions.superusergroup</name>
<value>hadoop</value>
<description>The name of the group of super-users.</description>
</property>
<!-- Enable Hue plugins -->
<property>
<name>dfs.namenode.plugins</name>
<value></value>
<description>Comma-separated list of namenode plug-ins to be activated.
</description>
</property>
<property>
<name>dfs.datanode.plugins</name>
<value></value>
<description>Comma-separated list of datanode plug-ins to be activated.
</description>
</property>
<!-- increase the number of datanode transceivers way above the default of 256
- this is for hbase -->
<property>
<name>dfs.datanode.max.xcievers</name>
<value>4096</value>
</property>
<!-- Configurations for large cluster -->
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.encryption.key.provider.uri</name>
<value>kms://http@ip-172-31-36-184.ec2.internal:9700/kms</value>
</property>
<property>
<name>dfs.hosts.exclude</name>
<value>/emr/instance-controller/lib/dfs.hosts.exclude</value>
</property>
<property>
<name>dfs.namenode.safemode.extension</name>
<value>5000</value>
</property>
<property>
<name>dfs.namenode.replication.max-streams</name>
<value>20</value>
</property>
<property>
<name>dfs.namenode.replication.max-streams-hard-limit</name>
<value>40</value>
</property>
<property>
<name>dfs.namenode.replication.work.multiplier.per.iteration</name>
<value>10</value>
</property>
<property>
<name>dfs.datanode.available-space-volume-choosing-policy.balanced-space-preference-fraction</name>
<value>1.0</value>
</property>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.datanode.available-space-volume-choosing-policy.balanced-space-threshold</name>
<value>10737418240</value>
</property>
<property>
<name>dfs.datanode.du.reserved</name>
<value>536870912</value>
</property>
<property>
<name>dfs.datanode.fsdataset.volume.choosing.policy</name>
<value>org.apache.hadoop.hdfs.server.datanode.fsdataset.AvailableSpaceVolumeChoosingPolicy</value>
</property>
<property>
<name>dfs.namenode.handler.count</name>
<value>64</value>
</property>
<property>
<name>dfs.name.dir</name>
<value>/mnt/namenode,/mnt1/namenode</value>
</property>
<property>
<name>dfs.data.dir</name>
<value>/mnt/hdfs,/mnt1/hdfs</value>
</property>
</configuration>
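The datanodes use the default ports (50010 for data transfer, 50020 for IPC, as seen in the logs) and store blocks under /mnt/hdfs and /mnt1/hdfs, so another sanity check I plan to run on each core node is whether the DataNode process is actually listening there (assuming ss from iproute2 is available; netstat would do the same):
sudo ss -tlnp | grep -E ':(50010|50020)\b'   # DataNode data-transfer and IPC ports
ls /mnt/hdfs /mnt1/hdfs                      # the configured dfs.datanode.data.dir volumes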
yarn-site.xml:
<configuration>
<property>
<name>yarn.timeline-service.hostname</name>
<value>ip-172-31-36-184.ec2.internal</value>
</property>
<property>
<name>yarn.web-proxy.address</name>
<value>ip-172-31-36-184.ec2.internal:20888</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>ip-172-31-36-184.ec2.internal:8025</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>ip-172-31-36-184.ec2.internal:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>ip-172-31-36-184.ec2.internal:8030</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle,</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.log.server.url</name>
<value>http://ip-172-31-36-184.ec2.internal:19888/jobhistory/logs</value>
</property>
<property>
<name>yarn.dispatcher.exit-on-error</name>
<value>true</value>
</property>
<property>
<name>yarn.nodemanager.local-dirs</name>
<value>/mnt/yarn,/mnt1/yarn</value>
<final>true</final>
</property>
<property>
<description>Where to store container logs.</description>
<name>yarn.nodemanager.log-dirs</name>
<value>/var/log/hadoop-yarn/containers</value>
</property>
<property>
<description>Where to aggregate logs to.</description>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>/var/log/hadoop-yarn/apps</value>
</property>
<property>
<description>Classpath for typical applications.</description>
<name>yarn.application.classpath</name>
<value>
$HADOOP_CONF_DIR,
$HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*,
$HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*,
$HADOOP_MAPRED_HOME/*,$HADOOP_MAPRED_HOME/lib/*,
$HADOOP_YARN_HOME/*,$HADOOP_YARN_HOME/lib/*,
/usr/lib/hadoop-lzo/lib/*,
/usr/share/aws/emr/emrfs/conf,
/usr/share/aws/emr/emrfs/lib/*,
/usr/share/aws/emr/emrfs/auxlib/*,
/usr/share/aws/emr/lib/*,
/usr/share/aws/emr/ddb/lib/emr-ddb-hadoop.jar,
/usr/share/aws/emr/goodies/lib/emr-hadoop-goodies.jar,
/usr/share/aws/emr/kinesis/lib/emr-kinesis-hadoop.jar,
/usr/share/aws/emr/cloudwatch-sink/lib/*,
/usr/share/aws/aws-java-sdk/*
</value>
</property>
<!-- The defaut setting (2.1) is silly. The virtual memory is not
a limiting factor on 64Bit systems, at least not a limiting
resource, so make it large, very large. -->
<property>
<name>yarn.nodemanager.vmem-pmem-ratio</name>
<value>5</value>
</property>
<property>
<name>yarn.node-labels.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.node-labels.am.default-node-label-expression</name>
<value>CORE</value>
</property>
<property>
<name>yarn.node-labels.fs-store.root-dir</name>
<value>file:///mnt/var/lib/hadoop-yarn/nodelabels</value>
</property>
<property>
<name>yarn.node-labels.configuration-type</name>
<value>distributed</value>
</property>
<property>
<name>yarn.log-aggregation.enable-local-cleanup</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.address</name>
<value>${yarn.nodemanager.hostname}:8041</value>
</property>
<property>
<name>yarn.nodemanager.container-metrics.enable</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.recovery.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.nodemanager.recovery.supervised</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.nodes.exclude-path</name>
<value>/emr/instance-controller/lib/yarn.nodes.exclude.xml</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.cross-origin.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.scheduler.increment-allocation-mb</name>
<value>32</value>
</property>
<property>
<name>yarn.resourcemanager.nodemanagers.heartbeat-interval-ms</name>
<value>250</value>
</property>
<property>
<name>yarn.nodemanager.node-labels.provider</name>
<value>config</value>
</property>
<property>
<name>yarn.nodemanager.node-labels.provider.configured-node-partition</name>
<value>CORE</value>
</property>
<property>
<name>yarn.resourcemanager.system-metrics-publisher.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.timeline-service.http-cross-origin.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.client.thread-count</name>
<value>64</value>
</property>
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>4</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.client.thread-count</name>
<value>64</value>
</property>
<property>
<name>yarn.nodemanager.container-manager.thread-count</name>
<value>64</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.client.thread-count</name>
<value>64</value>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>12288</value>
</property>
<property>
<name>yarn.nodemanager.localizer.client.thread-count</name>
<value>20</value>
</property>
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>172800</value>
</property>
<property>
<name>yarn.nodemanager.localizer.fetch.thread-count</name>
<value>20</value>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>12288</value>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-vcores</name>
<value>128</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>172.31.36.184</value>
</property>
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>32</value>
</property>
<property>
<name>yarn.timeline-service.enabled</name>
<value>true</value>
</property>
</configuration>
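On the YARN side the ResourceManager clearly accepts the submission (the application gets an id), so the only extra check I see there is whether both NodeManagers are registered and healthy:
yarn node -list -all   # run on the master; should show both core nodes as RUNNING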
As for the configuration, I simply waited until my cluster was in the Waiting state, as per the tutorial I am following.
At the beginning I had some problems with non-existent input paths, so I created the corresponding directories (/home/hadoop) and those errors went away. So I think at least some of the namenode/datanode machinery must have been working for that to succeed (i.e. Hadoop was able to check for the existence of certain paths in the file system).
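By "checking for paths" I mean metadata-only operations that go through the namenode RPC on port 8020, i.e. roughly the equivalent of:
hdfs dfs -ls /
hdfs dfs -ls batch1   # the input directory passed to the job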
The Hardware tab of the EMR cluster: