YARN NodeManager error when running the basic SparkPi example
asked 16 March 2020

I'm running a basic SparkPi program to test my YARN setup with Spark. The job I'm submitting is similar to the example on the website.

    spark-submit --master yarn --deploy-mode cluster \
        --num-executors 75 --executor-cores 2 --executor-memory 6g \
        --class org.apache.spark.examples.JavaSparkPi \
        /home/spark/examples/jars/spark_examples.jar 1000
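
For reference, if Spark's default executor memory overhead applies (max(384 MiB, 10% of spark.executor.memory); I have not overridden it), the request works out to roughly:

    6 GiB heap + 0.1 x 6 GiB ≈ 6.6 GiB per executor container
    75 executors x ~6.6 GiB ≈ 495 GiB across the cluster, plus one AM container

so each individual container is well under the yarn.scheduler.maximum-allocation-mb of 262144 MB set below.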

However, the job never completes, and the NodeManagers on several nodes show this error:

2020-03-16 14:27:42,917 WARN org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl: couldn't find container container_1584386586744_0001_01_000319 while processing FINISH_CONTAINERS event

I'm not sure what is causing this. Any advice is appreciated.
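
In case it helps, this is how I pull the aggregated application logs (the application id is inferred from the container id in the warning above; this assumes log aggregation is enabled on the cluster):

    yarn logs -applicationId application_1584386586744_0001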

Here is the yarn-site.xml file for the standalone cluster (the one that produces the error):

<configuration>
<property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
</property>
<property>
        <name>yarn.nodemanager.auxservices.mapreduce.shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
        <name>yarn.acl.enable</name>
        <value>0</value>
</property>
<property>
        <name>yarn.resourcemanager.hostname</name>
        <value>172.16.1.1</value>
</property>
<property>
        <name>yarn.resourcemanager.webapp.address</name>
        <value>10.66.4.100:8088</value>
</property>
<property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
</property>
<property>
        <name>yarn.resourcemanager.resource-tracker.address</name>
        <value>172.16.1.1</value>
</property>
<property>
        <name>yarn.scheduler.maximum-allocation-mb</name>
        <value>262144</value>
</property>
<property>
        <name>yarn.nodemanager.resource.memory-mb</name>
        <value>262144</value>
</property>
<property>
        <name>yarn.scheduler.maximum-allocation-vcores</name>
        <value>56</value>
</property>
<property>
        <name>yarn.nodemanager.resource.cpu-vcores</name>
        <value>56</value>
</property>
</configuration>
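
One difference I notice when diffing the two files: yarn.resourcemanager.resource-tracker.address has no port on the standalone cluster (172.16.1.1), while on EMR it is explicit (:8025). I do not know whether YARN falls back cleanly to the default tracker port when it is omitted; with the stock Hadoop default (8031) the property would read:

<property>
        <name>yarn.resourcemanager.resource-tracker.address</name>
        <value>172.16.1.1:8031</value>
</property>

I have not confirmed that this is related to the error.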

Here is the yarn-site.xml file for the EMR cluster (which works):

<configuration>
<property>
<name>yarn.timeline-service.hostname</name>
<value>ip-172-31-63-120.ec2.internal</value>
</property>

<property>
<name>yarn.web-proxy.address</name>
<value>ip-172-31-63-120.ec2.internal:20888</value>
</property> 

<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>ip-172-31-63-120.ec2.internal:8025</value>
</property>

<property>
<name>yarn.resourcemanager.address</name>
<value>ip-172-31-63-120.ec2.internal:8032</value>
</property>

<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>ip-172-31-63-120.ec2.internal:8030</value>
</property>

<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.log.server.url</name>
<value>http://ip-172-31-63-120.ec2.internal:19888/jobhistory/logs</value>
</property>
<property>
<name>yarn.dispatcher.exit-on-error</name>
<value>true</value>
</property>

<property>
 <name>yarn.nodemanager.local-dirs</name>
 <value>/mnt/yarn,/mnt1/yarn</value>
<final>true</final>
</property>

<property>
<description>Where to store container logs.</description>
<name>yarn.nodemanager.log-dirs</name>
<value>/var/log/hadoop-yarn/containers</value>
</property>

<property>
<description>Where to aggregate logs to.</description>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>/var/log/hadoop-yarn/apps</value>
</property>


<property>
 <description>Classpath for typical applications.</description>
 <name>yarn.application.classpath</name>
 <value>
    $HADOOP_CONF_DIR,
    $HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*,
    $HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*,
    $HADOOP_MAPRED_HOME/*,$HADOOP_MAPRED_HOME/lib/*,
    $HADOOP_YARN_HOME/*,$HADOOP_YARN_HOME/lib/*,
    /usr/lib/hadoop-lzo/lib/*,
    /usr/share/aws/emr/emrfs/conf,
    /usr/share/aws/emr/emrfs/lib/*,
    /usr/share/aws/emr/emrfs/auxlib/*,
    /usr/share/aws/emr/lib/*,
    /usr/share/aws/emr/ddb/lib/emr-ddb-hadoop.jar,
    /usr/share/aws/emr/goodies/lib/emr-hadoop-goodies.jar,
    /usr/lib/spark/yarn/lib/datanucleus-api-jdo.jar,
    /usr/lib/spark/yarn/lib/datanucleus-core.jar,
    /usr/lib/spark/yarn/lib/datanucleus-rdbms.jar,
    /usr/share/aws/emr/cloudwatch-sink/lib/*,
    /usr/share/aws/aws-java-sdk/*
 </value>
 </property>

<!-- The default setting (2.1) is silly. Virtual memory is not
     a limiting factor on 64-bit systems, at least not a limiting
     resource, so make it large, very large. -->
<property>
<name>yarn.nodemanager.vmem-pmem-ratio</name>
<value>5</value>
</property>

<property>
<name>yarn.node-labels.enabled</name>
<value>true</value>
</property>

<property>
<name>yarn.node-labels.am.default-node-label-expression</name>
<value>CORE</value>
</property>

<property>
<name>yarn.node-labels.fs-store.root-dir</name>
<value>file:///mnt/var/lib/hadoop-yarn/nodelabels</value>
</property>

<property>
<name>yarn.node-labels.configuration-type</name>
<value>distributed</value>
</property>

<property>
<name>yarn.log-aggregation.enable-local-cleanup</name>
<value>false</value>
</property>

<property>
<name>yarn.nodemanager.address</name>
<value>${yarn.nodemanager.hostname}:8041</value>
</property>

<property>
<name>yarn.nodemanager.container-metrics.enable</name>
<value>false</value>
</property>

<property>
<name>yarn.nodemanager.recovery.enabled</name>
<value>true</value>
</property>

<property>
<name>yarn.nodemanager.recovery.supervised</name>
<value>true</value>
</property>

<property>
<name>yarn.resourcemanager.nodes.exclude-path</name>
<value>/emr/instance-controller/lib/yarn.nodes.exclude.xml</value>
</property>

<property>
<name>yarn.resourcemanager.webapp.cross-origin.enabled</name>
<value>true</value>
</property>

<property>
<name>yarn.scheduler.increment-allocation-mb</name>
<value>32</value>
</property>

<property>
<name>yarn.resourcemanager.nodemanagers.heartbeat-interval-ms</name>
<value>250</value>
</property>

<property>
<name>yarn.nodemanager.node-labels.provider</name>
<value>config</value>
</property>

<property>
<name>yarn.nodemanager.node-labels.provider.configured-node-partition</name>
<value>CORE</value>
</property>

<property>
<name>yarn.resourcemanager.system-metrics-publisher.enabled</name>
<value>true</value>
</property>

<property>
<name>yarn.timeline-service.http-cross-origin.enabled</name>
<value>true</value>
</property>

<property>
<name>yarn.resourcemanager.client.thread-count</name>
<value>64</value>
</property>

<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>4</value>
</property>

<property>
<name>yarn.resourcemanager.resource-tracker.client.thread-count</name>
<value>64</value>
</property>

<property>
<name>yarn.nodemanager.container-manager.thread-count</name>
<value>64</value>
</property>

<property>
<name>yarn.resourcemanager.scheduler.client.thread-count</name>
<value>64</value>
</property>

<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>12288</value>
</property>

<property>
<name>yarn.nodemanager.localizer.client.thread-count</name>
<value>20</value>
</property>

<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>172800</value>
</property>

<property>
<name>yarn.nodemanager.localizer.fetch.thread-count</name>
<value>20</value>
</property>

<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>12288</value>
</property>

<property>
<name>yarn.scheduler.maximum-allocation-vcores</name>
<value>128</value>
</property>

<property>
<name>yarn.resourcemanager.hostname</name>
<value>172.31.63.120</value>
</property>

<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>32</value>
</property>

<property>
<name>yarn.timeline-service.enabled</name>
<value>true</value>
</property>

...
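
For quick reference, the capacity-related settings differ between the two clusters as follows (values copied from the two files above):

    setting                                     standalone   EMR
    yarn.nodemanager.resource.memory-mb         262144       12288
    yarn.nodemanager.resource.cpu-vcores        56           4
    yarn.scheduler.maximum-allocation-mb        262144       12288
    yarn.scheduler.maximum-allocation-vcores    56           128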