Не удается запустить Datanode: Hadoop 2.7.7 после интеграции с Kerberos - PullRequest
0 голосов
/ 18 октября 2018

Этот вопрос уже задавался ранее, но я не смог найти решение, которое сработало бы для меня, поэтому задаю его здесь. В первую очередь меня интересуют настройки, которые я мог пропустить или применить неправильно.

Вот как:

У меня есть кластер из трёх узлов с «ванильным» Apache Hadoop 2.7.7, работающий на Ubuntu 18.04:
- Ubuntu3 (NameNode, ResourceManager)
- Ubuntu4 (DataNode, NodeManager)
- ubuntu5 (DataNode, NodeManager)

Без интеграции безопасности Hadoop с Kerberos кластер работает нормально. Но когда я настроил Hadoop для работы с Kerberos, я столкнулся со всевозможными странными проблемами: мои DataNode и NodeManager не запускаются.

После выполнения команды start-dfs.sh вывод команды jps на Ubuntu3 такой:

31602 NameNode
8853 Jps

теперь то же самое для ubuntu4 и ubuntu5 дает:

13772 Jps

Когда я просматриваю журналы DataNode на Ubuntu4 и ubuntu5, я вижу следующее:

2018-10-17 07:15:37,650 FATAL org.apache.hadoop.hdfs.server.datanode.DataNode: Исключение в secureMain
java.lang.RuntimeException: Невозможно запустить безопасный DataNode без настройки привилегированных ресурсов либо защиты передачи данных SASL RPC и SSL для HTTP. Использование привилегированных ресурсов в сочетании с защитой передачи данных SASL RPC не поддерживается.
    at org.apache.hadoop.hdfs.server.datanode.DataNode.checkSecureConfig(DataNode.java:1208)
    at org.apache.hadoop.hdfs.server.datanode.DataNode.startDataNode(DataNode.java:1108)
    at org.apache.hadoop.hdfs.server.datanode.DataNode.<init>(DataNode.java:429)
    at org.apache.hadoop.hdfs.server.datanode.DataNode.makeInstance(DataNode.java:2414)
    at org.apache.hadoop.hdfs.server.datanode.DataNode.instantiateDataNode(DataNode.java:2301)
    at org.apache.hadoop.hdfs.server.datanode.DataNode.createDataNode(DataNode.java:2348)
    at org.apache.hadoop.hdfs.server.datanode.DataNode.secureMain(DataNode.java:2530)
    at org.apache.hadoop.hdfs.server.datanode.DataNode.main(DataNode.java:2554)
2018-10-17 07:15:37,672 INFO org.apache.hadoop.util.ExitUtil: выход со статусом 1
2018-10-17 07:15:37,685 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: SHUTDOWN_MSG:
/************************************************************
SHUTDOWN_MSG: выключение DataNode на Ubuntu4/192.168.200.13
************************************************************/

Ниже приведены мои файлы core-site.xml, hdfs-site.xml, yarn-site.xml и hadoop-env.sh:

core-site.xml

<configuration>
     <!-- Default filesystem URI: HDFS on host ubuntu3, port 9000. -->
     <property>
        <name>fs.defaultFS</name>
        <value>hdfs://ubuntu3:9000</value>
    </property>
    <!-- Authentication mechanism is set to Kerberos. -->
    <property>
        <name>hadoop.security.authentication</name>
        <value>kerberos</value>
    </property>
    <!-- Authorization is switched on. -->
    <property>
        <name>hadoop.security.authorization</name>
        <value>true</value>
    </property>
    <!-- RPC protection level is "privacy".
         NOTE(review): presumably authentication + integrity + encryption
         of RPC traffic; confirm against the Hadoop 2.7.7 secure-mode docs. -->
    <property>
        <name>hadoop.rpc.protection</name>
        <value>privacy</value>
    </property>
</configuration>

hdfs-site.xml

<configuration>
    <!-- NOTE(review): this file defines neither dfs.data.transfer.protection
         nor dfs.http.policy, and both DataNode ports below (2004, 2006) are
         above 1023. The FATAL message in the DataNode log quoted in this post
         asks for either privileged resources or SASL data-transfer protection
         plus SSL for HTTP; this looks like the relevant gap. Verify against
         the Hadoop secure-mode guide. -->
    <!-- Replication factor: 2. -->
    <property>
        <name>dfs.replication</name>
        <value>2</value>
    </property>
    <!-- dfs.permissions is set to false. -->
    <property>
        <name>dfs.permissions</name>
        <value>false</value>
    </property>
    <!-- Local directory for NameNode data. -->
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>/usr/local/hadoop/hadoop2_data/hdfs/namenode</value>
    </property>
    <!-- Local directory for DataNode data. -->
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>/usr/local/hadoop/hadoop2_data/hdfs/datanode</value>
    </property>
    <!-- NameNode Kerberos credentials: keytab path and principal. -->
    <property>
        <name>dfs.namenode.keytab.file</name>
        <value>/usr/local/hadoop/conf/hdfs.keytab</value>
    </property>
    <property>
        <name>dfs.namenode.kerberos.principal</name>
        <value>hdfs/hadoop@DAP.COM</value>
    </property>
    <!-- SPNEGO (HTTP) principal for the NameNode. -->
    <property>
        <name>dfs.namenode.kerberos.internal.spnego.principal</name>
        <value>HTTP/hadoop@DAP.COM</value>
    </property>
    <!-- Permission mode for the DataNode data directories: 700. -->
    <property>
        <name>dfs.datanode.data.dir.perm</name>
        <value>700</value>
    </property>
    <!-- DataNode listens on all interfaces, port 2004. -->
    <property>
        <name>dfs.datanode.address</name>
        <value>0.0.0.0:2004</value>
    </property>
    <!-- DataNode HTTP endpoint on all interfaces, port 2006. -->
    <property>
        <name>dfs.datanode.http.address</name>
        <value>0.0.0.0:2006</value>
    </property>
    <!-- DataNode Kerberos credentials: same keytab file and same principal
         as the NameNode entries above.
         NOTE(review): the instance part of the principal is the literal
         "hadoop", not one of the host names (ubuntu3/4/5); confirm that a
         single shared principal is intended. -->
    <property>
        <name>dfs.datanode.keytab.file</name>
        <value>/usr/local/hadoop/conf/hdfs.keytab</value>
    </property>
    <property>
        <name>dfs.datanode.kerberos.principal</name>
        <value>hdfs/hadoop@DAP.COM</value>
    </property>
    <!-- Kerberos principal for web authentication. -->
    <property>
        <name>dfs.web.authentication.kerberos.principal</name>
        <value>HTTP/hadoop@DAP.COM</value>
    </property>
</configuration>

yarn-site.xml

<configuration>
    <!-- Auxiliary service declared for the NodeManager: mapreduce_shuffle. -->
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <!-- Handler class for the shuffle service.
         NOTE(review): this key spells the service "mapreduce.shuffle" while
         the service declared above is "mapreduce_shuffle"; the two names
         differ. Confirm which spelling Hadoop 2.7.7 expects. -->
    <property>
        <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
    <!-- ResourceManager endpoints, all on host ubuntu3. -->
    <property>
        <name>yarn.resourcemanager.resource-tracker.address</name>
        <value>ubuntu3:8025</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address</name>
        <value>ubuntu3:8030</value>
    </property>
    <property>
        <name>yarn.resourcemanager.address</name>
        <value>ubuntu3:8050</value>
    </property>
    <!-- ResourceManager Kerberos credentials: keytab path and principal. -->
    <property>
        <name>yarn.resourcemanager.keytab</name>
        <value>/usr/local/hadoop/conf/yarn.keytab</value>
    </property>
    <property>
        <name>yarn.resourcemanager.principal</name>
        <value>yarn/hadoop@DAP.COM</value>
    </property>
    <!-- NodeManager Kerberos credentials: same keytab path and principal
         as the ResourceManager entries above. -->
    <property>
        <name>yarn.nodemanager.keytab</name>
        <value>/usr/local/hadoop/conf/yarn.keytab</value>
    </property>
    <property>
        <name>yarn.nodemanager.principal</name>
        <value>yarn/hadoop@DAP.COM</value>
    </property>
    <!-- Containers are launched through LinuxContainerExecutor. -->
    <property>
        <name>yarn.nodemanager.container-executor.class</name>
 <value>org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor</value>
    </property>
    <!-- The nonsecure-mode "limit-users" switch is set to false. -->
    <property>
        <name>yarn.nodemanager.linux-container-executor.nonsecure-mode.limit-users</name>
        <value>false</value>
    </property>
    <!-- Group configured for the Linux container executor: hadoop. -->
    <property>
        <name>yarn.nodemanager.linux-container-executor.group</name>
        <value>hadoop</value>
    </property>
</configuration>

hadoop-env.sh

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Set Hadoop-specific environment variables here.

# The only required environment variable is JAVA_HOME.  All others are
# optional.  When running a distributed configuration it is best to
# set JAVA_HOME in this file, so that it is correctly defined on
# remote nodes.

# Java installation used by the Hadoop scripts; pinned to a fixed path here.
# (Stock form, kept for reference:)
#export JAVA_HOME=${JAVA_HOME}
JAVA_HOME='/usr/lib/jvm/java-8-oracle'
export JAVA_HOME

# Location of jsvc.  jsvc is needed only for secure datanodes that bind to
# privileged ports to authenticate the data transfer protocol; it is not
# needed when SASL on non-privileged ports provides that authentication.
# Both candidate settings are left disabled in this file.
#export JSVC_HOME=${JSVC_HOME}
#export JSVC_HOME=/usr/local/hadoop/sbin

# Configuration directory: keep a non-empty inherited value, otherwise
# fall back to /etc/hadoop.
HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-/etc/hadoop}"
export HADOOP_CONF_DIR

# Extra Java CLASSPATH elements.  Automatically insert capacity-scheduler.
# Each jar found under $HADOOP_HOME/contrib/capacity-scheduler is appended to
# HADOOP_CLASSPATH (colon-separated); an existing value is preserved.
for f in "$HADOOP_HOME"/contrib/capacity-scheduler/*.jar; do
  # When the glob matches nothing the shell hands back the pattern itself;
  # skip it so a literal ".../*.jar" entry never lands on the classpath.
  [ -e "$f" ] || continue
  # ${VAR:+...} emits the "existing:" prefix only when VAR is non-empty,
  # which avoids a leading ':' on the first entry.
  export HADOOP_CLASSPATH="${HADOOP_CLASSPATH:+${HADOOP_CLASSPATH}:}${f}"
done

# Heap ceiling in MB (default 1000).  Left unset in this file.
#export HADOOP_HEAPSIZE=
#export HADOOP_NAMENODE_INIT_HEAPSIZE=""

# Global JVM flags: whatever was inherited, plus a preference for IPv4.
HADOOP_OPTS="${HADOOP_OPTS} -Djava.net.preferIPv4Stack=true"

# Per-daemon flags.  Any value inherited from the environment is kept at the
# end of each string.  The security and audit loggers of the NameNode and
# SecondaryNameNode can be overridden through HADOOP_SECURITY_LOGGER and
# HDFS_AUDIT_LOGGER.
HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} ${HADOOP_NAMENODE_OPTS}"
HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS ${HADOOP_DATANODE_OPTS}"
HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} ${HADOOP_SECONDARYNAMENODE_OPTS}"
HADOOP_NFS3_OPTS="${HADOOP_NFS3_OPTS}"
HADOOP_PORTMAP_OPTS="-Xmx512m ${HADOOP_PORTMAP_OPTS}"

# Flags shared by the client-side commands (fs, dfs, fsck, distcp etc).
HADOOP_CLIENT_OPTS="-Xmx512m ${HADOOP_CLIENT_OPTS}"
#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS"

export HADOOP_OPTS HADOOP_NAMENODE_OPTS HADOOP_DATANODE_OPTS \
  HADOOP_SECONDARYNAMENODE_OPTS HADOOP_NFS3_OPTS HADOOP_PORTMAP_OPTS \
  HADOOP_CLIENT_OPTS

# Account the secure datanode switches to after dropping privileges.
# It **MUST** be set when privileged ports authenticate the data transfer
# protocol, and **MUST NOT** be set when SASL on non-privileged ports does
# that job.  Both variants are left disabled in this file.
#export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER}
#export HADOOP_SECURE_DN_USER=hadoop

# Log directory ($HADOOP_HOME/logs by default).  Left disabled here.
#export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER

# Log directory for the secure datanode: <HADOOP_LOG_DIR>/<HADOOP_HDFS_USER>.
HADOOP_SECURE_DN_LOG_DIR="${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}"
export HADOOP_SECURE_DN_LOG_DIR

###
# HDFS Mover specific parameters
###
# Specify the JVM options to be used when starting the HDFS Mover.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HADOOP_MOVER_OPTS=""

###
# Advanced Users Only!
###

# Directory holding the daemons' pid files (/tmp by default).
# NOTE: choose a directory writable only by the user that runs the hadoop
#       daemons; otherwise there is the potential for a symlink attack.
# The secure datanode pid directory is given the same value.
HADOOP_PID_DIR="${HADOOP_PID_DIR}"
HADOOP_SECURE_DN_PID_DIR="${HADOOP_PID_DIR}"

# Label identifying this hadoop instance; taken from $USER.
HADOOP_IDENT_STRING="${USER}"

export HADOOP_PID_DIR HADOOP_SECURE_DN_PID_DIR HADOOP_IDENT_STRING
...