What configuration is required to access a Kerberized HDFS from a PySpark application running on a remote Spark cluster?
Here is my code:
from pyspark import SparkConf, SparkContext

######
# Create the Spark context
######
conf = SparkConf()
sc = SparkContext(appName="test-hdfs", conf=conf)

######
# Get a FileSystem handle from the Java gateway
######
URI = sc._gateway.jvm.java.net.URI
Path = sc._gateway.jvm.org.apache.hadoop.fs.Path
FileSystem = sc._gateway.jvm.org.apache.hadoop.fs.FileSystem

# List a directory on the Kerberized HDFS
fs = FileSystem.get(URI("hdfs://hadoop.com:8020"), sc._jsc.hadoopConfiguration())
fs.listStatus(Path('/hdfs/dir/'))
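As far as I understand, FileSystem.get uses whatever is in sc._jsc.hadoopConfiguration(), and if no security mode is set there (or in the core-site.xml on the driver's classpath) the client falls back to SIMPLE authentication. Here is a minimal sketch of what I assume has to be set before the FileSystem call; the property values are my guess, not something I know to be sufficient:

# Assumption: switch the Hadoop client from SIMPLE to Kerberos authentication
hadoop_conf = sc._jsc.hadoopConfiguration()
hadoop_conf.set("hadoop.security.authentication", "kerberos")
hadoop_conf.set("hadoop.security.authorization", "true")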
Either way, I keep running into the error below:
Traceback (most recent call last):
File "/path/to/file/file.py", line 22, in <module>
fs.listStatus(Path('/hdfs/dir/'))
File "/usr/local/spark/python/lib/py4j-0.10.4-src.zip/py4j/java_gateway.py", line 1133, in __call__
File "/usr/local/spark/python/lib/py4j-0.10.4-src.zip/py4j/protocol.py", line 319, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling o28.listStatus.
: org.apache.hadoop.security.AccessControlException: SIMPLE authentication is not enabled. Available:[TOKEN, KERBEROS]
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:106)
at org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:73)
at org.apache.hadoop.hdfs.DFSClient.listPaths(DFSClient.java:2088)
at org.apache.hadoop.hdfs.DFSClient.listPaths(DFSClient.java:2069)
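From the last line of the error message, the NameNode rejects SIMPLE and only accepts TOKEN or KERBEROS, so I gather the client needs Kerberos credentials before listStatus is called. Below is the kind of keytab login I assume is needed, done through the same Py4J gateway; the principal user@EXAMPLE.COM, the keytab path, and the NameNode address are placeholders for my environment, and I am not certain this is the right approach for a remote cluster:

from pyspark import SparkConf, SparkContext

sc = SparkContext(appName="test-hdfs", conf=SparkConf())

# Assumption: switch the Hadoop client to Kerberos, as above
hadoop_conf = sc._jsc.hadoopConfiguration()
hadoop_conf.set("hadoop.security.authentication", "kerberos")

# Assumption: log in from a keytab via the JVM;
# principal and keytab path are placeholders for my environment
UserGroupInformation = sc._gateway.jvm.org.apache.hadoop.security.UserGroupInformation
UserGroupInformation.setConfiguration(hadoop_conf)
UserGroupInformation.loginUserFromKeytab("user@EXAMPLE.COM", "/path/to/user.keytab")

# Retry the original listing with the (hopefully) authenticated configuration
URI = sc._gateway.jvm.java.net.URI
Path = sc._gateway.jvm.org.apache.hadoop.fs.Path
FileSystem = sc._gateway.jvm.org.apache.hadoop.fs.FileSystem
fs = FileSystem.get(URI("hdfs://hadoop.com:8020"), hadoop_conf)
print(fs.listStatus(Path('/hdfs/dir/')))

I have also seen --principal and --keytab options for spark-submit, but as far as I know those apply to YARN deployments, and I am not sure whether they cover a direct FileSystem call like this one.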