I need help setting up HDFS as the artifact store for MLflow. I have MLflow and HDFS running in separate containers on a shared Docker network. When I try to register a model, I get the following error (a rough sketch of the call that triggers it is included after the traceback):
FileNotFoundError Traceback (most recent call last)
<ipython-input-35-e54b25688d8e> in <module>
1 # log model artifacts
----> 2 pyfunc.log_model('hdfs://hdfs:8020/', python_model=LGBWrapper(), artifacts=artifacts, conda_env=conda_env)
3 # pyfunc.save_model('prediction_model8', python_model=LGBWrapper(), artifacts=artifacts, conda_env=conda_env)
4
5 # set tag for selecting model
~/opt/anaconda3/envs/soptai/lib/python3.6/site-packages/mlflow/pyfunc/__init__.py in log_model(artifact_path, loader_module, data_path, code_path, conda_env, python_model, artifacts, registered_model_name)
697 artifacts=artifacts,
698 conda_env=conda_env,
--> 699 registered_model_name=registered_model_name)
700
701
~/opt/anaconda3/envs/soptai/lib/python3.6/site-packages/mlflow/models/__init__.py in log(cls, artifact_path, flavor, registered_model_name, **kwargs)
100 mlflow_model = cls(artifact_path=artifact_path, run_id=run_id)
101 flavor.save_model(path=local_path, mlflow_model=mlflow_model, **kwargs)
--> 102 mlflow.tracking.fluent.log_artifacts(local_path, artifact_path)
103 try:
104 mlflow.tracking.fluent._record_logged_model(mlflow_model)
~/opt/anaconda3/envs/soptai/lib/python3.6/site-packages/mlflow/tracking/fluent.py in log_artifacts(local_dir, artifact_path)
321 """
322 run_id = _get_or_start_run().info.run_id
--> 323 MlflowClient().log_artifacts(run_id, local_dir, artifact_path)
324
325
~/opt/anaconda3/envs/soptai/lib/python3.6/site-packages/mlflow/tracking/client.py in log_artifacts(self, run_id, local_dir, artifact_path)
265 :param artifact_path: If provided, the directory in ``artifact_uri`` to write to.
266 """
--> 267 self._tracking_client.log_artifacts(run_id, local_dir, artifact_path)
268
269 def _record_logged_model(self, run_id, mlflow_model):
~/opt/anaconda3/envs/soptai/lib/python3.6/site-packages/mlflow/tracking/_tracking_service/client.py in log_artifacts(self, run_id, local_dir, artifact_path)
266 run = self.get_run(run_id)
267 artifact_repo = get_artifact_repository(run.info.artifact_uri)
--> 268 artifact_repo.log_artifacts(local_dir, artifact_path)
269
270 def list_artifacts(self, run_id, path=None):
~/opt/anaconda3/envs/soptai/lib/python3.6/site-packages/mlflow/store/artifact/hdfs_artifact_repo.py in log_artifacts(self, local_dir, artifact_path)
47 hdfs_base_path = _resolve_base_path(self.path, artifact_path)
48
---> 49 with hdfs_system(host=self.host, port=self.port) as hdfs:
50
51 if not hdfs.exists(hdfs_base_path):
~/opt/anaconda3/envs/soptai/lib/python3.6/contextlib.py in __enter__(self)
79 def __enter__(self):
80 try:
---> 81 return next(self.gen)
82 except StopIteration:
83 raise RuntimeError("generator didn't yield") from None
~/opt/anaconda3/envs/soptai/lib/python3.6/site-packages/mlflow/store/artifact/hdfs_artifact_repo.py in hdfs_system(host, port)
175 driver=driver,
176 kerb_ticket=kerb_ticket,
--> 177 extra_conf=extra_conf)
178 yield connected
179 connected.close()
~/opt/anaconda3/envs/soptai/lib/python3.6/site-packages/pyarrow/hdfs.py in connect(host, port, user, kerb_ticket, driver, extra_conf)
213 fs = HadoopFileSystem(host=host, port=port, user=user,
214 kerb_ticket=kerb_ticket, driver=driver,
--> 215 extra_conf=extra_conf)
216 return fs
~/opt/anaconda3/envs/soptai/lib/python3.6/site-packages/pyarrow/hdfs.py in __init__(self, host, port, user, kerb_ticket, driver, extra_conf)
36 driver='libhdfs', extra_conf=None):
37 if driver == 'libhdfs':
---> 38 _maybe_set_hadoop_classpath()
39
40 self._connect(host, port, user, kerb_ticket, driver, extra_conf)
~/opt/anaconda3/envs/soptai/lib/python3.6/site-packages/pyarrow/hdfs.py in _maybe_set_hadoop_classpath()
138 classpath = _hadoop_classpath_glob(hadoop_bin)
139 else:
--> 140 classpath = _hadoop_classpath_glob('hadoop')
141
142 os.environ['CLASSPATH'] = classpath.decode('utf-8')
~/opt/anaconda3/envs/soptai/lib/python3.6/site-packages/pyarrow/hdfs.py in _hadoop_classpath_glob(hadoop_bin)
163
164 hadoop_classpath_args = (hadoop_bin, 'classpath', '--glob')
--> 165 return subprocess.check_output(hadoop_classpath_args)
166
167
~/opt/anaconda3/envs/soptai/lib/python3.6/subprocess.py in check_output(timeout, *popenargs, **kwargs)
354
355 return run(*popenargs, stdout=PIPE, timeout=timeout, check=True,
--> 356 **kwargs).stdout
357
358
~/opt/anaconda3/envs/soptai/lib/python3.6/subprocess.py in run(input, timeout, check, *popenargs, **kwargs)
421 kwargs['stdin'] = PIPE
422
--> 423 with Popen(*popenargs, **kwargs) as process:
424 try:
425 stdout, stderr = process.communicate(input, timeout=timeout)
~/opt/anaconda3/envs/soptai/lib/python3.6/subprocess.py in __init__(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, encoding, errors)
727 c2pread, c2pwrite,
728 errread, errwrite,
--> 729 restore_signals, start_new_session)
730 except:
731 # Cleanup if the child failed starting.
~/opt/anaconda3/envs/soptai/lib/python3.6/subprocess.py in _execute_child(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, restore_signals, start_new_session)
1362 if errno_num == errno.ENOENT:
1363 err_msg += ': ' + repr(err_filename)
-> 1364 raise child_exception_type(errno_num, err_msg, err_filename)
1365 raise child_exception_type(err_msg)
1366
FileNotFoundError: [Errno 2] No such file or directory: 'hadoop': 'hadoop'
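For reference, the call that triggers the traceback looks roughly like this in my notebook; LGBWrapper, artifacts and conda_env below are placeholders standing in for the real objects defined in earlier cells:

import mlflow
import mlflow.pyfunc as pyfunc

class LGBWrapper(pyfunc.PythonModel):
    # Placeholder for the LightGBM wrapper defined earlier in the notebook.
    def predict(self, context, model_input):
        return model_input

# Placeholders: in the notebook these point at real files produced by training.
artifacts = {"model": "lgb_model.txt"}
conda_env = "conda.yaml"

with mlflow.start_run():
    # This is the call that ends in FileNotFoundError: 'hadoop'
    pyfunc.log_model(
        "hdfs://hdfs:8020/",
        python_model=LGBWrapper(),
        artifacts=artifacts,
        conda_env=conda_env,
    )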
Access to HDFS itself should not be the problem: the containers are on the same network, and other services running on that network can reach HDFS without issues. Some changes to core-site.xml or hdfs-site.xml may be needed, as suggested by someone who reported a similar problem (https://github.com/mlflow/mlflow/issues/1466). Unfortunately, I have no idea what those changes should be. Please help!
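If I read the traceback correctly, the failure boils down to the pyarrow call below (host and port taken from my artifact URI), so I assume this is also where any core-site.xml / hdfs-site.xml settings would have to take effect:

import pyarrow.hdfs

# pyarrow builds the Java CLASSPATH for libhdfs by shelling out to
# `hadoop classpath --glob`; that subprocess call is what raises
# FileNotFoundError: 'hadoop' inside my mlflow container.
fs = pyarrow.hdfs.connect(host="hdfs", port=8020)
print(fs.ls("/"))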