Я следовал руководству по конвейерам Kedro [1], создал все необходимые файлы и запустил Kedro командой kedro run --node=preprocess_companies
Команда завершается ошибкой ValueError: Pipeline does not contain nodes named ['preprocess_companies']. Я пытался найти решение, но не смог. Спасибо за помощь!
Я использую следующие 3 файла:
1) C:\Users\Name\tutorrr\src\tutorrr\pipelines\data_engineering\nodes.py
**import pandas as pd
def _is_true(x):
    """Return the result of comparing ``x`` with the string ``"t"``.

    Used by the preprocessing functions to turn ``"t"`` flag values into
    booleans.

    Args:
        x: A single cell value.

    Returns:
        ``True`` when ``x`` equals ``"t"``; ``False`` for other ordinary
        scalar values.
    """
    return x == "t"
def _parse_percentage(x):
    """Convert a percentage string such as ``"85%"`` into a fraction.

    Args:
        x: A single cell value.

    Returns:
        For a string, the number left after removing every ``%`` character,
        divided by 100. For any non-string value, NaN.

    Raises:
        ValueError: If ``x`` is a string that is not a valid float once the
            ``%`` characters are removed.
    """
    if isinstance(x, str):
        return float(x.replace("%", "")) / 100
    # Non-string cells are mapped to NaN rather than raising.
    return float("NaN")
def _parse_money(x):
    """Convert a money string such as ``"$1,325.50"`` into a float.

    Args:
        x: A single cell value.

    Returns:
        For a string, the float left after removing every ``$`` and ``,``
        character. For a non-string value, ``float(x)``, so a float NaN
        stays NaN and a numeric value is passed through.

    Raises:
        ValueError: If ``x`` is a string that is not a valid float once the
            ``$`` and ``,`` characters are removed.
        TypeError: If ``x`` is a non-string value that ``float()`` rejects
            (for example ``None``).
    """
    if isinstance(x, str):
        return float(x.replace("$", "").replace(",", ""))
    # Non-string cells have no ``.replace``; convert them directly instead
    # of failing with AttributeError.
    return float(x)
def preprocess_companies(companies: pd.DataFrame) -> pd.DataFrame:
    """Preprocess the data for companies.

    Rewrites two columns of ``companies`` element by element:
    ``iata_approved`` via ``_is_true`` and ``company_rating`` via
    ``_parse_percentage``.

    Args:
        companies: Source data. Must contain the ``iata_approved`` and
            ``company_rating`` columns. The frame is modified in place.

    Returns:
        The same ``companies`` object, with the two columns replaced.
    """
    companies["iata_approved"] = companies["iata_approved"].apply(_is_true)
    companies["company_rating"] = companies["company_rating"].apply(
        _parse_percentage
    )
    return companies
def preprocess_shuttles(shuttles: pd.DataFrame) -> pd.DataFrame:
    """Preprocess the data for shuttles.

    Rewrites three columns of ``shuttles`` element by element:
    ``d_check_complete`` and ``moon_clearance_complete`` via ``_is_true``,
    and ``price`` via ``_parse_money``.

    Args:
        shuttles: Source data. Must contain the ``d_check_complete``,
            ``moon_clearance_complete`` and ``price`` columns. The frame is
            modified in place.

    Returns:
        The same ``shuttles`` object, with the three columns replaced.
    """
    shuttles["d_check_complete"] = shuttles["d_check_complete"].apply(_is_true)
    shuttles["moon_clearance_complete"] = shuttles["moon_clearance_complete"].apply(
        _is_true
    )
    shuttles["price"] = shuttles["price"].apply(_parse_money)
    return shuttles
2) C:\Users\Name\tutorrr\src\tutorrr\pipeline.py
**from typing import Dict
from kedro.pipeline import Pipeline
from src.tutorrr.pipelines.data_engineering import pipeline as de
def create_pipelines(**kwargs) -> Dict[str, Pipeline]:
    """Create the project's pipelines.

    Builds the data engineering pipeline once and registers that same
    object under two names.

    Args:
        kwargs: Ignore any additional arguments added in the future.

    Returns:
        A mapping from a pipeline name to a ``Pipeline`` object, with the
        keys ``"de"`` and ``"__default__"``.
    """
    de_pipeline = de.create_pipeline()
    return {
        "de": de_pipeline,
        # NOTE(review): "__default__" is presumably the pipeline Kedro runs
        # when no pipeline name is given -- confirm against the Kedro docs.
        "__default__": de_pipeline,
    }
3) C:\Users\Name\tutorrr\src\tutorrr\pipelines\data_engineering\pipeline.py
**from kedro.pipeline import node, Pipeline
from src.tutorrr.pipelines.data_engineering.nodes import (
preprocess_companies,
preprocess_shuttles,
)
def create_pipeline(**kwargs):
    """Build the data engineering pipeline.

    The pipeline has two nodes, one per preprocessing function:

    * ``preprocessing_companies``: ``companies`` -> ``preprocessed_companies``
    * ``preprocessing_shuttles``: ``shuttles`` -> ``preprocessed_shuttles``

    Args:
        kwargs: Accepted but not used.

    Returns:
        A ``Pipeline`` containing the two nodes.
    """
    return Pipeline(
        [
            node(
                func=preprocess_companies,
                inputs="companies",
                outputs="preprocessed_companies",
                # Nodes are selected by this ``name``, not by the function
                # name: ``kedro run --node=preprocess_companies`` fails with
                # "Pipeline does not contain nodes named [...]", so use
                # ``kedro run --node=preprocessing_companies``.
                name="preprocessing_companies",
            ),
            node(
                func=preprocess_shuttles,
                inputs="shuttles",
                outputs="preprocessed_shuttles",
                # Select with ``kedro run --node=preprocessing_shuttles``.
                name="preprocessing_shuttles",
            ),
        ]
    )
И вот что я получаю:
(new_env) C:\Users\Name\tutorrr>kedro run --node=preprocess_companies
Traceback (most recent call last):
File "c:\users\Name\appdata\local\continuum\anaconda4\envs\new_env\lib\site-packages\kedro\cli\cli.py", line 594, in load_entry_points
entry_point_commands.append(entry_point.load())
File "c:\users\Name\appdata\local\continuum\anaconda4\envs\new_env\lib\site-packages\pkg_resources\__init__.py", line 2449, in load
self.require(*args, **kwargs)
File "c:\users\Name\appdata\local\continuum\anaconda4\envs\new_env\lib\site-packages\pkg_resources\__init__.py", line 2472, in require
items = working_set.resolve(reqs, env, installer, extras=self.extras)
File "c:\users\Name\appdata\local\continuum\anaconda4\envs\new_env\lib\site-packages\pkg_resources\__init__.py", line 792, in resolve
raise VersionConflict(dist, req).with_context(dependent_req)
pkg_resources.VersionConflict: (python-dateutil 2.8.1 (c:\users\Name\appdata\local\continuum\anaconda4\envs\new_env\lib\site-packages), Requirement.parse('python-dateutil==2.8.0'))
Error: Loading global commands from kedro-viz = kedro_viz.server:commands
2020-05-07 11:30:15,999 - root - INFO - ** Kedro project tutorrr
Traceback (most recent call last):
File "c:\users\Name\appdata\local\continuum\anaconda4\envs\new_env\lib\runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "c:\users\Name\appdata\local\continuum\anaconda4\envs\new_env\lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "C:\Users\Name\AppData\Local\Continuum\anaconda4\envs\new_env\Scripts\kedro.exe\__main__.py", line 7, in <module>
File "c:\users\Name\appdata\local\continuum\anaconda4\envs\new_env\lib\site-packages\kedro\cli\cli.py", line 638, in main
("Project specific commands", project_groups),
File "c:\users\Name\appdata\local\continuum\anaconda4\envs\new_env\lib\site-packages\click\core.py", line 829, in __call__
return self.main(*args, **kwargs)
File "c:\users\Name\appdata\local\continuum\anaconda4\envs\new_env\lib\site-packages\click\core.py", line 782, in main
rv = self.invoke(ctx)
File "c:\users\Name\appdata\local\continuum\anaconda4\envs\new_env\lib\site-packages\click\core.py", line 1259, in invoke
return _process_result(sub_ctx.command.invoke(sub_ctx))
File "c:\users\Name\appdata\local\continuum\anaconda4\envs\new_env\lib\site-packages\click\core.py", line 1066, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "c:\users\Name\appdata\local\continuum\anaconda4\envs\new_env\lib\site-packages\click\core.py", line 610, in invoke
return callback(*args, **kwargs)
File "C:\Users\Name\tutorrr\kedro_cli.py", line 278, in run
pipeline_name=pipeline,
File "c:\users\Name\appdata\local\continuum\anaconda4\envs\new_env\lib\site-packages\kedro\context\context.py", line 459, in run
from_inputs=from_inputs,
File "c:\users\Name\appdata\local\continuum\anaconda4\envs\new_env\lib\site-packages\kedro\context\context.py", line 375, in _filter_pipeline
new_pipeline &= pipeline.only_nodes(*node_names)
File "c:\users\Name\appdata\local\continuum\anaconda4\envs\new_env\lib\site-packages\kedro\pipeline\pipeline.py", line 460, in only_nodes
list(unregistered_nodes)
ValueError: Pipeline does not contain nodes named ['preprocess_companies'].