ValueError: 1D-данные передаются в преобразователь, который ожидает 2D-данные.
Здесь я пытаюсь по тексту 'text' и по второму признаку 'setting' предсказать класс 'target'.
Я не понимаю смысла этой ошибки, я думаю, что все сделал правильно.
Данные в 2D!
Столбец 'text' — это строка, столбец 'setting' — тоже строка (но я хочу, чтобы этот признак был категориальным).
def BoW_tokenizer(text):
    """Turn *text* into a list of lower-cased lemmas for bag-of-words use.

    The text is run through the module-level ``nlp`` callable (presumably a
    loaded spaCy pipeline -- confirm where ``nlp`` is defined). Stop words and
    punctuation tokens are skipped. Each remaining token contributes its
    lemma, except when the lemma is the ``"-PRON-"`` placeholder, in which
    case the token's surface text is used instead.

    Returns:
        A list of lower-cased strings, one per kept token, in document order.
    """
    lemmas = []
    for token in nlp(text):
        # Drop tokens that carry no signal for a bag-of-words model.
        if token.is_stop or token.is_punct:
            continue
        if token.lemma_ != "-PRON-":
            lemmas.append(token.lemma_.lower())
        else:
            # The placeholder lemma would merge all such tokens into one
            # feature, so keep the original word form.
            lemmas.append(token.text.lower())
    return lemmas
# Text branch: uni-/bi-gram counts produced with BoW_tokenizer, then
# re-weighted to TF-IDF.
bow_vec = CountVectorizer(tokenizer=BoW_tokenizer, ngram_range=(1, 2))
tfidf_vec = TfidfTransformer()
lsvc = LinearSVC()

# Features are the raw text plus the 'setting' column; 'target' is the label.
X = train_df[['text', 'setting']]
y = train_df['target']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=8
)

# ColumnTransformer passes each transformer a 1D or a 2D slice depending on
# HOW the column is named:
#   * a scalar string ('text')   -> 1D column, which is what CountVectorizer
#     needs (an iterable of documents);
#   * a one-item list (['setting']) -> 2D (n_samples, 1) data, which is what
#     OrdinalEncoder needs.
# Mixing these up is what raises
# "1D data passed to a transformer that expects 2D data" (or, the other way
# round, makes the vectorizer iterate over column names instead of rows).
# NOTE(review): OrdinalEncoder imposes an arbitrary order on 'setting'; if the
# feature is purely nominal, a one-hot encoding may suit a linear model
# better -- confirm with the author.
preproc = ColumnTransformer([
    ('bow & tf-idf', make_pipeline(bow_vec, tfidf_vec), 'text'),
    ('OrdinalEncoder', OrdinalEncoder(), ['setting']),
])

# Full model: column-wise preprocessing followed by a linear SVM.
pipe = make_pipeline(preproc, lsvc)
pipe.fit(X_train, y_train)
predicted = pipe.predict(X_test)
Может ли кто-нибудь мне помочь?
Traceback (most recent call last):
File "/Users/alessiomassini/anaconda3/lib/python3.7/site-packages/sklearn/compose/_column_transformer.py", line 457, in _fit_transform
self._iter(fitted=fitted, replace_strings=True), 1))
File "/Users/alessiomassini/anaconda3/lib/python3.7/site-packages/joblib/parallel.py", line 1007, in __call__
while self.dispatch_one_batch(iterator):
File "/Users/alessiomassini/anaconda3/lib/python3.7/site-packages/joblib/parallel.py", line 835, in dispatch_one_batch
self._dispatch(tasks)
File "/Users/alessiomassini/anaconda3/lib/python3.7/site-packages/joblib/parallel.py", line 754, in _dispatch
job = self._backend.apply_async(batch, callback=cb)
File "/Users/alessiomassini/anaconda3/lib/python3.7/site-packages/joblib/_parallel_backends.py", line 209, in apply_async
result = ImmediateResult(func)
File "/Users/alessiomassini/anaconda3/lib/python3.7/site-packages/joblib/_parallel_backends.py", line 590, in __init__
self.results = batch()
File "/Users/alessiomassini/anaconda3/lib/python3.7/site-packages/joblib/parallel.py", line 256, in __call__
for func, args, kwargs in self.items]
File "/Users/alessiomassini/anaconda3/lib/python3.7/site-packages/joblib/parallel.py", line 256, in <listcomp>
for func, args, kwargs in self.items]
File "/Users/alessiomassini/anaconda3/lib/python3.7/site-packages/sklearn/pipeline.py", line 728, in _fit_transform_one
res = transformer.fit_transform(X, y, **fit_params)
File "/Users/alessiomassini/anaconda3/lib/python3.7/site-packages/sklearn/base.py", line 574, in fit_transform
return self.fit(X, y, **fit_params).transform(X)
File "/Users/alessiomassini/anaconda3/lib/python3.7/site-packages/sklearn/preprocessing/_encoders.py", line 629, in fit
self._fit(X)
File "/Users/alessiomassini/anaconda3/lib/python3.7/site-packages/sklearn/preprocessing/_encoders.py", line 74, in _fit
X_list, n_samples, n_features = self._check_X(X)
File "/Users/alessiomassini/anaconda3/lib/python3.7/site-packages/sklearn/preprocessing/_encoders.py", line 43, in _check_X
X_temp = check_array(X, dtype=None)
File "/Users/alessiomassini/anaconda3/lib/python3.7/site-packages/sklearn/utils/validation.py", line 556, in check_array
"if it contains a single sample.".format(array))
ValueError: Expected 2D array, got 1D array instead:
array=['play' 'iot' 'transport' ... 'news' 'play' 'calendar'].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/alessiomassini/Desktop/Hackaton - NLP/Py_Scratch.py", line 122, in <module>
pipe.fit(X_train, y_train)
File "/Users/alessiomassini/anaconda3/lib/python3.7/site-packages/sklearn/pipeline.py", line 350, in fit
Xt, fit_params = self._fit(X, y, **fit_params)
File "/Users/alessiomassini/anaconda3/lib/python3.7/site-packages/sklearn/pipeline.py", line 315, in _fit
**fit_params_steps[name])
File "/Users/alessiomassini/anaconda3/lib/python3.7/site-packages/joblib/memory.py", line 355, in __call__
return self.func(*args, **kwargs)
File "/Users/alessiomassini/anaconda3/lib/python3.7/site-packages/sklearn/pipeline.py", line 728, in _fit_transform_one
res = transformer.fit_transform(X, y, **fit_params)
File "/Users/alessiomassini/anaconda3/lib/python3.7/site-packages/sklearn/compose/_column_transformer.py", line 518, in fit_transform
result = self._fit_transform(X, y, _fit_transform_one)
File "/Users/alessiomassini/anaconda3/lib/python3.7/site-packages/sklearn/compose/_column_transformer.py", line 460, in _fit_transform
raise ValueError(_ERR_MSG_1DCOLUMN)
ValueError: 1D data passed to a transformer that expects 2D data. Try to specify the column selection as a list of one item instead of a scalar.