Я использую Sklearn и Spacy для создания модели машинного обучения НЛП. Но у меня есть ошибка распараллеливания, когда я тренирую свою модель с классом RandomizedSearchCV()
.
Мой класс TextProcessor
позволяет мне обрабатывать текст с помощью библиотеки Spacy.
class TextProcessor(BaseEstimator, TransformerMixin):
def __init__(self, remove_stop_word=False):
self.remove_stop_word = remove_stop_word
self.nlp = spacy.load('en')
self.punctuations = string.punctuation
def spacy_text_processing(self, sentence):
'''
This function allow to process the text with spacy
'''
final_sentence = []
for word in self.nlp(sentence):
if self.remove_stop_word:
if word.is_stop:
continue
if word.text not in self.punctuations:
final_sentence.append(word.lemma_)
return final_sentence
def transform(self, X, y=None):
X_transformed = []
for sentence in X:
X_transformed.append(' '.join(self.spacy_text_processing(sentence)))
return X_transformed
def fit(self, X, y=None):
return self
После этого я использую конвейер sklearn для выполнения другой обработки текста и, наконец, добавляю модель SVR (ошибка возникает с любым типом модели). Но когда я использую параметр n_jobs
со значением, отличным от 1, я получаю ошибку распараллеливания.
param_grid = {...}
svr_model = Pipeline([('text_processing', TextProcessor()),
('vectorizer', CountVectorizer()),
('tfidf', TfidfTransformer()),
('svr', SVR())])
random_search_svr = RandomizedSearchCV(svr_model, param_grid, scoring='neg_mean_absolute_error', n_jobs=-1)
random_search_svr.fit(X_train, y_train)
Эта проблема очень раздражает, потому что модели обучения с такими классами, как GridSearchCV()
и RandomizedSearchCV()
, принимаютмного времени. Будет ли какой-либо способ решить проблему или обойти ее?
Переменные X_train и y_train содержат следующие примеры значений:
X_train = ["Morrisons book second consecutive quarter of sales growth", "Glencore to refinance its short-term debt early, shares rise", ...] #List of sentences
y_train = [0.43, 0.34, ...] #Sentiment between -1 and 1 associate to the sentence
Ошибка:
Exception in thread QueueFeederThread:
Traceback (most recent call last):
File "C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\externals\loky\backend\queues.py", line 150, in _feed
obj_ = dumps(obj, reducers=reducers)
File "C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\externals\loky\backend\reduction.py", line 243, in dumps
dump(obj, buf, reducers=reducers, protocol=protocol)
File "C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\externals\loky\backend\reduction.py", line 236, in dump
_LokyPickler(file, reducers=reducers, protocol=protocol).dump(obj)
File "C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\externals\cloudpickle\cloudpickle.py", line 284, in dump
return Pickler.dump(self, obj)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 437, in dump
self.save(obj)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 549, in save
self.save_reduce(obj=obj, *rv)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 662, in save_reduce
save(state)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 856, in save_dict
self._batch_setitems(obj.items())
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 882, in _batch_setitems
save(v)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 549, in save
self.save_reduce(obj=obj, *rv)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 662, in save_reduce
save(state)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 856, in save_dict
self._batch_setitems(obj.items())
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 887, in _batch_setitems
save(v)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 549, in save
self.save_reduce(obj=obj, *rv)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 662, in save_reduce
save(state)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 856, in save_dict
self._batch_setitems(obj.items())
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 882, in _batch_setitems
save(v)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 816, in save_list
self._batch_appends(obj)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 843, in _batch_appends
save(tmp[0])
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 771, in save_tuple
save(element)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 771, in save_tuple
save(element)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 549, in save
self.save_reduce(obj=obj, *rv)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 662, in save_reduce
save(state)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 856, in save_dict
self._batch_setitems(obj.items())
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 882, in _batch_setitems
save(v)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 816, in save_list
self._batch_appends(obj)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 840, in _batch_appends
save(x)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 771, in save_tuple
save(element)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 549, in save
self.save_reduce(obj=obj, *rv)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 662, in save_reduce
save(state)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 856, in save_dict
self._batch_setitems(obj.items())
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 882, in _batch_setitems
save(v)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 549, in save
self.save_reduce(obj=obj, *rv)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 662, in save_reduce
save(state)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 856, in save_dict
self._batch_setitems(obj.items())
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 882, in _batch_setitems
save(v)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 549, in save
self.save_reduce(obj=obj, *rv)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 662, in save_reduce
save(state)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 786, in save_tuple
save(element)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 524, in save
rv = reduce(self.proto)
File "stringsource", line 2, in preshed.maps.PreshMap.__reduce_cython__
TypeError: self.c_map cannot be converted to a Python object for pickling
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\ProgramData\Anaconda3\lib\threading.py", line 917, in _bootstrap_inner
self.run()
File "C:\ProgramData\Anaconda3\lib\threading.py", line 865, in run
self._target(*self._args, **self._kwargs)
File "C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\externals\loky\backend\queues.py", line 175, in _feed
onerror(e, obj)
File "C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\externals\loky\process_executor.py", line 310, in _on_queue_feeder_error
self.thread_wakeup.wakeup()
File "C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\externals\loky\process_executor.py", line 155, in wakeup
self._writer.send_bytes(b"")
File "C:\ProgramData\Anaconda3\lib\multiprocessing\connection.py", line 183, in send_bytes
self._check_closed()
File "C:\ProgramData\Anaconda3\lib\multiprocessing\connection.py", line 136, in _check_closed
raise OSError("handle is closed")
OSError: handle is closed
---------------------------------------------------------------------------
_RemoteTraceback Traceback (most recent call last)
_RemoteTraceback:
"""
Traceback (most recent call last):
File "C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\externals\loky\backend\queues.py", line 150, in _feed
obj_ = dumps(obj, reducers=reducers)
File "C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\externals\loky\backend\reduction.py", line 243, in dumps
dump(obj, buf, reducers=reducers, protocol=protocol)
File "C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\externals\loky\backend\reduction.py", line 236, in dump
_LokyPickler(file, reducers=reducers, protocol=protocol).dump(obj)
File "C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\externals\cloudpickle\cloudpickle.py", line 284, in dump
return Pickler.dump(self, obj)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 437, in dump
self.save(obj)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 549, in save
self.save_reduce(obj=obj, *rv)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 662, in save_reduce
save(state)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 856, in save_dict
self._batch_setitems(obj.items())
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 882, in _batch_setitems
save(v)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 549, in save
self.save_reduce(obj=obj, *rv)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 662, in save_reduce
save(state)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 856, in save_dict
self._batch_setitems(obj.items())
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 887, in _batch_setitems
save(v)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 549, in save
self.save_reduce(obj=obj, *rv)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 662, in save_reduce
save(state)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 856, in save_dict
self._batch_setitems(obj.items())
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 882, in _batch_setitems
save(v)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 816, in save_list
self._batch_appends(obj)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 843, in _batch_appends
save(tmp[0])
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 771, in save_tuple
save(element)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 771, in save_tuple
save(element)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 549, in save
self.save_reduce(obj=obj, *rv)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 662, in save_reduce
save(state)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 856, in save_dict
self._batch_setitems(obj.items())
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 882, in _batch_setitems
save(v)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 816, in save_list
self._batch_appends(obj)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 840, in _batch_appends
save(x)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 771, in save_tuple
save(element)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 549, in save
self.save_reduce(obj=obj, *rv)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 662, in save_reduce
save(state)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 856, in save_dict
self._batch_setitems(obj.items())
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 882, in _batch_setitems
save(v)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 549, in save
self.save_reduce(obj=obj, *rv)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 662, in save_reduce
save(state)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 856, in save_dict
self._batch_setitems(obj.items())
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 882, in _batch_setitems
save(v)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 549, in save
self.save_reduce(obj=obj, *rv)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 662, in save_reduce
save(state)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 504, in save
f(self, obj) # Call unbound method with explicit self
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 786, in save_tuple
save(element)
File "C:\ProgramData\Anaconda3\lib\pickle.py", line 524, in save
rv = reduce(self.proto)
File "stringsource", line 2, in preshed.maps.PreshMap.__reduce_cython__
TypeError: self.c_map cannot be converted to a Python object for pickling
"""
The above exception was the direct cause of the following exception:
PicklingError Traceback (most recent call last)
<ipython-input-12-8979d799633f> in <module>
15
16 random_search_svr = RandomizedSearchCV(svr_grid_model, param_grid_svr,scoring='neg_mean_absolute_error',n_jobs=-1)
---> 17 random_search_svr.fit(X_train, y_train)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in fit(self, X, y, groups, **fit_params)
720 return results_container[0]
721
--> 722 self._run_search(evaluate_candidates)
723
724 results = results_container[0]
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in _run_search(self, evaluate_candidates)
1513 evaluate_candidates(ParameterSampler(
1514 self.param_distributions, self.n_iter,
-> 1515 random_state=self.random_state))
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in evaluate_candidates(candidate_params)
709 for parameters, (train, test)
710 in product(candidate_params,
--> 711 cv.split(X, y, groups)))
712
713 all_candidate_params.extend(candidate_params)
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable)
928
929 with self._backend.retrieval_context():
--> 930 self.retrieve()
931 # Make sure that we get a last message telling us we are done
932 elapsed_time = time.time() - self._start_time
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in retrieve(self)
831 try:
832 if getattr(self._backend, 'supports_timeout', False):
--> 833 self._output.extend(job.get(timeout=self.timeout))
834 else:
835 self._output.extend(job.get())
C:\ProgramData\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in wrap_future_result(future, timeout)
519 AsyncResults.get from multiprocessing."""
520 try:
--> 521 return future.result(timeout=timeout)
522 except LokyTimeoutError:
523 raise TimeoutError()
C:\ProgramData\Anaconda3\lib\concurrent\futures\_base.py in result(self, timeout)
423 raise CancelledError()
424 elif self._state == FINISHED:
--> 425 return self.__get_result()
426
427 self._condition.wait(timeout)
C:\ProgramData\Anaconda3\lib\concurrent\futures\_base.py in __get_result(self)
382 def __get_result(self):
383 if self._exception:
--> 384 raise self._exception
385 else:
386 return self._result
PicklingError: Could not pickle the task to send it to the workers.
Версия:
- Python: 3.7.1
- Пространство: 2.2.1
- Sklearn: 0,20,1