I am using Google's Universal Sentence Encoder in a scikit-learn pipeline, but I am running into the following error: TypeError: can't pickle _thread.RLock objects. I think the problem is that scikit-learn deep-copies the TensorFlow object that holds the universal encoder. Here are some details of the code and the error:
import numpy as np
import tensorflow_hub as hub
from sklearn.base import BaseEstimator, TransformerMixin, clone
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.pipeline import Pipeline

module = hub.Module(href)  # href is defined elsewhere in the notebook
module_url = "https://tfhub.dev/google/universal-sentence-encoder/4" #@param ["https://tfhub.dev/google/universal-sentence-encoder/4", "https://tfhub.dev/google/universal-sentence-encoder-large/5"]
model = hub.load(module_url)
print("module %s loaded" % module_url)

def embed(input):
    return model(input)
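For context, embed() itself works fine outside the pipeline; for example (the sentences here are just placeholders), it returns a (batch, 512) tensor of embeddings:

# Quick sanity check of embed() outside the pipeline
sentences = ["The quick brown fox.", "A second example sentence."]
embeddings = embed(sentences)
print(embeddings.shape)  # (2, 512) for universal-sentence-encoder/4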
class UnivEmbedding(BaseEstimator, TransformerMixin):
    '''
    Universal embedding for pipeline.
    WARNING: to clone or save this class, set module=None,
    e.g.: pipe_svm_clf.set_params(embed__module=None)
    '''
    # Class constructor
    def __init__(self, module, use_light=True, verbose=False):
        self.module = module
        self.use_light = use_light
        self.verbose = verbose

    # Return self, nothing else to do here
    def fit(self, X, y=None):
        return self

    # Method that describes what we need this transformer to do
    def transform(self, X, y=None):
        return embed(X)  # universal_embedding(self.module, X, self.use_light, self.verbose)

    def fit_transform(self, X, y=None):
        if self.verbose:
            print(self.module)
        return embed(X)  # universal_embedding(self.module, X, self.use_light, self.verbose)

    def get_params(self, deep=True):
        return {"module": self.module, "use_light": self.use_light, "verbose": self.verbose}

    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self
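One variant I have been considering to avoid the warning in the docstring (a sketch only: store just the module URL, which is a plain string, and load the model lazily, so that the default BaseEstimator get_params never exposes an unpicklable TF object to clone):

class UnivEmbeddingLazy(BaseEstimator, TransformerMixin):
    '''Clone-safe sketch: only the URL is a constructor parameter, so
    sklearn.base.clone deep-copies a string instead of the TF module.'''
    def __init__(self, module_url, use_light=True, verbose=False):
        self.module_url = module_url
        self.use_light = use_light
        self.verbose = verbose
        self._model = None  # underscore attribute, invisible to get_params

    def fit(self, X, y=None):
        return self

    def transform(self, X, y=None):
        if self._model is None:
            self._model = hub.load(self.module_url)  # loaded lazily, once per instance
        return self._model(X)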
... ...
for train_index, test_index in cv.split(messages, y_real):
    univ_emb = UnivEmbedding(module, use_light=False, verbose=False)
    X_train = np.array(messages)[train_index]
    y_train = y_real[train_index]
    X_test = np.array(messages)[test_index]
    y_test = y_real[test_index]

    pipe_rnd_clf = Pipeline(
        [("embed", univ_emb),
         ("rnd_clf", clone(rnd_clf))])

    estimators = [
        ('rnd', pipe_rnd_clf),
        ('ada', pipe_ada_rnd_clf),
        ('bag', pipe_rnd_clf_bag)
    ]

    clf = StackingClassifier(estimators=estimators, final_estimator=RandomForestClassifier(
        bootstrap=True, ccp_alpha=0.0, class_weight=None,
        criterion='gini', max_depth=2, max_features=None,
        max_leaf_nodes=2, max_samples=None,
        min_impurity_decrease=0.0, min_impurity_split=None,
        min_samples_leaf=2, min_samples_split=3,
        min_weight_fraction_leaf=0.0, n_estimators=200,
        n_jobs=None, oob_score=False, random_state=0, verbose=0,
        warm_start=False),
        passthrough=False)

    print(clf.named_estimators)
    clf.fit(X_train, y_train)
I get the following error:
---------------------------------------------------------------------------
Empty Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
796 try:
--> 797 tasks = self._ready_batches.get(block=False)
798 except queue.Empty:
/usr/lib/python3.6/queue.py in get(self, block, timeout)
160 if not self._qsize():
--> 161 raise Empty
162 elif timeout is None:
Empty:
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-30-6a97c8d96f75> in <module>()
45 print(clf.named_estimators)
46
---> 47 clf.fit(X_train, y_train)
48
49 y_pred_test = clf.predict(X_test)
/usr/local/lib/python3.6/dist-packages/sklearn/ensemble/_stacking.py in fit(self, X, y, sample_weight)
411 self._le = LabelEncoder().fit(y)
412 self.classes_ = self._le.classes_
--> 413 return super().fit(X, self._le.transform(y), sample_weight)
414
415 @if_delegate_has_method(delegate='final_estimator_')
/usr/local/lib/python3.6/dist-packages/sklearn/ensemble/_stacking.py in fit(self, X, y, sample_weight)
139 self.estimators_ = Parallel(n_jobs=self.n_jobs)(
140 delayed(_parallel_fit_estimator)(clone(est), X, y, sample_weight)
--> 141 for est in all_estimators if est != 'drop'
142 )
143
/usr/local/lib/python3.6/dist-packages/joblib/parallel.py in __call__(self, iterable)
1002 # remaining jobs.
1003 self._iterating = False
-> 1004 if self.dispatch_one_batch(iterator):
1005 self._iterating = self._original_iterator is not None
1006
/usr/local/lib/python3.6/dist-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
806 big_batch_size = batch_size * n_jobs
807
--> 808 islice = list(itertools.islice(iterator, big_batch_size))
809 if len(islice) == 0:
810 return False
/usr/local/lib/python3.6/dist-packages/sklearn/ensemble/_stacking.py in <genexpr>(.0)
139 self.estimators_ = Parallel(n_jobs=self.n_jobs)(
140 delayed(_parallel_fit_estimator)(clone(est), X, y, sample_weight)
--> 141 for est in all_estimators if est != 'drop'
142 )
143
/usr/local/lib/python3.6/dist-packages/sklearn/base.py in clone(estimator, safe)
69 new_object_params = estimator.get_params(deep=False)
70 for name, param in new_object_params.items():
---> 71 new_object_params[name] = clone(param, safe=False)
72 new_object = klass(**new_object_params)
73 params_set = new_object.get_params(deep=False)
/usr/local/lib/python3.6/dist-packages/sklearn/base.py in clone(estimator, safe)
57 # XXX: not handling dictionaries
58 if estimator_type in (list, tuple, set, frozenset):
---> 59 return estimator_type([clone(e, safe=safe) for e in estimator])
60 elif not hasattr(estimator, 'get_params') or isinstance(estimator, type):
61 if not safe:
/usr/local/lib/python3.6/dist-packages/sklearn/base.py in <listcomp>(.0)
57 # XXX: not handling dictionaries
58 if estimator_type in (list, tuple, set, frozenset):
---> 59 return estimator_type([clone(e, safe=safe) for e in estimator])
60 elif not hasattr(estimator, 'get_params') or isinstance(estimator, type):
61 if not safe:
/usr/local/lib/python3.6/dist-packages/sklearn/base.py in clone(estimator, safe)
57 # XXX: not handling dictionaries
58 if estimator_type in (list, tuple, set, frozenset):
---> 59 return estimator_type([clone(e, safe=safe) for e in estimator])
60 elif not hasattr(estimator, 'get_params') or isinstance(estimator, type):
61 if not safe:
/usr/local/lib/python3.6/dist-packages/sklearn/base.py in <listcomp>(.0)
57 # XXX: not handling dictionaries
58 if estimator_type in (list, tuple, set, frozenset):
---> 59 return estimator_type([clone(e, safe=safe) for e in estimator])
60 elif not hasattr(estimator, 'get_params') or isinstance(estimator, type):
61 if not safe:
/usr/local/lib/python3.6/dist-packages/sklearn/base.py in clone(estimator, safe)
69 new_object_params = estimator.get_params(deep=False)
70 for name, param in new_object_params.items():
---> 71 new_object_params[name] = clone(param, safe=False)
72 new_object = klass(**new_object_params)
73 params_set = new_object.get_params(deep=False)
/usr/local/lib/python3.6/dist-packages/sklearn/base.py in clone(estimator, safe)
60 elif not hasattr(estimator, 'get_params') or isinstance(estimator, type):
61 if not safe:
---> 62 return copy.deepcopy(estimator)
63 else:
64 raise TypeError("Cannot clone object '%s' (type %s): "
/usr/lib/python3.6/copy.py in deepcopy(x, memo, _nil)
178 y = x
179 else:
--> 180 y = _reconstruct(x, memo, *rv)
181
182 # If is its own copy, don't memoize.
/usr/lib/python3.6/copy.py in _reconstruct(x, memo, func, args, state, listiter, dictiter, deepcopy)
278 if state is not None:
279 if deep:
--> 280 state = deepcopy(state, memo)
281 if hasattr(y, '__setstate__'):
282 y.__setstate__(state)
/usr/lib/python3.6/copy.py in deepcopy(x, memo, _nil)
148 copier = _deepcopy_dispatch.get(cls)
149 if copier:
--> 150 y = copier(x, memo)
151 else:
152 try:
/usr/lib/python3.6/copy.py in _deepcopy_dict(x, memo, deepcopy)
238 memo[id(x)] = y
239 for key, value in x.items():
--> 240 y[deepcopy(key, memo)] = deepcopy(value, memo)
241 return y
242 d[dict] = _deepcopy_dict
/usr/lib/python3.6/copy.py in deepcopy(x, memo, _nil)
178 y = x
179 else:
--> 180 y = _reconstruct(x, memo, *rv)
181
182 # If is its own copy, don't memoize.
/usr/lib/python3.6/copy.py in _reconstruct(x, memo, func, args, state, listiter, dictiter, deepcopy)
278 if state is not None:
279 if deep:
--> 280 state = deepcopy(state, memo)
281 if hasattr(y, '__setstate__'):
282 y.__setstate__(state)
/usr/lib/python3.6/copy.py in deepcopy(x, memo, _nil)
148 copier = _deepcopy_dispatch.get(cls)
149 if copier:
--> 150 y = copier(x, memo)
151 else:
152 try:
/usr/lib/python3.6/copy.py in _deepcopy_dict(x, memo, deepcopy)
238 memo[id(x)] = y
239 for key, value in x.items():
--> 240 y[deepcopy(key, memo)] = deepcopy(value, memo)
241 return y
242 d[dict] = _deepcopy_dict
/usr/lib/python3.6/copy.py in deepcopy(x, memo, _nil)
167 reductor = getattr(x, "__reduce_ex__", None)
168 if reductor:
--> 169 rv = reductor(4)
170 else:
171 reductor = getattr(x, "__reduce__", None)
TypeError: can't pickle _thread.RLock objects
I believe the problem is this call:
from sklearn.base import clone
clone(univ_emb)
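since clone() falls back to copy.deepcopy for parameters that are not estimators themselves, and (judging from the last traceback frame) the hub module holds a threading lock somewhere in its state. The failure is easy to reproduce in isolation:

import copy
import threading

# deepcopy pickles objects it has no dispatch handler for, and locks
# are not picklable -- this raises the exact same TypeError
copy.deepcopy(threading.RLock())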
Does anyone have any ideas? Thanks!