Я использую cross_validation_predict
на Google Colab и получаю queue.Empty
с ComplementNB.
Код:
import pandas as pd
import numpy as np
import torch
from torch.nn.functional import one_hot
from sklearn.model_selection import cross_val_predict
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import ComplementNB
class CustomNB:
def fit(self, X,y):
self.clf.fit(X, y)
def predict(self, X):
return self.clf.predict(X)
def get_params(self, deep=True):
return self.clf.get_params(deep)
def set_params(self, **params):
self.clf.set_params(params)
class CustomMultinomialNB(CustomNB):
def __init__(self, alpha=1.0, fit_prior=True, class_prior=None):
self.clf = MultinomialNB(alpha, fit_prior, class_prior)
class CustomComplementNB(CustomNB):
def __init__(self, alpha=1.0, fit_prior=True, class_prior=None):
self.clf = ComplementNB(alpha, fit_prior, class_prior)
np.random.seed(0)
nrs_df = pd.DataFrame({'user_nr':list(range(60)),
'path_nr':[i%30 for i in range(60)]})
acc =[]
for _, df in nrs_df.groupby('user_nr', sort=False):
l=df['path_nr'].tolist()
row = one_hot(torch.LongTensor(l), num_classes=200).sum(axis=0)
acc.append(row)
X = torch.stack(acc, axis=0).numpy()
y = np.random.choice(list("mf"), len(nrs_df['user_nr'].unique()))
#these work
clf = ComplementNB()
clf = MultinomialNB()
clf = CustomMultinomialNB()
#this one does not
clf = CustomComplementNB()
p = cross_val_predict(clf, X, y, cv=10)
С CustomComplementNB
это дает:
Empty Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
795 try:
--> 796 tasks = self._ready_batches.get(block=False)
797 except queue.Empty:
6 frames
/usr/lib/python3.6/queue.py in get(self, block, timeout)
160 if not self._qsize():
--> 161 raise Empty
162 elif timeout is None:
Empty:
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-27-8325406c36f4> in <module>()
52 clf = CustomComplementNB()
53
---> 54 p = cross_val_predict(clf, X, y, cv=10)
/usr/local/lib/python3.6/dist-packages/sklearn/model_selection/_validation.py in cross_val_predict(estimator, X, y, groups, cv, n_jobs, verbose, fit_params, pre_dispatch, method)
787 prediction_blocks = parallel(delayed(_fit_and_predict)(
788 clone(estimator), X, y, train, test, verbose, fit_params, method)
--> 789 for train, test in cv.split(X, y, groups))
790
791 # Concatenate the predictions
/usr/local/lib/python3.6/dist-packages/joblib/parallel.py in __call__(self, iterable)
1001 # remaining jobs.
1002 self._iterating = False
-> 1003 if self.dispatch_one_batch(iterator):
1004 self._iterating = self._original_iterator is not None
1005
/usr/local/lib/python3.6/dist-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
805 big_batch_size = batch_size * n_jobs
806
--> 807 islice = list(itertools.islice(iterator, big_batch_size))
808 if len(islice) == 0:
809 return False
/usr/local/lib/python3.6/dist-packages/sklearn/model_selection/_validation.py in <genexpr>(.0)
787 prediction_blocks = parallel(delayed(_fit_and_predict)(
788 clone(estimator), X, y, train, test, verbose, fit_params, method)
--> 789 for train, test in cv.split(X, y, groups))
790
791 # Concatenate the predictions
/usr/local/lib/python3.6/dist-packages/sklearn/base.py in clone(estimator, safe)
63 for name, param in new_object_params.items():
64 new_object_params[name] = clone(param, safe=False)
---> 65 new_object = klass(**new_object_params)
66 params_set = new_object.get_params(deep=False)
67
TypeError: __init__() got an unexpected keyword argument 'norm'
Причина подклассов в том, что мне изначально требовались обертки, чтобы тренироваться с partial_fit
под капотом. Для отладки я упростил его, так что оболочка практически ничего не делает.