When I tried to test out different hyperparameter options using GLM
with a tweedie via RandomizedSearchCV, I got this error:
/opt/anaconda/lib/python3.6/site-
packages/statsmodels/genmod/families/family.py in __init__(self,
link, var_power)
1361 link = L.log()
1362 super(Tweedie, self).__init__(
-> 1363 link=link, variance=V.Power(power=var_power *
1.))
1365 def _resid_dev(self, endog, mu):
TypeError: unsupported operand type(s) for *: 'NoneType' and 'float'
******************************************************************
Note: this is part of a pipeline implementation.
When I tried to do the pipeline by itself, the code was working as
expected.
I tried tuning the other parameters in the pipeline, and got the
same error.
#############################################
Background code - Given that statsmodel was not a native model in
scitlearn, I first created the transformer before using it in the
pipeline
#############################################
class GLM(BaseEstimator, RegressorMixin):
def __init__(self, tweedie_power=1.5, intercept=True):
self._power = tweedie_power
self._intercept = intercept
def fit(self, X, y=None):
tweedie = sm.families.Tweedie(var_power=self._power)
if self._intercept:
X["intercept"] = 1
self.model = sm.GLM(y, X, family=tweedie, maxiter=1000).fit()
self.params_ = self.model.params
self.standard_errors_ = self.model.bse
self.pvalues_ = self.model.pvalues
return self
def predict(self, X):
if self._intercept:
X["intercept"] = 1
y = self.model.predict(X)
return y
#
Перед использованием RandomizedSearchCV конвейер, как показано ниже, # функционировал, как ожидалось, и GLM не возвращает никакой ошибки.
#
steps = [("copy", Copy()),
("credit_fix", CreditScoreHistoricalNoHitFix()),
("glm", GLM(tweedie_power=1.1))]
pipe = Pipeline(steps)
pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)
#
После добавления настройки гиперпараметра с помощью RandomizedSearchCV в виде #lowlow, именно тогда я начал получать эту ошибку # / opt / anaconda / lib / python3.6 / site- # packages / statsmodels / genmod / family / family.py в init (self, link, #var_power)
# 1361 link = L.log()
# 1362 super(Tweedie, self).__init__(
#-> 1363 link=link, variance=V.Power(power=var_power *
1.))
# 1364
# 1365 def _resid_dev(self, endog, mu):
#TypeError: unsupported operand type(s) for *: 'NoneType' and
'float'
#
pipe = Pipeline(steps)
hyperparameters = {
"glm__tweedie_power": [1.5, 1.6]
}
model = RandomizedSearchCV(pipe, hyperparameters)
model.fit(X_train, y_train)
y_pred = pipe.predict(X_test)