# Split the data set into features and target: every column except the
# target column 'Job' is kept as a feature.
# NOTE(review): `columns` and `German_data` are expected to be defined earlier
# in the script (presumably `columns` is the list of German_data's column
# names) -- confirm.
target = 'Job'
columns = [c for c in columns if c != target]
X = German_data[columns]
Y = German_data[target]
# Print the shape of X and Y (the target is named `Y`, upper case)
print(X.shape)
print(Y.shape)
OUTPUT:
(1000, 5)
(1000,)
from sklearn.metrics import classification_report, accuracy_score
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
# Define a random state so the Isolation Forest results are reproducible
state = 1

# scikit-learn only accepts a contamination of 'auto' or a float in (0, 0.5].
# An out-of-range value is the likely cause of the cryptic
# "Percentiles must be in the range [0, 100]" error raised from clf.fit(X),
# so fail early with a clear message instead.
# NOTE(review): `outliers_fraction` is defined outside this block -- check
# that it is computed as (number of outliers) / (total number of rows).
if outliers_fraction != 'auto' and not 0 < outliers_fraction <= 0.5:
    raise ValueError(
        'outliers_fraction must be in the range (0, 0.5], got {!r}'.format(
            outliers_fraction))

# Define the outlier detection models
classifiers = {
    'Isolation Forest': IsolationForest(max_samples=len(X),
                                        contamination=outliers_fraction,
                                        random_state=state),
    'Local Outlier Factor': LocalOutlierFactor(n_neighbors=20,
                                               contamination=outliers_fraction),
}

# Fit the models
n_outliers = len(C_pass)
for clf_name, clf in classifiers.items():
    # Fit the data and tag outliers
    if clf_name == 'Local Outlier Factor':
        # In its default (non-novelty) mode LocalOutlierFactor only offers
        # fit_predict; the scores are exposed as an attribute after fitting.
        y_pred = clf.fit_predict(X)
        scores_pred = clf.negative_outlier_factor_
    else:
        clf.fit(X)
        scores_pred = clf.decision_function(X)
        y_pred = clf.predict(X)

    # Both estimators return +1 for inliers and -1 for outliers; remap the
    # predictions to 0 (inlier / pass) and 1 (outlier / loss).
    y_pred[y_pred == 1] = 0
    y_pred[y_pred == -1] = 1

    # Count the predictions that disagree with the target.
    # NOTE(review): this comparison assumes Y uses the same 0/1 encoding --
    # confirm that the 'Job' column is binary.
    n_errors = (y_pred != Y).sum()

    # Run classification metrics
    print('{}: {}'.format(clf_name, n_errors))
    print(accuracy_score(Y, y_pred))
    print(classification_report(Y, y_pred))
ERROR:
12) clf.fit(X)
raise ValueError("Percentiles must be in the range [0, 100]")
return _quantile_unchecked(
a, q, axis, out, overwrite_input, interpolation, keepdims)
Percentiles must be in the range [0, 100]