Я реализовал наивный байесовский классификатор без библиотеки sklearn и хочу получить оценку перекрёстной проверки (кросс-валидации). Как это сделать? Функция cross_val_score требует классификатор (estimator) в качестве параметра, и я не знаю, что мне следует туда передать.
# Count the mails labelled as spam (label == 1) in the training targets.
# The counter is expected to be initialised before this point.
train_spam_count += sum(
    1 for idx in range(len(train_target)) if train_target[idx] == 1
)
# Same count for the test targets.
test_spam_count += sum(
    1 for idx in range(len(test_target)) if test_target[idx] == 1
)

# Class priors: share of spam in each split and its complement.
probablity_train_spam = train_spam_count / len(train_target)
probablity_train_not_spam = 1 - probablity_train_spam
probablity_test_spam = test_spam_count / len(test_target)
probablity_test_not_spam = 1 - probablity_test_spam

# Per-feature mean and standard deviation, computed separately for the
# spam and the not-spam training rows (one list entry per feature column).
mean_train_spam, std_dev_train_spam = [], []
mean_train_not_spam, std_dev_train_not_spam = [], []
for column in range(train_input.shape[1]):
    spam_values, not_spam_values = [], []
    for row in range(len(train_target)):
        # Route the value of this column to the bucket of the row's class.
        bucket = spam_values if train_target[row] == 1 else not_spam_values
        bucket.append(train_input[row][column])
    mean_train_spam.append(np.mean(spam_values))
    std_dev_train_spam.append(np.std(spam_values))
    mean_train_not_spam.append(np.mean(not_spam_values))
    std_dev_train_not_spam.append(np.std(not_spam_values))
#3rd Part - Gaussian Equation Implementation
def gauss_value(x, mean, std_deviation):
    """Return the Gaussian density of ``x``, floored away from zero.

    The density is ``1 / (sqrt(2*pi) * std) * exp(-(x - mean)**2 / (2 * std**2))``.
    Both the normalisation coefficient and the final density are clamped to
    tiny positive floors so that the caller can take ``log`` of the result
    without hitting ``log(0)``.

    Args:
        x: Scalar feature value to evaluate.
        mean: Mean of the feature for the class being scored.
        std_deviation: Standard deviation of the feature for that class.
            When it equals 0, ``default_standard_deviation`` is used
            instead (a module-level name that is not defined in this
            function; it must exist elsewhere in the file).

    Returns:
        The density as a Python ``float``, never below the density floor.
    """
    # Floors for the coefficient and for the final density. The original
    # code compared the density against one constant but assigned another,
    # larger one, so a far outlier could end up with a HIGHER likelihood
    # than a nearer point. A single floor keeps the result monotonic.
    min_coefficient = 0.0000000000000000000000000000000000001
    min_density = 0.00000000000000000000000000000000000000001

    if std_deviation == 0:
        # A zero spread would divide by zero below.
        std_deviation = default_standard_deviation

    coefficient = 1.0 / float(np.sqrt(2 * np.pi) * std_deviation)
    if coefficient <= min_coefficient:
        coefficient = min_coefficient

    exponent = -((x - mean) ** 2) / (2 * float(std_deviation ** 2))
    density = coefficient * float(np.exp(exponent))
    if density < min_density:
        density = min_density
    return density
# Predict a label for every test row by comparing the two class log-scores
# (log prior plus the sum of per-feature log Gaussian likelihoods).
class_x = 0
result = []  # predicted label for each test row, in row order
for row_idx in range(len(test_input)):
    # Index 0 holds the not-spam score, index 1 the spam score, so the
    # argmax position is directly the predicted label.
    log_scores = [
        np.log(probablity_train_not_spam),
        np.log(probablity_train_spam),
    ]
    for col_idx in range(test_input.shape[1]):
        value = test_input[row_idx][col_idx]
        spam_likelihood = gauss_value(
            value, mean_train_spam[col_idx], std_dev_train_spam[col_idx]
        )
        not_spam_likelihood = gauss_value(
            value, mean_train_not_spam[col_idx], std_dev_train_not_spam[col_idx]
        )
        log_scores[1] += np.log(spam_likelihood)
        log_scores[0] += np.log(not_spam_likelihood)
    class_x = np.argmax(log_scores)
    result.append(class_x)