наивный байесовский кросс - PullRequest
0 голосов
/ 22 апреля 2020

Я реализовал наивный байесовский классификатор без библиотеки sklearn и хочу получить оценку перекрёстной проверки (cross-validation). Как это реализовать? Функция оценки перекрёстной проверки (cross_val_score) требует классификатор в качестве параметра, и я не знаю, что именно нужно туда передать.

# Add the number of spam-labelled (label == 1) entries of the training
# targets to the running training counter.
train_spam_count += sum(
    1 for idx in range(len(train_target)) if train_target[idx] == 1
)
# Same count for the test targets.
test_spam_count += sum(
    1 for idx in range(len(test_target)) if test_target[idx] == 1
)

# Class priors on the training split: fraction of spam and its complement.
probablity_train_spam = train_spam_count / len(train_target)
probablity_train_not_spam = 1 - probablity_train_spam

# The same two fractions on the test split.
probablity_test_spam = test_spam_count / len(test_target)
probablity_test_not_spam = 1 - probablity_test_spam

# Per-feature statistics of the training data, kept separately for spam
# rows (label == 1) and for all other rows. Entry k of each list belongs
# to feature column k.
mean_train_spam, mean_train_not_spam = [], []
std_dev_train_spam, std_dev_train_not_spam = [], []

row_indices = range(len(train_target))
for column in range(train_input.shape[1]):
    # Split this column's values by the label of the row they come from.
    spam_values = [
        train_input[row][column]
        for row in row_indices
        if train_target[row] == 1
    ]
    not_spam_values = [
        train_input[row][column]
        for row in row_indices
        if not train_target[row] == 1
    ]

    mean_train_spam.append(np.mean(spam_values))
    mean_train_not_spam.append(np.mean(not_spam_values))
    std_dev_train_spam.append(np.std(spam_values))
    std_dev_train_not_spam.append(np.std(not_spam_values))

#3rd Part - Gaussian Equation Implementation

def gauss_value(x,mean,std_deviation):
    """Return the Gaussian density at ``x``, clamped to a positive floor.

    Evaluates ``1 / (sqrt(2*pi) * std) * exp(-(x - mean)**2 / (2 * std**2))``
    and never returns less than a small positive constant, so the result
    can safely be passed to a logarithm.

    Args:
        x: Point at which the density is evaluated.
        mean: Mean of the distribution.
        std_deviation: Standard deviation. When it is exactly 0 the
            module-level ``default_standard_deviation`` is used instead
            to avoid dividing by zero.

    Returns:
        The density as a float, bounded below by the floor constant.
    """
    # Lower bounds for the normalisation coefficient and the final density.
    min_coefficient = 0.0000000000000000000000000000000000001
    min_density = 0.00000000000000000000000000000000000000001

    if std_deviation == 0:
        std_deviation = default_standard_deviation

    coefficient = 1.0 / float(np.sqrt(2 * np.pi) * std_deviation)
    if coefficient <= min_coefficient:
        coefficient = min_coefficient

    exponent = -((x - mean) ** 2) / (2 * float(std_deviation ** 2))
    density = coefficient * float(np.exp(exponent))

    # Compare against the same value that is substituted. Using a smaller
    # comparison threshold than the substituted floor would let a point far
    # from the mean receive a higher density than a nearer one.
    if density <= min_density:
        density = min_density
    return density

class_x = 0
result = []  # predicted label (0 or 1) for every row of test_input

# The log-priors do not depend on the row, so compute them once.
log_prior_spam = np.log(probablity_train_spam)
log_prior_not_spam = np.log(probablity_train_not_spam)

feature_count = test_input.shape[1]
for row in range(len(test_input)):
    # Start each class score at its log-prior, then add the log of the
    # per-feature Gaussian likelihood under that class.
    spam_score = log_prior_spam
    not_spam_score = log_prior_not_spam
    for column in range(feature_count):
        value = test_input[row][column]
        spam_score += np.log(
            gauss_value(value, mean_train_spam[column], std_dev_train_spam[column])
        )
        not_spam_score += np.log(
            gauss_value(
                value,
                mean_train_not_spam[column],
                std_dev_train_not_spam[column],
            )
        )
    # Index 0 is the non-spam score and index 1 the spam score, so the
    # argmax is the predicted label.
    class_x = np.argmax([not_spam_score, spam_score])
    result.append(class_x)
...