# Printing reports
def display(mess: str, values: object) -> None:
    """Print *values* to stdout under a labelled banner.

    Output is a blank line, a header line of the form
    ``----- <mess> -----``, the printed representation of *values*,
    and a closing rule of dashes.

    Args:
        mess: Label shown in the header line.
        values: Any printable object (for example a DataFrame).
    """
    print()
    print("-----", mess, "-----")
    print(values)
    print("------------------------")
# Library imports
import numpy as np
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
# Load the data set from a CSV file.
health_data = pd.read_csv("C:/Users/??/Downloads/Population(1).csv")

# Split into training and test sets; test_size=0.1 holds out 10% of the
# rows for testing.
health_train, health_test = train_test_split(health_data, test_size=0.1)

# Columns of the data set used for training and testing. Defined once so
# the training and test frames cannot drift apart.
FEATURE_COLUMNS = [
    'Age', 'Weight in lbs', 'Height in Inch',
    'Units of alcohol per day', 'Cigarettes per day', 'Maritial Status Num',
    'Additional People in household', 'Salary', 'ActiveNum',
]
TARGET_COLUMN = 'Health Score (high is good)'

f_train = health_train[FEATURE_COLUMNS].copy()
f_test = health_test[FEATURE_COLUMNS].copy()
s_train = health_train[[TARGET_COLUMN]].copy()
s_test = health_test[[TARGET_COLUMN]].copy()

display("features", f_train)
display(TARGET_COLUMN, s_train)

# Create the Gaussian naive Bayes classifier. By convention, clf stands
# for "classifier".
clf = GaussianNB()

# Train the classifier on the training features so it learns how they
# relate to the training target. The target is passed as a 1-D Series:
# s_train itself is a one-column DataFrame, which is not the 1-D shape
# that fit() expects for y.
clf.fit(f_train, s_train[TARGET_COLUMN])

# Predict every test row in a single call. Passing the DataFrame (rather
# than one row at a time) keeps the column names the model was fitted with.
predictions = clf.predict(f_test)

# These values do not depend on the row, so they are reported once.
print("Number of columns ", len(s_test.columns))
print("Number of rows", s_test.shape[0])

# NOTE: scoring was disabled in the original script and is left disabled.
# It counted a prediction as correct when it was within 10 points of the
# actual score. Vectorized equivalent, kept for reference:
# diff = abs(s_test[TARGET_COLUMN].to_numpy() - predictions)
# correct = int((diff < 10).sum())
# wrong = len(diff) - correct
# total = correct + wrong
# print("Correct ", correct, " wrong", wrong)
# print("Total ", total, " percentage right", (correct*100)/total,"%")