Я построил модель на Python, используя гауссовский наивный байесовский классификатор. Теперь, когда модель построена, я хочу подать на вход пользовательские данные и проверить результат модели для них. Как мне это сделать? Вот код модели:
# Importing the libraries
# Required Python Machine learning Packages
import numpy as np
import pandas as pd
# For preprocessing the data
from sklearn import preprocessing
try:
    from sklearn.preprocessing import Imputer
except ImportError:
    # Imputer was removed in scikit-learn 0.22; SimpleImputer replaces it.
    from sklearn.impute import SimpleImputer as Imputer
# To split the dataset into train and test datasets
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    # scikit-learn releases before 0.18 only ship the old module.
    from sklearn.cross_validation import train_test_split
# To model the Gaussian Naive Bayes classifier
from sklearn.naive_bayes import GaussianNB
# To calculate the accuracy score of the model
from sklearn.metrics import accuracy_score
# importing the data
# Labels for the 15 columns of the header-less file, in file order.
column_names = [
    'age', 'workclass', 'fnlwgt', 'education', 'education_num',
    'marital_status', 'occupation', 'relationship',
    'race', 'sex', 'capital_gain', 'capital_loss',
    'hours_per_week', 'native_country', 'income',
]
# The separator is a regular expression (a comma with optional spaces on
# both sides), which is why the python parsing engine is requested.
adult_df = pd.read_csv(
    'adult.data.txt',
    sep=' *, *',
    header=None,
    engine='python',
)
# adding header to the data for ease of processing
adult_df.columns = column_names
# looking for any null values
# (bare expression: the per-column counts are computed but not stored)
adult_df.isnull().sum()
# looking for ? value
# Print, for every text column, how many rows hold the '?' placeholder.
text_columns = [
    'workclass', 'education',
    'marital_status', 'occupation',
    'relationship', 'race', 'sex',
    'native_country', 'income',
]
for column in text_columns:
    placeholder_count = (adult_df[column] == '?').sum()
    print (column, ":", placeholder_count)
# NOTE: this binds a second name to the same DataFrame (no copy is made), so
# the replacements below are visible through adult_df as well.
adult_df_rev = adult_df
# Replace the '?' placeholder in each text column with that column's most
# frequent value.
for value in ['workclass', 'education', 'marital_status', 'occupation',
              'relationship', 'race', 'sex', 'native_country', 'income']:
    # value_counts() is sorted by descending frequency, so idxmax() yields the
    # same "top" entry that describe(include='all') reports, without
    # recomputing statistics for the whole frame on every iteration.
    most_frequent = adult_df_rev[value].value_counts().idxmax()
    # Assign the result back instead of calling replace(..., inplace=True) on
    # a column selection: the in-place form acts on an intermediate object
    # and is not guaranteed to update the frame itself.
    adult_df_rev[value] = adult_df_rev[value].replace('?', most_frequent)
# encoding all data for consistency - label encoders
# (source column, name of the integer-coded column derived from it)
encoded_columns = [
    ('workclass', 'workclass_cat'),
    ('education', 'education_cat'),
    ('marital_status', 'marital_cat'),
    ('occupation', 'occupation_cat'),
    ('relationship', 'relationship_cat'),
    ('race', 'race_cat'),
    ('sex', 'sex_cat'),
    ('native_country', 'native_country_cat'),
]
# One fitted encoder per source column. Refitting a single LabelEncoder for
# every column would keep only the mapping of the last column, leaving no
# way to encode new rows or decode the integer codes of the other columns.
label_encoders = {}
#initialize the encoded categorical columns
for source_column, encoded_column in encoded_columns:
    encoder = preprocessing.LabelEncoder()
    adult_df_rev[encoded_column] = encoder.fit_transform(adult_df_rev[source_column])
    label_encoders[source_column] = encoder
#drop the old categorical columns from dataframe
dummy_fields = ['workclass', 'education', 'marital_status',
                'occupation', 'relationship', 'race',
                'sex', 'native_country']
adult_df_rev = adult_df_rev.drop(dummy_fields, axis = 1)
# re-indexing the columns
# Put the 14 feature columns first and the 'income' target last.
# DataFrame.reindex_axis() no longer exists in pandas 1.0 and later;
# reindex(columns=...) performs the same column reordering.
adult_df_rev = adult_df_rev.reindex(columns=[
    'age', 'workclass_cat', 'fnlwgt', 'education_cat',
    'education_num', 'marital_cat', 'occupation_cat',
    'relationship_cat', 'race_cat', 'sex_cat', 'capital_gain',
    'capital_loss', 'hours_per_week', 'native_country_cat',
    'income'])
# (bare expression: the first row is computed but not stored)
adult_df_rev.head(1)
#standardization of data
num_features = ['age', 'workclass_cat', 'fnlwgt', 'education_cat', 'education_num',
                'marital_cat', 'occupation_cat', 'relationship_cat', 'race_cat',
                'sex_cat', 'capital_gain', 'capital_loss', 'hours_per_week',
                'native_country_cat']
# Maps each column name to the [mean, std] that was used to scale it.
scaled_features = {}
for each in num_features:
    mean, std = adult_df_rev[each].mean(), adult_df_rev[each].std()
    scaled_features[each] = [mean, std]
    # Replace the whole column rather than writing through .loc[:, each]:
    # the scaled values are floats, and setting them in place into an
    # integer column is an upcast that recent pandas versions deprecate.
    adult_df_rev[each] = (adult_df_rev[each] - mean) / std
#data slicing
# Select by position before converting to arrays, so the 14 scaled feature
# columns are not merged with the string 'income' column into a single
# object-dtype array.
features = adult_df_rev.iloc[:, :14].values
target = adult_df_rev.iloc[:, 14].values
# Hold out 33% of the rows for testing; the fixed random_state makes the
# split reproducible. The call is kept inside one parenthesised expression
# because a line break directly after '=' is a syntax error.
features_train, features_test, target_train, target_test = train_test_split(
    features, target, test_size = 0.33, random_state = 10)
clf = GaussianNB()
clf.fit(features_train, target_train)
# Predicted income label for every held-out row.
target_pred = clf.predict(features_test)
Вот несколько строк из набора данных:
37, Federal-gov, 29054, Some-college, 10, Married-civ-spouse, Adm-clerical, Husband, White, Male, 0, 0, 42, United-States, >50K
34, Private, 304030, HS-grad, 9, Married-civ-spouse, Adm-clerical, Husband, Black, Male, 0, 0, 40, United-States, <=50K
41, Self-emp-not-inc, 143129, Bachelors, 13, Divorced, Exec-managerial, Not-in-family, White, Female, 0, 0, 40, United-States, <=50K
53, ?, 135105, Bachelors, 13, Divorced, ?, Not-in-family, White, Female, 0, 0, 50, United-States, <=50K
31, Private, 99928, Masters, 14, Married-civ-spouse, Prof-specialty, Wife, White, Female, 0, 0, 50, United-States, <=50K
58, State-gov, 109567, Doctorate, 16, Married-civ-spouse, Prof-specialty, Husband, White, Male, 0, 0, 1, United-States, >50K
38, Private, 155222, Some-college, 10, Divorced, Machine-op-inspct, Not-in-family, Black, Female, 0, 0, 28, United-States, <=50K
24, Private, 159567, Some-college, 10, Married-civ-spouse, Machine-op-inspct, Husband, White, Male, 0, 0, 40, United-States, <=50K
41, Local-gov, 523910, Bachelors, 13, Married-civ-spouse, Craft-repair, Husband, Black, Male, 0, 0, 40, United-States, <=50K
Теперь предположим, что я хочу получить предсказание и проверить результат для входных данных вида:
[42, Local-gov, 254817, Some-college, 10, Never-married, Prof-specialty, Not-in-family, White, Female, 0, 1340, 40, United-States]
На выходе должно отображаться, составляет ли зарплата <=50K или >50K.
Как мне теперь это сделать?