Я начинаю разрабатывать веб-приложение на Django. Приложение предсказывает продажи; для этого я использую линейную регрессию. Среди признаков есть несколько строковых переменных, и, чтобы обучить модель, я преобразую все строковые переменные в int с помощью метода handle_non_numerical_data(). Пользователь приложения вводит строковые поля в виде строк.
Метод:
def handle_non_numerical_data(df):
    """Replace every non-numeric column of ``df`` with integer category codes.

    Columns whose dtype is ``np.int64`` or ``np.float64`` are left untouched.
    In every other column each distinct value is assigned an integer code,
    numbered from 0 in order of first appearance, so the same input always
    produces the same encoding.  (Numbering the values by iterating a ``set``
    gives an arbitrary order that can change between interpreter runs.)

    The value-to-code mappings are local to this call and are not returned;
    code that later has to encode new raw values (for example at prediction
    time) must persist an equivalent mapping itself.

    Args:
        df: ``pandas.DataFrame`` to encode.  It is modified in place.

    Returns:
        The same ``df`` object, with its non-numeric columns replaced.
    """
    for column in df.columns.values:
        dtype = df[column].dtype
        if dtype != np.int64 and dtype != np.float64:
            column_contents = df[column].values.tolist()

            # Number the distinct values in order of first appearance.
            text_digit_vals = {}
            for value in column_contents:
                if value not in text_digit_vals:
                    text_digit_vals[value] = len(text_digit_vals)

            # Look up the same objects the mapping was built from.
            df[column] = [text_digit_vals[value] for value in column_contents]
    return df
моя модель
# Libraries
import numpy as np
import pandas as pd
import pickle
from matplotlib import pyplot as plt
from sklearn import metrics
from sklearn import model_selection
#from sklearn import preprocessing
from sklearn.cluster import KMeans
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
#from sklearn.linear_model import Ridge
from sklearn.externals import joblib
# Load the dataset and replace every missing value with 0.
data = pd.read_csv('ml_code/ml_process/test.csv').fillna(0)
def handle_non_numerical_data(df):
    """Replace every non-numeric column of ``df`` with integer category codes.

    Columns whose dtype is ``np.int64`` or ``np.float64`` are left untouched.
    In every other column each distinct value is assigned an integer code,
    numbered from 0 in order of first appearance, so the same input always
    produces the same encoding.  (Numbering the values by iterating a ``set``
    gives an arbitrary order that can change between interpreter runs.)

    The value-to-code mappings are local to this call and are not returned;
    code that later has to encode new raw values (for example at prediction
    time) must persist an equivalent mapping itself.

    Args:
        df: ``pandas.DataFrame`` to encode.  It is modified in place.

    Returns:
        The same ``df`` object, with its non-numeric columns replaced.
    """
    for column in df.columns.values:
        dtype = df[column].dtype
        if dtype != np.int64 and dtype != np.float64:
            column_contents = df[column].values.tolist()

            # Number the distinct values in order of first appearance.
            text_digit_vals = {}
            for value in column_contents:
                if value not in text_digit_vals:
                    text_digit_vals[value] = len(text_digit_vals)

            # Look up the same objects the mapping was built from.
            df[column] = [text_digit_vals[value] for value in column_contents]
    return df
# Encode the non-numeric columns as integer codes.
# NOTE(review): the value-to-code mappings built by handle_non_numerical_data
# are not saved anywhere, so code that loads the model later cannot encode a
# raw string (e.g. 'tea') the way it was encoded here - consider persisting
# the mappings next to the model.
data = handle_non_numerical_data(data)
# DataFrame.as_matrix() is deprecated (and removed in pandas 1.0); .values
# returns the same ndarray on every pandas version.
data = data.values
# X: matrix of the explanatory variables (first nine columns)
X = data[:, 0:9]
# y: vector of the variable to predict (tenth column)
y = data[:, 9]
# Hold out 30% of the rows for evaluation; fixed seed for a repeatable split.
X2_train, X2_test, y2_train, y2_test = train_test_split(X, y, test_size=0.3, random_state=0)
lreg = LinearRegression()
lreg.fit(X2_train, y2_train)
# NOTE: for a regressor, score() is documented to return the R^2 coefficient,
# not a classification accuracy; the printed labels are kept unchanged.
print('Accuracy of linear regression on training set: {:.2f}'.format(lreg.score(X2_train, y2_train)))
print('Accuracy of linear regression on test set: {:.2f}'.format(lreg.score(X2_test, y2_test)))
# Saving the linear regression model.
# The estimator is first pickled to bytes and those bytes are then written
# with joblib, so a loader has to undo both layers (joblib.load followed by
# pickle.loads). Kept as-is so that existing loaders keep working.
linear_regression_model = pickle.dumps(lreg)
# Saving the model to a file
joblib.dump(linear_regression_model, 'ml_code/linear_regression_model.pkl')
для предсказания
import pickle
from sklearn.externals import joblib
# Load the trained model once, at import time.
# The training script writes pickle.dumps(model) through joblib.dump, so the
# object read back by joblib is a bytes blob that still has to be unpickled.
# NOTE(review): unpickling executes arbitrary code - only load this file from
# a trusted location.
linear_regression_model = joblib.load('ml_code/linear_regression_model.pkl')
lreg = pickle.loads(linear_regression_model)
def get_prediction(magasin, numero_article, designation_article, moyen_de_ventes_par_jour, vente_2013,
                   vente_2014, ventes_2015, ventes_2016, ventes_2017
                   ):
    """Return the model's prediction for a single observation.

    The nine arguments are passed, in this order, as one feature row to
    ``lreg.predict``; its result is returned unchanged.

    NOTE(review): the training script replaces non-numeric columns with
    integer codes before fitting, so string values passed here presumably
    need the same encoding first - the reported
    "could not convert string to float" error points to that. Confirm against
    the training code.
    """
    feature_row = [
        magasin,
        numero_article,
        designation_article,
        moyen_de_ventes_par_jour,
        vente_2013,
        vente_2014,
        ventes_2015,
        ventes_2016,
        ventes_2017,
    ]
    return lreg.predict([feature_row])
Но я получаю эту ошибку:
ValueError at /
could not convert string to float: 'tea'
Request Method: POST
Request URL: http://127.0.0.1:8000/
Django Version: 2.2
Exception Type: ValueError
Exception Value:
could not convert string to float: 'tea'
Exception Location: C:\Users\hp\AppData\Local\Programs\Python\Python36\dj\f\lib\site-packages\sklearn\utils\validation.py in check_array, line 448
Python Executable: C:\Users\hp\AppData\Local\Programs\Python\Python36\dj\f\Scripts\python.exe
Python Version: 3.6.5
Python Path:
['C:\\Users\\hp\\AppData\\Local\\Programs\\Python\\Python36\\dj\\appweb pred',
'C:\\Users\\hp\\AppData\\Local\\Programs\\Python\\Python36\\dj\\f\\Scripts\\python36.zip',
'C:\\Users\\hp\\AppData\\Local\\Programs\\Python\\Python36\\dj\\f\\DLLs',
'C:\\Users\\hp\\AppData\\Local\\Programs\\Python\\Python36\\dj\\f\\lib',
'C:\\Users\\hp\\AppData\\Local\\Programs\\Python\\Python36\\dj\\f\\Scripts',
'c:\\users\\hp\\appdata\\local\\programs\\python\\python36\\Lib',
'c:\\users\\hp\\appdata\\local\\programs\\python\\python36\\DLLs',
'C:\\Users\\hp\\AppData\\Local\\Programs\\Python\\Python36\\dj\\f',
'C:\\Users\\hp\\AppData\\Local\\Programs\\Python\\Python36\\dj\\f\\lib\\site-packages']
Server time: Sat, 27 Apr 2019 03:32:44 +0000
потому что я конвертирую все переменные моей модели,