Я написал код модели линейной регрессии, но его точность оказалась намного ниже ожидаемой.
Что нужно сделать, чтобы повысить точность?
Код линейной регрессии:
```python
import csv
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
# Read the training and test splits from CSV files in the working directory.
train_dataset, test_dataset = (
    pd.read_csv(csv_path) for csv_path in ("train.csv", "test.csv")
)
# Count missing values per column in each frame. These are bare expressions:
# in a notebook cell only the last one is displayed.
train_dataset.isna().sum()
test_dataset.isna().sum()
# Columns whose missing values are replaced by a column mean.
columns_to_impute = [
    'Age',
    'Time_of_service',
    'Work_Life_balance',
    'Pay_Scale',
    'VAR2',
    'VAR4',
]
# Compute the means on the training frame only and reuse them for the test
# frame, so both frames go through the same transformation and no statistic
# is derived from the test data.
imputation_means = train_dataset[columns_to_impute].mean()
# DataFrame.fillna with a Series fills each column with the value whose index
# label matches the column name.
train_dataset[columns_to_impute] = train_dataset[columns_to_impute].fillna(imputation_means)
test_dataset[columns_to_impute] = test_dataset[columns_to_impute].fillna(imputation_means)
# Columns removed from both frames before modelling.
attributes_to_drop = ['Employee_ID', 'Hometown']
train_dataset = train_dataset.drop(columns=attributes_to_drop)
test_dataset = test_dataset.drop(columns=attributes_to_drop)
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
# Label encoding.
# Positions of the columns to encode, counted after the identifier columns
# have been dropped. The same positions are used for both frames.
# NOTE(review): assumes the test frame has the same column order as the
# training frame at these positions - confirm against the CSV files.
categorical_positions = [3, 4, 5, 12, 0]
for position in categorical_positions:
    train_column = train_dataset.columns[position]
    test_column = test_dataset.columns[position]
    # One encoder per column, fitted on the categories seen in either frame,
    # so a given category maps to the same integer in train and test and
    # transforming the test frame cannot fail on a label absent from train.
    label_encoder = LabelEncoder()
    label_encoder.fit(
        pd.concat(
            [train_dataset[train_column], test_dataset[test_column]],
            ignore_index=True,
        )
    )
    # Assign by column name so the column is replaced by the integer codes
    # rather than written into the existing column's dtype.
    train_dataset[train_column] = label_encoder.transform(train_dataset[train_column])
    test_dataset[test_column] = label_encoder.transform(test_dataset[test_column])
# Every column except the last is used as a feature; the last column is the target.
x, y = train_dataset.iloc[:, :-1], train_dataset.iloc[:, -1]
from sklearn.linear_model import LinearRegression
# Fit ordinary least squares on the full training frame (fit returns the estimator).
sim_lin_reg = LinearRegression().fit(x, y)
# Predict for the preprocessed test frame and print the raw predictions.
y_bpred = sim_lin_reg.predict(test_dataset)
print(y_bpred)
# For a regressor, score() returns the coefficient of determination (R^2),
# not a classification accuracy. It is computed here on the same data the
# model was fitted on.
sim_lin_reg.score(x, y)
```
Наблюдаемая точность: 0,00546752698619779
Ожидаемая точность: 0,75 или более
Как можно повысить точность?