Я использую случайный лес для предсказания. Я хочу узнать, что не так в коде y, и правильно ли выполнено кодирование. *
import warnings
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
warnings.filterwarnings("ignore")
train = pd.io.parsers.read_csv("train.csv")
test = pd.io.parsers.read_csv('train.csv')
train.dropna(axis=0, how='any', thresh=None, subset=None, inplace=False)
train = train.replace('�', 0, regex=True)
test.dropna(axis=0, how='any', thresh=None, subset=None, inplace=False)
test = test.replace('�', 0, regex=True)
# train_Y = le.transform(train_Y[VCH_CATG].astype(str))
# train_Y = train.VCH_CATG
train_predictor_columns = ['COLOUR', 'FUEL', 'MAKER', 'MAKER_MODEL', 'MANU_YEAR',
'MODEL_NAME','OWNER CODE','OWNER SR','PURCHASE DATE', 'REGN_DATE', 'REGN_TYPE','RTO_CD']
train_X = train[train_predictor_columns]
test_X = test[train_predictor_columns]
# label encode the categorical values and convert them to numbers
le = LabelEncoder()
le.fit(train['VCH_CATG'].astype(str))
train_Y = le.transform(train['VCH_CATG'].astype(str))
for i in train_predictor_columns:
le.fit(train_X[i].astype(str))
train_X[i] = le.transform(train_X[i].astype(str))
test_X[i] = le.transform(test_X[i].astype(str))
# train the model
my_model = RandomForestRegressor()
my_model.fit(train_X, train_Y)
predictions = my_model.predict(test_X)
print(predictions)
print("-------------------------------------------------------------")
print(list(le.inverse_transform(predictions)))
. не помогите мне и не копируйте вставьте пример в официальную документацию