Import the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
Импорт набора данных
dataset = pd.read_csv("50_Startups.csv")
x = dataset.iloc[:, :-1]
y = dataset.iloc[:, -1]
print(x)
print(y)
Обработка категориальных данных
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer(transformers = [('encoder', OneHotEncoder() , [3])], remainder = 'passthrough')
x = np.array(ct.fit_transform(x))
print(x)
Разделение набора данных на обучающий набор и набор тестов
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 0)
print(x_train)
print(x_test)
print(y_train)
print(y_test)
Обучение множественной линейной регрессии на обучающем наборе
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(x_train ,y_train)
Прогнозирование результатов теста
y_pred = regressor.predict(x_test)
np.set_printoptions(precision=2)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))