Я разделил набор данных следующим образом
X = []
y = []
# first, compute the number of samples in the training set:
n_train = int(len(df) * 0.7)
# The training set is the first n_train samples in the dataset
X_train = df[: n_train]
Y_train = df[: n_train] # INSERT YOUR CODE HERE
# The test set is the remaining samples in the dataset
X_test = df[n_train:]
Y_test = df[n_train:]
# Print the number of samples in the training set
print('The number of samples in the training set:')
# INSERT YOUR CODE HERE
print(len(Y_train))
# Print the number of samples in the test set
print('The number of samples in the test set:')
# INSERT YOUR CODE HERE
print(len(Y_test))
Затем я создал линейную модель, подобную этой
lr = linear_model.LinearRegression()
Но когда я пытаюсь вписать в нее данные своего поезда
lr.fit(X_train, Y_train)
Я получаю эту ошибку
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-65-9d85ca185925> in <module>
2
3 # INSERT YOUR CODE HERE
----> 4 lr.fit(X_train, Y_train)
~\Anaconda3\ana01\lib\site-packages\sklearn\linear_model\base.py in fit(self, X, y, sample_weight)
456 n_jobs_ = self.n_jobs
457 X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],
--> 458 y_numeric=True, multi_output=True)
459
460 if sample_weight is not None and np.atleast_1d(sample_weight).ndim > 1:
~\Anaconda3\ana01\lib\site-packages\sklearn\utils\validation.py in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)
754 ensure_min_features=ensure_min_features,
755 warn_on_dtype=warn_on_dtype,
--> 756 estimator=estimator)
757 if multi_output:
758 y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False,
~\Anaconda3\ana01\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
550 "Reshape your data either using array.reshape(-1, 1) if "
551 "your data has a single feature or array.reshape(1, -1) "
--> 552 "if it contains a single sample.".format(array))
553
554 # in the future np.flexible dtypes will be handled like object dtypes
ValueError: Expected 2D array, got 1D array instead:
array=[].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.
Набор данных
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2938 entries, 0 to 2937
Data columns (total 22 columns):
Country 2938 non-null object
Year 2938 non-null int64
Status 2938 non-null object
Life 2938 non-null float64
Adult Mortality 2938 non-null float64
infant deaths 2938 non-null int64
Alcohol 2938 non-null float64
percentage expenditure 2938 non-null float64
Hepatitis B 2938 non-null float64
Measles 2938 non-null int64
BMI 2938 non-null float64
under-five deaths 2938 non-null int64
Polio 2938 non-null float64
Total expenditure 2938 non-null float64
Diphtheria 2938 non-null float64
HIV/AIDS 2938 non-null float64
GDP 2938 non-null float64
Population 2938 non-null float64
thinness 1-19 years 2938 non-null float64
thinness 5-9 years 2938 non-null float64
Income composition of resources 2938 non-null float64
Schooling 2938 non-null float64
dtypes: float64(16), int64(4), object(2)
memory usage: 505.0+ KB
None
пример набора данных
