Я пытаюсь разработать программу глубокого обучения, которая предсказывает качество вина, рассматривая это как задачу регрессии. Набор данных взят с https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/. Я прочитал несколько учебных пособий, но в основном код основан на https://www.datacamp.com/community/tutorials/deep-learning-python.
Этот код написан и запускается на colab.research.google.com. Он выполняется без каких-либо ошибок, однако r2_score получается отрицательным, и я не до конца понимаю, почему мы в одних случаях используем X_test, а в других — X[test], например, при вычислении r2_score и т. д.
import matplotlib.pyplot as plt
import h5py # export models in HDF5 format
from keras.datasets import mnist
from keras.utils import np_utils
from keras.layers import Activation, Dense, Dropout
from keras.models import Sequential
from keras import optimizers
from keras import losses
from keras import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
# Load the red-wine quality table; the UCI file is ';'-delimited.
red = pd.read_csv(
    "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv",
    sep=';',
)

# Target: the 'quality' column. Features: every remaining column,
# standardized so that each feature has zero mean and unit variance.
Y = red['quality']
X = StandardScaler().fit_transform(red.drop(columns=['quality']))

# Hold out 20% of the rows as a fixed, reproducible test partition.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, random_state=42, test_size=0.2
)

# Cross-validation configuration: seeded, shuffled, stratified 5-fold split,
# with a fixed number of training epochs per fold.
NB_EPOCH = 20
seed = 7
np.random.seed(seed)
kfold = StratifiedKFold(random_state=seed, shuffle=True, n_splits=5)
from sklearn.metrics import r2_score

# K-fold cross-validation: for every fold a fresh model is trained on the
# training indices and scored on the fold's held-out indices.
#
# NOTE(review): the nesting of this section was not recoverable from the
# flattened source; every step below is treated as per-fold work. Move the
# plotting / debug output out of the loop if it was meant to run only once.

# Index the target positionally. `kfold.split` yields integer positions, and
# indexing a pandas Series with `Y[idx]` is label-based, which only matches
# positions while the index happens to be the default 0..n-1 range.
y_values = np.asarray(Y)

for train, test in kfold.split(X, Y):
    # Small fully connected regressor: 11 inputs -> 64 ReLU units -> 1 output.
    model = Sequential()
    model.add(Dense(64, input_dim=11, activation='relu'))
    model.add(Dense(1))
    model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])

    # Train on the TRAINING fold. The previous code fitted on X[test] and then
    # evaluated on the very same rows, so the training fold was never used and
    # the reported scores were not out-of-sample scores.
    history = model.fit(
        X[train],
        y_values[train],
        validation_split=0.25,
        epochs=NB_EPOCH,
        verbose=1,
    )

    # Out-of-fold evaluation: these rows were not seen during fitting.
    mse_value, mae_value = model.evaluate(X[test], y_values[test], verbose=0)
    print("Mean Squared Error: " + str(mse_value))  # average squared prediction error
    print("Mean Absolute Error: " + str(mae_value))  # average absolute prediction error

    # NOTE(review): X_test was carved out of the same X that kfold splits, so
    # rows of X_test also appear in this fold's training indices. This score is
    # therefore not an independent hold-out estimate. To get one, run the
    # cross-validation on X_train / y_train only.
    score = model.evaluate(X_test, y_test, verbose=1)
    print("Test score:", score[0])  # loss (MSE)
    print('Test MAE:', score[1])  # the compiled metric is MAE, not accuracy

    # Plot training and validation MAE per epoch. The history key for the
    # metric depends on the Keras version ('mae' vs 'mean_absolute_error').
    mae_key = 'mae' if 'mae' in history.history else 'mean_absolute_error'
    plt.figure(figsize=(20, 5))
    plt.plot(history.history[mae_key])
    plt.plot(history.history['val_' + mae_key])
    plt.title('Model MAE')
    plt.ylabel('MAE')
    plt.xlabel('Epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

    # R^2 on the held-out fold; predictions are flattened from (n, 1) to (n,).
    y_pred = model.predict(X[test]).ravel()
    print("this is r2:" + str(r2_score(y_values[test], y_pred)))

    # Debug output: indices and feature rows of the held-out fold.
    print(test)
    print(X[test])