Я пытаюсь запустить этот код, и у меня есть эта ошибка: ValueError: Ошибка при проверке цели: ожидалось, что dens_19 будет иметь форму (28,), но получил массив с формой (1,). Я не знаю, какая раздел модели имеет проблему, что я получаю эту ошибку. Я новичок ie в python программировании и глубоком обучении. Какие могут быть возможные причины этой ошибки?
# Downloading training and test sets to local drive
try:
training_set_path = get_file('KDDTrain%2B.csv', origin='https://raw.githubusercontent.com/defcom17/NSL_KDD/master/KDDTrain%2B.csv')
except:
print('Error downloading')
raise
try:
test_set_path = get_file('KDDTest%2B.csv', origin='https://raw.githubusercontent.com/defcom17/NSL_KDD/master/KDDTest%2B.csv')
except:
print('Error downloading')
raise
training_df = pd.read_csv(training_set_path, header=None)
testing_df = pd.read_csv(test_set_path, header=None)
#######################################################################################
#Since the CSV files don't contain a header we'll need to assign column names ourselves.
columns = [
'duration',
'protocol_type',
'service',
'flag',
'src_bytes',
'dst_bytes',
'land',
'wrong_fragment',
'urgent',
'hot',
'num_failed_logins',
'logged_in',
'num_compromised',
'root_shell',
'su_attempted',
'num_root',
'num_file_creations',
'num_shells',
'num_access_files',
'num_outbound_cmds',
'is_host_login',
'is_guest_login',
'count',
'srv_count',
'serror_rate',
'srv_serror_rate',
'rerror_rate',
'srv_rerror_rate',
'same_srv_rate',
'diff_srv_rate',
'srv_diff_host_rate',
'dst_host_count',
'dst_host_srv_count',
'dst_host_same_srv_rate',
'dst_host_diff_srv_rate',
'dst_host_same_src_port_rate',
'dst_host_srv_diff_host_rate',
'dst_host_serror_rate',
'dst_host_srv_serror_rate',
'dst_host_rerror_rate',
'dst_host_srv_rerror_rate',
'outcome',
'difficulty'
]
training_df.columns = columns
testing_df.columns = columns
#print("Training set has {} rows.".format(len(training_df)))
#print("Testing set has {} rows.".format(len(testing_df)))
training_outcomes=training_df["outcome"].unique()
testing_outcomes=testing_df["outcome"].unique()
#print("The training set has {} possible outcomes \n".format(len(training_outcomes)) )
#print(", ".join(training_outcomes)+".")
#print("\nThe testing set has {} possible outcomes \n".format(len(testing_outcomes)))
#print(", ".join(testing_outcomes)+".")
##############################################################
# A list ot attack names that belong to each general attack type
dos_attacks=["snmpgetattack","back","land","neptune","smurf","teardrop","pod","apache2","udpstorm","processtable","mailbomb"]
r2l_attacks=["snmpguess","worm","httptunnel","named","xlock","xsnoop","sendmail","ftp_write","guess_passwd","imap","multihop","phf","spy","warezclient","warezmaster"]
u2r_attacks=["sqlattack","buffer_overflow","loadmodule","perl","rootkit","xterm","ps"]
probe_attacks=["ipsweep","nmap","portsweep","satan","saint","mscan"]
# Our new labels
classes=["Normal","Dos","R2L","U2R","Probe"]
#Helper function to label samples to 5 classes
def label_attack (row):
if row["outcome"] in dos_attacks:
return classes[1]
if row["outcome"] in r2l_attacks:
return classes[2]
if row["outcome"] in u2r_attacks:
return classes[3]
if row["outcome"] in probe_attacks:
return classes[4]
return classes[0]
#We combine the datasets temporarily to do the labeling
test_samples_length = len(testing_df)
df=pd.concat([training_df,testing_df])
df["Class"]=df.apply(label_attack,axis=1)
# The old outcome field is dropped since it was replaced with the Class field, the difficulty field will be dropped as well.
df=df.drop("outcome",axis=1)
df=df.drop("difficulty",axis=1)
# we again split the data into training and test sets.
training_df= df.iloc[:-test_samples_length, :]
testing_df= df.iloc[-test_samples_length:,:]
#Let's take a look at the new labels
training_outcomes=training_df["Class"].unique()
testing_outcomes=testing_df["Class"].unique()
#print("The training set has {} possible outcomes \n".format(len(training_outcomes)) )
#print(", ".join(training_outcomes)+".")
#print("\nThe testing set has {} possible outcomes \n".format(len(testing_outcomes)))
#print(", ".join(testing_outcomes)+".")
########################################################################################
# Helper function for scaling continous values
def minmax_scale_values(training_df,testing_df, col_name):
scaler = MinMaxScaler()
scaler = scaler.fit(training_df[col_name].reshape(-1, 1))
train_values_standardized = scaler.transform(training_df[col_name].reshape(-1, 1))
training_df[col_name] = train_values_standardized
test_values_standardized = scaler.transform(testing_df[col_name].reshape(-1, 1))
testing_df[col_name] = test_values_standardized
#Helper function for one hot encoding
def encode_text(training_df,testing_df, name):
training_set_dummies = pd.get_dummies(training_df[name])
testing_set_dummies = pd.get_dummies(testing_df[name])
for x in training_set_dummies.columns:
dummy_name = "{}_{}".format(name, x)
training_df[dummy_name] = training_set_dummies[x]
if x in testing_set_dummies.columns :
testing_df[dummy_name]=testing_set_dummies[x]
else :
testing_df[dummy_name]=np.zeros(len(testing_df))
training_df.drop(name, axis=1, inplace=True)
testing_df.drop(name, axis=1, inplace=True)
sympolic_columns=["protocol_type","service","flag"]
label_column="Class"
for column in df.columns :
if column in sympolic_columns:
encode_text(training_df,testing_df,column)
elif not column == label_column:
minmax_scale_values(training_df,testing_df, column)
#training_df.head(5)
#testing_df.head(5)
#############################################################################################
#Next we extract the values from the pandas dataframes as Numpy arrays, where :
#x holds the features of the training dataset
#y holds the classification of the training dataset to one of the five possible values
#x_test holds the features of the testing dataset
#y_test holds the classification of the testing dataset to one of the five possible values
#y0 holds the classification of the training dataset to one of two possible labels, 0 for normal traffic or 1 for an attack
#y0_test holds the classification of the testing dataset to one of two possible labels, 0 for normal traffic or 1 for an attack
x,y=training_df,training_df.pop("Class").values
x=x.values
x_test,y_test=testing_df,testing_df.pop("Class").values
x_test=x_test.values
y0=np.ones(len(y),np.int8)
y0[np.where(y==classes[0])]=0
y0_test=np.ones(len(y_test),np.int8)
y0_test[np.where(y_test==classes[0])]=0
############################################################################################
def getModel():
inp = Input(shape=(x.shape[1],))
#d1=Dropout(0.5)(inp)
encoded = Dense(14, activation='sigmoid', activity_regularizer=regularizers.l2(10e-5))(inp)
encoded = Dense(28, activation='sigmoid')(encoded)
encoded = Dense(28, activation='softmax')(encoded)
autoencoder = Model(inp, encoded)
autoencoder.compile(optimizer='adam', loss='mean_squared_error')
return autoencoder
autoencoder=getModel()
history=autoencoder.fit(x,y,
epochs=10,
batch_size=100,
shuffle=True,
validation_split=0.1
)