ValueError: Ошибка при проверке целевого объекта: ожидалось, что dens_19 будет иметь форму (28,), но получил массив с формой (1,) - PullRequest
0 голосов
/ 04 мая 2020

Я пытаюсь запустить этот код, и у меня возникает ошибка: ValueError: Ошибка при проверке цели: ожидалось, что dense_19 будет иметь форму (28,), но получен массив с формой (1,). Я не знаю, какой раздел модели вызывает эту ошибку. Я новичок в программировании на Python и глубоком обучении. Какие могут быть возможные причины этой ошибки?

# Downloading training and test sets to local drive.
# get_file caches the file locally and returns its path.
try:
    training_set_path = get_file('KDDTrain%2B.csv', origin='https://raw.githubusercontent.com/defcom17/NSL_KDD/master/KDDTrain%2B.csv')
except Exception:
    # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit are not
    # intercepted just to print a message; the error is still re-raised.
    print('Error downloading')
    raise


try:
    test_set_path = get_file('KDDTest%2B.csv', origin='https://raw.githubusercontent.com/defcom17/NSL_KDD/master/KDDTest%2B.csv')
except Exception:
    print('Error downloading')
    raise

# The CSV files ship without a header row, so columns are read positionally.
training_df = pd.read_csv(training_set_path, header=None)
testing_df = pd.read_csv(test_set_path, header=None)

#######################################################################################
#Since the CSV files don't contain a header we'll need to assign column names ourselves.
# Column names for the NSL-KDD CSVs: 41 features, then the raw attack
# label ("outcome") and the record-difficulty score ("difficulty").
columns = (
    "duration protocol_type service flag src_bytes dst_bytes land "
    "wrong_fragment urgent hot num_failed_logins logged_in num_compromised "
    "root_shell su_attempted num_root num_file_creations num_shells "
    "num_access_files num_outbound_cmds is_host_login is_guest_login count "
    "srv_count serror_rate srv_serror_rate rerror_rate srv_rerror_rate "
    "same_srv_rate diff_srv_rate srv_diff_host_rate dst_host_count "
    "dst_host_srv_count dst_host_same_srv_rate dst_host_diff_srv_rate "
    "dst_host_same_src_port_rate dst_host_srv_diff_host_rate "
    "dst_host_serror_rate dst_host_srv_serror_rate dst_host_rerror_rate "
    "dst_host_srv_rerror_rate outcome difficulty"
).split()
# Apply the header to both frames so later code can index columns by name.
for frame in (training_df, testing_df):
    frame.columns = columns

#print("Training set has {} rows.".format(len(training_df)))
#print("Testing set has {} rows.".format(len(testing_df)))

# Distinct raw attack labels present in each split.
training_outcomes = training_df["outcome"].unique()
testing_outcomes = testing_df["outcome"].unique()
#print("The training set has {} possible outcomes \n".format(len(training_outcomes)) )
#print(", ".join(training_outcomes)+".")
#print("\nThe testing set has {} possible outcomes \n".format(len(testing_outcomes)))
#print(", ".join(testing_outcomes)+".")

##############################################################
# A list ot attack names that belong to each general attack type
# Attack names grouped by their general attack category.
dos_attacks = ["snmpgetattack", "back", "land", "neptune", "smurf", "teardrop", "pod", "apache2", "udpstorm", "processtable", "mailbomb"]
r2l_attacks = ["snmpguess", "worm", "httptunnel", "named", "xlock", "xsnoop", "sendmail", "ftp_write", "guess_passwd", "imap", "multihop", "phf", "spy", "warezclient", "warezmaster"]
u2r_attacks = ["sqlattack", "buffer_overflow", "loadmodule", "perl", "rootkit", "xterm", "ps"]
probe_attacks = ["ipsweep", "nmap", "portsweep", "satan", "saint", "mscan"]

# The five coarse labels used for classification.
classes = ["Normal", "Dos", "R2L", "U2R", "Probe"]


def label_attack(row):
    """Map a sample's raw ``outcome`` value to one of the five coarse classes."""
    outcome = row["outcome"]
    # Check each category's membership list in turn; first match wins.
    for attack_names, label in (
        (dos_attacks, classes[1]),
        (r2l_attacks, classes[2]),
        (u2r_attacks, classes[3]),
        (probe_attacks, classes[4]),
    ):
        if outcome in attack_names:
            return label
    # Anything not listed above is treated as normal traffic.
    return classes[0]


# Combine the datasets temporarily so both are labeled in one pass.
test_samples_length = len(testing_df)
df = pd.concat([training_df, testing_df])
df["Class"] = df.apply(label_attack, axis=1)


# The raw outcome field is superseded by Class; difficulty is not a feature.
df = df.drop("outcome", axis=1)
df = df.drop("difficulty", axis=1)

# Split back into training and test sets. .copy() is the fix here: iloc
# slices are views of df, and the later in-place column writes (scaling,
# one-hot encoding) on a view raise SettingWithCopyWarning and may not
# stick. Copies make those mutations reliable.
training_df = df.iloc[:-test_samples_length, :].copy()
testing_df = df.iloc[-test_samples_length:, :].copy()

# Take a look at the new labels.
training_outcomes = training_df["Class"].unique()
testing_outcomes = testing_df["Class"].unique()
#print("The training set has {} possible outcomes \n".format(len(training_outcomes)) )
#print(", ".join(training_outcomes)+".")
#print("\nThe testing set has {} possible outcomes \n".format(len(testing_outcomes)))
#print(", ".join(testing_outcomes)+".")

########################################################################################
# Helper function for scaling continous values
def minmax_scale_values(training_df, testing_df, col_name):
    """Min-max scale one continuous column of both frames, in place.

    The scaler is fit on the training column only and then applied to both
    frames, so test-set statistics never leak into the scaling.
    """
    scaler = MinMaxScaler()
    # Bug fix: pandas Series has no .reshape (removed after pandas 0.19),
    # so Series.reshape raises AttributeError on any modern pandas. Go
    # through the underlying numpy array and reshape to the (n_samples, 1)
    # column that sklearn expects.
    train_column = training_df[col_name].values.reshape(-1, 1)
    scaler = scaler.fit(train_column)
    training_df[col_name] = scaler.transform(train_column)
    testing_df[col_name] = scaler.transform(testing_df[col_name].values.reshape(-1, 1))


#Helper function for one hot encoding
def encode_text(training_df, testing_df, name):
    """One-hot encode categorical column ``name`` in both frames, in place.

    Dummy columns are driven by the categories seen in the training frame:
    a training category missing from the test frame becomes an all-zero
    column there, while test-only categories produce no column at all.
    The original column is dropped from both frames.
    """
    train_dummies = pd.get_dummies(training_df[name])
    test_dummies = pd.get_dummies(testing_df[name])
    for category in train_dummies.columns:
        dummy_name = "{}_{}".format(name, category)
        training_df[dummy_name] = train_dummies[category]
        if category in test_dummies.columns:
            testing_df[dummy_name] = test_dummies[category]
        else:
            # Category never occurs in the test set: keep column alignment
            # with an all-zero indicator.
            testing_df[dummy_name] = np.zeros(len(testing_df))
    training_df.drop(name, axis=1, inplace=True)
    testing_df.drop(name, axis=1, inplace=True)


# One-hot encode the symbolic (categorical) features, min-max scale every
# other feature; the Class label itself is left untouched.
sympolic_columns = ["protocol_type", "service", "flag"]
label_column = "Class"
for column in df.columns:
    if column == label_column:
        continue
    if column in sympolic_columns:
        encode_text(training_df, testing_df, column)
    else:
        minmax_scale_values(training_df, testing_df, column)


#training_df.head(5)
#testing_df.head(5)
#############################################################################################   
# Extract numpy arrays from the frames:
#   x / x_test   : feature matrices for the training / testing sets
#   y / y_test   : five-class labels for the training / testing sets
#   y0 / y0_test : binary labels, 0 for normal traffic and 1 for an attack

y = training_df.pop("Class").values
x = training_df.values
y_test = testing_df.pop("Class").values
x_test = testing_df.values

# Binary view of the labels: anything that is not classes[0] ("Normal")
# counts as an attack. Same int8 values as the original ones/zeros fill.
y0 = (y != classes[0]).astype(np.int8)
y0_test = (y_test != classes[0]).astype(np.int8)

############################################################################################
def getModel():
    """Build a fully-connected autoencoder over the preprocessed features.

    Fix for the reported ValueError ("expected dense_19 to have shape (28,)
    but got array with shape (1,)"): the original output layer had a fixed
    28 units while fit() was given the one-column class labels y. An
    autoencoder reconstructs its own input, so the output layer must have
    x.shape[1] units and the training target must be x itself.
    """
    inp = Input(shape=(x.shape[1],))
    #d1=Dropout(0.5)(inp)
    encoded = Dense(14, activation='sigmoid', activity_regularizer=regularizers.l2(10e-5))(inp)
    decoded = Dense(28, activation='sigmoid')(encoded)
    # Reconstruction layer: one unit per input feature. sigmoid rather than
    # softmax, since each feature was independently min-max scaled to [0, 1]
    # and the outputs are not a probability distribution over features.
    decoded = Dense(x.shape[1], activation='sigmoid')(decoded)
    autoencoder = Model(inp, decoded)
    autoencoder.compile(optimizer='adam', loss='mean_squared_error')
    return autoencoder


autoencoder = getModel()
# Train to reconstruct the input: the target is x, not the class labels y.
history = autoencoder.fit(x, x,
                          epochs=10,
                          batch_size=100,
                          shuffle=True,
                          validation_split=0.1
                          )
Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...