Я реализую слой внимания на модели seq2se2. Мой код
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import numpy as np
import scipy as sp
import sklearn
from imblearn.over_sampling import RandomOverSampler
import random
from sklearn.preprocessing import MinMaxScaler
import time
from sklearn import preprocessing, model_selection
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import np_utils
from sklearn.preprocessing import LabelEncoder
from keras.utils.np_utils import to_categorical
from sklearn.utils import shuffle
from keras.layers import Embedding, LSTM, SpatialDropout1D
data = pd.read_csv('ivyversion.csv')
#data = shuffle(data)
#data = data.drop(['name', 'version', 'name'], axis =1)
#X = data.drop(['bug'], axis = 1)
X = data
X = np.array(X)
#Y = data['bug']
print(X.shape)
X.shape
X=np.array(X)
#print(X)
#X = X.flatten()
print(X)
scaler = MinMaxScaler()
MinMaxScaler(copy=True, feature_range=(0, 1))
cols_to_norm = ['wmc', 'dit', 'noc', 'cbo', 'rfc', 'lcom', 'ca', 'ce', 'npm', 'lcom3', 'loc', 'dam', 'moa', 'mfa', 'cam', 'ic', 'cbm', 'amc', 'max_cc', 'avg_cc']
data[cols_to_norm] = MinMaxScaler().fit_transform(data[cols_to_norm])
print(data.head())
from random import randint
from numpy import array
from numpy import argmax
from numpy import array_equal
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import TimeDistributed
from keras.layers import RepeatVector
from attention_decodernew import AttentionDecoder
from time_distributed import time_distributed_densenew2
data.shape
k=X.data
print(k)
# one hot encode sequence
def one_hot_encode(sequence, n_unique):
encoding = list()
for value in sequence:
vector = [0 for _ in range(n_unique)]
vector[value] = 1
encoding.append(vector)
return array(encoding)
# decode a one hot encoded string
def one_hot_decode(encoded_seq):
return [argmax(vector) for vector in encoded_seq]
def get_pair(n_in, n_out, cardinality):
# generate random sequence
sequence_in = k[n_in, cardinality]
sequence_out = sequence_in[:n_out] + [0 for _ in range(n_in-n_out)]
# one hot encode
X = one_hot_encode(sequence_in, cardinality)
Y = one_hot_encode(sequence_out, cardinality)
# reshape as 3D
X = X.reshape((1, X.shape[0], X.shape[1]))
Y = Y.reshape((1, Y.shape[0], Y.shape[1]))
return X,Y
# define the encoder-decoder model
def baseline_model(n_timesteps_in, n_features):
model = Sequential()
model.add(LSTM(150, input_shape=(n_timesteps_in, n_features)))
model.add(RepeatVector(n_timesteps_in))
model.add(LSTM(150, return_sequences=True))
model.add(TimeDistributed(Dense(n_features, activation='softmax')))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
return model
# define the encoder-decoder with attention model
def attention_model(n_timesteps_in, n_features):
model = Sequential()
model.add(LSTM(150, input_shape=(n_timesteps_in, n_features), return_sequences=True))
model.add(AttentionDecoder(150, n_features))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
return model
# configure problem
n_features = 21
n_timesteps_in = 5
n_timesteps_out = 5
n_repeats = 10
# evaluate encoder-decoder model
# evaluate encoder-decoder model
print('Encoder-Decoder Model with attension')
results = list()
for _ in range(n_repeats):
model = attention_model(n_timesteps_in, n_features)
accuracy = train_evaluate_model(model, n_timesteps_in, n_timesteps_out, n_features)
results.append(accuracy)
print(accuracy)
И я получаю следующее сообщение об ошибке
IndexError Traceback (последний последний вызов) в 4 для _ в диапазоне (n_repeats): 5 модель = внимание_модели (n_timesteps_in, n_features) ----> 6 точности = train_evaluate_model (модель, n_timesteps_in, n_timesteps_out, n_features) 7 results.append (точность) 8 print (точность)
в train_evaluate_model (модель, n_timesteps_out, n__n_t_n_n_n_n_ n_features) 4 для периода в диапазоне (5000): 5 # генерировать новую случайную последовательность ----> 6 X, Y = get_pair (n_timesteps_in, n_timesteps_out, n_features) 7 # подгонять модель для одной эпохи в этой последовательности 8 model.fit ( X, Y, эпох = 1, многословно = 0)
в get_pair (n_in, n_out, cardinality) 1 def get_pair (n_in, n_out, cardinality): 2 # генерировать случайную последовательность ----> 3 sequence_in = k [n_in, количество элементов] 4 sequence_out = sequence_in [: n_out] + [0 для _ в диапазоне (n_in-n_out)] 5 # одно горячее кодирование
IndexError: индексировать за пределами измерения 2