IndexError: индекс за пределами измерения 2 - PullRequest
0 голосов
/ 10 января 2020

Я реализую слой внимания в модели seq2seq. Мой код:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import numpy as np 
import scipy as sp 
import sklearn
from imblearn.over_sampling import RandomOverSampler
import random 
from sklearn.preprocessing import MinMaxScaler
import time 
from sklearn import preprocessing, model_selection
from keras.models import Sequential 
from keras.layers import Dense 
from keras.utils import np_utils
from sklearn.preprocessing import LabelEncoder
from keras.utils.np_utils import to_categorical
from sklearn.utils import shuffle
from keras.layers import Embedding, LSTM, SpatialDropout1D
# Load the dataset from the CSV file.
data = pd.read_csv('ivyversion.csv')

# Snapshot the whole frame as a NumPy array and show its shape and contents.
# NOTE(review): X is captured before the scaling step below, so it presumably
# keeps the unscaled values — confirm whether the model is meant to consume
# the normalized features instead.
X = np.array(data)
print(X.shape)
print(X)

# Rescale the listed metric columns in place with a min-max scaler
# (scikit-learn's default feature_range is (0, 1)). The scaler instance is
# kept in `scaler` so the fitted transform stays available afterwards.
scaler = MinMaxScaler()
cols_to_norm = ['wmc', 'dit', 'noc', 'cbo', 'rfc', 'lcom', 'ca', 'ce', 'npm', 'lcom3', 'loc', 'dam', 'moa', 'mfa', 'cam', 'ic', 'cbm', 'amc', 'max_cc', 'avg_cc']
data[cols_to_norm] = scaler.fit_transform(data[cols_to_norm])
print(data.head())

from random import randint
from numpy import array
from numpy import argmax
from numpy import array_equal
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import TimeDistributed
from keras.layers import RepeatVector
from attention_decodernew import AttentionDecoder
from time_distributed import time_distributed_densenew2
# Keep a module-level handle on the feature matrix for get_pair().
# Bind the ndarray itself rather than `X.data`: that attribute is the array's
# low-level buffer object, which prints only as a memory address and offers
# far more limited indexing than the array it belongs to.
k = X
print(k)


# one hot encode sequence
def one_hot_encode(sequence, n_unique):
    """Return an array with one one-hot row of width n_unique per item.

    Each item of `sequence` is used as the list index of the single 1 in its
    row, so it must be a valid integer index for a list of length n_unique.
    """
    rows = []
    for hot_index in sequence:
        row = [0] * n_unique
        row[hot_index] = 1
        rows.append(row)
    return array(rows)

# decode a one hot encoded string
def one_hot_decode(encoded_seq):
    """Return a list holding the argmax position of every row in encoded_seq."""
    decoded = []
    for row in encoded_seq:
        decoded.append(argmax(row))
    return decoded

def get_pair(n_in, n_out, cardinality):
    """Build one one-hot encoded (X, Y) pair from the module-level `k`.

    Args:
        n_in: first index used in the lookup on `k`; `n_in - n_out` zeros are
            appended when the target sequence is built.
        n_out: number of leading items of the input kept in the target.
        cardinality: second index used in the lookup on `k`, and the one-hot
            width passed to one_hot_encode().

    Returns:
        Tuple (X, Y); each encoding is reshaped to (1, rows, columns).

    NOTE(review): `k[n_in, cardinality]` is a two-index lookup, so it selects
    the single item at position (n_in, cardinality) rather than a run of n_in
    values, and it raises IndexError when either index is past the end of `k`.
    The slice on the following line and the loop in one_hot_encode() both need
    a sequence of integer codes here — TODO confirm where that sequence is
    meant to come from.
    """
    # look up the input "sequence" in the global k (see NOTE in the docstring)
    sequence_in = k[n_in, cardinality]
    # keep the first n_out items and append zeros; `+` only concatenates when
    # the left operand is a list — presumably a list of ints is expected, verify
    sequence_out = sequence_in[:n_out] + [0 for _ in range(n_in-n_out)]
    # one hot encode
    X = one_hot_encode(sequence_in, cardinality)
    Y = one_hot_encode(sequence_out, cardinality)
    # add a leading axis of size 1 so each encoding becomes 3D
    X = X.reshape((1, X.shape[0], X.shape[1]))
    Y = Y.reshape((1, Y.shape[0], Y.shape[1]))
    return X,Y
# define the encoder-decoder model
def baseline_model(n_timesteps_in, n_features):
    """Build and compile the plain encoder-decoder Sequential model.

    The stack is: a 150-unit LSTM over inputs of shape
    (n_timesteps_in, n_features), a RepeatVector of length n_timesteps_in,
    a 150-unit LSTM returning sequences, and a time-distributed softmax Dense
    layer of width n_features. It is compiled with categorical cross-entropy
    loss, the 'adam' optimizer and the 'acc' metric.
    """
    net = Sequential()
    stack = [
        LSTM(150, input_shape=(n_timesteps_in, n_features)),
        RepeatVector(n_timesteps_in),
        LSTM(150, return_sequences=True),
        TimeDistributed(Dense(n_features, activation='softmax')),
    ]
    for layer in stack:
        net.add(layer)
    net.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['acc'],
    )
    return net
# define the encoder-decoder with attention model
def attention_model(n_timesteps_in, n_features):
    """Build and compile the encoder-decoder Sequential model with attention.

    A 150-unit LSTM that returns sequences, over inputs of shape
    (n_timesteps_in, n_features), is followed by an AttentionDecoder layer
    constructed with (150, n_features). The model is compiled with categorical
    cross-entropy loss, the 'adam' optimizer and the 'acc' metric.
    """
    net = Sequential()
    encoder = LSTM(
        150,
        input_shape=(n_timesteps_in, n_features),
        return_sequences=True,
    )
    net.add(encoder)
    decoder = AttentionDecoder(150, n_features)
    net.add(decoder)
    net.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['acc'],
    )
    return net
# configure problem
# Problem configuration: feature count, input/output sequence lengths, and
# the number of independent train/evaluate runs.
n_features = 21
n_timesteps_in = n_timesteps_out = 5
n_repeats = 10

# Build a fresh attention model for every run, pass it to
# train_evaluate_model(), then record and print the value it returns.
print('Encoder-Decoder Model with attension')
results = []
for _ in range(n_repeats):
    model = attention_model(n_timesteps_in, n_features)
    accuracy = train_evaluate_model(
        model, n_timesteps_in, n_timesteps_out, n_features
    )
    results.append(accuracy)
    print(accuracy)

И я получаю следующее сообщение об ошибке

IndexError Traceback (most recent call last) в 4 for _ in range(n_repeats): 5 model = attention_model(n_timesteps_in, n_features) ----> 6 accuracy = train_evaluate_model(model, n_timesteps_in, n_timesteps_out, n_features) 7 results.append(accuracy) 8 print(accuracy)

в train_evaluate_model(model, n_timesteps_in, n_timesteps_out, n_features) 4 for epoch in range(5000): 5 # generate new random sequence ----> 6 X,Y = get_pair(n_timesteps_in, n_timesteps_out, n_features) 7 # fit model for one epoch on this sequence 8 model.fit(X, Y, epochs=1, verbose=0)

в get_pair(n_in, n_out, cardinality) 1 def get_pair(n_in, n_out, cardinality): 2 # generate random sequence ----> 3 sequence_in = k[n_in, cardinality] 4 sequence_out = sequence_in[:n_out] + [0 for _ in range(n_in-n_out)] 5 # one hot encode

IndexError: index out of bounds on dimension 2

...