странная проблема в моей функции прогнозирования - PullRequest
0 голосов
/ 25 апреля 2020

Во-первых, теперь я прошу прощения, если вопрос может показаться кому-то глупым.

Я уже два дня пытаюсь понять свои ошибки.

Может ли кто-нибудь мне помочь?

Кстати, я пытаюсь реализовать следующую статью

An-Analysis-of-Layer-Networks-in-Unsupervised-Feature-Learning

Ошибка в Функция предсказания, предпоследняя функция файла.

И это отображает это,

> File "redo.py", line 177, in <module>
>     val_acc = (net.predict(testXC) == y_test).mean()   File "/home/pc/Bureau/lastchance/An-Analysis-of-Single-Layer-Networks-in-Unsupervised-Feature-Learning/neural_net.py",
> line 349, in predict
>     y_pred=np.argmax(np.maximum(0,(X.dot(self.params['W1'])+self.params['b1']))\ ValueError: shapes (100,0) and (6400,200) not aligned: 0 (dim 1) !=
> 6400 (dim 0)
import numpy as np
import matplotlib.pyplot as plt
import time


class TwoLayerNet(object):

    def __init__(self, input_size, hidden_size, output_size, std=1e-4, init_method="Normal"):


        self.params = {}
        self.params["W1"] = std * np.random.randn(input_size, hidden_size)
        self.params["b1"] = np.zeros(hidden_size)
        self.params["W2"] = std * np.random.randn(hidden_size, output_size)
        self.params["b2"] = np.zeros(output_size)

        if init_method =="i":
            self.params["W1"] = np.random.randn(input_size, hidden_size)/np.sqrt(input_size)
            self.params["W2"] = np.random.randn(hidden_size, output_size)/np.sqrt(hidden_size)
        elif init_method =="io":
            self.params["W1"] = np.random.randn(input_size,hidden_size)*np.sqrt(2.0/(input_size+hidden_size))
            self.params["W2"] = np.random.randn(hidden_size,output_size)*np.sqrt(2.0/(output_size+hidden_size))
        elif init_method == "ReLu":
            self.params["W1"] = np.random.randn(input_size,hidden_size)*np.sqrt(2.0/input_size)
            self.params["W2"] = np.random.randn(hidden_size,output_size)*np.sqrt(2.0/(output_size+hidden_size))

    def loss(self, X, y=None, reg=0.0, dropout=0, dropMask=None, activation="Relu"):

        W1, b1 = self.params["W1"], self.params["b1"]
        W2, b2 = self.params["W2"], self.params["b2"]
        N,D = X.shape


        scores = None

        if activation == "leaky":
            inp=X.dot(W1) + b1
            a2 = np.maximum(inp,.01*inp)
        else:
            a2 = np.maximum(X.dot(W1)+b1,0)
        if (dropout != 0)and (dropout <1):
            a2 *= (np.random.randn(*a2.shape)<dropout)/dropout
        elif dropout>1:
            W2*=dropMask['W2']/(dropout-1)
            b2*=dropMask['b2']/(dropout-1)


        scores = a2.dot(W2)+b2


        if y is None:
            return scores


        loss = None


        #dabord un softmax
        if dropout>1:
            print(dropMask["W2"])
        exp_scores = np.exp(scores)

        a3 = exp_scores/(np.sum(exp_scores,1))[:,None]

        loss = - np.sum(np.log(a3[range(len(a3)),y]))/len(a3)+ 0.5*reg*(np.sum(np.power(W1,2))+np.sum(np.power(W2,2)))   



        grads = {}



        delta_3 = a3
        delta_3[range(len(a3)), y] = a3[range(len(a3)),y]-1
        delta_3/=len(a3)
        grads["W2"] = a2.T.dot(delta_3) + reg*W2
        grads["b2"] = np.sum(delta_3,0)

        dF = np.ones(np.shape(a2))
        if activation == "leaky":
            dF[a2<0.0] = 0.01
        else:
            dF[a2==0.0]=0


        delta_2 = delta_3.dot(W2.T)*dF
        grads["W1"]= X.T.dot(delta_2)+reg*W1
        grads["b1"]= np.sum(delta_2,0)

        return loss, grads

    def train(self, X, y, X_val, y_val, learning_rate =1e-3, learning_rate_decay=0.95, reg=1e-5, num_iters = 10, batch_size = 200, verbose = False, update = "SGD", arg=.99, dropout=0, activation ="ReLU" ):

        num_train = X.shape[0]
        iterations_per_epoch = max(num_train / batch_size, 1)

        loss_history = []
        train_acc_history = []
        val_acc_history = []
        top_params = dict()
        cache_params = dict()
        top_acc = 0
        cache  =dict()
        dropMask = dict()
        start_time = time.time()

        for it in range(num_iters):
            X_batch = None
            y_batch = None

            if num_train >= batch_size:
                rand_idx = np.random.choice(num_train, batch_size)
            else:
                rand_idx = np.random.choice(num_train,batch_size, replace = True)
            X_batch = X[rand_idx]
            y_batch = y[rand_idx]

            if dropout >1:
                for param in ["W2", "b2"]:
                    dropMask[param] = np.random.randn(*self.params[param].shape) < (dropout-1)


            loss, grads = self.loss(X_batch, y=y_batch, reg=reg, dropout=dropout, dropMask= dropMask, activation= activation)
            loss_history.append(loss)


            if np.isnan(grads["W1"]).any() or np.isnan(grads["W2"]).any() or np.isnan(grads["b1"]).any() or np.isnan(grads["b2"]).any():
                continue

            dx = None
            for param in self.params:
                if update == "SGD":
                    dx = learning_rate*grads[param]

                elif update == "momentum":
                    if not param in cache:
                        cache[param] = np.zeros(grads[param].shape)
                    cache[param] = arg*cache[param]-learning_rate*grads[param]
                    dx =-cache[param]

                elif update=="Nesterov momentum":
                    if not param in cache:
                        cache[param] = np.zeros(grads[param].shape)
                    v_prev = cache[param]
                    cache[param] = arg*cache[param] - learning_rate * grads[param]
                    dx = arg* v_prev - (1+arg) * cache[param]
                elif updtae == "rmsprop":
                    if not param in cache:
                        cache[param] = np.zeros(grads[param].shape)
                    cache[param] = arg *cache[param]+(1-arg)*np.power(grads[param],2)
                    dx = learning_rate*grads[param]/np.sqrt(cache[param]+1e-8)
                elif update == "Adam":
                    print("updtate error")
                elif update == "Adagrad":
                    print ("updtate error")
                else:
                    print("choose update method !")
                if dropout > 1:
                    if param == "W2" or param == "b2":
                        dx*=dropMask[param]
                self.params[param]-=dx

            it+=1




            if verbose and it % 100 == 0:
                print ("iteration %d / %d : loss %f" %(it, num_iters, loss))
            if it % iterations_per_epoch == 0:

                train_acc = (self.predict(X_batch) == y_batch).mean()
                val_acc = (self.predict(X_val)==y_val).mean()
                train_acc_history.append(train_acc)
                val_acc_history.append(val_acc)


                learning_rate *= learning_rate_decay

                if val_acc > top_acc:
                    top_acc = val_acc
                    top_params = self.params.copy()

                if verbose:
                    print('train_acc %f, val_acc %f, time %d' %(train_acc, val_acc, (time.tome()-start_time)/60.0))
            self.params = top_params.copy()

            return{

                "loss_history" : loss_history,
                "train_acc_history": train_acc_history,
                "val_acc_history" :val_acc_history,
            }


    def predict(self,X):



        y_pred = None


        y_pred=np.argmax(np.maximum(0,(X.dot(self.params['W1'])+self.params['b1'])).dot(self.params['W2'])+self.params['b2'],1)



        return y_pred


    def accuracy(self,X,y):

        acc = (self.predict(X) == y).mean()

        return acc
    def gradient_check(self, X,y):
        realGrads = dict()
        _,grads = self.loss(X,y)
        keys = ["W1", "b1",
                "W2", "b2"]

        for key in keys:
            W1 = self.params[key]
            W1_grad = []
            delta = 1e-4
            if len(np.shape(W1)) == 2:
                for i in range(np.shape(W1)[0]):
                    grad = []
                    for j in range(np.shape(W1)[1]):
                        W1[i,j]+=delta
                        self.params[key]=W1
                        l_plus,_=self.loss(X,y)
                        W1[i,j]-=2*delta
                        self.params[keys]=W1
                        l_minus,_ =self.loss(X,y)
                        grad.append((l_plus-l_minus)/2.0/delta)
                        W1[i,j] += delta
                    W1_grad.append(grad)
            else:
                for i in rang(len(W1)):
                    W1[i] += delta
                    self.params[key] = W1
                    l_plus,_=self.loss(X,y)
                    W1[i] -= 2*delta
                    self.params[key] = W1
                    l_minus,_=self.loss(X,y)
                    W1_grad.append((l_plus-l_minus)/2.0/delta)
                    w1[i]+=delta
            print(W1_grad)
            print(grads[key])
            print(key, 'error', np.mean(np.sum(np.power((W1_grad-grads[key]),2), len(np.shape(W1))-1)/np.sum(np.power((W1_grad+grads[key]),2),len(np.shape(W1)-1))))

Вот файл, где я создал свой объект TwoLaye rNet:

from data_utils import load_CIFAR10
from neural_net import *
import matplotlib.pyplot as plt
import time
import numpy as np
np.seterr(divide='ignore', invalid='ignore')

start_time = time.time()

def get_CIFAR10_data(num_training = 4900, num_validation = 100, num_test = 100):
    cifar10_dir = '../datasets/cifar-10-batches-py/'
    print(cifar10_dir)
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]
    mean_image = np.mean(X_train, axis = 0)
    X_train -= mean_image
    X_val = X_val - mean_image
    X_test = X_test - mean_image
    X_train = X_train.swapaxes(1,3)
    X_val = X_val.swapaxes(1,3)
    return X_train, y_train, X_val, y_val, X_test, y_test

X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
print("chargement terminé")
print("donnée d'entrainement :" , X_train.shape)
print("donnée de validation :" , X_val.shape)
print("donnée de test :" , X_test.shape)
print("Temps :" , (time.time()-start_time)/60)

rfSize = 6
numCentroids = 1600
whitening = False
numPatches = 400000
CIFAR_DIM = [32,32,3]

patches = []
for i in range(numPatches):
    if(np.mod(i, 10000)== 0):
        print("echantillonage pour kmeans",i,"/", numPatches)
    start_r = np.random.randint(CIFAR_DIM[0] - rfSize)
    start_c = np.random.randint(CIFAR_DIM[1] - rfSize)
    patch = np.array([])
    img = X_train[np.mod(i, X_train.shape[0])]
    for layer in img:
        patch = np.append(patch, layer[start_r:start_r + rfSize].T[start_c:start_c + rfSize].T.ravel())
    patches.append(patch)
patches = np.array(patches)

#on normalise les patches
patches = (patches-patches.mean(1)[:,None])/np.sqrt(patches.var(1)+ 10)[:, None]
print("time", (time.time()-start_time)/60)

del X_train, y_train, X_val, y_val, X_test, y_test

#blanchiment
print("Blanchiment")

[D,V]= np.linalg.eig(np.cov(patches, rowvar = 0))

P = V.dot(np.diag(np.sqrt(1/(D + 0.1)))).dot(V.T)
patches = patches.dot(P)

print("time", (time.time() - start_time)/60.0)
del D,V

centroids = np.random.randn(numCentroids, patches.shape[1])*.1
num_iters = 2
batch_size = 1000
for ite in range(num_iters):
    print("kmeans iters", ite+1,"/", num_iters )
    hf_c2_sum = .5*np.power(centroids, 2).sum(1)
    counts = np.zeros(numCentroids)
    summation = np.zeros_like(centroids)
    for i in range(0, len(patches), batch_size):
        last_i = min(i+batch_size, len(patches))
        idx = np.argmax(patches[i:last_i].dot(centroids.T) -hf_c2_sum.T, axis = 1)
        S = np.zeros([last_i - i, numCentroids])
        S[range(last_i-i), np.argmax(patches[i:last_i].dot(centroids.T) -hf_c2_sum.T, axis=1)] 
        summation+=S.T.dot(patches[i:last_i])
        counts+= S.sum(0)
    centroids = summation/counts[:,None]
    centroids[counts==0]=0

print("time", (time.time()-start_time)/60.0)



def sliding(img, window=[6,6]):
    out = np.array([])
    for i in range(3):
        s = img.shape
        row = s[1]
        col = s[2]
        col_extent = col - window[1]+ 1
        row_extent = row - window[0]+ 1
        start_idx = np.arange(window[0])[:,None]*col + np.arange(window[1])
        offset_idx = np.arange(row_extent)[:,None]*col + np.arange(col_extent)
        if len(out)==0:
            out = np.take(img[i], start_idx.ravel()[:,None] + offset_idx.ravel())
        else:
            out=np.append(out,np.take(img[i], start_idx.ravel()[:,None] + offset_idx.ravel()),axis=0)
    return out

def extract_features(X_train):
    trainXC = []
    idx = 0
    for img in X_train:
        idx += 1
        if not np.mod(idx,1000):
            print('extract feature', idx, "/", len(X_train))
            print("time", (time.time()-start_time)/60)
        patches = sliding(img,[rfSize, rfSize]).T
        #on normalise
        patches = (patches-patches.mean(1)[:,None])/(np.sqrt(patches.var(1)+ 10)[:,None])
        patches = patches.dot(P)

        x2 = np.power(patches,2).sum(1)
        c2 = np.power(centroids,2).sum(1)
        xc = patches.dot(centroids.T)

        dist = np.sqrt(-2*xc+x2[:,None] + c2)
        u = dist.mean(1)
        patches = np.maximum(-dist+u[:, None],0)
        rs = CIFAR_DIM[0] - rfSize+1
        cs = CIFAR_DIM[1] - rfSize + 1
        patches = np.reshape(patches, [rs, cs, -1])
        q = []
        q.append(patches[0:int(rs/2), 0:int(cs/2)].sum(0).sum(0))
        q.append(patches[0:int(rs/2), int(cs/2):cs-1].sum(0).sum(0))
        q.append(patches[int(rs/2):rs-1,0:int(cs/2)].sum(0).sum(0))
        q.append(patches[int(rs/2):rs-1,int(cs/2):cs-1].sum(0).sum(0))
        q = np.array(q).ravel()
        trainXC.append(q)
        trainXC=(trainXC-trainXC.mean(1)[:,None])/(np.sqrt(trainXC.var(1)+.01)[:,None]) 
    trainXC = np.array(trainXC)

    return trainXC

X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
trainXC = extract_features(X_train)

print("time", (time.time()-start_time)/60.0)
valXC = extract_features(X_val)
testXC = extract_features(X_test)



from neural_net import *


input_size = trainXC.shape[1]
hidden_size=200
momentum=.95
learning_rate=5e-4
learning_rate_decay=.99
dropout=.3
num_classes = 10
net = TwoLayerNet(input_size, hidden_size, num_classes, 1e-4)
stats = net.train(trainXC, y_train, valXC, y_val,num_iters=70000, batch_size=128,learning_rate=learning_rate, learning_rate_decay=learning_rate_decay,reg=0, verbose=True,update="momentum",arg=momentum,dropout=dropout)


val_acc = (net.predict(trainXC)==y_train).mean()
print("Précision de l'entraienment", val_acc)
val_acc = (net.predict(valXC)==y_val).mean()
print("Validation accuracy: ", val_acc)

val_acc = (net.predict(testXC) == y_test).mean()
print("test accuracy : ", val_acc)

print("time," (time.time()-start_time/60.0))```



Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...