First of all, I apologize if this question seems silly to anyone.
I have spent the last two days trying to find my mistake.
Could someone help me?
For context, I am trying to implement the following paper:
An Analysis of Single-Layer Networks in Unsupervised Feature Learning
The error is in the predict function, the second-to-last function of the file, and this is what it prints:
> File "redo.py", line 177, in <module>
> val_acc = (net.predict(testXC) == y_test).mean() File "/home/pc/Bureau/lastchance/An-Analysis-of-Single-Layer-Networks-in-Unsupervised-Feature-Learning/neural_net.py",
> line 349, in predict
> y_pred=np.argmax(np.maximum(0,(X.dot(self.params['W1'])+self.params['b1']))\ ValueError: shapes (100,0) and (6400,200) not aligned: 0 (dim 1) !=
> 6400 (dim 0)
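From the message, the array reaching predict has shape (100, 0): testXC ends up with zero feature columns, while W1 expects 6400 inputs. A quick way to confirm where the feature matrices go wrong is to print their shapes right before the final predict calls (a debugging sketch only; the names trainXC, valXC, testXC and net are the ones used in the second file below):

```
# Hypothetical shape check, placed just before the final predict calls.
print("trainXC", np.asarray(trainXC).shape)  # expected (4900, 6400)
print("valXC", np.asarray(valXC).shape)      # expected (100, 6400)
print("testXC", np.asarray(testXC).shape)    # the traceback points to (100, 0) here
print("W1", net.params["W1"].shape)          # (6400, 200); predict needs X.shape[1] == 6400
```

Here is neural_net.py: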
```
import numpy as np
import matplotlib.pyplot as plt
import time
class TwoLayerNet(object):
def __init__(self, input_size, hidden_size, output_size, std=1e-4, init_method="Normal"):
self.params = {}
self.params["W1"] = std * np.random.randn(input_size, hidden_size)
self.params["b1"] = np.zeros(hidden_size)
self.params["W2"] = std * np.random.randn(hidden_size, output_size)
self.params["b2"] = np.zeros(output_size)
if init_method =="i":
self.params["W1"] = np.random.randn(input_size, hidden_size)/np.sqrt(input_size)
self.params["W2"] = np.random.randn(hidden_size, output_size)/np.sqrt(hidden_size)
elif init_method =="io":
self.params["W1"] = np.random.randn(input_size,hidden_size)*np.sqrt(2.0/(input_size+hidden_size))
self.params["W2"] = np.random.randn(hidden_size,output_size)*np.sqrt(2.0/(output_size+hidden_size))
elif init_method == "ReLu":
self.params["W1"] = np.random.randn(input_size,hidden_size)*np.sqrt(2.0/input_size)
self.params["W2"] = np.random.randn(hidden_size,output_size)*np.sqrt(2.0/(output_size+hidden_size))
def loss(self, X, y=None, reg=0.0, dropout=0, dropMask=None, activation="Relu"):
W1, b1 = self.params["W1"], self.params["b1"]
W2, b2 = self.params["W2"], self.params["b2"]
N,D = X.shape
scores = None
if activation == "leaky":
inp=X.dot(W1) + b1
a2 = np.maximum(inp,.01*inp)
else:
a2 = np.maximum(X.dot(W1)+b1,0)
if (dropout != 0)and (dropout <1):
a2 *= (np.random.randn(*a2.shape)<dropout)/dropout
elif dropout>1:
W2*=dropMask['W2']/(dropout-1)
b2*=dropMask['b2']/(dropout-1)
scores = a2.dot(W2)+b2
if y is None:
return scores
loss = None
# first, a softmax over the scores
if dropout>1:
print(dropMask["W2"])
exp_scores = np.exp(scores)
a3 = exp_scores/(np.sum(exp_scores,1))[:,None]
loss = - np.sum(np.log(a3[range(len(a3)),y]))/len(a3)+ 0.5*reg*(np.sum(np.power(W1,2))+np.sum(np.power(W2,2)))
grads = {}
delta_3 = a3
delta_3[range(len(a3)), y] = a3[range(len(a3)),y]-1
delta_3/=len(a3)
grads["W2"] = a2.T.dot(delta_3) + reg*W2
grads["b2"] = np.sum(delta_3,0)
dF = np.ones(np.shape(a2))
if activation == "leaky":
dF[a2<0.0] = 0.01
else:
dF[a2==0.0]=0
delta_2 = delta_3.dot(W2.T)*dF
grads["W1"]= X.T.dot(delta_2)+reg*W1
grads["b1"]= np.sum(delta_2,0)
return loss, grads
def train(self, X, y, X_val, y_val, learning_rate =1e-3, learning_rate_decay=0.95, reg=1e-5, num_iters = 10, batch_size = 200, verbose = False, update = "SGD", arg=.99, dropout=0, activation ="ReLU" ):
num_train = X.shape[0]
iterations_per_epoch = max(num_train / batch_size, 1)
loss_history = []
train_acc_history = []
val_acc_history = []
top_params = dict()
cache_params = dict()
top_acc = 0
cache =dict()
dropMask = dict()
start_time = time.time()
for it in range(num_iters):
X_batch = None
y_batch = None
if num_train >= batch_size:
rand_idx = np.random.choice(num_train, batch_size)
else:
rand_idx = np.random.choice(num_train,batch_size, replace = True)
X_batch = X[rand_idx]
y_batch = y[rand_idx]
if dropout >1:
for param in ["W2", "b2"]:
dropMask[param] = np.random.randn(*self.params[param].shape) < (dropout-1)
loss, grads = self.loss(X_batch, y=y_batch, reg=reg, dropout=dropout, dropMask= dropMask, activation= activation)
loss_history.append(loss)
if np.isnan(grads["W1"]).any() or np.isnan(grads["W2"]).any() or np.isnan(grads["b1"]).any() or np.isnan(grads["b2"]).any():
continue
dx = None
for param in self.params:
if update == "SGD":
dx = learning_rate*grads[param]
elif update == "momentum":
if not param in cache:
cache[param] = np.zeros(grads[param].shape)
cache[param] = arg*cache[param]-learning_rate*grads[param]
dx =-cache[param]
elif update=="Nesterov momentum":
if not param in cache:
cache[param] = np.zeros(grads[param].shape)
v_prev = cache[param]
cache[param] = arg*cache[param] - learning_rate * grads[param]
dx = arg* v_prev - (1+arg) * cache[param]
elif updtae == "rmsprop":
if not param in cache:
cache[param] = np.zeros(grads[param].shape)
cache[param] = arg *cache[param]+(1-arg)*np.power(grads[param],2)
dx = learning_rate*grads[param]/np.sqrt(cache[param]+1e-8)
elif update == "Adam":
print("updtate error")
elif update == "Adagrad":
print ("updtate error")
else:
print("choose update method !")
if dropout > 1:
if param == "W2" or param == "b2":
dx*=dropMask[param]
self.params[param]-=dx
it+=1
if verbose and it % 100 == 0:
print ("iteration %d / %d : loss %f" %(it, num_iters, loss))
if it % iterations_per_epoch == 0:
train_acc = (self.predict(X_batch) == y_batch).mean()
val_acc = (self.predict(X_val)==y_val).mean()
train_acc_history.append(train_acc)
val_acc_history.append(val_acc)
learning_rate *= learning_rate_decay
if val_acc > top_acc:
top_acc = val_acc
top_params = self.params.copy()
if verbose:
print('train_acc %f, val_acc %f, time %d' %(train_acc, val_acc, (time.time()-start_time)/60.0))
self.params = top_params.copy()
return{
"loss_history" : loss_history,
"train_acc_history": train_acc_history,
"val_acc_history" :val_acc_history,
}
def predict(self,X):
y_pred = None
y_pred=np.argmax(np.maximum(0,(X.dot(self.params['W1'])+self.params['b1'])).dot(self.params['W2'])+self.params['b2'],1)
return y_pred
def accuracy(self,X,y):
acc = (self.predict(X) == y).mean()
return acc
def gradient_check(self, X,y):
realGrads = dict()
_,grads = self.loss(X,y)
keys = ["W1", "b1",
"W2", "b2"]
for key in keys:
W1 = self.params[key]
W1_grad = []
delta = 1e-4
if len(np.shape(W1)) == 2:
for i in range(np.shape(W1)[0]):
grad = []
for j in range(np.shape(W1)[1]):
W1[i,j]+=delta
self.params[key]=W1
l_plus,_=self.loss(X,y)
W1[i,j]-=2*delta
self.params[key]=W1
l_minus,_ =self.loss(X,y)
grad.append((l_plus-l_minus)/2.0/delta)
W1[i,j] += delta
W1_grad.append(grad)
else:
for i in range(len(W1)):
W1[i] += delta
self.params[key] = W1
l_plus,_=self.loss(X,y)
W1[i] -= 2*delta
self.params[key] = W1
l_minus,_=self.loss(X,y)
W1_grad.append((l_plus-l_minus)/2.0/delta)
W1[i]+=delta
print(W1_grad)
print(grads[key])
print(key, 'error', np.mean(np.sum(np.power((W1_grad-grads[key]),2), len(np.shape(W1))-1)/np.sum(np.power((W1_grad+grads[key]),2), len(np.shape(W1))-1)))
```

Here is the file (redo.py) where I create my TwoLayerNet object:

```
from data_utils import load_CIFAR10
from neural_net import *
import matplotlib.pyplot as plt
import time
import numpy as np
np.seterr(divide='ignore', invalid='ignore')
start_time = time.time()
def get_CIFAR10_data(num_training = 4900, num_validation = 100, num_test = 100):
cifar10_dir = '../datasets/cifar-10-batches-py/'
print(cifar10_dir)
X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
mask = range(num_training, num_training + num_validation)
X_val = X_train[mask]
y_val = y_train[mask]
mask = range(num_training)
X_train = X_train[mask]
y_train = y_train[mask]
mask = range(num_test)
X_test = X_test[mask]
y_test = y_test[mask]
mean_image = np.mean(X_train, axis = 0)
X_train -= mean_image
X_val = X_val - mean_image
X_test = X_test - mean_image
X_train = X_train.swapaxes(1,3)
X_val = X_val.swapaxes(1,3)
return X_train, y_train, X_val, y_val, X_test, y_test
X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
print("chargement terminé")
print("donnée d'entrainement :" , X_train.shape)
print("donnée de validation :" , X_val.shape)
print("donnée de test :" , X_test.shape)
print("Temps :" , (time.time()-start_time)/60)
rfSize = 6
numCentroids = 1600
whitening = False
numPatches = 400000
CIFAR_DIM = [32,32,3]
patches = []
for i in range(numPatches):
if(np.mod(i, 10000)== 0):
print("echantillonage pour kmeans",i,"/", numPatches)
start_r = np.random.randint(CIFAR_DIM[0] - rfSize)
start_c = np.random.randint(CIFAR_DIM[1] - rfSize)
patch = np.array([])
img = X_train[np.mod(i, X_train.shape[0])]
for layer in img:
patch = np.append(patch, layer[start_r:start_r + rfSize].T[start_c:start_c + rfSize].T.ravel())
patches.append(patch)
patches = np.array(patches)
# normalize the patches
patches = (patches-patches.mean(1)[:,None])/np.sqrt(patches.var(1)+ 10)[:, None]
print("time", (time.time()-start_time)/60)
del X_train, y_train, X_val, y_val, X_test, y_test
# whitening
print("Blanchiment")
[D,V]= np.linalg.eig(np.cov(patches, rowvar = 0))
P = V.dot(np.diag(np.sqrt(1/(D + 0.1)))).dot(V.T)
patches = patches.dot(P)
print("time", (time.time() - start_time)/60.0)
del D,V
centroids = np.random.randn(numCentroids, patches.shape[1])*.1
num_iters = 2
batch_size = 1000
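# Mini-batch k-means with hard assignments: each patch goes to the centroid
# maximizing x.c - 0.5*||c||^2 (i.e. its nearest centroid); centroids are then
# recomputed as the mean of the patches assigned to them.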
for ite in range(num_iters):
print("kmeans iters", ite+1,"/", num_iters )
hf_c2_sum = .5*np.power(centroids, 2).sum(1)
counts = np.zeros(numCentroids)
summation = np.zeros_like(centroids)
for i in range(0, len(patches), batch_size):
last_i = min(i+batch_size, len(patches))
idx = np.argmax(patches[i:last_i].dot(centroids.T) - hf_c2_sum.T, axis=1)  # nearest centroid per patch
S = np.zeros([last_i - i, numCentroids])
S[range(last_i - i), idx] = 1  # one-hot assignment matrix
summation+=S.T.dot(patches[i:last_i])
counts+= S.sum(0)
centroids = summation/counts[:,None]
centroids[counts==0]=0
print("time", (time.time()-start_time)/60.0)
def sliding(img, window=[6,6]):
out = np.array([])
for i in range(3):
s = img.shape
row = s[1]
col = s[2]
col_extent = col - window[1]+ 1
row_extent = row - window[0]+ 1
start_idx = np.arange(window[0])[:,None]*col + np.arange(window[1])
offset_idx = np.arange(row_extent)[:,None]*col + np.arange(col_extent)
if len(out)==0:
out = np.take(img[i], start_idx.ravel()[:,None] + offset_idx.ravel())
else:
out=np.append(out,np.take(img[i], start_idx.ravel()[:,None] + offset_idx.ravel()),axis=0)
return out
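# extract_features: whiten all patches of an image, apply the "triangle"
# activation max(0, mean_dist - dist) against every centroid, then sum-pool
# over the four image quadrants (4 * numCentroids = 6400 features per image).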
def extract_features(X_train):
trainXC = []
idx = 0
for img in X_train:
idx += 1
if not np.mod(idx,1000):
print('extract feature', idx, "/", len(X_train))
print("time", (time.time()-start_time)/60)
patches = sliding(img,[rfSize, rfSize]).T
# normalize
patches = (patches-patches.mean(1)[:,None])/(np.sqrt(patches.var(1)+ 10)[:,None])
patches = patches.dot(P)
x2 = np.power(patches,2).sum(1)
c2 = np.power(centroids,2).sum(1)
xc = patches.dot(centroids.T)
dist = np.sqrt(-2*xc+x2[:,None] + c2)
u = dist.mean(1)
patches = np.maximum(-dist+u[:, None],0)
rs = CIFAR_DIM[0] - rfSize+1
cs = CIFAR_DIM[1] - rfSize + 1
patches = np.reshape(patches, [rs, cs, -1])
q = []
q.append(patches[0:int(rs/2), 0:int(cs/2)].sum(0).sum(0))
q.append(patches[0:int(rs/2), int(cs/2):cs-1].sum(0).sum(0))
q.append(patches[int(rs/2):rs-1,0:int(cs/2)].sum(0).sum(0))
q.append(patches[int(rs/2):rs-1,int(cs/2):cs-1].sum(0).sum(0))
q = np.array(q).ravel()
trainXC.append(q)
trainXC = np.array(trainXC)
trainXC = (trainXC - trainXC.mean(1)[:, None]) / (np.sqrt(trainXC.var(1) + .01)[:, None])
return trainXC
X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
trainXC = extract_features(X_train)
print("time", (time.time()-start_time)/60.0)
valXC = extract_features(X_val)
testXC = extract_features(X_test)
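# Train the two-layer network on the pooled features and evaluate it.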
from neural_net import *
input_size = trainXC.shape[1]
hidden_size=200
momentum=.95
learning_rate=5e-4
learning_rate_decay=.99
dropout=.3
num_classes = 10
net = TwoLayerNet(input_size, hidden_size, num_classes, 1e-4)
stats = net.train(trainXC, y_train, valXC, y_val,num_iters=70000, batch_size=128,learning_rate=learning_rate, learning_rate_decay=learning_rate_decay,reg=0, verbose=True,update="momentum",arg=momentum,dropout=dropout)
val_acc = (net.predict(trainXC)==y_train).mean()
print("Précision de l'entraienment", val_acc)
val_acc = (net.predict(valXC)==y_val).mean()
print("Validation accuracy: ", val_acc)
val_acc = (net.predict(testXC) == y_test).mean()
print("test accuracy : ", val_acc)
print("time," (time.time()-start_time/60.0))```