Код ниже (модель уровня клиента частично опущена):
import tensorflow as tf
print(tf.__version__)
from __future__ import absolute_import, division, print_function
# TensorFlow and tf.keras
#from tensorflow.keras import datasets, layers, models
import numpy as np
import os
import pickle
import gzip
import urllib.request
from keras.optimizers import SGD
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Layer
from keras.utils import np_utils
from keras.models import load_model
# custom layers starts from here.
class STFTLayer(Layer):
def __init__(self, patch_size, stride, **kwargs):
super(STFTLayer, self).__init__(**kwargs)
self.patch_size = patch_size
self.stride = stride
# TensorFlow uses this function internally to get the right
# shape for next layer in the model
# DO NOT EDIT
def compute_output_shape(self, input_shape):
mock_shape = (1, input_shape[1], input_shape[2], input_shape[3])
mock_data = tf.zeros(mock_shape)
mock_patches = tf.image.extract_image_patches(mock_data,
sizes=[1,self.patch_size,self.patch_size,1],strides=[1,self.stride,self.stride,1],rates=[1,1,1,1],padding='VALID')
op_shape = mock_patches.shape
return (input_shape[0], op_shape[1], op_shape[2], op_shape[3])
# TensorFlow uses the this function to save the layer when
# model.save is called
# DO NOT EDIT
def get_config(self):
config = super(STFTLayer,self).get_config()
config['patch_size'] = self.patch_size
config['stride'] = self.stride
return config
# Change this function to change how the sampler vectors look
def call(self, inputs):
# Get patches from the input batch of images
# code omitted here
###
return op
# Main code for modeling begins here
# Load MNIST datasets
def extract_data(filename, num_images):
with gzip.open(filename) as bytestream:
bytestream.read(16)
buf = bytestream.read(num_images*28*28)
data = np.frombuffer(buf, dtype=np.uint8).astype(np.float32)
data = (data / 255) - 0.5
data = data.reshape(num_images, 28, 28, 1)
return data
def extract_labels(filename, num_images):
with gzip.open(filename) as bytestream:
bytestream.read(8)
buf = bytestream.read(1 * num_images)
labels = np.frombuffer(buf, dtype=np.uint8)
return (np.arange(10) == labels[:, None]).astype(np.float32)
class MNIST:
def __init__(self):
if not os.path.exists("data"):
os.mkdir("data")
files = ["train-images-idx3-ubyte.gz",
"t10k-images-idx3-ubyte.gz",
"train-labels-idx1-ubyte.gz",
"t10k-labels-idx1-ubyte.gz"]
for name in files:
urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/' + name, "data/"+name)
train_data = extract_data("data/train-images-idx3-ubyte.gz", 60000)
train_labels = extract_labels("data/train-labels-idx1-ubyte.gz", 60000)
self.test_data = extract_data("data/t10k-images-idx3-ubyte.gz", 10000)
self.test_labels = extract_labels("data/t10k-labels-idx1-ubyte.gz", 10000)
VALIDATION_SIZE = 5000
self.validation_data = train_data[:VALIDATION_SIZE, :, :, :]
self.validation_labels = train_labels[:VALIDATION_SIZE]
self.train_data = train_data[VALIDATION_SIZE:, :, :, :]
self.train_labels = train_labels[VALIDATION_SIZE:]
print(MNIST().train_data.shape)
print(MNIST().validation_data.shape)
train_images = MNIST().train_data
train_labels = MNIST().train_labels
test_images = MNIST().validation_data
test_labels = MNIST().validation_labels
MODEL_PATH = '/home/bo/Documents/Weibo/STFT_model/models/custom_layers'
# If saved model exists, load it
# If any changes are made to STFTLayer logic, remember
# to delete existing saved model file
if not os.path.isdir('/home/bo/Documents/Weibo/STFT_model/models/'):
os.makedirs('/home/bo/Documents/Weibo/STFT_model/models/')
if os.path.exists(MODEL_PATH):
model = load_model(MODEL_PATH, custom_objects={'STFTLayer': STFTLayer})
model.summary()
# Otherwise create new model.
else:
model = Sequential()
# Add our STFTLayer as the first layer with patch_size 4 and stride 1
model.add(STFTLayer(4,1,input_shape=(28,28,1),name='STFTLayer'))
model.add(Conv2D(32, (1, 1)))
#model.add(Conv2D(32, (1, 1), input_shape=(28,28,1)))
model.add(Activation('relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (1, 1)))
model.add(Activation('relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (1, 1)))
model.add(Activation('relu'))
model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
#model.add(Dense(10, activation = 'softmax'))
model.add(Dense(10))
def fn(correct, predicted):
return tf.nn.softmax_cross_entropy_with_logits(labels=correct,
logits=predicted/1)
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss=fn,
optimizer='adamax',
metrics=['accuracy'])
model.fit(train_images, train_labels,
batch_size=128,
validation_data=(test_images, test_labels),
nb_epoch=20,
shuffle=True)
model.summary()
#model.compile(optimizer='adamax',loss='sparse_categorical_crossentropy',metrics=['accuracy'])
#model.compile(optimizer='adamax',loss='categorical_crossentropy',metrics=['accuracy'])
# Train model
#model.fit(train_images, train_labels, epochs=10, batch_size=128, shuffle=True)
# Save the model
model.save(MODEL_PATH)
test_loss, test_acc = model.evaluate(test_images, test_labels)
Код выдал мне ошибку:
TypeError Traceback (most recent call last)
<ipython-input-3-44798d695c82> in <module>()
190 # Add our STFTLayer as the first layer with patch_size 4 and stride 1
191 model.add(STFTLayer(4,1,input_shape=(28,28,1),name='STFTLayer'))
--> 192 model.add(Conv2D(32, (1, 1)))
193 #model.add(Conv2D(32, (1, 1), input_shape=(28,28,1)))
194 model.add(Activation('relu'))
5 frames
/usr/local/lib/python3.6/dist-packages/keras/engine/sequential.py in add(self, layer)
180 self.inputs = network.get_source_inputs(self.outputs[0])
181 elif self.outputs:
--> 182 output_tensor = layer(self.outputs[0])
183 if isinstance(output_tensor, list):
184 raise TypeError('All layers in a Sequential model '
/usr/local/lib/python3.6/dist-packages/keras/engine/base_layer.py in __call__(self, inputs, **kwargs)
423 'You can build it manually via: '
424 '`layer.build(batch_input_shape)`')
--> 425 self.build(unpack_singleton(input_shapes))
426 self.built = True
427
/usr/local/lib/python3.6/dist-packages/keras/layers/convolutional.py in build(self, input_shape)
139 name='kernel',
140 regularizer=self.kernel_regularizer,
--> 141 constraint=self.kernel_constraint)
142 if self.use_bias:
143 self.bias = self.add_weight(shape=(self.filters,),
/usr/local/lib/python3.6/dist-packages/keras/legacy/interfaces.py in wrapper(*args, **kwargs)
89 warnings.warn('Update your `' + object_name + '` call to the ' +
90 'Keras 2 API: ' + signature, stacklevel=2)
---> 91 return func(*args, **kwargs)
92 wrapper._original_function = func
93 return wrapper
/usr/local/lib/python3.6/dist-packages/keras/engine/base_layer.py in add_weight(self, name, shape, dtype, initializer, regularizer, trainable, constraint)
241 if dtype is None:
242 dtype = self.dtype
--> 243 weight = K.variable(initializer(shape, dtype=dtype),
244 dtype=dtype,
245 name=name,
/usr/local/lib/python3.6/dist-packages/keras/initializers.py in __call__(self, shape, dtype)
207 scale /= max(1., fan_out)
208 else:
--> 209 scale /= max(1., float(fan_in + fan_out) / 2)
210 if self.distribution == 'normal':
211 # 0.879... = scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.)
TypeError: float() argument must be a string or a number, not 'Dimension'
Но код работает, когда я использую tenSenflow.keras для написания модели нейронных сетей ниже приведен код, который работает хорошо:
from __future__ import absolute_import, division, print_function
# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow.keras import layers, models
import os
# Helper libraries
import numpy as np
import urllib.request
import gzip
# Custom layers starts here.
class STFTLayer(layers.Layer):
def __init__(self, patch_size, stride, **kwargs):
super(STFTLayer, self).__init__(**kwargs)
self.patch_size = patch_size
self.stride = stride
# TensorFlow uses this function internally to get the right
# shape for next layer in the model
# DO NOT EDIT
def compute_output_shape(self, input_shape):
mock_shape = (1, input_shape[1], input_shape[2], input_shape[3])
mock_data = tf.zeros(mock_shape)
mock_patches = tf.image.extract_image_patches(mock_data,
sizes=[1,self.patch_size,self.patch_size,1],strides=[1,self.stride,self.stride,1],rates=[1,1,1,1],padding='VALID')
op_shape = mock_patches.shape
return (input_shape[0], op_shape[1], op_shape[2], op_shape[3])
# TensorFlow uses the this function to save the layer when
# model.save is called
# DO NOT EDIT
def get_config(self):
config = super(STFTLayer,self).get_config()
config['patch_size'] = self.patch_size
config['stride'] = self.stride
return config
# Change this function to change how the sampler vectors look
def call(self, inputs):
# Get patches from the input batch of images
# code omitted here
###
return op
# Main code for modeling begins here
# Load MNIST datasets
def extract_data(filename, num_images):
with gzip.open(filename) as bytestream:
bytestream.read(16)
buf = bytestream.read(num_images*28*28)
data = np.frombuffer(buf, dtype=np.uint8).astype(np.float32)
data = (data / 255) - 0.5
data = data.reshape(num_images, 28, 28, 1)
return data
def extract_labels(filename, num_images):
with gzip.open(filename) as bytestream:
bytestream.read(8)
buf = bytestream.read(1 * num_images)
labels = np.frombuffer(buf, dtype=np.uint8)
return (np.arange(10) == labels[:, None]).astype(np.float32)
class MNIST:
def __init__(self):
if not os.path.exists("data"):
os.mkdir("data")
files = ["train-images-idx3-ubyte.gz",
"t10k-images-idx3-ubyte.gz",
"train-labels-idx1-ubyte.gz",
"t10k-labels-idx1-ubyte.gz"]
for name in files:
urllib.request.urlretrieve('http://yann.lecun.com/exdb/mnist/' + name, "data/"+name)
train_data = extract_data("data/train-images-idx3-ubyte.gz", 60000)
train_labels = extract_labels("data/train-labels-idx1-ubyte.gz", 60000)
self.test_data = extract_data("data/t10k-images-idx3-ubyte.gz", 10000)
self.test_labels = extract_labels("data/t10k-labels-idx1-ubyte.gz", 10000)
VALIDATION_SIZE = 5000
self.validation_data = train_data[:VALIDATION_SIZE, :, :, :]
self.validation_labels = train_labels[:VALIDATION_SIZE]
self.train_data = train_data[VALIDATION_SIZE:, :, :, :]
self.train_labels = train_labels[VALIDATION_SIZE:]
print(MNIST().train_data.shape)
print(MNIST().validation_data.shape)
train_images = MNIST().train_data
train_labels = MNIST().train_labels
test_images = MNIST().validation_data
test_labels = MNIST().validation_labels
MODEL_PATH = '/home/bo/Documents/Weibo/STFT_model/models/custom_layers'
# If saved model exists, load it
# If any changes are made to STFTLayer logic, remember
# to delete existing saved model file
if not os.path.isdir('/home/bo/Documents/Weibo/STFT_model/models/'):
os.makedirs('/home/bo/Documents/Weibo/STFT_model/models/')
if os.path.exists(MODEL_PATH):
model = keras.models.load_model(MODEL_PATH, custom_objects={'STFTLayer': STFTLayer})
model.summary()
# Otherwise create new model.
else:
model = models.Sequential()
# Add our STFTLayer as the first layer with patch_size 4 and stride 1
model.add(STFTLayer(4,1,input_shape=(28,28,1),name='STFTLayer'))
model.add(layers.Conv2D(32, (1, 1), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (1, 1), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (1, 1), activation='relu'))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
#model.add(layers.Dense(10, activation='softmax'))
model.add(layers.Dense(10))
model.summary()
def fn(correct, predicted):
return tf.nn.softmax_cross_entropy_with_logits(labels=correct,
logits=predicted/1)
sgd = tf.keras.optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss=fn,
optimizer='adamax',
metrics=['accuracy'])
model.fit(train_images, train_labels,
batch_size=128,
validation_data=(test_images, test_labels),
nb_epoch=10,
shuffle=True)
#model.compile(optimizer='adamax',loss='categorical_crossentropy',metrics=['accuracy'])
# Train model
#model.fit(train_images, train_labels, epochs=10, batch_size=128, shuffle=True)
# Save the model
model.save(MODEL_PATH)
test_loss, test_acc = model.evaluate(test_images, test_labels)
Итак, первая версия кода с использованием импорта keras не работает для уровня клиента , но вторая версия кода - это , работающая с использованием импорта tenorflow.keras. Я хочу, чтобы первая версия кода также соответствовала коду других людей. Нужна ваша помощь, спасибо!