Я пытаюсь обучить модель классификации без присмотра, для которой я использую глубокую кластеризацию с моей моделью на Керасе. Код, который я имею в виду для кластеризации: this . Во время выполнения кода я получаю ошибку в слое cutom при добавлении весов. Ниже вы можете увидеть код и ошибку.
import metrics
import numpy as np
from tensorflow.keras.layers import Layer, InputSpec
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model
from sklearn.cluster import KMeans
class ClusteringLayer(Layer):
"""
Clustering layer converts input sample (feature) to soft label, i.e. a vector that represents the probability of the
sample belonging to each cluster. The probability is calculated with student's t-distribution.
# Example
```
model.add(ClusteringLayer(n_clusters=10))
```
# Arguments
n_clusters: number of clusters.
weights: list of Numpy array with shape `(n_clusters, n_features)` witch represents the initial cluster centers.
alpha: parameter in Student's t-distribution. Default to 1.0.
# Input shape
2D tensor with shape: `(n_samples, n_features)`.
# Output shape
2D tensor with shape: `(n_samples, n_clusters)`.
"""
def __init__(self, n_clusters, weights=None, alpha=1.0, **kwargs):
if 'input_shape' not in kwargs and 'input_dim' in kwargs:
kwargs['input_shape'] = (kwargs.pop('input_dim'),)
super(ClusteringLayer, self).__init__(**kwargs)
self.n_clusters = n_clusters
self.alpha = alpha
self.initial_weights = weights
self.input_spec = InputSpec(ndim=2)
def build(self, input_shape):
assert len(input_shape) == 2
input_dim = input_shape[1]
self.input_spec = InputSpec(dtype=K.floatx(), shape=(None, input_dim))
self.clusters = self.add_weight(shape=(self.n_clusters, input_dim), initializer='glorot_uniform', name='clusters')
if self.initial_weights is not None:
self.set_weights(self.initial_weights)
del self.initial_weights
self.built = True
def call(self, inputs, **kwargs):
""" student t-distribution, as same as used in t-SNE algorithm.
q_ij = 1/(1+dist(x_i, u_j)^2), then normalize it.
Arguments:
inputs: the variable containing data, shape=(n_samples, n_features)
Return:
q: student's t-distribution, or soft labels for each sample. shape=(n_samples, n_clusters)
"""
q = 1.0 / (1.0 + (K.sum(K.square(K.expand_dims(inputs, axis=1) - self.clusters), axis=2) / self.alpha))
q **= (self.alpha + 1.0) / 2.0
q = K.transpose(K.transpose(q) / K.sum(q, axis=1))
return q
def compute_output_shape(self, input_shape):
assert input_shape and len(input_shape) == 2
return input_shape[0], self.n_clusters
def get_config(self):
config = {'n_clusters': self.n_clusters}
base_config = super(ClusteringLayer, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
class Inf:
def __init__(self, D1, D2, n_clusters):
from tensorflow.keras.models import model_from_json
self.n_clusters = n_clusters
json_file = open(D1, 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights(D2)
print("Loaded model from disk")
loaded_model.summary()
self.model = loaded_model
def create_model(self):
hidden = self.model.get_layer(name='encoded').output
self.encoder = Model(inputs = self.model.input, outputs = hidden)
clustering_layer = ClusteringLayer(n_clusters=self.n_clusters)(hidden)
self.model = Model(inputs = self.model.input, outputs = clustering_layer)
self.model = model
def compile(self, loss='kld', optimizer='adam'):
self.model.compile(loss=loss, optimizer=optimizer)
def fit(self, x, y=None, batch_size=16, maxiter=2e4, tol=1e-3, update_interval=140, save_dir='./results/temp'):
print('Update interval', update_interval)
save_interval = x.shape[0] / batch_size * 5
print('Save interval', save_interval)
print('Initializing cluster centers with k-means.')
kmeans = KMeans(n_clusters=self.n_clusters, n_init=20)
self.y_pred = kmeans.fit_predict(self.encoder.predict(x))
y_pred_last = np.copy(self.y_pred)
self.model.get_layer(name='clustering').set_weights([kmeans.cluster_centers_])
# Step : deep clustering
# logging file
import csv, os
if not os.path.exists(save_dir):
os.makedirs(save_dir)
logfile = open(save_dir + '/dcec_log.csv', 'w')
logwriter = csv.DictWriter(logfile, fieldnames=['iter', 'acc', 'nmi', 'ari', 'L', 'Lc', 'Lr'])
logwriter.writeheader()
loss = [0, 0, 0]
index = 0
for ite in range(int(maxiter)):
if ite % update_interval == 0:
q, _ = self.model.predict(x, verbose=0)
p = self.target_distribution(q) # update the auxiliary target distribution p
# evaluate the clustering performance
self.y_pred = q.argmax(1)
if y is not None:
acc = np.round(metrics.acc(y, self.y_pred), 5)
nmi = np.round(metrics.nmi(y, self.y_pred), 5)
ari = np.round(metrics.ari(y, self.y_pred), 5)
loss = np.round(loss, 5)
logdict = dict(iter=ite, acc=acc, nmi=nmi, ari=ari, L=loss[0], Lc=loss[1], Lr=loss[2])
logwriter.writerow(logdict)
print('Iter', ite, ': Acc', acc, ', nmi', nmi, ', ari', ari, '; loss=', loss)
# check stop criterion
delta_label = np.sum(self.y_pred != y_pred_last).astype(np.float32) / self.y_pred.shape[0]
y_pred_last = np.copy(self.y_pred)
if ite > 0 and delta_label < tol:
print('delta_label ', delta_label, '< tol ', tol)
print('Reached tolerance threshold. Stopping training.')
logfile.close()
break
# train on batch
if (index + 1) * batch_size > x.shape[0]:
loss = self.model.train_on_batch(x=x[index * batch_size::],
y=[p[index * batch_size::], x[index * batch_size::]])
index = 0
else:
loss = self.model.train_on_batch(x=x[index * batch_size:(index + 1) * batch_size],
y=[p[index * batch_size:(index + 1) * batch_size],
x[index * batch_size:(index + 1) * batch_size]])
index += 1
# save intermediate model
if ite % save_interval == 0:
# save DCEC model checkpoints
print('saving model to:', save_dir + '/dcec_model_' + str(ite) + '.h5')
self.model.save_weights(save_dir + '/dcec_model_' + str(ite) + '.h5')
ite += 1
# save the trained model
logfile.close()
print('saving model to:', save_dir + '/dcec_model_final.h5')
self.model.save_weights(save_dir + '/dcec_model_final.h5')
Мой выходной слой - это плотный слой с выходным размером (?, 128). Я получаю следующую ошибку на уровне кластеризации.
File "C:/Users/u/Desktop/trained/inference.py", line 45, in build
self.clusters = self.add_weight(shape=(self.n_clusters, input_dim), initializer='glorot_uniform', name='clusters')
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 384, in add_weight
aggregation=aggregation)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\training\tracking\base.py", line 663, in _add_variable_with_custom_getter
**kwargs_for_getter)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\keras\engine\base_layer_utils.py", line 155, in make_variable
shape=variable_shape if variable_shape.rank else None)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\ops\variables.py", line 259, in __call__
return cls._variable_v1_call(*args, **kwargs)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\ops\variables.py", line 220, in _variable_v1_call
shape=shape)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\ops\variables.py", line 198, in <lambda>
previous_getter = lambda **kwargs: default_variable_creator(None, **kwargs)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 2495, in default_variable_creator
shape=shape)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\ops\variables.py", line 263, in __call__
return super(VariableMetaclass, cls).__call__(*args, **kwargs)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\ops\resource_variable_ops.py", line 460, in __init__
shape=shape)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\ops\resource_variable_ops.py", line 604, in _init_from_args
initial_value() if init_from_fn else initial_value,
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\keras\engine\base_layer_utils.py", line 135, in <lambda>
init_val = lambda: initializer(shape, dtype=dtype)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\ops\init_ops.py", line 533, in __call__
shape, -limit, limit, dtype, seed=self.seed)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\ops\random_ops.py", line 239, in random_uniform
shape = _ShapeTensor(shape)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\ops\random_ops.py", line 44, in _ShapeTensor
return ops.convert_to_tensor(shape, dtype=dtype, name="shape")
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\framework\ops.py", line 1087, in convert_to_tensor
return convert_to_tensor_v2(value, dtype, preferred_dtype, name)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\framework\ops.py", line 1145, in convert_to_tensor_v2
as_ref=False)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\framework\ops.py", line 1224, in internal_convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\framework\constant_op.py", line 305, in _constant_tensor_conversion_function
return constant(v, dtype=dtype, name=name)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\framework\constant_op.py", line 246, in constant
allow_broadcast=True)
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\framework\constant_op.py", line 284, in _constant_impl
allow_broadcast=allow_broadcast))
File "C:\Users\u\AppData\Local\Continuum\anaconda3\envs\test_env\lib\site-packages\tensorflow\python\framework\tensor_util.py", line 562, in make_tensor_proto
"supported type." % (type(values), values))
TypeError: Failed to convert object of type <class 'tuple'> to Tensor. Contents: (17, Dimension(128)). Consider casting elements to a supported type.
Я использовал кодировщик автоэнкодера в качестве входа. Ниже приведена часть кодировщика автоэнкодера.
ip = Input(shape=(256,256,1))
x = Conv2D(16, (3,3), padding='same')(ip)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = Dropout(0.2)(x)
x = MaxPooling2D((2,2), padding='same')(x)
x = Flatten()(x)
x = Dense(128, name="encoded")(x)