удлиненные результаты в t-SNE - PullRequest
0 голосов
/ 16 февраля 2020

Я пытался применить t-SNE к признакам моей модели, предварительно обученной на наборе данных Fashion-MNIST. В результате получается несколько вытянутых кластеров вместо округлых, и я не могу понять, делаю ли я что-то не так или это ожидаемый результат. Я пробовал менять различные гиперпараметры, но безуспешно. Ниже приложены диаграмма рассеяния с полученными результатами и сам скрипт.

enter image description here

#!/usr/bin/env python
# coding: utf-8

# In[1]:

import keras
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras import backend as K
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import regularizers
from keras.models import Model, load_model
from keras_sequential_ascii import sequential_model_to_ascii_printout


# In[3]:


# Run configuration: the saved model file to load, plus the tags that are
# combined into the name of the output figure.
MODEL_NAME = 'model_fmnist_interleaved.h5'
DATA_NAME = 'FMNIST'
LAYER_NUM = 'layer3'
ALGO_NAME = 'interleaved'

# Output figure name: <data>_<layer>_<algorithm>.pdf
FIG_NAME = '_'.join([DATA_NAME, LAYER_NUM, ALGO_NAME]) + '.pdf'


# In[4]:


# Load the Fashion-MNIST train and test splits shipped with Keras.
(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()

# Image height/width are read from the loaded array. The reshape calls below
# need them; without this definition they fail with a NameError.
img_rows, img_cols = x_train.shape[1:3]

# Drop every sample labelled 9 from both splits, so only the remaining
# labels (expected to be 0-8, matching the nine class names used for
# plotting) are kept.
pretrain_data_index = np.where(y_train != 9)
x_train = x_train[pretrain_data_index]
y_train = y_train[pretrain_data_index]

pretest_data_index = np.where(y_test != 9)
x_test = x_test[pretest_data_index]
y_test = y_test[pretest_data_index]

# Arrange the image arrays according to the backend's data format.
if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols)
    input_shape = (img_rows, img_cols)

# Convert to float32 and divide by 255 to rescale the pixel values.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

print('y_train shape:', y_train.shape)
print('y_test shape:', y_test.shape)


# In[5]:


# Load the saved Keras model from the file named by MODEL_NAME and print a
# summary of it.
model = load_model(MODEL_NAME)
model.summary()


# In[6]:


def create_truncated_model(trained_model):
    """Build a fresh Flatten -> Dense(128) -> Dense(9) model with copied weights.

    A new Sequential model with a fixed three-layer architecture is created,
    and each of its layers receives the weights of the layer at the same
    index in ``trained_model.layers``. The model is compiled before being
    returned.

    NOTE(review): the returned model still ends with the 9-unit softmax
    layer, so ``predict`` yields the 9-dimensional softmax outputs rather
    than the 128-unit hidden activations -- confirm this is the layer that
    should be visualised.

    Args:
        trained_model: model whose first three layers have weights
            compatible with the architecture built here.

    Returns:
        The compiled Sequential model carrying the copied weights.
    """
    layer_stack = [
        keras.layers.Flatten(input_shape=(28, 28)),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dense(9, activation='softmax'),
    ]
    clone = keras.Sequential(layer_stack)

    # Copy the weights across position by position.
    source_layers = trained_model.layers
    for position, target_layer in enumerate(clone.layers):
        target_layer.set_weights(source_layers[position].get_weights())

    clone.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return clone

# Rebuild the model from the loaded weights and run the test images through
# it; the resulting array is what gets embedded later on.
truncated_model = create_truncated_model(model)
hidden_features = truncated_model.predict(x_test)
print(f"Hidden features shape: {hidden_features.shape}")


# In[11]:


from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

# Optional PCA step, currently disabled (kept for reference):
#pca = PCA(n_components=9)
#pca_result = pca.fit_transform(hidden_features)
#print('Variance PCA: {}'.format(np.sum(pca.explained_variance_ratio_)))

# Embed the features into two dimensions with t-SNE. Because the PCA step
# above is commented out, t-SNE is fitted directly on hidden_features, not
# on PCA output.
tsne = TSNE(n_components=2, verbose = 1)
tsne_results = tsne.fit_transform(hidden_features)


# In[12]:


# Legend names for labels 0-8 (samples labelled 9 are removed when the data
# is loaded). Label 7 is the "sneaker" class in Fashion-MNIST; the previous
# name 'sweater' mislabelled that cluster in the legend.
class_labels = ['tshirt', 'trouser', 'pullover', 'dress', 'coat', 'sandal', 'shirt', 'sneaker', 'bag']
color_map = y_test
plt.figure(figsize=(10,10))

# One scatter call per class so that each class gets its own colour and
# legend entry.
for cl, class_name in enumerate(class_labels):
    indices = np.where(color_map == cl)[0]
    plt.scatter(tsne_results[indices, 0], tsne_results[indices, 1], label=class_name, s=5)

# Hide the top/right spines and thicken the remaining two.
axes = plt.gca()
axes.spines['top'].set_visible(False)
axes.spines['right'].set_visible(False)
axes.spines['left'].set_linewidth(1.2)
axes.spines['bottom'].set_linewidth(1.2)

lgnd = plt.legend(frameon=False, fontsize=18)
# Newer Matplotlib releases expose the handles as `legend_handles`; older
# ones only have `legendHandles`. Support both so the script keeps running.
if hasattr(lgnd, 'legend_handles'):
    legend_markers = lgnd.legend_handles
else:
    legend_markers = lgnd.legendHandles
# Enlarge the legend markers; the plotted points (s=5) are too small to read.
for handle in legend_markers:
    handle.set_sizes([40])

plt.xticks([])
plt.yticks([])
plt.tick_params(axis='both', which='major', labelsize=22)
plt.tight_layout()
plt.savefig(FIG_NAME)
...