Я хочу создать и обучить существующую сверточную нейронную сеть.
Я использую изображения DICOM, и я могу извлечь код просто отлично. Я конвертирую их в размер 224 * 224 и затем использую его.
Я также создал несколько служебных функций, которые помогут сделать мой код немного модульным.
Я получаю ошибку всякий раз, когда тренирую свою нейронную сеть. Я думаю, что это в основном связано с архитектурой сети, хотя я не могу найти какой-либо недостаток.
Я пробовал несколько разные комбинации, хотя я хотел бы придерживаться данной сети, поскольку я фактически реализую существующую сеть.
Вот основная сеть:
cnn_1.py
##SETUP
import tensorflow as tf
from retriever import IMG_PX_SIZE, classLength
#ALL HELPER FUNCTIONS
#INIT WEIGHTS
def init_weights(shape):
init_rand_dist = tf.truncated_normal(shape, stddev=0.01)
return tf.Variable(init_rand_dist)
#INIT BIAS
def init_bias(shape):
init_bias_vals = tf.constant(0.1, shape=shape)
return tf.Variable(init_bias_vals)
def init_bias_2(shape, value):
init_bias_vals = tf.constant(value, shape=shape)
return tf.Variable(init_bias_vals)
#CONV2D
def conv2d(x, W, stridec=[1, 1, 1, 1]):
# x---> [batch, h, w, channels]
# W---> [filter_h, filter_w, channel_in, channel_out]
print(x.shape)
return tf.nn.conv2d(x, W, strides=stridec, padding='SAME')
#POOLING
#Fixed ksize and strides
def max_pool_3by3(x):
# x---> [batch, h, w, channels]
print(x.shape)
return tf.nn.max_pool(x, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')
#CONVOLUTIONAL LAYER
def convolutional_layer(input_x, shape, stridec=[1, 1, 1, 1]):
W = init_weights(shape)
b = init_bias_2([shape[3]], 0.0)
#must cast b to float32
return tf.nn.relu(conv2d(input_x, W, stridec=stridec)+b)
def convolutional_layer_2(input_x, shape, stridec=[1, 1, 1, 1]):
W = init_weights(shape)
return conv2d(input_x, W, stridec=stridec)
#NORMAL LAYER
def normal_full_layer(input_layer, size):
print(input_layer.shape)
input_size = int(input_layer.get_shape()[1])
W = init_weights([input_size, size])
b = init_bias_2([size], 1.0)
return tf.matmul(input_layer, W)+b
#PLACEHOLDER
#x = tf.placeholder(tf.float32, shape=[None, IMG_PX_SIZE*IMG_PX_SIZE])
y_true = tf.placeholder(tf.float32, shape=[None, classLength])
#LAYERS
x_image = tf.placeholder(tf.float32, shape=[None, IMG_PX_SIZE, IMG_PX_SIZE, 1])
#x_image = tf.reshape(x, [-1, IMG_PX_SIZE, IMG_PX_SIZE, 1])
#decoy = convolutional_layer(x_image, shape=[11, 11, 1, 64])
convo1 = convolutional_layer(x_image, shape=[11, 11, 1, 64], stridec=[1, 4, 4, 1])
convo1_pool = max_pool_3by3(convo1)
convo2 = convolutional_layer(convo1_pool, shape=[5, 5, 64, 192])
convo2_pool = max_pool_3by3(convo2)
convo3 = convolutional_layer(convo2_pool, shape=[5, 5, 192, 384])
convo4 = convolutional_layer(convo3, shape=[3, 3, 384, 256])
convo5 = convolutional_layer(convo4, shape=[3, 3, 256, 256])
convo5_pool = max_pool_3by3(convo5)
print('Convolutional layers end')
#the dimension 8*8*256 might be wrong because of insufficient details of strides and paddings of some layers
convo5_flat = tf.reshape(convo5_pool, [-1, 14*14*256])
full_layer_one = tf.nn.relu(normal_full_layer(convo5_flat, 4096))
#DROPOUT
hold_prob = tf.placeholder(tf.float32)
full_one_dropout = tf.nn.dropout(full_layer_one, keep_prob=hold_prob)
full_layer_two = tf.nn.relu(normal_full_layer(full_one_dropout, 4096))
#DROPOUT 2
full_two_dropout = tf.nn.dropout(full_layer_two, keep_prob=hold_prob)
full_layer_three = tf.nn.relu(normal_full_layer(full_two_dropout, 4096))
y_pred = normal_full_layer(full_layer_three, 15)
#LOSS FUNCTION
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_true, logits=y_pred))
#OPTIMIZER
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
train = optimizer.minimize(cross_entropy)
init = tf.global_variables_initializer()
print('Setup Completed')
##THIS MARKS THE END OF THE SETUP
##DATASET GENERATION
from retriever import getSubFolders, getImageArray, datasetPath
import numpy as np
import matplotlib.pyplot as plt
subfolders = getSubFolders(datasetPath)
print('Subfolders saved')
images = getImageArray(datasetPath, subfolders, 10)
print('Dataset Generated')
##THIS MARKS THE END OF THE DATASET GENERATION
##TRAINING
from utils import nextImageBatch, nextImageRandomBatch
steps = 1
with tf.Session() as sess:
sess.run(init)
index = 0
for i in range(steps):
#10 images per class
length = len(images)
batch_x, batch_y, index = nextImageBatch(images, length, len(subfolders), index)
sess.run(train, feed_dict={x_image:batch_x, y_true:batch_y, hold_prob:0.5})
#print accuracy every few steps
if i%50==0:
print("ON STEP {}".format(i))
print("ACCURACY: ")
matches = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y_true, 1))
test_x, test_y, index = nextImageBatch(images, len(images)/(len(subfolders)*30), len(subfolders), index)
acc = tf.reduce_mean(tf.cast(matches, tf.float32))
print(sess.run(acc, feed_dict={x_image:test_x, y_true:test_y, hold_prob:1.0}))
print('\n')
Вот моя функция, которая извлекает мой набор данных из группы папок, хотя я не думаю, что это нужно изучать, потому что я проверил его, и он прекрасно работает.
retriever.py
import pydicom as pydi
import dicom_numpy as dinum
import numpy as np
import os
import matplotlib.pyplot as plt
import cv2
from PIL import Image
datasetPath = '<my_folder>\\Dataset'
def getSubFolders(path):
subfolders = []
os.chdir(path)
for x in os.listdir('.'):
if not (x.endswith('.tcia')):
subfolders.append(x)
return subfolders
subfolders = getSubFolders(datasetPath)
#print(subfolders)
classLength = len(subfolders)
IMG_PX_SIZE = 224
def resize(img_dcm, IMG_PX_SIZE):
return cv2.resize(np.array(img_dcm.pixel_array), (IMG_PX_SIZE, IMG_PX_SIZE))
def getImageArray(path, subfolders, length):
#Following is the data structure that would store the images
images = []
#Instead of using each element of subfolders, we will use an index based on the length of the subfolders
#So that we can store the index value of the class (subfolders) instead of the class string value
for s in range(len(subfolders)):
Path = os.path.join(path, subfolders[s])
count = 0
print(s)
for root, dirs, files in os.walk(Path):
for file in files:
if file.endswith('.dcm') and count<length:
img = pydi.dcmread(os.path.join(root, file))
#print(type(img.pixel_array))
image = resize(img, IMG_PX_SIZE)
#print(image.shape)
count = count+1
images.append((s, image))
return images
Вот мой файл для служебной функции. Он возвращает изображение размером (224, 224, 1), как я хочу.
import numpy as np
import math
import random
from retriever import IMG_PX_SIZE
#images: [(image_class, image_array), ......]
#This function returns an array that has equal number of images per class and not randomized
#length: total length of batch_x
def nextImageBatch(images, length=150, classes=15, index=0):
perClass = int(length/classes)
i = 0
classLength = int(len(images)/15)
#batch_x contains the images, batch_y contains the index of the associated class in the subfolders array
batchx, batchy = np.zeros((length, IMG_PX_SIZE, IMG_PX_SIZE, 1)), np.zeros((length, classes))
counter = 0
for cla in range(classes):
for i in range(perClass):
#Following is the index of the image to be appended
point = (cla*classLength) + (index*perClass) + i
#need to convert the 2d image to 3d
newimage = images[point][1]
batchx[counter] = newimage[:, :, np.newaxis]
batchy[counter][images[point][0]] = 1
counter = counter + 1
print('batch_x shape: {}'.format(batchx.shape))
print('batch_y shape: {}'.format(batchy.shape))
return batchx, batchy, index+1
Вот вывод, который я получаю, пока он работает нормально. (Я напишу ошибку отдельно)
(входной размер для каждого слоя)
(?, 224, 224, 1)
(?, 56, 56, 64)
(?, 28, 28, 64)
(?, 28, 28, 192)
(?, 14, 14, 192)
(?, 14, 14, 384)
(?, 14, 14, 256)
(?, 14, 14, 256)
Convolutional layers end
(?, 50176)
(?, 4096)
(?, 4096)
(?, 4096)
Setup Completed
Subfolders saved
(Это номер класса изображения для классификации. Не о чем беспокоиться)
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
Dataset Generated
(Размеры серии. 150 изображений каждого размера (224,224,1) и относится к одному из 15 классов)
(batch_y [0] пример: [1., 0. 0. 0., 0. 0. 0., 0. 0. 0., 0. 0. 0., 0.]
batch_x shape: (150, 224, 224, 1)
batch_y shape: (150, 15)
Ошибка (основная ошибка в последней строке)
Traceback (most recent call last):
File "D:\Anaconda3\envs\tfdeeplearning\lib\site-packages\tensorflow\python\client\session.py", line 1327, in _do_call
return fn(*args)
File "D:\Anaconda3\envs\tfdeeplearning\lib\site-packages\tensorflow\python\client\session.py", line 1306, in _run_fn
status, run_metadata)
File "D:\Anaconda3\envs\tfdeeplearning\lib\contextlib.py", line 66, in __exit__
next(self.gen)
File "D:\Anaconda3\envs\tfdeeplearning\lib\site-packages\tensorflow\python\framework\errors_impl.py", line 466, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: Input to reshape is a tensor with 1881600 values, but the requested shape requires a multiple of 50176
[[Node: Reshape = Reshape[T=DT_FLOAT, Tshape=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](MaxPool_2, Reshape/shape)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "c:\Users\Admin\.vscode\extensions\ms-python.python-2019.3.6558\pythonFiles\ptvsd_launcher.py", line 45, in <module>
main(ptvsdArgs)
File "c:\Users\Admin\.vscode\extensions\ms-python.python-2019.3.6558\pythonFiles\lib\python\ptvsd\__main__.py", line 391, in main
run()
File "c:\Users\Admin\.vscode\extensions\ms-python.python-2019.3.6558\pythonFiles\lib\python\ptvsd\__main__.py", line 272, in run_file
runpy.run_path(target, run_name='__main__')
File "D:\Anaconda3\envs\tfdeeplearning\lib\runpy.py", line 263, in run_path
pkg_name=pkg_name, script_name=fname)
File "D:\Anaconda3\envs\tfdeeplearning\lib\runpy.py", line 96, in _run_module_code
mod_name, mod_spec, pkg_name, script_name)
File "D:\Anaconda3\envs\tfdeeplearning\lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "f:\python_projects\tensorflow\full-tensorflow-notes-and-data\tensorflow-bootcamp-master\Research\CBIR_CNN\cnn_1.py", line 152, in <module>
sess.run(train, feed_dict={x_image:batch_x, y_true:batch_y, hold_prob:0.5})
File "D:\Anaconda3\envs\tfdeeplearning\lib\site-packages\tensorflow\python\client\session.py", line 895, in run
run_metadata_ptr)
File "D:\Anaconda3\envs\tfdeeplearning\lib\site-packages\tensorflow\python\client\session.py", line 1124, in _run
feed_dict_tensor, options, run_metadata)
File "D:\Anaconda3\envs\tfdeeplearning\lib\site-packages\tensorflow\python\client\session.py", line 1321, in _do_run
options, run_metadata)
File "D:\Anaconda3\envs\tfdeeplearning\lib\site-packages\tensorflow\python\client\session.py", line 1340, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Input to reshape is a tensor with 1881600 values, but the requested shape requires a multiple of 50176
[[Node: Reshape = Reshape[T=DT_FLOAT, Tshape=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](MaxPool_2, Reshape/shape)]]
Caused by op 'Reshape', defined at:
File "c:\Users\Admin\.vscode\extensions\ms-python.python-2019.3.6558\pythonFiles\ptvsd_launcher.py", line 45, in <module>
main(ptvsdArgs)
File "c:\Users\Admin\.vscode\extensions\ms-python.python-2019.3.6558\pythonFiles\lib\python\ptvsd\__main__.py", line 391, in main
run()
File "c:\Users\Admin\.vscode\extensions\ms-python.python-2019.3.6558\pythonFiles\lib\python\ptvsd\__main__.py", line 272, in run_file
runpy.run_path(target, run_name='__main__')
File "D:\Anaconda3\envs\tfdeeplearning\lib\runpy.py", line 263, in run_path
pkg_name=pkg_name, script_name=fname)
File "D:\Anaconda3\envs\tfdeeplearning\lib\runpy.py", line 96, in _run_module_code
mod_name, mod_spec, pkg_name, script_name)
File "D:\Anaconda3\envs\tfdeeplearning\lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "f:\python_projects\tensorflow\full-tensorflow-notes-and-data\tensorflow-bootcamp-master\Research\CBIR_CNN\cnn_1.py", line 93, in <module>
convo5_flat = tf.reshape(convo5_pool, [-1, 14*14*256])
File "D:\Anaconda3\envs\tfdeeplearning\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 2619, in reshape
name=name)
File "D:\Anaconda3\envs\tfdeeplearning\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 767, in apply_op
op_def=op_def)
File "D:\Anaconda3\envs\tfdeeplearning\lib\site-packages\tensorflow\python\framework\ops.py", line 2630, in create_op
original_op=self._default_original_op, op_def=op_def)
File "D:\Anaconda3\envs\tfdeeplearning\lib\site-packages\tensorflow\python\framework\ops.py", line 1204, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InvalidArgumentError (see above for traceback): Input to reshape is a tensor with 1881600 values, but the requested shape requires a multiple of 50176
[[Node: Reshape = Reshape[T=DT_FLOAT, Tshape=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](MaxPool_2, Reshape/shape)]]
Пожалуйста, смотрите на архитектуру сети более внимательно, чем на остальную часть кода. Я думаю, что главная проблема здесь.
PS
Есть ли какой-нибудь способ, которым я могу видеть преобразования размера изображения через слои во время выполнения. Это помогло бы больше.