Question

Я пытаюсь использовать графические процессоры для ускорения операций свёртки и пулинга (pooling) в моём приложении с нейронной сетью (импульсные сети, spiking networks). Я написал небольшой скрипт, чтобы оценить, какое ускорение можно получить с помощью TensorFlow. Как ни странно, SciPy / NumPy работает быстрее. В моём приложении все входные данные (изображения) хранятся на диске, но для примера я создал случайно инициализированное изображение размером 27x27 и ядро весов размером 5x5x30. Я убедился, что ничего не переношу с CPU на GPU. Я также увеличивал размер входного изображения до 270x270, а ядро весов — до 7x7x30, но никаких улучшений не увидел. Я убедился, что все операции TF действительно выполняются на моих графических процессорах, создав сессию следующим образом:

sess =tf.Session(config=tf.ConfigProto(log_device_placement=True))

У меня есть доступ к 2 графическим процессорам (Tesla K20m) в кластере.

Вот мой код:

import tensorflow as tf
import numpy as np
from scipy import signal
import time
# Single session shared by convolution() and pooling() below.
# log_device_placement=True asks TensorFlow to log the device each op is placed on.
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

# Benchmark problem size: a square image_size x image_size input and
# nofMaps square kernels of kernel_size x kernel_size each.
image_size = 27
kernel_size = 5
nofMaps = 30

def convolution(Image, weights):
    """Correlate a single-channel 2-D image with a bank of 2-D kernels in TensorFlow.

    Args:
        Image: 2-D tensor with a static shape, laid out as (height, width).
        weights: 3-D tensor with a static shape, laid out as
            (kernel_height, kernel_width, n_maps).

    Returns:
        The evaluated result of ``tf.nn.conv2d`` (unit strides, 'VALID'
        padding) after ``tf.squeeze``, as returned by ``sess.run``.

    Note:
        Each call adds new reshape/conv/squeeze ops to the default graph and
        evaluates them through the module-level ``sess``.
    """
    in_channels = 1  # the image has a single unit in the channel direction
    out_channels = int(weights.shape[-1])

    # Axis 0 is the height (rows) and axis 1 the width (columns). Keeping that
    # order in the reshapes below matters for non-square images and kernels:
    # swapping the two would reinterpret the row-major data with the wrong
    # row length.
    in_height = int(Image.shape[0])
    in_width = int(Image.shape[1])
    filter_height = int(weights.shape[0])
    filter_width = int(weights.shape[1])

    # tf.nn.conv2d expects a [batch, height, width, channels] input and a
    # [height, width, in_channels, out_channels] filter.
    input_4d = tf.reshape(Image, [1, in_height, in_width, in_channels])
    kernel_4d = tf.reshape(
        weights, [filter_height, filter_width, in_channels, out_channels])
    response = tf.nn.conv2d(
        input_4d, kernel_4d, strides=[1, 1, 1, 1], padding='VALID')
    return sess.run(tf.squeeze(response))


# Pooling sub-graphs already built, keyed by the static input shape. Assumes
# the default graph is not replaced between calls.
_POOLING_GRAPHS = {}


def pooling(Image):
    """Apply 2x2 max pooling with stride 2 to a 3-D array using TensorFlow.

    Args:
        Image: 3-D array-like exposing ``.shape``; axes 0 and 1 are pooled,
            the last axis indexes the feature maps.

    Returns:
        The evaluated result of ``tf.layers.max_pooling2d`` after
        ``tf.squeeze``, as returned by ``sess.run``.

    The pooling ops are created once per input shape and fed through a
    placeholder. Wrapping the data in ``tf.constant`` on every call would
    embed a copy of the array in the graph and add new ops each time, so the
    graph would keep growing when the function is called repeatedly.
    """
    shape = tuple(int(dim) for dim in Image.shape)
    if shape not in _POOLING_GRAPHS:
        image_ph = tf.placeholder(tf.float32, shape=shape)
        image_4d = tf.reshape(image_ph, [1, shape[0], shape[1], shape[-1]])
        pooled_4d = tf.layers.max_pooling2d(
            inputs=image_4d, pool_size=[2, 2], strides=2)
        _POOLING_GRAPHS[shape] = (image_ph, tf.squeeze(pooled_4d))
    image_ph, pooled_3d = _POOLING_GRAPHS[shape]
    return sess.run(pooled_3d, feed_dict={image_ph: Image})


# --- TensorFlow benchmark: a single convolution + pooling pass ---
# The timer starts before any op is created, so the reported time covers
# creating the input ops as well as the two sess.run calls made inside
# convolution() and pooling().
t1 = time.time()
#with tf.device('/device:GPU:1'):
Image = tf.random_uniform([image_size, image_size], name='Image')
weights = tf.random_uniform([kernel_size,kernel_size,nofMaps], name='Weights')
# NOTE(review): conv_result is whatever sess.run returned inside convolution(),
# i.e. the value has left the TF graph before pooling() receives it --
# presumably a host-side numpy array; confirm.
conv_result = convolution(Image,weights)
pool_result = pooling(conv_result)

print('Time taken:{}'.format(time.time()-t1))
#with tf.device('/device:CPU:0'):
print('Pool_result shape:{}'.format(pool_result.shape))
#print('first map of pool result:\n',pool_result[:,:,0])


def scipy_convolution(Image,weights):
    """Cross-correlate a 2-D image with every kernel of a kernel bank.

    Args:
        Image: 2-D array of shape (rows, cols).
        weights: 3-D array of shape (kernel_rows, kernel_cols, n_maps).

    Returns:
        Float array of shape
        (rows - kernel_rows + 1, cols - kernel_cols + 1, n_maps) holding the
        'valid' correlation of ``Image`` with each kernel ``weights[:, :, i]``.
    """
    # Derive the output shape from the arguments rather than from module-level
    # constants, so the function works for any image and kernel size.
    kernel_rows, kernel_cols, n_maps = weights.shape
    out_rows = Image.shape[0] - kernel_rows + 1
    out_cols = Image.shape[1] - kernel_cols + 1

    instant_conv1_pots = np.zeros((out_rows, out_cols, n_maps))
    for i in range(n_maps):
        instant_conv1_pots[:, :, i] = signal.correlate(
            Image, weights[:, :, i], mode='valid', method='fft')
    return instant_conv1_pots

def scipy_pooling(conv1_spikes):
    """Apply non-overlapping 2x2 max pooling to every map of a 3-D array.

    Each of the two spatial axes is split into (blocks, 2), giving a 5-D view
    of shape (rows/2, 2, cols/2, 2, maps); taking the maximum over the two
    length-2 axes yields the pooled maps.
    https://stackoverflow.com/questions/41813722/numpy-array-reshaped-but-how-to-change-axis-for-pooling

    Args:
        conv1_spikes: 3-D array of shape (rows, cols, maps).

    Returns:
        float64 array of shape (rows // 2, cols // 2, maps). A trailing row
        and/or column that does not fill a complete 2x2 block is ignored.
    """
    rows, cols, n_maps = conv1_spikes.shape
    # Floor division keeps the sizes integral on Python 3 as well.
    half_rows, half_cols = rows // 2, cols // 2
    # Trim each spatial axis independently to an even length.
    trimmed = conv1_spikes[:2 * half_rows, :2 * half_cols, :]
    blocks = trimmed.reshape(half_rows, 2, half_cols, 2, n_maps)
    return blocks.max(axis=(1, 3)).astype(np.float64)
# --- SciPy / NumPy benchmark: the same single convolution + pooling pass ---
# The timer starts before the random inputs are generated, so input creation
# is included in the reported time.
t1 = time.time()
Image = np.random.rand(image_size,image_size)
weights = np.random.rand(kernel_size,kernel_size,nofMaps)
conv_result = scipy_convolution(Image,weights)
pool_result = scipy_pooling(conv_result)
print('Time taken:{}'.format(time.time()-t1))
print('Pool_result shape:{}'.format(pool_result.shape))
#print('first map of pool result:\n',pool_result[:,:,0])
~

Результаты выглядят следующим образом:

Time taken:0.746644973755
Pool_result shape:(11, 11, 30)
Time taken:0.0127348899841
Pool_result shape:(11, 11, 30)

Ruthvik Vaila · Answer 1 · 06 июня 2018

По совету комментатора я установил image_size=270 и поместил вызовы обеих функций (convolution и pooling) в цикл for; теперь TF работает быстрее, чем SciPy. Обратите внимание, что я использую tf.nn.conv2d, а НЕ tf.layers.conv2d. Я также установил параметр use_cudnn_on_gpu=True в tf.nn.conv2d, но это ни на что не повлияло — ни в лучшую, ни в худшую сторону.

Вот код:

import tensorflow as tf
import numpy as np
from scipy import signal
import time
# Single session shared by convolution() and pooling() below.
# log_device_placement=True asks TensorFlow to log the device each op is placed on.
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

# Benchmark problem size: a square image_size x image_size input and
# nofMaps square kernels of kernel_size x kernel_size each.
image_size = 270
kernel_size = 5
nofMaps = 30

def convolution(Image, weights):
    """Correlate a single-channel 2-D image with a bank of 2-D kernels in TensorFlow.

    Args:
        Image: 2-D tensor with a static shape, laid out as (height, width).
        weights: 3-D tensor with a static shape, laid out as
            (kernel_height, kernel_width, n_maps).

    Returns:
        The evaluated result of ``tf.nn.conv2d`` (unit strides, 'VALID'
        padding, cuDNN enabled) after ``tf.squeeze``, as returned by
        ``sess.run``.

    Note:
        Each call adds new reshape/conv/squeeze ops to the default graph and
        evaluates them through the module-level ``sess``.
    """
    in_channels = 1  # the image has a single unit in the channel direction
    out_channels = int(weights.shape[-1])

    # Axis 0 is the height (rows) and axis 1 the width (columns). Keeping that
    # order in the reshapes below matters for non-square images and kernels:
    # swapping the two would reinterpret the row-major data with the wrong
    # row length.
    in_height = int(Image.shape[0])
    in_width = int(Image.shape[1])
    filter_height = int(weights.shape[0])
    filter_width = int(weights.shape[1])

    # tf.nn.conv2d expects a [batch, height, width, channels] input and a
    # [height, width, in_channels, out_channels] filter.
    input_4d = tf.reshape(Image, [1, in_height, in_width, in_channels])
    kernel_4d = tf.reshape(
        weights, [filter_height, filter_width, in_channels, out_channels])
    response = tf.nn.conv2d(
        input_4d, kernel_4d, strides=[1, 1, 1, 1], padding='VALID',
        use_cudnn_on_gpu=True)
    return sess.run(tf.squeeze(response))


# Pooling sub-graphs already built, keyed by the static input shape. Assumes
# the default graph is not replaced between calls.
_POOLING_GRAPHS = {}


def pooling(Image):
    """Apply 2x2 max pooling with stride 2 to a 3-D array using TensorFlow.

    Args:
        Image: 3-D array-like exposing ``.shape``; axes 0 and 1 are pooled,
            the last axis indexes the feature maps.

    Returns:
        The evaluated result of ``tf.layers.max_pooling2d`` after
        ``tf.squeeze``, as returned by ``sess.run``.

    The pooling ops are created once per input shape and fed through a
    placeholder. Wrapping the data in ``tf.constant`` on every call would
    embed a copy of the array in the graph and add new ops each time, so the
    graph would keep growing when the function is called in a loop.
    """
    shape = tuple(int(dim) for dim in Image.shape)
    if shape not in _POOLING_GRAPHS:
        image_ph = tf.placeholder(tf.float32, shape=shape)
        image_4d = tf.reshape(image_ph, [1, shape[0], shape[1], shape[-1]])
        pooled_4d = tf.layers.max_pooling2d(
            inputs=image_4d, pool_size=[2, 2], strides=2)
        _POOLING_GRAPHS[shape] = (image_ph, tf.squeeze(pooled_4d))
    image_ph, pooled_3d = _POOLING_GRAPHS[shape]
    return sess.run(pooled_3d, feed_dict={image_ph: Image})


# --- TensorFlow benchmark: 150 convolution + pooling passes ---
# The input ops are created before the timer starts, so only the loop is timed.
#with tf.device('/device:GPU:1'):
Image = tf.random_uniform([image_size, image_size], name='Image')
weights = tf.random_uniform([kernel_size,kernel_size,nofMaps], name='Weights')
#init = tf.global_variables_initializer
#sess.run(init)
t1 = time.time()
# Every iteration calls convolution() and pooling(), each of which issues its
# own sess.run; the convolution result is handed back to Python in between.
for i in range(150):
    #t1 = time.time()
    conv_result = convolution(Image,weights)
    pool_result = pooling(conv_result)
    #print('TF Time taken:{}'.format(time.time()-t1))
# Total wall-clock time for all 150 iterations.
print('TF Time taken:{}'.format(time.time()-t1))
#with tf.device('/device:CPU:0'):
print('TF Pool_result shape:{}'.format(pool_result.shape))
#print('first map of pool result:\n',pool_result[:,:,0])


def scipy_convolution(Image,weights):
    """Cross-correlate a 2-D image with every kernel of a kernel bank.

    Args:
        Image: 2-D array of shape (rows, cols).
        weights: 3-D array of shape (kernel_rows, kernel_cols, n_maps).

    Returns:
        Float array of shape
        (rows - kernel_rows + 1, cols - kernel_cols + 1, n_maps) holding the
        'valid' correlation of ``Image`` with each kernel ``weights[:, :, i]``.
    """
    # Derive the output shape from the arguments rather than from module-level
    # constants, so the function works for any image and kernel size.
    kernel_rows, kernel_cols, n_maps = weights.shape
    out_rows = Image.shape[0] - kernel_rows + 1
    out_cols = Image.shape[1] - kernel_cols + 1

    instant_conv1_pots = np.zeros((out_rows, out_cols, n_maps))
    for i in range(n_maps):
        instant_conv1_pots[:, :, i] = signal.correlate(
            Image, weights[:, :, i], mode='valid', method='fft')
    return instant_conv1_pots

def scipy_pooling(conv1_spikes):
    """Apply non-overlapping 2x2 max pooling to every map of a 3-D array.

    Each of the two spatial axes is split into (blocks, 2), giving a 5-D view
    of shape (rows/2, 2, cols/2, 2, maps); taking the maximum over the two
    length-2 axes yields the pooled maps.
    https://stackoverflow.com/questions/41813722/numpy-array-reshaped-but-how-to-change-axis-for-pooling

    Args:
        conv1_spikes: 3-D array of shape (rows, cols, maps).

    Returns:
        float64 array of shape (rows // 2, cols // 2, maps). A trailing row
        and/or column that does not fill a complete 2x2 block is ignored.
    """
    rows, cols, n_maps = conv1_spikes.shape
    # Floor division keeps the sizes integral on Python 3 as well.
    half_rows, half_cols = rows // 2, cols // 2
    # Trim each spatial axis independently to an even length.
    trimmed = conv1_spikes[:2 * half_rows, :2 * half_cols, :]
    blocks = trimmed.reshape(half_rows, 2, half_cols, 2, n_maps)
    return blocks.max(axis=(1, 3)).astype(np.float64)
# --- SciPy / NumPy benchmark: 150 convolution + pooling passes ---
# The random inputs are generated before the timer starts, so only the loop
# is timed; the same Image and weights are reused in every iteration.
Image = np.random.rand(image_size,image_size)
weights = np.random.rand(kernel_size,kernel_size,nofMaps)
t1 = time.time()
for i in range(150):
    conv_result = scipy_convolution(Image,weights)
    pool_result = scipy_pooling(conv_result)
# Total wall-clock time for all 150 iterations.
print('Scipy Time taken:{}'.format(time.time()-t1))
print('Scipy Pool_result shape:{}'.format(pool_result.shape))
#print('first map of pool result:\n',pool_result[:,:,0])

Вот результаты:

image_size = 27x27
kernel_size = 5x5x30
iterations = 150
TF Time taken:11.0800771713
TF Pool_result shape:(11, 11, 30)
Scipy Time taken:1.4141368866
Scipy Pool_result shape:(11, 11, 30)

image_size = 270x270
kernel_size = 5x5x30
iterations = 150

TF Time taken:26.2359631062
TF Pool_result shape:(133, 133, 30)
Scipy Time taken:31.6651778221
Scipy Pool_result shape:(133, 133, 30)


image_size = 500x500
kernel_size = 5x5x30
iterations = 150

TF Time taken:89.7967050076
TF Pool_result shape:(248, 248, 30)
Scipy Time taken:143.391746044
Scipy Pool_result shape:(248, 248, 30)

Во втором случае видно, что время выполнения сократилось примерно на 18%. В третьем случае время сократилось примерно на 38%.

SciPy / Numpy's Pooling / Convolution быстрее, чем Tensorflow Convolution / Pooling?

Пожалуйста, войдите или зарегистрируйтесь чтобы ответить на этот вопрос.

1 Ответ

Пожалуйста, войдите или зарегистрируйтесь, чтобы добавить комментарий.

SciPy / Numpy's Pooling / Convolution быстрее, чем Tensorflow Convolution / Pooling?

Пожалуйста, войдите или зарегистрируйтесь чтобы ответить на этот вопрос.

1 Ответ

Пожалуйста, войдите или зарегистрируйтесь, чтобы добавить комментарий.

Нет похожих вопросов