How can I get my TensorFlow script to run on my GPU?
0 votes
May 16, 2018

I am trying to train a DNN using the following script:

import numpy as np
import os, sys
import argparse
from PIL import Image
from freeze_graph import freeze_graph
import tensorflow as tf
import time

from net import *
sys.path.append(os.path.join(os.path.dirname(sys.path[0]), "./"))
from custom_vgg16 import *


# gram matrix per layer
def gram_matrix(x):
    assert isinstance(x, tf.Tensor)
    b, h, w, ch = x.get_shape().as_list()
    features = tf.reshape(x, [b, h*w, ch])
    # gram = tf.batch_matmul(features, features, adj_x=True)/tf.constant(ch*w*h, tf.float32)
    gram = tf.matmul(features, features, adjoint_a=True)/tf.constant(ch*w*h, tf.float32)
    return gram

# total variation denoising
def total_variation_regularization(x, beta=1):
    assert isinstance(x, tf.Tensor)
    wh = tf.constant([[[[ 1], [ 1], [ 1]]], [[[-1], [-1], [-1]]]], tf.float32)
    ww = tf.constant([[[[ 1], [ 1], [ 1]], [[-1], [-1], [-1]]]], tf.float32)
    tvh = lambda x: conv2d(x, wh, p='SAME')
    tvw = lambda x: conv2d(x, ww, p='SAME')
    dh = tvh(x)
    dw = tvw(x)
    tv = (tf.add(tf.reduce_sum(dh**2, [1, 2, 3]), tf.reduce_sum(dw**2, [1, 2, 3]))) ** (beta / 2.)
    return tv

parser = argparse.ArgumentParser(description='Real-time style transfer')
parser.add_argument('--gpu', '-g', default=-1, type=int,
                    help='GPU ID (negative value indicates CPU)')
parser.add_argument('--dataset', '-d', default='dataset', type=str,
                    help='dataset directory path (according to the paper, use MSCOCO 80k images)')
parser.add_argument('--style_image', '-s', type=str, required=True,
                    help='style image path')
parser.add_argument('--batchsize', '-b', type=int, default=1,
                    help='batch size (default value is 1)')
parser.add_argument('--ckpt', '-c', default=None, type=int,
                    help='the global step of checkpoint file desired to restore.')
parser.add_argument('--lambda_tv', '-l_tv', default=10e-4, type=float,
                    help='weight of total variation regularization according to the paper to be set between 10e-4 and 10e-6.')
parser.add_argument('--lambda_feat', '-l_feat', default=1e0, type=float)
parser.add_argument('--lambda_style', '-l_style', default=1e1, type=float)
parser.add_argument('--epoch', '-e', default=2, type=int)
parser.add_argument('--lr', '-l', default=1e-3, type=float)
parser.add_argument('--pb', '-pb', default=True, type=bool, help='save a pb format as well.')
args = parser.parse_args()

data_dict = loadWeightsData('./vgg16.npy')

batchsize = args.batchsize
gpu = args.gpu
dataset = args.dataset
epochs = args.epoch
learning_rate = args.lr
ckpt = args.ckpt
lambda_tv = args.lambda_tv
lambda_f = args.lambda_feat
lambda_s = args.lambda_style
style_image = args.style_image
save_pb = args.pb

style_name, _ = os.path.splitext(style_image.split(os.sep)[-1])

fpath = os.listdir(args.dataset)
imagepaths = []
for fn in fpath:
    base, ext = os.path.splitext(fn)
    if ext == '.jpg' or ext == '.png':
        imagepath = os.path.join(dataset, fn)
        imagepaths.append(imagepath)
data_len = len(imagepaths)
iterations = int(data_len / batchsize)
print('Number of training images: {}'.format(data_len))
print('{} epochs, {} iterations per epoch'.format(epochs, iterations))

style_np = np.asarray(Image.open(style_image).convert('RGB').resize((224, 224)), dtype=np.float32)
styles_np = [style_np for x in range(batchsize)]


if gpu > -1:
    device = '/gpu:{}'.format(gpu)
else:
    device = '/cpu:0'

with tf.device(device):

    inputs = tf.placeholder(tf.float32, shape=[batchsize, 224, 224, 3], name='input')
    net = FastStyleNet()
    saver = tf.train.Saver(restore_sequentially=True)
    saver_def = saver.as_saver_def()


    target = tf.placeholder(tf.float32, shape=[batchsize, 224, 224, 3])
    outputs = net(inputs)

    # style target feature
    # compute gram matrix of style target
    vgg_s = custom_Vgg16(target, data_dict=data_dict)
    feature_ = [vgg_s.conv1_2, vgg_s.conv2_2, vgg_s.conv3_3, vgg_s.conv4_3, vgg_s.conv5_3]
    gram_ = [gram_matrix(l) for l in feature_]

    # content target feature 
    vgg_c = custom_Vgg16(inputs, data_dict=data_dict)
    feature_ = [vgg_c.conv1_2, vgg_c.conv2_2, vgg_c.conv3_3, vgg_c.conv4_3, vgg_c.conv5_3]

    # feature after transformation 
    vgg = custom_Vgg16(outputs, data_dict=data_dict)
    feature = [vgg.conv1_2, vgg.conv2_2, vgg.conv3_3, vgg.conv4_3, vgg.conv5_3]

    # compute feature loss
    loss_f = tf.zeros(batchsize, tf.float32)
    for f, f_ in zip(feature, feature_):
        loss_f += lambda_f * tf.reduce_mean(tf.subtract(f, f_) ** 2, [1, 2, 3])

    # compute style loss
    gram = [gram_matrix(l) for l in feature]
    loss_s = tf.zeros(batchsize, tf.float32)
    for g, g_ in zip(gram, gram_):
        loss_s += lambda_s * tf.reduce_mean(tf.subtract(g, g_) ** 2, [1, 2])

    # total variation denoising
    loss_tv = lambda_tv * total_variation_regularization(outputs)

    # total loss
    loss = loss_s + loss_f + loss_tv

    # optimizer
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:

    ckpt_directory = './ckpts/{}/'.format(style_name)
    if not os.path.exists(ckpt_directory):
        os.makedirs(ckpt_directory)

    # training
    tf.global_variables_initializer().run()

    if ckpt:
        if ckpt < 0:
            checkpoint = tf.train.get_checkpoint_state(ckpt_directory)
            input_checkpoint = checkpoint.model_checkpoint_path
        else:
            input_checkpoint = ckpt_directory + style_name + '-{}'.format(ckpt)
        saver.restore(sess, input_checkpoint)
        print('Checkpoint {} restored.'.format(ckpt))

    for epoch in range(1, epochs + 1):
        imgs = np.zeros((batchsize, 224, 224, 3), dtype=np.float32)
        for i in range(iterations):
            for j in range(batchsize):
                p = imagepaths[i * batchsize + j]
                imgs[j] = np.asarray(Image.open(p).convert('RGB').resize((224, 224)), np.float32)
            feed_dict = {inputs: imgs, target: styles_np}
            loss_, _ = sess.run([loss, train_step], feed_dict=feed_dict)
            print('[epoch {}/{}] batch {}/{}... loss: {}'.format(epoch, epochs, i + 1, iterations, loss_[0]))
        saver.save(sess, ckpt_directory + style_name, global_step=epoch)

if save_pb:
    if not os.path.exists('./pbs'):
        os.makedirs('./pbs')
    freeze_graph(ckpt_directory, './pbs/{}.pb'.format(style_name), 'output')

and when I run it, it trains on the images (right now I am using just a single image to get the whole process working) and prints this to the command line:

D:\myName\tensorflow-fast-neuralstyle>python train.py -s picasso.jpg -d trainTest -g 0
C:\ProgramData\Anaconda3\lib\site-packages\h5py\__init__.py:34: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters
Number of training images: 1
2 epochs, 1 iterations per epoch
2018-05-16 18:47:33.268196: I T:\src\github\tensorflow\tensorflow\core\platform\cpu_feature_guard.cc:140] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2
2018-05-16 18:47:33.582973: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:1356] Found device 0 with properties:
name: GeForce GTX 1070 major: 6 minor: 1 memoryClockRate(GHz): 1.645
pciBusID: 0000:01:00.0
totalMemory: 8.00GiB freeMemory: 6.63GiB
2018-05-16 18:47:33.590004: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:1435] Adding visible gpu devices: 0
2018-05-16 18:47:34.243696: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:923] Device interconnect StreamExecutor with strength 1 edge matrix:
2018-05-16 18:47:34.247206: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:929]      0
2018-05-16 18:47:34.249841: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:942] 0:   N
2018-05-16 18:47:34.252015: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:1053] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 6405 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1070, pci bus id: 0000:01:00.0, compute capability: 6.1)
[epoch 1/2] batch 1/1... loss: 32216618.0
[epoch 2/2] batch 1/1... loss: 27523674.0
2018-05-16 18:47:55.451428: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:1435] Adding visible gpu devices: 0
2018-05-16 18:47:55.456462: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:923] Device interconnect StreamExecutor with strength 1 edge matrix:
2018-05-16 18:47:55.462478: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:929]      0
2018-05-16 18:47:55.465806: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:942] 0:   N
2018-05-16 18:47:55.468555: I T:\src\github\tensorflow\tensorflow\core\common_runtime\gpu\gpu_device.cc:1053] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 6405 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1070, pci bus id: 0000:01:00.0, compute capability: 6.1)

Which is fine, until I get this error:

InvalidArgumentError (see above for traceback): Cannot assign a device for operation 'save/SaveV2': Could not satisfy explicit device specification '/device:GPU:0' because no supported kernel for GPU devices is available.
Registered kernels:
  device='CPU'

It seems the script can find my GPU to run on, but something I don't understand is stopping it from finishing. Every other post about this error says to set the allow_soft_placement argument to True, but it is already set in this script.
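
In case it helps with diagnosis, here is a minimal sketch (assuming TF 1.x, matching the logs above) of the device-placement logging I can enable to see where each op actually lands; log_device_placement is the only addition to the session config the script already uses:

import tensorflow as tf

# Same config as in the training script, plus device-placement logging:
# TensorFlow prints the device assigned to every op at session creation.
config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)
with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())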

Any help would be greatly appreciated. Thanks!

P.S. The trained model will be used by this generate.py file:

import numpy as np
import argparse
import tensorflow as tf
import os
from PIL import Image

parser = argparse.ArgumentParser(description='Real-time style transfer image generator')
parser.add_argument('--input', '-i', type=str, help='content image')
parser.add_argument('--gpu', '-g', default=-1, type=int,
                    help='GPU ID (negative value indicates CPU)')
parser.add_argument('--style', '-s', default=None, type=str, help='style model name')
parser.add_argument('--ckpt', '-c', default=-1, type=int, help='checkpoint to be loaded')
parser.add_argument('--out', '-o', default='stylized_image.jpg', type=str, help='stylized image\'s name')
parser.add_argument('--pb', '-pb', default=False, type=bool, help='load with pb')

args = parser.parse_args()

if not os.path.exists('./images/output/'):
    os.makedirs('./images/output/')

outfile_path = './images/output/' + args.out
content_image_path = args.input
style_name = args.style
ckpt = args.ckpt
load_with_pb = args.pb
gpu = args.gpu

original_image = Image.open(content_image_path).convert('RGB')

img = np.asarray(original_image.resize((224, 224)), dtype=np.float32)
shaped_input = img.reshape((1,) + img.shape)

if gpu > -1:
    device = '/gpu:{}'.format(gpu)
else:
    device = '/cpu:0'


with tf.device(device):
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        if load_with_pb:
            from tensorflow.core.framework import graph_pb2
            graph_def = graph_pb2.GraphDef()
            with open('./pbs/{}.pb'.format(style_name), "rb") as f:
                graph_def.ParseFromString(f.read())
            input_image, output = tf.import_graph_def(graph_def, return_elements=['input:0', 'output:0'])

        else:
            if ckpt < 0:
                checkpoint = tf.train.get_checkpoint_state('./ckpts/{}/'.format(style_name))
                input_checkpoint = checkpoint.model_checkpoint_path
            else:
                input_checkpoint = './ckpts/{}/{}-{}'.format(style_name, style_name, ckpt)
            saver = tf.train.import_meta_graph(input_checkpoint + '.meta')
            saver.restore(sess, input_checkpoint)
            graph = tf.get_default_graph()

            input_image = graph.get_tensor_by_name('input:0')
            output = graph.get_tensor_by_name('output:0')

        out = sess.run(output, feed_dict={input_image: shaped_input})

out = out.reshape((out.shape[1:]))
im = Image.fromarray(np.uint8(out))

im = im.resize(original_image.size, resample=Image.LANCZOS)
im.save(outfile_path)

1 Answer

0 votes
May 16, 2018

I/O nodes such as tf.train.Saver cannot be placed on a GPU. Create them outside your tf.device context, after your network has been built.
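
For illustration, a minimal sketch of that restructuring against the training script above; the Saver wraps SaveV2/RestoreV2 ops which, per the error message, only have CPU kernels:

with tf.device(device):
    inputs = tf.placeholder(tf.float32, shape=[batchsize, 224, 224, 3], name='input')
    net = FastStyleNet()
    outputs = net(inputs)
    # ... target placeholder, losses and train_step as before ...

# Created outside the tf.device block, so the save/restore ops can fall
# back to their CPU kernels instead of the pinned GPU placement.
saver = tf.train.Saver(restore_sequentially=True)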

...