I'm trying to build a style-transfer program, and I'm using GradientTape to solve the optimization problem. But when I get to training, the total loss always evaluates to 0. I'm new to TensorFlow, but as far as I understand, eager execution lets operations be evaluated immediately. If you could help me understand what I'm doing wrong, that would be great.
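To rule out the basics, here is the minimal GradientTape pattern I am basing this on (a standalone toy example, the names are made up and it is not part of my program); as far as I know this should print a non-zero value and a non-zero gradient, which is why the zero total loss in my full program confuses me:

import tensorflow as tf

x = tf.Variable(3.0)
with tf.GradientTape() as tape:
    y = tf.square(x)           # evaluated eagerly, right away
print(y)                       # expected: tf.Tensor(9.0, ...)
print(tape.gradient(y, x))     # expected: tf.Tensor(6.0, ...)

The full program is below.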
%tensorflow_version 2.x
import tensorflow as tf
tf.executing_eagerly()
import numpy as np
from PIL import Image
import requests
from io import BytesIO
from keras.applications.vgg19 import VGG19
from keras.applications.vgg19 import preprocess_input
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.models import Model
import keras.backend
from matplotlib import pyplot as plt
from numpy import expand_dims
from tensorflow import GradientTape
CHANNELS = 3
IMAGE_SIZE = 500
IMAGE_WIDTH = IMAGE_SIZE
IMAGE_HEIGHT = IMAGE_SIZE
CONTENT_WEIGHT = 0.02
STYLE_WEIGHT = 4.5
TOTAL_VARIATION_WEIGHT = 0.995
TOTAL_VARIATION_LOSS_FACTOR = 1.25
MEAN = np.array([103.939, 116.779, 123.68])
CONTENT_LAYERS = ['block4_conv2']
STYLE_LAYERS = ['block1_conv1', 'block2_conv1', 'block3_conv1', 'block4_conv1', 'block5_conv1']
input_image_path = "input.png"
style_image_path = "style.png"
output_image_path = "output.png"
combined_image_path = "combined.png"
san_francisco_image_path = "https://www.economist.com/sites/default/files/images/print-edition/20180602_USP001_0.jpg"
tytus_image_path = "http://meetingbenches.com/wp-content/flagallery/tytus-brzozowski-polish-architect-and-watercolorist-a-fairy-tale-in-warsaw/tytus_brzozowski_13.jpg"
input_image = Image.open(BytesIO(requests.get(san_francisco_image_path).content))
input_image = input_image.resize((IMAGE_WIDTH, IMAGE_HEIGHT))
input_image.save(input_image_path)
#input_image
# Style visualization
style_image = Image.open(BytesIO(requests.get(tytus_image_path).content))
style_image = style_image.resize((IMAGE_WIDTH, IMAGE_HEIGHT))
style_image.save(style_image_path)
#style_image
#make model
def obter_modelo():
    modelo = VGG19(include_top = False, weights = 'imagenet', input_tensor = None)
    #modelo.trainable = False
    c_layer = CONTENT_LAYERS
    s_layers = STYLE_LAYERS
    output_layers = [modelo.get_layer(layer).output for layer in (c_layer + s_layers)]
    return Model(modelo.inputs, output_layers)

def processar_imagem(img):
    imagem = img.resize((IMAGE_HEIGHT, IMAGE_WIDTH))
    imagem = img_to_array(imagem)
    imagem = preprocess_input(imagem)
    imagem = expand_dims(imagem, axis=0)
    return imagem

def desprocessar_imagem(img):
    imagem = img
    mean = MEAN
    imagem[..., 0] += mean[0]
    imagem[..., 1] += mean[1]
    imagem[..., 2] += mean[2]
    imagem = imagem[..., ::-1]
    return imagem.astype(int)

def content_loss(c_mat, out_mat):
    return 0.5 * tf.math.reduce_sum(tf.math.square(out_mat - c_mat))

def matriz_gram(mat):
    return tf.matmul(mat, tf.transpose(mat))

def style_loss(s_mat, out_mat):
    style_feat = keras.backend.batch_flatten(tf.transpose(s_mat, perm = (2,0,1)))
    output_feat = keras.backend.batch_flatten(tf.transpose(out_mat, perm = (2,0,1)))
    style_gram = matriz_gram(style_feat)
    output_gram = matriz_gram(output_feat)
    return tf.math.reduce_sum(tf.math.square(style_gram - output_gram)) / (4.0 * (CHANNELS ** 2) * (IMAGE_SIZE ** 2))

def total_loss(c_layer, s_layers, out_layers):
    content_layer = c_layer[0]
    out_content = out_layers[0]
    style_layers = s_layers[1:]
    out_style = out_layers[1:]
    c_loss = content_loss(content_layer[0], out_content[0])
    s_loss = None
    for i in range(len(style_layers)):
        if s_loss is None:
            s_loss = style_loss(style_layers[i][0], out_style[i][0])
        else:
            s_loss += style_loss(style_layers[i][0], out_style[i][0])
    return CONTENT_WEIGHT * c_loss + (STYLE_WEIGHT * s_loss) / len(style_layers)
modelo = obter_modelo()
#content image
content_processado = processar_imagem(input_image)
content_feats = modelo(tf.constant(content_processado))
#style image
style_processado = processar_imagem(style_image)
style_feats = modelo(tf.constant(style_processado))
#output image
output_processado = np.random.rand(IMAGE_HEIGHT, IMAGE_WIDTH,CHANNELS)*255
output_processado = tf.Variable(expand_dims(preprocess_input(output_processado), axis=0))
optimizer = tf.optimizers.Adam(5,beta_1=.99,epsilon=1e-3)
epochs=30
melhor_loss = 2000000.0
melhor_imagem = None
min_value = MEAN
max_value = 255 + MEAN
for e in range(epochs):
    with tf.GradientTape() as tape:
        tape.watch(output_processado)
        output_feats = modelo(tf.dtypes.cast(output_processado, tf.float32))
        loss = total_loss(content_feats, style_feats, output_feats)
    grad = tape.gradient(loss, output_processado)
    print(loss)
    optimizer.apply_gradients(zip([grad], [output_processado]))
    clip = tf.clip_by_value(output_processado, min_value, max_value)
    output_processado.assign(clip)
    print("Iteracao: " + str(e))
    # if loss < melhor_loss:
    #     melhor_imagem = output_processado
    #     melhor_loss = loss
    #     print(" Loss diminui para " + str(K.get_value(melhor_loss)))