Different inference results between Python and C++
0 votes / 21 October 2019

I am building a re-identification network and implementing a triplet loss function; up to this point everything is fine. In Python the network works correctly: I implemented it in Keras with TensorFlow as the backend, then converted the .hd5 file to a .pb file in order to run inference with TensorFlow in C++. The problem is that, for the same input images, the output differs between Python and C++, and I don't know why. Can anyone help me? Here is the model in Python:

import keras
import keras.applications
import keras.layers as layer
import tensorflow as tf
from keras import backend as K
from keras.backend.tensorflow_backend import set_session
from keras.models import Model as md

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.log_device_placement = True
sess = tf.Session(config=config)
set_session(sess)

class Model:
    def __init__(self, shape):
        self.shape = shape
        self.params = {
            'optimizer': 'sgd',
            'first_neuron': 12,
            'first_max_pooling': 2,
            'second_neuron': 12,
            'second_max_pooling': 2,
            'third_neuron': 20,
            'third_max_pooling': 3,
            'dense_neuron': 64,
            'final_neuron': 128,
        }
        self.feature_model = self.create_features_model()
        self.triplet_model = self.create_model()

    def create_features_model(self):
        # Define the vision modules
        img_input = layer.Input(shape=(self.shape))
        x = layer.Conv2D(self.params['first_neuron'], (3, 3), activation='relu')(img_input)
        x = layer.MaxPooling2D((self.params['first_max_pooling'], self.params['first_max_pooling']))(x)
        x = layer.Conv2D(self.params['second_neuron'], (3, 3), activation='relu')(x)
        x = layer.MaxPooling2D((self.params['second_max_pooling'], self.params['second_max_pooling']))(x)

        x = layer.Conv2D(self.params['third_neuron'], (3, 3), activation='relu')(x)
        x = layer.MaxPooling2D((self.params['third_max_pooling'], self.params['third_max_pooling']))(x)

        x = layer.Flatten()(x)
        x = layer.Dense(self.params['dense_neuron'], activation='relu')(x)
        x = layer.Dense(self.params['final_neuron'], activation='relu')(x)
        out = layer.Lambda(lambda x: K.l2_normalize(x, axis=1), name='t_emb_1_lnorm')(x)
        features_model = md(img_input, out)

        features_model.summary()
        return features_model

    def create_model(self):
        base_model = self.feature_model
        # triplet framework, shared weights
        input_shape = (self.shape)
        input_target = layer.Input(shape=input_shape, name='input_target')
        input_positive = layer.Input(shape=input_shape, name='input_pos')
        input_negative = layer.Input(shape=input_shape, name='input_neg')

        net_target = base_model(input_target)
        net_positive = base_model(input_positive)
        net_negative = base_model(input_negative)

        # The Lambda layer produces output using the given function; here it is the Euclidean distance.
        positive_distance = layer.Lambda(self.euclidean_distance, name='pos_dist')([net_target, net_positive])
        negative_distance = layer.Lambda(self.euclidean_distance, name='neg_dist')([net_target, net_negative])
        diference = layer.Lambda(self.euclidean_distance, name='dif')([net_positive, net_negative])

        # This lambda layer simply stacks the outputs so that all three distances are available to the objective
        distances = layer.Lambda(lambda vects: K.stack(vects, axis=1), name='distance')(
            [positive_distance, negative_distance, diference])

        model = md([input_target, input_positive, input_negative], distances, name='result')

        # Setting up an optimizer designed for a variable learning rate

        model.compile(optimizer=keras.optimizers.Adam(lr=0.001, decay=0.00002),
                      loss=self.triplet_loss, metrics=[self.accuracy])

        return model

    def triplet_loss(self, _, y_pred):
        margin = K.constant(0.5)
        return K.mean(K.maximum(K.constant(0), K.square(y_pred[:, 0, 0]) - 0.5 * (
                K.square(y_pred[:, 1, 0]) + K.square(y_pred[:, 2, 0])) + margin))

    def accuracy(self, _, y_pred):
        return K.mean(y_pred[:, 0, 0] < y_pred[:, 1, 0])

    def lnorm(self, x):
        return K.l2_normalize(x, axis=-1)

    def euclidean_distance(self, vects):
        x, y = vects
        return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True), K.epsilon()))
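
For reference, the 'distance' layer stacks the three (batch, 1) distance tensors into a (batch, 3, 1) tensor, which is why triplet_loss and accuracy index it as y_pred[:, i, 0]. A minimal NumPy illustration of that layout (the numbers are made up):

import numpy as np

pos = np.array([[0.30]])   # anchor-positive distance, shape (1, 1)
neg = np.array([[0.90]])   # anchor-negative distance, shape (1, 1)
dif = np.array([[0.85]])   # positive-negative distance, shape (1, 1)

# np.stack along axis=1 mirrors K.stack(vects, axis=1): result shape (1, 3, 1)
stacked = np.stack([pos, neg, dif], axis=1)
print(stacked.shape)                                          # (1, 3, 1)
print(stacked[:, 0, 0], stacked[:, 1, 0], stacked[:, 2, 0])   # [0.3] [0.9] [0.85]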

and here is how I run inference in Python:

from model import Model as model
from keras.utils import HDF5Matrix
import numpy as np
import cv2
from keras.backend.tensorflow_backend import set_session
import tensorflow as tf

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.log_device_placement = True
sess = tf.Session(config=config)
set_session(sess)

def load_datasets(in_h5_path, partition='train'):
    if partition == 'train':
        target = HDF5Matrix(datapath=in_h5_path, dataset="targets")
        positive = HDF5Matrix(datapath=in_h5_path, dataset="positives")
        negative = HDF5Matrix(datapath=in_h5_path, dataset="negatives")
        return target, positive, negative
    else:
        print("Invalid 'partition' parameter: Valid values: ['train', 'test']")
tar = cv2.imread("/home/amejia/PycharmProjects/triplet_loss/tra1.png")
nega = cv2.imread("/home/amejia/PycharmProjects/triplet_loss/dec1.png")

tar = cv2.resize(tar, (32, 32), interpolation=cv2.INTER_CUBIC)
nega = cv2.resize(nega, (32, 32), interpolation=cv2.INTER_CUBIC)
t1 = np.array(tar).reshape((1, 32, 32, 3))
t2 = np.array(nega).reshape((1, 32, 32, 3))
target, positive, negative = load_datasets('/home/amejia/PycharmProjects/lossDatasetGenerator/test/test32.h5')
net = model((32, 32, 3))
net.triplet_model.load_weights("/home/amejia/PycharmProjects/triplet_loss/simple-grande.hdf5")
enter = [t1, t2, t1]
a = net.triplet_model.predict(x=enter, batch_size=1)
print(a)
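
The .hd5 → .pb conversion mentioned at the top is not shown in the post. For reference, a common way to freeze a Keras/TF 1.x model for the TensorFlow C++ API is sketched below; this is only a guess at how it was done (the output file name 'frozen_model.pb' is made up, and the output node name 'distance/stack' is taken from the node fetched in the C++ code further down):

from keras import backend as K
from tensorflow.python.framework import graph_util, graph_io

# Freeze the graph of the already-built model (net.triplet_model above, with its
# weights loaded): replace variables with constants and write a binary .pb that
# the TensorFlow C++ API can load.
sess = K.get_session()
frozen = graph_util.convert_variables_to_constants(
    sess, sess.graph.as_graph_def(), ['distance/stack'])
graph_io.write_graph(frozen, '.', 'frozen_model.pb', as_text=False)

If the real model contained layers that behave differently at train and test time (dropout, batch norm), K.set_learning_phase(0) would have to be set before building it; the model shown here has neither.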

and in C++ this is how I run inference:

tensorflow::Tensor target(tensorflow::DT_FLOAT,
                          tensorflow::TensorShape({1, image_size, image_size, 3}));
tensorflow::Tensor positive(tensorflow::DT_FLOAT,
                            tensorflow::TensorShape({1, image_size, image_size, 3}));

img_to_float2(tracks, detections, target, positive, frame);


std::vector<std::pair<std::string, tensorflow::Tensor>> Input = {{"input_target:0", target},
                                                                 {"input_pos:0",    positive},
                                                                 {"input_neg:0",    target}};
std::vector<tensorflow::Tensor> Outputs;

tensorflow::Status Status = session->Run(Input, {"distance/stack:0"}, {}, &Outputs);

auto data = Outputs[0].flat<float>();

std::cout << Outputs[0].DebugString() << std::endl;

and this is the function that copies the images into the tensors:

void LossModel::img_to_float2(Track &tracks, Detection &detections, tensorflow::Tensor &tracksTensor,
                              tensorflow::Tensor &detectionsTensor, cv::Mat &frame) {

    auto *tar = tracksTensor.flat<float>().data();
    auto *dec = detectionsTensor.flat<float>().data();
    cv::Mat detectionImg = frame(detections.getBox()).clone();

    resize(detectionImg, detectionImg, cv::Size(FEATURES_IMG_SIZE, FEATURES_IMG_SIZE), 0, 0,
           cv::INTER_CUBIC);
    cv::Mat resizedImage(FEATURES_IMG_SIZE, FEATURES_IMG_SIZE, CV_32FC3, dec);
    detectionImg.convertTo(resizedImage, CV_32FC3);

    cv::Mat trackImg = tracks.get_img().clone();

    resize(trackImg, trackImg, cv::Size(FEATURES_IMG_SIZE, FEATURES_IMG_SIZE), 0, 0,
           cv::INTER_CUBIC);
    cv::Mat resizedImage2(FEATURES_IMG_SIZE, FEATURES_IMG_SIZE, CV_32FC3, tar);
    trackImg.convertTo(resizedImage2, CV_32FC3);
}
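
One way to narrow the discrepancy down would be to run the same frozen graph from Python on the same preprocessed arrays used with the Keras model, so the C++ preprocessing is taken out of the comparison. A minimal TF 1.x sketch (the file name 'frozen_model.pb' is an assumption; the tensor names come from the C++ code above, and the float cast mimics convertTo(CV_32FC3), which casts without scaling):

import cv2
import numpy as np
import tensorflow as tf

# Load the frozen graph that the C++ code uses.
graph_def = tf.GraphDef()
with tf.gfile.GFile("frozen_model.pb", "rb") as f:
    graph_def.ParseFromString(f.read())

graph = tf.Graph()
with graph.as_default():
    tf.import_graph_def(graph_def, name="")

# Same preprocessing as the Python test above, with an explicit float32 cast
# so the values (0-255, unscaled) match what the C++ side feeds the graph.
tar = cv2.imread("/home/amejia/PycharmProjects/triplet_loss/tra1.png")
nega = cv2.imread("/home/amejia/PycharmProjects/triplet_loss/dec1.png")
t1 = cv2.resize(tar, (32, 32), interpolation=cv2.INTER_CUBIC).astype(np.float32).reshape((1, 32, 32, 3))
t2 = cv2.resize(nega, (32, 32), interpolation=cv2.INTER_CUBIC).astype(np.float32).reshape((1, 32, 32, 3))

with tf.Session(graph=graph) as sess:
    out = sess.run("distance/stack:0", feed_dict={
        "input_target:0": t1, "input_pos:0": t2, "input_neg:0": t1})
print(out)  # compare with net.triplet_model.predict([t1, t2, t1], batch_size=1)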