I am building a person re-identification network and implementing the triplet loss function; at that level everything works. In Python the network behaves correctly: I implemented it in Keras with TensorFlow as the backend, then converted the .hdf5 file to a .pb file to run inference with the TensorFlow C++ API (see the conversion sketch after the model code). The problem is that, for the same input images, the outputs differ between Python and C++, and I don't know why. Can you help me? Here is the model in Python:
import keras
import keras.applications
import keras.layers as layer
import tensorflow as tf
from keras import backend as K
from keras.backend.tensorflow_backend import set_session
from keras.models import Model as md
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.log_device_placement = True
sess = tf.Session(config=config)
set_session(sess)
class Model:
    def __init__(self, shape):
        self.shape = shape
        self.params = {
            'optimizer': 'sgd',
            'first_neuron': 12,
            'first_max_pooling': 2,
            'second_neuron': 12,
            'second_max_pooling': 2,
            'third_neuron': 20,
            'third_max_pooling': 3,
            'dense_neuron': 64,
            'final_neuron': 128,
        }
        self.feature_model = self.create_features_model()
        self.triplet_model = self.create_model()

    def create_features_model(self):
        # Define the vision modules
        img_input = layer.Input(shape=self.shape)
        x = layer.Conv2D(self.params['first_neuron'], (3, 3), activation='relu')(img_input)
        x = layer.MaxPooling2D((self.params['first_max_pooling'], self.params['first_max_pooling']))(x)
        x = layer.Conv2D(self.params['second_neuron'], (3, 3), activation='relu')(x)
        x = layer.MaxPooling2D((self.params['second_max_pooling'], self.params['second_max_pooling']))(x)
        x = layer.Conv2D(self.params['third_neuron'], (3, 3), activation='relu')(x)
        x = layer.MaxPooling2D((self.params['third_max_pooling'], self.params['third_max_pooling']))(x)
        x = layer.Flatten()(x)
        x = layer.Dense(self.params['dense_neuron'], activation='relu')(x)
        x = layer.Dense(self.params['final_neuron'], activation='relu')(x)
        out = layer.Lambda(lambda x: K.l2_normalize(x, axis=1), name='t_emb_1_lnorm')(x)
        features_model = md(img_input, out)
        features_model.summary()
        return features_model

    def create_model(self):
        base_model = self.feature_model
        # Triplet framework, shared weights
        input_shape = self.shape
        input_target = layer.Input(shape=input_shape, name='input_target')
        input_positive = layer.Input(shape=input_shape, name='input_pos')
        input_negative = layer.Input(shape=input_shape, name='input_neg')
        net_target = base_model(input_target)
        net_positive = base_model(input_positive)
        net_negative = base_model(input_negative)
        # Each Lambda layer produces an output from the given function; here it is the Euclidean distance.
        positive_distance = layer.Lambda(self.euclidean_distance, name='pos_dist')([net_target, net_positive])
        negative_distance = layer.Lambda(self.euclidean_distance, name='neg_dist')([net_target, net_negative])
        difference = layer.Lambda(self.euclidean_distance, name='dif')([net_positive, net_negative])
        # This Lambda layer simply stacks the outputs so all three distances are available to the objective
        distances = layer.Lambda(lambda vects: K.stack(vects, axis=1), name='distance')(
            [positive_distance, negative_distance, difference])
        model = md([input_target, input_positive, input_negative], distances, name='result')
        # Setting up an optimizer designed for a variable learning rate
        model.compile(optimizer=keras.optimizers.Adam(lr=0.001, decay=0.00002),
                      loss=self.triplet_loss, metrics=[self.accuracy])
        return model

    def triplet_loss(self, _, y_pred):
        margin = K.constant(0.5)
        return K.mean(K.maximum(K.constant(0), K.square(y_pred[:, 0, 0]) - 0.5 * (
            K.square(y_pred[:, 1, 0]) + K.square(y_pred[:, 2, 0])) + margin))

    def accuracy(self, _, y_pred):
        return K.mean(y_pred[:, 0, 0] < y_pred[:, 1, 0])

    def lnorm(self, x):
        return K.l2_normalize(x, axis=-1)

    def euclidean_distance(self, vects):
        x, y = vects
        return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True), K.epsilon()))
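The .hdf5 to .pb conversion itself is not shown above; it was done roughly like this minimal sketch (TF 1.x graph freezing via tf.graph_util.convert_variables_to_constants; the file names are placeholders, and "distance/stack" is the output node the C++ code queries):

import tensorflow as tf
from keras import backend as K
from model import Model as model

# Minimal sketch of the .hdf5 -> .pb freeze (TF 1.x); paths are placeholders.
net = model((32, 32, 3))
net.triplet_model.load_weights("simple-grande.hdf5")

sess = K.get_session()
# Bake the variables into constants so the graph is self-contained.
frozen = tf.graph_util.convert_variables_to_constants(
    sess, sess.graph.as_graph_def(), ["distance/stack"])
tf.train.write_graph(frozen, ".", "simple-grande.pb", as_text=False)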
And here is how I run inference in Python:
from model import Model as model
from keras.utils import HDF5Matrix
import numpy as np
import cv2
from keras.backend.tensorflow_backend import set_session
import tensorflow as tf
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.log_device_placement = True
sess = tf.Session(config=config)
set_session(sess)
def load_datasets(in_h5_path, partition='train'):
    if partition == 'train':
        target = HDF5Matrix(datapath=in_h5_path, dataset="targets")
        positive = HDF5Matrix(datapath=in_h5_path, dataset="positives")
        negative = HDF5Matrix(datapath=in_h5_path, dataset="negatives")
        return target, positive, negative
    else:
        print("Invalid 'partition' parameter: Valid values: ['train', 'test']")
tar = cv2.imread("/home/amejia/PycharmProjects/triplet_loss/tra1.png")
nega = cv2.imread("/home/amejia/PycharmProjects/triplet_loss/dec1.png")
tar = cv2.resize(tar, (32, 32), interpolation=cv2.INTER_CUBIC)
nega = cv2.resize(nega, (32, 32), interpolation=cv2.INTER_CUBIC)
t1 = np.array(tar).reshape((1, 32, 32, 3))
t2 = np.array(nega).reshape((1, 32, 32, 3))
target, positive, negative = load_datasets('/home/amejia/PycharmProjects/lossDatasetGenerator/test/test32.h5')
net = model((32, 32, 3))
net.triplet_model.load_weights("/home/amejia/PycharmProjects/triplet_loss/simple-grande.hdf5")
enter = [t1, t2, t1]
a = net.triplet_model.predict(x=enter, batch_size=1)
print(a)
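One consistency check I can run (continuing from the script above): with the inputs [t1, t2, t1], the first and third stacked distances are both d(t1, t2) and the second is d(t1, t1), so pos_dist should equal dif and neg_dist should be about sqrt(K.epsilon()). This should hold in both runtimes:

# Consistency check: for inputs [t1, t2, t1] the output must satisfy
# pos_dist == dif (both equal d(t1, t2)) and neg_dist == d(t1, t1) ~ 0.
pos_dist, neg_dist, dif = a[0, 0, 0], a[0, 1, 0], a[0, 2, 0]
assert np.isclose(pos_dist, dif), (pos_dist, dif)
assert neg_dist < 1e-3, neg_dist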
In C++, this is how I do the inference:
tensorflow::Tensor target(tensorflow::DT_FLOAT,
tensorflow::TensorShape(
{1, image_size, image_size, 3}));
tensorflow::Tensor positive(tensorflow::DT_FLOAT,
tensorflow::TensorShape(
{1, image_size, image_size, 3}));
img_to_float2(tracks, detections, target, positive, frame);
std::vector<std::pair<std::string, tensorflow::Tensor>> Input = {{"input_target:0", target},
{"input_pos:0", positive},
{"input_neg:0", target}};
std::vector<tensorflow::Tensor> Outputs;
tensorflow::Status Status = session->Run(Input, {"distance/stack:0"}, {}, &Outputs);
auto data = Outputs[0].flat<float>();
std::cout << Outputs[0].DebugString() << std::endl;
And this is the function that copies the images into the tensors:
void LossModel::img_to_float2(Track &tracks, Detection &detections, tensorflow::Tensor &tracksTensor,
tensorflow::Tensor &detectionsTensor, cv::Mat &frame) {
auto *tar = tracksTensor.flat<float>().data();
auto *dec = detectionsTensor.flat<float>().data();
cv::Mat detectionImg = frame(detections.getBox()).clone();
resize(detectionImg, detectionImg, cv::Size(FEATURES_IMG_SIZE, FEATURES_IMG_SIZE), 0, 0,
cv::INTER_CUBIC);
cv::Mat resizedImage(FEATURES_IMG_SIZE, FEATURES_IMG_SIZE, CV_32FC3, dec);
detectionImg.convertTo(resizedImage, CV_32FC3);
cv::Mat trackImg = tracks.get_img().clone();
resize(trackImg, trackImg, cv::Size(FEATURES_IMG_SIZE, FEATURES_IMG_SIZE), 0, 0,
cv::INTER_CUBIC);
cv::Mat resizedImage2(FEATURES_IMG_SIZE, FEATURES_IMG_SIZE, CV_32FC3, tar);
trackImg.convertTo(resizedImage2, CV_32FC3);
}
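What I still need to rule out is whether the discrepancy comes from the .hdf5 to .pb conversion or from the C++ preprocessing above. A way to isolate this (a sketch; it reuses t1 and t2 from the Python inference script, and the graph and tensor names match the C++ code) is to run the frozen .pb from Python and compare against both outputs:

import numpy as np
import tensorflow as tf

# Sketch: run the frozen graph from Python on the same inputs as the C++ code.
# If this matches the Keras prediction but not the C++ one, the conversion is
# fine and the difference is in the C++ image preprocessing.
graph_def = tf.GraphDef()
with tf.gfile.GFile("simple-grande.pb", "rb") as f:  # placeholder path
    graph_def.ParseFromString(f.read())

with tf.Graph().as_default() as g:
    tf.import_graph_def(graph_def, name="")
    with tf.Session(graph=g) as s:
        out = s.run("distance/stack:0",
                    feed_dict={"input_target:0": t1.astype(np.float32),
                               "input_pos:0": t2.astype(np.float32),
                               "input_neg:0": t1.astype(np.float32)})
print(out)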