Я хочу ускорить обучение новой модели, перенося веса из ранее обученной модели в её аналогичные слои.
Скажем, у меня есть две модели: первая модель и вторая модель.
1-я модель обучается с нуля.
2-я модель имеет два дополнительных слоя (conv_1_1 и relu_1_1)
по сравнению с 1-й моделью. Остальные слои одинаковы.
Поэтому я хочу выполнять обратное распространение только для тех слоёв, которые отличаются от первой модели, а слои, совпадающие с первой моделью, не переобучать. Цель — ускорить обучение 2-й модели.
Для этого я устанавливаю lr_mult и decay_mult
в 0.
Но выяснилось, что время обучения 2-й модели осталось таким же, как у 1-й модели.
Я думаю, что градиенты по-прежнему вычисляются и лишь умножаются на 0, поэтому, хотя веса не обновляются, вычисления всё равно выполняются.
Как полностью пропустить backpropagation
в слоях, совпадающих с первой моделью?
Я проверил файл журнала и обнаружил следующее:
I0830 13:40:26.546422 10580 net.cpp:226] mbox_loss needs backward computation.
I0830 13:40:26.546432 10580 net.cpp:228] mbox_priorbox does not need backward computation.
I0830 13:40:26.546437 10580 net.cpp:226] mbox_conf needs backward computation.
I0830 13:40:26.546440 10580 net.cpp:226] mbox_loc needs backward computation.
I0830 13:40:26.546444 10580 net.cpp:228] conv_6_norm_mbox_priorbox does not need backward computation.
I0830 13:40:26.546448 10580 net.cpp:226] conv_6_norm_mbox_conf_flat needs backward computation.
I0830 13:40:26.546452 10580 net.cpp:226] conv_6_norm_mbox_conf_perm needs backward computation.
I0830 13:40:26.546455 10580 net.cpp:226] conv_6_norm_mbox_conf needs backward computation.
I0830 13:40:26.546460 10580 net.cpp:226] conv_6_norm_mbox_loc_flat needs backward computation.
I0830 13:40:26.546464 10580 net.cpp:226] conv_6_norm_mbox_loc_perm needs backward computation.
I0830 13:40:26.546468 10580 net.cpp:226] conv_6_norm_mbox_loc needs backward computation.
I0830 13:40:26.546471 10580 net.cpp:226] conv_6_norm_conv_6_norm_0_split needs backward computation.
I0830 13:40:26.546475 10580 net.cpp:226] conv_6_norm needs backward computation.
I0830 13:40:26.546478 10580 net.cpp:226] relu_6 needs backward computation.
I0830 13:40:26.546481 10580 net.cpp:226] conv_6 needs backward computation.
I0830 13:40:26.546485 10580 net.cpp:226] pool_5 needs backward computation.
I0830 13:40:26.546489 10580 net.cpp:226] relu_5 needs backward computation.
I0830 13:40:26.546492 10580 net.cpp:226] conv_5 needs backward computation.
I0830 13:40:26.546495 10580 net.cpp:226] pool_4 needs backward computation.
I0830 13:40:26.546499 10580 net.cpp:226] relu_4 needs backward computation.
I0830 13:40:26.546502 10580 net.cpp:226] conv_4 needs backward computation.
I0830 13:40:26.546505 10580 net.cpp:226] pool_3 needs backward computation.
I0830 13:40:26.546509 10580 net.cpp:226] relu_3 needs backward computation.
I0830 13:40:26.546512 10580 net.cpp:226] conv_3 needs backward computation.
I0830 13:40:26.546515 10580 net.cpp:226] pool_2 needs backward computation.
I0830 13:40:26.546519 10580 net.cpp:226] relu_2 needs backward computation.
I0830 13:40:26.546522 10580 net.cpp:226] conv_2 needs backward computation.
I0830 13:40:26.546525 10580 net.cpp:226] pool_1 needs backward computation.
I0830 13:40:26.546530 10580 net.cpp:226] relu_1_1 needs backward computation.
I0830 13:40:26.546532 10580 net.cpp:226] conv_1_1 needs backward computation.
I0830 13:40:26.546536 10580 net.cpp:228] relu_1 does not need backward computation.
I0830 13:40:26.546540 10580 net.cpp:228] conv_1 does not need backward computation.
I0830 13:40:26.546545 10580 net.cpp:228] data_data_0_split does not need backward computation.
I0830 13:40:26.546548 10580 net.cpp:228] data does not need backward computation.
Таким образом, только для conv_1 и relu_1
обратное распространение не выполняется. Но для остальных слоёв оно по-прежнему выполняется.
Как отключить обратное распространение в следующих слоях:
conv_6_norm_conv_6_norm_0_split, conv_6_norm, relu_6, conv_6, pool_5, relu_5, conv_5, pool_4, relu_4, conv_4, pool_3, relu_3, conv_3, pool_2, relu_2, conv_2?
Файл Train.prototxt выглядит следующим образом.
# Name of the network defined by the layers that follow.
name: "RegNet_train_0"
# Training-only input layer: reads annotated samples from an LMDB and
# produces the "data" and "label" blobs.
layer {
  name: "data"
  type: "AnnotatedData"
  top: "data"
  top: "label"
  include { phase: TRAIN }

  transform_param {
    mirror: true
    # Three mean_value entries (presumably one per image channel -- confirm
    # the channel order against the dataset).
    mean_value: 104.0
    mean_value: 117.0
    mean_value: 123.0

    # Every sample (prob 1.0) is warped to 480x480.
    resize_param {
      prob: 1.0
      resize_mode: WARP
      height: 480
      width: 480
      # Five interpolation modes are listed; presumably one is picked per
      # sample -- confirm against the resize implementation.
      interp_mode: LINEAR
      interp_mode: AREA
      interp_mode: NEAREST
      interp_mode: CUBIC
      interp_mode: LANCZOS4
      height_scale: 480
      width_scale: 480
    }

    emit_constraint { emit_type: CENTER }

    # Photometric distortions; each one is applied with probability 0.5,
    # and random ordering is disabled (random_order_prob 0.0).
    distort_param {
      brightness_prob: 0.5
      brightness_delta: 32.0
      contrast_prob: 0.5
      contrast_lower: 0.5
      contrast_upper: 1.5
      hue_prob: 0.5
      hue_delta: 18.0
      saturation_prob: 0.5
      saturation_lower: 0.5
      saturation_upper: 1.5
      random_order_prob: 0.0
    }

    expand_param {
      prob: 0.5
      max_expand_ratio: 4.0
    }
  }

  data_param {
    source: "/home/coie/data/NumberPlate/lmdb/Nextan_trainval_lmdb"
    batch_size: 16
    backend: LMDB
  }

  annotated_data_param {
    # Sampler 1: no sampler/constraint block, a single trial.
    batch_sampler {
      max_sample: 1
      max_trials: 1
    }

    # Samplers 2-6 share the same scale/aspect-ratio range and differ only
    # in the minimum Jaccard overlap (0.1, 0.3, 0.5, 0.7, 0.9).
    batch_sampler {
      sampler {
        min_scale: 0.300000011921
        max_scale: 1.0
        min_aspect_ratio: 0.5
        max_aspect_ratio: 2.0
      }
      sample_constraint { min_jaccard_overlap: 0.10000000149 }
      max_sample: 1
      max_trials: 50
    }
    batch_sampler {
      sampler {
        min_scale: 0.300000011921
        max_scale: 1.0
        min_aspect_ratio: 0.5
        max_aspect_ratio: 2.0
      }
      sample_constraint { min_jaccard_overlap: 0.300000011921 }
      max_sample: 1
      max_trials: 50
    }
    batch_sampler {
      sampler {
        min_scale: 0.300000011921
        max_scale: 1.0
        min_aspect_ratio: 0.5
        max_aspect_ratio: 2.0
      }
      sample_constraint { min_jaccard_overlap: 0.5 }
      max_sample: 1
      max_trials: 50
    }
    batch_sampler {
      sampler {
        min_scale: 0.300000011921
        max_scale: 1.0
        min_aspect_ratio: 0.5
        max_aspect_ratio: 2.0
      }
      sample_constraint { min_jaccard_overlap: 0.699999988079 }
      max_sample: 1
      max_trials: 50
    }
    batch_sampler {
      sampler {
        min_scale: 0.300000011921
        max_scale: 1.0
        min_aspect_ratio: 0.5
        max_aspect_ratio: 2.0
      }
      sample_constraint { min_jaccard_overlap: 0.899999976158 }
      max_sample: 1
      max_trials: 50
    }

    # Sampler 7: same range, constrained by a maximum overlap instead.
    batch_sampler {
      sampler {
        min_scale: 0.300000011921
        max_scale: 1.0
        min_aspect_ratio: 0.5
        max_aspect_ratio: 2.0
      }
      sample_constraint { max_jaccard_overlap: 1.0 }
      max_sample: 1
      max_trials: 50
    }

    label_map_file: "/home/coie/data/NumberPlate/labelmap_NumberPlate.prototxt"
  }
}
# conv_1: 3x3 convolution (8 outputs, pad 1) applied directly to "data".
# Both param specs use lr_mult 0 / decay_mult 0, so its parameters are
# frozen. Nothing trainable sits below it, and Caffe's setup log reports
# "conv_1 does not need backward computation".
layer {
  name: "conv_1"
  type: "Convolution"
  bottom: "data"
  top: "conv_1"
  param { lr_mult: 0.0 decay_mult: 0.0 }  # first parameter blob
  param { lr_mult: 0.0 decay_mult: 0.0 }  # second parameter blob
  convolution_param {
    num_output: 8
    pad: 1
    kernel_size: 3
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0.0 }
  }
}
# In-place ReLU: bottom and top are the same blob ("conv_1").
layer {
  name: "relu_1"
  type: "ReLU"
  bottom: "conv_1"
  top: "conv_1"
}
# conv_1_1: the additional 3x3 convolution (8 outputs, pad 1) on "conv_1".
# This layer is trainable: lr_mult 1 / decay_mult 1 for the first
# parameter blob and lr_mult 2 / decay_mult 0 for the second.
layer {
  name: "conv_1_1"
  type: "Convolution"
  bottom: "conv_1"
  top: "conv_1_1"
  param { lr_mult: 1.0 decay_mult: 1.0 }  # first parameter blob
  param { lr_mult: 2.0 decay_mult: 0.0 }  # second parameter blob
  convolution_param {
    num_output: 8
    pad: 1
    kernel_size: 3
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0.0 }
  }
}
# In-place ReLU on the output of conv_1_1 (bottom and top are both
# "conv_1_1").
layer {
  name: "relu_1_1"
  type: "ReLU"
  bottom: "conv_1_1"
  top: "conv_1_1"
}
# 2x2 max pooling with stride 2 over "conv_1_1".
layer {
  name: "pool_1"
  type: "Pooling"
  bottom: "conv_1_1"
  top: "pool_1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
# conv_2: 3x3 convolution (8 outputs, pad 1) on "pool_1".
# Parameters are frozen (lr_mult 0 / decay_mult 0 on both param specs).
# The layer sits between the loss and the trainable conv_1_1, so gradients
# must still pass through it; Caffe's setup log accordingly reports
# "conv_2 needs backward computation".
layer {
  name: "conv_2"
  type: "Convolution"
  bottom: "pool_1"
  top: "conv_2"
  param { lr_mult: 0.0 decay_mult: 0.0 }  # first parameter blob
  param { lr_mult: 0.0 decay_mult: 0.0 }  # second parameter blob
  convolution_param {
    num_output: 8
    pad: 1
    kernel_size: 3
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0.0 }
  }
}
# In-place ReLU: bottom and top are the same blob ("conv_2").
layer {
  name: "relu_2"
  type: "ReLU"
  bottom: "conv_2"
  top: "conv_2"
}
# 2x2 max pooling with stride 2 over "conv_2".
layer {
  name: "pool_2"
  type: "Pooling"
  bottom: "conv_2"
  top: "pool_2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
# conv_3: 3x3 convolution (16 outputs, pad 1) on "pool_2".
# Parameters are frozen (lr_mult 0 / decay_mult 0 on both param specs);
# the layer is still on the gradient path from the loss down to conv_1_1.
layer {
  name: "conv_3"
  type: "Convolution"
  bottom: "pool_2"
  top: "conv_3"
  param { lr_mult: 0.0 decay_mult: 0.0 }  # first parameter blob
  param { lr_mult: 0.0 decay_mult: 0.0 }  # second parameter blob
  convolution_param {
    num_output: 16
    pad: 1
    kernel_size: 3
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0.0 }
  }
}
# In-place ReLU: bottom and top are the same blob ("conv_3").
layer {
  name: "relu_3"
  type: "ReLU"
  bottom: "conv_3"
  top: "conv_3"
}
# 2x2 max pooling with stride 2 over "conv_3".
layer {
  name: "pool_3"
  type: "Pooling"
  bottom: "conv_3"
  top: "pool_3"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
# conv_4: 3x3 convolution (16 outputs, pad 1) on "pool_3".
# Parameters are frozen (lr_mult 0 / decay_mult 0 on both param specs);
# the layer is still on the gradient path from the loss down to conv_1_1.
layer {
  name: "conv_4"
  type: "Convolution"
  bottom: "pool_3"
  top: "conv_4"
  param { lr_mult: 0.0 decay_mult: 0.0 }  # first parameter blob
  param { lr_mult: 0.0 decay_mult: 0.0 }  # second parameter blob
  convolution_param {
    num_output: 16
    pad: 1
    kernel_size: 3
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0.0 }
  }
}
# In-place ReLU: bottom and top are the same blob ("conv_4").
layer {
  name: "relu_4"
  type: "ReLU"
  bottom: "conv_4"
  top: "conv_4"
}
# 2x2 max pooling with stride 2 over "conv_4".
layer {
  name: "pool_4"
  type: "Pooling"
  bottom: "conv_4"
  top: "pool_4"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
# conv_5: 3x3 convolution (32 outputs, pad 1) on "pool_4".
# Parameters are frozen (lr_mult 0 / decay_mult 0 on both param specs);
# the layer is still on the gradient path from the loss down to conv_1_1.
layer {
  name: "conv_5"
  type: "Convolution"
  bottom: "pool_4"
  top: "conv_5"
  param { lr_mult: 0.0 decay_mult: 0.0 }  # first parameter blob
  param { lr_mult: 0.0 decay_mult: 0.0 }  # second parameter blob
  convolution_param {
    num_output: 32
    pad: 1
    kernel_size: 3
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0.0 }
  }
}
# In-place ReLU: bottom and top are the same blob ("conv_5").
layer {
  name: "relu_5"
  type: "ReLU"
  bottom: "conv_5"
  top: "conv_5"
}
# 2x2 max pooling with stride 2 over "conv_5".
layer {
  name: "pool_5"
  type: "Pooling"
  bottom: "conv_5"
  top: "pool_5"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
# conv_6: 3x3 convolution (32 outputs, pad 1) on "pool_5".
# Parameters are frozen (lr_mult 0 / decay_mult 0 on both param specs);
# the layer is still on the gradient path from the loss down to conv_1_1.
layer {
  name: "conv_6"
  type: "Convolution"
  bottom: "pool_5"
  top: "conv_6"
  param { lr_mult: 0.0 decay_mult: 0.0 }  # first parameter blob
  param { lr_mult: 0.0 decay_mult: 0.0 }  # second parameter blob
  convolution_param {
    num_output: 32
    pad: 1
    kernel_size: 3
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0.0 }
  }
}
# In-place ReLU: bottom and top are the same blob ("conv_6").
layer {
  name: "relu_6"
  type: "ReLU"
  bottom: "conv_6"
  top: "conv_6"
}
# conv_6_norm: Normalize layer over "conv_6" (across_spatial false) with a
# scale blob initialised to the constant 20 and not shared across channels.
#
# The scale is a learnable parameter. Without a param spec it gets Caffe's
# default lr_mult of 1 and would keep training, even though this layer is
# shared with the first model and is meant to stay fixed like conv_2..conv_6.
# The explicit param block below freezes it and lets Caffe skip the
# gradient computation for the scale blob.
#
# The layer still propagates gradients to its bottom, because the trainable
# conv_1_1 lies further down the network.
layer {
  name: "conv_6_norm"
  type: "Normalize"
  bottom: "conv_6"
  top: "conv_6_norm"
  param {
    lr_mult: 0.0
    decay_mult: 0.0
  }
  norm_param {
    across_spatial: false
    scale_filler {
      type: "constant"
      value: 20.0
    }
    channel_shared: false
  }
}
# Localization head: 3x3 convolution (12 outputs, pad 1, stride 1) on
# "conv_6_norm". Trainable: lr_mult 1 / decay_mult 1 for the first
# parameter blob, lr_mult 2 / decay_mult 0 for the second.
layer {
  name: "conv_6_norm_mbox_loc"
  type: "Convolution"
  bottom: "conv_6_norm"
  top: "conv_6_norm_mbox_loc"
  param { lr_mult: 1.0 decay_mult: 1.0 }  # first parameter blob
  param { lr_mult: 2.0 decay_mult: 0.0 }  # second parameter blob
  convolution_param {
    num_output: 12
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0.0 }
  }
}
# Reorders the axes of the localization output to (0, 2, 3, 1), moving the
# original axis 1 to the last position.
layer {
  name: "conv_6_norm_mbox_loc_perm"
  type: "Permute"
  bottom: "conv_6_norm_mbox_loc"
  top: "conv_6_norm_mbox_loc_perm"
  permute_param {
    order: 0
    order: 2
    order: 3
    order: 1
  }
}
# Flattens the permuted localization output starting at axis 1.
layer {
  name: "conv_6_norm_mbox_loc_flat"
  type: "Flatten"
  bottom: "conv_6_norm_mbox_loc_perm"
  top: "conv_6_norm_mbox_loc_flat"
  flatten_param { axis: 1 }
}
# Confidence head: 3x3 convolution (6 outputs, pad 1, stride 1) on
# "conv_6_norm". Trainable: lr_mult 1 / decay_mult 1 for the first
# parameter blob, lr_mult 2 / decay_mult 0 for the second.
layer {
  name: "conv_6_norm_mbox_conf"
  type: "Convolution"
  bottom: "conv_6_norm"
  top: "conv_6_norm_mbox_conf"
  param { lr_mult: 1.0 decay_mult: 1.0 }  # first parameter blob
  param { lr_mult: 2.0 decay_mult: 0.0 }  # second parameter blob
  convolution_param {
    num_output: 6
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0.0 }
  }
}
# Reorders the axes of the confidence output to (0, 2, 3, 1), moving the
# original axis 1 to the last position.
layer {
  name: "conv_6_norm_mbox_conf_perm"
  type: "Permute"
  bottom: "conv_6_norm_mbox_conf"
  top: "conv_6_norm_mbox_conf_perm"
  permute_param {
    order: 0
    order: 2
    order: 3
    order: 1
  }
}
# Flattens the permuted confidence output starting at axis 1.
layer {
  name: "conv_6_norm_mbox_conf_flat"
  type: "Flatten"
  bottom: "conv_6_norm_mbox_conf_perm"
  top: "conv_6_norm_mbox_conf_flat"
  flatten_param { axis: 1 }
}
# PriorBox layer fed by "conv_6_norm" and "data". Caffe's setup log reports
# that this layer does not need backward computation.
layer {
  name: "conv_6_norm_mbox_priorbox"
  type: "PriorBox"
  bottom: "conv_6_norm"
  bottom: "data"
  top: "conv_6_norm_mbox_priorbox"
  prior_box_param {
    min_size: 25.6000003815
    max_size: 48.0
    aspect_ratio: 3.0
    flip: false
    clip: false
    # Four variance entries.
    variance: 0.10000000149
    variance: 0.10000000149
    variance: 0.20000000298
    variance: 0.20000000298
    img_size: 480
    step: 32.0
    offset: 0.5
  }
}
# Concat along axis 1 with a single bottom: exposes
# "conv_6_norm_mbox_loc_flat" under the name "mbox_loc".
layer {
  name: "mbox_loc"
  type: "Concat"
  bottom: "conv_6_norm_mbox_loc_flat"
  top: "mbox_loc"
  concat_param { axis: 1 }
}
# Concat along axis 1 with a single bottom: exposes
# "conv_6_norm_mbox_conf_flat" under the name "mbox_conf".
layer {
  name: "mbox_conf"
  type: "Concat"
  bottom: "conv_6_norm_mbox_conf_flat"
  top: "mbox_conf"
  concat_param { axis: 1 }
}
# Concat along axis 2 with a single bottom: exposes
# "conv_6_norm_mbox_priorbox" under the name "mbox_priorbox".
layer {
  name: "mbox_priorbox"
  type: "Concat"
  bottom: "conv_6_norm_mbox_priorbox"
  top: "mbox_priorbox"
  concat_param { axis: 2 }
}
# Training-only MultiBoxLoss over the localization and confidence
# predictions, the prior boxes and the ground-truth labels.
layer {
  name: "mbox_loss"
  type: "MultiBoxLoss"
  bottom: "mbox_loc"
  bottom: "mbox_conf"
  bottom: "mbox_priorbox"
  bottom: "label"
  top: "mbox_loss"
  include { phase: TRAIN }

  # One propagate_down flag per bottom, in the order listed above.
  propagate_down: true   # mbox_loc
  propagate_down: true   # mbox_conf
  propagate_down: false  # mbox_priorbox
  propagate_down: false  # label

  loss_param { normalization: VALID }

  multibox_loss_param {
    loc_loss_type: SMOOTH_L1
    conf_loss_type: SOFTMAX
    loc_weight: 1.0
    num_classes: 2
    share_location: true
    match_type: PER_PREDICTION
    overlap_threshold: 0.5
    use_prior_for_matching: true
    # NOTE(review): the background label id is 1 here -- confirm it matches
    # the label map used to build the LMDB.
    background_label_id: 1
    use_difficult_gt: true
    neg_pos_ratio: 3.0
    neg_overlap: 0.5
    code_type: CENTER_SIZE
    ignore_cross_boundary_bbox: false
    mining_type: MAX_NEGATIVE
  }
}