I am trying to retrain a model (PointNet++) with one additional class (i.e. my model was originally trained with 6 classes, and now I am adding one more, so the total number of classes is 7). I have looked at similar questions on the internet, but I cannot figure out what the problem in my code is. Could someone please guide me? Below is the code for reference.
Model definition, in the file imported as MODEL:
def placeholder_inputs(batch_size, num_point):
    pointclouds_pl = tf.placeholder(tf.float32, shape=(batch_size, num_point, 3))
    labels_pl = tf.placeholder(tf.int32, shape=(batch_size, num_point))
    smpws_pl = tf.placeholder(tf.float32, shape=(batch_size, num_point))
    return pointclouds_pl, labels_pl, smpws_pl

def get_model(point_cloud, is_training, num_class, bn_decay=None):
    """ Semantic segmentation PointNet, input is BxNx3, output Bxnum_class """
    batch_size = point_cloud.get_shape()[0].value
    num_point = point_cloud.get_shape()[1].value
    end_points = {}
    l0_xyz = point_cloud
    l0_points = None
    end_points['l0_xyz'] = l0_xyz

    # Layer 1
    l1_xyz, l1_points, l1_indices = pointnet_sa_module(l0_xyz, l0_points, npoint=1024, radius=2, nsample=32, mlp=[32,32,64], mlp2=None, group_all=False, is_training=is_training, bn_decay=bn_decay, scope='layer1')
    l2_xyz, l2_points, l2_indices = pointnet_sa_module(l1_xyz, l1_points, npoint=256, radius=4, nsample=32, mlp=[64,64,128], mlp2=None, group_all=False, is_training=is_training, bn_decay=bn_decay, scope='layer2')
    l3_xyz, l3_points, l3_indices = pointnet_sa_module(l2_xyz, l2_points, npoint=64, radius=8, nsample=32, mlp=[128,128,256], mlp2=None, group_all=False, is_training=is_training, bn_decay=bn_decay, scope='layer3')
    l4_xyz, l4_points, l4_indices = pointnet_sa_module(l3_xyz, l3_points, npoint=16, radius=16, nsample=32, mlp=[256,256,512], mlp2=None, group_all=False, is_training=is_training, bn_decay=bn_decay, scope='layer4')

    # Feature Propagation layers
    l3_points = pointnet_fp_module(l3_xyz, l4_xyz, l3_points, l4_points, [256,256], is_training, bn_decay, scope='fa_layer1')
    l2_points = pointnet_fp_module(l2_xyz, l3_xyz, l2_points, l3_points, [256,256], is_training, bn_decay, scope='fa_layer2')
    l1_points = pointnet_fp_module(l1_xyz, l2_xyz, l1_points, l2_points, [256,128], is_training, bn_decay, scope='fa_layer3')
    l0_points = pointnet_fp_module(l0_xyz, l1_xyz, l0_points, l1_points, [128,128,128], is_training, bn_decay, scope='fa_layer4')

    # FC layers
    net = tf_util.conv1d(l0_points, 128, 1, padding='VALID', bn=True, is_training=is_training, scope='fc1', bn_decay=bn_decay)
    end_points['feats'] = net
    net = tf_util.dropout(net, keep_prob=0.5, is_training=is_training, scope='dp1')
    net = tf_util.conv1d(net, num_class, 1, padding='VALID', activation_fn=None, scope='fc2')
    return net, end_points

def get_loss(pred, label, smpw):
    """ pred: BxNxC,
        label: BxN,
        smpw: BxN """
    classify_loss = tf.losses.sparse_softmax_cross_entropy(labels=label, logits=pred, weights=smpw)
    tf.summary.scalar('classify loss', classify_loss)
    tf.add_to_collection('losses', classify_loss)
    return classify_loss
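As far as I can tell, num_class only enters the network in the very last tf_util.conv1d call (scope 'fc2'), so the only variables whose shape changes when going from 6 to 7 classes should be the fc2 weights and biases. Here is a minimal standalone sketch of what I believe those shapes are (the variable names 'weights' and 'biases' under the fc2 scope are my assumption of what tf_util.conv1d creates; 'fc2/biases' at least matches the error shown further below):

# Minimal sketch (TF 1.x), not part of my training script: fc2 is a 1x1
# convolution from 128 features to num_class logits, so its variables are:
import tensorflow as tf

num_class = 7
with tf.variable_scope('fc2'):
    # assuming tf_util.conv1d creates variables named 'weights' and 'biases'
    weights = tf.get_variable('weights', shape=[1, 128, num_class], dtype=tf.float32)
    biases = tf.get_variable('biases', shape=[num_class], dtype=tf.float32)

print(weights.shape)  # (1, 128, 7) -- presumably (1, 128, 6) in the old checkpoint
print(biases.shape)   # (7,)        -- (6,) in the old 6-class checkpoint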
The code where I try to restore and retrain the model:
def train():
    with tf.Graph().as_default():
        with tf.device('/gpu:'+str(GPU_INDEX)):
            pointclouds_pl, labels_pl, smpws_pl = MODEL.placeholder_inputs(BATCH_SIZE, NUM_POINT)
            is_training_pl = tf.placeholder(tf.bool, shape=())
            print(is_training_pl)

            batch = tf.Variable(0)
            bn_decay = get_bn_decay(batch)
            tf.summary.scalar('bn_decay', bn_decay)

            print("--- Get model and loss")
            # Get model and loss
            pred, end_points = MODEL.get_model(pointclouds_pl, is_training_pl, NUM_CLASSES, bn_decay=bn_decay)
            loss = MODEL.get_loss(pred, labels_pl, smpws_pl)
            tf.summary.scalar('loss', loss)

            correct = tf.equal(tf.argmax(pred, 2), tf.to_int64(labels_pl))
            accuracy = tf.reduce_sum(tf.cast(correct, tf.float32)) / float(BATCH_SIZE*NUM_POINT)
            tf.summary.scalar('accuracy', accuracy)

            print("--- Get training operator")
            # Get training operator
            learning_rate = get_learning_rate(batch)
            tf.summary.scalar('learning_rate', learning_rate)
            if OPTIMIZER == 'momentum':
                optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=MOMENTUM)
            elif OPTIMIZER == 'adam':
                optimizer = tf.train.AdamOptimizer(learning_rate)
            train_op = optimizer.minimize(loss, global_step=batch)

            # Add ops to save and restore all the variables.
            saver = tf.train.Saver()

        # Create a session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        config.log_device_placement = False
        sess = tf.Session(config=config)

        # Add summary writers
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'train'), sess.graph)
        test_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'test'), sess.graph)

        #sess.run(init, {is_training_pl: True})
        ###############################################################################################
        log_string("Model restoring.")
        saver.restore(sess, MODEL_PATH)
        log_string("Model restored.")
        ###################################################################

        # Init variables
        init = tf.global_variables_initializer()
        sess.run(init)

        ops = {'pointclouds_pl': pointclouds_pl,
               'labels_pl': labels_pl,
               'smpws_pl': smpws_pl,
               'is_training_pl': is_training_pl,
               'pred': pred,
               'loss': loss,
               'train_op': train_op,
               'merged': merged,
               'step': batch,
               'end_points': end_points}

        best_acc = -1
        for epoch in range(MAX_EPOCH):
            log_string('**** EPOCH %03d ****' % (epoch))
            sys.stdout.flush()

            train_one_epoch(sess, ops, train_writer, TRAIN_DATASET)
            if epoch % 5 == 0:
                acc = eval_one_epoch(sess, ops, test_writer, TEST_DATASET)
                acc = eval_whole_scene_one_epoch(sess, ops, test_writer, TEST_DATASET_WHOLE_SCENE)
                if acc > best_acc:
                    best_acc = acc
                    save_path = saver.save(sess, os.path.join(LOG_DIR, "best_model_retrained_epoch_%03d" % (epoch)))
                    log_string("Model saved in file: %s" % save_path)

            # Save the variables to disk.
            if epoch % 10 == 0:
                save_path = saver.save(sess, os.path.join(LOG_DIR, "model_retrained"))
                log_string("Model saved in file: %s" % save_path)
When I ran the above code, I got the error below:
Caused by op 'save/Assign_143', defined at:
  File "train_2_retrain_4.1_water.py", line 448, in <module>
    train()
  File "train_2_retrain_4.1_water.py", line 144, in train
    saver = tf.train.Saver()
  File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1338, in __init__
    self.build()
  File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1347, in build
    self._build(self._filename, build_save=True, build_restore=True)
  File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1384, in _build
    build_save=build_save, build_restore=build_restore)
  File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 835, in _build_internal
    restore_sequentially, reshape)
  File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 494, in _AddRestoreOps
    assign_ops.append(saveable.restore(saveable_tensors, shapes))
  File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 185, in restore
    self.op.get_shape().is_fully_defined())
  File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/state_ops.py", line 283, in assign
    validate_shape=validate_shape)
  File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_state_ops.py", line 60, in assign
    use_locking=use_locking, name=name)
  File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3392, in create_op
    op_def=op_def)
  File "/home/ubuntu/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1718, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InvalidArgumentError (see above for traceback): Assign requires shapes of both tensors to match. lhs shape= [7] rhs shape= [6]
  [[Node: save/Assign_143 = Assign[T=DT_FLOAT, _class=["loc:@fc2/biases"], use_locking=true, validate_shape=true, _device="/job:localhost/replica:0/task:0/device:CPU:0"](fc2/biases, save/RestoreV2:143)]]
  [[Node: save/RestoreV2/_267 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_508_save/RestoreV2", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
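From the error it looks like the restore fails on fc2/biases: the new 7-class graph expects shape [7] but the checkpoint holds [6]. One suggestion I came across is to initialize everything first and then restore only the variables whose name and shape still match the old checkpoint, roughly as in the untested sketch below, but I am not sure whether this is the right way to adapt my code, which is partly why I am asking:

# Untested sketch: initialize everything, then restore only the variables whose
# name and shape match the old 6-class checkpoint, so the new 7-class
# fc2 weights/biases keep their fresh initialization.
sess.run(tf.global_variables_initializer())

reader = tf.train.NewCheckpointReader(MODEL_PATH)
ckpt_shapes = reader.get_variable_to_shape_map()
restore_vars = [v for v in tf.global_variables()
                if v.op.name in ckpt_shapes
                and v.get_shape().as_list() == ckpt_shapes[v.op.name]]

restore_saver = tf.train.Saver(var_list=restore_vars)
restore_saver.restore(sess, MODEL_PATH)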
Any help is much appreciated.