Вы можете сделать это так:
import tensorflow as tf
# Contract the trailing axis (size 16) of A against every one of the 2 rows
# that B holds for the same leading index, via broadcasting.
A = tf.placeholder(tf.float32, [3, 28, 28, 16])
B = tf.placeholder(tf.float32, [3, 2, 16])

# Insert a singleton axis after the leading one: (3, 28, 28, 16) -> (3, 1, 28, 28, 16).
a_broadcast = tf.expand_dims(A, axis=1)

# Insert two singleton axes before the last one: (3, 2, 16) -> (3, 2, 1, 1, 16).
b_broadcast = tf.expand_dims(tf.expand_dims(B, axis=2), axis=2)

# The elementwise product broadcasts to (3, 2, 28, 28, 16); summing over the
# last axis leaves one value per (leading index, B row, A position).
product = a_broadcast * b_broadcast
C = tf.reduce_sum(product, axis=-1)

print(C)
# Tensor("Sum:0", shape=(3, 2, 28, 28), dtype=float32)