Вот подход на основе БПФ (свёртки): он находит смещение, при котором евклидово расстояние между фрагментом и соответствующим окном изображения минимально:
import numpy as np
from numpy import fft
# Side length of the square test picture A (the example builds A as N x N).
N = 184
# Side length of the square sub-picture a that is searched for inside A.
n = 46
# Side length of the zero-padded FFT work arrays. It must be >= N so the
# circular correlation does not wrap around for the offsets that are kept;
# 192 = 2**6 * 3 is presumably chosen because it is an FFT-friendly size.
pad = 192
def _next_fast_len(size):
    """Return the smallest integer >= size whose prime factors are all in {2, 3, 5}."""
    candidate = max(int(size), 1)
    while True:
        rest = candidate
        for prime in (2, 3, 5):
            while rest % prime == 0:
                rest //= prime
        if rest == 1:
            return candidate
        candidate += 1


def best_offs(A, a):
    """Locate the window of ``A`` that is closest to ``a`` in Euclidean distance.

    For every offset ``(y, x)`` at which ``a`` fits completely inside ``A`` the
    squared distance is ``sum(a**2) - 2*sum(a*W) + sum(W**2)``, where ``W`` is
    the window of ``A`` at that offset. ``sum(a**2)`` does not depend on the
    offset, so only ``sum(W**2) - 2*sum(a*W)`` is minimised. Both sums are
    cross-correlations and are evaluated with real 2-D FFTs.

    Args:
        A: 2-D array, the picture to search in.
        a: 2-D array, the template; non-empty and no larger than ``A`` along
            either axis.

    Returns:
        Tuple ``(row, col)`` of the top-left corner of the best-matching
        window, as produced by ``np.unravel_index``. Ties resolve to the
        first minimum in row-major order.

    Raises:
        ValueError: if either input is not 2-D, or if ``a`` is empty or does
            not fit inside ``A``.
    """
    A = np.asarray(A, dtype=float)
    a = np.asarray(a, dtype=float)
    if A.ndim != 2 or a.ndim != 2:
        raise ValueError("best_offs expects two 2-D arrays")
    (H, W), (h, w) = A.shape, a.shape
    if not (0 < h <= H and 0 < w <= W):
        raise ValueError("template must be non-empty and fit inside the picture")

    # The FFT computes a *circular* correlation. Padding to at least the
    # picture size guarantees that none of the offsets kept below wraps
    # around; rounding up to a 5-smooth length keeps the transform fast.
    shape = (_next_fast_len(H), _next_fast_len(W))
    # Offsets at which the template lies completely inside the picture.
    valid = (slice(H - h + 1), slice(W - w + 1))

    def correlate(kernel, image):
        # rfft2 zero-pads both operands to `shape`; passing `s` to irfft2 is
        # required so that odd padded sizes are reconstructed correctly.
        spectrum = fft.rfft2(kernel, s=shape).conj() * fft.rfft2(image, s=shape)
        return fft.irfft2(spectrum, s=shape)[valid]

    # sum(a * window) for every offset.
    sim = correlate(a, A)
    # sum(window ** 2) for every offset: correlate A**2 with an all-ones mask.
    ref = correlate(np.ones_like(a), A * A)
    return np.unravel_index((ref - 2 * sim).argmin(), sim.shape)
# ---- demonstration -------------------------------------------------------
# Random 8-bit picture of size N x N.
A = np.random.randint(0, 256, (N, N), dtype=np.uint8)
# Random top-left corner such that an n x n window still fits inside A.
offy, offx = np.random.randint(0, N - n + 1, 2)
# Cut the window out and corrupt it: XOR with values in [0, 64) flips each of
# the six low bits (75 % of the eight bits) with probability one half.
a = np.bitwise_xor(
    A[offy:offy + n, offx:offx + n],
    np.random.randint(0, 64, (n, n)),
)
# The offset must be recovered despite the noise.
oyrec, oxrec = best_offs(A, a)
assert (oyrec, oxrec) == (offy, offx)

# ---- timing --------------------------------------------------------------
from timeit import timeit
# Total seconds for 100 calls, times 10, equals milliseconds per call.
elapsed = timeit(lambda: best_offs(A, a), number=100)
print(elapsed * 10, "ms")

# ---- degenerate case: all-zero template ----------------------------------
a.fill(0)
# Halve the picture inside the reference window so that this window has the
# smallest energy and is therefore the closest match to an all-zero template.
region = A[offy:offy + n, offx:offx + n]
region >>= 1
# The offset must again be recovered.
oyrec, oxrec = best_offs(A, a)
assert (oyrec, oxrec) == (offy, offx)
Примерный прогон:
3.458537160186097 ms