Вот решение:
import numpy as np
m = 7
a = np.array([
[0, 1, 2],
[0, 1, 3],
[2, 3, 4],
[4, 5, 6],
# ...
])
print('a:')
print(a)
a_flat = a.flatten() # Or a.ravel() if can modify original array
v1, idx1 = np.unique(a_flat, return_index=True)
a_flat[idx1] = -1
v2, idx2 = np.unique(a_flat, return_index=True)
v2, idx2 = v2[1:], idx2[1:]
rows1, cols1 = np.unravel_index(idx1, a.shape)
rows2, cols2 = np.unravel_index(idx2, a.shape)
b_row = -np.ones((m, 2), dtype=int)
b_col = -np.ones((m, 2), dtype=int)
b_row[v1, 0] = rows1
b_col[v1, 0] = cols1
b_row[v2, 1] = rows2
b_col[v2, 1] = cols2
print('b_row:')
print(b_row)
print('b_col:')
print(b_col)
Выход:
a:
[[0 1 2]
[0 1 3]
[2 3 4]
[4 5 6]]
b_row:
[[ 0 1]
[ 0 1]
[ 0 2]
[ 1 2]
[ 2 3]
[ 3 -1]
[ 3 -1]]
b_col:
[[ 0 0]
[ 1 1]
[ 2 0]
[ 2 1]
[ 2 0]
[ 1 -1]
[ 2 -1]]
EDIT:
Небольшой бенчмарк в IPython для сравнения. Как указано @ eozd , алгоритмическая сложность в принципе выше из-за того, что np.unique
работает в O (n), но векторизованное решение все еще намного быстрее для практических размеров:
import numpy as np
def method_orig(a, m):
b_row = -np.ones((m, 2), dtype=int)
b_col = -np.ones((m, 2), dtype=int)
count = np.zeros(m, dtype=int)
for k, row in enumerate(a):
i = count[row]
b_row[row, i] = k
b_col[row, i] = [0, 1, 2]
count[row] += 1
return b_row, b_col
def method_jdehesa(a, m):
a_flat = a.flatten() # Or a.ravel() if can modify original array
v1, idx1 = np.unique(a_flat, return_index=True)
a_flat[idx1] = -1
v2, idx2 = np.unique(a_flat, return_index=True)
v2, idx2 = v2[1:], idx2[1:]
rows1, cols1 = np.unravel_index(idx1, a.shape)
rows2, cols2 = np.unravel_index(idx2, a.shape)
b_row = -np.ones((m, 2), dtype=int)
b_col = -np.ones((m, 2), dtype=int)
b_row[v1, 0] = rows1
b_col[v1, 0] = cols1
b_row[v2, 1] = rows2
b_col[v2, 1] = cols2
return b_row, b_col
n = 100000
c = 3
m = 200000
# Generate random input
# This does not respect "no doubled indices in row" but is good enough for testing
np.random.seed(100)
a = np.random.permutation(np.concatenate([np.arange(m), np.arange(m)]))[:(n * c)].reshape((n, c))
%timeit method_orig(a, m)
# 3.22 s ± 1.3 s per loop (mean ± std. dev. of 7 runs, 1 loop each)
%timeit method_jdehesa(a, m)
# 108 ms ± 764 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)