Это оказалось довольно непросто, но задачу можно решить без циклов, сохранив относительный порядок строк исходного массива, например так (в этом случае сохраняются первые вхождения каждой строки):
import numpy as np
def drop_extra_repetitions(x, max_reps):
    """Keep at most ``max_reps`` copies of each distinct row of ``x``.

    The earliest occurrences of every distinct row are the ones retained, and
    the retained rows keep the relative order they had in ``x``.

    Args:
        x: Array-like whose entries along axis 0 are compared as whole rows.
        max_reps: Maximum number of copies of each distinct row to keep;
            expected to be a non-negative integer.

    Returns:
        numpy.ndarray holding the retained rows in their original order.
    """
    # Distinct rows, the distinct-row index of every input row, and how many
    # times each distinct row occurs.
    uniq, idx_inv, counts = np.unique(
        x, axis=0, return_inverse=True, return_counts=True)
    # The shape of the inverse index when ``axis`` is given has differed
    # between NumPy releases; flatten it so the sort below always runs over
    # one entry per input row.
    idx_inv = np.reshape(idx_inv, -1)
    # Number of copies of each distinct row that survive.
    counts_clip = np.minimum(counts, max_reps)
    n_uniq = len(uniq)
    # Alternate valid distinct-row indices with -1: [0, -1, 1, -1, ...].
    idx_to_repeat = np.stack(
        [np.arange(n_uniq), np.full(n_uniq, -1, dtype=int)], axis=1).ravel()
    # Repeat counts for the entries above: kept copies, then dropped copies.
    idx_repeats_clip = np.stack(
        [counts_clip, counts - counts_clip], axis=1).ravel()
    # Per distinct row: its index repeated for the kept copies, followed by
    # -1 for every extra repetition.
    idx_clip_sorted = np.repeat(idx_to_repeat, idx_repeats_clip)
    # Group input positions by distinct-row index. The sort MUST be stable:
    # only then do positions inside a group stay in input order, so that the
    # kept copies are the first occurrences rather than arbitrary ones.
    sorter = np.argsort(idx_inv, kind="stable")
    # Scatter back to input order: same as the inverse index, but with -1 on
    # the extra repetitions.
    idx_inv_final = np.empty(len(sorter), dtype=int)
    idx_inv_final[sorter] = idx_clip_sorted
    # Rebuild the rows from the inverse index, skipping the -1 positions.
    return uniq[idx_inv_final[idx_inv_final >= 0]]
# Demo: keep at most two copies of every distinct row, preserving input order.
max_reps = 2
x = [
    [5, 5, 5],
    [1, 2, 3],
    [1, 2, 3],
    [5, 5, 5],
    [1, 2, 3],
    [1, 2, 3],
]
print(drop_extra_repetitions(x, max_reps))
# Printed result:
# [[5 5 5]
#  [1 2 3]
#  [1 2 3]
#  [5 5 5]]
Если сохранять порядок не требуется, можно поступить проще (строки результата при этом окажутся отсортированными):
import numpy as np
def drop_extra_repetitions(x, max_reps):
    """Return the distinct rows of ``x``, each repeated at most ``max_reps`` times.

    The original row order is not preserved: rows come out in the order in
    which ``np.unique`` returns them, with copies of a row placed together.

    Args:
        x: Array-like whose entries along axis 0 are compared as whole rows.
        max_reps: Maximum number of copies of each distinct row to emit.

    Returns:
        numpy.ndarray with the capped repetitions of every distinct row.
    """
    distinct_rows, occurrences = np.unique(x, axis=0, return_counts=True)
    # Cap the multiplicity of every distinct row at max_reps.
    kept_per_row = np.minimum(occurrences, max_reps)
    # Repeating along axis 0 duplicates whole rows directly.
    return np.repeat(distinct_rows, kept_per_row, axis=0)
# Demo: keep at most two copies of every distinct row; order is not preserved.
max_reps = 2
x = [
    [5, 5, 5],
    [1, 2, 3],
    [1, 2, 3],
    [5, 5, 5],
    [1, 2, 3],
    [1, 2, 3],
]
print(drop_extra_repetitions(x, max_reps))
# Printed result:
# [[1 2 3]
#  [1 2 3]
#  [5 5 5]
#  [5 5 5]]