Если представление с эффективным использованием памяти действительно невозможно (что, вероятно, имеет место), NumPy предлагает np.pad()
, который должен быть настолько эффективным, насколько это возможно.
Хотя этот параметр обеспечивает большую гибкость вывода, поддерживая ряд опций заполнения, а не только циклический - до mode='wrap'
, это кажется сравнительно медленным для этого варианта использования, и код может быть выполнен быстрее в ряде пути.
Наилучший компромисс в скорости и эффективности использования памяти - использование результата np.tile()
после подходящего np.roll()
(cyclic_padding_tile_roll()
). Обратите внимание, что шаг np.roll()
может быть пропущен (cyclic_padding_tile()
) за счет потенциально большего объема памяти, что также может снизить общую производительность.
В качестве альтернативы, эффективная для памяти и, как правило, быстрая реализация может быть получена с помощью нарезки (cyclic_padding_slicing()
), которая может быть значительно медленнее, чем другие подходы, как только базовая фигура содержится много раз в целевой фигуре.
Это код для решений, которые я тестировал.
Если не указано иное, все они должны работать для произвольных измерений.
Тот же базовый код используется для подготовки offsets
, используя тот факт, что:
import numpy as np
import functools
import itertools
def prod(items):
return functools.reduce(lambda x, y: x * y, base_shape)
def reduce_offsets(offsets, shape, direct=True):
offsets = tuple(
(offset if direct else (dim - offset)) % dim
for offset, dim in zip(offsets, shape))
return offsets
с использованием индексации (оригинальный подход):
def cyclic_padding_loops(arr, shape, offsets):
offsets = reduce_offsets(offsets, arr.shape)
result = np.zeros(shape, dtype=arr.dtype)
for i in itertools.product(*tuple(range(dim) for dim in result.shape)):
slicing = tuple(
(j + k) % dim
for j, k, dim in zip(i, offsets, arr.shape))
result[i] = arr[slicing]
return result
с использованием np.tile()
только
(используется тот же подход, что и @Divakar, но работает для произвольных измерений):
def cyclic_padding_tile(arr, shape, offsets):
offsets = reduce_offsets(offsets, arr.shape)
tiling = tuple(
new_dim // dim + (1 if new_dim % dim else 0) + (1 if offset else 0)
for offset, dim, new_dim in zip(offsets, arr.shape, shape))
slicing = tuple(
slice(offset, offset + new_dim)
for offset, new_dim in zip(offsets, shape))
result = np.tile(arr, tiling)[slicing]
return result
с использованием np.tile()
и np.roll()
:
def cyclic_padding_tile_roll(arr, shape, offsets):
offsets = reduce_offsets(offsets, arr.shape, False)
tiling = tuple(
new_dim // dim + (1 if new_dim % dim else 0)
for offset, dim, new_dim in zip(offsets, arr.shape, shape))
slicing = tuple(slice(0, new_dim) for new_dim in shape)
if any(offset != 0 for offset in offsets):
nonzero_offsets_axes, nonzero_offsets = tuple(zip(
*((axis, offset) for axis, offset in enumerate(offsets)
if offset != 0)))
arr = np.roll(arr, nonzero_offsets, nonzero_offsets_axes)
result = np.tile(arr, tiling)[slicing]
return result
с использованием np.pad()
только :
def cyclic_padding_pad(arr, shape, offsets):
offsets = reduce_offsets(offsets, arr.shape, False)
width = tuple(
(offset, new_dim - dim - offset)
for dim, new_dim, offset in zip(arr.shape, offsets))
result = np.pad(arr, width, mode='wrap')
return result
с использованием np.pad()
и np.roll()
:
def cyclic_padding_pad_roll(arr, shape, offsets):
offsets = reduce_offsets(offsets, arr.shape, False)
width = tuple(
(0, new_dim - dim)
for dim, new_dim, offset in zip(arr.shape, shape, offsets))
if any(offset != 0 for offset in offsets):
nonzero_offsets_axes, nonzero_offsets = tuple(zip(
*((axis, offset) for axis, offset in enumerate(offsets)
if offset != 0)))
arr = np.roll(arr, nonzero_offsets, nonzero_offsets_axes)
result = np.pad(arr, width, mode='wrap')
return result
с использованием цикла срезов :
def cyclic_padding_slicing(arr, shape, offsets):
offsets = reduce_offsets(offsets, arr.shape)
views = tuple(
tuple(
slice(max(0, dim * i - offset), dim * (i + 1) - offset)
for i in range((new_dim + offset) // dim))
+ (slice(dim * ((new_dim + offset) // dim) - offset, new_dim),)
for offset, dim, new_dim in zip(offsets, arr.shape, shape))
views = tuple(
tuple(slice_ for slice_ in view if slice_.start < slice_.stop)
for view in views)
result = np.zeros(shape, dtype=arr.dtype)
for view in itertools.product(*views):
slicing = tuple(
slice(None)
if slice_.stop - slice_.start == dim else (
slice(offset, offset + (slice_.stop - slice_.start))
if slice_.start == 0 else
slice(0, (slice_.stop - slice_.start)))
for slice_, offset, dim in zip(view, offsets, arr.shape))
result[view] = arr[slicing]
return result
с использованием шагов
(это, по существу, реализация @ B.M., адаптированная для n-dim входов):
def cyclic_padding_strides(arr, shape, offsets):
offsets = reduce_offsets(offsets, arr.shape)
chunks = tuple(
new_dim // dim + (1 if new_dim % dim else 0) + (1 if offset else 0)
for dim, new_dim, offset in zip(arr.shape, shape, offsets))
inner_shape = tuple(
x for chunk, dim in zip(chunks, arr.shape) for x in (chunk, dim))
outer_shape = tuple(
(chunk * dim) for chunk, dim in zip(chunks, arr.shape))
inner_strides = tuple(x for stride in arr.strides for x in (0, stride))
# outer_strides = tuple(x for stride in arr.strides for x in (0, stride))
slicing = tuple(
slice(offset, offset + new_dim)
for offset, new_dim in zip(offsets, shape))
result = np.lib.stride_tricks.as_strided(
arr, inner_shape, inner_strides, writeable=False).reshape(outer_shape)
result = result[slicing]
return result
Этот код используется для тестирования:
def test_cyclic_paddings(base_shape, shape, offsets, cyclic_paddings):
print('Base Shape: {}, Shape: {}, Offset: {}'.format(base_shape, shape, offsets))
arr = np.arange(prod(base_shape)).reshape(base_shape) + 1
ref_result = cyclic_paddings[0](arr, shape, offsets)
for cyclic_padding in cyclic_paddings:
test_result = cyclic_padding(arr, shape, offsets)
result = np.all(ref_result == test_result)
if not result:
print(ref_result)
print(test_result)
print(': {:24s} {:4s} '.format(cyclic_padding.__name__, 'OK' if result else 'FAIL'), end='')
timeit_result = %timeit -o cyclic_padding(arr, shape, offsets)
cyclic_nd_paddings = (
cyclic_padding_tile,
cyclic_padding_tile_roll,
cyclic_padding_pad,
cyclic_padding_pad_roll,
cyclic_padding_slicing,
cyclic_padding_loops,
cyclic_padding_strides,
)
inputs = (
((2, 3), (5, 7), (0, 0)),
((2, 3), (5, 7), (0, 1)),
((2, 3), (5, 7), (1, 1)),
((2, 3), (41, 43), (1, 1)),
((2, 3, 4, 5), (7, 11, 13, 17), (1, 2, 3, 4)),
((2, 3, 4, 5), (23, 31, 41, 53), (1, 2, 3, 4)),
((8, 8), (100, 100), (5, 7)),
((80, 80), (8000, 8000), (53, 73)),
((800, 800), (9000, 9000), (53, 73)),
)
for (base_shape, shape, offsets) in inputs:
test_cyclic_paddings(base_shape, shape, offsets, cyclic_nd_paddings)
print()
Для разных входов я получаю следующие результаты:
# Base Shape: (2, 3), Shape: (5, 7), Offset: (0, 0)
# : cyclic_padding_tile OK 6.54 µs ± 70.8 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
# : cyclic_padding_tile_roll OK 6.75 µs ± 29.1 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
# : cyclic_padding_pad OK 40.6 µs ± 2.44 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# : cyclic_padding_pad_roll OK 42 µs ± 4.49 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# : cyclic_padding_slicing OK 23 µs ± 693 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# : cyclic_padding_loops OK 34.7 µs ± 727 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# : cyclic_padding_strides OK 13.2 µs ± 210 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
# Base Shape: (2, 3), Shape: (5, 7), Offset: (0, 1)
# : cyclic_padding_tile OK 6.5 µs ± 223 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
# : cyclic_padding_tile_roll OK 19.8 µs ± 394 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# : cyclic_padding_pad OK 35.4 µs ± 329 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# : cyclic_padding_pad_roll OK 58 µs ± 579 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# : cyclic_padding_slicing OK 23.3 µs ± 321 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# : cyclic_padding_loops OK 33.7 µs ± 280 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# : cyclic_padding_strides OK 13.2 µs ± 194 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
# Base Shape: (2, 3), Shape: (5, 7), Offset: (1, 1)
# : cyclic_padding_tile OK 6.68 µs ± 138 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
# : cyclic_padding_tile_roll OK 23.2 µs ± 334 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# : cyclic_padding_pad OK 30.7 µs ± 236 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# : cyclic_padding_pad_roll OK 62.9 µs ± 1 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# : cyclic_padding_slicing OK 23.5 µs ± 266 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# : cyclic_padding_loops OK 34.6 µs ± 544 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# : cyclic_padding_strides OK 13.1 µs ± 104 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
# Base Shape: (2, 3), Shape: (41, 43), Offset: (1, 1)
# : cyclic_padding_tile OK 8.92 µs ± 63.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
# : cyclic_padding_tile_roll OK 25.2 µs ± 185 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# : cyclic_padding_pad OK 60.7 µs ± 450 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# : cyclic_padding_pad_roll OK 82.2 µs ± 656 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# : cyclic_padding_slicing OK 510 µs ± 1.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
# : cyclic_padding_loops OK 1.57 ms ± 26.9 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
# : cyclic_padding_strides OK 18.2 µs ± 639 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
# Base Shape: (2, 3, 4, 5), Shape: (7, 11, 13, 17), Offset: (1, 2, 3, 4)
# : cyclic_padding_tile OK 89 µs ± 3.18 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# : cyclic_padding_tile_roll OK 81.3 µs ± 1.24 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# : cyclic_padding_pad OK 106 µs ± 2.77 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# : cyclic_padding_pad_roll OK 148 µs ± 9.02 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# : cyclic_padding_slicing OK 977 µs ± 8.11 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
# : cyclic_padding_loops OK 18.8 ms ± 342 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
# : cyclic_padding_strides OK 101 µs ± 1.86 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# Base Shape: (2, 3, 4, 5), Shape: (23, 31, 41, 53), Offset: (1, 2, 3, 4)
# : cyclic_padding_tile OK 2.8 ms ± 112 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
# : cyclic_padding_tile_roll OK 2.05 ms ± 28.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
# : cyclic_padding_pad OK 6.35 ms ± 237 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
# : cyclic_padding_pad_roll OK 5.81 ms ± 172 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
# : cyclic_padding_slicing OK 40.4 ms ± 838 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
# : cyclic_padding_loops OK 1.71 s ± 44.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
# : cyclic_padding_strides OK 3 ms ± 64.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
# Base Shape: (8, 8), Shape: (100, 100), Offset: (5, 7)
# : cyclic_padding_tile OK 16.3 µs ± 901 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
# : cyclic_padding_tile_roll OK 32.6 µs ± 151 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# : cyclic_padding_pad OK 65.6 µs ± 229 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# : cyclic_padding_pad_roll OK 88.9 µs ± 1.05 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# : cyclic_padding_slicing OK 333 µs ± 1.86 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
# : cyclic_padding_loops OK 8.71 ms ± 58.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
# : cyclic_padding_strides OK 25.1 µs ± 255 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# Base Shape: (80, 80), Shape: (8000, 8000), Offset: (53, 73)
# : cyclic_padding_tile OK 148 ms ± 325 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
# : cyclic_padding_tile_roll OK 151 ms ± 1.51 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
# : cyclic_padding_pad OK 443 ms ± 9.42 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
# : cyclic_padding_pad_roll OK 442 ms ± 8.64 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
# : cyclic_padding_slicing OK 182 ms ± 469 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
# : cyclic_padding_loops OK 58.8 s ± 256 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
# : cyclic_padding_strides OK 150 ms ± 534 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
# Base Shape: (800, 800), Shape: (9000, 9000), Offset: (53, 73)
# : cyclic_padding_tile OK 269 ms ± 1.11 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
# : cyclic_padding_tile_roll OK 234 ms ± 1.39 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
# : cyclic_padding_pad OK 591 ms ± 3.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
# : cyclic_padding_pad_roll OK 582 ms ± 4.57 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
# : cyclic_padding_slicing OK 250 ms ± 4.43 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
# : cyclic_padding_loops OK 1min 17s ± 855 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
# : cyclic_padding_strides OK 280 ms ± 2.28 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)