IL
/// Non-SIMD fallback implementation of a 128-bit logical (unsigned) right shift.
/// n: number of bit positions to right-shift a 16-byte memory image.
/// Vector(T) argument 'v' is passed by-ref and modified in-situ.
/// Layout order of the two 64-bit quads is little-endian:
/// the low quad is at byte offset 0, the high quad at byte offset 8.
/// NOTE(review): there is no range check on 'n'. The code only behaves as a
/// 128-bit shift for 0 < n < 64: with n == 0 the 'shl' count below is 64, and
/// ECMA-335 leaves shl/shr.un results unspecified for counts >= the operand
/// width. Callers must guarantee the range.
.method public static void SHR(Vector_T<uint64>& v, int32 n) aggressiveinlining
{
ldarg v            // stack: &v
dup                // stack: &v, &v
dup                // stack: &v, &v, &v
ldc.i4.8
add                // stack: &v, &v, &v+8   (address of the high quad)
ldind.i8           // stack: &v, &v, hi
ldc.i4.s 64
ldarg n
sub                // stack: &v, &v, hi, 64-n
shl                // stack: &v, &v, hi << (64-n)   (bits that cross into the low quad)
ldarg v
ldind.i8           // stack: &v, &v, hi << (64-n), lo
ldarg n
shr.un             // stack: &v, &v, hi << (64-n), lo >> n   (zero-filling shift)
or                 // stack: &v, &v, newLo
stind.i8           // low quad = newLo;  stack: &v
ldc.i4.8
add                // stack: &v+8
dup                // stack: &v+8, &v+8
ldind.i8           // stack: &v+8, hi   (the high quad has not been overwritten yet)
ldarg n
shr.un             // stack: &v+8, hi >> n
stind.i8           // high quad = hi >> n;  stack: (empty)
ret
}
псевдокод
// Low quad (byte offset 0): its own bits shifted down by n, OR-ed with the
// bits of the high quad (byte offset 8) that cross the 64-bit boundary.
// NOTE(review): assumes 0 < n < 64 - confirm against callers.
As<Vector<ulong>,ulong>(ref v) = (As<Vector<ulong>,ulong>(in v) >> n) |
(ByteOffsAs<Vector<ulong>,ulong>(in v, 8) << (64 - n));
// High quad: shifted right by n in place.
ByteOffsAs<Vector<ulong>,ulong>(ref v, 8) >>= n;
Внешнее объявление на C#
/// <summary>
/// Managed-side declarations for vector helpers whose bodies are not written in C#.
/// </summary>
static class vector_ext
{
    /// <summary>
    /// Shifts the 128-bit memory image of <paramref name="v"/> right by
    /// <paramref name="n"/> bit positions, modifying it in place.
    /// </summary>
    /// <remarks>
    /// Declaration only: <c>ForwardRef</c> states that the implementation is provided
    /// elsewhere (presumably the IL method of the same name, linked in as a separate module).
    /// </remarks>
    /// <param name="v">Vector to shift; passed by reference and overwritten.</param>
    /// <param name="n">Number of bit positions to shift by.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.ForwardRef)]
    public static extern void SHR(ref Vector<ulong> v, int n);
}
Промежуточные двоичные файлы .netmodule, полученные из IL (ilasm.exe) и C# (csc.exe), можно скомпоновать в одну сборку с помощью параметра /LTCG (генерация кода во время компоновки) утилиты link.exe.
Код, сгенерированный x64 JIT во время выполнения (.NET Framework 4.7.2):
0x7FF878F5C7E0 48 89 4C 24 08 mov qword ptr [rsp+8],rcx   ; save the by-ref argument v (rcx) on the stack
0x7FF878F5C7E5 8B C2 mov eax,edx                          ; eax = n
0x7FF878F5C7E7 F7 D8 neg eax                              ; eax = -n
0x7FF878F5C7E9 8D 48 40 lea ecx,[rax+40h]                 ; ecx = 64 - n
0x7FF878F5C7EC 48 8B 44 24 08 mov rax,qword ptr [rsp+8]   ; rax = &v
0x7FF878F5C7F1 4C 8B 40 08 mov r8,qword ptr [rax+8]       ; r8 = high quad
0x7FF878F5C7F5 49 D3 E0 shl r8,cl                         ; r8 = hi << (64 - n)
0x7FF878F5C7F8 4C 8B 08 mov r9,qword ptr [rax]            ; r9 = low quad
0x7FF878F5C7FB 8B CA mov ecx,edx                          ; ecx = n
0x7FF878F5C7FD 49 D3 E9 shr r9,cl                         ; r9 = lo >> n
0x7FF878F5C800 4D 0B C1 or r8,r9                          ; r8 = new low quad
0x7FF878F5C803 4C 89 00 mov qword ptr [rax],r8            ; store new low quad
0x7FF878F5C806 48 83 C0 08 add rax,8                      ; rax = address of high quad
0x7FF878F5C80A 8B CA mov ecx,edx                          ; ecx = n
0x7FF878F5C80C 48 D3 28 shr qword ptr [rax],cl            ; high quad >>= n, directly in memory
0x7FF878F5C80F C3 ret