Question

Я реализовал векторное приближение c # к Log. Включает небезопасный код. Он отлично работал в ряде сред, но недавнее развертывание упало. Реализация использует SIMD через библиотеку System.Numerics.Vectors.

К сожалению, я не могу проверить систему, где программное обеспечение не работает. Однако я хотел бы знать, какие предположения относительно библиотеки являются недействительными:

Вектор .Count всегда возвращает степень 2?
Имеет ли Vector .Count == Vector.Count * 2?
Могу ли я взять указатель на вектор с помощью Unsafe.AsPointer, а затем выполнить стандартные операции, как если бы это были N упакованных чисел в памяти?
Существуют ли какие-либо процессоры, на которых работает dotNet 4, с другим порядком байтов или они не хранят числа с плавающей запятой в формате IEEE754?

Код выглядит следующим образом:

const float invLn2 = 1.44269504089f; // 1 / ln(2)
        const float pow2_126 = 8.5070592e+37f; //2^126

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector<float> QuickLog2(Vector<float> vecOrig)
        {
            //32 bit Float specification:
            //Leftmost bit is sign bit.
            //Next 8 bits are exponent
            //Next 23 bits are mantissa
            unsafe
            {
                var ints = Vector.AsVectorUInt32(vecOrig);

                var exponents = Vector.BitwiseAnd(ints, new Vector<uint>(0x7F800000));
                BitshiftVector23(Unsafe.AsPointer(ref exponents));

                var unsignedExponents = exponents - new Vector<uint>(127);
                var signedExponents = Vector.AsVectorInt32(unsignedExponents);
                var localMantissBitmask = Vector.AsVectorSingle(new Vector<UInt32>(0x807FFFFF));
                var maskedMantissas = Vector.BitwiseAnd(vecOrig, localMantissBitmask);
                var mantissas = maskedMantissas * new Vector<float>(pow2_126);

                var mantissasLogged = LogPolynomialFunction2(mantissas) * new Vector<float>(invLn2);

                Vector<float> floatExponents;
#if false
                floatExponents = Vector.ConvertToSingle(signedExponents);               
#else
                ConvertIntToFloatInPace(Unsafe.AsPointer(ref signedExponents));
                floatExponents = Vector.AsVectorSingle(signedExponents);
#endif

                return mantissasLogged + floatExponents;
            }
        }

        const float log10_2 = 0.30102999566398119521373889472449f;
        /// <summary>
        /// A vectorized implementation of Log10(N). Uses bitshift, bitmasks, and unsafe code.
        /// Does not have the same safety as Math.Log10: Behaviour for infities, zero, negative numbers are undefined.
        /// </summary>
        /// <param name="vec">The vector to take the log of</param>
        /// <returns>The log, to the base 10, of the vector</returns>
        /// <remarks>
        /// Accurate to about 10^-7, which is the limit of a 32 bit float anyway.
        /// In my (BS) tests, takes about twice as long to run on as Math.Log10(...), but operates on 8 numbers,
        /// so 4x faster.
        /// Reverts to Math.Log10(...) if vectors are not hardware accelerated. 
        /// Given the extra memory copies required, that will be much slower than using scalar code.
        /// It'll be nice once intrinsics make it into dotNet and we can replace this with a single instruction...
        /// </remarks>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector<float> QuickLog10(Vector<float> vec)
        {
            if (Vector.IsHardwareAccelerated)
                return QuickLog2(vec) * new Vector<float>(log10_2);
            else
            {
                float[] tmp = new float[Vector<float>.Count];
                vec.CopyTo(tmp);
                for (int i = 0; i < Vector<float>.Count; i++)
                    tmp[i] = (float)Math.Log10(tmp[i]);
                return new Vector<float>(tmp);
            }
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static unsafe void BitshiftVector23(void* vector)
        {
            UInt64* asUlong = (UInt64*)vector;
            if (Vector<UInt64>.Count == 4)
            {
                *asUlong = *asUlong >> 23;
                asUlong++;
                *asUlong = *asUlong >> 23;
                asUlong++;
                *asUlong = *asUlong >> 23;
                asUlong++;
                *asUlong = *asUlong >> 23;
            }
            else if (Vector<UInt64>.Count == 8)
            {
                *asUlong = *asUlong >> 23;
                asUlong++;
                *asUlong = *asUlong >> 23;
                asUlong++;
                *asUlong = *asUlong >> 23;
                asUlong++;
                *asUlong = *asUlong >> 23;
                asUlong++;
                *asUlong = *asUlong >> 23;
                asUlong++;
                *asUlong = *asUlong >> 23;
                asUlong++;
                *asUlong = *asUlong >> 23;
                asUlong++;
                *asUlong = *asUlong >> 23;
            }
            else
                for (int i = 0; i < Vector<UInt64>.Count; i++)
                    asUlong[i] = asUlong[i] >> 23;
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static unsafe void ConvertIntToFloatInPace(void* vector)
        {
            int* asInt = (int*)vector;
            if (Vector<int>.Count == 8)
            {
                *(float*)asInt = *asInt;
                asInt++;
                *(float*)asInt = *asInt;
                asInt++;
                *(float*)asInt = *asInt;
                asInt++;
                *(float*)asInt = *asInt;
                asInt++;
                *(float*)asInt = *asInt;
                asInt++;
                *(float*)asInt = *asInt;
                asInt++;
                *(float*)asInt = *asInt;
                asInt++;
                *(float*)asInt = *asInt;
                asInt++;
            }
            else if (Vector<UInt64>.Count == 16)
            {
                for (int i = 0; i < 2; i++)
                {
                    *(float*)asInt = *asInt;
                    asInt++;
                    *(float*)asInt = *asInt;
                    asInt++;
                    *(float*)asInt = *asInt;
                    asInt++;
                    *(float*)asInt = *asInt;
                    asInt++;
                    *(float*)asInt = *asInt;
                    asInt++;
                    *(float*)asInt = *asInt;
                    asInt++;
                    *(float*)asInt = *asInt;
                    asInt++;
                    *(float*)asInt = *asInt;
                    asInt++;
                }
            }
            else
                for (int i = 0; i < Vector<UInt64>.Count; i++)
                {
                    *(float*)asInt = *asInt;
                    asInt++;
                }
        }


        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static Vector<float> LogPolynomialFunction2(Vector<float> mantissas)
        {
            var zm1 = mantissas;
            var zp1 = mantissas + new Vector<float>(2f);
            var zm1Divzp1 = Vector.Divide(zm1, zp1);
            var squared = zm1Divzp1 * zm1Divzp1;
            var cur = zm1Divzp1;

            //Manual loop unwinding:
#if false
                var mantissasLogged = Vector<float>.Zero;
                for (float i = 0; i < 4; i++)
                {
                    var fac = 2f / (2f * i + 1f);
                    mantissasLogged += cur * new Vector<float>(fac);
                    cur *= squared;
                }
#else
            //i = 0;
            const float fac0 = 2f / (2 * 0 + 1);
            var mantissasLogged = cur * new Vector<float>(fac0);
            cur *= squared;

            //i = 1;
            const float fac1 = 2f / (2 * 1 + 1);
            mantissasLogged += cur * new Vector<float>(fac1);
            cur *= squared;

            //i = 2;
            const float fac2 = 2f / (2 * 2 + 1);
            mantissasLogged += cur * new Vector<float>(fac2);
            cur *= squared;

            //i = 3;
            const float fac3 = 2f / (2 * 3 + 1);
            mantissasLogged += cur * new Vector<float>(fac3);
            cur *= squared;

            //i = 4;
            const float fac4 = 2f / (2 * 4 + 1);
            mantissasLogged += cur * new Vector<float>(fac4);
#endif
            return mantissasLogged;
        }

РЕДАКТИРОВАТЬ: Я поместил несколько простых тестов в программу при запуске. Vector.IsHardwareAccelerated == true; Vector.Count == 4; Этот векторизованный журнал дает правильный ответ для первых двух входов, но неверный для вторых двух. Возможно, предположение, что Unsafe.AsPointer (Vector) дает мне указатель на векторные элементы, поскольку четыре последовательных числа с плавающей точкой неверны.

Выходные данные журнала:

DEBUG Vector.IsHardwareAccelerated: True 
DEBUG Vector<float>.Count: 4 
DEBUG Vector<Uint64>.Count: 2 
DEBUG MathUtils test input data: 5.967E+009,1.072E+006,9.521E+017,4.726E+000 
DEBUG MathUtils required output: 9.776,6.030,17.979,0.674 
DEBUG MathUtils actual output: 9.776,6.030,0.218,0.072

(еще не было возможности проверить битовые комбинации ...)

TheGeneral · Answer 1 · 18 июня 2019

IEEE 754 Стандарт с плавающей запятой не определяет порядковый номер, это определенно может быть проблемой здесь (в зависимости от того, на чем вы работаете)

Вы можете использовать BitConverter.IsLittleEndian и соответственно изменяются

Указывает порядок байтов («порядковый номер»), в котором хранятся данные в этой компьютерной архитектуре.

Какие гарантии дает System.Numerics.Vectors относительно размера и порядка следования битов?

Пожалуйста, войдите или зарегистрируйтесь чтобы ответить на этот вопрос.

1 Ответ

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Какие гарантии дает System.Numerics.Vectors относительно размера и порядка следования битов?

Пожалуйста, войдите или зарегистрируйтесь чтобы ответить на этот вопрос.

1 Ответ

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Похожие темы