Question

Исходный код fletcher16 приводится примерно так: wikipedia .

uint16_t Fletcher16( uint8_t* data, int count )
{
   uint16_t sum1 = 0;
   uint16_t sum2 = 0;
   int index;

   for( index = 0; index < count; ++index )
   {
      sum1 = (sum1 + data[index]) % 255;
      sum2 = (sum2 + sum1) % 255;
   }

   return (sum2 << 8) | sum1;
}

Из этого примера я реализовал 64-битную версию следующим образом.

unsigned long Fletcher64( unsigned int* data, int count )
{
   unsigned long sum1 = 0;
   unsigned long sum2 = 0;
   int index;

   for( index = 0; index < count; ++index )
   {
      sum1 = (sum1 + data[index]) % UINT_MAX; // UINT_MAX = 2^32
      sum2 = (sum2 + sum1) % UINT_MAX;
   }

   return (sum2 << 32) | sum1;
}

Мой подход правильный, или я что-то не так делаю?

wallyk · Answer 1 · 01 марта 2012

Большинство алгоритмов хэширования принимают параметры блока памяти и количество байтов (или иногда битов) для хэширования.Вы изменили счетчик байтов на счетчик слов - довольно значительное изменение.

Кроме того, поскольку вы изменили размер вычисления, изменился способ вычисления sum2.Это нормально, если вы не пытаетесь реплицировать исходный алгоритм значение-значение-значение, но если это подразумевается как совместимая оптимизация, то это, вероятно, неправильно.

Bim · Answer 2 · 07 января 2017

Это неправильно.Вам будет не хватать 0-3 байта конечных данных, потому что вы только суммируете DWORD-ы в данных, но у вас могут остаться некоторые байты, в зависимости от размера данных.Я бы предложил эти реализации :

#include <iostream>
#include <inttypes.h>

uint64_t fletcher64_A(const uint8_t * data, uint64_t count)
{
    // calculate how many full double words the input has
    uint64_t dwords = count / 4;
    // now calculate the flechter-64 checksum from double words
    uint64_t sum1 = 0;
    uint64_t sum2 = 0;
    const uint32_t * data32 = reinterpret_cast<const uint32_t*>(data);
    for (uint64_t index = 0; index < dwords; ++index) 
    {
        sum1 = (sum1 + data32[index]) % UINT32_MAX;
        sum2 = (sum2 + sum1) % UINT32_MAX;
    }
    // calculate how many extra bytes, that do not fit into a double word, the input has
    uint64_t remainingBytes = count - dwords * 4;
    if (remainingBytes > 0)
    {
        // copy the excess bytes to our dummy variable. you could use memcpy here...
        uint32_t dummy = 0;
        for (uint64_t index = 0; index < remainingBytes; ++index)
        {
            reinterpret_cast<uint8_t*>(&dummy)[index] = data[dwords * 4 + index];
        }
        // now add the dummy on top
        sum1 = (sum1 + dummy) % UINT32_MAX;
        sum2 = (sum2 + sum1) % UINT32_MAX;
    }
    // build final checksum
    return (sum2 << 32) | sum1;
}

uint64_t fletcher64_B(const uint8_t * data, uint64_t count)
{
    // calculate how many full double words the input has
    uint64_t dwords = count / 4;
    // now calculate the flechter-64 checksum from double words
    uint32_t sum1 = 0;
    uint32_t sum2 = 0;
    const uint32_t * data32 = reinterpret_cast<const uint32_t*>(data);
    for (uint64_t index = 0; index < dwords; ++index) 
    {
        sum1 += data32[index];
        sum2 += sum1;
    }
    // calculate how many extra bytes, that do not fit into a double word, the input has
    uint64_t remainingBytes = count - dwords * 4;
    if (remainingBytes > 0)
    {
        // copy the excess bytes to our dummy variable. you could use memcpy here...
        uint32_t dummy = 0;
        for (uint64_t index = 0; index < remainingBytes; ++index)
        {
            reinterpret_cast<uint8_t*>(&dummy)[index] = data[dwords * 4 + index];
        }
        // now add the dummy on top
        sum1 += dummy;
        sum2 += sum1;
    }
    // build final checksum
    return ((uint64_t)sum2 << 32) | sum1;
}

int main() {
    const std::string data = "abcdefghijklmnopqrstuvwxyz0123456789helloworld!";
    uint64_t ca = fletcher64_A(reinterpret_cast<const uint8_t*>(data.data()), data.size());
    uint64_t cb = fletcher64_B(reinterpret_cast<const uint8_t*>(data.data()), data.size());
    std::cout << "String size: " << data.size() << ", checksum a: 0x" << std::hex << ca << ", checksum b: 0x" << cb << std::endl;
    return 0;
}

, которые учитывают завершающие байты.fletcher64_A является простой реализацией, а flechter64_B выполняет модуль по описанию Mecki.Вы можете видеть, что обе реализации выдают одинаковые значения (они не видят, см. Редактирование).

РЕДАКТИРОВАТЬ # 1: Это не совсем правильно, как прокомментировал Томас Темпельманн.Постараюсь исправить алгоритм в какой-то момент.Вы можете проверить, когда суммирование в алгоритме fletcher переполнится здесь .Это более или менее соответствует тому, что написано в википедии.

РЕДАКТИРОВАТЬ # 2: Я выполнил тест, используя шаблонизированную версию наивного и оптимизированного алгоритма.Скомпилируйте и запустите его, используя "g ++ -std = c ++ 11 -O2 -Wall -pedantic test.cpp && ./a.out".Вывод заключается в том, что 32-разрядная версия работает, но не 16- и 64-разрядные версии.Буду признателен, если вы найдете ошибку и комментарий.@Thomas Tempelmann: оптимизированная версия примерно в 6 раз быстрее на моем компьютере!

#include <iostream>
#include <inttypes.h>
#include <random>
#include <algorithm>
#include <chrono>
#include <limits>

uint64_t test_overflow(uint64_t start, uint64_t add, uint64_t check)
{
    uint64_t count = 0;
    uint64_t sum1 = start;
    uint64_t sum2 = start;
    do
    {
        sum2 += sum1 += add;
        count++;
    } while (sum1 + add < check && sum2 + (sum1 + add) < check);
    return count;
}

template <class T, class R>
T fletcherTA(const uint8_t * data, const T & count, const T & start)
{
    // calculate how many full R-words the input has
    T rwords = count / sizeof(R);
    // calculate how many extra bytes, that do not fit into an R-word, the input has
    T remainingBytes = count - rwords * sizeof(R);
    // now calculate the flechter-T checksum from R-words
    T sum1 = start & std::numeric_limits<R>::max();
    T sum2 = start >> (sizeof(R)*8);
    const R * dataR = reinterpret_cast<const R*>(data);
    while (rwords)
    {
        rwords--;
        sum1 = (sum1 + *dataR++) % std::numeric_limits<R>::max();
        sum2 = (sum2 + sum1) % std::numeric_limits<R>::max();
    }
    if (remainingBytes > 0)
    {
        // copy the excess bytes to our dummy variable. you could use memcpy here...
        R dummy = 0;
        const uint8_t * data8 = reinterpret_cast<const uint8_t*>(dataR);
        for (uint64_t index = 0; index < remainingBytes; ++index)
        {
            reinterpret_cast<uint8_t*>(&dummy)[index] = data8[index];
        }
        // now add the dummy on top
        sum1 = (sum1 + dummy) % std::numeric_limits<R>::max();
        sum2 = (sum2 + sum1) % std::numeric_limits<R>::max();
    }
    // build final checksum
    return (sum2 << sizeof(R)*8) | sum1;
}

template <class T, class R, T overflowAfter>
T fletcherTB(const uint8_t * data, const T & count, const T & start)
{
    // calculate how many full R-words the input has
    T rwords = count / sizeof(R);
    // calculate how many extra bytes, that do not fit into an R-word, the input has
    T remainingBytes = count - rwords * sizeof(R);
    // now calculate the flechter-T checksum from R-words
    T sum1 = start & std::numeric_limits<R>::max();
    T sum2 = start >> (sizeof(R)*8);
    const R * dataR = reinterpret_cast<const R*>(data);
    while (rwords)
    {
        T tlen = ((rwords >= overflowAfter) ? overflowAfter : rwords);     
        rwords -= tlen;
        do 
        {
            sum2 += sum1 += *dataR++;
            tlen--;
        } while (tlen);
        sum1 = (sum1 & std::numeric_limits<R>::max()) + (sum1 >> (sizeof(R)*8));
        sum2 = (sum2 & std::numeric_limits<R>::max()) + (sum2 >> (sizeof(R)*8));
    }
    if (remainingBytes > 0)
    {
        // copy the excess bytes to our dummy variable. you could use memcpy here...
        R dummy = 0;
        const uint8_t * data8 = reinterpret_cast<const uint8_t*>(dataR);
        for (uint64_t index = 0; index < remainingBytes; ++index)
        {
            reinterpret_cast<uint8_t*>(&dummy)[index] = data8[index];
        }
        // now add the dummy on top
        sum2 += sum1 += dummy;
        sum1 = (sum1 & std::numeric_limits<R>::max()) + (sum1 >> (sizeof(R)*8));
        sum2 = (sum2 & std::numeric_limits<R>::max()) + (sum2 >> (sizeof(R)*8));
    }
    // build final checksum
    return (sum2 << (sizeof(R)*8)) | sum1;
}

template <class T, class R, T overflowAfter>
void test_implementations()
{
    std::cout << "Testing " << sizeof(T)*8 << " bit implementations:" << std::endl;
    // test flechter overflow
    std::cout << "Overflow after: " << test_overflow(0, std::numeric_limits<R>::max(), std::numeric_limits<T>::max() - std::numeric_limits<R>::max()) << " rounds (start value 0)." << std::endl;
    // test fletcher checksum in both implementations with the same data
    const uint64_t dataSize = 1 * 1024 * 1024 * 1024; // 1 * 1024 * 1 MB = 1 GB of test data
    const uint64_t blockSize = std::min(std::min(dataSize, (uint64_t)10 * 1024 * 1024), (uint64_t)(std::numeric_limits<T>::max() - std::numeric_limits<T>::max() % 4));
    const T oddBlockSize = static_cast<T>(blockSize - 1);   
    const uint64_t nrOfBlocks = dataSize / blockSize;
    std::vector<uint32_t> data(blockSize / sizeof(uint32_t));
    // initialize random number generator using current time
    std::minstd_rand prng(std::chrono::high_resolution_clock::now().time_since_epoch().count());
    std::cout << "Testing checksums with " << std::dec << dataSize / (1024 * 1024) << " MB of data in " << blockSize / 1024 << " kB blocks..." << std::endl;
    T ca = 0;
    T cb = 0;   
    for (uint64_t block = 0; block < nrOfBlocks; block++)
    {
        // generate random numbers
        std::generate(data.begin(), data.end(), [&prng](){ return prng(); });
        // apply checksum function. make sure to use an odd value to test remaining bytes being captured
        ca = fletcherTA<T, R>(reinterpret_cast<const uint8_t*>(data.data()), oddBlockSize, ca);
        cb = fletcherTB<T, R, overflowAfter>(reinterpret_cast<const uint8_t*>(data.data()), oddBlockSize, cb);
    }
    std::cout << "Checksum A: 0x" << std::hex << ca << std::endl;
    std::cout << "Checksum B: 0x" << std::hex << cb << std::endl;
    // test speed
    const uint64_t runs = nrOfBlocks;
    std::cout << "Testing speed with " << std::dec << dataSize / (1024 * 1024) << " MB of data..." << std::endl;
    auto startA = std::chrono::high_resolution_clock::now();
    for (uint64_t run = 0; run < runs; run++)
    {
        ca = fletcherTA<T, R>(reinterpret_cast<const uint8_t*>(data.data()), oddBlockSize, ca);
    }
    auto endA = std::chrono::high_resolution_clock::now();
    auto startB = std::chrono::high_resolution_clock::now();
    for (uint64_t run = 0; run < runs; run++)
    {
        cb = fletcherTB<T, R, overflowAfter>(reinterpret_cast<const uint8_t*>(data.data()), oddBlockSize, cb);
    }
    auto endB = std::chrono::high_resolution_clock::now();
    std::cout << "Checksum A: 0x" << std::hex << ca << ", took " << std::dec << std::chrono::duration_cast<std::chrono::milliseconds>(endA-startA).count() << " ms" << std::endl;
    std::cout << "Checksum B: 0x" << std::hex << cb << ", took " << std::dec << std::chrono::duration_cast<std::chrono::milliseconds>(endB-startB).count() << " ms" << std::endl;
    std::cout << std::endl;
}

int main() {
    test_implementations<uint16_t, uint8_t, 20>();
    test_implementations<uint32_t, uint16_t, 359>();
    test_implementations<uint64_t, uint32_t, 683442530>();
    return 0;
}

orlp · Answer 3 · 01 марта 2012

Да, похоже, что это правильно, но не забудьте проверить это.

Это правильный способ вычисления контрольной суммы fletcher64

Пожалуйста, войдите или зарегистрируйтесь чтобы ответить на этот вопрос.

Ответы [ 3 ]

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Это правильный способ вычисления контрольной суммы fletcher64

Пожалуйста, войдите или зарегистрируйтесь чтобы ответить на этот вопрос.

Ответы [ 3 ]

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Пожалуйста, войдите или зарегистрируйтесь что бы добавить комментарий.

Похожие темы