Это неправильно.Вам будет не хватать 0-3 байта конечных данных, потому что вы только суммируете DWORD-ы в данных, но у вас могут остаться некоторые байты, в зависимости от размера данных.Я бы предложил эти реализации :
#include <iostream>
#include <inttypes.h>
uint64_t fletcher64_A(const uint8_t * data, uint64_t count)
{
// calculate how many full double words the input has
uint64_t dwords = count / 4;
// now calculate the flechter-64 checksum from double words
uint64_t sum1 = 0;
uint64_t sum2 = 0;
const uint32_t * data32 = reinterpret_cast<const uint32_t*>(data);
for (uint64_t index = 0; index < dwords; ++index)
{
sum1 = (sum1 + data32[index]) % UINT32_MAX;
sum2 = (sum2 + sum1) % UINT32_MAX;
}
// calculate how many extra bytes, that do not fit into a double word, the input has
uint64_t remainingBytes = count - dwords * 4;
if (remainingBytes > 0)
{
// copy the excess bytes to our dummy variable. you could use memcpy here...
uint32_t dummy = 0;
for (uint64_t index = 0; index < remainingBytes; ++index)
{
reinterpret_cast<uint8_t*>(&dummy)[index] = data[dwords * 4 + index];
}
// now add the dummy on top
sum1 = (sum1 + dummy) % UINT32_MAX;
sum2 = (sum2 + sum1) % UINT32_MAX;
}
// build final checksum
return (sum2 << 32) | sum1;
}
uint64_t fletcher64_B(const uint8_t * data, uint64_t count)
{
// calculate how many full double words the input has
uint64_t dwords = count / 4;
// now calculate the flechter-64 checksum from double words
uint32_t sum1 = 0;
uint32_t sum2 = 0;
const uint32_t * data32 = reinterpret_cast<const uint32_t*>(data);
for (uint64_t index = 0; index < dwords; ++index)
{
sum1 += data32[index];
sum2 += sum1;
}
// calculate how many extra bytes, that do not fit into a double word, the input has
uint64_t remainingBytes = count - dwords * 4;
if (remainingBytes > 0)
{
// copy the excess bytes to our dummy variable. you could use memcpy here...
uint32_t dummy = 0;
for (uint64_t index = 0; index < remainingBytes; ++index)
{
reinterpret_cast<uint8_t*>(&dummy)[index] = data[dwords * 4 + index];
}
// now add the dummy on top
sum1 += dummy;
sum2 += sum1;
}
// build final checksum
return ((uint64_t)sum2 << 32) | sum1;
}
int main() {
const std::string data = "abcdefghijklmnopqrstuvwxyz0123456789helloworld!";
uint64_t ca = fletcher64_A(reinterpret_cast<const uint8_t*>(data.data()), data.size());
uint64_t cb = fletcher64_B(reinterpret_cast<const uint8_t*>(data.data()), data.size());
std::cout << "String size: " << data.size() << ", checksum a: 0x" << std::hex << ca << ", checksum b: 0x" << cb << std::endl;
return 0;
}
, которые учитывают завершающие байты.fletcher64_A является простой реализацией, а flechter64_B выполняет модуль по описанию Mecki.Вы можете видеть, что обе реализации выдают одинаковые значения (они не видят, см. Редактирование).
РЕДАКТИРОВАТЬ # 1: Это не совсем правильно, как прокомментировал Томас Темпельманн.Постараюсь исправить алгоритм в какой-то момент.Вы можете проверить, когда суммирование в алгоритме fletcher переполнится здесь .Это более или менее соответствует тому, что написано в википедии.
РЕДАКТИРОВАТЬ # 2: Я выполнил тест, используя шаблонизированную версию наивного и оптимизированного алгоритма.Скомпилируйте и запустите его, используя "g ++ -std = c ++ 11 -O2 -Wall -pedantic test.cpp && ./a.out".Вывод заключается в том, что 32-разрядная версия работает, но не 16- и 64-разрядные версии.Буду признателен, если вы найдете ошибку и комментарий.@Thomas Tempelmann: оптимизированная версия примерно в 6 раз быстрее на моем компьютере!
#include <iostream>
#include <inttypes.h>
#include <random>
#include <algorithm>
#include <chrono>
#include <limits>
uint64_t test_overflow(uint64_t start, uint64_t add, uint64_t check)
{
uint64_t count = 0;
uint64_t sum1 = start;
uint64_t sum2 = start;
do
{
sum2 += sum1 += add;
count++;
} while (sum1 + add < check && sum2 + (sum1 + add) < check);
return count;
}
template <class T, class R>
T fletcherTA(const uint8_t * data, const T & count, const T & start)
{
// calculate how many full R-words the input has
T rwords = count / sizeof(R);
// calculate how many extra bytes, that do not fit into an R-word, the input has
T remainingBytes = count - rwords * sizeof(R);
// now calculate the flechter-T checksum from R-words
T sum1 = start & std::numeric_limits<R>::max();
T sum2 = start >> (sizeof(R)*8);
const R * dataR = reinterpret_cast<const R*>(data);
while (rwords)
{
rwords--;
sum1 = (sum1 + *dataR++) % std::numeric_limits<R>::max();
sum2 = (sum2 + sum1) % std::numeric_limits<R>::max();
}
if (remainingBytes > 0)
{
// copy the excess bytes to our dummy variable. you could use memcpy here...
R dummy = 0;
const uint8_t * data8 = reinterpret_cast<const uint8_t*>(dataR);
for (uint64_t index = 0; index < remainingBytes; ++index)
{
reinterpret_cast<uint8_t*>(&dummy)[index] = data8[index];
}
// now add the dummy on top
sum1 = (sum1 + dummy) % std::numeric_limits<R>::max();
sum2 = (sum2 + sum1) % std::numeric_limits<R>::max();
}
// build final checksum
return (sum2 << sizeof(R)*8) | sum1;
}
template <class T, class R, T overflowAfter>
T fletcherTB(const uint8_t * data, const T & count, const T & start)
{
// calculate how many full R-words the input has
T rwords = count / sizeof(R);
// calculate how many extra bytes, that do not fit into an R-word, the input has
T remainingBytes = count - rwords * sizeof(R);
// now calculate the flechter-T checksum from R-words
T sum1 = start & std::numeric_limits<R>::max();
T sum2 = start >> (sizeof(R)*8);
const R * dataR = reinterpret_cast<const R*>(data);
while (rwords)
{
T tlen = ((rwords >= overflowAfter) ? overflowAfter : rwords);
rwords -= tlen;
do
{
sum2 += sum1 += *dataR++;
tlen--;
} while (tlen);
sum1 = (sum1 & std::numeric_limits<R>::max()) + (sum1 >> (sizeof(R)*8));
sum2 = (sum2 & std::numeric_limits<R>::max()) + (sum2 >> (sizeof(R)*8));
}
if (remainingBytes > 0)
{
// copy the excess bytes to our dummy variable. you could use memcpy here...
R dummy = 0;
const uint8_t * data8 = reinterpret_cast<const uint8_t*>(dataR);
for (uint64_t index = 0; index < remainingBytes; ++index)
{
reinterpret_cast<uint8_t*>(&dummy)[index] = data8[index];
}
// now add the dummy on top
sum2 += sum1 += dummy;
sum1 = (sum1 & std::numeric_limits<R>::max()) + (sum1 >> (sizeof(R)*8));
sum2 = (sum2 & std::numeric_limits<R>::max()) + (sum2 >> (sizeof(R)*8));
}
// build final checksum
return (sum2 << (sizeof(R)*8)) | sum1;
}
template <class T, class R, T overflowAfter>
void test_implementations()
{
std::cout << "Testing " << sizeof(T)*8 << " bit implementations:" << std::endl;
// test flechter overflow
std::cout << "Overflow after: " << test_overflow(0, std::numeric_limits<R>::max(), std::numeric_limits<T>::max() - std::numeric_limits<R>::max()) << " rounds (start value 0)." << std::endl;
// test fletcher checksum in both implementations with the same data
const uint64_t dataSize = 1 * 1024 * 1024 * 1024; // 1 * 1024 * 1 MB = 1 GB of test data
const uint64_t blockSize = std::min(std::min(dataSize, (uint64_t)10 * 1024 * 1024), (uint64_t)(std::numeric_limits<T>::max() - std::numeric_limits<T>::max() % 4));
const T oddBlockSize = static_cast<T>(blockSize - 1);
const uint64_t nrOfBlocks = dataSize / blockSize;
std::vector<uint32_t> data(blockSize / sizeof(uint32_t));
// initialize random number generator using current time
std::minstd_rand prng(std::chrono::high_resolution_clock::now().time_since_epoch().count());
std::cout << "Testing checksums with " << std::dec << dataSize / (1024 * 1024) << " MB of data in " << blockSize / 1024 << " kB blocks..." << std::endl;
T ca = 0;
T cb = 0;
for (uint64_t block = 0; block < nrOfBlocks; block++)
{
// generate random numbers
std::generate(data.begin(), data.end(), [&prng](){ return prng(); });
// apply checksum function. make sure to use an odd value to test remaining bytes being captured
ca = fletcherTA<T, R>(reinterpret_cast<const uint8_t*>(data.data()), oddBlockSize, ca);
cb = fletcherTB<T, R, overflowAfter>(reinterpret_cast<const uint8_t*>(data.data()), oddBlockSize, cb);
}
std::cout << "Checksum A: 0x" << std::hex << ca << std::endl;
std::cout << "Checksum B: 0x" << std::hex << cb << std::endl;
// test speed
const uint64_t runs = nrOfBlocks;
std::cout << "Testing speed with " << std::dec << dataSize / (1024 * 1024) << " MB of data..." << std::endl;
auto startA = std::chrono::high_resolution_clock::now();
for (uint64_t run = 0; run < runs; run++)
{
ca = fletcherTA<T, R>(reinterpret_cast<const uint8_t*>(data.data()), oddBlockSize, ca);
}
auto endA = std::chrono::high_resolution_clock::now();
auto startB = std::chrono::high_resolution_clock::now();
for (uint64_t run = 0; run < runs; run++)
{
cb = fletcherTB<T, R, overflowAfter>(reinterpret_cast<const uint8_t*>(data.data()), oddBlockSize, cb);
}
auto endB = std::chrono::high_resolution_clock::now();
std::cout << "Checksum A: 0x" << std::hex << ca << ", took " << std::dec << std::chrono::duration_cast<std::chrono::milliseconds>(endA-startA).count() << " ms" << std::endl;
std::cout << "Checksum B: 0x" << std::hex << cb << ", took " << std::dec << std::chrono::duration_cast<std::chrono::milliseconds>(endB-startB).count() << " ms" << std::endl;
std::cout << std::endl;
}
int main() {
test_implementations<uint16_t, uint8_t, 20>();
test_implementations<uint32_t, uint16_t, 359>();
test_implementations<uint64_t, uint32_t, 683442530>();
return 0;
}