попробуйте это:
// some bit operations stuff
const unsigned char de_brujin_bit_map_64 [] =
{
0,1,2,7,3,13,8,19,4,25,14,28,9,34,20,40,5,17,26,38,15,46,29,48,10,31,35,54,21,50,41,57,
63,6,12,18,24,27,33,39,16,37,45,47,30,53,49,56,62,11,23,32,36,44,52,55,61,22,43,51,60,42,59,58
};
inline uint8_t trailing_zero_count(uint64_t x) { return x?de_brujin_bit_map_64[(lower_bit(x)*0x0218A392CD3D5DBFL) >> 58]:64; }
inline uint8_t leading_zero_count(uint64_t x) { return x?(63-de_brujin_bit_map_64[(upper_bit(x)*0x0218A392CD3D5DBFL) >> 58]):64; }
inline uint64_t lower_bit(uint64_t x) { return x & -(int64_t&)x; }
inline uint64_t upper_bit(uint64_t x)
{
if(!x) return 0;
x |= x >> 1; x |= x >> 2; x |= x >> 4; x |= x >> 8; x |= x >> 16; x |= x >> 32;
return (x >> 1) + 1;
}
inline uint128_t upper_bit(const uint128_t x)
{
if(x.upper()) return uint128_t(upper_bit(x.upper()), 0);
else return uint128_t(0, upper_bit(x.lower()));
}
inline uint128_t lower_bit(const uint128_t x)
{
if(x.lower()) return uint128_t(0, lower_bit(x.lower()));
else return uint128_t(lower_bit(x.upper()), 0);
}
inline uint8_t trailing_zero_count(const uint128_t& x) { return x.lower()?trailing_zero_count(x.lower()):(64+trailing_zero_count(x.upper())); }
inline uint8_t leading_zero_count(const uint128_t& x) { return x.upper()?leading_zero_count(x.upper()):(64+leading_zero_count(x.lower())); }
// division operator
uint128_t uint128_t::operator/(const uint128_t& rhs) const
{
if(rhs == 0) return uint128_t(0); // !!!! zero division
if(rhs == rhs) return uint128_t(1);
if(rhs > *this) return uint128_t(0);
if(rhs == 1) return *this;
if(!upper_ && !rhs.upper_) return uint128_t(0, lower_/rhs.lower_);
if(lower_bit(rhs) == rhs) return *this >> trailing_zero_count(rhs);
uint128_t result;
uint128_t bit_mask = upper_bit();
uint128_t denom = 1;
do
{
bit_mask >>= 1;
denom <<= 1;
if(*this & bit_mask) denom |= 1;
result <<= 1;
if(denom >= rhs) { denom -= rhs; result |= 1; }
}
while (bit_mask.lower_ != 1);
return result;
}