Я придумал следующее, это тестируется здесь: https://wandbox.org/permlink/g3FDe88vKgdPsGLC Он должен работать на любом процессоре, поскольку он использует только байтовый доступ (хотя и не тестировался на платформе с прямым порядком байтов).
#include <stdint.h>
#include <stdbool.h>
/**
* shift a buffer left with byte granularity (little endian)
*
* @param dst destination buffer, can be equal to src
* @param src source buffer
* @param byte_size length in bytes of src/dst
* @param byte_shift shift amount in bytes
* @param fill8 fill value for the LSBs
*
* byte_shift is allowed to be larger than byte_size, it behaves like they are equal
*/
static void bufshl(void*const dst,const void*const src,size_t byte_size, size_t byte_shift, uint8_t fill8){
if(0==byte_size) return;
if(byte_shift>byte_size) byte_shift=byte_size;
uint8_t*const dst8=(uint8_t*)dst;
const uint8_t*const src8=(const uint8_t*const)src;
for(size_t i=byte_size-1;i!=byte_shift-1;i--){dst8[i] = src8[i-byte_shift];}
for(size_t i=0;i<byte_shift;i++){dst8[i] = fill8;}
}
/**
* shift a buffer left with bit granularity (little endian)
*
* @param dst destination buffer, can be equal to src
* @param src source buffer
* @param size length in bits of src/dst
* @param shift shift amount in bits
* @param fill fill value for the LSBs
*
* shift is allowed to be larger than size, it behaves like they are equal
*/
static void bufshlbits(void*const dst,const void*const src,size_t size, size_t shift, bool fill){
if(0==size) return;
const uint8_t fill8 = fill ? 0xFF : 0x00;
if(shift>size) shift=size;
uint8_t*const dst8=(uint8_t*)dst;
const uint8_t*const src8=(const uint8_t*const)src;
const size_t byte_size = (size+7)/8;
const unsigned int byte_shift=shift%8;
const unsigned int cshift = (8-byte_shift)%8;
const uint8_t last = src8[byte_size-1];
const size_t lsb = shift/8;
if(0==(shift%8)){
bufshl(dst,src,byte_size,lsb,fill8);
} else {
uint8_t carry=src8[byte_size-1-lsb];
for(size_t i=byte_size-1;i!=lsb-1;i--){
const size_t sidx = i-1-lsb;
const uint8_t s = sidx>byte_size ? fill8 : src8[sidx];
const uint8_t d = (carry<<byte_shift)|(s >> cshift);
carry = src8[sidx];
dst8[i] = d;
}
}
for(size_t i=0;i<lsb;i++){dst8[i]=fill8;}
if(size%8){
const uint8_t last_mask = 0xFF<<(size % 8);
dst8[byte_size-1] &= ~last_mask;
dst8[byte_size-1] |= last & last_mask;
}
}
/**
* shift a buffer right with byte granularity (little endian)
*
* @param dst destination buffer, can be equal to src
* @param src source buffer
* @param byte_size length in bytes of src/dst
* @param byte_shift shift amount in bytes
* @param fill8 fill value for the MSBs
*
* byte_shift is allowed to be larger than byte_size, it behaves like they are equal
*/
static void bufshr(void*const dst,const void*const src,size_t byte_size, size_t byte_shift, uint8_t fill8){
if(0==byte_size) return;
if(byte_shift>byte_size) byte_shift=byte_size;
uint8_t*const dst8=(uint8_t*)dst;
const uint8_t*const src8=(const uint8_t*const)src;
const size_t last=byte_size-byte_shift;
for(size_t i=0;i!=last;i++){dst8[i] = src8[i+byte_shift];}
for(size_t i=last;i<byte_shift;i++){dst8[i] = fill8;}
}
/**
* shift a buffer right with bit granularity (little endian)
*
* @param dst destination buffer, can be equal to src
* @param src source buffer
* @param size length in bits of src/dst
* @param shift shift amount in bits
* @param fill fill value for the MSBs
*
* shift is allowed to be larger than size, it behaves like they are equal
*/
static void bufshrbits(void*const dst,const void*const src,size_t size, size_t shift, bool fill){
if(0==size) return;
const uint8_t fill8 = fill ? 0xFF : 0x00;
if(shift>size) shift=size;
uint8_t*const dst8=(uint8_t*)dst;
const uint8_t*const src8=(const uint8_t*const)src;
const size_t byte_size = (size+7)/8;
const unsigned int bshift=shift%8;
const unsigned int cshift = bshift ? (8-bshift)%8 : 8;
const uint8_t last = src8[byte_size-1];
const size_t lsb = shift/8;
if((0==(shift%8)) && (0==(size%8))) {
bufshr(dst,src,byte_size,lsb,fill8);
} else {
const uint8_t last_mask = size%8 ? 0xFF<<(size % 8) : 0;
uint8_t carry = lsb+1 >=byte_size ? fill8 : src8[lsb+1];
if(lsb+1 == byte_size-1) {
carry &= ~last_mask;
carry |= last_mask & fill8;
}
if(byte_size>lsb){
for(size_t i=0;i<byte_size-lsb-1;i++){
const size_t sidx = i+lsb;
uint8_t s;
if(sidx>=byte_size-1){
s=(src8[sidx] & ~last_mask) | (last_mask & fill8);
}else{
s=src8[sidx];
}
const uint8_t d = (carry<<cshift)|(s >> bshift);
carry = sidx+2 >=byte_size? fill8 : src8[sidx+2];
if(sidx+2 == byte_size-1) {
carry &= ~last_mask;
carry |= last_mask & fill8;
}
dst8[i] = d;
}
}
const size_t sidx = byte_size-lsb-1+lsb;
carry &= ~last_mask;
carry |= last_mask & fill8;
uint8_t s;
if(sidx>=byte_size-1){
s=(src8[sidx] & ~last_mask) | (last_mask & fill8);
}else{
s=src8[sidx];
}
const uint8_t d = (carry<<cshift)|(s >> bshift);
dst8[byte_size-lsb-1] = d;
}
for(size_t i=byte_size-lsb;i<byte_size;i++){dst8[i]=fill8;}
if(size%8){
const uint8_t last_mask = 0xFF<<(size % 8);
dst8[byte_size-1] &= ~last_mask;
dst8[byte_size-1] |= last & last_mask;
}
}