/// Alignment, in bytes, used for SIMD vectors.
///
/// The value 32 corresponds to AVX2 (32-byte vectors); AVX-512 uses
/// 64-byte vectors and would need this raised to 64.
pub const SIMD_ALIGNMENT: usize = 32;