pub fn pack_block(values: &[u32; 128], bit_width: u8, output: &mut Vec<u8>)
Packs a block of 128 `u32` values (`values`) using the specified bit width (`bit_width`).