Function safe_arch::mul_u8i8_add_horizontal_saturating_m256i

#[must_use] pub fn mul_u8i8_add_horizontal_saturating_m256i(a: m256i, b: m256i) -> m256i

This is supported with target feature avx2 only.

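This operation is an odd one: the lanes of a are treated as unsigned bytes (u8), while the lanes of b are treated as signed bytes (i8).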

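Vertically multiplies each u8 lane from a with the corresponding i8 lane from b, producing 32 intermediate i16 products.
Each adjacent pair of these intermediate i16 products is then horizontally added with signed saturation (clamping to i16::MIN / i16::MAX), giving 16 i16 output lanes.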

let a = m256i::from([
  255_u8, 255, 0, 0, 255, 255, 1, 1, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
  18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
]);
let b = m256i::from([
  127_i8, 127, 0, 0, -127, -127, 1, 1, 24, 25, 26, 27, 28, 29, 30, 31, 16,
  17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
]);
let c: [i16; 16] = mul_u8i8_add_horizontal_saturating_m256i(a, b).into();
assert_eq!(
  c,
  [i16::MAX, 0, i16::MIN, 2, 417, 557, 713, 885,
  545, 685, 841, 1013, 1201, 1405, 1625, 1861]
);

Intrinsic: _mm256_maddubs_epi16
Assembly: vpmaddubsw ymm, ymm, ymm