Macro safe_arch::dot_product_m128

macro_rules! dot_product_m128 {
    ($a:expr, $b:expr, $imm:expr) => { ... };
}

Performs a dot product of two m128 registers.

The output details are determined by a control mask:

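  • For each of the four lanes, you can either multiply that lane of `$a` by the same lane of `$b`, or use a default of 0.0 instead (mask bits 4 through 7 select lanes 0 through 3).
  • This forms four temporary f32 values, which are summed into a single f32.
  • For each output lane, you can place either that sum or 0.0 in the lane (mask bits 0 through 3 select output lanes 0 through 3).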
// Worked example: how each bit of the control mask (the third macro argument)
// shapes the result. The lane-wise products of `a` and `b` are
// 1*5 = 5, 2*6 = 12, 3*7 = 21 and 4*8 = 32, which total 70.
let a = m128::from_array([1.0, 2.0, 3.0, 4.0]);
let b = m128::from_array([5.0, 6.0, 7.0, 8.0]);

// Bits 4 through 7 determine whether lanes 0 through 3 are multiplied.
// A lane whose bit is clear contributes 0.0 to the sum instead of its product.
// The low four bits are all set here, so the sum appears in every output lane.

let c = dot_product_m128!(a, b, 0b0000_1111).to_array();
assert_eq!(c, [0.0, 0.0, 0.0, 0.0]); // no lanes multiplied, so the sum is 0.0
let c = dot_product_m128!(a, b, 0b0001_1111).to_array();
assert_eq!(c, [5.0, 5.0, 5.0, 5.0]); // only lane 0 multiplied (1 * 5)
let c = dot_product_m128!(a, b, 0b0010_1111).to_array();
assert_eq!(c, [12.0, 12.0, 12.0, 12.0]); // only lane 1 multiplied (2 * 6)
let c = dot_product_m128!(a, b, 0b0100_1111).to_array();
assert_eq!(c, [21.0, 21.0, 21.0, 21.0]); // only lane 2 multiplied (3 * 7)
let c = dot_product_m128!(a, b, 0b1000_1111).to_array();
assert_eq!(c, [32.0, 32.0, 32.0, 32.0]); // only lane 3 multiplied (4 * 8)
let c = dot_product_m128!(a, b, 0b1111_1111).to_array();
assert_eq!(c, [70.0, 70.0, 70.0, 70.0]); // all lanes multiplied: 5 + 12 + 21 + 32

// At this point there are four temporary lane values, which are added
// together to form `sum`.

// Bits 0 through 3 determine whether `sum` is written to output lanes 0
// through 3. An output lane whose bit is clear holds 0.0 instead.
// The high four bits are all set here, so `sum` is always 70.0.

let c = dot_product_m128!(a, b, 0b1111_0000).to_array();
assert_eq!(c, [0.0, 0.0, 0.0, 0.0]); // sum is not written to any output lane

let c = dot_product_m128!(a, b, 0b1111_0001).to_array();
assert_eq!(c, [70.0, 0.0, 0.0, 0.0]); // sum in output lane 0 only

let c = dot_product_m128!(a, b, 0b1111_0010).to_array();
assert_eq!(c, [0.0, 70.0, 0.0, 0.0]); // sum in output lane 1 only

let c = dot_product_m128!(a, b, 0b1111_0100).to_array();
assert_eq!(c, [0.0, 0.0, 70.0, 0.0]); // sum in output lane 2 only

let c = dot_product_m128!(a, b, 0b1111_1000).to_array();
assert_eq!(c, [0.0, 0.0, 0.0, 70.0]); // sum in output lane 3 only

let c = dot_product_m128!(a, b, 0b1111_1111).to_array();
assert_eq!(c, [70.0, 70.0, 70.0, 70.0]); // sum in all output lanes