[−][src]Macro safe_arch::dot_product_m128
Performs a dot product of two `m128` registers.
The output details are determined by a control mask:
- For each lane, you can multiply that lane from `$a` and `$b`, or you can take a default of 0.0.
- This forms four temporary `f32` values which are summed to a single `f32`.
- For each output lane, you can have the sum in that lane or 0.0.
// Example: how the 8-bit control mask of `dot_product_m128!` shapes the result.
// The high nibble (bits 4-7) picks which lane products go into the sum;
// the low nibble (bits 0-3) picks which output lanes receive that sum.
let a = m128::from_array([1.0, 2.0, 3.0, 4.0]);
let b = m128::from_array([5.0, 6.0, 7.0, 8.0]);

// Bits 4 through 7 determine if we should mul lanes 0 through 3.
// (The low nibble is all ones here, so every output lane shows the sum.)

let c = dot_product_m128!(a, b, 0b0000_1111).to_array();
assert_eq!(c, [0.0, 0.0, 0.0, 0.0]); // no mul

let c = dot_product_m128!(a, b, 0b0001_1111).to_array();
assert_eq!(c, [5.0, 5.0, 5.0, 5.0]); // mul temp lane 0 (1 * 5)

let c = dot_product_m128!(a, b, 0b0010_1111).to_array();
assert_eq!(c, [12.0, 12.0, 12.0, 12.0]); // mul temp lane 1 (2 * 6)

let c = dot_product_m128!(a, b, 0b0100_1111).to_array();
assert_eq!(c, [21.0, 21.0, 21.0, 21.0]); // mul temp lane 2 (3 * 7)

let c = dot_product_m128!(a, b, 0b1000_1111).to_array();
assert_eq!(c, [32.0, 32.0, 32.0, 32.0]); // mul temp lane 3 (4 * 8)

let c = dot_product_m128!(a, b, 0b1111_1111).to_array();
assert_eq!(c, [70.0, 70.0, 70.0, 70.0]); // mul all lanes (and summed in the next step)

// After here we have four temp lanes, which get added to form `sum`.

// Bits 0 through 3 determine if the `sum` is in lanes 0 through 3.
// (The high nibble is all ones here, so the sum is always 5 + 12 + 21 + 32 = 70.)

let c = dot_product_m128!(a, b, 0b1111_0000).to_array();
assert_eq!(c, [0.0, 0.0, 0.0, 0.0]); // never use sum

let c = dot_product_m128!(a, b, 0b1111_0001).to_array();
assert_eq!(c, [70.0, 0.0, 0.0, 0.0]); // sum in output lane 0

let c = dot_product_m128!(a, b, 0b1111_0010).to_array();
assert_eq!(c, [0.0, 70.0, 0.0, 0.0]); // sum in output lane 1

let c = dot_product_m128!(a, b, 0b1111_0100).to_array();
assert_eq!(c, [0.0, 0.0, 70.0, 0.0]); // sum in output lane 2

let c = dot_product_m128!(a, b, 0b1111_1000).to_array();
assert_eq!(c, [0.0, 0.0, 0.0, 70.0]); // sum in output lane 3

let c = dot_product_m128!(a, b, 0b1111_1111).to_array();
assert_eq!(c, [70.0, 70.0, 70.0, 70.0]); // sum in all output lanes