#![feature(cfg_target_feature)]
extern crate simd;
use simd::f32x4;
#[cfg(target_feature = "avx")]
use simd::x86::avx::{f32x8, LowHigh128};
/// Computes the dot product of `x` and `y`, accumulating four lanes at a time.
///
/// Complete groups of four elements are multiplied and summed lane-wise in an
/// `f32x4` accumulator, which is then reduced horizontally. Any elements left
/// over when the length is not a multiple of four are added with scalar
/// arithmetic, so every element contributes to the result.
///
/// # Panics
///
/// Panics if `x` and `y` have different lengths.
#[inline(never)]
pub fn dot(x: &[f32], y: &[f32]) -> f32 {
    assert_eq!(x.len(), y.len());
    let len = x.len();
    // Largest multiple of 4 that does not exceed `len`: the vectorised prefix.
    let simd_len = len & !3;

    let mut acc = f32x4::splat(0.0);
    let mut i = 0;
    while i < simd_len {
        acc = acc + f32x4::load(x, i) * f32x4::load(y, i);
        i += 4;
    }

    // Horizontal reduction of the four partial sums.
    let mut total = acc.extract(0) + acc.extract(1) + acc.extract(2) + acc.extract(3);

    // Scalar tail: the 0..=3 elements that did not fill a whole vector.
    for j in simd_len..len {
        total += x[j] * y[j];
    }
    total
}
/// Computes the dot product of `x` and `y`, accumulating eight lanes at a time
/// with AVX.
///
/// Complete groups of eight elements are multiplied and summed lane-wise in an
/// `f32x8` accumulator. The accumulator's low and high halves are then added
/// together and reduced horizontally. Any elements left over when the length
/// is not a multiple of eight are added with scalar arithmetic, so every
/// element contributes to the result.
///
/// Only compiled when the `avx` target feature is enabled.
///
/// # Panics
///
/// Panics if `x` and `y` have different lengths.
#[cfg(target_feature = "avx")]
#[inline(never)]
pub fn dot8(x: &[f32], y: &[f32]) -> f32 {
    assert_eq!(x.len(), y.len());
    let len = x.len();
    // Largest multiple of 8 that does not exceed `len`: the vectorised prefix.
    let simd_len = len & !7;

    let mut acc = f32x8::splat(0.0);
    let mut i = 0;
    while i < simd_len {
        acc = acc + f32x8::load(x, i) * f32x8::load(y, i);
        i += 8;
    }

    // Fold the high half onto the low half, then reduce the four lanes.
    let halves = acc.low() + acc.high();
    let mut total =
        halves.extract(0) + halves.extract(1) + halves.extract(2) + halves.extract(3);

    // Scalar tail: the 0..=7 elements that did not fill a whole vector.
    for j in simd_len..len {
        total += x[j] * y[j];
    }
    total
}
/// Portable fallback for `dot8`, used when the `avx` target feature is not
/// enabled.
///
/// Computes the dot product of `x` and `y` with plain scalar arithmetic,
/// summing the element-wise products from left to right. Because the
/// summation order differs from a vectorised accumulation, results may differ
/// in the last bits for inputs that are not exactly representable.
///
/// # Panics
///
/// Panics if `x` and `y` have different lengths.
#[cfg(not(target_feature = "avx"))]
pub fn dot8(x: &[f32], y: &[f32]) -> f32 {
    assert_eq!(x.len(), y.len());
    x.iter()
        .zip(y.iter())
        .fold(0.0, |acc, (a, b)| acc + a * b)
}
/// Prints the dot products of two sample vector pairs (4 and 8 elements)
/// computed with `dot`, and, when built with the `avx` target feature,
/// the same pairs computed with `dot8`.
fn main() {
    let short_x: [f32; 4] = [1.0, 3.0, 5.0, 7.0];
    let short_y: [f32; 4] = [2.0, 4.0, 6.0, 8.0];
    let long_x: [f32; 8] = [1.0, 3.0, 6.0, 7.0, 10.0, 6.0, 3.0, 2.0];
    let long_y: [f32; 8] = [2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0];

    println!("{}", dot(&short_x, &short_y));
    println!("{}", dot(&long_x, &long_y));

    // `cfg!` expands to a compile-time boolean; both branches are still
    // type-checked, so `dot8` must exist on every target.
    if cfg!(target_feature = "avx") {
        println!("{}", dot8(&short_x, &short_y));
        println!("{}", dot8(&long_x, &long_y));
    }
}