1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
extern crate lazy_static; extern crate libc; extern crate log; #[macro_use] extern crate objekt; extern crate num_traits; #[cfg(test)] extern crate proptest; pub mod align; pub mod f16; pub mod frame; mod generic; #[cfg(target_arch = "x86_64")] pub mod x86_64_fma; #[cfg(target_arch = "aarch64")] pub mod arm64; #[cfg(any(target_arch = "arm", target_arch = "armv7"))] pub mod arm32; pub use self::frame::*; pub struct Ops { pub svmm: Box<Fn(usize, usize) -> Box<VecMatMul<f32>> + Send + Sync>, pub smm: Box<Fn(usize, usize, usize) -> Box<MatMul<f32>> + Send + Sync>, pub dmm: Box<Fn(usize, usize, usize) -> Box<MatMul<f64>> + Send + Sync>, pub sconv: Box<Fn(usize, Vec<isize>, Vec<isize>) -> Box<Conv<f32>> + Send + Sync>, } pub fn generic() -> Ops { Ops { svmm: Box::new(|k, n| Box::new(PackedVecMatMul::<generic::SVecMatMul8, f32>::new(k, n))), smm: Box::new(|m, k, n| Box::new(PackedMatMul::<generic::SMatMul4x4, f32>::new(m, k, n))), dmm: Box::new(|m, k, n| Box::new(PackedMatMul::<generic::DMatMul4x2, f64>::new(m, k, n))), sconv: Box::new(|co, kernel_offsets, data_offsets| { Box::new(PackedConv::<generic::SConv4x4, f32>::new(co, kernel_offsets, data_offsets)) }), } } #[allow(unreachable_code, unused_mut)] pub fn best() -> Ops { let mut ops = generic(); #[cfg(target_arch = "x86_64")] { if is_x86_feature_detected!("fma") { log::info!("x86_64/fma activated for smm and sconv"); ops.smm = Box::new(|m, k, n| { Box::new(PackedMatMul::<x86_64_fma::matmul::KerFma16x6, f32>::new(m, k, n)) }); ops.sconv = Box::new(|co, kernel_offsets, data_offsets| { Box::new(PackedConv::<x86_64_fma::conv::SConvFma16x6, f32>::new( co, kernel_offsets, data_offsets, )) }); } } #[cfg(any(target_arch = "arm", target_arch = "armv7"))] arm32::plug(&mut ops); #[cfg(target_arch = "aarch64")] arm64::plug(&mut ops); return ops; } lazy_static::lazy_static! { static ref OPS: Ops = { best() }; } pub fn ops() -> &'static Ops { &*OPS }