// ruvector_sparse_inference/backend/mod.rs

1//! Backend abstraction for hardware-specific optimizations
2
3use crate::config::ActivationType;
4use ndarray::Array2;
5
6pub mod cpu;
7pub mod wasm;
8
9#[cfg(feature = "npu")]
10pub mod npu;
11
12/// Backend trait for SIMD/vectorized operations
13pub trait Backend: Send + Sync {
14    /// Dot product of two vectors
15    fn dot_product(&self, a: &[f32], b: &[f32]) -> f32;
16
17    /// Sparse matrix-vector multiplication
18    /// Only computes rows specified in `rows`
19    fn sparse_matmul(&self, matrix: &Array2<f32>, input: &[f32], rows: &[usize]) -> Vec<f32>;
20
21    /// Sparse matrix-vector multiplication with column-major accumulation
22    fn sparse_matmul_accumulate(
23        &self,
24        matrix: &Array2<f32>,
25        input: &[f32],
26        cols: &[usize],
27        output: &mut [f32],
28    );
29
30    /// Apply activation function in-place
31    fn activation(&self, data: &mut [f32], activation_type: ActivationType);
32
33    /// Vectorized addition
34    fn add(&self, a: &mut [f32], b: &[f32]);
35
36    /// Vectorized multiply-add: a[i] += b[i] * scalar
37    fn axpy(&self, a: &mut [f32], b: &[f32], scalar: f32);
38
39    /// Backend name for debugging
40    fn name(&self) -> &'static str;
41
42    /// SIMD width (number of f32s per vector register)
43    fn simd_width(&self) -> usize;
44}
45
46/// Get the best available backend for the current platform
47pub fn get_backend() -> Box<dyn Backend> {
48    #[cfg(target_arch = "wasm32")]
49    return Box::new(wasm::WasmBackend);
50
51    #[cfg(not(target_arch = "wasm32"))]
52    {
53        #[cfg(feature = "npu")]
54        if npu::is_available() {
55            return Box::new(npu::NpuBackend::new());
56        }
57
58        Box::new(cpu::CpuBackend)
59    }
60}