// kitt_score 0.1.0

//! Linear SIMD kernels for dot and cosine similarity on `&[f32]`.
//!
//! Uses `pulp::Arch::new()` to dispatch to the best SIMD ISA available at
//! runtime (`AVX2`/`AVX-512` on `x86_64`, `NEON` on `aarch64`). Each call to
//! `dot` or `cosine` builds its own `pulp::Arch` and dispatches once, so the
//! dispatch cost is paid per scoring call in a trigger batch (1 call per
//! dispatch) and is amortized only over the elements of the input slices; it is
//! expected to be on the order of a few nanoseconds.
//!
//! References:
//!   - Faer documentation on runtime SIMD dispatch (pulp crate).
//!   - Agner Fog, "The microarchitecture of Intel, AMD and VIA CPUs" (2023),
//!     on the throughput of `vfmadd` instructions used for dot-product reduction.

/// Computes the inner product of `a` and `b`, i.e. the sum of `a[i] * b[i]`
/// over every index.
///
/// The element-wise multiply and the running sum are written as ordinary
/// iterator code and executed through `pulp::Arch::dispatch`.
///
/// # Panics
///
/// Panics if `a` and `b` do not have the same length.
#[must_use]
pub fn dot(a: &[f32], b: &[f32]) -> f32 {
    assert_eq!(a.len(), b.len(), "dot: length mismatch");

    // Plain scalar kernel: each product is rounded on its own and the
    // products are then added up in index order.
    let kernel = || -> f32 {
        let products = a.iter().zip(b).map(|(&lhs, &rhs)| lhs * rhs);
        products.sum()
    };

    // Run the kernel under whichever ISA pulp selects for the current CPU.
    let simd = pulp::Arch::new();
    simd.dispatch(kernel)
}

/// Computes the cosine similarity `a·b / (‖a‖ · ‖b‖)` of two `f32` slices.
///
/// A single pass accumulates the dot product and both squared norms with
/// `f32::mul_add`. If either squared norm comes out exactly `0.0`, the
/// function returns `0.0` instead of dividing by zero.
///
/// # Panics
///
/// Panics if `a` and `b` do not have the same length.
#[must_use]
// Comparing against exactly 0.0 is intentional: it is a sentinel check for a
// zero-magnitude input, not a tolerance-based equality test.
#[allow(clippy::float_cmp)]
pub fn cosine(a: &[f32], b: &[f32]) -> f32 {
    assert_eq!(a.len(), b.len(), "cosine: length mismatch");

    let simd = pulp::Arch::new();
    simd.dispatch(|| {
        let mut inner = 0.0f32;
        let mut norm_sq_a = 0.0f32;
        let mut norm_sq_b = 0.0f32;

        for (&x, &y) in a.iter().zip(b) {
            inner = x.mul_add(y, inner);
            norm_sq_a = x.mul_add(x, norm_sq_a);
            norm_sq_b = y.mul_add(y, norm_sq_b);
        }

        // Zero-magnitude guard: exact comparison, no epsilon.
        if norm_sq_a == 0.0 || norm_sq_b == 0.0 {
            return 0.0;
        }

        inner / (norm_sq_a.sqrt() * norm_sq_b.sqrt())
    })
}

#[cfg(test)]
mod tests {
    use super::{cosine, dot};

    /// 1*4 + 2*5 + 3*6 = 32, which is exactly representable in `f32`.
    #[test]
    #[allow(clippy::float_cmp)]
    fn dot_basic() {
        let lhs = [1.0f32, 2.0, 3.0];
        let rhs = [4.0f32, 5.0, 6.0];
        let got = dot(&lhs, &rhs);
        assert_eq!(got, 32.0);
    }

    /// Perpendicular unit vectors must score (approximately) zero.
    #[test]
    fn cosine_orthogonal_is_zero() {
        let x_axis = [1.0f32, 0.0];
        let y_axis = [0.0f32, 1.0];
        let c = cosine(&x_axis, &y_axis);
        assert!(c.abs() < 1e-6);
    }

    /// A vector and a positive multiple of it must score (approximately) one.
    #[test]
    fn cosine_parallel_is_one() {
        let base = [1.0f32, 2.0, 3.0];
        let doubled = [2.0f32, 4.0, 6.0];
        let c = cosine(&base, &doubled);
        assert!((c - 1.0).abs() < 1e-6, "got {c}");
    }

    /// A zero-magnitude operand yields exactly `0.0`.
    #[test]
    #[allow(clippy::float_cmp)]
    fn cosine_zero_vector_returns_zero() {
        let zeros = [0.0f32; 3];
        let other = [1.0f32, 2.0, 3.0];
        let got = cosine(&zeros, &other);
        assert_eq!(got, 0.0);
    }
}