// laurus 0.3.1
//
// Unified search library for lexical, vector, and semantic retrieval
//! Core vector data structure.

use rayon::prelude::*;
use serde::{Deserialize, Serialize};

use crate::error::{LaurusError, Result};

/// A dense vector representation for similarity search.
///
/// Wraps a `Vec<f32>` holding one component per dimension. The derive list
/// provides debug formatting, cloning, equality comparison, and serde
/// (de)serialization.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Vector {
    /// The vector components, one `f32` per dimension.
    pub data: Vec<f32>,
}

impl Vector {
    /// Vectors with more than this many components have their norm and
    /// normalization computed through rayon instead of a sequential loop.
    const PARALLEL_DIM_THRESHOLD: usize = 10_000;

    /// Batches with more than this many vectors are distributed across
    /// rayon worker threads.
    const PARALLEL_BATCH_THRESHOLD: usize = 10;

    /// Create a new vector that takes ownership of the given components.
    pub fn new(data: Vec<f32>) -> Self {
        Self { data }
    }

    /// Get the dimensionality (number of components) of this vector.
    pub fn dimension(&self) -> usize {
        self.data.len()
    }

    /// Calculate the L2 norm (magnitude) of this vector.
    ///
    /// The squares are accumulated sequentially in `f32`. An empty vector
    /// has norm `0.0`.
    pub fn norm(&self) -> f32 {
        self.data.iter().map(|x| x * x).sum::<f32>().sqrt()
    }

    /// Normalize this vector in place to unit length.
    ///
    /// The vector is left untouched when its norm is not strictly greater
    /// than zero, which covers zero vectors, empty vectors, and a NaN norm.
    pub fn normalize(&mut self) {
        let norm = self.norm();
        if norm > 0.0 {
            for value in &mut self.data {
                *value /= norm;
            }
        }
    }

    /// Get a normalized copy of this vector, leaving `self` unchanged.
    pub fn normalized(&self) -> Self {
        let mut normalized = self.clone();
        normalized.normalize();
        normalized
    }

    /// Validate that this vector has the expected dimension.
    ///
    /// # Errors
    ///
    /// Returns [`LaurusError::InvalidOperation`] describing the expected and
    /// actual dimension when they differ.
    pub fn validate_dimension(&self, expected_dim: usize) -> Result<()> {
        if self.data.len() != expected_dim {
            return Err(LaurusError::InvalidOperation(format!(
                "Vector dimension mismatch: expected {}, got {}",
                expected_dim,
                self.data.len()
            )));
        }
        Ok(())
    }

    /// Check that every component is finite.
    ///
    /// Returns `true` when the vector contains no NaN and no infinite
    /// values (an empty vector is therefore valid), and `false` as soon as
    /// one non-finite component is found.
    pub fn is_valid(&self) -> bool {
        self.data.iter().all(|x| x.is_finite())
    }

    /// Calculate the L2 norm using parallel processing for large vectors.
    ///
    /// Vectors at or below the size threshold fall back to [`Vector::norm`].
    /// Above it the squares are summed through rayon; the parallel reduction
    /// may add them in a different order than the sequential loop, so the
    /// result can differ by floating-point rounding.
    pub fn norm_parallel(&self) -> f32 {
        if self.data.len() > Self::PARALLEL_DIM_THRESHOLD {
            self.data.par_iter().map(|x| x * x).sum::<f32>().sqrt()
        } else {
            self.norm()
        }
    }

    /// Normalize this vector using parallel processing for large vectors.
    ///
    /// Vectors at or below the size threshold are handled by
    /// [`Vector::normalize`]; larger ones compute the norm and divide the
    /// components through rayon. As with `normalize`, nothing is modified
    /// unless the norm is strictly greater than zero.
    pub fn normalize_parallel(&mut self) {
        if self.data.len() <= Self::PARALLEL_DIM_THRESHOLD {
            // Small vectors: the sequential path is the same computation.
            self.normalize();
            return;
        }

        let norm = self.norm_parallel();
        if norm > 0.0 {
            self.data.par_iter_mut().for_each(|value| *value /= norm);
        }
    }

    /// Normalize multiple vectors, in parallel when the batch is large.
    ///
    /// Every vector goes through [`Vector::normalize_parallel`] regardless of
    /// batch size, so a small batch of very large vectors still uses the
    /// per-vector parallel path.
    pub fn normalize_batch_parallel(vectors: &mut [Vector]) {
        if vectors.len() > Self::PARALLEL_BATCH_THRESHOLD {
            vectors
                .par_iter_mut()
                .for_each(|vector| vector.normalize_parallel());
        } else {
            for vector in vectors {
                vector.normalize_parallel();
            }
        }
    }
}

/// Dense vector with weight, used for internal storage.
///
/// Pairs the raw components with a single `f32` weight. How the weight is
/// consumed is not visible in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StoredVector {
    /// The vector components, one `f32` per dimension.
    pub data: Vec<f32>,
    /// Weight attached to this vector. `StoredVector::new` and
    /// `From<Vector>` both initialise it to `1.0`.
    pub weight: f32,
}

impl StoredVector {
    /// Build a stored vector from raw components with the default weight of `1.0`.
    pub fn new(data: Vec<f32>) -> Self {
        let weight = 1.0;
        Self { data, weight }
    }

    /// Consume `self` and return it with `weight` replaced (builder style).
    pub fn with_weight(self, weight: f32) -> Self {
        Self { weight, ..self }
    }

    /// Number of components held in `data`.
    pub fn dimension(&self) -> usize {
        let Self { data, .. } = self;
        data.len()
    }

    /// Produce a [`Vector`] holding a copy of the components; the weight is
    /// not carried over.
    pub fn to_vector(&self) -> Vector {
        let data = self.data.to_vec();
        Vector { data }
    }
}

impl From<Vector> for StoredVector {
    fn from(vector: Vector) -> Self {
        Self {
            data: vector.data,
            weight: 1.0,
        }
    }
}