ruve-db 0.1.1

A hybrid vector and full-text search database with HNSW approximate nearest-neighbour indexing and BM25
Documentation

// Cosine similarity measures how closely two non-zero vectors in an inner product space point in the same direction.
// The result ranges from -1 to 1. A value of 1 means the vectors point in exactly the same direction (their magnitudes may differ), while -1 means they point in opposite directions.
// A value of 0 means the vectors are orthogonal (i.e., at a right angle to each other).
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance used when comparing floating-point similarities.
    const TOLERANCE: f32 = 1e-6;

    /// Asserts that `actual` lies strictly within `TOLERANCE` of `expected`.
    fn assert_close(actual: f32, expected: f32) {
        assert!((actual - expected).abs() < TOLERANCE);
    }

    /// A vector compared with itself has similarity 1.
    #[test]
    fn identical_vectors_return_one() {
        let vector = [1.0_f32, 2.0, 3.0];
        assert_close(cosine_similarity(&vector, &vector), 1.0);
    }

    /// Perpendicular unit vectors have similarity 0.
    #[test]
    fn orthogonal_vectors_return_zero() {
        let x_axis = [1.0_f32, 0.0];
        let y_axis = [0.0_f32, 1.0];
        assert_close(cosine_similarity(&x_axis, &y_axis), 0.0);
    }

    /// Vectors pointing in opposite directions have similarity -1.
    #[test]
    fn opposite_vectors_return_minus_one() {
        let forward = [1.0_f32, 0.0];
        let backward = [-1.0_f32, 0.0];
        assert_close(cosine_similarity(&forward, &backward), -1.0);
    }

    /// A zero vector on either side yields exactly 0.0 rather than NaN.
    #[test]
    fn zero_vector_returns_zero() {
        let non_zero = [1.0_f32, 2.0];
        let zero = [0.0_f32, 0.0];
        assert_eq!(cosine_similarity(&non_zero, &zero), 0.0);
        assert_eq!(cosine_similarity(&zero, &non_zero), 0.0);
    }

    /// Scaling a vector changes its magnitude but not its direction.
    #[test]
    fn scaled_vector_is_still_one() {
        let base = [1.0_f32, 2.0, 3.0];
        let scaled = [10.0_f32, 20.0, 30.0];
        assert_close(cosine_similarity(&base, &scaled), 1.0);
    }
}

/// Computes the cosine similarity between two vectors.
///
/// The result is `dot(a, b) / (|a| * |b|)` and lies in `[-1.0, 1.0]`:
/// `1.0` when the vectors point in the same direction, `0.0` when they are
/// orthogonal, and `-1.0` when they point in opposite directions. Only the
/// direction of the vectors matters, not their magnitude.
///
/// Special cases:
/// - If either vector has zero magnitude (this includes an empty slice), the
///   similarity is undefined and `0.0` is returned.
/// - If the slices differ in length, the dot product covers only the common
///   prefix while each norm covers its whole slice. This is equivalent to
///   zero-padding the shorter vector.
/// - Non-finite components (NaN, infinities) are not handled specially and
///   generally yield NaN.
pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    // All sums are accumulated in f64. Squaring an f32 overflows to infinity
    // for components beyond roughly 1.8e19 and underflows to zero below
    // roughly 1e-23, which used to turn valid inputs into NaN or into a
    // spurious "zero vector" result. Every f32 square is exactly representable
    // in f64, so widening removes both failure modes and improves precision.
    let dot: f64 = a
        .iter()
        .zip(b) // pairs up corresponding components; stops at the shorter slice
        .map(|(&x, &y)| f64::from(x) * f64::from(y))
        .sum();

    // Euclidean (L2) norm of a slice, used to normalise the dot product.
    let norm = |v: &[f32]| -> f64 {
        v.iter()
            .map(|&x| {
                let x = f64::from(x);
                x * x
            })
            .sum::<f64>()
            .sqrt()
    };
    let norm_a = norm(a);
    let norm_b = norm(b);

    // A zero-magnitude vector has no direction; report "no similarity"
    // instead of dividing by zero.
    if norm_a == 0.0 || norm_b == 0.0 {
        return 0.0;
    }

    // Rounding can push the quotient marginally outside the mathematical
    // range; clamp so callers can rely on [-1, 1]. NaN passes through clamp
    // unchanged. The narrowing cast is safe because the value is in range.
    (dot / (norm_a * norm_b)).clamp(-1.0, 1.0) as f32
}