Skip to main content

do_memory_core/embeddings/
similarity.rs

1//! Vector similarity calculations and search utilities
2
3use serde::{Deserialize, Serialize};
4
5/// Result from similarity search containing the item and similarity score
6#[derive(Debug, Clone, Serialize, Deserialize)]
7pub struct SimilaritySearchResult<T> {
8    /// The found item (episode or pattern)
9    pub item: T,
10    /// Similarity score (0.0 to 1.0, higher = more similar)
11    pub similarity: f32,
12    /// Additional metadata about the match
13    pub metadata: SimilarityMetadata,
14}
15
16/// Metadata about a similarity match
17#[derive(Debug, Clone, Serialize, Deserialize, Default)]
18pub struct SimilarityMetadata {
19    /// Which embedding was used for the match
20    #[serde(default)]
21    pub embedding_model: String,
22    /// Timestamp of when the embedding was generated
23    pub embedding_timestamp: Option<chrono::DateTime<chrono::Utc>>,
24    /// Additional context about the match
25    #[serde(default)]
26    pub context: serde_json::Value,
27}
28
/// Calculate cosine similarity between two vectors.
///
/// Cosine similarity is the cosine of the angle between the two vectors. The
/// raw value lies in `[-1, 1]`; it is rescaled here to `[0, 1]` so that `1.0`
/// means "same direction", `0.5` means "orthogonal" and `0.0` means "opposite
/// direction". Higher scores indicate greater similarity.
///
/// # Returns
///
/// A score in `[0.0, 1.0]`. The function returns `0.0` (no similarity) when:
///
/// * the slices have different lengths,
/// * the slices are empty,
/// * either vector has zero magnitude, or
/// * the inputs contain NaN or infinite components, so no meaningful cosine
///   exists.
///
/// The result is never NaN and never leaves the `[0.0, 1.0]` range.
#[must_use]
#[allow(clippy::cast_possible_truncation)] // the narrowed value is already confined to [0, 1]
pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() || a.is_empty() {
        return 0.0;
    }

    // Accumulate in f64: squaring f32 components can overflow to infinity
    // (|x| > ~1.8e19) or underflow to zero (|x| < ~1e-23), and long embedding
    // vectors lose precision when summed in f32. Every f32 square fits
    // comfortably in f64. A single pass computes all three sums.
    let (dot, norm_a_sq, norm_b_sq) = a.iter().zip(b).fold(
        (0.0_f64, 0.0_f64, 0.0_f64),
        |(dot, norm_a, norm_b), (&x, &y)| {
            let (x, y) = (f64::from(x), f64::from(y));
            (dot + x * y, norm_a + x * x, norm_b + y * y)
        },
    );

    // A zero vector has no direction, so the angle is undefined.
    if norm_a_sq == 0.0 || norm_b_sq == 0.0 {
        return 0.0;
    }

    let cosine = dot / (norm_a_sq.sqrt() * norm_b_sq.sqrt());

    // NaN or infinite inputs propagate to here; report "no similarity" rather
    // than leaking NaN into callers that sort or threshold on the score.
    if !cosine.is_finite() {
        return 0.0;
    }

    // Rounding can nudge the ratio just past +/-1; clamp before rescaling
    // from [-1, 1] to [0, 1] so the documented range always holds.
    ((cosine.clamp(-1.0, 1.0) + 1.0) / 2.0) as f32
}
56
#[cfg(test)]
mod tests {
    use super::*;

    /// Tolerance used when comparing floating-point scores.
    const EPSILON: f32 = 0.001;

    // Each scenario lives in its own test so one failure does not hide the
    // others. Names keep the `test_cosine_similarity` prefix so an existing
    // `cargo test test_cosine_similarity` filter still selects all of them.

    /// Vectors pointing the same way score the maximum of 1.0.
    #[test]
    fn test_cosine_similarity_identical_vectors() {
        let score = cosine_similarity(&[1.0, 2.0, 3.0], &[1.0, 2.0, 3.0]);
        assert!((score - 1.0).abs() < EPSILON);
    }

    /// Orthogonal vectors land in the middle of the rescaled range.
    #[test]
    fn test_cosine_similarity_orthogonal_vectors() {
        let score = cosine_similarity(&[1.0, 0.0], &[0.0, 1.0]);
        assert!((score - 0.5).abs() < EPSILON);
    }

    /// Vectors pointing in opposite directions score the minimum of 0.0.
    #[test]
    fn test_cosine_similarity_opposite_vectors() {
        let score = cosine_similarity(&[1.0, 2.0, 3.0], &[-1.0, -2.0, -3.0]);
        assert!(score.abs() < EPSILON);
    }

    /// Slices of different lengths take the early-return path, which yields
    /// exactly 0.0.
    #[test]
    fn test_cosine_similarity_mismatched_lengths() {
        let score = cosine_similarity(&[1.0, 2.0], &[1.0, 2.0, 3.0]);
        assert_eq!(score, 0.0);
    }

    /// Empty input takes the early-return path, which yields exactly 0.0.
    #[test]
    fn test_cosine_similarity_empty_vectors() {
        let score = cosine_similarity(&[], &[]);
        assert_eq!(score, 0.0);
    }

    /// A zero-magnitude vector on either side yields exactly 0.0.
    #[test]
    fn test_cosine_similarity_zero_magnitude() {
        assert_eq!(cosine_similarity(&[0.0, 0.0], &[1.0, 2.0]), 0.0);
        assert_eq!(cosine_similarity(&[1.0, 2.0], &[0.0, 0.0]), 0.0);
    }
}