// memory_core/autonomous/consolidation.rs

use std::collections::HashSet;
2
/// Computes the Jaccard similarity of two texts over their terms.
///
/// Each input is split on whitespace, every term is lowercased, and duplicate
/// terms are collapsed into a set. The score is `|A ∩ B| / |A ∪ B|`, so it is
/// `1.0` when both texts contain exactly the same terms and `0.0` when they
/// share none.
///
/// Returns `0.0` when neither text contains a term (for example two empty
/// strings), which avoids dividing by an empty union.
pub fn term_similarity(a: &str, b: &str) -> f64 {
    let terms_a: HashSet<String> = a.split_whitespace().map(str::to_lowercase).collect();
    let terms_b: HashSet<String> = b.split_whitespace().map(str::to_lowercase).collect();

    let shared = terms_a.intersection(&terms_b).count();
    // Inclusion-exclusion: |A ∪ B| = |A| + |B| - |A ∩ B|, so the union never
    // has to be iterated separately.
    let total = terms_a.len() + terms_b.len() - shared;

    if total == 0 {
        0.0
    } else {
        shared as f64 / total as f64
    }
}
17
/// Merges several memory texts into one, keeping each distinct line once.
///
/// Lines are compared after trimming surrounding whitespace and lowercasing,
/// so two lines that differ only in case or padding count as duplicates.
/// The first occurrence wins and is emitted exactly as it was written
/// (original case, untrimmed). Lines that are empty after trimming are
/// dropped. Surviving lines keep their input order and are joined with `\n`.
///
/// Returns an empty string when `values` is empty or contains only blank
/// lines.
pub fn merge_values(values: &[&str]) -> String {
    // Normalized (trimmed + lowercased) forms of the lines already emitted.
    let mut seen: HashSet<String> = HashSet::new();

    let unique_lines: Vec<&str> = values
        .iter()
        .copied()
        .flat_map(str::lines)
        .filter(|line| {
            let normalized = line.trim().to_lowercase();
            // `insert` returns false for a repeat, which filters the line out.
            !normalized.is_empty() && seen.insert(normalized)
        })
        .collect();

    unique_lines.join("\n")
}
34
/// A set of memories that are candidates for being consolidated into one.
///
/// All members of a group share the same `key` and `scope`.
#[derive(Debug, Clone)]
pub struct ConsolidationGroup {
    /// IDs of the memories that belong to this group.
    pub memory_ids: Vec<i64>,
    /// Lowest term-similarity score among the matching pairs that built the
    /// group.
    pub similarity: f64,
    /// Key shared by every memory in the group.
    pub key: String,
    /// Scope shared by every memory in the group.
    pub scope: String,
}
43
44/// Find pairs of memories in the given list that have high term overlap
45/// and share the same key AND scope. Returns consolidation groups.
46pub fn find_candidates(
47    memories: &[(i64, String, String, String)], // (id, key, value, scope)
48    threshold: f64,
49) -> Vec<ConsolidationGroup> {
50    let mut groups: Vec<ConsolidationGroup> = Vec::new();
51
52    for i in 0..memories.len() {
53        for j in (i + 1)..memories.len() {
54            let (id_a, key_a, val_a, scope_a) = &memories[i];
55            let (id_b, key_b, val_b, scope_b) = &memories[j];
56
57            if key_a != key_b || scope_a != scope_b {
58                continue;
59            }
60
61            let sim = term_similarity(val_a, val_b);
62            if sim >= threshold {
63                // Check if either ID is already in a group for this key
64                let existing = groups.iter_mut().find(|g| {
65                    g.key == *key_a
66                        && (g.memory_ids.contains(id_a) || g.memory_ids.contains(id_b))
67                });
68
69                match existing {
70                    Some(group) => {
71                        if !group.memory_ids.contains(id_a) {
72                            group.memory_ids.push(*id_a);
73                        }
74                        if !group.memory_ids.contains(id_b) {
75                            group.memory_ids.push(*id_b);
76                        }
77                        if sim < group.similarity {
78                            group.similarity = sim;
79                        }
80                    }
81                    None => {
82                        groups.push(ConsolidationGroup {
83                            memory_ids: vec![*id_a, *id_b],
84                            similarity: sim,
85                            key: key_a.clone(),
86                            scope: scope_a.clone(),
87                        });
88                    }
89                }
90            }
91        }
92    }
93
94    groups
95}
96
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds one `(id, key, value, scope)` row in the shape `find_candidates` takes.
    fn memory(id: i64, key: &str, value: &str, scope: &str) -> (i64, String, String, String) {
        (id, key.to_string(), value.to_string(), scope.to_string())
    }

    // --- term_similarity ---------------------------------------------------

    #[test]
    fn identical_texts_have_similarity_one() {
        assert_eq!(term_similarity("hello world", "hello world"), 1.0);
    }

    #[test]
    fn disjoint_texts_have_similarity_zero() {
        assert_eq!(term_similarity("alpha beta", "gamma delta"), 0.0);
    }

    #[test]
    fn partial_overlap() {
        let sim = term_similarity("the quick brown fox", "the quick red fox");
        assert!(sim > 0.5 && sim < 1.0);
    }

    #[test]
    fn empty_texts() {
        assert_eq!(term_similarity("", ""), 0.0);
        assert_eq!(term_similarity("hello", ""), 0.0);
    }

    // --- merge_values ------------------------------------------------------

    #[test]
    fn merge_deduplicates_lines() {
        let first = "line one\nline two\nline three";
        let second = "line two\nline four";
        let merged = merge_values(&[first, second]);
        assert_eq!(merged, "line one\nline two\nline three\nline four");
    }

    #[test]
    fn merge_preserves_original_case() {
        let first = "Use BUN for tests";
        let second = "use bun for tests\nAlso run clippy";
        let merged = merge_values(&[first, second]);
        // Dedup is case-insensitive and the first occurrence wins.
        assert!(merged.contains("Use BUN for tests"));
        assert!(merged.contains("Also run clippy"));
        assert!(!merged.contains("use bun for tests"));
    }

    // --- find_candidates ---------------------------------------------------

    #[test]
    fn find_candidates_groups_same_key_and_scope() {
        let memories = vec![
            memory(1, "commands/test", "run bun test", "/proj"),
            memory(2, "commands/test", "run bun test --watch", "/proj"),
            memory(3, "commands/build", "cargo build --release", "/proj"),
        ];
        let groups = find_candidates(&memories, 0.5);
        assert_eq!(groups.len(), 1);
        assert!(groups[0].memory_ids.contains(&1));
        assert!(groups[0].memory_ids.contains(&2));
        assert_eq!(groups[0].scope, "/proj");
    }

    #[test]
    fn find_candidates_no_cross_key() {
        let memories = vec![
            memory(1, "key_a", "same content here", "/"),
            memory(2, "key_b", "same content here", "/"),
        ];
        let groups = find_candidates(&memories, 0.5);
        assert!(groups.is_empty());
    }

    #[test]
    fn find_candidates_no_cross_scope() {
        let memories = vec![
            memory(1, "k", "same content here", "/project/a"),
            memory(2, "k", "same content here", "/project/b"),
        ];
        let groups = find_candidates(&memories, 0.5);
        assert!(groups.is_empty());
    }

    #[test]
    fn find_candidates_below_threshold() {
        let memories = vec![
            memory(1, "k", "alpha beta gamma delta", "/"),
            memory(2, "k", "epsilon zeta eta theta", "/"),
        ];
        let groups = find_candidates(&memories, 0.5);
        assert!(groups.is_empty());
    }
}
185}