// memory_core/autonomous/consolidation.rs
use std::collections::HashSet;
2
/// Computes the Jaccard similarity between the word sets of two texts.
///
/// Each input is split on whitespace and every word is lowercased, so the
/// comparison is case-insensitive and repeated words count once.
///
/// Returns `|A ∩ B| / |A ∪ B|`, or `0.0` when neither text contains a word.
pub fn term_similarity(a: &str, b: &str) -> f64 {
    /// Collects the distinct lowercased words of `text`.
    fn word_set(text: &str) -> HashSet<String> {
        text.split_whitespace().map(str::to_lowercase).collect()
    }

    let left = word_set(a);
    let right = word_set(b);

    let shared = left.intersection(&right).count();
    // Inclusion-exclusion: |A ∪ B| = |A| + |B| - |A ∩ B|.
    let combined = left.len() + right.len() - shared;

    if combined == 0 {
        return 0.0;
    }
    shared as f64 / combined as f64
}
17
/// Concatenates the lines of every value, dropping blank and duplicate lines.
///
/// Two lines are duplicates when they are equal after trimming surrounding
/// whitespace and lowercasing. The first occurrence wins and is emitted
/// exactly as written (original case and whitespace); later duplicates are
/// discarded. Surviving lines are joined with `\n`.
pub fn merge_values(values: &[&str]) -> String {
    let mut seen_keys: HashSet<String> = HashSet::new();

    let kept: Vec<&str> = values
        .iter()
        .copied()
        .flat_map(|value| value.lines())
        .filter(|line| {
            let key = line.trim().to_lowercase();
            // `insert` returns false for a key that was already recorded.
            !key.is_empty() && seen_keys.insert(key)
        })
        .collect();

    kept.join("\n")
}
34
/// A set of memories judged similar enough to be consolidated together.
///
/// Derives `PartialEq` so groups can be compared directly, e.g. in tests.
#[derive(Debug, Clone, PartialEq)]
pub struct ConsolidationGroup {
    /// Identifiers of the memories in the group, in the order they were added.
    pub memory_ids: Vec<i64>,
    /// Lowest similarity among the qualifying pairs recorded for the group.
    pub similarity: f64,
    /// Key of the memories in the group.
    pub key: String,
    /// Scope of the memories in the group.
    pub scope: String,
}
43
44pub fn find_candidates(
47 memories: &[(i64, String, String, String)], threshold: f64,
49) -> Vec<ConsolidationGroup> {
50 let mut groups: Vec<ConsolidationGroup> = Vec::new();
51
52 for i in 0..memories.len() {
53 for j in (i + 1)..memories.len() {
54 let (id_a, key_a, val_a, scope_a) = &memories[i];
55 let (id_b, key_b, val_b, scope_b) = &memories[j];
56
57 if key_a != key_b || scope_a != scope_b {
58 continue;
59 }
60
61 let sim = term_similarity(val_a, val_b);
62 if sim >= threshold {
63 let existing = groups.iter_mut().find(|g| {
65 g.key == *key_a
66 && (g.memory_ids.contains(id_a) || g.memory_ids.contains(id_b))
67 });
68
69 match existing {
70 Some(group) => {
71 if !group.memory_ids.contains(id_a) {
72 group.memory_ids.push(*id_a);
73 }
74 if !group.memory_ids.contains(id_b) {
75 group.memory_ids.push(*id_b);
76 }
77 if sim < group.similarity {
78 group.similarity = sim;
79 }
80 }
81 None => {
82 groups.push(ConsolidationGroup {
83 memory_ids: vec![*id_a, *id_b],
84 similarity: sim,
85 key: key_a.clone(),
86 scope: scope_a.clone(),
87 });
88 }
89 }
90 }
91 }
92 }
93
94 groups
95}
96
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds one `(id, key, value, scope)` row in the shape `find_candidates` takes.
    fn row(id: i64, key: &str, value: &str, scope: &str) -> (i64, String, String, String) {
        (id, key.to_string(), value.to_string(), scope.to_string())
    }

    #[test]
    fn identical_texts_have_similarity_one() {
        let score = term_similarity("hello world", "hello world");
        assert_eq!(score, 1.0);
    }

    #[test]
    fn disjoint_texts_have_similarity_zero() {
        let score = term_similarity("alpha beta", "gamma delta");
        assert_eq!(score, 0.0);
    }

    #[test]
    fn partial_overlap() {
        let score = term_similarity("the quick brown fox", "the quick red fox");
        assert!(score > 0.5);
        assert!(score < 1.0);
    }

    #[test]
    fn empty_texts() {
        // Both "nothing vs nothing" and "something vs nothing" score zero.
        for (left, right) in [("", ""), ("hello", "")] {
            assert_eq!(term_similarity(left, right), 0.0);
        }
    }

    #[test]
    fn merge_deduplicates_lines() {
        let first = "line one\nline two\nline three";
        let second = "line two\nline four";
        let expected = "line one\nline two\nline three\nline four";
        assert_eq!(merge_values(&[first, second]), expected);
    }

    #[test]
    fn merge_preserves_original_case() {
        let first = "Use BUN for tests";
        let second = "use bun for tests\nAlso run clippy";
        let merged = merge_values(&[first, second]);

        // The first spelling wins; the lowercase duplicate is dropped.
        assert!(merged.contains("Use BUN for tests"));
        assert!(merged.contains("Also run clippy"));
        assert!(!merged.contains("use bun for tests"));
    }

    #[test]
    fn find_candidates_groups_same_key_and_scope() {
        let memories = vec![
            row(1, "commands/test", "run bun test", "/proj"),
            row(2, "commands/test", "run bun test --watch", "/proj"),
            row(3, "commands/build", "cargo build --release", "/proj"),
        ];

        let groups = find_candidates(&memories, 0.5);

        assert_eq!(groups.len(), 1);
        let only = &groups[0];
        assert!(only.memory_ids.contains(&1));
        assert!(only.memory_ids.contains(&2));
        assert_eq!(only.scope, "/proj");
    }

    #[test]
    fn find_candidates_no_cross_key() {
        let memories = vec![
            row(1, "key_a", "same content here", "/"),
            row(2, "key_b", "same content here", "/"),
        ];
        assert!(find_candidates(&memories, 0.5).is_empty());
    }

    #[test]
    fn find_candidates_no_cross_scope() {
        let memories = vec![
            row(1, "k", "same content here", "/project/a"),
            row(2, "k", "same content here", "/project/b"),
        ];
        assert!(find_candidates(&memories, 0.5).is_empty());
    }

    #[test]
    fn find_candidates_below_threshold() {
        let memories = vec![
            row(1, "k", "alpha beta gamma delta", "/"),
            row(2, "k", "epsilon zeta eta theta", "/"),
        ];
        assert!(find_candidates(&memories, 0.5).is_empty());
    }
}