retro_core/analysis/
merge.rs1use crate::models::{Pattern, PatternStatus, PatternUpdate};
2use chrono::Utc;
3use uuid::Uuid;
4
/// Cut-off used by `find_similar_pattern`: an existing pattern only counts as a
/// match when its normalized similarity to the new description is strictly
/// greater than this value.
const SIMILARITY_THRESHOLD: f64 = 0.8;
7
8pub fn process_updates(
11 updates: Vec<PatternUpdate>,
12 existing: &[Pattern],
13 project: Option<&str>,
14) -> (Vec<Pattern>, Vec<MergeUpdate>) {
15 let mut new_patterns = Vec::new();
16 let mut merge_updates = Vec::new();
17 let now = Utc::now();
18
19 for update in updates {
20 match update {
21 PatternUpdate::New(new) => {
22 if let Some(match_id) = find_similar_pattern(&new.description, existing) {
24 merge_updates.push(MergeUpdate {
26 pattern_id: match_id,
27 new_sessions: new.source_sessions,
28 new_confidence: new.confidence,
29 additional_times_seen: 1,
30 });
31 } else {
32 let pattern = Pattern {
34 id: Uuid::new_v4().to_string(),
35 pattern_type: new.pattern_type,
36 description: new.description,
37 confidence: new.confidence,
38 times_seen: 1,
39 first_seen: now,
40 last_seen: now,
41 last_projected: None,
42 status: PatternStatus::Discovered,
43 source_sessions: new.source_sessions,
44 related_files: new.related_files,
45 suggested_content: new.suggested_content,
46 suggested_target: new.suggested_target,
47 project: project.map(String::from),
48 generation_failed: false,
49 };
50 new_patterns.push(pattern);
51 }
52 }
53 PatternUpdate::Update(upd) => {
54 if existing.iter().any(|p| p.id == upd.existing_id) {
56 merge_updates.push(MergeUpdate {
57 pattern_id: upd.existing_id,
58 new_sessions: upd.new_sessions,
59 new_confidence: upd.new_confidence,
60 additional_times_seen: 1,
61 });
62 } else {
63 eprintln!(
64 "warning: AI referenced non-existent pattern ID: {}",
65 upd.existing_id
66 );
67 }
68 }
69 }
70 }
71
72 (new_patterns, merge_updates)
73}
74
/// New evidence to fold into an already-known pattern.
///
/// Produced by `process_updates`; applying it to stored patterns is left to
/// the caller.
#[derive(Debug, Clone, PartialEq)]
pub struct MergeUpdate {
    /// ID of the existing pattern this update targets.
    pub pattern_id: String,
    /// Session identifiers carried by the incoming update.
    pub new_sessions: Vec<String>,
    /// Confidence value carried by the incoming update.
    pub new_confidence: f64,
    /// Number of extra sightings to record; `process_updates` always sets 1.
    pub additional_times_seen: i64,
}
82
83fn find_similar_pattern(description: &str, existing: &[Pattern]) -> Option<String> {
86 let mut best_match: Option<(String, f64)> = None;
87
88 for pattern in existing {
89 let similarity = normalized_similarity(description, &pattern.description);
90 if similarity > SIMILARITY_THRESHOLD {
91 match &best_match {
92 Some((_, best_sim)) if similarity > *best_sim => {
93 best_match = Some((pattern.id.clone(), similarity));
94 }
95 None => {
96 best_match = Some((pattern.id.clone(), similarity));
97 }
98 _ => {}
99 }
100 }
101 }
102
103 best_match.map(|(id, _)| id)
104}
105
106pub fn normalized_similarity(a: &str, b: &str) -> f64 {
109 let a_chars: Vec<char> = a.to_lowercase().chars().collect();
110 let b_chars: Vec<char> = b.to_lowercase().chars().collect();
111 let a_len = a_chars.len();
112 let b_len = b_chars.len();
113
114 let max_len = std::cmp::max(a_len, b_len);
115 if max_len == 0 {
116 return 1.0;
117 }
118
119 let distance = levenshtein_distance(&a_chars, &b_chars);
120 1.0 - (distance as f64 / max_len as f64)
121}
122
/// Levenshtein edit distance between two character slices: the minimum number
/// of single-character insertions, deletions and substitutions that turn `a`
/// into `b`.
///
/// Only two rows of the dynamic-programming table are kept at a time, so the
/// working memory is proportional to `b.len()`.
fn levenshtein_distance(a: &[char], b: &[char]) -> usize {
    // With one side empty, the distance is just the length of the other.
    if a.is_empty() {
        return b.len();
    }
    if b.is_empty() {
        return a.len();
    }

    let width = b.len() + 1;
    // `above` is the previous table row and starts as the cost of building
    // each prefix of `b` from nothing; `row` is the one being filled in.
    let mut above: Vec<usize> = (0..width).collect();
    let mut row: Vec<usize> = vec![0; width];

    for (i, left) in a.iter().enumerate() {
        // Cost of deleting the first `i + 1` characters of `a`.
        row[0] = i + 1;
        for (j, right) in b.iter().enumerate() {
            let substitute = above[j] + usize::from(left != right);
            let delete = above[j + 1] + 1;
            let insert = row[j] + 1;
            row[j + 1] = delete.min(insert).min(substitute);
        }
        // The row just finished becomes "previous" for the next character.
        std::mem::swap(&mut above, &mut row);
    }

    above[b.len()]
}
152
#[cfg(test)]
mod tests {
    use super::*;

    /// True when `score` is within machine epsilon of a perfect 1.0.
    fn is_perfect(score: f64) -> bool {
        (score - 1.0).abs() < f64::EPSILON
    }

    /// A string compared with itself is a perfect match.
    #[test]
    fn test_identical_strings() {
        let score = normalized_similarity("hello", "hello");
        assert!(is_perfect(score));
    }

    /// Strings sharing no characters score well below one half.
    #[test]
    fn test_completely_different() {
        let score = normalized_similarity("abc", "xyz");
        assert!(score < 0.5);
    }

    /// Descriptions that differ only in their tail still score highly.
    #[test]
    fn test_similar_strings() {
        let shorter = "Always use uv for Python packages";
        let longer = "Always use uv for Python package management";
        let score = normalized_similarity(shorter, longer);
        assert!(score > 0.7);
    }

    /// Two empty strings count as identical.
    #[test]
    fn test_empty_strings() {
        let score = normalized_similarity("", "");
        assert!(is_perfect(score));
    }

    /// Letter case does not affect the score.
    #[test]
    fn test_case_insensitive() {
        let score = normalized_similarity("Hello World", "hello world");
        assert!(is_perfect(score));
    }
}