use crate::models::{Pattern, PatternStatus, PatternUpdate};
use chrono::Utc;
use uuid::Uuid;
/// Similarity score that a new description must strictly exceed (compared
/// with `>` in `find_similar_pattern`) to be treated as a duplicate of an
/// existing pattern.
const SIMILARITY_THRESHOLD: f64 = 0.8;
/// Sorts a batch of pattern updates into brand-new patterns and merges
/// against patterns that already exist.
///
/// * `updates` - the updates to process, consumed in order.
/// * `existing` - the already-known patterns to match against.
/// * `project` - optional project name copied onto every newly created pattern.
///
/// Returns `(new_patterns, merge_updates)`:
///
/// * A `PatternUpdate::New` whose description is similar enough to an existing
///   pattern (see `find_similar_pattern`) becomes a `MergeUpdate` for that
///   pattern; otherwise it becomes a fresh `Pattern` with a random v4 UUID,
///   `times_seen` of 1, `PatternStatus::Discovered`, and both timestamps set
///   to the time this function was called.
/// * A `PatternUpdate::Update` becomes a `MergeUpdate` when its `existing_id`
///   is found in `existing`; otherwise a warning is written to stderr and the
///   update is dropped.
pub fn process_updates(
    updates: Vec<PatternUpdate>,
    existing: &[Pattern],
    project: Option<&str>,
) -> (Vec<Pattern>, Vec<MergeUpdate>) {
    // One timestamp is shared by every pattern created in this batch.
    let timestamp = Utc::now();
    let mut created: Vec<Pattern> = Vec::new();
    let mut merges: Vec<MergeUpdate> = Vec::new();

    for update in updates {
        match update {
            PatternUpdate::New(candidate) => {
                // Resolve the lookup first so the borrow of the description
                // has ended before the candidate's fields are moved out.
                let similar = find_similar_pattern(&candidate.description, existing);
                match similar {
                    Some(pattern_id) => merges.push(MergeUpdate {
                        pattern_id,
                        new_sessions: candidate.source_sessions,
                        new_confidence: candidate.confidence,
                        additional_times_seen: 1,
                    }),
                    None => created.push(Pattern {
                        id: Uuid::new_v4().to_string(),
                        pattern_type: candidate.pattern_type,
                        description: candidate.description,
                        confidence: candidate.confidence,
                        times_seen: 1,
                        first_seen: timestamp,
                        last_seen: timestamp,
                        last_projected: None,
                        status: PatternStatus::Discovered,
                        source_sessions: candidate.source_sessions,
                        related_files: candidate.related_files,
                        suggested_content: candidate.suggested_content,
                        suggested_target: candidate.suggested_target,
                        project: project.map(String::from),
                        generation_failed: false,
                    }),
                }
            }
            PatternUpdate::Update(change) => {
                let is_known = existing.iter().any(|p| p.id == change.existing_id);
                if !is_known {
                    // Unknown IDs are reported and skipped rather than
                    // failing the whole batch.
                    eprintln!(
                        "warning: AI referenced non-existent pattern ID: {}",
                        change.existing_id
                    );
                    continue;
                }
                merges.push(MergeUpdate {
                    pattern_id: change.existing_id,
                    new_sessions: change.new_sessions,
                    new_confidence: change.new_confidence,
                    additional_times_seen: 1,
                });
            }
        }
    }

    (created, merges)
}
/// A pending change to apply to an already-existing pattern.
///
/// Produced by `process_updates` either when a new description matches an
/// existing pattern or when an update references a known pattern ID.
#[derive(Debug, Clone, PartialEq)]
pub struct MergeUpdate {
    /// ID of the existing pattern this update targets.
    pub pattern_id: String,
    /// Session identifiers carried by the update that produced this merge.
    pub new_sessions: Vec<String>,
    /// Confidence value reported by the update that produced this merge.
    pub new_confidence: f64,
    /// Number of additional sightings to record; `process_updates` always
    /// sets this to 1.
    pub additional_times_seen: i64,
}
/// Returns the ID of the existing pattern whose description is most similar
/// to `description`, or `None` when no pattern scores strictly above
/// `SIMILARITY_THRESHOLD`.
///
/// When several patterns share the highest score, the one that appears first
/// in `existing` is returned.
fn find_similar_pattern(description: &str, existing: &[Pattern]) -> Option<String> {
    existing
        .iter()
        .map(|pattern| {
            (
                pattern,
                normalized_similarity(description, &pattern.description),
            )
        })
        .filter(|&(_, score)| score > SIMILARITY_THRESHOLD)
        .fold(
            None,
            |best: Option<(&Pattern, f64)>, (pattern, score)| {
                // Only a strictly higher score displaces the current best, so
                // earlier patterns win ties.
                let improves = best.map_or(true, |(_, top)| score > top);
                if improves {
                    Some((pattern, score))
                } else {
                    best
                }
            },
        )
        .map(|(pattern, _)| pattern.id.clone())
}
/// Case-insensitive similarity between two strings, in the range `0.0..=1.0`.
///
/// Both inputs are lowercased and compared as sequences of `char`s. The score
/// is `1 - distance / longest`, where `distance` is the Levenshtein edit
/// distance and `longest` is the length (in `char`s) of the longer input.
/// Two empty strings are considered identical and score `1.0`.
pub fn normalized_similarity(a: &str, b: &str) -> f64 {
    let fold_case = |text: &str| -> Vec<char> { text.to_lowercase().chars().collect() };
    let lhs = fold_case(a);
    let rhs = fold_case(b);

    match lhs.len().max(rhs.len()) {
        // Nothing to compare: avoid dividing by zero and report a perfect match.
        0 => 1.0,
        longest => {
            let edits = levenshtein_distance(&lhs, &rhs);
            1.0 - (edits as f64 / longest as f64)
        }
    }
}
/// Levenshtein edit distance between two `char` sequences: the minimum number
/// of single-character insertions, deletions, and substitutions needed to
/// turn `a` into `b`.
///
/// Only two rows of the dynamic-programming table are kept at a time.
fn levenshtein_distance(a: &[char], b: &[char]) -> usize {
    // Against an empty sequence the distance is the other sequence's length.
    if a.is_empty() || b.is_empty() {
        return a.len().max(b.len());
    }

    let width = b.len() + 1;
    // `above` starts as the distances from the empty prefix of `a`.
    let mut above: Vec<usize> = (0..width).collect();
    let mut row: Vec<usize> = vec![0; width];

    for (r, &from) in a.iter().enumerate() {
        // Turning the first `r + 1` chars of `a` into nothing takes `r + 1` deletions.
        row[0] = r + 1;
        for (c, &to) in b.iter().enumerate() {
            let substitute = above[c] + usize::from(from != to);
            let delete = above[c + 1] + 1;
            let insert = row[c] + 1;
            row[c + 1] = delete.min(insert).min(substitute);
        }
        // The row just filled becomes the previous row for the next char of `a`.
        std::mem::swap(&mut above, &mut row);
    }

    above[b.len()]
}
#[cfg(test)]
mod tests {
    use super::*;

    /// True when `score` is within `f64::EPSILON` of a perfect `1.0`.
    fn is_perfect(score: f64) -> bool {
        (score - 1.0).abs() < f64::EPSILON
    }

    #[test]
    fn test_identical_strings() {
        let score = normalized_similarity("hello", "hello");
        assert!(is_perfect(score));
    }

    #[test]
    fn test_completely_different() {
        // No characters in common, so the score should be low.
        assert!(normalized_similarity("abc", "xyz") < 0.5);
    }

    #[test]
    fn test_similar_strings() {
        let shorter = "Always use uv for Python packages";
        let longer = "Always use uv for Python package management";
        assert!(normalized_similarity(shorter, longer) > 0.7);
    }

    #[test]
    fn test_empty_strings() {
        // Two empty inputs are defined to be identical.
        let score = normalized_similarity("", "");
        assert!(is_perfect(score));
    }

    #[test]
    fn test_case_insensitive() {
        let score = normalized_similarity("Hello World", "hello world");
        assert!(is_perfect(score));
    }
}