Skip to main content

do_memory_core/extraction/
utils.rs

//! Utility functions for pattern processing
2
3use crate::pattern::Pattern;
4use crate::types::TaskContext;
5
6/// Remove duplicate patterns from a list
7#[must_use]
8pub fn deduplicate_patterns(patterns: Vec<Pattern>) -> Vec<Pattern> {
9    use std::collections::HashSet;
10
11    let mut seen = HashSet::new();
12    let mut deduplicated = Vec::new();
13
14    for pattern in patterns {
15        let key = pattern.similarity_key();
16
17        if seen.insert(key) {
18            deduplicated.push(pattern);
19        }
20    }
21
22    deduplicated
23}
24
25/// Rank patterns by relevance/quality
26#[must_use]
27pub fn rank_patterns(mut patterns: Vec<Pattern>, context: &TaskContext) -> Vec<Pattern> {
28    // Sort patterns by a composite score considering multiple factors
29    patterns.sort_by(|a, b| {
30        let score_a = calculate_pattern_score(a, context);
31        let score_b = calculate_pattern_score(b, context);
32
33        // Sort in descending order (higher score first)
34        score_b
35            .partial_cmp(&score_a)
36            .unwrap_or(std::cmp::Ordering::Equal)
37    });
38
39    patterns
40}
41
42/// Calculate a relevance score for a pattern given the current context
43///
44/// Scoring system (max ~400+ points):
45/// - Base success rate: 0-100 points
46/// - Sample size: 0-50 points
47/// - Context relevance: 0-100 points
48/// - Pattern type bonuses: 0-50 points
49/// - **Effectiveness tracking: 0-200 points** (NEW)
50fn calculate_pattern_score(pattern: &Pattern, current_context: &TaskContext) -> f64 {
51    let mut score = 0.0;
52
53    // Base score from success rate (0-100 points)
54    score += f64::from(pattern.success_rate()) * 100.0;
55
56    // Sample size bonus (0-50 points, diminishing returns)
57    let sample_size = pattern.sample_size() as f64;
58    score += (sample_size.min(10.0) / 10.0) * 50.0;
59
60    // Context relevance bonus (0-100 points)
61    if let Some(pattern_context) = pattern.context() {
62        score += calculate_context_similarity(pattern_context, current_context) * 100.0;
63    }
64
65    // Pattern type specific bonuses
66    match pattern {
67        Pattern::ToolSequence { tools, .. } => {
68            // Prefer patterns with diverse tool usage
69            let unique_tools = tools.iter().collect::<std::collections::HashSet<_>>().len();
70            score += (unique_tools as f64 / tools.len() as f64) * 20.0;
71        }
72        Pattern::ErrorRecovery { .. } => {
73            // Error recovery patterns are valuable for robustness
74            score += 30.0;
75        }
76        Pattern::DecisionPoint { outcome_stats, .. } => {
77            // Decision points with clear outcomes are more valuable
78            if outcome_stats.total_count > 5 {
79                score += 25.0;
80            }
81        }
82        Pattern::ContextPattern { evidence, .. } => {
83            // Context patterns with more evidence are better
84            score += (evidence.len() as f64).min(5.0) * 10.0;
85        }
86    }
87
88    // **NEW: Effectiveness-based scoring (0-200 points)**
89    // This is the key metric for self-learning!
90    let effectiveness = pattern.effectiveness();
91
92    // 1. Effectiveness score boost (0-100 points)
93    // Combines success rate, usage confidence, and reward impact
94    score += f64::from(effectiveness.effectiveness_score()) * 100.0;
95
96    // 2. Proven usage bonus (0-50 points)
97    // Patterns that have been successfully applied get priority
98    if effectiveness.times_applied > 0 {
99        let success_rate = effectiveness.application_success_rate();
100        let usage_confidence = (effectiveness.times_applied as f64).ln().min(3.0) / 3.0;
101        score += f64::from(success_rate) * usage_confidence * 50.0;
102    }
103
104    // 3. Reward delta bonus (0-50 points)
105    // Patterns that improve outcomes get strong preference
106    if effectiveness.avg_reward_delta > 0.0 {
107        let capped_delta = effectiveness.avg_reward_delta.min(0.5); // Cap at +0.5
108        score += f64::from(capped_delta) * 100.0; // 0.5 delta = 50 points
109    } else if effectiveness.avg_reward_delta < 0.0 {
110        // Penalize patterns that hurt performance
111        let capped_penalty = effectiveness.avg_reward_delta.max(-0.5); // Cap at -0.5
112        score += f64::from(capped_penalty) * 100.0; // Can subtract up to 50 points
113    }
114
115    // 4. Recency bonus (0-10 points)
116    // Recently used patterns are more likely to be relevant
117    if effectiveness.times_applied > 0 {
118        use chrono::Utc;
119        let days_since_use = (Utc::now() - effectiveness.last_used).num_days();
120        if days_since_use < 30 {
121            score += (30.0 - days_since_use as f64) / 30.0 * 10.0;
122        }
123    }
124
125    score
126}
127
128/// Calculate similarity between two task contexts (0.0 to 1.0)
129fn calculate_context_similarity(a: &TaskContext, b: &TaskContext) -> f64 {
130    let mut similarity = 0.0;
131    let mut factors = 0.0;
132
133    // Language match (high weight)
134    if a.language == b.language {
135        similarity += 1.0;
136    }
137    factors += 1.0;
138
139    // Framework match (high weight)
140    if a.framework == b.framework {
141        similarity += 1.0;
142    }
143    factors += 1.0;
144
145    // Domain match (medium weight)
146    if a.domain == b.domain {
147        similarity += 0.8;
148    }
149    factors += 1.0;
150
151    // Complexity level match (medium weight)
152    if a.complexity == b.complexity {
153        similarity += 0.6;
154    }
155    factors += 1.0;
156
157    // Tag overlap (variable weight based on overlap)
158    if !a.tags.is_empty() || !b.tags.is_empty() {
159        let a_tags: std::collections::HashSet<_> = a.tags.iter().collect();
160        let b_tags: std::collections::HashSet<_> = b.tags.iter().collect();
161        let intersection = a_tags.intersection(&b_tags).count();
162        let union = a_tags.union(&b_tags).count();
163
164        if union > 0 {
165            similarity += (intersection as f64 / union as f64) * 0.7;
166        }
167        factors += 1.0;
168    }
169
170    if factors > 0.0 {
171        similarity / factors
172    } else {
173        0.0
174    }
175}