do_memory_core/extraction/
utils.rs1use crate::pattern::Pattern;
4use crate::types::TaskContext;
5
6#[must_use]
8pub fn deduplicate_patterns(patterns: Vec<Pattern>) -> Vec<Pattern> {
9 use std::collections::HashSet;
10
11 let mut seen = HashSet::new();
12 let mut deduplicated = Vec::new();
13
14 for pattern in patterns {
15 let key = pattern.similarity_key();
16
17 if seen.insert(key) {
18 deduplicated.push(pattern);
19 }
20 }
21
22 deduplicated
23}
24
25#[must_use]
27pub fn rank_patterns(mut patterns: Vec<Pattern>, context: &TaskContext) -> Vec<Pattern> {
28 patterns.sort_by(|a, b| {
30 let score_a = calculate_pattern_score(a, context);
31 let score_b = calculate_pattern_score(b, context);
32
33 score_b
35 .partial_cmp(&score_a)
36 .unwrap_or(std::cmp::Ordering::Equal)
37 });
38
39 patterns
40}
41
42fn calculate_pattern_score(pattern: &Pattern, current_context: &TaskContext) -> f64 {
51 let mut score = 0.0;
52
53 score += f64::from(pattern.success_rate()) * 100.0;
55
56 let sample_size = pattern.sample_size() as f64;
58 score += (sample_size.min(10.0) / 10.0) * 50.0;
59
60 if let Some(pattern_context) = pattern.context() {
62 score += calculate_context_similarity(pattern_context, current_context) * 100.0;
63 }
64
65 match pattern {
67 Pattern::ToolSequence { tools, .. } => {
68 let unique_tools = tools.iter().collect::<std::collections::HashSet<_>>().len();
70 score += (unique_tools as f64 / tools.len() as f64) * 20.0;
71 }
72 Pattern::ErrorRecovery { .. } => {
73 score += 30.0;
75 }
76 Pattern::DecisionPoint { outcome_stats, .. } => {
77 if outcome_stats.total_count > 5 {
79 score += 25.0;
80 }
81 }
82 Pattern::ContextPattern { evidence, .. } => {
83 score += (evidence.len() as f64).min(5.0) * 10.0;
85 }
86 }
87
88 let effectiveness = pattern.effectiveness();
91
92 score += f64::from(effectiveness.effectiveness_score()) * 100.0;
95
96 if effectiveness.times_applied > 0 {
99 let success_rate = effectiveness.application_success_rate();
100 let usage_confidence = (effectiveness.times_applied as f64).ln().min(3.0) / 3.0;
101 score += f64::from(success_rate) * usage_confidence * 50.0;
102 }
103
104 if effectiveness.avg_reward_delta > 0.0 {
107 let capped_delta = effectiveness.avg_reward_delta.min(0.5); score += f64::from(capped_delta) * 100.0; } else if effectiveness.avg_reward_delta < 0.0 {
110 let capped_penalty = effectiveness.avg_reward_delta.max(-0.5); score += f64::from(capped_penalty) * 100.0; }
114
115 if effectiveness.times_applied > 0 {
118 use chrono::Utc;
119 let days_since_use = (Utc::now() - effectiveness.last_used).num_days();
120 if days_since_use < 30 {
121 score += (30.0 - days_since_use as f64) / 30.0 * 10.0;
122 }
123 }
124
125 score
126}
127
128fn calculate_context_similarity(a: &TaskContext, b: &TaskContext) -> f64 {
130 let mut similarity = 0.0;
131 let mut factors = 0.0;
132
133 if a.language == b.language {
135 similarity += 1.0;
136 }
137 factors += 1.0;
138
139 if a.framework == b.framework {
141 similarity += 1.0;
142 }
143 factors += 1.0;
144
145 if a.domain == b.domain {
147 similarity += 0.8;
148 }
149 factors += 1.0;
150
151 if a.complexity == b.complexity {
153 similarity += 0.6;
154 }
155 factors += 1.0;
156
157 if !a.tags.is_empty() || !b.tags.is_empty() {
159 let a_tags: std::collections::HashSet<_> = a.tags.iter().collect();
160 let b_tags: std::collections::HashSet<_> = b.tags.iter().collect();
161 let intersection = a_tags.intersection(&b_tags).count();
162 let union = a_tags.union(&b_tags).count();
163
164 if union > 0 {
165 similarity += (intersection as f64 / union as f64) * 0.7;
166 }
167 factors += 1.0;
168 }
169
170 if factors > 0.0 {
171 similarity / factors
172 } else {
173 0.0
174 }
175}