matrixcode_core/memory/
manager.rs

1//! Memory manager and search index.
2
3use chrono::{DateTime, Utc};
4use serde::{Deserialize, Serialize};
5use std::collections::{HashMap, HashSet};
6
7use super::config::*;
8use super::entry::{MemoryCategory, MemoryEntry};
9use super::retrieval::{
10    TfIdfSearch, compute_relevance, expand_semantic_keywords, extract_context_keywords,
11    has_contradiction_signal,
12};
13use crate::providers::Message;
14use crate::truncate::truncate_with_suffix;
15
16// ============================================================================
17// Scoring Helper
18// ============================================================================
19
20/// Compare two scored entries for sorting.
21/// Manual entries always come first, then by combined score (descending).
22fn compare_scored_entries(
23    a: (&MemoryEntry, f64),
24    b: (&MemoryEntry, f64),
25    relevance_weight: f64,
26    importance_weight: f64,
27) -> std::cmp::Ordering {
28    // Manual entries always prioritized
29    if a.0.is_manual && !b.0.is_manual {
30        return std::cmp::Ordering::Less;
31    }
32    if !a.0.is_manual && b.0.is_manual {
33        return std::cmp::Ordering::Greater;
34    }
35
36    let score_a =
37        a.1 * relevance_weight + (a.0.importance / MAX_IMPORTANCE_CEILING) * importance_weight;
38    let score_b =
39        b.1 * relevance_weight + (b.0.importance / MAX_IMPORTANCE_CEILING) * importance_weight;
40
41    score_b
42        .partial_cmp(&score_a)
43        .unwrap_or(std::cmp::Ordering::Equal)
44}
45
46// ============================================================================
47// Search Index
48// ============================================================================
49
/// Precomputed lookup tables over a slice of memory entries.
///
/// Every stored `usize` is a position into the entry slice that was passed to
/// [`SearchIndex::build`]; the index is only meaningful for that exact slice
/// and must be rebuilt whenever entries are added, removed or reordered.
#[derive(Debug, Clone)]
pub struct SearchIndex {
    /// Lowercased `content` of each entry, stored at the entry's own position.
    content_lower: Vec<String>,
    /// Entry positions grouped by category, in entry order within each group.
    by_category: HashMap<MemoryCategory, Vec<usize>>,
    /// Entry positions ordered from highest to lowest importance.
    by_importance: Vec<usize>,
    /// Occurrence count of each whitespace-separated lowercase word across all
    /// entries. Populated by `build` but not read yet (kept for future
    /// relevance scoring), hence the `dead_code` allowance.
    #[allow(dead_code)]
    word_freq: HashMap<String, usize>,
}
63
64impl SearchIndex {
65    /// Build index from entries.
66    pub fn build(entries: &[MemoryEntry]) -> Self {
67        let content_lower: Vec<String> = entries.iter().map(|e| e.content.to_lowercase()).collect();
68
69        let mut by_category: HashMap<MemoryCategory, Vec<usize>> = HashMap::new();
70        for (i, entry) in entries.iter().enumerate() {
71            by_category.entry(entry.category).or_default().push(i);
72        }
73
74        let mut by_importance: Vec<usize> = (0..entries.len()).collect();
75        by_importance.sort_by(|a, b| {
76            entries[*b]
77                .importance
78                .partial_cmp(&entries[*a].importance)
79                .unwrap_or(std::cmp::Ordering::Equal)
80        });
81
82        let mut word_freq: HashMap<String, usize> = HashMap::new();
83        for content in &content_lower {
84            for word in content.split_whitespace() {
85                *word_freq.entry(word.to_string()).or_default() += 1;
86            }
87        }
88
89        Self {
90            content_lower,
91            by_category,
92            by_importance,
93            word_freq,
94        }
95    }
96
97    /// Search by query with optional limit.
98    pub fn search(
99        &self,
100        _entries: &[MemoryEntry],
101        query_lower: &str,
102        limit: Option<usize>,
103    ) -> Vec<usize> {
104        let matches: Vec<usize> = self
105            .by_importance
106            .iter()
107            .filter(|&idx| self.content_lower[*idx].contains(query_lower))
108            .copied()
109            .collect();
110
111        if let Some(max) = limit {
112            matches.into_iter().take(max).collect()
113        } else {
114            matches
115        }
116    }
117
118    /// Multi-keyword search (matches any keyword).
119    pub fn search_multi(&self, keywords_lower: &[String]) -> Vec<usize> {
120        self.by_importance
121            .iter()
122            .filter(|&idx| {
123                let content = &self.content_lower[*idx];
124                keywords_lower.iter().any(|k| content.contains(k))
125            })
126            .copied()
127            .collect()
128    }
129}
130
131// ============================================================================
132// Helper Functions for Defaults
133// ============================================================================
134
/// Serde fallback for `AutoMemory::max_entries` when the field is absent.
fn default_max_entries() -> usize {
    const FALLBACK_MAX_ENTRIES: usize = 100;
    FALLBACK_MAX_ENTRIES
}
138
/// Serde fallback for `AutoMemory::min_importance` when the field is absent.
fn default_min_importance() -> f64 {
    const FALLBACK_MIN_IMPORTANCE: f64 = 30.0;
    FALLBACK_MIN_IMPORTANCE
}
142
/// Serde fallback for `AutoMemory::enabled` when the field is absent.
fn default_enabled() -> bool {
    const FALLBACK_ENABLED: bool = true;
    FALLBACK_ENABLED
}
146
147// ============================================================================
148// Auto Memory Manager
149// ============================================================================
150
/// Manager for automatic memory accumulation.
///
/// Owns the list of memory entries together with the limits used when
/// pruning, and lazily maintains a [`SearchIndex`] for the `*_fast` lookups.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutoMemory {
    /// All memory entries.
    pub entries: Vec<MemoryEntry>,
    /// Configuration for memory management; falls back to
    /// `MemoryConfig::default()` when missing from serialized data.
    #[serde(default)]
    pub config: MemoryConfig,
    /// Legacy field kept for backward compatibility (deprecated).
    /// Still the capacity limit that `prune` enforces.
    #[serde(default = "default_max_entries")]
    pub max_entries: usize,
    /// Legacy field kept for backward compatibility (deprecated).
    /// Still the importance floor that `prune` applies to non-manual entries.
    #[serde(default = "default_min_importance")]
    pub min_importance: f64,
    /// Legacy field kept for backward compatibility (deprecated).
    #[serde(default = "default_enabled")]
    pub enabled: bool,
    /// Search index. Never serialized, so it is `None` after deserialization
    /// and is rebuilt on demand by `ensure_index` / `rebuild_index`.
    #[serde(skip)]
    search_index: Option<SearchIndex>,
}
170
171impl Default for AutoMemory {
172    fn default() -> Self {
173        let config = MemoryConfig::default();
174        Self {
175            entries: Vec::new(),
176            config: config.clone(),
177            max_entries: config.max_entries,
178            min_importance: config.min_importance,
179            enabled: config.enabled,
180            search_index: None,
181        }
182    }
183}
184
185impl AutoMemory {
186    /// Create a new auto memory manager.
187    pub fn new() -> Self {
188        Self::default()
189    }
190
191    /// Ensure search index is built.
192    fn ensure_index(&mut self) {
193        if self.search_index.is_none() {
194            self.rebuild_index();
195        }
196    }
197
198    /// Rebuild search index.
199    pub fn rebuild_index(&mut self) {
200        self.search_index = Some(SearchIndex::build(&self.entries));
201    }
202
203    /// Invalidate search index (call after modifications).
204    fn invalidate_index(&mut self) {
205        self.search_index = None;
206    }
207
208    /// Create with custom configuration.
209    pub fn with_config(config: MemoryConfig) -> Self {
210        Self {
211            entries: Vec::new(),
212            config: config.clone(),
213            max_entries: config.max_entries,
214            min_importance: config.min_importance,
215            enabled: config.enabled,
216            search_index: None,
217        }
218    }
219
220    /// Create a minimal memory manager.
221    pub fn minimal() -> Self {
222        Self::with_config(MemoryConfig::minimal())
223    }
224
225    /// Create an archival memory manager.
226    pub fn archival() -> Self {
227        Self::with_config(MemoryConfig::archival())
228    }
229
230    /// Add a new memory entry with duplicate check.
231    pub fn add(&mut self, entry: MemoryEntry) {
232        // Check for similar content before adding
233        if self.has_similar(&entry.content) {
234            log::debug!("Skipping duplicate memory: {}", entry.content);
235            return;
236        }
237
238        // Check for conflicting memories (e.g., "使用 X" vs "使用 Y")
239        if let Some(conflict_idx) = self.find_conflict(&entry.content, entry.category) {
240            let old_content = self.entries[conflict_idx].content.clone();
241            log::info!(
242                "Memory conflict: '{}' supersedes '{}'",
243                entry.content,
244                old_content
245            );
246            self.entries.remove(conflict_idx);
247            self.invalidate_index();
248        }
249
250        self.entries.push(entry);
251        self.invalidate_index();
252        self.prune();
253    }
254
255    /// Add memory from detected content.
256    pub fn add_memory(
257        &mut self,
258        category: MemoryCategory,
259        content: String,
260        source_session: Option<String>,
261    ) {
262        let entry = MemoryEntry::new(category, content, source_session, None);
263        self.add(entry);
264    }
265
266    /// Find a conflicting memory entry.
267    fn find_conflict(&self, new_content: &str, category: MemoryCategory) -> Option<usize> {
268        let new_lower = new_content.to_lowercase();
269        let new_words: HashSet<&str> = new_lower.split_whitespace().collect();
270
271        let has_change_signal = has_contradiction_signal("", &new_lower);
272        let overlap_threshold = if has_change_signal {
273            CONFLICT_OVERLAY_THRESHOLD_WITH_SIGNAL
274        } else {
275            CONFLICT_OVERLAY_THRESHOLD
276        };
277
278        for (i, entry) in self.entries.iter().enumerate() {
279            if entry.category != category {
280                continue;
281            }
282
283            let entry_lower = entry.content.to_lowercase();
284            let entry_words: HashSet<&str> = entry_lower.split_whitespace().collect();
285
286            let intersection = new_words.intersection(&entry_words).count();
287            let min_len = new_words.len().min(entry_words.len());
288
289            if min_len == 0 {
290                continue;
291            }
292
293            let topic_overlap = intersection as f64 / min_len as f64;
294            let jaccard = Self::calculate_similarity(&entry_lower, &new_lower);
295
296            if topic_overlap > overlap_threshold
297                && jaccard < SIMILARITY_THRESHOLD
298                && has_contradiction_signal(&entry_lower, &new_lower)
299            {
300                return Some(i);
301            }
302
303            if has_change_signal {
304                let old_key_terms: Vec<&str> = entry_words
305                    .iter()
306                    .filter(|w| w.len() > 2)
307                    .copied()
308                    .collect();
309                let referenced = old_key_terms.iter().any(|term| new_lower.contains(term));
310                if referenced {
311                    return Some(i);
312                }
313            }
314        }
315
316        None
317    }
318
319    /// Check if similar content already exists (using enhanced similarity).
320    pub fn has_similar(&self, content: &str) -> bool {
321        let content_lower = content.to_lowercase();
322
323        if content_lower.len() < MIN_SIMILARITY_LENGTH {
324            return false;
325        }
326
327        for e in &self.entries {
328            let entry_lower = e.content.to_lowercase();
329
330            if entry_lower == content_lower {
331                log::debug!("Exact duplicate found: {}", content);
332                return true;
333            }
334
335            if entry_lower.len() < MIN_SIMILARITY_LENGTH {
336                continue;
337            }
338
339            // Use enhanced similarity (Jaccard + semantic patterns)
340            let similarity = Self::calculate_similarity_enhanced(&entry_lower, &content_lower);
341            if similarity >= SIMILARITY_THRESHOLD {
342                log::debug!(
343                    "Similar memory found (similarity={:.2}): '{}' vs '{}'",
344                    similarity,
345                    e.content,
346                    content
347                );
348                crate::debug::debug_log().log(
349                    "MEMORY_DUPLICATE",
350                    &format!(
351                        "similarity={:.2}, existing='{}', new='{}'",
352                        similarity,
353                        truncate_with_suffix(&e.content, 50),
354                        truncate_with_suffix(content, 50)
355                    ),
356                );
357                return true;
358            }
359        }
360
361        false
362    }
363
364    /// Calculate word-based similarity between two strings (Jaccard similarity).
365    pub fn calculate_similarity(a: &str, b: &str) -> f64 {
366        let a_words: HashSet<&str> = a.split_whitespace().collect();
367        let b_words: HashSet<&str> = b.split_whitespace().collect();
368
369        if a_words.is_empty() || b_words.is_empty() {
370            return 0.0;
371        }
372
373        let intersection = a_words.intersection(&b_words).count();
374        let union = a_words.union(&b_words).count();
375
376        if union == 0 {
377            0.0
378        } else {
379            intersection as f64 / union as f64
380        }
381    }
382
383    /// Calculate enhanced similarity with semantic pattern detection.
384    /// This helps detect duplicates like "项目技术栈: Node.js" vs "项目技术栈: Rust".
385    pub fn calculate_similarity_enhanced(a: &str, b: &str) -> f64 {
386        // Calculate Jaccard similarity (word overlap)
387        let jaccard = Self::calculate_similarity(a, b);
388        
389        // Calculate semantic similarity (pattern matching)
390        let semantic = Self::calculate_semantic_similarity(a, b);
391        
392        // Return maximum to catch both word overlap and pattern matches
393        jaccard.max(semantic)
394    }
395
396    /// Calculate semantic similarity based on common patterns.
397    /// Detects if two strings follow the same pattern (e.g., both are tech stack declarations).
398    fn calculate_semantic_similarity(a: &str, b: &str) -> f64 {
399        // Common patterns that indicate same type of memory
400        let patterns = [
401            ("项目技术栈:", "技术栈"),      // Tech stack declarations
402            ("入口文件:", "入口"),          // Entry point declarations
403            ("模块位于", "位于"),           // Module location
404            ("位于 packages/", "packages/"), // Package path
405            ("核心功能:", "功能"),          // Core features
406            ("配置文件:", "配置"),          // Config files
407        ];
408        
409        for (pattern, _) in patterns {
410            if a.contains(pattern) && b.contains(pattern) {
411                // Both match same pattern → high semantic similarity
412                return 0.85;
413            }
414        }
415        
416        // Check category-specific patterns
417        let category_patterns = Self::extract_category_patterns(a);
418        let b_patterns = Self::extract_category_patterns(b);
419        
420        if !category_patterns.is_empty() && !b_patterns.is_empty() {
421            // Count matching patterns
422            let matches = category_patterns.intersection(&b_patterns).count();
423            if matches > 0 {
424                return 0.7 + (matches as f64 * 0.05).min(0.15); // 0.70-0.85
425            }
426        }
427        
428        0.0
429    }
430
431    /// Extract category-specific patterns from content.
432    fn extract_category_patterns(content: &str) -> HashSet<&'static str> {
433        let mut patterns = HashSet::new();
434        
435        // Decision patterns
436        if content.contains("决定") || content.contains("选择") || content.contains("采用") {
437            patterns.insert("decision");
438        }
439        
440        // Preference patterns
441        if content.contains("偏好") || content.contains("习惯") || content.contains("喜欢") {
442            patterns.insert("preference");
443        }
444        
445        // Solution patterns
446        if content.contains("解决") || content.contains("修复") || content.contains("通过") {
447            patterns.insert("solution");
448        }
449        
450        // Structure patterns
451        if content.contains("位于") || content.contains("入口") || content.contains("模块") {
452            patterns.insert("structure");
453        }
454        
455        // Technical patterns
456        if content.contains("技术栈") || content.contains("框架") || content.contains("库") {
457            patterns.insert("technical");
458        }
459        
460        patterns
461    }
462
463    /// Remove low-importance entries when exceeding max_entries.
464    pub fn prune(&mut self) {
465        if self.entries.len() <= self.max_entries {
466            return;
467        }
468
469        let (manual_entries, auto_entries): (Vec<_>, Vec<_>) =
470            self.entries.iter().cloned().partition(|e| e.is_manual);
471
472        let mut sorted_auto = auto_entries;
473        sorted_auto.sort_by(|a, b| {
474            let importance_cmp = b
475                .importance
476                .partial_cmp(&a.importance)
477                .unwrap_or(std::cmp::Ordering::Equal);
478            if importance_cmp == std::cmp::Ordering::Equal {
479                b.last_referenced.cmp(&a.last_referenced)
480            } else {
481                importance_cmp
482            }
483        });
484
485        let kept_auto: Vec<_> = sorted_auto
486            .into_iter()
487            .filter(|e| e.importance >= self.min_importance)
488            .take(self.max_entries.saturating_sub(manual_entries.len()))
489            .collect();
490
491        self.entries = manual_entries.into_iter().chain(kept_auto).collect();
492
493        if self.entries.len() > self.max_entries {
494            self.entries.sort_by(|a, b| {
495                let importance_cmp = b
496                    .importance
497                    .partial_cmp(&a.importance)
498                    .unwrap_or(std::cmp::Ordering::Equal);
499                if importance_cmp == std::cmp::Ordering::Equal {
500                    b.last_referenced.cmp(&a.last_referenced)
501                } else {
502                    importance_cmp
503                }
504            });
505            self.entries.truncate(self.max_entries);
506        }
507
508        self.invalidate_index();
509    }
510
511    /// Smart merge of similar memories.
512    pub fn smart_merge(&mut self) -> usize {
513        if self.entries.len() < 2 {
514            return 0;
515        }
516
517        let mut merged_count = 0;
518        let mut to_remove: Vec<String> = Vec::new();
519        let mut new_entries: Vec<MemoryEntry> = Vec::new();
520        let mut processed: HashSet<String> = HashSet::new();
521
522        for i in 0..self.entries.len() {
523            let entry_i = &self.entries[i];
524            if processed.contains(&entry_i.id) {
525                continue;
526            }
527
528            let mut similar_group: Vec<usize> = vec![i];
529
530            for j in (i + 1)..self.entries.len() {
531                let entry_j = &self.entries[j];
532                if processed.contains(&entry_j.id) {
533                    continue;
534                }
535
536                if entry_i.category != entry_j.category {
537                    continue;
538                }
539
540                let similarity = Self::calculate_similarity(&entry_i.content, &entry_j.content);
541                if similarity >= MERGE_SIMILARITY_THRESHOLD {
542                    similar_group.push(j);
543                }
544            }
545
546            if similar_group.len() >= 2 {
547                let group_entries: Vec<&MemoryEntry> = similar_group
548                    .iter()
549                    .map(|&idx| &self.entries[idx])
550                    .collect();
551
552                let merged = self.merge_group(&group_entries);
553
554                for entry in &group_entries {
555                    to_remove.push(entry.id.clone());
556                    processed.insert(entry.id.clone());
557                }
558
559                new_entries.push(merged);
560                merged_count += similar_group.len() - 1;
561            } else {
562                processed.insert(entry_i.id.clone());
563            }
564        }
565
566        for id in &to_remove {
567            self.remove(id);
568        }
569
570        for entry in new_entries {
571            self.add(entry);
572        }
573
574        if merged_count > 0 {
575            log::debug!("Smart merge: reduced {} entries", merged_count);
576            self.invalidate_index();
577        }
578
579        merged_count
580    }
581
582    /// Merge a group of similar entries into one.
583    fn merge_group(&self, entries: &[&MemoryEntry]) -> MemoryEntry {
584        // SAFETY: entries is guaranteed non-empty by caller (similar_group.len() >= 2)
585        let best = entries
586            .iter()
587            .max_by(|a, b| {
588                let score_a = a.importance + (a.content.len() as f64 / 100.0);
589                let score_b = b.importance + (b.content.len() as f64 / 100.0);
590                score_b
591                    .partial_cmp(&score_a)
592                    .unwrap_or(std::cmp::Ordering::Equal)
593            })
594            .expect("merge_group called with empty entries");
595
596        let all_same = entries
597            .iter()
598            .all(|e| Self::calculate_similarity(&e.content, &best.content) >= 0.95);
599
600        if all_same {
601            let mut merged: MemoryEntry = (*best).clone();
602            merged.importance = entries
603                .iter()
604                .map(|e| e.importance)
605                .fold(best.importance, |max, val| val.max(max));
606            merged.tags.push("merged".to_string());
607            return merged;
608        }
609
610        let mut merged_content = best.content.clone();
611
612        for entry in entries {
613            if entry.id == best.id {
614                continue;
615            }
616            let unique_words = entry
617                .content
618                .split_whitespace()
619                .filter(|word| !best.content.contains(word))
620                .take(3)
621                .collect::<Vec<_>>();
622
623            if !unique_words.is_empty() {
624                let additions = unique_words.join(", ");
625                if additions.len() > 10 {
626                    merged_content =
627                        format!("{} ({})", merged_content.trim_end_matches('.'), additions);
628                }
629            }
630        }
631
632        let mut merged = MemoryEntry::new(best.category, merged_content, None, None);
633        merged.importance = entries
634            .iter()
635            .map(|e| e.importance)
636            .fold(best.importance, |max, val| val.max(max))
637            + 5.0;
638        merged.importance = merged.importance.min(MAX_IMPORTANCE_CEILING);
639
640        merged.tags.push("merged".to_string());
641        for entry in entries {
642            for tag in &entry.tags {
643                if !merged.tags.contains(tag) && !tag.starts_with("merged") {
644                    merged.tags.push(tag.clone());
645                }
646            }
647        }
648
649        merged.is_manual = entries.iter().any(|e| e.is_manual);
650
651        merged
652    }
653
654    /// Get entries by category.
655    pub fn by_category(&self, category: MemoryCategory) -> Vec<&MemoryEntry> {
656        self.entries
657            .iter()
658            .filter(|e| e.category == category)
659            .collect()
660    }
661
662    /// Get entries by category using index.
663    pub fn by_category_fast(&mut self, category: MemoryCategory) -> Vec<&MemoryEntry> {
664        self.ensure_index();
665        if let Some(ref index) = self.search_index {
666            index
667                .by_category
668                .get(&category)
669                .map(|indices| indices.iter().map(|&i| &self.entries[i]).collect())
670                .unwrap_or_default()
671        } else {
672            self.by_category(category)
673        }
674    }
675
676    /// Get top N most important entries.
677    pub fn top_n(&self, n: usize) -> Vec<&MemoryEntry> {
678        let mut sorted: Vec<_> = self.entries.iter().collect();
679        sorted.sort_by(|a, b| {
680            b.importance
681                .partial_cmp(&a.importance)
682                .unwrap_or(std::cmp::Ordering::Equal)
683        });
684        sorted.into_iter().take(n).collect()
685    }
686
687    /// Get top N using index.
688    pub fn top_n_fast(&mut self, n: usize) -> Vec<&MemoryEntry> {
689        self.ensure_index();
690        if let Some(ref index) = self.search_index {
691            index
692                .by_importance
693                .iter()
694                .take(n)
695                .map(|&i| &self.entries[i])
696                .collect()
697        } else {
698            self.top_n(n)
699        }
700    }
701
702    /// Search entries by content or tags.
703    pub fn search(&self, query: &str) -> Vec<&MemoryEntry> {
704        self.search_with_limit(query, None)
705    }
706
707    /// Search entries with result limit.
708    pub fn search_with_limit(&self, query: &str, limit: Option<usize>) -> Vec<&MemoryEntry> {
709        let query_lower = query.to_lowercase();
710        let mut results: Vec<_> = self
711            .entries
712            .iter()
713            .filter(|e| {
714                e.content.to_lowercase().contains(&query_lower)
715                    || e.tags
716                        .iter()
717                        .any(|t| t.to_lowercase().contains(&query_lower))
718            })
719            .collect();
720
721        results.sort_by(|a, b| {
722            b.importance
723                .partial_cmp(&a.importance)
724                .unwrap_or(std::cmp::Ordering::Equal)
725        });
726
727        if let Some(max) = limit {
728            results.into_iter().take(max).collect()
729        } else {
730            results
731        }
732    }
733
734    /// Search using index.
735    pub fn search_fast(&mut self, query: &str, limit: Option<usize>) -> Vec<&MemoryEntry> {
736        self.ensure_index();
737        let query_lower = query.to_lowercase();
738
739        if let Some(ref index) = self.search_index {
740            let indices = index.search(&self.entries, &query_lower, limit);
741            indices.iter().map(|&i| &self.entries[i]).collect()
742        } else {
743            self.search_with_limit(query, limit)
744        }
745    }
746
747    /// Multi-keyword search.
748    pub fn search_multi(&self, keywords: &[&str]) -> Vec<&MemoryEntry> {
749        if keywords.is_empty() {
750            return Vec::new();
751        }
752
753        let keywords_lower: Vec<String> = keywords.iter().map(|k| k.to_lowercase()).collect();
754
755        self.entries
756            .iter()
757            .filter(|e| {
758                let content_lower = e.content.to_lowercase();
759                keywords_lower.iter().any(|k| content_lower.contains(k))
760            })
761            .collect()
762    }
763
764    /// Multi-keyword search using index.
765    pub fn search_multi_fast(&mut self, keywords: &[&str]) -> Vec<&MemoryEntry> {
766        if keywords.is_empty() {
767            return Vec::new();
768        }
769
770        self.ensure_index();
771        let keywords_lower: Vec<String> = keywords.iter().map(|k| k.to_lowercase()).collect();
772
773        if let Some(ref index) = self.search_index {
774            let indices = index.search_multi(&keywords_lower);
775            indices.iter().map(|&i| &self.entries[i]).collect()
776        } else {
777            self.search_multi(keywords)
778        }
779    }
780
781    /// Batch add multiple entries.
782    pub fn add_batch(&mut self, entries: Vec<MemoryEntry>) {
783        for entry in entries {
784            if !self.has_similar(&entry.content) {
785                self.entries.push(entry);
786            }
787        }
788        self.prune();
789    }
790
791    /// Mark entries as referenced.
792    pub fn update_references(&mut self, messages: &[Message]) {
793        let increment = self.config.reference_increment;
794
795        let texts_lower: Vec<String> = messages
796            .iter()
797            .filter_map(Self::extract_message_text_lower)
798            .collect();
799
800        let entry_contents_lower: Vec<String> = self
801            .entries
802            .iter()
803            .map(|e| e.content.to_lowercase())
804            .collect();
805
806        for (i, entry) in self.entries.iter_mut().enumerate() {
807            let entry_lower = &entry_contents_lower[i];
808            if texts_lower.iter().any(|t| t.contains(entry_lower)) {
809                entry.mark_referenced_with_increment(increment);
810            }
811        }
812    }
813
814    /// Extract lowercase text from a message.
815    fn extract_message_text_lower(msg: &Message) -> Option<String> {
816        match &msg.content {
817            crate::providers::MessageContent::Text(t) => Some(t.to_lowercase()),
818            crate::providers::MessageContent::Blocks(blocks) => {
819                let text = blocks
820                    .iter()
821                    .filter_map(|b| {
822                        if let crate::providers::ContentBlock::Text { text } = b {
823                            Some(text.as_str())
824                        } else {
825                            None
826                        }
827                    })
828                    .collect::<Vec<_>>()
829                    .join(" ");
830                Some(text.to_lowercase())
831            }
832        }
833    }
834
835    /// Generate manifest for AI selection (Claude Code style).
836    pub fn generate_manifest(&self, max_entries: usize) -> String {
837        if self.entries.is_empty() {
838            return String::new();
839        }
840
841        let mut sorted_entries: Vec<_> = self.entries.iter().enumerate().collect();
842        sorted_entries.sort_by(|a, b| {
843            b.1.importance
844                .partial_cmp(&a.1.importance)
845                .unwrap_or(std::cmp::Ordering::Equal)
846        });
847        sorted_entries.truncate(max_entries);
848
849        let mut manifest = String::new();
850        for (original_idx, entry) in sorted_entries.iter() {
851            let preview: String = entry.content.chars().take(80).collect();
852            let preview = preview.trim_end_matches('\n');
853            manifest.push_str(&format!(
854                "{}. {} {} {} (重要性: {:.0})\n",
855                original_idx,
856                entry.category.icon(),
857                preview,
858                entry.category.display_name(),
859                entry.importance
860            ));
861        }
862
863        manifest
864    }
865
866    /// Get entries by indices (from AI selection result).
867    pub fn get_entries_by_indices(&self, indices: &[usize]) -> Vec<&MemoryEntry> {
868        indices
869            .iter()
870            .filter_map(|i| self.entries.get(*i))
871            .collect()
872    }
873
874    /// Generate summary for system prompt.
875    pub fn generate_prompt_summary(&self, max_entries: usize) -> String {
876        if self.entries.is_empty() {
877            return String::new();
878        }
879
880        let top_entries = self.top_n(max_entries);
881        if top_entries.is_empty() {
882            return String::new();
883        }
884
885        let mut summary = String::from("【自动记忆摘要】\n\n");
886
887        let mut by_cat: HashMap<MemoryCategory, Vec<&MemoryEntry>> = HashMap::new();
888        for entry in top_entries {
889            by_cat.entry(entry.category).or_default().push(entry);
890        }
891
892        for (cat, entries) in by_cat {
893            summary.push_str(&format!("{} {}:\n", cat.icon(), cat.display_name()));
894            for entry in entries {
895                summary.push_str(&format!("  {}\n", entry.format_for_prompt()));
896            }
897            summary.push('\n');
898        }
899
900        summary
901    }
902
903    /// Generate context-aware summary.
904    pub fn generate_contextual_summary(&self, context: &str, max_entries: usize) -> String {
905        let keywords = extract_context_keywords(context);
906        self.generate_contextual_summary_with_keywords(&keywords, max_entries)
907    }
908
909    /// Generate context-aware summary with pre-extracted keywords.
910    pub fn generate_contextual_summary_with_keywords(
911        &self,
912        context_keywords: &[String],
913        max_entries: usize,
914    ) -> String {
915        if self.entries.is_empty() {
916            return String::new();
917        }
918
919        let expanded_keywords = expand_semantic_keywords(context_keywords);
920
921        let mut tfidf = TfIdfSearch::new();
922        tfidf.index(self);
923        let keywords_slice: Vec<&str> = expanded_keywords.iter().map(|s| s.as_str()).collect();
924        let tfidf_results = tfidf.search_multi(&keywords_slice, Some(max_entries * 2));
925
926        let mut tfidf_scores: HashMap<String, f64> = HashMap::new();
927        for (content, score) in &tfidf_results {
928            if let Some(entry) = self.entries.iter().find(|e| &e.content == content) {
929                tfidf_scores.insert(entry.id.clone(), *score);
930            }
931        }
932
933        let mut scored: Vec<(&MemoryEntry, f64)> = self
934            .entries
935            .iter()
936            .map(|entry| {
937                let relevance = compute_relevance(entry, &expanded_keywords);
938                let tfidf = tfidf_scores.get(&entry.id).copied().unwrap_or(0.0);
939                let combined = tfidf * 0.4 + relevance * 0.6;
940                (entry, combined)
941            })
942            .collect();
943
944        scored.sort_by(|a, b| {
945            compare_scored_entries(*a, *b, CONTEXT_RELEVANCE_WEIGHT, CONTEXT_IMPORTANCE_WEIGHT)
946        });
947
948        let selected: Vec<&MemoryEntry> = scored
949            .iter()
950            .take(max_entries)
951            .map(|(entry, _)| *entry)
952            .collect();
953
954        if selected.is_empty() {
955            return String::new();
956        }
957
958        let mut summary = String::from("【跨会话记忆】\n\n");
959
960        let mut by_cat: HashMap<MemoryCategory, Vec<&MemoryEntry>> = HashMap::new();
961        for entry in selected {
962            by_cat.entry(entry.category).or_default().push(entry);
963        }
964
965        for (cat, entries) in by_cat {
966            summary.push_str(&format!("{} {}:\n", cat.icon(), cat.display_name()));
967            for entry in entries {
968                summary.push_str(&format!("  {}\n", entry.format_for_prompt()));
969            }
970            summary.push('\n');
971        }
972
973        summary
974    }
975
976    /// Update reference statistics.
977    pub fn update_retrieval_stats(&mut self, retrieved_ids: &[String]) {
978        for id in retrieved_ids {
979            if let Some(entry) = self.entries.iter_mut().find(|e| &e.id == id) {
980                entry.mark_referenced();
981                log::debug!("Updated reference stats for memory {}", id);
982            }
983        }
984    }
985
986    /// Get IDs of entries for retrieval.
987    pub fn get_retrieval_ids(
988        &self,
989        context_keywords: &[String],
990        max_entries: usize,
991    ) -> Vec<String> {
992        if self.entries.is_empty() {
993            return Vec::new();
994        }
995
996        let expanded_keywords = expand_semantic_keywords(context_keywords);
997
998        let mut scored: Vec<(&MemoryEntry, f64)> = self
999            .entries
1000            .iter()
1001            .map(|entry| {
1002                let relevance = compute_relevance(entry, &expanded_keywords);
1003                (entry, relevance)
1004            })
1005            .collect();
1006
1007        scored.sort_by(|a, b| compare_scored_entries(*a, *b, 1.0, 1.0));
1008
1009        scored
1010            .iter()
1011            .take(max_entries)
1012            .map(|(e, _)| e.id.clone())
1013            .collect()
1014    }
1015
1016    /// Generate context-aware summary async.
1017    /// Note: AI keyword extraction has been removed, uses rule-based extraction now.
1018    pub async fn generate_contextual_summary_async(
1019        &self,
1020        context: &str,
1021        max_entries: usize,
1022        _fast_provider: Option<&dyn crate::providers::Provider>,
1023    ) -> String {
1024        if self.entries.is_empty() {
1025            return String::new();
1026        }
1027
1028        let context_keywords = extract_context_keywords(context);
1029
1030        let mut scored: Vec<(&MemoryEntry, f64)> = self
1031            .entries
1032            .iter()
1033            .map(|entry| {
1034                let relevance = compute_relevance(entry, &context_keywords);
1035                (entry, relevance)
1036            })
1037            .collect();
1038
1039        scored.sort_by(|a, b| {
1040            compare_scored_entries(*a, *b, CONTEXT_RELEVANCE_WEIGHT, CONTEXT_IMPORTANCE_WEIGHT)
1041        });
1042
1043        let selected: Vec<&MemoryEntry> = scored
1044            .iter()
1045            .take(max_entries)
1046            .map(|(entry, _)| *entry)
1047            .collect();
1048
1049        if selected.is_empty() {
1050            return String::new();
1051        }
1052
1053        let mut summary = String::from("【跨会话记忆】\n\n");
1054
1055        let mut by_cat: HashMap<MemoryCategory, Vec<&MemoryEntry>> = HashMap::new();
1056        for entry in selected {
1057            by_cat.entry(entry.category).or_default().push(entry);
1058        }
1059
1060        for (cat, entries) in by_cat {
1061            summary.push_str(&format!("{} {}:\n", cat.icon(), cat.display_name()));
1062            for entry in entries {
1063                summary.push_str(&format!("  {}\n", entry.format_for_prompt()));
1064            }
1065            summary.push('\n');
1066        }
1067
1068        summary
1069    }
1070
1071    /// Format all entries for display.
1072    pub fn format_all(&self) -> String {
1073        if self.entries.is_empty() {
1074            return "[no memories accumulated]".to_string();
1075        }
1076
1077        let mut result = String::from("Accumulated memories:\n\n");
1078
1079        let mut sorted: Vec<_> = self.entries.iter().collect();
1080        sorted.sort_by(|a, b| {
1081            b.importance
1082                .partial_cmp(&a.importance)
1083                .unwrap_or(std::cmp::Ordering::Equal)
1084        });
1085
1086        for entry in sorted {
1087            result.push_str(&entry.format_line());
1088            result.push('\n');
1089        }
1090
1091        result
1092    }
1093
1094    /// Generate statistics summary.
1095    pub fn generate_statistics(&self) -> MemoryStatistics {
1096        let total = self.entries.len();
1097        let manual = self.entries.iter().filter(|e| e.is_manual).count();
1098        let auto = total - manual;
1099
1100        let by_category: HashMap<MemoryCategory, usize> =
1101            self.entries.iter().fold(HashMap::new(), |mut acc, e| {
1102                *acc.entry(e.category).or_default() += 1;
1103                acc
1104            });
1105
1106        let avg_importance = if total > 0 {
1107            self.entries.iter().map(|e| e.importance).sum::<f64>() / total as f64
1108        } else {
1109            0.0
1110        };
1111
1112        let oldest = self
1113            .entries
1114            .iter()
1115            .min_by_key(|e| e.created_at)
1116            .map(|e| e.created_at);
1117        let newest = self
1118            .entries
1119            .iter()
1120            .max_by_key(|e| e.created_at)
1121            .map(|e| e.created_at);
1122
1123        let highly_referenced = self
1124            .entries
1125            .iter()
1126            .filter(|e| e.reference_count >= 3)
1127            .count();
1128
1129        MemoryStatistics {
1130            total,
1131            manual,
1132            auto,
1133            by_category,
1134            avg_importance,
1135            oldest,
1136            newest,
1137            highly_referenced,
1138        }
1139    }
1140
1141    /// Clear all memories.
1142    pub fn clear(&mut self) {
1143        self.entries.clear();
1144        self.invalidate_index();
1145    }
1146
1147    /// Remove a specific memory by ID.
1148    pub fn remove(&mut self, id: &str) -> bool {
1149        let idx = self.entries.iter().position(|e| e.id == id);
1150        if let Some(i) = idx {
1151            self.entries.remove(i);
1152            self.invalidate_index();
1153            true
1154        } else {
1155            false
1156        }
1157    }
1158
1159    /// Apply time decay to memory importance.
1160    pub fn apply_time_decay(&mut self) {
1161        let now = Utc::now();
1162        let decay_start_days = self.config.decay_start_days;
1163        let decay_rate = self.config.decay_rate;
1164        let decay_period_days = 30;
1165
1166        for entry in &mut self.entries {
1167            if entry.is_manual {
1168                continue;
1169            }
1170
1171            let days_since_reference = (now - entry.last_referenced).num_days().max(0);
1172
1173            if days_since_reference > decay_start_days {
1174                let decay_periods = (days_since_reference - decay_start_days) / decay_period_days;
1175                let decay_factor = decay_rate.powi(decay_periods as i32);
1176                entry.importance *= decay_factor;
1177                entry.importance = entry.importance.max(self.min_importance * 0.5);
1178            }
1179        }
1180
1181        self.prune();
1182    }
1183}
1184
1185// ============================================================================
1186// Memory Statistics
1187// ============================================================================
1188
1189/// Statistics about memory collection.
1190#[derive(Debug, Clone)]
1191pub struct MemoryStatistics {
1192    /// Total number of entries.
1193    pub total: usize,
1194    /// Number of manually added entries.
1195    pub manual: usize,
1196    /// Number of automatically detected entries.
1197    pub auto: usize,
1198    /// Count by category.
1199    pub by_category: HashMap<MemoryCategory, usize>,
1200    /// Average importance score.
1201    pub avg_importance: f64,
1202    /// Oldest entry creation time.
1203    pub oldest: Option<DateTime<Utc>>,
1204    /// Newest entry creation time.
1205    pub newest: Option<DateTime<Utc>>,
1206    /// Number of entries with high reference count.
1207    pub highly_referenced: usize,
1208}
1209
1210impl MemoryStatistics {
1211    /// Format statistics for display.
1212    pub fn format_summary(&self) -> String {
1213        let mut output = String::new();
1214
1215        output.push_str("记忆统计：\n");
1216        output.push_str(&format!("  总计: {} 条\n", self.total));
1217        output.push_str(&format!("  ├─ 手动添加: {} 条\n", self.manual));
1218        output.push_str(&format!("  └─ 自动检测: {} 条\n", self.auto));
1219        output.push('\n');
1220
1221        output.push_str("分类统计：\n");
1222        for (cat, count) in &self.by_category {
1223            output.push_str(&format!(
1224                "  {} {}: {} 条\n",
1225                cat.icon(),
1226                cat.display_name(),
1227                count
1228            ));
1229        }
1230        output.push('\n');
1231
1232        output.push_str("质量指标：\n");
1233        output.push_str(&format!("  平均重要性: {:.1} 分\n", self.avg_importance));
1234        output.push_str(&format!(
1235            "  高频引用: {} 条 (≥3次)\n",
1236            self.highly_referenced
1237        ));
1238
1239        if let Some(oldest) = self.oldest {
1240            let days = (Utc::now() - oldest).num_days();
1241            output.push_str(&format!("  记忆跨度: {} 天\n", days));
1242        }
1243
1244        output
1245    }
1246}
matrixcode_core/memory/manager.rs

matrixcode_core/memory/
manager.rs