Skip to main content

matrixcode_core/memory/
manager.rs

1//! Memory manager and search index.
2
3use chrono::{DateTime, Utc};
4use serde::{Deserialize, Serialize};
5use std::collections::{HashMap, HashSet};
6
7use super::config::*;
8use super::entry::{MemoryCategory, MemoryEntry};
9use super::retrieval::{
10    TfIdfSearch, compute_relevance, expand_semantic_keywords, extract_context_keywords,
11    has_contradiction_signal,
12};
13use crate::providers::Message;
14use crate::truncate::truncate_with_suffix;
15
16// ============================================================================
17// Scoring Helper
18// ============================================================================
19
20/// Compare two scored entries for sorting.
21/// Manual entries always come first, then by combined score (descending).
22fn compare_scored_entries(
23    a: (&MemoryEntry, f64),
24    b: (&MemoryEntry, f64),
25    relevance_weight: f64,
26    importance_weight: f64,
27) -> std::cmp::Ordering {
28    // Manual entries always prioritized
29    if a.0.is_manual && !b.0.is_manual {
30        return std::cmp::Ordering::Less;
31    }
32    if !a.0.is_manual && b.0.is_manual {
33        return std::cmp::Ordering::Greater;
34    }
35
36    let score_a = a.1 * relevance_weight + (a.0.importance / MAX_IMPORTANCE_CEILING) * importance_weight;
37    let score_b = b.1 * relevance_weight + (b.0.importance / MAX_IMPORTANCE_CEILING) * importance_weight;
38
39    score_b.partial_cmp(&score_a).unwrap_or(std::cmp::Ordering::Equal)
40}
41
42// ============================================================================
43// Search Index
44// ============================================================================
45
46/// Search index for fast lookups.
47#[derive(Debug, Clone)]
48pub struct SearchIndex {
49    /// Lowercase content cache for each entry.
50    content_lower: Vec<String>,
51    /// Entries grouped by category.
52    by_category: HashMap<MemoryCategory, Vec<usize>>,
53    /// Entries sorted by importance (indices).
54    by_importance: Vec<usize>,
55    /// Total word frequency for relevance scoring (future use).
56    #[allow(dead_code)]
57    word_freq: HashMap<String, usize>,
58}
59
60impl SearchIndex {
61    /// Build index from entries.
62    pub fn build(entries: &[MemoryEntry]) -> Self {
63        let content_lower: Vec<String> = entries.iter().map(|e| e.content.to_lowercase()).collect();
64
65        let mut by_category: HashMap<MemoryCategory, Vec<usize>> = HashMap::new();
66        for (i, entry) in entries.iter().enumerate() {
67            by_category.entry(entry.category).or_default().push(i);
68        }
69
70        let mut by_importance: Vec<usize> = (0..entries.len()).collect();
71        by_importance.sort_by(|a, b| {
72            entries[*b]
73                .importance
74                .partial_cmp(&entries[*a].importance)
75                .unwrap_or(std::cmp::Ordering::Equal)
76        });
77
78        let mut word_freq: HashMap<String, usize> = HashMap::new();
79        for content in &content_lower {
80            for word in content.split_whitespace() {
81                *word_freq.entry(word.to_string()).or_default() += 1;
82            }
83        }
84
85        Self {
86            content_lower,
87            by_category,
88            by_importance,
89            word_freq,
90        }
91    }
92
93    /// Search by query with optional limit.
94    pub fn search(
95        &self,
96        _entries: &[MemoryEntry],
97        query_lower: &str,
98        limit: Option<usize>,
99    ) -> Vec<usize> {
100        let matches: Vec<usize> = self
101            .by_importance
102            .iter()
103            .filter(|&idx| self.content_lower[*idx].contains(query_lower))
104            .copied()
105            .collect();
106
107        if let Some(max) = limit {
108            matches.into_iter().take(max).collect()
109        } else {
110            matches
111        }
112    }
113
114    /// Multi-keyword search (matches any keyword).
115    pub fn search_multi(&self, keywords_lower: &[String]) -> Vec<usize> {
116        self.by_importance
117            .iter()
118            .filter(|&idx| {
119                let content = &self.content_lower[*idx];
120                keywords_lower.iter().any(|k| content.contains(k))
121            })
122            .copied()
123            .collect()
124    }
125}
126
127// ============================================================================
128// Helper Functions for Defaults
129// ============================================================================
130
/// Serde fallback for `AutoMemory::max_entries` when the field is missing
/// from the serialized data.
fn default_max_entries() -> usize {
    100_usize
}
134
/// Serde fallback for `AutoMemory::min_importance` when the field is missing
/// from the serialized data.
fn default_min_importance() -> f64 {
    30.0_f64
}
138
/// Serde fallback for `AutoMemory::enabled` when the field is missing from
/// the serialized data.
fn default_enabled() -> bool {
    // Memory accumulation is on unless explicitly disabled.
    true
}
142
143// ============================================================================
144// Auto Memory Manager
145// ============================================================================
146
147/// Manager for automatic memory accumulation.
148#[derive(Debug, Clone, Serialize, Deserialize)]
149pub struct AutoMemory {
150    /// All memory entries.
151    pub entries: Vec<MemoryEntry>,
152    /// Configuration for memory management.
153    #[serde(default)]
154    pub config: MemoryConfig,
155    /// Legacy fields for backward compatibility (deprecated).
156    #[serde(default = "default_max_entries")]
157    pub max_entries: usize,
158    #[serde(default = "default_min_importance")]
159    pub min_importance: f64,
160    #[serde(default = "default_enabled")]
161    pub enabled: bool,
162    /// Search index (not serialized, rebuilt on load).
163    #[serde(skip)]
164    search_index: Option<SearchIndex>,
165}
166
167impl Default for AutoMemory {
168    fn default() -> Self {
169        let config = MemoryConfig::default();
170        Self {
171            entries: Vec::new(),
172            config: config.clone(),
173            max_entries: config.max_entries,
174            min_importance: config.min_importance,
175            enabled: config.enabled,
176            search_index: None,
177        }
178    }
179}
180
181impl AutoMemory {
182    /// Create a new auto memory manager.
183    pub fn new() -> Self {
184        Self::default()
185    }
186
187    /// Ensure search index is built.
188    fn ensure_index(&mut self) {
189        if self.search_index.is_none() {
190            self.rebuild_index();
191        }
192    }
193
194    /// Rebuild search index.
195    pub fn rebuild_index(&mut self) {
196        self.search_index = Some(SearchIndex::build(&self.entries));
197    }
198
199    /// Invalidate search index (call after modifications).
200    fn invalidate_index(&mut self) {
201        self.search_index = None;
202    }
203
204    /// Create with custom configuration.
205    pub fn with_config(config: MemoryConfig) -> Self {
206        Self {
207            entries: Vec::new(),
208            config: config.clone(),
209            max_entries: config.max_entries,
210            min_importance: config.min_importance,
211            enabled: config.enabled,
212            search_index: None,
213        }
214    }
215
216    /// Create a minimal memory manager.
217    pub fn minimal() -> Self {
218        Self::with_config(MemoryConfig::minimal())
219    }
220
221    /// Create an archival memory manager.
222    pub fn archival() -> Self {
223        Self::with_config(MemoryConfig::archival())
224    }
225
226    /// Add a new memory entry with duplicate check.
227    pub fn add(&mut self, entry: MemoryEntry) {
228        // Check for similar content before adding
229        if self.has_similar(&entry.content) {
230            log::debug!("Skipping duplicate memory: {}", entry.content);
231            return;
232        }
233
234        // Check for conflicting memories (e.g., "使用 X" vs "使用 Y")
235        if let Some(conflict_idx) = self.find_conflict(&entry.content, entry.category) {
236            let old_content = self.entries[conflict_idx].content.clone();
237            log::info!(
238                "Memory conflict: '{}' supersedes '{}'",
239                entry.content,
240                old_content
241            );
242            self.entries.remove(conflict_idx);
243            self.invalidate_index();
244        }
245
246        self.entries.push(entry);
247        self.invalidate_index();
248        self.prune();
249    }
250
251    /// Add memory from detected content.
252    pub fn add_memory(
253        &mut self,
254        category: MemoryCategory,
255        content: String,
256        source_session: Option<String>,
257    ) {
258        let entry = MemoryEntry::new(category, content, source_session, None);
259        self.add(entry);
260    }
261
262    /// Find a conflicting memory entry.
263    fn find_conflict(&self, new_content: &str, category: MemoryCategory) -> Option<usize> {
264        let new_lower = new_content.to_lowercase();
265        let new_words: HashSet<&str> = new_lower.split_whitespace().collect();
266
267        let has_change_signal = has_contradiction_signal("", &new_lower);
268        let overlap_threshold = if has_change_signal {
269            CONFLICT_OVERLAY_THRESHOLD_WITH_SIGNAL
270        } else {
271            CONFLICT_OVERLAY_THRESHOLD
272        };
273
274        for (i, entry) in self.entries.iter().enumerate() {
275            if entry.category != category {
276                continue;
277            }
278
279            let entry_lower = entry.content.to_lowercase();
280            let entry_words: HashSet<&str> = entry_lower.split_whitespace().collect();
281
282            let intersection = new_words.intersection(&entry_words).count();
283            let min_len = new_words.len().min(entry_words.len());
284
285            if min_len == 0 {
286                continue;
287            }
288
289            let topic_overlap = intersection as f64 / min_len as f64;
290            let jaccard = Self::calculate_similarity(&entry_lower, &new_lower);
291
292            if topic_overlap > overlap_threshold
293                && jaccard < SIMILARITY_THRESHOLD
294                && has_contradiction_signal(&entry_lower, &new_lower)
295            {
296                return Some(i);
297            }
298
299            if has_change_signal {
300                let old_key_terms: Vec<&str> = entry_words
301                    .iter()
302                    .filter(|w| w.len() > 2)
303                    .copied()
304                    .collect();
305                let referenced = old_key_terms.iter().any(|term| new_lower.contains(term));
306                if referenced {
307                    return Some(i);
308                }
309            }
310        }
311
312        None
313    }
314
315    /// Check if similar content already exists.
316    pub fn has_similar(&self, content: &str) -> bool {
317        let content_lower = content.to_lowercase();
318
319        if content_lower.len() < MIN_SIMILARITY_LENGTH {
320            return false;
321        }
322
323        for e in &self.entries {
324            let entry_lower = e.content.to_lowercase();
325
326            if entry_lower == content_lower {
327                log::debug!("Exact duplicate found: {}", content);
328                return true;
329            }
330
331            if entry_lower.len() < MIN_SIMILARITY_LENGTH {
332                continue;
333            }
334
335            let similarity = Self::calculate_similarity(&entry_lower, &content_lower);
336            if similarity >= SIMILARITY_THRESHOLD {
337                log::debug!(
338                    "Similar memory found (similarity={:.2}): '{}' vs '{}'",
339                    similarity,
340                    e.content,
341                    content
342                );
343                crate::debug::debug_log().log("MEMORY_DUPLICATE",
344                    &format!("similarity={:.2}, existing='{}', new='{}'",
345                        similarity,
346                        truncate_with_suffix(&e.content, 50),
347                        truncate_with_suffix(content, 50)));
348                return true;
349            }
350        }
351
352        false
353    }
354
355    /// Calculate word-based similarity between two strings.
356    pub fn calculate_similarity(a: &str, b: &str) -> f64 {
357        let a_words: HashSet<&str> = a.split_whitespace().collect();
358        let b_words: HashSet<&str> = b.split_whitespace().collect();
359
360        if a_words.is_empty() || b_words.is_empty() {
361            return 0.0;
362        }
363
364        let intersection = a_words.intersection(&b_words).count();
365        let union = a_words.union(&b_words).count();
366
367        if union == 0 {
368            0.0
369        } else {
370            intersection as f64 / union as f64
371        }
372    }
373
374    /// Remove low-importance entries when exceeding max_entries.
375    pub fn prune(&mut self) {
376        if self.entries.len() <= self.max_entries {
377            return;
378        }
379
380        let (manual_entries, auto_entries): (Vec<_>, Vec<_>) =
381            self.entries.iter().cloned().partition(|e| e.is_manual);
382
383        let mut sorted_auto = auto_entries;
384        sorted_auto.sort_by(|a, b| {
385            let importance_cmp = b
386                .importance
387                .partial_cmp(&a.importance)
388                .unwrap_or(std::cmp::Ordering::Equal);
389            if importance_cmp == std::cmp::Ordering::Equal {
390                b.last_referenced.cmp(&a.last_referenced)
391            } else {
392                importance_cmp
393            }
394        });
395
396        let kept_auto: Vec<_> = sorted_auto
397            .into_iter()
398            .filter(|e| e.importance >= self.min_importance)
399            .take(self.max_entries.saturating_sub(manual_entries.len()))
400            .collect();
401
402        self.entries = manual_entries.into_iter().chain(kept_auto).collect();
403
404        if self.entries.len() > self.max_entries {
405            self.entries.sort_by(|a, b| {
406                let importance_cmp = b
407                    .importance
408                    .partial_cmp(&a.importance)
409                    .unwrap_or(std::cmp::Ordering::Equal);
410                if importance_cmp == std::cmp::Ordering::Equal {
411                    b.last_referenced.cmp(&a.last_referenced)
412                } else {
413                    importance_cmp
414                }
415            });
416            self.entries.truncate(self.max_entries);
417        }
418
419        self.invalidate_index();
420    }
421
422    /// Smart merge of similar memories.
423    pub fn smart_merge(&mut self) -> usize {
424        if self.entries.len() < 2 {
425            return 0;
426        }
427
428        let mut merged_count = 0;
429        let mut to_remove: Vec<String> = Vec::new();
430        let mut new_entries: Vec<MemoryEntry> = Vec::new();
431        let mut processed: HashSet<String> = HashSet::new();
432
433        for i in 0..self.entries.len() {
434            let entry_i = &self.entries[i];
435            if processed.contains(&entry_i.id) {
436                continue;
437            }
438
439            let mut similar_group: Vec<usize> = vec![i];
440
441            for j in (i + 1)..self.entries.len() {
442                let entry_j = &self.entries[j];
443                if processed.contains(&entry_j.id) {
444                    continue;
445                }
446
447                if entry_i.category != entry_j.category {
448                    continue;
449                }
450
451                let similarity = Self::calculate_similarity(&entry_i.content, &entry_j.content);
452                if similarity >= MERGE_SIMILARITY_THRESHOLD {
453                    similar_group.push(j);
454                }
455            }
456
457            if similar_group.len() >= 2 {
458                let group_entries: Vec<&MemoryEntry> = similar_group
459                    .iter()
460                    .map(|&idx| &self.entries[idx])
461                    .collect();
462
463                let merged = self.merge_group(&group_entries);
464
465                for entry in &group_entries {
466                    to_remove.push(entry.id.clone());
467                    processed.insert(entry.id.clone());
468                }
469
470                new_entries.push(merged);
471                merged_count += similar_group.len() - 1;
472            } else {
473                processed.insert(entry_i.id.clone());
474            }
475        }
476
477        for id in &to_remove {
478            self.remove(id);
479        }
480
481        for entry in new_entries {
482            self.add(entry);
483        }
484
485        if merged_count > 0 {
486            log::debug!("Smart merge: reduced {} entries", merged_count);
487            self.invalidate_index();
488        }
489
490        merged_count
491    }
492
493    /// Merge a group of similar entries into one.
494    fn merge_group(&self, entries: &[&MemoryEntry]) -> MemoryEntry {
495        // SAFETY: entries is guaranteed non-empty by caller (similar_group.len() >= 2)
496        let best = entries
497            .iter()
498            .max_by(|a, b| {
499                let score_a = a.importance + (a.content.len() as f64 / 100.0);
500                let score_b = b.importance + (b.content.len() as f64 / 100.0);
501                score_b
502                    .partial_cmp(&score_a)
503                    .unwrap_or(std::cmp::Ordering::Equal)
504            })
505            .expect("merge_group called with empty entries");
506
507        let all_same = entries
508            .iter()
509            .all(|e| Self::calculate_similarity(&e.content, &best.content) >= 0.95);
510
511        if all_same {
512            let mut merged: MemoryEntry = (*best).clone();
513            merged.importance = entries
514                .iter()
515                .map(|e| e.importance)
516                .fold(best.importance, |max, val| val.max(max));
517            merged.tags.push("merged".to_string());
518            return merged;
519        }
520
521        let mut merged_content = best.content.clone();
522
523        for entry in entries {
524            if entry.id == best.id {
525                continue;
526            }
527            let unique_words = entry
528                .content
529                .split_whitespace()
530                .filter(|word| !best.content.contains(word))
531                .take(3)
532                .collect::<Vec<_>>();
533
534            if !unique_words.is_empty() {
535                let additions = unique_words.join(", ");
536                if additions.len() > 10 {
537                    merged_content =
538                        format!("{} ({})", merged_content.trim_end_matches('.'), additions);
539                }
540            }
541        }
542
543        let mut merged = MemoryEntry::new(best.category, merged_content, None, None);
544        merged.importance = entries
545            .iter()
546            .map(|e| e.importance)
547            .fold(best.importance, |max, val| val.max(max))
548            + 5.0;
549        merged.importance = merged.importance.min(MAX_IMPORTANCE_CEILING);
550
551        merged.tags.push("merged".to_string());
552        for entry in entries {
553            for tag in &entry.tags {
554                if !merged.tags.contains(tag) && !tag.starts_with("merged") {
555                    merged.tags.push(tag.clone());
556                }
557            }
558        }
559
560        merged.is_manual = entries.iter().any(|e| e.is_manual);
561
562        merged
563    }
564
565    /// Get entries by category.
566    pub fn by_category(&self, category: MemoryCategory) -> Vec<&MemoryEntry> {
567        self.entries
568            .iter()
569            .filter(|e| e.category == category)
570            .collect()
571    }
572
573    /// Get entries by category using index.
574    pub fn by_category_fast(&mut self, category: MemoryCategory) -> Vec<&MemoryEntry> {
575        self.ensure_index();
576        if let Some(ref index) = self.search_index {
577            index
578                .by_category
579                .get(&category)
580                .map(|indices| indices.iter().map(|&i| &self.entries[i]).collect())
581                .unwrap_or_default()
582        } else {
583            self.by_category(category)
584        }
585    }
586
587    /// Get top N most important entries.
588    pub fn top_n(&self, n: usize) -> Vec<&MemoryEntry> {
589        let mut sorted: Vec<_> = self.entries.iter().collect();
590        sorted.sort_by(|a, b| {
591            b.importance
592                .partial_cmp(&a.importance)
593                .unwrap_or(std::cmp::Ordering::Equal)
594        });
595        sorted.into_iter().take(n).collect()
596    }
597
598    /// Get top N using index.
599    pub fn top_n_fast(&mut self, n: usize) -> Vec<&MemoryEntry> {
600        self.ensure_index();
601        if let Some(ref index) = self.search_index {
602            index
603                .by_importance
604                .iter()
605                .take(n)
606                .map(|&i| &self.entries[i])
607                .collect()
608        } else {
609            self.top_n(n)
610        }
611    }
612
613    /// Search entries by content or tags.
614    pub fn search(&self, query: &str) -> Vec<&MemoryEntry> {
615        self.search_with_limit(query, None)
616    }
617
618    /// Search entries with result limit.
619    pub fn search_with_limit(&self, query: &str, limit: Option<usize>) -> Vec<&MemoryEntry> {
620        let query_lower = query.to_lowercase();
621        let mut results: Vec<_> = self
622            .entries
623            .iter()
624            .filter(|e| {
625                e.content.to_lowercase().contains(&query_lower)
626                    || e.tags
627                        .iter()
628                        .any(|t| t.to_lowercase().contains(&query_lower))
629            })
630            .collect();
631
632        results.sort_by(|a, b| {
633            b.importance
634                .partial_cmp(&a.importance)
635                .unwrap_or(std::cmp::Ordering::Equal)
636        });
637
638        if let Some(max) = limit {
639            results.into_iter().take(max).collect()
640        } else {
641            results
642        }
643    }
644
645    /// Search using index.
646    pub fn search_fast(&mut self, query: &str, limit: Option<usize>) -> Vec<&MemoryEntry> {
647        self.ensure_index();
648        let query_lower = query.to_lowercase();
649
650        if let Some(ref index) = self.search_index {
651            let indices = index.search(&self.entries, &query_lower, limit);
652            indices.iter().map(|&i| &self.entries[i]).collect()
653        } else {
654            self.search_with_limit(query, limit)
655        }
656    }
657
658    /// Multi-keyword search.
659    pub fn search_multi(&self, keywords: &[&str]) -> Vec<&MemoryEntry> {
660        if keywords.is_empty() {
661            return Vec::new();
662        }
663
664        let keywords_lower: Vec<String> = keywords.iter().map(|k| k.to_lowercase()).collect();
665
666        self.entries
667            .iter()
668            .filter(|e| {
669                let content_lower = e.content.to_lowercase();
670                keywords_lower.iter().any(|k| content_lower.contains(k))
671            })
672            .collect()
673    }
674
675    /// Multi-keyword search using index.
676    pub fn search_multi_fast(&mut self, keywords: &[&str]) -> Vec<&MemoryEntry> {
677        if keywords.is_empty() {
678            return Vec::new();
679        }
680
681        self.ensure_index();
682        let keywords_lower: Vec<String> = keywords.iter().map(|k| k.to_lowercase()).collect();
683
684        if let Some(ref index) = self.search_index {
685            let indices = index.search_multi(&keywords_lower);
686            indices.iter().map(|&i| &self.entries[i]).collect()
687        } else {
688            self.search_multi(keywords)
689        }
690    }
691
692    /// Batch add multiple entries.
693    pub fn add_batch(&mut self, entries: Vec<MemoryEntry>) {
694        for entry in entries {
695            if !self.has_similar(&entry.content) {
696                self.entries.push(entry);
697            }
698        }
699        self.prune();
700    }
701
702    /// Mark entries as referenced.
703    pub fn update_references(&mut self, messages: &[Message]) {
704        let increment = self.config.reference_increment;
705
706        let texts_lower: Vec<String> = messages
707            .iter()
708            .filter_map(Self::extract_message_text_lower)
709            .collect();
710
711        let entry_contents_lower: Vec<String> = self
712            .entries
713            .iter()
714            .map(|e| e.content.to_lowercase())
715            .collect();
716
717        for (i, entry) in self.entries.iter_mut().enumerate() {
718            let entry_lower = &entry_contents_lower[i];
719            if texts_lower.iter().any(|t| t.contains(entry_lower)) {
720                entry.mark_referenced_with_increment(increment);
721            }
722        }
723    }
724
725    /// Extract lowercase text from a message.
726    fn extract_message_text_lower(msg: &Message) -> Option<String> {
727        match &msg.content {
728            crate::providers::MessageContent::Text(t) => Some(t.to_lowercase()),
729            crate::providers::MessageContent::Blocks(blocks) => {
730                let text = blocks
731                    .iter()
732                    .filter_map(|b| {
733                        if let crate::providers::ContentBlock::Text { text } = b {
734                            Some(text.as_str())
735                        } else {
736                            None
737                        }
738                    })
739                    .collect::<Vec<_>>()
740                    .join(" ");
741                Some(text.to_lowercase())
742            }
743        }
744    }
745
746    /// Generate manifest for AI selection (Claude Code style).
747    pub fn generate_manifest(&self, max_entries: usize) -> String {
748        if self.entries.is_empty() {
749            return String::new();
750        }
751
752        let mut sorted_entries: Vec<_> = self.entries.iter().enumerate().collect();
753        sorted_entries.sort_by(|a, b| {
754            b.1.importance
755                .partial_cmp(&a.1.importance)
756                .unwrap_or(std::cmp::Ordering::Equal)
757        });
758        sorted_entries.truncate(max_entries);
759
760        let mut manifest = String::new();
761        for (original_idx, entry) in sorted_entries.iter() {
762            let preview: String = entry.content.chars().take(80).collect();
763            let preview = preview.trim_end_matches('\n');
764            manifest.push_str(&format!(
765                "{}. {} {} {} (重要性: {:.0})\n",
766                original_idx,
767                entry.category.icon(),
768                preview,
769                entry.category.display_name(),
770                entry.importance
771            ));
772        }
773
774        manifest
775    }
776
777    /// Get entries by indices (from AI selection result).
778    pub fn get_entries_by_indices(&self, indices: &[usize]) -> Vec<&MemoryEntry> {
779        indices.iter().filter_map(|i| self.entries.get(*i)).collect()
780    }
781
782    /// Generate summary for system prompt.
783    pub fn generate_prompt_summary(&self, max_entries: usize) -> String {
784        if self.entries.is_empty() {
785            return String::new();
786        }
787
788        let top_entries = self.top_n(max_entries);
789        if top_entries.is_empty() {
790            return String::new();
791        }
792
793        let mut summary = String::from("【自动记忆摘要】\n\n");
794
795        let mut by_cat: HashMap<MemoryCategory, Vec<&MemoryEntry>> = HashMap::new();
796        for entry in top_entries {
797            by_cat.entry(entry.category).or_default().push(entry);
798        }
799
800        for (cat, entries) in by_cat {
801            summary.push_str(&format!("{} {}:\n", cat.icon(), cat.display_name()));
802            for entry in entries {
803                summary.push_str(&format!("  {}\n", entry.format_for_prompt()));
804            }
805            summary.push('\n');
806        }
807
808        summary
809    }
810
811    /// Generate context-aware summary.
812    pub fn generate_contextual_summary(&self, context: &str, max_entries: usize) -> String {
813        let keywords = extract_context_keywords(context);
814        self.generate_contextual_summary_with_keywords(&keywords, max_entries)
815    }
816
817    /// Generate context-aware summary with pre-extracted keywords.
818    pub fn generate_contextual_summary_with_keywords(
819        &self,
820        context_keywords: &[String],
821        max_entries: usize,
822    ) -> String {
823        if self.entries.is_empty() {
824            return String::new();
825        }
826
827        let expanded_keywords = expand_semantic_keywords(context_keywords);
828
829        let mut tfidf = TfIdfSearch::new();
830        tfidf.index(self);
831        let keywords_slice: Vec<&str> = expanded_keywords.iter().map(|s| s.as_str()).collect();
832        let tfidf_results = tfidf.search_multi(&keywords_slice, Some(max_entries * 2));
833
834        let mut tfidf_scores: HashMap<String, f64> = HashMap::new();
835        for (content, score) in &tfidf_results {
836            if let Some(entry) = self.entries.iter().find(|e| &e.content == content) {
837                tfidf_scores.insert(entry.id.clone(), *score);
838            }
839        }
840
841        let mut scored: Vec<(&MemoryEntry, f64)> = self
842            .entries
843            .iter()
844            .map(|entry| {
845                let relevance = compute_relevance(entry, &expanded_keywords);
846                let tfidf = tfidf_scores.get(&entry.id).copied().unwrap_or(0.0);
847                let combined = tfidf * 0.4 + relevance * 0.6;
848                (entry, combined)
849            })
850            .collect();
851
852        scored.sort_by(|a, b| compare_scored_entries(*a, *b, CONTEXT_RELEVANCE_WEIGHT, CONTEXT_IMPORTANCE_WEIGHT));
853
854        let selected: Vec<&MemoryEntry> = scored
855            .iter()
856            .take(max_entries)
857            .map(|(entry, _)| *entry)
858            .collect();
859
860        if selected.is_empty() {
861            return String::new();
862        }
863
864        let mut summary = String::from("【跨会话记忆】\n\n");
865
866        let mut by_cat: HashMap<MemoryCategory, Vec<&MemoryEntry>> = HashMap::new();
867        for entry in selected {
868            by_cat.entry(entry.category).or_default().push(entry);
869        }
870
871        for (cat, entries) in by_cat {
872            summary.push_str(&format!("{} {}:\n", cat.icon(), cat.display_name()));
873            for entry in entries {
874                summary.push_str(&format!("  {}\n", entry.format_for_prompt()));
875            }
876            summary.push('\n');
877        }
878
879        summary
880    }
881
882    /// Update reference statistics.
883    pub fn update_retrieval_stats(&mut self, retrieved_ids: &[String]) {
884        for id in retrieved_ids {
885            if let Some(entry) = self.entries.iter_mut().find(|e| &e.id == id) {
886                entry.mark_referenced();
887                log::debug!("Updated reference stats for memory {}", id);
888            }
889        }
890    }
891
892    /// Get IDs of entries for retrieval.
893    pub fn get_retrieval_ids(
894        &self,
895        context_keywords: &[String],
896        max_entries: usize,
897    ) -> Vec<String> {
898        if self.entries.is_empty() {
899            return Vec::new();
900        }
901
902        let expanded_keywords = expand_semantic_keywords(context_keywords);
903
904        let mut scored: Vec<(&MemoryEntry, f64)> = self
905            .entries
906            .iter()
907            .map(|entry| {
908                let relevance = compute_relevance(entry, &expanded_keywords);
909                (entry, relevance)
910            })
911            .collect();
912
913        scored.sort_by(|a, b| compare_scored_entries(*a, *b, 1.0, 1.0));
914
915        scored
916            .iter()
917            .take(max_entries)
918            .map(|(e, _)| e.id.clone())
919            .collect()
920    }
921
922    /// Generate context-aware summary async.
923    /// Note: AI keyword extraction has been removed, uses rule-based extraction now.
924    pub async fn generate_contextual_summary_async(
925        &self,
926        context: &str,
927        max_entries: usize,
928        _fast_provider: Option<&dyn crate::providers::Provider>,
929    ) -> String {
930        if self.entries.is_empty() {
931            return String::new();
932        }
933
934        let context_keywords = extract_context_keywords(context);
935
936        let mut scored: Vec<(&MemoryEntry, f64)> = self
937            .entries
938            .iter()
939            .map(|entry| {
940                let relevance = compute_relevance(entry, &context_keywords);
941                (entry, relevance)
942            })
943            .collect();
944
945        scored.sort_by(|a, b| compare_scored_entries(*a, *b, CONTEXT_RELEVANCE_WEIGHT, CONTEXT_IMPORTANCE_WEIGHT));
946
947        let selected: Vec<&MemoryEntry> = scored
948            .iter()
949            .take(max_entries)
950            .map(|(entry, _)| *entry)
951            .collect();
952
953        if selected.is_empty() {
954            return String::new();
955        }
956
957        let mut summary = String::from("【跨会话记忆】\n\n");
958
959        let mut by_cat: HashMap<MemoryCategory, Vec<&MemoryEntry>> = HashMap::new();
960        for entry in selected {
961            by_cat.entry(entry.category).or_default().push(entry);
962        }
963
964        for (cat, entries) in by_cat {
965            summary.push_str(&format!("{} {}:\n", cat.icon(), cat.display_name()));
966            for entry in entries {
967                summary.push_str(&format!("  {}\n", entry.format_for_prompt()));
968            }
969            summary.push('\n');
970        }
971
972        summary
973    }
974
975    /// Format all entries for display.
976    pub fn format_all(&self) -> String {
977        if self.entries.is_empty() {
978            return "[no memories accumulated]".to_string();
979        }
980
981        let mut result = String::from("Accumulated memories:\n\n");
982
983        let mut sorted: Vec<_> = self.entries.iter().collect();
984        sorted.sort_by(|a, b| {
985            b.importance
986                .partial_cmp(&a.importance)
987                .unwrap_or(std::cmp::Ordering::Equal)
988        });
989
990        for entry in sorted {
991            result.push_str(&entry.format_line());
992            result.push('\n');
993        }
994
995        result
996    }
997
998    /// Generate statistics summary.
999    pub fn generate_statistics(&self) -> MemoryStatistics {
1000        let total = self.entries.len();
1001        let manual = self.entries.iter().filter(|e| e.is_manual).count();
1002        let auto = total - manual;
1003
1004        let by_category: HashMap<MemoryCategory, usize> =
1005            self.entries.iter().fold(HashMap::new(), |mut acc, e| {
1006                *acc.entry(e.category).or_default() += 1;
1007                acc
1008            });
1009
1010        let avg_importance = if total > 0 {
1011            self.entries.iter().map(|e| e.importance).sum::<f64>() / total as f64
1012        } else {
1013            0.0
1014        };
1015
1016        let oldest = self.entries.iter().min_by_key(|e| e.created_at).map(|e| e.created_at);
1017        let newest = self.entries.iter().max_by_key(|e| e.created_at).map(|e| e.created_at);
1018
1019        let highly_referenced = self.entries.iter().filter(|e| e.reference_count >= 3).count();
1020
1021        MemoryStatistics {
1022            total,
1023            manual,
1024            auto,
1025            by_category,
1026            avg_importance,
1027            oldest,
1028            newest,
1029            highly_referenced,
1030        }
1031    }
1032
1033    /// Clear all memories.
1034    pub fn clear(&mut self) {
1035        self.entries.clear();
1036        self.invalidate_index();
1037    }
1038
1039    /// Remove a specific memory by ID.
1040    pub fn remove(&mut self, id: &str) -> bool {
1041        let idx = self.entries.iter().position(|e| e.id == id);
1042        if let Some(i) = idx {
1043            self.entries.remove(i);
1044            self.invalidate_index();
1045            true
1046        } else {
1047            false
1048        }
1049    }
1050
1051    /// Apply time decay to memory importance.
1052    pub fn apply_time_decay(&mut self) {
1053        let now = Utc::now();
1054        let decay_start_days = self.config.decay_start_days;
1055        let decay_rate = self.config.decay_rate;
1056        let decay_period_days = 30;
1057
1058        for entry in &mut self.entries {
1059            if entry.is_manual {
1060                continue;
1061            }
1062
1063            let days_since_reference = (now - entry.last_referenced).num_days().max(0);
1064
1065            if days_since_reference > decay_start_days {
1066                let decay_periods = (days_since_reference - decay_start_days) / decay_period_days;
1067                let decay_factor = decay_rate.powi(decay_periods as i32);
1068                entry.importance *= decay_factor;
1069                entry.importance = entry.importance.max(self.min_importance * 0.5);
1070            }
1071        }
1072
1073        self.prune();
1074    }
1075}
1076
1077// ============================================================================
1078// Memory Statistics
1079// ============================================================================
1080
1081/// Statistics about memory collection.
1082#[derive(Debug, Clone)]
1083pub struct MemoryStatistics {
1084    /// Total number of entries.
1085    pub total: usize,
1086    /// Number of manually added entries.
1087    pub manual: usize,
1088    /// Number of automatically detected entries.
1089    pub auto: usize,
1090    /// Count by category.
1091    pub by_category: HashMap<MemoryCategory, usize>,
1092    /// Average importance score.
1093    pub avg_importance: f64,
1094    /// Oldest entry creation time.
1095    pub oldest: Option<DateTime<Utc>>,
1096    /// Newest entry creation time.
1097    pub newest: Option<DateTime<Utc>>,
1098    /// Number of entries with high reference count.
1099    pub highly_referenced: usize,
1100}
1101
1102impl MemoryStatistics {
1103    /// Format statistics for display.
1104    pub fn format_summary(&self) -> String {
1105        let mut output = String::new();
1106
1107        output.push_str("记忆统计:\n");
1108        output.push_str(&format!("  总计: {} 条\n", self.total));
1109        output.push_str(&format!("  ├─ 手动添加: {} 条\n", self.manual));
1110        output.push_str(&format!("  └─ 自动检测: {} 条\n", self.auto));
1111        output.push('\n');
1112
1113        output.push_str("分类统计:\n");
1114        for (cat, count) in &self.by_category {
1115            output.push_str(&format!(
1116                "  {} {}: {} 条\n",
1117                cat.icon(),
1118                cat.display_name(),
1119                count
1120            ));
1121        }
1122        output.push('\n');
1123
1124        output.push_str("质量指标:\n");
1125        output.push_str(&format!("  平均重要性: {:.1} 分\n", self.avg_importance));
1126        output.push_str(&format!("  高频引用: {} 条 (≥3次)\n", self.highly_referenced));
1127
1128        if let Some(oldest) = self.oldest {
1129            let days = (Utc::now() - oldest).num_days();
1130            output.push_str(&format!("  记忆跨度: {} 天\n", days));
1131        }
1132
1133        output
1134    }
1135}