Skip to main content

matrixcode_core/memory/
manager.rs

1//! Memory manager and search index.
2
3use chrono::{DateTime, Utc};
4use serde::{Deserialize, Serialize};
5use std::collections::{HashMap, HashSet};
6
7use super::config::*;
8use super::entry::{MemoryCategory, MemoryEntry};
9use super::retrieval::{
10    TfIdfSearch, compute_relevance, expand_semantic_keywords, extract_context_keywords,
11    has_contradiction_signal,
12};
13use crate::providers::Message;
14use crate::truncate::truncate_with_suffix;
15
16// ============================================================================
17// Scoring Helper
18// ============================================================================
19
20/// Compare two scored entries for sorting.
21/// Manual entries always come first, then by combined score (descending).
22fn compare_scored_entries(
23    a: (&MemoryEntry, f64),
24    b: (&MemoryEntry, f64),
25    relevance_weight: f64,
26    importance_weight: f64,
27) -> std::cmp::Ordering {
28    // Manual entries always prioritized
29    if a.0.is_manual && !b.0.is_manual {
30        return std::cmp::Ordering::Less;
31    }
32    if !a.0.is_manual && b.0.is_manual {
33        return std::cmp::Ordering::Greater;
34    }
35
36    let score_a =
37        a.1 * relevance_weight + (a.0.importance / MAX_IMPORTANCE_CEILING) * importance_weight;
38    let score_b =
39        b.1 * relevance_weight + (b.0.importance / MAX_IMPORTANCE_CEILING) * importance_weight;
40
41    score_b
42        .partial_cmp(&score_a)
43        .unwrap_or(std::cmp::Ordering::Equal)
44}
45
46// ============================================================================
47// Search Index
48// ============================================================================
49
50/// Search index for fast lookups.
51#[derive(Debug, Clone)]
52pub struct SearchIndex {
53    /// Lowercase content cache for each entry.
54    content_lower: Vec<String>,
55    /// Entries grouped by category.
56    by_category: HashMap<MemoryCategory, Vec<usize>>,
57    /// Entries sorted by importance (indices).
58    by_importance: Vec<usize>,
59    /// Total word frequency for relevance scoring (future use).
60    #[allow(dead_code)]
61    word_freq: HashMap<String, usize>,
62}
63
64impl SearchIndex {
65    /// Build index from entries.
66    pub fn build(entries: &[MemoryEntry]) -> Self {
67        let content_lower: Vec<String> = entries.iter().map(|e| e.content.to_lowercase()).collect();
68
69        let mut by_category: HashMap<MemoryCategory, Vec<usize>> = HashMap::new();
70        for (i, entry) in entries.iter().enumerate() {
71            by_category.entry(entry.category).or_default().push(i);
72        }
73
74        let mut by_importance: Vec<usize> = (0..entries.len()).collect();
75        by_importance.sort_by(|a, b| {
76            entries[*b]
77                .importance
78                .partial_cmp(&entries[*a].importance)
79                .unwrap_or(std::cmp::Ordering::Equal)
80        });
81
82        let mut word_freq: HashMap<String, usize> = HashMap::new();
83        for content in &content_lower {
84            for word in content.split_whitespace() {
85                *word_freq.entry(word.to_string()).or_default() += 1;
86            }
87        }
88
89        Self {
90            content_lower,
91            by_category,
92            by_importance,
93            word_freq,
94        }
95    }
96
97    /// Search by query with optional limit.
98    pub fn search(
99        &self,
100        _entries: &[MemoryEntry],
101        query_lower: &str,
102        limit: Option<usize>,
103    ) -> Vec<usize> {
104        let matches: Vec<usize> = self
105            .by_importance
106            .iter()
107            .filter(|&idx| self.content_lower[*idx].contains(query_lower))
108            .copied()
109            .collect();
110
111        if let Some(max) = limit {
112            matches.into_iter().take(max).collect()
113        } else {
114            matches
115        }
116    }
117
118    /// Multi-keyword search (matches any keyword).
119    pub fn search_multi(&self, keywords_lower: &[String]) -> Vec<usize> {
120        self.by_importance
121            .iter()
122            .filter(|&idx| {
123                let content = &self.content_lower[*idx];
124                keywords_lower.iter().any(|k| content.contains(k))
125            })
126            .copied()
127            .collect()
128    }
129}
130
131// ============================================================================
132// Helper Functions for Defaults
133// ============================================================================
134
/// Serde fallback for `AutoMemory::max_entries` when the field is absent from the input.
fn default_max_entries() -> usize {
    const FALLBACK_MAX_ENTRIES: usize = 100;
    FALLBACK_MAX_ENTRIES
}
138
/// Serde fallback for `AutoMemory::min_importance` when the field is absent from the input.
fn default_min_importance() -> f64 {
    const FALLBACK_MIN_IMPORTANCE: f64 = 30.0;
    FALLBACK_MIN_IMPORTANCE
}
142
/// Serde fallback for `AutoMemory::enabled` when the field is absent from the input.
fn default_enabled() -> bool {
    const FALLBACK_ENABLED: bool = true;
    FALLBACK_ENABLED
}
146
147// ============================================================================
148// Auto Memory Manager
149// ============================================================================
150
151/// Manager for automatic memory accumulation.
152#[derive(Debug, Clone, Serialize, Deserialize)]
153pub struct AutoMemory {
154    /// All memory entries.
155    pub entries: Vec<MemoryEntry>,
156    /// Configuration for memory management.
157    #[serde(default)]
158    pub config: MemoryConfig,
159    /// Legacy fields for backward compatibility (deprecated).
160    #[serde(default = "default_max_entries")]
161    pub max_entries: usize,
162    #[serde(default = "default_min_importance")]
163    pub min_importance: f64,
164    #[serde(default = "default_enabled")]
165    pub enabled: bool,
166    /// Search index (not serialized, rebuilt on load).
167    #[serde(skip)]
168    search_index: Option<SearchIndex>,
169}
170
171impl Default for AutoMemory {
172    fn default() -> Self {
173        let config = MemoryConfig::default();
174        Self {
175            entries: Vec::new(),
176            config: config.clone(),
177            max_entries: config.max_entries,
178            min_importance: config.min_importance,
179            enabled: config.enabled,
180            search_index: None,
181        }
182    }
183}
184
185impl AutoMemory {
186    /// Create a new auto memory manager.
187    pub fn new() -> Self {
188        Self::default()
189    }
190
191    /// Ensure search index is built.
192    fn ensure_index(&mut self) {
193        if self.search_index.is_none() {
194            self.rebuild_index();
195        }
196    }
197
198    /// Rebuild search index.
199    pub fn rebuild_index(&mut self) {
200        self.search_index = Some(SearchIndex::build(&self.entries));
201    }
202
203    /// Invalidate search index (call after modifications).
204    fn invalidate_index(&mut self) {
205        self.search_index = None;
206    }
207
208    /// Create with custom configuration.
209    pub fn with_config(config: MemoryConfig) -> Self {
210        Self {
211            entries: Vec::new(),
212            config: config.clone(),
213            max_entries: config.max_entries,
214            min_importance: config.min_importance,
215            enabled: config.enabled,
216            search_index: None,
217        }
218    }
219
220    /// Create a minimal memory manager.
221    pub fn minimal() -> Self {
222        Self::with_config(MemoryConfig::minimal())
223    }
224
225    /// Create an archival memory manager.
226    pub fn archival() -> Self {
227        Self::with_config(MemoryConfig::archival())
228    }
229
230    /// Add a new memory entry with duplicate check.
231    pub fn add(&mut self, entry: MemoryEntry) {
232        // Check for similar content before adding
233        if self.has_similar(&entry.content) {
234            log::debug!("Skipping duplicate memory: {}", entry.content);
235            return;
236        }
237
238        // Check for conflicting memories (e.g., "使用 X" vs "使用 Y")
239        if let Some(conflict_idx) = self.find_conflict(&entry.content, entry.category) {
240            let old_content = self.entries[conflict_idx].content.clone();
241            log::info!(
242                "Memory conflict: '{}' supersedes '{}'",
243                entry.content,
244                old_content
245            );
246            self.entries.remove(conflict_idx);
247            self.invalidate_index();
248        }
249
250        self.entries.push(entry);
251        self.invalidate_index();
252        self.prune();
253    }
254
255    /// Add memory from detected content.
256    pub fn add_memory(
257        &mut self,
258        category: MemoryCategory,
259        content: String,
260        source_session: Option<String>,
261    ) {
262        let entry = MemoryEntry::new(category, content, source_session, None);
263        self.add(entry);
264    }
265
266    /// Find a conflicting memory entry.
267    fn find_conflict(&self, new_content: &str, category: MemoryCategory) -> Option<usize> {
268        let new_lower = new_content.to_lowercase();
269        let new_words: HashSet<&str> = new_lower.split_whitespace().collect();
270
271        let has_change_signal = has_contradiction_signal("", &new_lower);
272        let overlap_threshold = if has_change_signal {
273            CONFLICT_OVERLAY_THRESHOLD_WITH_SIGNAL
274        } else {
275            CONFLICT_OVERLAY_THRESHOLD
276        };
277
278        for (i, entry) in self.entries.iter().enumerate() {
279            if entry.category != category {
280                continue;
281            }
282
283            let entry_lower = entry.content.to_lowercase();
284            let entry_words: HashSet<&str> = entry_lower.split_whitespace().collect();
285
286            let intersection = new_words.intersection(&entry_words).count();
287            let min_len = new_words.len().min(entry_words.len());
288
289            if min_len == 0 {
290                continue;
291            }
292
293            let topic_overlap = intersection as f64 / min_len as f64;
294            let jaccard = Self::calculate_similarity(&entry_lower, &new_lower);
295
296            if topic_overlap > overlap_threshold
297                && jaccard < SIMILARITY_THRESHOLD
298                && has_contradiction_signal(&entry_lower, &new_lower)
299            {
300                return Some(i);
301            }
302
303            if has_change_signal {
304                let old_key_terms: Vec<&str> = entry_words
305                    .iter()
306                    .filter(|w| w.len() > 2)
307                    .copied()
308                    .collect();
309                let referenced = old_key_terms.iter().any(|term| new_lower.contains(term));
310                if referenced {
311                    return Some(i);
312                }
313            }
314        }
315
316        None
317    }
318
319    /// Check if similar content already exists.
320    pub fn has_similar(&self, content: &str) -> bool {
321        let content_lower = content.to_lowercase();
322
323        if content_lower.len() < MIN_SIMILARITY_LENGTH {
324            return false;
325        }
326
327        for e in &self.entries {
328            let entry_lower = e.content.to_lowercase();
329
330            if entry_lower == content_lower {
331                log::debug!("Exact duplicate found: {}", content);
332                return true;
333            }
334
335            if entry_lower.len() < MIN_SIMILARITY_LENGTH {
336                continue;
337            }
338
339            let similarity = Self::calculate_similarity(&entry_lower, &content_lower);
340            if similarity >= SIMILARITY_THRESHOLD {
341                log::debug!(
342                    "Similar memory found (similarity={:.2}): '{}' vs '{}'",
343                    similarity,
344                    e.content,
345                    content
346                );
347                crate::debug::debug_log().log(
348                    "MEMORY_DUPLICATE",
349                    &format!(
350                        "similarity={:.2}, existing='{}', new='{}'",
351                        similarity,
352                        truncate_with_suffix(&e.content, 50),
353                        truncate_with_suffix(content, 50)
354                    ),
355                );
356                return true;
357            }
358        }
359
360        false
361    }
362
363    /// Calculate word-based similarity between two strings.
364    pub fn calculate_similarity(a: &str, b: &str) -> f64 {
365        let a_words: HashSet<&str> = a.split_whitespace().collect();
366        let b_words: HashSet<&str> = b.split_whitespace().collect();
367
368        if a_words.is_empty() || b_words.is_empty() {
369            return 0.0;
370        }
371
372        let intersection = a_words.intersection(&b_words).count();
373        let union = a_words.union(&b_words).count();
374
375        if union == 0 {
376            0.0
377        } else {
378            intersection as f64 / union as f64
379        }
380    }
381
382    /// Remove low-importance entries when exceeding max_entries.
383    pub fn prune(&mut self) {
384        if self.entries.len() <= self.max_entries {
385            return;
386        }
387
388        let (manual_entries, auto_entries): (Vec<_>, Vec<_>) =
389            self.entries.iter().cloned().partition(|e| e.is_manual);
390
391        let mut sorted_auto = auto_entries;
392        sorted_auto.sort_by(|a, b| {
393            let importance_cmp = b
394                .importance
395                .partial_cmp(&a.importance)
396                .unwrap_or(std::cmp::Ordering::Equal);
397            if importance_cmp == std::cmp::Ordering::Equal {
398                b.last_referenced.cmp(&a.last_referenced)
399            } else {
400                importance_cmp
401            }
402        });
403
404        let kept_auto: Vec<_> = sorted_auto
405            .into_iter()
406            .filter(|e| e.importance >= self.min_importance)
407            .take(self.max_entries.saturating_sub(manual_entries.len()))
408            .collect();
409
410        self.entries = manual_entries.into_iter().chain(kept_auto).collect();
411
412        if self.entries.len() > self.max_entries {
413            self.entries.sort_by(|a, b| {
414                let importance_cmp = b
415                    .importance
416                    .partial_cmp(&a.importance)
417                    .unwrap_or(std::cmp::Ordering::Equal);
418                if importance_cmp == std::cmp::Ordering::Equal {
419                    b.last_referenced.cmp(&a.last_referenced)
420                } else {
421                    importance_cmp
422                }
423            });
424            self.entries.truncate(self.max_entries);
425        }
426
427        self.invalidate_index();
428    }
429
430    /// Smart merge of similar memories.
431    pub fn smart_merge(&mut self) -> usize {
432        if self.entries.len() < 2 {
433            return 0;
434        }
435
436        let mut merged_count = 0;
437        let mut to_remove: Vec<String> = Vec::new();
438        let mut new_entries: Vec<MemoryEntry> = Vec::new();
439        let mut processed: HashSet<String> = HashSet::new();
440
441        for i in 0..self.entries.len() {
442            let entry_i = &self.entries[i];
443            if processed.contains(&entry_i.id) {
444                continue;
445            }
446
447            let mut similar_group: Vec<usize> = vec![i];
448
449            for j in (i + 1)..self.entries.len() {
450                let entry_j = &self.entries[j];
451                if processed.contains(&entry_j.id) {
452                    continue;
453                }
454
455                if entry_i.category != entry_j.category {
456                    continue;
457                }
458
459                let similarity = Self::calculate_similarity(&entry_i.content, &entry_j.content);
460                if similarity >= MERGE_SIMILARITY_THRESHOLD {
461                    similar_group.push(j);
462                }
463            }
464
465            if similar_group.len() >= 2 {
466                let group_entries: Vec<&MemoryEntry> = similar_group
467                    .iter()
468                    .map(|&idx| &self.entries[idx])
469                    .collect();
470
471                let merged = self.merge_group(&group_entries);
472
473                for entry in &group_entries {
474                    to_remove.push(entry.id.clone());
475                    processed.insert(entry.id.clone());
476                }
477
478                new_entries.push(merged);
479                merged_count += similar_group.len() - 1;
480            } else {
481                processed.insert(entry_i.id.clone());
482            }
483        }
484
485        for id in &to_remove {
486            self.remove(id);
487        }
488
489        for entry in new_entries {
490            self.add(entry);
491        }
492
493        if merged_count > 0 {
494            log::debug!("Smart merge: reduced {} entries", merged_count);
495            self.invalidate_index();
496        }
497
498        merged_count
499    }
500
501    /// Merge a group of similar entries into one.
502    fn merge_group(&self, entries: &[&MemoryEntry]) -> MemoryEntry {
503        // SAFETY: entries is guaranteed non-empty by caller (similar_group.len() >= 2)
504        let best = entries
505            .iter()
506            .max_by(|a, b| {
507                let score_a = a.importance + (a.content.len() as f64 / 100.0);
508                let score_b = b.importance + (b.content.len() as f64 / 100.0);
509                score_b
510                    .partial_cmp(&score_a)
511                    .unwrap_or(std::cmp::Ordering::Equal)
512            })
513            .expect("merge_group called with empty entries");
514
515        let all_same = entries
516            .iter()
517            .all(|e| Self::calculate_similarity(&e.content, &best.content) >= 0.95);
518
519        if all_same {
520            let mut merged: MemoryEntry = (*best).clone();
521            merged.importance = entries
522                .iter()
523                .map(|e| e.importance)
524                .fold(best.importance, |max, val| val.max(max));
525            merged.tags.push("merged".to_string());
526            return merged;
527        }
528
529        let mut merged_content = best.content.clone();
530
531        for entry in entries {
532            if entry.id == best.id {
533                continue;
534            }
535            let unique_words = entry
536                .content
537                .split_whitespace()
538                .filter(|word| !best.content.contains(word))
539                .take(3)
540                .collect::<Vec<_>>();
541
542            if !unique_words.is_empty() {
543                let additions = unique_words.join(", ");
544                if additions.len() > 10 {
545                    merged_content =
546                        format!("{} ({})", merged_content.trim_end_matches('.'), additions);
547                }
548            }
549        }
550
551        let mut merged = MemoryEntry::new(best.category, merged_content, None, None);
552        merged.importance = entries
553            .iter()
554            .map(|e| e.importance)
555            .fold(best.importance, |max, val| val.max(max))
556            + 5.0;
557        merged.importance = merged.importance.min(MAX_IMPORTANCE_CEILING);
558
559        merged.tags.push("merged".to_string());
560        for entry in entries {
561            for tag in &entry.tags {
562                if !merged.tags.contains(tag) && !tag.starts_with("merged") {
563                    merged.tags.push(tag.clone());
564                }
565            }
566        }
567
568        merged.is_manual = entries.iter().any(|e| e.is_manual);
569
570        merged
571    }
572
573    /// Get entries by category.
574    pub fn by_category(&self, category: MemoryCategory) -> Vec<&MemoryEntry> {
575        self.entries
576            .iter()
577            .filter(|e| e.category == category)
578            .collect()
579    }
580
581    /// Get entries by category using index.
582    pub fn by_category_fast(&mut self, category: MemoryCategory) -> Vec<&MemoryEntry> {
583        self.ensure_index();
584        if let Some(ref index) = self.search_index {
585            index
586                .by_category
587                .get(&category)
588                .map(|indices| indices.iter().map(|&i| &self.entries[i]).collect())
589                .unwrap_or_default()
590        } else {
591            self.by_category(category)
592        }
593    }
594
595    /// Get top N most important entries.
596    pub fn top_n(&self, n: usize) -> Vec<&MemoryEntry> {
597        let mut sorted: Vec<_> = self.entries.iter().collect();
598        sorted.sort_by(|a, b| {
599            b.importance
600                .partial_cmp(&a.importance)
601                .unwrap_or(std::cmp::Ordering::Equal)
602        });
603        sorted.into_iter().take(n).collect()
604    }
605
606    /// Get top N using index.
607    pub fn top_n_fast(&mut self, n: usize) -> Vec<&MemoryEntry> {
608        self.ensure_index();
609        if let Some(ref index) = self.search_index {
610            index
611                .by_importance
612                .iter()
613                .take(n)
614                .map(|&i| &self.entries[i])
615                .collect()
616        } else {
617            self.top_n(n)
618        }
619    }
620
621    /// Search entries by content or tags.
622    pub fn search(&self, query: &str) -> Vec<&MemoryEntry> {
623        self.search_with_limit(query, None)
624    }
625
626    /// Search entries with result limit.
627    pub fn search_with_limit(&self, query: &str, limit: Option<usize>) -> Vec<&MemoryEntry> {
628        let query_lower = query.to_lowercase();
629        let mut results: Vec<_> = self
630            .entries
631            .iter()
632            .filter(|e| {
633                e.content.to_lowercase().contains(&query_lower)
634                    || e.tags
635                        .iter()
636                        .any(|t| t.to_lowercase().contains(&query_lower))
637            })
638            .collect();
639
640        results.sort_by(|a, b| {
641            b.importance
642                .partial_cmp(&a.importance)
643                .unwrap_or(std::cmp::Ordering::Equal)
644        });
645
646        if let Some(max) = limit {
647            results.into_iter().take(max).collect()
648        } else {
649            results
650        }
651    }
652
653    /// Search using index.
654    pub fn search_fast(&mut self, query: &str, limit: Option<usize>) -> Vec<&MemoryEntry> {
655        self.ensure_index();
656        let query_lower = query.to_lowercase();
657
658        if let Some(ref index) = self.search_index {
659            let indices = index.search(&self.entries, &query_lower, limit);
660            indices.iter().map(|&i| &self.entries[i]).collect()
661        } else {
662            self.search_with_limit(query, limit)
663        }
664    }
665
666    /// Multi-keyword search.
667    pub fn search_multi(&self, keywords: &[&str]) -> Vec<&MemoryEntry> {
668        if keywords.is_empty() {
669            return Vec::new();
670        }
671
672        let keywords_lower: Vec<String> = keywords.iter().map(|k| k.to_lowercase()).collect();
673
674        self.entries
675            .iter()
676            .filter(|e| {
677                let content_lower = e.content.to_lowercase();
678                keywords_lower.iter().any(|k| content_lower.contains(k))
679            })
680            .collect()
681    }
682
683    /// Multi-keyword search using index.
684    pub fn search_multi_fast(&mut self, keywords: &[&str]) -> Vec<&MemoryEntry> {
685        if keywords.is_empty() {
686            return Vec::new();
687        }
688
689        self.ensure_index();
690        let keywords_lower: Vec<String> = keywords.iter().map(|k| k.to_lowercase()).collect();
691
692        if let Some(ref index) = self.search_index {
693            let indices = index.search_multi(&keywords_lower);
694            indices.iter().map(|&i| &self.entries[i]).collect()
695        } else {
696            self.search_multi(keywords)
697        }
698    }
699
700    /// Batch add multiple entries.
701    pub fn add_batch(&mut self, entries: Vec<MemoryEntry>) {
702        for entry in entries {
703            if !self.has_similar(&entry.content) {
704                self.entries.push(entry);
705            }
706        }
707        self.prune();
708    }
709
710    /// Mark entries as referenced.
711    pub fn update_references(&mut self, messages: &[Message]) {
712        let increment = self.config.reference_increment;
713
714        let texts_lower: Vec<String> = messages
715            .iter()
716            .filter_map(Self::extract_message_text_lower)
717            .collect();
718
719        let entry_contents_lower: Vec<String> = self
720            .entries
721            .iter()
722            .map(|e| e.content.to_lowercase())
723            .collect();
724
725        for (i, entry) in self.entries.iter_mut().enumerate() {
726            let entry_lower = &entry_contents_lower[i];
727            if texts_lower.iter().any(|t| t.contains(entry_lower)) {
728                entry.mark_referenced_with_increment(increment);
729            }
730        }
731    }
732
733    /// Extract lowercase text from a message.
734    fn extract_message_text_lower(msg: &Message) -> Option<String> {
735        match &msg.content {
736            crate::providers::MessageContent::Text(t) => Some(t.to_lowercase()),
737            crate::providers::MessageContent::Blocks(blocks) => {
738                let text = blocks
739                    .iter()
740                    .filter_map(|b| {
741                        if let crate::providers::ContentBlock::Text { text } = b {
742                            Some(text.as_str())
743                        } else {
744                            None
745                        }
746                    })
747                    .collect::<Vec<_>>()
748                    .join(" ");
749                Some(text.to_lowercase())
750            }
751        }
752    }
753
754    /// Generate manifest for AI selection (Claude Code style).
755    pub fn generate_manifest(&self, max_entries: usize) -> String {
756        if self.entries.is_empty() {
757            return String::new();
758        }
759
760        let mut sorted_entries: Vec<_> = self.entries.iter().enumerate().collect();
761        sorted_entries.sort_by(|a, b| {
762            b.1.importance
763                .partial_cmp(&a.1.importance)
764                .unwrap_or(std::cmp::Ordering::Equal)
765        });
766        sorted_entries.truncate(max_entries);
767
768        let mut manifest = String::new();
769        for (original_idx, entry) in sorted_entries.iter() {
770            let preview: String = entry.content.chars().take(80).collect();
771            let preview = preview.trim_end_matches('\n');
772            manifest.push_str(&format!(
773                "{}. {} {} {} (重要性: {:.0})\n",
774                original_idx,
775                entry.category.icon(),
776                preview,
777                entry.category.display_name(),
778                entry.importance
779            ));
780        }
781
782        manifest
783    }
784
785    /// Get entries by indices (from AI selection result).
786    pub fn get_entries_by_indices(&self, indices: &[usize]) -> Vec<&MemoryEntry> {
787        indices
788            .iter()
789            .filter_map(|i| self.entries.get(*i))
790            .collect()
791    }
792
793    /// Generate summary for system prompt.
794    pub fn generate_prompt_summary(&self, max_entries: usize) -> String {
795        if self.entries.is_empty() {
796            return String::new();
797        }
798
799        let top_entries = self.top_n(max_entries);
800        if top_entries.is_empty() {
801            return String::new();
802        }
803
804        let mut summary = String::from("【自动记忆摘要】\n\n");
805
806        let mut by_cat: HashMap<MemoryCategory, Vec<&MemoryEntry>> = HashMap::new();
807        for entry in top_entries {
808            by_cat.entry(entry.category).or_default().push(entry);
809        }
810
811        for (cat, entries) in by_cat {
812            summary.push_str(&format!("{} {}:\n", cat.icon(), cat.display_name()));
813            for entry in entries {
814                summary.push_str(&format!("  {}\n", entry.format_for_prompt()));
815            }
816            summary.push('\n');
817        }
818
819        summary
820    }
821
822    /// Generate context-aware summary.
823    pub fn generate_contextual_summary(&self, context: &str, max_entries: usize) -> String {
824        let keywords = extract_context_keywords(context);
825        self.generate_contextual_summary_with_keywords(&keywords, max_entries)
826    }
827
828    /// Generate context-aware summary with pre-extracted keywords.
829    pub fn generate_contextual_summary_with_keywords(
830        &self,
831        context_keywords: &[String],
832        max_entries: usize,
833    ) -> String {
834        if self.entries.is_empty() {
835            return String::new();
836        }
837
838        let expanded_keywords = expand_semantic_keywords(context_keywords);
839
840        let mut tfidf = TfIdfSearch::new();
841        tfidf.index(self);
842        let keywords_slice: Vec<&str> = expanded_keywords.iter().map(|s| s.as_str()).collect();
843        let tfidf_results = tfidf.search_multi(&keywords_slice, Some(max_entries * 2));
844
845        let mut tfidf_scores: HashMap<String, f64> = HashMap::new();
846        for (content, score) in &tfidf_results {
847            if let Some(entry) = self.entries.iter().find(|e| &e.content == content) {
848                tfidf_scores.insert(entry.id.clone(), *score);
849            }
850        }
851
852        let mut scored: Vec<(&MemoryEntry, f64)> = self
853            .entries
854            .iter()
855            .map(|entry| {
856                let relevance = compute_relevance(entry, &expanded_keywords);
857                let tfidf = tfidf_scores.get(&entry.id).copied().unwrap_or(0.0);
858                let combined = tfidf * 0.4 + relevance * 0.6;
859                (entry, combined)
860            })
861            .collect();
862
863        scored.sort_by(|a, b| {
864            compare_scored_entries(*a, *b, CONTEXT_RELEVANCE_WEIGHT, CONTEXT_IMPORTANCE_WEIGHT)
865        });
866
867        let selected: Vec<&MemoryEntry> = scored
868            .iter()
869            .take(max_entries)
870            .map(|(entry, _)| *entry)
871            .collect();
872
873        if selected.is_empty() {
874            return String::new();
875        }
876
877        let mut summary = String::from("【跨会话记忆】\n\n");
878
879        let mut by_cat: HashMap<MemoryCategory, Vec<&MemoryEntry>> = HashMap::new();
880        for entry in selected {
881            by_cat.entry(entry.category).or_default().push(entry);
882        }
883
884        for (cat, entries) in by_cat {
885            summary.push_str(&format!("{} {}:\n", cat.icon(), cat.display_name()));
886            for entry in entries {
887                summary.push_str(&format!("  {}\n", entry.format_for_prompt()));
888            }
889            summary.push('\n');
890        }
891
892        summary
893    }
894
895    /// Update reference statistics.
896    pub fn update_retrieval_stats(&mut self, retrieved_ids: &[String]) {
897        for id in retrieved_ids {
898            if let Some(entry) = self.entries.iter_mut().find(|e| &e.id == id) {
899                entry.mark_referenced();
900                log::debug!("Updated reference stats for memory {}", id);
901            }
902        }
903    }
904
905    /// Get IDs of entries for retrieval.
906    pub fn get_retrieval_ids(
907        &self,
908        context_keywords: &[String],
909        max_entries: usize,
910    ) -> Vec<String> {
911        if self.entries.is_empty() {
912            return Vec::new();
913        }
914
915        let expanded_keywords = expand_semantic_keywords(context_keywords);
916
917        let mut scored: Vec<(&MemoryEntry, f64)> = self
918            .entries
919            .iter()
920            .map(|entry| {
921                let relevance = compute_relevance(entry, &expanded_keywords);
922                (entry, relevance)
923            })
924            .collect();
925
926        scored.sort_by(|a, b| compare_scored_entries(*a, *b, 1.0, 1.0));
927
928        scored
929            .iter()
930            .take(max_entries)
931            .map(|(e, _)| e.id.clone())
932            .collect()
933    }
934
935    /// Generate context-aware summary async.
936    /// Note: AI keyword extraction has been removed, uses rule-based extraction now.
937    pub async fn generate_contextual_summary_async(
938        &self,
939        context: &str,
940        max_entries: usize,
941        _fast_provider: Option<&dyn crate::providers::Provider>,
942    ) -> String {
943        if self.entries.is_empty() {
944            return String::new();
945        }
946
947        let context_keywords = extract_context_keywords(context);
948
949        let mut scored: Vec<(&MemoryEntry, f64)> = self
950            .entries
951            .iter()
952            .map(|entry| {
953                let relevance = compute_relevance(entry, &context_keywords);
954                (entry, relevance)
955            })
956            .collect();
957
958        scored.sort_by(|a, b| {
959            compare_scored_entries(*a, *b, CONTEXT_RELEVANCE_WEIGHT, CONTEXT_IMPORTANCE_WEIGHT)
960        });
961
962        let selected: Vec<&MemoryEntry> = scored
963            .iter()
964            .take(max_entries)
965            .map(|(entry, _)| *entry)
966            .collect();
967
968        if selected.is_empty() {
969            return String::new();
970        }
971
972        let mut summary = String::from("【跨会话记忆】\n\n");
973
974        let mut by_cat: HashMap<MemoryCategory, Vec<&MemoryEntry>> = HashMap::new();
975        for entry in selected {
976            by_cat.entry(entry.category).or_default().push(entry);
977        }
978
979        for (cat, entries) in by_cat {
980            summary.push_str(&format!("{} {}:\n", cat.icon(), cat.display_name()));
981            for entry in entries {
982                summary.push_str(&format!("  {}\n", entry.format_for_prompt()));
983            }
984            summary.push('\n');
985        }
986
987        summary
988    }
989
990    /// Format all entries for display.
991    pub fn format_all(&self) -> String {
992        if self.entries.is_empty() {
993            return "[no memories accumulated]".to_string();
994        }
995
996        let mut result = String::from("Accumulated memories:\n\n");
997
998        let mut sorted: Vec<_> = self.entries.iter().collect();
999        sorted.sort_by(|a, b| {
1000            b.importance
1001                .partial_cmp(&a.importance)
1002                .unwrap_or(std::cmp::Ordering::Equal)
1003        });
1004
1005        for entry in sorted {
1006            result.push_str(&entry.format_line());
1007            result.push('\n');
1008        }
1009
1010        result
1011    }
1012
1013    /// Generate statistics summary.
1014    pub fn generate_statistics(&self) -> MemoryStatistics {
1015        let total = self.entries.len();
1016        let manual = self.entries.iter().filter(|e| e.is_manual).count();
1017        let auto = total - manual;
1018
1019        let by_category: HashMap<MemoryCategory, usize> =
1020            self.entries.iter().fold(HashMap::new(), |mut acc, e| {
1021                *acc.entry(e.category).or_default() += 1;
1022                acc
1023            });
1024
1025        let avg_importance = if total > 0 {
1026            self.entries.iter().map(|e| e.importance).sum::<f64>() / total as f64
1027        } else {
1028            0.0
1029        };
1030
1031        let oldest = self
1032            .entries
1033            .iter()
1034            .min_by_key(|e| e.created_at)
1035            .map(|e| e.created_at);
1036        let newest = self
1037            .entries
1038            .iter()
1039            .max_by_key(|e| e.created_at)
1040            .map(|e| e.created_at);
1041
1042        let highly_referenced = self
1043            .entries
1044            .iter()
1045            .filter(|e| e.reference_count >= 3)
1046            .count();
1047
1048        MemoryStatistics {
1049            total,
1050            manual,
1051            auto,
1052            by_category,
1053            avg_importance,
1054            oldest,
1055            newest,
1056            highly_referenced,
1057        }
1058    }
1059
1060    /// Clear all memories.
1061    pub fn clear(&mut self) {
1062        self.entries.clear();
1063        self.invalidate_index();
1064    }
1065
1066    /// Remove a specific memory by ID.
1067    pub fn remove(&mut self, id: &str) -> bool {
1068        let idx = self.entries.iter().position(|e| e.id == id);
1069        if let Some(i) = idx {
1070            self.entries.remove(i);
1071            self.invalidate_index();
1072            true
1073        } else {
1074            false
1075        }
1076    }
1077
1078    /// Apply time decay to memory importance.
1079    pub fn apply_time_decay(&mut self) {
1080        let now = Utc::now();
1081        let decay_start_days = self.config.decay_start_days;
1082        let decay_rate = self.config.decay_rate;
1083        let decay_period_days = 30;
1084
1085        for entry in &mut self.entries {
1086            if entry.is_manual {
1087                continue;
1088            }
1089
1090            let days_since_reference = (now - entry.last_referenced).num_days().max(0);
1091
1092            if days_since_reference > decay_start_days {
1093                let decay_periods = (days_since_reference - decay_start_days) / decay_period_days;
1094                let decay_factor = decay_rate.powi(decay_periods as i32);
1095                entry.importance *= decay_factor;
1096                entry.importance = entry.importance.max(self.min_importance * 0.5);
1097            }
1098        }
1099
1100        self.prune();
1101    }
1102}
1103
1104// ============================================================================
1105// Memory Statistics
1106// ============================================================================
1107
1108/// Statistics about memory collection.
1109#[derive(Debug, Clone)]
1110pub struct MemoryStatistics {
1111    /// Total number of entries.
1112    pub total: usize,
1113    /// Number of manually added entries.
1114    pub manual: usize,
1115    /// Number of automatically detected entries.
1116    pub auto: usize,
1117    /// Count by category.
1118    pub by_category: HashMap<MemoryCategory, usize>,
1119    /// Average importance score.
1120    pub avg_importance: f64,
1121    /// Oldest entry creation time.
1122    pub oldest: Option<DateTime<Utc>>,
1123    /// Newest entry creation time.
1124    pub newest: Option<DateTime<Utc>>,
1125    /// Number of entries with high reference count.
1126    pub highly_referenced: usize,
1127}
1128
1129impl MemoryStatistics {
1130    /// Format statistics for display.
1131    pub fn format_summary(&self) -> String {
1132        let mut output = String::new();
1133
1134        output.push_str("记忆统计:\n");
1135        output.push_str(&format!("  总计: {} 条\n", self.total));
1136        output.push_str(&format!("  ├─ 手动添加: {} 条\n", self.manual));
1137        output.push_str(&format!("  └─ 自动检测: {} 条\n", self.auto));
1138        output.push('\n');
1139
1140        output.push_str("分类统计:\n");
1141        for (cat, count) in &self.by_category {
1142            output.push_str(&format!(
1143                "  {} {}: {} 条\n",
1144                cat.icon(),
1145                cat.display_name(),
1146                count
1147            ));
1148        }
1149        output.push('\n');
1150
1151        output.push_str("质量指标:\n");
1152        output.push_str(&format!("  平均重要性: {:.1} 分\n", self.avg_importance));
1153        output.push_str(&format!(
1154            "  高频引用: {} 条 (≥3次)\n",
1155            self.highly_referenced
1156        ));
1157
1158        if let Some(oldest) = self.oldest {
1159            let days = (Utc::now() - oldest).num_days();
1160            output.push_str(&format!("  记忆跨度: {} 天\n", days));
1161        }
1162
1163        output
1164    }
1165}