1use chrono::{DateTime, Utc};
4use serde::{Deserialize, Serialize};
5use std::collections::{HashMap, HashSet};
6
7use super::config::*;
8use super::entry::{MemoryCategory, MemoryEntry};
9use super::retrieval::{
10 TfIdfSearch, compute_relevance, expand_semantic_keywords, extract_context_keywords,
11 has_contradiction_signal,
12};
13use crate::providers::Message;
14use crate::truncate::truncate_with_suffix;
15
16fn compare_scored_entries(
23 a: (&MemoryEntry, f64),
24 b: (&MemoryEntry, f64),
25 relevance_weight: f64,
26 importance_weight: f64,
27) -> std::cmp::Ordering {
28 if a.0.is_manual && !b.0.is_manual {
30 return std::cmp::Ordering::Less;
31 }
32 if !a.0.is_manual && b.0.is_manual {
33 return std::cmp::Ordering::Greater;
34 }
35
36 let score_a =
37 a.1 * relevance_weight + (a.0.importance / MAX_IMPORTANCE_CEILING) * importance_weight;
38 let score_b =
39 b.1 * relevance_weight + (b.0.importance / MAX_IMPORTANCE_CEILING) * importance_weight;
40
41 score_b
42 .partial_cmp(&score_a)
43 .unwrap_or(std::cmp::Ordering::Equal)
44}
45
/// Cached lookup structures derived from a snapshot of the memory entries.
///
/// Every `usize` stored here is a position into the entry slice that was
/// handed to `SearchIndex::build`, so the index is only meaningful for that
/// exact slice and must be rebuilt whenever the entries change.
#[derive(Debug, Clone)]
pub struct SearchIndex {
    /// Lowercased `content` of each entry, in entry order.
    content_lower: Vec<String>,
    /// Entry positions grouped by category.
    by_category: HashMap<MemoryCategory, Vec<usize>>,
    /// Entry positions ordered by descending importance.
    by_importance: Vec<usize>,
    // `word_freq` is filled by `build` but not read by any method of this
    // type, hence the lint suppression.
    #[allow(dead_code)]
    /// Occurrence count of each whitespace-separated lowercase word.
    word_freq: HashMap<String, usize>,
}
63
64impl SearchIndex {
65 pub fn build(entries: &[MemoryEntry]) -> Self {
67 let content_lower: Vec<String> = entries.iter().map(|e| e.content.to_lowercase()).collect();
68
69 let mut by_category: HashMap<MemoryCategory, Vec<usize>> = HashMap::new();
70 for (i, entry) in entries.iter().enumerate() {
71 by_category.entry(entry.category).or_default().push(i);
72 }
73
74 let mut by_importance: Vec<usize> = (0..entries.len()).collect();
75 by_importance.sort_by(|a, b| {
76 entries[*b]
77 .importance
78 .partial_cmp(&entries[*a].importance)
79 .unwrap_or(std::cmp::Ordering::Equal)
80 });
81
82 let mut word_freq: HashMap<String, usize> = HashMap::new();
83 for content in &content_lower {
84 for word in content.split_whitespace() {
85 *word_freq.entry(word.to_string()).or_default() += 1;
86 }
87 }
88
89 Self {
90 content_lower,
91 by_category,
92 by_importance,
93 word_freq,
94 }
95 }
96
97 pub fn search(
99 &self,
100 _entries: &[MemoryEntry],
101 query_lower: &str,
102 limit: Option<usize>,
103 ) -> Vec<usize> {
104 let matches: Vec<usize> = self
105 .by_importance
106 .iter()
107 .filter(|&idx| self.content_lower[*idx].contains(query_lower))
108 .copied()
109 .collect();
110
111 if let Some(max) = limit {
112 matches.into_iter().take(max).collect()
113 } else {
114 matches
115 }
116 }
117
118 pub fn search_multi(&self, keywords_lower: &[String]) -> Vec<usize> {
120 self.by_importance
121 .iter()
122 .filter(|&idx| {
123 let content = &self.content_lower[*idx];
124 keywords_lower.iter().any(|k| content.contains(k))
125 })
126 .copied()
127 .collect()
128 }
129}
130
/// Serde fallback for `AutoMemory::max_entries` when the field is absent.
fn default_max_entries() -> usize {
    const FALLBACK_MAX_ENTRIES: usize = 100;
    FALLBACK_MAX_ENTRIES
}
138
/// Serde fallback for `AutoMemory::min_importance` when the field is absent.
fn default_min_importance() -> f64 {
    const FALLBACK_MIN_IMPORTANCE: f64 = 30.0;
    FALLBACK_MIN_IMPORTANCE
}
142
/// Serde fallback for `AutoMemory::enabled` when the field is absent.
fn default_enabled() -> bool {
    const FALLBACK_ENABLED: bool = true;
    FALLBACK_ENABLED
}
146
/// Serializable store of memory entries with a lazily built search cache.
///
/// NOTE(review): the serde fallbacks for `max_entries`, `min_importance` and
/// `enabled` come from the `default_*` functions in this file, while
/// `Default`/`with_config` copy them from `MemoryConfig` — confirm the two
/// sources agree.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutoMemory {
    /// All stored entries. Public, so callers can mutate it directly.
    pub entries: Vec<MemoryEntry>,
    /// Tuning parameters; falls back to `MemoryConfig::default()` when absent.
    #[serde(default)]
    pub config: MemoryConfig,
    /// Entry count above which `prune` starts discarding entries.
    #[serde(default = "default_max_entries")]
    pub max_entries: usize,
    /// Automatic entries below this importance are dropped when pruning.
    #[serde(default = "default_min_importance")]
    pub min_importance: f64,
    /// Feature switch; not read by any code in this file.
    #[serde(default = "default_enabled")]
    pub enabled: bool,
    /// Search cache; never serialized and rebuilt on demand by the `*_fast` methods.
    #[serde(skip)]
    search_index: Option<SearchIndex>,
}
170
171impl Default for AutoMemory {
172 fn default() -> Self {
173 let config = MemoryConfig::default();
174 Self {
175 entries: Vec::new(),
176 config: config.clone(),
177 max_entries: config.max_entries,
178 min_importance: config.min_importance,
179 enabled: config.enabled,
180 search_index: None,
181 }
182 }
183}
184
185impl AutoMemory {
186 pub fn new() -> Self {
188 Self::default()
189 }
190
191 fn ensure_index(&mut self) {
193 if self.search_index.is_none() {
194 self.rebuild_index();
195 }
196 }
197
198 pub fn rebuild_index(&mut self) {
200 self.search_index = Some(SearchIndex::build(&self.entries));
201 }
202
203 fn invalidate_index(&mut self) {
205 self.search_index = None;
206 }
207
208 pub fn with_config(config: MemoryConfig) -> Self {
210 Self {
211 entries: Vec::new(),
212 config: config.clone(),
213 max_entries: config.max_entries,
214 min_importance: config.min_importance,
215 enabled: config.enabled,
216 search_index: None,
217 }
218 }
219
220 pub fn minimal() -> Self {
222 Self::with_config(MemoryConfig::minimal())
223 }
224
225 pub fn archival() -> Self {
227 Self::with_config(MemoryConfig::archival())
228 }
229
230 pub fn add(&mut self, entry: MemoryEntry) {
232 if self.has_similar(&entry.content) {
234 log::debug!("Skipping duplicate memory: {}", entry.content);
235 return;
236 }
237
238 if let Some(conflict_idx) = self.find_conflict(&entry.content, entry.category) {
240 let old_content = self.entries[conflict_idx].content.clone();
241 log::info!(
242 "Memory conflict: '{}' supersedes '{}'",
243 entry.content,
244 old_content
245 );
246 self.entries.remove(conflict_idx);
247 self.invalidate_index();
248 }
249
250 self.entries.push(entry);
251 self.invalidate_index();
252 self.prune();
253 }
254
255 pub fn add_memory(
257 &mut self,
258 category: MemoryCategory,
259 content: String,
260 source_session: Option<String>,
261 ) {
262 let entry = MemoryEntry::new(category, content, source_session, None);
263 self.add(entry);
264 }
265
266 fn find_conflict(&self, new_content: &str, category: MemoryCategory) -> Option<usize> {
268 let new_lower = new_content.to_lowercase();
269 let new_words: HashSet<&str> = new_lower.split_whitespace().collect();
270
271 let has_change_signal = has_contradiction_signal("", &new_lower);
272 let overlap_threshold = if has_change_signal {
273 CONFLICT_OVERLAY_THRESHOLD_WITH_SIGNAL
274 } else {
275 CONFLICT_OVERLAY_THRESHOLD
276 };
277
278 for (i, entry) in self.entries.iter().enumerate() {
279 if entry.category != category {
280 continue;
281 }
282
283 let entry_lower = entry.content.to_lowercase();
284 let entry_words: HashSet<&str> = entry_lower.split_whitespace().collect();
285
286 let intersection = new_words.intersection(&entry_words).count();
287 let min_len = new_words.len().min(entry_words.len());
288
289 if min_len == 0 {
290 continue;
291 }
292
293 let topic_overlap = intersection as f64 / min_len as f64;
294 let jaccard = Self::calculate_similarity(&entry_lower, &new_lower);
295
296 if topic_overlap > overlap_threshold
297 && jaccard < SIMILARITY_THRESHOLD
298 && has_contradiction_signal(&entry_lower, &new_lower)
299 {
300 return Some(i);
301 }
302
303 if has_change_signal {
304 let old_key_terms: Vec<&str> = entry_words
305 .iter()
306 .filter(|w| w.len() > 2)
307 .copied()
308 .collect();
309 let referenced = old_key_terms.iter().any(|term| new_lower.contains(term));
310 if referenced {
311 return Some(i);
312 }
313 }
314 }
315
316 None
317 }
318
319 pub fn has_similar(&self, content: &str) -> bool {
321 let content_lower = content.to_lowercase();
322
323 if content_lower.len() < MIN_SIMILARITY_LENGTH {
324 return false;
325 }
326
327 for e in &self.entries {
328 let entry_lower = e.content.to_lowercase();
329
330 if entry_lower == content_lower {
331 log::debug!("Exact duplicate found: {}", content);
332 return true;
333 }
334
335 if entry_lower.len() < MIN_SIMILARITY_LENGTH {
336 continue;
337 }
338
339 let similarity = Self::calculate_similarity_enhanced(&entry_lower, &content_lower);
341 if similarity >= SIMILARITY_THRESHOLD {
342 log::debug!(
343 "Similar memory found (similarity={:.2}): '{}' vs '{}'",
344 similarity,
345 e.content,
346 content
347 );
348 crate::debug::debug_log().log(
349 "MEMORY_DUPLICATE",
350 &format!(
351 "similarity={:.2}, existing='{}', new='{}'",
352 similarity,
353 truncate_with_suffix(&e.content, 50),
354 truncate_with_suffix(content, 50)
355 ),
356 );
357 return true;
358 }
359 }
360
361 false
362 }
363
364 pub fn calculate_similarity(a: &str, b: &str) -> f64 {
366 let a_words: HashSet<&str> = a.split_whitespace().collect();
367 let b_words: HashSet<&str> = b.split_whitespace().collect();
368
369 if a_words.is_empty() || b_words.is_empty() {
370 return 0.0;
371 }
372
373 let intersection = a_words.intersection(&b_words).count();
374 let union = a_words.union(&b_words).count();
375
376 if union == 0 {
377 0.0
378 } else {
379 intersection as f64 / union as f64
380 }
381 }
382
383 pub fn calculate_similarity_enhanced(a: &str, b: &str) -> f64 {
386 let jaccard = Self::calculate_similarity(a, b);
388
389 let semantic = Self::calculate_semantic_similarity(a, b);
391
392 jaccard.max(semantic)
394 }
395
396 fn calculate_semantic_similarity(a: &str, b: &str) -> f64 {
399 let patterns = [
401 ("项目技术栈:", "技术栈"), ("入口文件:", "入口"), ("模块位于", "位于"), ("位于 packages/", "packages/"), ("核心功能:", "功能"), ("配置文件:", "配置"), ];
408
409 for (pattern, _) in patterns {
410 if a.contains(pattern) && b.contains(pattern) {
411 return 0.85;
413 }
414 }
415
416 let category_patterns = Self::extract_category_patterns(a);
418 let b_patterns = Self::extract_category_patterns(b);
419
420 if !category_patterns.is_empty() && !b_patterns.is_empty() {
421 let matches = category_patterns.intersection(&b_patterns).count();
423 if matches > 0 {
424 return 0.7 + (matches as f64 * 0.05).min(0.15); }
426 }
427
428 0.0
429 }
430
431 fn extract_category_patterns(content: &str) -> HashSet<&'static str> {
433 let mut patterns = HashSet::new();
434
435 if content.contains("决定") || content.contains("选择") || content.contains("采用") {
437 patterns.insert("decision");
438 }
439
440 if content.contains("偏好") || content.contains("习惯") || content.contains("喜欢") {
442 patterns.insert("preference");
443 }
444
445 if content.contains("解决") || content.contains("修复") || content.contains("通过") {
447 patterns.insert("solution");
448 }
449
450 if content.contains("位于") || content.contains("入口") || content.contains("模块") {
452 patterns.insert("structure");
453 }
454
455 if content.contains("技术栈") || content.contains("框架") || content.contains("库") {
457 patterns.insert("technical");
458 }
459
460 patterns
461 }
462
463 pub fn prune(&mut self) {
465 if self.entries.len() <= self.max_entries {
466 return;
467 }
468
469 let (manual_entries, auto_entries): (Vec<_>, Vec<_>) =
470 self.entries.iter().cloned().partition(|e| e.is_manual);
471
472 let mut sorted_auto = auto_entries;
473 sorted_auto.sort_by(|a, b| {
474 let importance_cmp = b
475 .importance
476 .partial_cmp(&a.importance)
477 .unwrap_or(std::cmp::Ordering::Equal);
478 if importance_cmp == std::cmp::Ordering::Equal {
479 b.last_referenced.cmp(&a.last_referenced)
480 } else {
481 importance_cmp
482 }
483 });
484
485 let kept_auto: Vec<_> = sorted_auto
486 .into_iter()
487 .filter(|e| e.importance >= self.min_importance)
488 .take(self.max_entries.saturating_sub(manual_entries.len()))
489 .collect();
490
491 self.entries = manual_entries.into_iter().chain(kept_auto).collect();
492
493 if self.entries.len() > self.max_entries {
494 self.entries.sort_by(|a, b| {
495 let importance_cmp = b
496 .importance
497 .partial_cmp(&a.importance)
498 .unwrap_or(std::cmp::Ordering::Equal);
499 if importance_cmp == std::cmp::Ordering::Equal {
500 b.last_referenced.cmp(&a.last_referenced)
501 } else {
502 importance_cmp
503 }
504 });
505 self.entries.truncate(self.max_entries);
506 }
507
508 self.invalidate_index();
509 }
510
    /// Collapses groups of near-duplicate entries into single merged entries.
    ///
    /// Each not-yet-processed entry is grouped with the later entries of the
    /// same category whose word-level Jaccard similarity (computed on the
    /// raw, case-sensitive content) reaches `MERGE_SIMILARITY_THRESHOLD`.
    /// Every group of two or more is replaced by the result of `merge_group`.
    ///
    /// Returns the sum over all groups of "group size minus one".
    ///
    /// NOTE(review): merged entries are re-inserted through `add`, which may
    /// skip them as duplicates of a surviving entry or remove a conflicting
    /// entry; in those cases the returned count would not match the actual
    /// change in length — confirm this is intended.
    pub fn smart_merge(&mut self) -> usize {
        if self.entries.len() < 2 {
            return 0;
        }

        let mut merged_count = 0;
        // Ids of entries that were absorbed into a merged entry.
        let mut to_remove: Vec<String> = Vec::new();
        // Replacement entries, inserted only after the originals are removed.
        let mut new_entries: Vec<MemoryEntry> = Vec::new();
        // Ids already assigned to a group (or confirmed as singletons).
        let mut processed: HashSet<String> = HashSet::new();

        for i in 0..self.entries.len() {
            let entry_i = &self.entries[i];
            if processed.contains(&entry_i.id) {
                continue;
            }

            // The group always starts with the anchor entry itself.
            let mut similar_group: Vec<usize> = vec![i];

            for j in (i + 1)..self.entries.len() {
                let entry_j = &self.entries[j];
                if processed.contains(&entry_j.id) {
                    continue;
                }

                if entry_i.category != entry_j.category {
                    continue;
                }

                // Similarity is measured against the anchor only, not
                // against the other members of the group.
                let similarity = Self::calculate_similarity(&entry_i.content, &entry_j.content);
                if similarity >= MERGE_SIMILARITY_THRESHOLD {
                    similar_group.push(j);
                }
            }

            if similar_group.len() >= 2 {
                let group_entries: Vec<&MemoryEntry> = similar_group
                    .iter()
                    .map(|&idx| &self.entries[idx])
                    .collect();

                let merged = self.merge_group(&group_entries);

                for entry in &group_entries {
                    to_remove.push(entry.id.clone());
                    processed.insert(entry.id.clone());
                }

                new_entries.push(merged);
                merged_count += similar_group.len() - 1;
            } else {
                processed.insert(entry_i.id.clone());
            }
        }

        // Mutations are deferred until here so the indices used above stay valid.
        for id in &to_remove {
            self.remove(id);
        }

        for entry in new_entries {
            self.add(entry);
        }

        if merged_count > 0 {
            log::debug!("Smart merge: reduced {} entries", merged_count);
            self.invalidate_index();
        }

        merged_count
    }
581
582 fn merge_group(&self, entries: &[&MemoryEntry]) -> MemoryEntry {
584 let best = entries
586 .iter()
587 .max_by(|a, b| {
588 let score_a = a.importance + (a.content.len() as f64 / 100.0);
589 let score_b = b.importance + (b.content.len() as f64 / 100.0);
590 score_b
591 .partial_cmp(&score_a)
592 .unwrap_or(std::cmp::Ordering::Equal)
593 })
594 .expect("merge_group called with empty entries");
595
596 let all_same = entries
597 .iter()
598 .all(|e| Self::calculate_similarity(&e.content, &best.content) >= 0.95);
599
600 if all_same {
601 let mut merged: MemoryEntry = (*best).clone();
602 merged.importance = entries
603 .iter()
604 .map(|e| e.importance)
605 .fold(best.importance, |max, val| val.max(max));
606 merged.tags.push("merged".to_string());
607 return merged;
608 }
609
610 let mut merged_content = best.content.clone();
611
612 for entry in entries {
613 if entry.id == best.id {
614 continue;
615 }
616 let unique_words = entry
617 .content
618 .split_whitespace()
619 .filter(|word| !best.content.contains(word))
620 .take(3)
621 .collect::<Vec<_>>();
622
623 if !unique_words.is_empty() {
624 let additions = unique_words.join(", ");
625 if additions.len() > 10 {
626 merged_content =
627 format!("{} ({})", merged_content.trim_end_matches('.'), additions);
628 }
629 }
630 }
631
632 let mut merged = MemoryEntry::new(best.category, merged_content, None, None);
633 merged.importance = entries
634 .iter()
635 .map(|e| e.importance)
636 .fold(best.importance, |max, val| val.max(max))
637 + 5.0;
638 merged.importance = merged.importance.min(MAX_IMPORTANCE_CEILING);
639
640 merged.tags.push("merged".to_string());
641 for entry in entries {
642 for tag in &entry.tags {
643 if !merged.tags.contains(tag) && !tag.starts_with("merged") {
644 merged.tags.push(tag.clone());
645 }
646 }
647 }
648
649 merged.is_manual = entries.iter().any(|e| e.is_manual);
650
651 merged
652 }
653
654 pub fn by_category(&self, category: MemoryCategory) -> Vec<&MemoryEntry> {
656 self.entries
657 .iter()
658 .filter(|e| e.category == category)
659 .collect()
660 }
661
662 pub fn by_category_fast(&mut self, category: MemoryCategory) -> Vec<&MemoryEntry> {
664 self.ensure_index();
665 if let Some(ref index) = self.search_index {
666 index
667 .by_category
668 .get(&category)
669 .map(|indices| indices.iter().map(|&i| &self.entries[i]).collect())
670 .unwrap_or_default()
671 } else {
672 self.by_category(category)
673 }
674 }
675
676 pub fn top_n(&self, n: usize) -> Vec<&MemoryEntry> {
678 let mut sorted: Vec<_> = self.entries.iter().collect();
679 sorted.sort_by(|a, b| {
680 b.importance
681 .partial_cmp(&a.importance)
682 .unwrap_or(std::cmp::Ordering::Equal)
683 });
684 sorted.into_iter().take(n).collect()
685 }
686
687 pub fn top_n_fast(&mut self, n: usize) -> Vec<&MemoryEntry> {
689 self.ensure_index();
690 if let Some(ref index) = self.search_index {
691 index
692 .by_importance
693 .iter()
694 .take(n)
695 .map(|&i| &self.entries[i])
696 .collect()
697 } else {
698 self.top_n(n)
699 }
700 }
701
702 pub fn search(&self, query: &str) -> Vec<&MemoryEntry> {
704 self.search_with_limit(query, None)
705 }
706
707 pub fn search_with_limit(&self, query: &str, limit: Option<usize>) -> Vec<&MemoryEntry> {
709 let query_lower = query.to_lowercase();
710 let mut results: Vec<_> = self
711 .entries
712 .iter()
713 .filter(|e| {
714 e.content.to_lowercase().contains(&query_lower)
715 || e.tags
716 .iter()
717 .any(|t| t.to_lowercase().contains(&query_lower))
718 })
719 .collect();
720
721 results.sort_by(|a, b| {
722 b.importance
723 .partial_cmp(&a.importance)
724 .unwrap_or(std::cmp::Ordering::Equal)
725 });
726
727 if let Some(max) = limit {
728 results.into_iter().take(max).collect()
729 } else {
730 results
731 }
732 }
733
734 pub fn search_fast(&mut self, query: &str, limit: Option<usize>) -> Vec<&MemoryEntry> {
736 self.ensure_index();
737 let query_lower = query.to_lowercase();
738
739 if let Some(ref index) = self.search_index {
740 let indices = index.search(&self.entries, &query_lower, limit);
741 indices.iter().map(|&i| &self.entries[i]).collect()
742 } else {
743 self.search_with_limit(query, limit)
744 }
745 }
746
747 pub fn search_multi(&self, keywords: &[&str]) -> Vec<&MemoryEntry> {
749 if keywords.is_empty() {
750 return Vec::new();
751 }
752
753 let keywords_lower: Vec<String> = keywords.iter().map(|k| k.to_lowercase()).collect();
754
755 self.entries
756 .iter()
757 .filter(|e| {
758 let content_lower = e.content.to_lowercase();
759 keywords_lower.iter().any(|k| content_lower.contains(k))
760 })
761 .collect()
762 }
763
764 pub fn search_multi_fast(&mut self, keywords: &[&str]) -> Vec<&MemoryEntry> {
766 if keywords.is_empty() {
767 return Vec::new();
768 }
769
770 self.ensure_index();
771 let keywords_lower: Vec<String> = keywords.iter().map(|k| k.to_lowercase()).collect();
772
773 if let Some(ref index) = self.search_index {
774 let indices = index.search_multi(&keywords_lower);
775 indices.iter().map(|&i| &self.entries[i]).collect()
776 } else {
777 self.search_multi(keywords)
778 }
779 }
780
781 pub fn add_batch(&mut self, entries: Vec<MemoryEntry>) {
783 for entry in entries {
784 if !self.has_similar(&entry.content) {
785 self.entries.push(entry);
786 }
787 }
788 self.prune();
789 }
790
791 pub fn update_references(&mut self, messages: &[Message]) {
793 let increment = self.config.reference_increment;
794
795 let texts_lower: Vec<String> = messages
796 .iter()
797 .filter_map(Self::extract_message_text_lower)
798 .collect();
799
800 let entry_contents_lower: Vec<String> = self
801 .entries
802 .iter()
803 .map(|e| e.content.to_lowercase())
804 .collect();
805
806 for (i, entry) in self.entries.iter_mut().enumerate() {
807 let entry_lower = &entry_contents_lower[i];
808 if texts_lower.iter().any(|t| t.contains(entry_lower)) {
809 entry.mark_referenced_with_increment(increment);
810 }
811 }
812 }
813
814 fn extract_message_text_lower(msg: &Message) -> Option<String> {
816 match &msg.content {
817 crate::providers::MessageContent::Text(t) => Some(t.to_lowercase()),
818 crate::providers::MessageContent::Blocks(blocks) => {
819 let text = blocks
820 .iter()
821 .filter_map(|b| {
822 if let crate::providers::ContentBlock::Text { text } = b {
823 Some(text.as_str())
824 } else {
825 None
826 }
827 })
828 .collect::<Vec<_>>()
829 .join(" ");
830 Some(text.to_lowercase())
831 }
832 }
833 }
834
835 pub fn generate_manifest(&self, max_entries: usize) -> String {
837 if self.entries.is_empty() {
838 return String::new();
839 }
840
841 let mut sorted_entries: Vec<_> = self.entries.iter().enumerate().collect();
842 sorted_entries.sort_by(|a, b| {
843 b.1.importance
844 .partial_cmp(&a.1.importance)
845 .unwrap_or(std::cmp::Ordering::Equal)
846 });
847 sorted_entries.truncate(max_entries);
848
849 let mut manifest = String::new();
850 for (original_idx, entry) in sorted_entries.iter() {
851 let preview: String = entry.content.chars().take(80).collect();
852 let preview = preview.trim_end_matches('\n');
853 manifest.push_str(&format!(
854 "{}. {} {} {} (重要性: {:.0})\n",
855 original_idx,
856 entry.category.icon(),
857 preview,
858 entry.category.display_name(),
859 entry.importance
860 ));
861 }
862
863 manifest
864 }
865
866 pub fn get_entries_by_indices(&self, indices: &[usize]) -> Vec<&MemoryEntry> {
868 indices
869 .iter()
870 .filter_map(|i| self.entries.get(*i))
871 .collect()
872 }
873
874 pub fn generate_prompt_summary(&self, max_entries: usize) -> String {
876 if self.entries.is_empty() {
877 return String::new();
878 }
879
880 let top_entries = self.top_n(max_entries);
881 if top_entries.is_empty() {
882 return String::new();
883 }
884
885 let mut summary = String::from("【自动记忆摘要】\n\n");
886
887 let mut by_cat: HashMap<MemoryCategory, Vec<&MemoryEntry>> = HashMap::new();
888 for entry in top_entries {
889 by_cat.entry(entry.category).or_default().push(entry);
890 }
891
892 for (cat, entries) in by_cat {
893 summary.push_str(&format!("{} {}:\n", cat.icon(), cat.display_name()));
894 for entry in entries {
895 summary.push_str(&format!(" {}\n", entry.format_for_prompt()));
896 }
897 summary.push('\n');
898 }
899
900 summary
901 }
902
903 pub fn generate_contextual_summary(&self, context: &str, max_entries: usize) -> String {
905 let keywords = extract_context_keywords(context);
906 self.generate_contextual_summary_with_keywords(&keywords, max_entries)
907 }
908
909 pub fn generate_contextual_summary_with_keywords(
911 &self,
912 context_keywords: &[String],
913 max_entries: usize,
914 ) -> String {
915 if self.entries.is_empty() {
916 return String::new();
917 }
918
919 let expanded_keywords = expand_semantic_keywords(context_keywords);
920
921 let mut tfidf = TfIdfSearch::new();
922 tfidf.index(self);
923 let keywords_slice: Vec<&str> = expanded_keywords.iter().map(|s| s.as_str()).collect();
924 let tfidf_results = tfidf.search_multi(&keywords_slice, Some(max_entries * 2));
925
926 let mut tfidf_scores: HashMap<String, f64> = HashMap::new();
927 for (content, score) in &tfidf_results {
928 if let Some(entry) = self.entries.iter().find(|e| &e.content == content) {
929 tfidf_scores.insert(entry.id.clone(), *score);
930 }
931 }
932
933 let mut scored: Vec<(&MemoryEntry, f64)> = self
934 .entries
935 .iter()
936 .map(|entry| {
937 let relevance = compute_relevance(entry, &expanded_keywords);
938 let tfidf = tfidf_scores.get(&entry.id).copied().unwrap_or(0.0);
939 let combined = tfidf * 0.4 + relevance * 0.6;
940 (entry, combined)
941 })
942 .collect();
943
944 scored.sort_by(|a, b| {
945 compare_scored_entries(*a, *b, CONTEXT_RELEVANCE_WEIGHT, CONTEXT_IMPORTANCE_WEIGHT)
946 });
947
948 let selected: Vec<&MemoryEntry> = scored
949 .iter()
950 .take(max_entries)
951 .map(|(entry, _)| *entry)
952 .collect();
953
954 if selected.is_empty() {
955 return String::new();
956 }
957
958 let mut summary = String::from("【跨会话记忆】\n\n");
959
960 let mut by_cat: HashMap<MemoryCategory, Vec<&MemoryEntry>> = HashMap::new();
961 for entry in selected {
962 by_cat.entry(entry.category).or_default().push(entry);
963 }
964
965 for (cat, entries) in by_cat {
966 summary.push_str(&format!("{} {}:\n", cat.icon(), cat.display_name()));
967 for entry in entries {
968 summary.push_str(&format!(" {}\n", entry.format_for_prompt()));
969 }
970 summary.push('\n');
971 }
972
973 summary
974 }
975
976 pub fn update_retrieval_stats(&mut self, retrieved_ids: &[String]) {
978 for id in retrieved_ids {
979 if let Some(entry) = self.entries.iter_mut().find(|e| &e.id == id) {
980 entry.mark_referenced();
981 log::debug!("Updated reference stats for memory {}", id);
982 }
983 }
984 }
985
986 pub fn get_retrieval_ids(
988 &self,
989 context_keywords: &[String],
990 max_entries: usize,
991 ) -> Vec<String> {
992 if self.entries.is_empty() {
993 return Vec::new();
994 }
995
996 let expanded_keywords = expand_semantic_keywords(context_keywords);
997
998 let mut scored: Vec<(&MemoryEntry, f64)> = self
999 .entries
1000 .iter()
1001 .map(|entry| {
1002 let relevance = compute_relevance(entry, &expanded_keywords);
1003 (entry, relevance)
1004 })
1005 .collect();
1006
1007 scored.sort_by(|a, b| compare_scored_entries(*a, *b, 1.0, 1.0));
1008
1009 scored
1010 .iter()
1011 .take(max_entries)
1012 .map(|(e, _)| e.id.clone())
1013 .collect()
1014 }
1015
1016 pub async fn generate_contextual_summary_async(
1019 &self,
1020 context: &str,
1021 max_entries: usize,
1022 _fast_provider: Option<&dyn crate::providers::Provider>,
1023 ) -> String {
1024 if self.entries.is_empty() {
1025 return String::new();
1026 }
1027
1028 let context_keywords = extract_context_keywords(context);
1029
1030 let mut scored: Vec<(&MemoryEntry, f64)> = self
1031 .entries
1032 .iter()
1033 .map(|entry| {
1034 let relevance = compute_relevance(entry, &context_keywords);
1035 (entry, relevance)
1036 })
1037 .collect();
1038
1039 scored.sort_by(|a, b| {
1040 compare_scored_entries(*a, *b, CONTEXT_RELEVANCE_WEIGHT, CONTEXT_IMPORTANCE_WEIGHT)
1041 });
1042
1043 let selected: Vec<&MemoryEntry> = scored
1044 .iter()
1045 .take(max_entries)
1046 .map(|(entry, _)| *entry)
1047 .collect();
1048
1049 if selected.is_empty() {
1050 return String::new();
1051 }
1052
1053 let mut summary = String::from("【跨会话记忆】\n\n");
1054
1055 let mut by_cat: HashMap<MemoryCategory, Vec<&MemoryEntry>> = HashMap::new();
1056 for entry in selected {
1057 by_cat.entry(entry.category).or_default().push(entry);
1058 }
1059
1060 for (cat, entries) in by_cat {
1061 summary.push_str(&format!("{} {}:\n", cat.icon(), cat.display_name()));
1062 for entry in entries {
1063 summary.push_str(&format!(" {}\n", entry.format_for_prompt()));
1064 }
1065 summary.push('\n');
1066 }
1067
1068 summary
1069 }
1070
1071 pub fn format_all(&self) -> String {
1073 if self.entries.is_empty() {
1074 return "[no memories accumulated]".to_string();
1075 }
1076
1077 let mut result = String::from("Accumulated memories:\n\n");
1078
1079 let mut sorted: Vec<_> = self.entries.iter().collect();
1080 sorted.sort_by(|a, b| {
1081 b.importance
1082 .partial_cmp(&a.importance)
1083 .unwrap_or(std::cmp::Ordering::Equal)
1084 });
1085
1086 for entry in sorted {
1087 result.push_str(&entry.format_line());
1088 result.push('\n');
1089 }
1090
1091 result
1092 }
1093
1094 pub fn generate_statistics(&self) -> MemoryStatistics {
1096 let total = self.entries.len();
1097 let manual = self.entries.iter().filter(|e| e.is_manual).count();
1098 let auto = total - manual;
1099
1100 let by_category: HashMap<MemoryCategory, usize> =
1101 self.entries.iter().fold(HashMap::new(), |mut acc, e| {
1102 *acc.entry(e.category).or_default() += 1;
1103 acc
1104 });
1105
1106 let avg_importance = if total > 0 {
1107 self.entries.iter().map(|e| e.importance).sum::<f64>() / total as f64
1108 } else {
1109 0.0
1110 };
1111
1112 let oldest = self
1113 .entries
1114 .iter()
1115 .min_by_key(|e| e.created_at)
1116 .map(|e| e.created_at);
1117 let newest = self
1118 .entries
1119 .iter()
1120 .max_by_key(|e| e.created_at)
1121 .map(|e| e.created_at);
1122
1123 let highly_referenced = self
1124 .entries
1125 .iter()
1126 .filter(|e| e.reference_count >= 3)
1127 .count();
1128
1129 MemoryStatistics {
1130 total,
1131 manual,
1132 auto,
1133 by_category,
1134 avg_importance,
1135 oldest,
1136 newest,
1137 highly_referenced,
1138 }
1139 }
1140
1141 pub fn clear(&mut self) {
1143 self.entries.clear();
1144 self.invalidate_index();
1145 }
1146
1147 pub fn remove(&mut self, id: &str) -> bool {
1149 let idx = self.entries.iter().position(|e| e.id == id);
1150 if let Some(i) = idx {
1151 self.entries.remove(i);
1152 self.invalidate_index();
1153 true
1154 } else {
1155 false
1156 }
1157 }
1158
1159 pub fn apply_time_decay(&mut self) {
1161 let now = Utc::now();
1162 let decay_start_days = self.config.decay_start_days;
1163 let decay_rate = self.config.decay_rate;
1164 let decay_period_days = 30;
1165
1166 for entry in &mut self.entries {
1167 if entry.is_manual {
1168 continue;
1169 }
1170
1171 let days_since_reference = (now - entry.last_referenced).num_days().max(0);
1172
1173 if days_since_reference > decay_start_days {
1174 let decay_periods = (days_since_reference - decay_start_days) / decay_period_days;
1175 let decay_factor = decay_rate.powi(decay_periods as i32);
1176 entry.importance *= decay_factor;
1177 entry.importance = entry.importance.max(self.min_importance * 0.5);
1178 }
1179 }
1180
1181 self.prune();
1182 }
1183}
1184
/// Aggregate figures describing the contents of an `AutoMemory`, as produced
/// by `AutoMemory::generate_statistics`.
#[derive(Debug, Clone)]
pub struct MemoryStatistics {
    /// Total number of entries.
    pub total: usize,
    /// Number of entries flagged `is_manual`.
    pub manual: usize,
    /// Number of entries not flagged `is_manual` (`total - manual`).
    pub auto: usize,
    /// Entry count per category; categories with no entries are absent.
    pub by_category: HashMap<MemoryCategory, usize>,
    /// Mean importance over all entries, or `0.0` when there are none.
    pub avg_importance: f64,
    /// Earliest `created_at` among the entries, if any.
    pub oldest: Option<DateTime<Utc>>,
    /// Latest `created_at` among the entries, if any.
    pub newest: Option<DateTime<Utc>>,
    /// Number of entries with a `reference_count` of at least 3.
    pub highly_referenced: usize,
}
1209
1210impl MemoryStatistics {
1211 pub fn format_summary(&self) -> String {
1213 let mut output = String::new();
1214
1215 output.push_str("记忆统计:\n");
1216 output.push_str(&format!(" 总计: {} 条\n", self.total));
1217 output.push_str(&format!(" ├─ 手动添加: {} 条\n", self.manual));
1218 output.push_str(&format!(" └─ 自动检测: {} 条\n", self.auto));
1219 output.push('\n');
1220
1221 output.push_str("分类统计:\n");
1222 for (cat, count) in &self.by_category {
1223 output.push_str(&format!(
1224 " {} {}: {} 条\n",
1225 cat.icon(),
1226 cat.display_name(),
1227 count
1228 ));
1229 }
1230 output.push('\n');
1231
1232 output.push_str("质量指标:\n");
1233 output.push_str(&format!(" 平均重要性: {:.1} 分\n", self.avg_importance));
1234 output.push_str(&format!(
1235 " 高频引用: {} 条 (≥3次)\n",
1236 self.highly_referenced
1237 ));
1238
1239 if let Some(oldest) = self.oldest {
1240 let days = (Utc::now() - oldest).num_days();
1241 output.push_str(&format!(" 记忆跨度: {} 天\n", days));
1242 }
1243
1244 output
1245 }
1246}