matrixcode_core/
memory.rs

1//! Auto Memory system for MatrixCode.
2//!
3//! This module implements automatic memory accumulation inspired by Claude Code.
4//! It captures user preferences, project decisions, key findings, and solutions
5//! across sessions, providing persistent context that survives conversation compression.
6
7use anyhow::Result;
8use chrono::{DateTime, Utc};
9use serde::{Deserialize, Serialize};
10use std::collections::HashMap;
11use std::path::{Path, PathBuf};
12use std::fs;
13
14use crate::providers::Message;
15
/// Shorten `s` for display, appending `...` when it had to be cut
/// (replaces `ui::truncate_str`).
///
/// `max_len` is a budget in bytes. When `s` exceeds it, the text is cut so that
/// the kept prefix plus the three-byte ellipsis fits the budget. The cut point
/// is moved back to the nearest UTF-8 character boundary, so multi-byte text
/// (e.g. Chinese) never causes a slicing panic. For `max_len < 3` the result is
/// just `"..."`.
fn truncate_str(s: &str, max_len: usize) -> String {
    if s.len() <= max_len {
        return s.to_string();
    }

    // Reserve room for the ellipsis, then back off until we sit on a boundary.
    // Index 0 is always a boundary, so the loop terminates.
    let mut cut = max_len.saturating_sub(3);
    while !s.is_char_boundary(cut) {
        cut -= 1;
    }
    format!("{}...", &s[..cut])
}
24
/// Return at most the first `max_len` bytes of `s`, without an ellipsis.
///
/// If `max_len` falls inside a multi-byte UTF-8 character, the cut is moved
/// back to the previous character boundary instead of panicking, so the result
/// may be slightly shorter than `max_len` bytes.
fn truncate(s: &str, max_len: usize) -> String {
    if s.len() <= max_len {
        return s.to_string();
    }

    // Index 0 is always a boundary, so the loop terminates.
    let mut cut = max_len;
    while !s.is_char_boundary(cut) {
        cut -= 1;
    }
    s[..cut].to_string()
}
32
33// ============================================================================
34// Constants
35// ============================================================================
36
/// Maximum importance score ceiling (entries cannot exceed this).
pub const MAX_IMPORTANCE_CEILING: f64 = 100.0;

/// Minimum content length (in bytes) for the similarity check, so that very
/// short strings do not match everything.
pub const MIN_SIMILARITY_LENGTH: usize = 10;

/// Similarity threshold for considering entries as duplicates (0.0-1.0).
pub const SIMILARITY_THRESHOLD: f64 = 0.7;

/// Minimum content length for memory detection (to avoid capturing too generic content).
pub const MIN_MEMORY_CONTENT_LENGTH: usize = 15;

/// Maximum entries to return from detection (to avoid overwhelming).
pub const MAX_DETECTED_ENTRIES: usize = 5;

/// Maximum length for memory content before truncation.
pub const MAX_MEMORY_CONTENT_LENGTH: usize = 200;

/// Maximum length for display (shorter for terminal readability).
pub const MAX_DISPLAY_LENGTH: usize = 60;

/// Topic overlap threshold for conflict detection.
pub const CONFLICT_OVERLAY_THRESHOLD: f64 = 0.5;

/// Lower topic overlap threshold when a change signal is present.
pub const CONFLICT_OVERLAY_THRESHOLD_WITH_SIGNAL: f64 = 0.3;

/// Importance threshold for displaying the star marker (⭐).
pub const IMPORTANCE_STAR_THRESHOLD: f64 = 80.0;

/// Weight for relevance in contextual summary (relevance vs importance trade-off).
pub const CONTEXT_RELEVANCE_WEIGHT: f64 = 0.6;

/// Weight for importance in contextual summary.
///
/// Derived from [`CONTEXT_RELEVANCE_WEIGHT`] so the two weights always sum to 1.0.
pub const CONTEXT_IMPORTANCE_WEIGHT: f64 = 1.0 - CONTEXT_RELEVANCE_WEIGHT;

/// Default model for cost-effective memory extraction.
pub const DEFAULT_MEMORY_EXTRACTOR_MODEL: &str = "claude-3-5-haiku-20241022";

/// Minimum keywords threshold for triggering AI fallback.
/// If rule-based extraction produces fewer keywords than this, AI is used.
pub const MIN_KEYWORDS_FOR_AI_FALLBACK: usize = 2;
79
80/// AI keyword extraction mode.
81#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
82pub enum AiKeywordMode {
83    /// Hybrid mode: rule-based first, AI fallback when keywords are insufficient (default).
84    #[default]
85    Auto,
86    /// Always use AI for keyword extraction.
87    Always,
88    /// Never use AI, only rule-based extraction.
89    Never,
90}
91
92impl AiKeywordMode {
93    /// Parse from environment variable string.
94    pub fn from_env() -> Self {
95        match std::env::var("MEMORY_AI_KEYWORDS")
96            .unwrap_or_default()
97            .to_lowercase()
98            .as_str()
99        {
100            "always" | "true" | "1" => AiKeywordMode::Always,
101            "never" | "false" | "0" => AiKeywordMode::Never,
102            "auto" | "" => AiKeywordMode::Auto,
103            other => {
104                log::warn!("Unknown MEMORY_AI_KEYWORDS value: '{}', using 'auto'", other);
105                AiKeywordMode::Auto
106            }
107        }
108    }
109    
110    /// Whether AI extraction should be used given the keyword count.
111    pub fn should_use_ai(&self, keyword_count: usize) -> bool {
112        match self {
113            AiKeywordMode::Always => true,
114            AiKeywordMode::Never => false,
115            AiKeywordMode::Auto => keyword_count < MIN_KEYWORDS_FOR_AI_FALLBACK,
116        }
117    }
118}
119
/// Default importance scores by category.
///
/// Default importance for `MemoryCategory::Decision` entries (the highest default).
pub const DEFAULT_IMPORTANCE_DECISION: f64 = 90.0;
/// Default importance for `MemoryCategory::Solution` entries.
pub const DEFAULT_IMPORTANCE_SOLUTION: f64 = 85.0;
/// Default importance for `MemoryCategory::Preference` entries.
pub const DEFAULT_IMPORTANCE_PREF: f64 = 70.0;
/// Default importance for `MemoryCategory::Finding` entries.
pub const DEFAULT_IMPORTANCE_FINDING: f64 = 60.0;
/// Default importance for `MemoryCategory::Technical` entries.
pub const DEFAULT_IMPORTANCE_TECH: f64 = 50.0;
/// Default importance for `MemoryCategory::Structure` entries (the lowest default).
pub const DEFAULT_IMPORTANCE_STRUCTURE: f64 = 40.0;
127
128// ============================================================================
129// Memory Configuration
130// ============================================================================
131
132/// Configuration for the memory system.
133#[derive(Debug, Clone, Serialize, Deserialize)]
134pub struct MemoryConfig {
135    /// Maximum number of entries to keep.
136    pub max_entries: usize,
137    /// Minimum importance threshold to keep.
138    pub min_importance: f64,
139    /// Whether auto accumulation is enabled.
140    pub enabled: bool,
141    /// Days before time decay starts.
142    pub decay_start_days: i64,
143    /// Decay rate per period (0.0-1.0).
144    pub decay_rate: f64,
145    /// Importance increment per reference.
146    pub reference_increment: f64,
147    /// Maximum importance ceiling.
148    pub max_importance_ceiling: f64,
149}
150
151impl Default for MemoryConfig {
152    fn default() -> Self {
153        Self {
154            max_entries: 100,
155            min_importance: 30.0,
156            enabled: true,
157            decay_start_days: 30,
158            decay_rate: 0.5,
159            reference_increment: 2.0,
160            max_importance_ceiling: MAX_IMPORTANCE_CEILING,
161        }
162    }
163}
164
165impl MemoryConfig {
166    /// Create a new config with custom max entries.
167    pub fn with_max_entries(max: usize) -> Self {
168        Self {
169            max_entries: max,
170            ..Self::default()
171        }
172    }
173    
174    /// Create a minimal config for low-memory environments.
175    pub fn minimal() -> Self {
176        Self {
177            max_entries: 50,
178            min_importance: 50.0,
179            enabled: true,
180            decay_start_days: 14,
181            decay_rate: 0.6,
182            reference_increment: 1.0,
183            max_importance_ceiling: MAX_IMPORTANCE_CEILING,
184        }
185    }
186    
187    /// Create a config for long-term archival.
188    pub fn archival() -> Self {
189        Self {
190            max_entries: 500,
191            min_importance: 20.0,
192            enabled: true,
193            decay_start_days: 90,
194            decay_rate: 0.3,
195            reference_increment: 3.0,
196            max_importance_ceiling: MAX_IMPORTANCE_CEILING,
197        }
198    }
199}
200
201// ============================================================================
202// Memory Categories
203// ============================================================================
204
205/// Categories for memory entries.
206#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Hash)]
207#[serde(rename_all = "snake_case")]
208pub enum MemoryCategory {
209    /// User preferences (e.g., "I prefer vim over nano")
210    Preference,
211    /// Project decisions (e.g., "Decided to use PostgreSQL")
212    Decision,
213    /// Key findings (e.g., "API endpoint is at /api/v2")
214    Finding,
215    /// Problem solutions (e.g., "Fixed auth bug by adding token refresh")
216    Solution,
217    /// Technical notes (e.g., "React Query is used for data fetching")
218    Technical,
219    /// Project structure (e.g., "src/index.ts is entry point")
220    Structure,
221}
222
223impl MemoryCategory {
224    /// Get display name for the category.
225    pub fn display_name(&self) -> &'static str {
226        match self {
227            MemoryCategory::Preference => "偏好",
228            MemoryCategory::Decision => "决策",
229            MemoryCategory::Finding => "发现",
230            MemoryCategory::Solution => "解决方案",
231            MemoryCategory::Technical => "技术",
232            MemoryCategory::Structure => "结构",
233        }
234    }
235
236    /// Get icon for the category.
237    pub fn icon(&self) -> &'static str {
238        match self {
239            MemoryCategory::Preference => "👤",
240            MemoryCategory::Decision => "🎯",
241            MemoryCategory::Finding => "💡",
242            MemoryCategory::Solution => "🔧",
243            MemoryCategory::Technical => "📚",
244            MemoryCategory::Structure => "🏗️",
245        }
246    }
247
248    /// Get default importance score for the category.
249    pub fn default_importance(&self) -> f64 {
250        match self {
251            MemoryCategory::Decision => DEFAULT_IMPORTANCE_DECISION,
252            MemoryCategory::Solution => DEFAULT_IMPORTANCE_SOLUTION,
253            MemoryCategory::Preference => DEFAULT_IMPORTANCE_PREF,
254            MemoryCategory::Finding => DEFAULT_IMPORTANCE_FINDING,
255            MemoryCategory::Technical => DEFAULT_IMPORTANCE_TECH,
256            MemoryCategory::Structure => DEFAULT_IMPORTANCE_STRUCTURE,
257        }
258    }
259}
260
261// ============================================================================
262// Memory Entry
263// ============================================================================
264
265/// A single memory entry.
266#[derive(Debug, Clone, Serialize, Deserialize)]
267pub struct MemoryEntry {
268    /// Unique identifier.
269    pub id: String,
270    /// When the memory was created.
271    pub created_at: DateTime<Utc>,
272    /// When the memory was last accessed/referenced.
273    pub last_referenced: DateTime<Utc>,
274    /// Category of the memory.
275    pub category: MemoryCategory,
276    /// The memory content.
277    pub content: String,
278    /// Source session ID (where this memory was created).
279    pub source_session: Option<String>,
280    /// Number of times this memory has been referenced.
281    pub reference_count: u32,
282    /// Importance score (0-100, higher = more important).
283    pub importance: f64,
284    /// Tags for searching/filtering.
285    pub tags: Vec<String>,
286    /// Whether this memory was manually added by user.
287    pub is_manual: bool,
288}
289
290impl MemoryEntry {
291    /// Create a new memory entry.
292    pub fn new(category: MemoryCategory, content: String, source_session: Option<String>) -> Self {
293        let id = uuid::Uuid::new_v4().to_string();
294        Self {
295            id,
296            created_at: Utc::now(),
297            last_referenced: Utc::now(),
298            category,
299            content,
300            source_session,
301            reference_count: 0,
302            importance: category.default_importance(),
303            tags: Vec::new(),
304            is_manual: false,
305        }
306    }
307
308    /// Create a manually added memory entry.
309    pub fn manual(category: MemoryCategory, content: String) -> Self {
310        let mut entry = Self::new(category, content, None);
311        entry.is_manual = true;
312        entry.importance = 95.0; // Manual entries are highly important
313        entry
314    }
315
316    /// Mark this memory as referenced (increases importance over time).
317    pub fn mark_referenced(&mut self) {
318        self.mark_referenced_with_increment(2.0);
319    }
320
321    /// Mark this memory as referenced with custom importance increment.
322    pub fn mark_referenced_with_increment(&mut self, increment: f64) {
323        self.reference_count += 1;
324        self.last_referenced = Utc::now();
325        // Increase importance slightly with each reference (capped at ceiling)
326        self.importance = (self.importance + increment).min(MAX_IMPORTANCE_CEILING);
327    }
328
329    /// Format for display.
330    pub fn format_line(&self) -> String {
331        let time = self.created_at.format("%Y-%m-%d %H:%M");
332        let importance_marker = if self.importance >= IMPORTANCE_STAR_THRESHOLD { "⭐" } else { "" };
333        let manual_marker = if self.is_manual { "📝" } else { "" };
334        format!(
335            "{} {} {}{}{} {}",
336            self.category.icon(),
337            time,
338            importance_marker,
339            manual_marker,
340            self.category.display_name(),
341            truncate_str(&self.content, MAX_DISPLAY_LENGTH)
342        )
343    }
344
345    /// Format for inclusion in system prompt.
346    pub fn format_for_prompt(&self) -> String {
347        let category_name = self.category.display_name();
348        if self.content.len() > MAX_MEMORY_CONTENT_LENGTH {
349            format!("{}: {}...", category_name, truncate(&self.content, MAX_MEMORY_CONTENT_LENGTH - 3))
350        } else {
351            format!("{}: {}", category_name, self.content)
352        }
353    }
354}
355
356// ============================================================================
357// Auto Memory Manager
358// ============================================================================
359
/// Manager for automatic memory accumulation.
///
/// Owns the list of memory entries together with the settings that govern
/// pruning, plus a lazily built search index that is never serialized.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AutoMemory {
    /// All memory entries.
    pub entries: Vec<MemoryEntry>,
    /// Configuration for memory management.
    /// Falls back to `MemoryConfig::default()` when missing from serialized data.
    #[serde(default)]
    pub config: MemoryConfig,
    /// Legacy fields for backward compatibility (deprecated).
    ///
    /// NOTE(review): `prune` reads `max_entries` and `min_importance` from
    /// these top-level fields rather than from `config`, so data deserialized
    /// with a `config` but without these fields is pruned using the serde
    /// defaults below — confirm whether that is intended.
    #[serde(default = "default_max_entries")]
    pub max_entries: usize,
    /// Legacy importance floor; defaults to `default_min_importance()` when absent.
    #[serde(default = "default_min_importance")]
    pub min_importance: f64,
    /// Legacy enable flag; defaults to `default_enabled()` when absent.
    #[serde(default = "default_enabled")]
    pub enabled: bool,
    /// Search index (not serialized, rebuilt on load).
    #[serde(skip)]
    search_index: Option<SearchIndex>,
}
379
380/// Search index for fast lookups.
381#[derive(Debug, Clone)]
382struct SearchIndex {
383    /// Lowercase content cache for each entry.
384    content_lower: Vec<String>,
385    /// Entries grouped by category.
386    by_category: HashMap<MemoryCategory, Vec<usize>>,
387    /// Entries sorted by importance (indices).
388    by_importance: Vec<usize>,
389    /// Total word frequency for relevance scoring (future use).
390    #[allow(dead_code)]
391    word_freq: HashMap<String, usize>,
392}
393
394impl SearchIndex {
395    /// Build index from entries.
396    fn build(entries: &[MemoryEntry]) -> Self {
397        // Build lowercase cache
398        let content_lower: Vec<String> = entries
399            .iter()
400            .map(|e| e.content.to_lowercase())
401            .collect();
402        
403        // Build category index
404        let mut by_category: HashMap<MemoryCategory, Vec<usize>> = HashMap::new();
405        for (i, entry) in entries.iter().enumerate() {
406            by_category.entry(entry.category).or_default().push(i);
407        }
408        
409        // Build importance index (sorted descending)
410        let mut by_importance: Vec<usize> = (0..entries.len()).collect();
411        by_importance.sort_by(|a, b| {
412            entries[*b].importance.partial_cmp(&entries[*a].importance)
413                .unwrap_or(std::cmp::Ordering::Equal)
414        });
415        
416        // Build word frequency
417        let mut word_freq: HashMap<String, usize> = HashMap::new();
418        for content in &content_lower {
419            for word in content.split_whitespace() {
420                *word_freq.entry(word.to_string()).or_default() += 1;
421            }
422        }
423        
424        Self {
425            content_lower,
426            by_category,
427            by_importance,
428            word_freq,
429        }
430    }
431    
432    /// Get lowercase content for entry.
433    #[allow(dead_code)]
434    fn get_lower(&self, idx: usize) -> &str {
435        &self.content_lower[idx]
436    }
437    
438    /// Search by query with optional limit.
439    fn search(&self, _entries: &[MemoryEntry], query_lower: &str, limit: Option<usize>) -> Vec<usize> {
440        // Use importance index to search in priority order
441        let matches: Vec<usize> = self.by_importance
442            .iter()
443            .filter(|&idx| self.content_lower[*idx].contains(query_lower))
444            .copied()
445            .collect();
446        
447        if let Some(max) = limit {
448            matches.into_iter().take(max).collect()
449        } else {
450            matches
451        }
452    }
453    
454    /// Multi-keyword search (matches any keyword).
455    fn search_multi(&self, keywords_lower: &[String]) -> Vec<usize> {
456        self.by_importance
457            .iter()
458            .filter(|&idx| {
459                let content = &self.content_lower[*idx];
460                keywords_lower.iter().any(|k| content.contains(k))
461            })
462            .copied()
463            .collect()
464    }
465    
466    /// Invalidate and rebuild index.
467    #[allow(dead_code)]
468    fn rebuild(&mut self, entries: &[MemoryEntry]) {
469        *self = Self::build(entries);
470    }
471}
472
/// Serde fallback for the legacy `max_entries` field.
fn default_max_entries() -> usize {
    100
}

/// Serde fallback for the legacy `min_importance` field.
fn default_min_importance() -> f64 {
    30.0
}

/// Serde fallback for the legacy `enabled` field.
fn default_enabled() -> bool {
    true
}
476
477impl Default for AutoMemory {
478    fn default() -> Self {
479        let config = MemoryConfig::default();
480        Self {
481            entries: Vec::new(),
482            config: config.clone(),
483            max_entries: config.max_entries,
484            min_importance: config.min_importance,
485            enabled: config.enabled,
486            search_index: None,
487        }
488    }
489}
490
491impl AutoMemory {
492    /// Create a new auto memory manager.
493    pub fn new() -> Self {
494        Self::default()
495    }
496    
497    /// Ensure search index is built.
498    fn ensure_index(&mut self) {
499        if self.search_index.is_none() {
500            self.rebuild_index();
501        }
502    }
503    
504    /// Rebuild search index.
505    pub fn rebuild_index(&mut self) {
506        self.search_index = Some(SearchIndex::build(&self.entries));
507    }
508    
509    /// Invalidate search index (call after modifications).
510    fn invalidate_index(&mut self) {
511        self.search_index = None;
512    }
513
514    /// Create with custom configuration.
515    pub fn with_config(config: MemoryConfig) -> Self {
516        Self {
517            entries: Vec::new(),
518            config: config.clone(),
519            max_entries: config.max_entries,
520            min_importance: config.min_importance,
521            enabled: config.enabled,
522            search_index: None,
523        }
524    }
525
526    /// Create a minimal memory manager (low-memory environments).
527    pub fn minimal() -> Self {
528        Self::with_config(MemoryConfig::minimal())
529    }
530
531    /// Create an archival memory manager (long-term storage).
532    pub fn archival() -> Self {
533        Self::with_config(MemoryConfig::archival())
534    }
535
536    /// Add a new memory entry.
537    pub fn add(&mut self, entry: MemoryEntry) {
538        self.entries.push(entry);
539        self.invalidate_index();  // Index needs rebuild
540        self.prune();
541    }
542
543    /// Add memory from detected content.
544    pub fn add_memory(
545        &mut self,
546        category: MemoryCategory,
547        content: String,
548        source_session: Option<String>,
549    ) {
550        // Check for duplicates (similar content)
551        if self.has_similar(&content) {
552            return;
553        }
554
555        // Check for conflicts (same category, contradicting content)
556        if let Some(conflict_idx) = self.find_conflict(&content, category) {
557            // Replace the old conflicting entry with the new one
558            let old_content = self.entries[conflict_idx].content.clone();
559            log::debug!("Memory conflict detected: '{}' supersedes '{}'", content, old_content);
560            self.entries.remove(conflict_idx);
561            self.invalidate_index();
562        }
563
564        let entry = MemoryEntry::new(category, content, source_session);
565        self.add(entry);
566    }
567
568    /// Find a conflicting memory entry.
569    /// 
570    /// A conflict is detected when:
571    /// 1. Same category (e.g., both are Decision)
572    /// 2. Same subject/topic (overlapping keywords)
573    /// 3. Different conclusion (not similar enough to be a duplicate)
574    /// 
575    /// Example conflicts:
576    /// - "决定使用 PostgreSQL" vs "决定使用 MySQL" (same topic: database choice)
577    /// - "偏好 vim" vs "偏好 vscode" (same topic: editor preference)
578    fn find_conflict(&self, new_content: &str, category: MemoryCategory) -> Option<usize> {
579        let new_lower = new_content.to_lowercase();
580        let new_words: std::collections::HashSet<&str> = new_lower.split_whitespace().collect();
581        
582        // If new content has explicit change signals, lower the threshold
583        let has_change_signal = has_contradiction_signal("", &new_lower);
584        let overlap_threshold = if has_change_signal { 
585            CONFLICT_OVERLAY_THRESHOLD_WITH_SIGNAL 
586        } else { 
587            CONFLICT_OVERLAY_THRESHOLD 
588        };
589        
590        // Only check entries in the same category
591        for (i, entry) in self.entries.iter().enumerate() {
592            if entry.category != category {
593                continue;
594            }
595            
596            let entry_lower = entry.content.to_lowercase();
597            let entry_words: std::collections::HashSet<&str> = entry_lower.split_whitespace().collect();
598            
599            // Calculate topic overlap (shared words)
600            let intersection = new_words.intersection(&entry_words).count();
601            let min_len = new_words.len().min(entry_words.len());
602            
603            if min_len == 0 {
604                continue;
605            }
606            
607            let topic_overlap = intersection as f64 / min_len as f64;
608            
609            // High topic overlap but not a duplicate
610            let jaccard = Self::calculate_similarity(&entry_lower, &new_lower);
611            
612            if topic_overlap > overlap_threshold && jaccard < SIMILARITY_THRESHOLD {
613                // Check for contradiction patterns
614                if has_contradiction_signal(&entry_lower, &new_lower) {
615                    return Some(i);
616                }
617            }
618            
619            // Also check if new content explicitly references old content
620            // e.g., "不再使用 vim" when old entry contains "vim"
621            if has_change_signal {
622                // Check if old entry's key terms appear in new content
623                let old_key_terms: Vec<&str> = entry_words.iter()
624                    .filter(|w| w.len() > 2)
625                    .copied()
626                    .collect();
627                let referenced = old_key_terms.iter()
628                    .any(|term| new_lower.contains(term));
629                if referenced {
630                    return Some(i);
631                }
632            }
633        }
634        
635        None
636    }
637
638    /// Check if similar content already exists.
639    /// Uses minimum length threshold to prevent short words from matching everything.
640    pub fn has_similar(&self, content: &str) -> bool {
641        let content_lower = content.to_lowercase();
642        
643        // Skip short content - they're likely too generic to be useful memories
644        if content_lower.len() < MIN_SIMILARITY_LENGTH {
645            return false;
646        }
647        
648        self.entries.iter().any(|e| {
649            let entry_lower = e.content.to_lowercase();
650            
651            // Exact match
652            if entry_lower == content_lower {
653                return true;
654            }
655            
656            // Skip comparing with short entries
657            if entry_lower.len() < MIN_SIMILARITY_LENGTH {
658                return false;
659            }
660            
661            // Calculate word-based similarity (Jaccard-like)
662            let similarity = Self::calculate_similarity(&entry_lower, &content_lower);
663            similarity >= SIMILARITY_THRESHOLD
664        })
665    }
666
667/// Calculate word-based similarity between two strings.
668    /// Returns a value between 0.0 (no similarity) and 1.0 (identical).
669    fn calculate_similarity(a: &str, b: &str) -> f64 {
670        use std::collections::HashSet;
671        
672        let a_words: HashSet<&str> = a.split_whitespace().collect();
673        let b_words: HashSet<&str> = b.split_whitespace().collect();
674        
675        if a_words.is_empty() || b_words.is_empty() {
676            return 0.0;
677        }
678        
679        let intersection = a_words.intersection(&b_words).count();
680        let union = a_words.union(&b_words).count();
681        
682        if union == 0 {
683            0.0
684        } else {
685            intersection as f64 / union as f64
686        }
687    }
688
689    /// Remove low-importance entries when exceeding max_entries.
690    /// Strategy: preserve manual entries + high importance entries, sorted by importance.
691    pub fn prune(&mut self) {
692        if self.entries.len() <= self.max_entries {
693            return;
694        }
695
696        // First, separate entries by priority
697        // Manual entries are always kept (highest priority)
698        let (manual_entries, auto_entries): (Vec<_>, Vec<_>) = self.entries
699            .iter()
700            .cloned()
701            .partition(|e| e.is_manual);
702        
703        // Sort auto entries by importance (descending) + recency as tiebreaker
704        let mut sorted_auto = auto_entries;
705        sorted_auto.sort_by(|a, b| {
706            // First compare by importance
707            let importance_cmp = b.importance.partial_cmp(&a.importance)
708                .unwrap_or(std::cmp::Ordering::Equal);
709            
710            // If equal importance, prefer more recently referenced
711            if importance_cmp == std::cmp::Ordering::Equal {
712                b.last_referenced.cmp(&a.last_referenced)
713            } else {
714                importance_cmp
715            }
716        });
717        
718        // Filter auto entries above min_importance threshold
719        let kept_auto: Vec<_> = sorted_auto
720            .into_iter()
721            .filter(|e| e.importance >= self.min_importance)
722            .take(self.max_entries.saturating_sub(manual_entries.len()))
723            .collect();
724        
725        // Combine: manual entries first, then sorted auto entries
726        self.entries = manual_entries.into_iter().chain(kept_auto).collect();
727        
728        // Final safety check: if still too many, truncate oldest/least important
729        if self.entries.len() > self.max_entries {
730            self.entries.sort_by(|a, b| {
731                let importance_cmp = b.importance.partial_cmp(&a.importance)
732                    .unwrap_or(std::cmp::Ordering::Equal);
733                if importance_cmp == std::cmp::Ordering::Equal {
734                    b.last_referenced.cmp(&a.last_referenced)
735                } else {
736                    importance_cmp
737                }
738            });
739            self.entries.truncate(self.max_entries);
740        }
741        
742        self.invalidate_index();  // Index needs rebuild after prune
743    }
744
745    /// Get entries by category.
746    pub fn by_category(&self, category: MemoryCategory) -> Vec<&MemoryEntry> {
747        self.entries.iter().filter(|e| e.category == category).collect()
748    }
749    
750    /// Get entries by category using index (faster).
751    pub fn by_category_fast(&mut self, category: MemoryCategory) -> Vec<&MemoryEntry> {
752        self.ensure_index();
753        if let Some(ref index) = self.search_index {
754            index.by_category.get(&category)
755                .map(|indices| indices.iter().map(|&i| &self.entries[i]).collect())
756                .unwrap_or_default()
757        } else {
758            self.by_category(category)
759        }
760    }
761
762    /// Get top N most important entries.
763    pub fn top_n(&self, n: usize) -> Vec<&MemoryEntry> {
764        let mut sorted: Vec<_> = self.entries.iter().collect();
765        sorted.sort_by(|a, b| b.importance.partial_cmp(&a.importance).unwrap_or(std::cmp::Ordering::Equal));
766        sorted.into_iter().take(n).collect()
767    }
768    
769    /// Get top N using index (faster).
770    pub fn top_n_fast(&mut self, n: usize) -> Vec<&MemoryEntry> {
771        self.ensure_index();
772        if let Some(ref index) = self.search_index {
773            index.by_importance
774                .iter()
775                .take(n)
776                .map(|&i| &self.entries[i])
777                .collect()
778        } else {
779            self.top_n(n)
780        }
781    }
782
783    /// Search entries by content or tags.
784    pub fn search(&self, query: &str) -> Vec<&MemoryEntry> {
785        self.search_with_limit(query, None)
786    }
787
788    /// Search entries with result limit.
789    pub fn search_with_limit(&self, query: &str, limit: Option<usize>) -> Vec<&MemoryEntry> {
790        let query_lower = query.to_lowercase();
791        let mut results: Vec<_> = self.entries
792            .iter()
793            .filter(|e| {
794                e.content.to_lowercase().contains(&query_lower) ||
795                e.tags.iter().any(|t| t.to_lowercase().contains(&query_lower))
796            })
797            .collect();
798        
799        // Sort by relevance (importance) then apply limit
800        results.sort_by(|a, b| b.importance.partial_cmp(&a.importance).unwrap_or(std::cmp::Ordering::Equal));
801        
802        if let Some(max) = limit {
803            results.into_iter().take(max).collect()
804        } else {
805            results
806        }
807    }
808    
809    /// Search using index (faster).
810    pub fn search_fast(&mut self, query: &str, limit: Option<usize>) -> Vec<&MemoryEntry> {
811        self.ensure_index();
812        let query_lower = query.to_lowercase();
813        
814        if let Some(ref index) = self.search_index {
815            let indices = index.search(&self.entries, &query_lower, limit);
816            indices.iter().map(|&i| &self.entries[i]).collect()
817        } else {
818            self.search_with_limit(query, limit)
819        }
820    }
821
822    /// Multi-keyword search (matches any keyword).
823    pub fn search_multi(&self, keywords: &[&str]) -> Vec<&MemoryEntry> {
824        if keywords.is_empty() {
825            return Vec::new();
826        }
827        
828        let keywords_lower: Vec<String> = keywords.iter().map(|k| k.to_lowercase()).collect();
829        
830        self.entries
831            .iter()
832            .filter(|e| {
833                let content_lower = e.content.to_lowercase();
834                keywords_lower.iter().any(|k| content_lower.contains(k))
835            })
836            .collect()
837    }
838    
839    /// Multi-keyword search using index (faster).
840    pub fn search_multi_fast(&mut self, keywords: &[&str]) -> Vec<&MemoryEntry> {
841        if keywords.is_empty() {
842            return Vec::new();
843        }
844        
845        self.ensure_index();
846        let keywords_lower: Vec<String> = keywords.iter().map(|k| k.to_lowercase()).collect();
847        
848        if let Some(ref index) = self.search_index {
849            let indices = index.search_multi(&keywords_lower);
850            indices.iter().map(|&i| &self.entries[i]).collect()
851        } else {
852            self.search_multi(keywords)
853        }
854    }
855
856    /// Batch add multiple entries efficiently.
857    /// Only prunes once at the end instead of after each entry.
858    pub fn add_batch(&mut self, entries: Vec<MemoryEntry>) {
859        // Filter out duplicates first
860        for entry in entries {
861            if !self.has_similar(&entry.content) {
862                self.entries.push(entry);
863            }
864        }
865        // Single prune at the end
866        self.prune();
867    }
868
869    /// Mark entries as referenced if they appear in the conversation.
870    /// Optimized: pre-computes lowercase versions to avoid repeated conversions.
871    pub fn update_references(&mut self, messages: &[Message]) {
872        let increment = self.config.reference_increment;
873        
874        // Pre-compute all message texts in lowercase (optimization)
875        let texts_lower: Vec<String> = messages
876            .iter()
877            .filter_map(Self::extract_message_text_lower)
878            .collect();
879        
880        // Pre-compute all entry contents in lowercase
881        let entry_contents_lower: Vec<String> = self.entries
882            .iter()
883            .map(|e| e.content.to_lowercase())
884            .collect();
885        
886        // Check each entry against all texts
887        for (i, entry) in self.entries.iter_mut().enumerate() {
888            let entry_lower = &entry_contents_lower[i];
889            if texts_lower.iter().any(|t| t.contains(entry_lower)) {
890                entry.mark_referenced_with_increment(increment);
891            }
892        }
893    }
894    
895    /// Extract lowercase text from a message for reference checking.
896    fn extract_message_text_lower(msg: &Message) -> Option<String> {
897        match &msg.content {
898            crate::providers::MessageContent::Text(t) => Some(t.to_lowercase()),
899            crate::providers::MessageContent::Blocks(blocks) => {
900                let text = blocks
901                    .iter()
902                    .filter_map(|b| {
903                        if let crate::providers::ContentBlock::Text { text } = b {
904                            Some(text.as_str())
905                        } else {
906                            None
907                        }
908                    })
909                    .collect::<Vec<_>>()
910                    .join(" ");
911                Some(text.to_lowercase())
912            }
913        }
914    }
915
916    /// Generate summary for system prompt.
917    pub fn generate_prompt_summary(&self, max_entries: usize) -> String {
918        if self.entries.is_empty() {
919            return String::new();
920        }
921
922        let top_entries = self.top_n(max_entries);
923        if top_entries.is_empty() {
924            return String::new();
925        }
926
927        let mut summary = String::from("【自动记忆摘要】\n\n");
928        
929        // Group by category
930        let mut by_cat: HashMap<MemoryCategory, Vec<&MemoryEntry>> = HashMap::new();
931        for entry in top_entries {
932            by_cat.entry(entry.category).or_default().push(entry);
933        }
934
935        for (cat, entries) in by_cat {
936            summary.push_str(&format!("{} {}:\n", cat.icon(), cat.display_name()));
937            for entry in entries {
938                summary.push_str(&format!("  {}\n", entry.format_for_prompt()));
939            }
940            summary.push('\n');
941        }
942
943        summary
944    }
945
946    /// Generate context-aware summary for system prompt.
947    /// 
948    /// Unlike `generate_prompt_summary` which always returns top N by importance,
949    /// this method selects memories that are relevant to the current conversation context.
950    /// 
951    /// Strategy:
952    /// 1. Always include manual entries (user explicitly added)
953    /// 2. Include entries whose content overlaps with recent conversation keywords
954    /// 3. Fill remaining slots with top importance entries
955    pub fn generate_contextual_summary(&self, context: &str, max_entries: usize) -> String {
956        // Extract keywords internally
957        let keywords = extract_context_keywords(context);
958        self.generate_contextual_summary_with_keywords(&keywords, max_entries)
959    }
960    
961    /// Generate context-aware summary with pre-extracted keywords.
962    /// More efficient when keywords are already extracted (e.g., by AI).
963    pub fn generate_contextual_summary_with_keywords(&self, context_keywords: &[String], max_entries: usize) -> String {
964        if self.entries.is_empty() {
965            return String::new();
966        }
967
968        // Score each entry by relevance to context keywords
969        let mut scored: Vec<(&MemoryEntry, f64)> = self.entries
970            .iter()
971            .map(|entry| {
972                let relevance = compute_relevance(entry, &context_keywords);
973                (entry, relevance)
974            })
975            .collect();
976        
977        // Sort by: manual first, then relevance + importance combined
978        scored.sort_by(|a, b| {
979            // Manual entries always first
980            if a.0.is_manual && !b.0.is_manual {
981                return std::cmp::Ordering::Less;
982            }
983            if !a.0.is_manual && b.0.is_manual {
984                return std::cmp::Ordering::Greater;
985            }
986            
987            // Combined score: relevance weight + importance weight
988            let score_a = a.1 * CONTEXT_RELEVANCE_WEIGHT + (a.0.importance / MAX_IMPORTANCE_CEILING) * CONTEXT_IMPORTANCE_WEIGHT;
989            let score_b = b.1 * CONTEXT_RELEVANCE_WEIGHT + (b.0.importance / MAX_IMPORTANCE_CEILING) * CONTEXT_IMPORTANCE_WEIGHT;
990            
991            score_b.partial_cmp(&score_a).unwrap_or(std::cmp::Ordering::Equal)
992        });
993        
994        // Take top entries
995        let selected: Vec<&MemoryEntry> = scored
996            .iter()
997            .take(max_entries)
998            .map(|(entry, _)| *entry)
999            .collect();
1000        
1001        if selected.is_empty() {
1002            return String::new();
1003        }
1004
1005        let mut summary = String::from("【跨会话记忆】\n\n");
1006        
1007        // Group by category
1008        let mut by_cat: HashMap<MemoryCategory, Vec<&MemoryEntry>> = HashMap::new();
1009        for entry in selected {
1010            by_cat.entry(entry.category).or_default().push(entry);
1011        }
1012
1013        for (cat, entries) in by_cat {
1014            summary.push_str(&format!("{} {}:\n", cat.icon(), cat.display_name()));
1015            for entry in entries {
1016                summary.push_str(&format!("  {}\n", entry.format_for_prompt()));
1017            }
1018            summary.push('\n');
1019        }
1020
1021        summary
1022    }
1023
1024    /// Generate context-aware summary with AI-enhanced keyword extraction.
1025    /// 
1026    /// This is the async version that uses AI to extract keywords when
1027    /// rule-based extraction produces insufficient results.
1028    pub async fn generate_contextual_summary_async(
1029        &self,
1030        context: &str,
1031        max_entries: usize,
1032        fast_provider: Option<&dyn crate::providers::Provider>,
1033    ) -> String {
1034        if self.entries.is_empty() {
1035            return String::new();
1036        }
1037
1038        // Extract keywords using hybrid approach (rule-based + AI fallback)
1039        let context_keywords = if let Some(provider) = fast_provider {
1040            extract_keywords_hybrid(context, Some(provider)).await
1041        } else {
1042            extract_context_keywords(context)
1043        };
1044        
1045        // Score each entry by relevance to context
1046        let mut scored: Vec<(&MemoryEntry, f64)> = self.entries
1047            .iter()
1048            .map(|entry| {
1049                let relevance = compute_relevance(entry, &context_keywords);
1050                (entry, relevance)
1051            })
1052            .collect();
1053        
1054        // Sort by: manual first, then relevance + importance combined
1055        scored.sort_by(|a, b| {
1056            // Manual entries always first
1057            if a.0.is_manual && !b.0.is_manual {
1058                return std::cmp::Ordering::Less;
1059            }
1060            if !a.0.is_manual && b.0.is_manual {
1061                return std::cmp::Ordering::Greater;
1062            }
1063            
1064            // Combined score: relevance weight + importance weight
1065            let score_a = a.1 * CONTEXT_RELEVANCE_WEIGHT + (a.0.importance / MAX_IMPORTANCE_CEILING) * CONTEXT_IMPORTANCE_WEIGHT;
1066            let score_b = b.1 * CONTEXT_RELEVANCE_WEIGHT + (b.0.importance / MAX_IMPORTANCE_CEILING) * CONTEXT_IMPORTANCE_WEIGHT;
1067            
1068            score_b.partial_cmp(&score_a).unwrap_or(std::cmp::Ordering::Equal)
1069        });
1070        
1071        // Take top entries
1072        let selected: Vec<&MemoryEntry> = scored
1073            .iter()
1074            .take(max_entries)
1075            .map(|(entry, _)| *entry)
1076            .collect();
1077        
1078        if selected.is_empty() {
1079            return String::new();
1080        }
1081
1082        let mut summary = String::from("【跨会话记忆】\n\n");
1083        
1084        // Group by category
1085        let mut by_cat: HashMap<MemoryCategory, Vec<&MemoryEntry>> = HashMap::new();
1086        for entry in selected {
1087            by_cat.entry(entry.category).or_default().push(entry);
1088        }
1089
1090        for (cat, entries) in by_cat {
1091            summary.push_str(&format!("{} {}:\n", cat.icon(), cat.display_name()));
1092            for entry in entries {
1093                summary.push_str(&format!("  {}\n", entry.format_for_prompt()));
1094            }
1095            summary.push('\n');
1096        }
1097
1098        summary
1099    }
1100
1101    /// Format all entries for display.
1102    pub fn format_all(&self) -> String {
1103        if self.entries.is_empty() {
1104            return "[no memories accumulated]".to_string();
1105        }
1106
1107        let mut result = String::from("Accumulated memories:\n\n");
1108        
1109        // Sort by importance
1110        let mut sorted: Vec<_> = self.entries.iter().collect();
1111        sorted.sort_by(|a, b| b.importance.partial_cmp(&a.importance).unwrap_or(std::cmp::Ordering::Equal));
1112
1113        for entry in sorted {
1114            result.push_str(&entry.format_line());
1115            result.push('\n');
1116        }
1117
1118        result
1119    }
1120
1121    /// Generate statistics summary for display.
1122    pub fn generate_statistics(&self) -> MemoryStatistics {
1123        let total = self.entries.len();
1124        let manual = self.entries.iter().filter(|e| e.is_manual).count();
1125        let auto = total - manual;
1126        
1127        // Count by category
1128        let by_category: HashMap<MemoryCategory, usize> = self.entries
1129            .iter()
1130            .fold(HashMap::new(), |mut acc, e| {
1131                *acc.entry(e.category).or_default() += 1;
1132                acc
1133            });
1134        
1135        // Calculate average importance
1136        let avg_importance = if total > 0 {
1137            self.entries.iter().map(|e| e.importance).sum::<f64>() / total as f64
1138        } else {
1139            0.0
1140        };
1141        
1142        // Find oldest and newest
1143        let oldest = self.entries
1144            .iter()
1145            .min_by_key(|e| e.created_at)
1146            .map(|e| e.created_at);
1147        let newest = self.entries
1148            .iter()
1149            .max_by_key(|e| e.created_at)
1150            .map(|e| e.created_at);
1151        
1152        // Count highly referenced
1153        let highly_referenced = self.entries
1154            .iter()
1155            .filter(|e| e.reference_count >= 3)
1156            .count();
1157        
1158        MemoryStatistics {
1159            total,
1160            manual,
1161            auto,
1162            by_category,
1163            avg_importance,
1164            oldest,
1165            newest,
1166            highly_referenced,
1167        }
1168    }
1169
1170    /// Clear all memories.
1171    pub fn clear(&mut self) {
1172        self.entries.clear();
1173        self.invalidate_index();
1174    }
1175
1176    /// Remove a specific memory by ID.
1177    pub fn remove(&mut self, id: &str) -> bool {
1178        let idx = self.entries.iter().position(|e| e.id == id);
1179        if let Some(i) = idx {
1180            self.entries.remove(i);
1181            self.invalidate_index();
1182            true
1183        } else {
1184            false
1185        }
1186    }
1187
1188    /// Apply time decay to memory importance.
1189    /// Entries that haven't been referenced recently will have their importance reduced.
1190    pub fn apply_time_decay(&mut self) {
1191        let now = Utc::now();
1192        let decay_start_days = self.config.decay_start_days;
1193        let decay_rate = self.config.decay_rate;
1194        let decay_period_days = 30;  // Each decay period is 30 days
1195        
1196        for entry in &mut self.entries {
1197            // Skip manual entries - they should never decay
1198            if entry.is_manual {
1199                continue;
1200            }
1201            
1202            // Calculate days since last reference
1203            let days_since_reference = (now - entry.last_referenced)
1204                .num_days()
1205                .max(0);
1206            
1207            // Apply decay if older than threshold
1208            if days_since_reference > decay_start_days {
1209                // Calculate number of decay periods
1210                let decay_periods = (days_since_reference - decay_start_days) / decay_period_days;
1211                
1212                // Apply exponential decay
1213                let decay_factor = decay_rate.powi(decay_periods as i32);
1214                entry.importance *= decay_factor;
1215                
1216                // Ensure minimum importance (at least half of min_importance)
1217                entry.importance = entry.importance.max(self.min_importance * 0.5);
1218            }
1219        }
1220        
1221        // Re-prune after decay (low importance entries may now be removed)
1222        self.prune();
1223    }
1224}
1225
1226/// Statistics about memory collection.
1227#[derive(Debug, Clone)]
1228pub struct MemoryStatistics {
1229    /// Total number of entries.
1230    pub total: usize,
1231    /// Number of manually added entries.
1232    pub manual: usize,
1233    /// Number of automatically detected entries.
1234    pub auto: usize,
1235    /// Count by category.
1236    pub by_category: HashMap<MemoryCategory, usize>,
1237    /// Average importance score.
1238    pub avg_importance: f64,
1239    /// Oldest entry creation time.
1240    pub oldest: Option<DateTime<Utc>>,
1241    /// Newest entry creation time.
1242    pub newest: Option<DateTime<Utc>>,
1243    /// Number of entries with high reference count (>= 3).
1244    pub highly_referenced: usize,
1245}
1246
1247impl MemoryStatistics {
1248    /// Format statistics for display.
1249    pub fn format_summary(&self) -> String {
1250        use std::fmt::Write;
1251        
1252        let mut output = String::new();
1253        
1254        writeln!(output, "记忆统计：").unwrap();
1255        writeln!(output, "  总计: {} 条", self.total).unwrap();
1256        writeln!(output, "  ├─ 手动添加: {} 条", self.manual).unwrap();
1257        writeln!(output, "  └─ 自动检测: {} 条", self.auto).unwrap();
1258        writeln!(output).unwrap();
1259        
1260        writeln!(output, "分类统计：").unwrap();
1261        for (cat, count) in &self.by_category {
1262            writeln!(output, "  {} {}: {} 条", cat.icon(), cat.display_name(), count).unwrap();
1263        }
1264        writeln!(output).unwrap();
1265        
1266        writeln!(output, "质量指标：").unwrap();
1267        writeln!(output, "  平均重要性: {:.1} 分", self.avg_importance).unwrap();
1268        writeln!(output, "  高频引用: {} 条 (≥3次)", self.highly_referenced).unwrap();
1269        
1270        if let Some(oldest) = self.oldest {
1271            let days = (Utc::now() - oldest).num_days();
1272            writeln!(output, "  记忆跨度: {} 天", days).unwrap();
1273        }
1274        
1275        output
1276    }
1277}
1278
1279// ============================================================================
1280// Memory Storage with File Lock
1281// ============================================================================
1282
1283/// File lock for preventing concurrent access to memory storage.
1284/// Uses a simple lock file approach (.lock) with atomic operations.
1285pub struct MemoryFileLock {
1286    /// Path to the lock file.
1287    lock_path: PathBuf,
1288    /// Whether we currently hold the lock.
1289    locked: bool,
1290}
1291
1292impl MemoryFileLock {
1293    /// Create a new file lock for the given directory.
1294    pub fn new(base_dir: &Path) -> Self {
1295        Self {
1296            lock_path: base_dir.join("memory.lock"),
1297            locked: false,
1298        }
1299    }
1300    
1301    /// Acquire the lock (blocking with timeout).
1302    /// Returns true if lock was acquired, false if timeout expired.
1303    pub fn acquire(&mut self, timeout_ms: u64) -> Result<bool> {
1304        if self.locked {
1305            return Ok(true);  // Already locked
1306        }
1307        
1308        let start = std::time::Instant::now();
1309        
1310        while start.elapsed().as_millis() < timeout_ms as u128 {
1311            // Try to create lock file atomically
1312            match fs::File::create_new(&self.lock_path) {
1313                Ok(_) => {
1314                    // Write lock info (PID + timestamp)
1315                    let lock_info = format!(
1316                        "{}:{}",
1317                        std::process::id(),
1318                        Utc::now().to_rfc3339()
1319                    );
1320                    fs::write(&self.lock_path, lock_info)?;
1321                    self.locked = true;
1322                    return Ok(true);
1323                }
1324                Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => {
1325                    // Lock file exists, check if it's stale
1326                    if self.is_stale_lock()? {
1327                        self.remove_stale_lock()?;
1328                    }
1329                    // Wait a bit before retrying
1330                    std::thread::sleep(std::time::Duration::from_millis(50));
1331                }
1332                Err(e) => {
1333                    return Err(e.into());
1334                }
1335            }
1336        }
1337        
1338        Ok(false)  // Timeout expired
1339    }
1340    
1341    /// Check if the existing lock is stale (older than 30 seconds).
1342    fn is_stale_lock(&self) -> Result<bool> {
1343        if !self.lock_path.exists() {
1344            return Ok(false);
1345        }
1346        
1347        // Check lock file age
1348        let metadata = fs::metadata(&self.lock_path)?;
1349        let modified = metadata.modified()?;
1350        let age = std::time::SystemTime::now()
1351            .duration_since(modified)
1352            .unwrap_or(std::time::Duration::ZERO);
1353        
1354        // Consider lock stale if older than 30 seconds
1355        Ok(age > std::time::Duration::from_secs(30))
1356    }
1357    
1358    /// Remove stale lock file.
1359    fn remove_stale_lock(&self) -> Result<()> {
1360        if self.lock_path.exists() {
1361            fs::remove_file(&self.lock_path)?;
1362        }
1363        Ok(())
1364    }
1365    
1366    /// Release the lock.
1367    pub fn release(&mut self) -> Result<()> {
1368        if self.locked {
1369            fs::remove_file(&self.lock_path)?;
1370            self.locked = false;
1371        }
1372        Ok(())
1373    }
1374}
1375
1376impl Drop for MemoryFileLock {
1377    fn drop(&mut self) {
1378        // Auto-release lock on drop
1379        let _ = self.release();
1380    }
1381}
1382
1383/// Storage for memory files (global and project-level) with file locking.
1384pub struct MemoryStorage {
1385    /// Base directory for global memory (~/.matrix).
1386    base_dir: PathBuf,
1387    /// Project root directory (optional).
1388    project_root: Option<PathBuf>,
1389    /// File lock for preventing concurrent writes.
1390    lock: MemoryFileLock,
1391}
1392
1393impl MemoryStorage {
1394    /// Create a new memory storage.
1395    pub fn new(project_root: Option<&Path>) -> Result<Self> {
1396        let base_dir = Self::get_base_dir()?;
1397        let lock = MemoryFileLock::new(&base_dir);
1398        Ok(Self {
1399            base_dir,
1400            project_root: project_root.map(|p| p.to_path_buf()),
1401            lock,
1402        })
1403    }
1404
1405    /// Create a new storage with explicit lock timeout.
1406    pub fn with_lock_timeout(project_root: Option<&Path>, timeout_ms: u64) -> Result<Self> {
1407        let mut storage = Self::new(project_root)?;
1408        storage.lock.acquire(timeout_ms)?;
1409        Ok(storage)
1410    }
1411
1412    /// Get the base directory for memory storage.
1413    fn get_base_dir() -> Result<PathBuf> {
1414        let home = std::env::var_os("HOME")
1415            .or_else(|| std::env::var_os("USERPROFILE"))
1416            .ok_or_else(|| anyhow::anyhow!("HOME or USERPROFILE not set"))?;
1417        let mut p = PathBuf::from(home);
1418        p.push(".matrix");
1419        Ok(p)
1420    }
1421
1422    /// Path to global memory file.
1423    pub fn global_memory_path(&self) -> PathBuf {
1424        self.base_dir.join("memory.json")
1425    }
1426
1427    /// Path to project memory file.
1428    pub fn project_memory_path(&self) -> Option<PathBuf> {
1429        self.project_root.as_ref().map(|p| p.join(".matrix/memory.json"))
1430    }
1431
1432    /// Path to config file.
1433    pub fn config_path(&self) -> PathBuf {
1434        self.base_dir.join("memory_config.json")
1435    }
1436
1437    /// Ensure directories exist.
1438    fn ensure_dirs(&self) -> Result<()> {
1439        fs::create_dir_all(&self.base_dir)?;
1440        if let Some(root) = &self.project_root {
1441            let memory_dir = root.join(".matrix");
1442            fs::create_dir_all(memory_dir)?;
1443        }
1444        Ok(())
1445    }
1446
1447    /// Acquire lock before write operations.
1448    fn acquire_lock(&mut self) -> Result<()> {
1449        self.lock.acquire(5000)?;  // 5 second timeout
1450        Ok(())
1451    }
1452
1453    /// Release lock after write operations.
1454    fn release_lock(&mut self) -> Result<()> {
1455        self.lock.release()?;
1456        Ok(())
1457    }
1458
1459    /// Load global memory (no lock needed for read).
1460    pub fn load_global(&self) -> Result<AutoMemory> {
1461        let path = self.global_memory_path();
1462        if !path.exists() {
1463            return Ok(AutoMemory::new());
1464        }
1465        let data = fs::read_to_string(&path)?;
1466        let memory: AutoMemory = serde_json::from_str(&data)?;
1467        Ok(memory)
1468    }
1469
1470    /// Load project memory (no lock needed for read).
1471    pub fn load_project(&self) -> Result<Option<AutoMemory>> {
1472        let path = self.project_memory_path();
1473        match path {
1474            Some(p) if p.exists() => {
1475                let data = fs::read_to_string(&p)?;
1476                let memory: AutoMemory = serde_json::from_str(&data)?;
1477                Ok(Some(memory))
1478            }
1479            _ => Ok(None),
1480        }
1481    }
1482
1483    /// Load combined memory (global + project).
1484    pub fn load_combined(&self) -> Result<AutoMemory> {
1485        let mut combined = self.load_global()?;
1486        
1487        if let Some(project) = self.load_project()? {
1488            // Merge project entries into global
1489            for entry in project.entries {
1490                // Tag as project-specific
1491                let mut tagged_entry = entry;
1492                if !tagged_entry.tags.contains(&"project".to_string()) {
1493                    tagged_entry.tags.push("project".to_string());
1494                }
1495                combined.entries.push(tagged_entry);
1496            }
1497            combined.prune();
1498        }
1499
1500        Ok(combined)
1501    }
1502
1503    /// Save global memory (with file lock).
1504    pub fn save_global(&mut self, memory: &AutoMemory) -> Result<()> {
1505        self.acquire_lock()?;
1506        self.ensure_dirs()?;
1507        
1508        let path = self.global_memory_path();
1509        let json = serde_json::to_string_pretty(memory)?;
1510        
1511        // Write to temp file then rename (atomic)
1512        let tmp = path.with_extension("json.tmp");
1513        fs::write(&tmp, json)?;
1514        fs::rename(&tmp, &path)?;
1515        
1516        self.release_lock()?;
1517        Ok(())
1518    }
1519
1520    /// Save project memory (with file lock).
1521    pub fn save_project(&mut self, memory: &AutoMemory) -> Result<()> {
1522        self.acquire_lock()?;
1523        self.ensure_dirs()?;
1524        
1525        let path = self.project_memory_path()
1526            .ok_or_else(|| anyhow::anyhow!("no project root"))?;
1527        let json = serde_json::to_string_pretty(memory)?;
1528        
1529        let tmp = path.with_extension("json.tmp");
1530        fs::write(&tmp, json)?;
1531        fs::rename(&tmp, &path)?;
1532        
1533        self.release_lock()?;
1534        Ok(())
1535    }
1536
1537    /// Save config to separate file.
1538    pub fn save_config(&mut self, config: &MemoryConfig) -> Result<()> {
1539        self.ensure_dirs()?;
1540        let path = self.config_path();
1541        let json = serde_json::to_string_pretty(config)?;
1542        fs::write(&path, json)?;
1543        Ok(())
1544    }
1545
1546    /// Load config from file.
1547    pub fn load_config(&self) -> Result<MemoryConfig> {
1548        let path = self.config_path();
1549        if !path.exists() {
1550            return Ok(MemoryConfig::default());
1551        }
1552        let data = fs::read_to_string(&path)?;
1553        let config: MemoryConfig = serde_json::from_str(&data)?;
1554        Ok(config)
1555    }
1556
1557    /// Add entry to appropriate storage (with file lock).
1558    pub fn add_entry(&mut self, entry: MemoryEntry, is_project_specific: bool) -> Result<()> {
1559        self.acquire_lock()?;
1560        
1561        if is_project_specific {
1562            let mut project = self.load_project()?.unwrap_or_else(AutoMemory::new);
1563            project.add(entry);
1564            self.save_project_locked(&project)?;
1565        } else {
1566            let mut global = self.load_global()?;
1567            global.add(entry);
1568            self.save_global_locked(&global)?;
1569        }
1570        
1571        self.release_lock()?;
1572        Ok(())
1573    }
1574
1575    /// Remove entry from storage by ID (with file lock).
1576    pub fn remove_entry(&mut self, id: &str, is_project_specific: bool) -> Result<bool> {
1577        self.acquire_lock()?;
1578        
1579        let removed = if is_project_specific {
1580            if let Some(mut project) = self.load_project()? {
1581                let removed = project.remove(id);
1582                if removed {
1583                    self.save_project_locked(&project)?;
1584                }
1585                removed
1586            } else {
1587                false
1588            }
1589        } else {
1590            let mut global = self.load_global()?;
1591            let removed = global.remove(id);
1592            if removed {
1593                self.save_global_locked(&global)?;
1594            }
1595            removed
1596        };
1597        
1598        self.release_lock()?;
1599        Ok(removed)
1600    }
1601    
1602    /// Internal save methods that don't acquire lock (assumed already locked).
1603    fn save_global_locked(&self, memory: &AutoMemory) -> Result<()> {
1604        let path = self.global_memory_path();
1605        let json = serde_json::to_string_pretty(memory)?;
1606        let tmp = path.with_extension("json.tmp");
1607        fs::write(&tmp, json)?;
1608        fs::rename(&tmp, &path)?;
1609        Ok(())
1610    }
1611    
1612    fn save_project_locked(&self, memory: &AutoMemory) -> Result<()> {
1613        let path = self.project_memory_path()
1614            .ok_or_else(|| anyhow::anyhow!("no project root"))?;
1615        let json = serde_json::to_string_pretty(memory)?;
1616        let tmp = path.with_extension("json.tmp");
1617        fs::write(&tmp, json)?;
1618        fs::rename(&tmp, &path)?;
1619        Ok(())
1620    }
1621}
1622
1623// ============================================================================
1624// Helper Functions (Global)
1625// ============================================================================
1626
1627/// Calculate word-based similarity between two strings (Jaccard coefficient).
1628/// Returns a value between 0.0 (no similarity) and 1.0 (identical words).
1629/// This is the public version for external use.
1630pub fn calculate_similarity(a: &str, b: &str) -> f64 {
1631    AutoMemory::calculate_similarity(a, b)
1632}
1633
1634/// Extract meaningful keywords from conversation context.
1635/// Filters out common stop words and short tokens.
1636/// Public for external use (e.g., TUI keyword display).
1637pub fn extract_context_keywords(context: &str) -> Vec<String> {
1638    use std::collections::HashSet;
1639    
1640    // Common stop words (Chinese + English)
1641    let stop_words: HashSet<&str> = [
1642        // Chinese stop words
1643        "的", "了", "是", "在", "我", "有", "和", "就", "不", "人", "都", "一", "一个",
1644        "上", "也", "很", "到", "说", "要", "去", "你", "会", "着", "没有", "看", "好",
1645        "自己", "这", "他", "她", "它", "们", "那", "些", "什么", "怎么", "如何", "请",
1646        "能", "可以", "需要", "应该", "可能", "因为", "所以", "但是", "然后", "还是",
1647        "已经", "正在", "将要", "曾经", "一下", "一点", "一些", "所有", "每个", "任何",
1648        // English stop words
1649        "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
1650        "have", "has", "had", "do", "does", "did", "will", "would", "could",
1651        "should", "may", "might", "can", "shall", "to", "of", "in", "for",
1652        "on", "with", "at", "by", "from", "as", "into", "through", "during",
1653        "before", "after", "above", "below", "between", "and", "but", "or",
1654        "not", "no", "so", "if", "then", "than", "too", "very", "just",
1655        "this", "that", "these", "those", "it", "its", "i", "me", "my",
1656        "we", "our", "you", "your", "he", "his", "she", "her", "they", "their",
1657        "please", "help", "need", "want", "make", "get", "let", "use",
1658    ].iter().copied().collect();
1659    
1660    // Technical/meaningful patterns to extract (Chinese + English)
1661    let tech_patterns: HashSet<&str> = [
1662        // Technical terms (keep these even if short)
1663        "api", "cli", "gui", "tui", "web", "http", "json", "xml", "sql", "db",
1664        "git", "npm", "cargo", "rust", "js", "ts", "py", "go", "java", "cpp",
1665        "cpu", "gpu", "io", "fs", "os", "ui", "ux", "ai", "ml", "dl",
1666        // File extensions
1667        "rs", "js", "ts", "py", "go", "java", "c", "h", "cpp", "hpp",
1668        "json", "yaml", "yml", "toml", "md", "txt", "html", "css", "scss",
1669        // Short meaningful words
1670        "bug", "fix", "add", "new", "old", "use", "run", "build", "test",
1671        "code", "data", "file", "dir", "path", "name", "type", "value",
1672    ].iter().copied().collect();
1673    
1674    let lower = context.to_lowercase();
1675    let mut keywords: HashSet<String> = HashSet::new();
1676    
1677    // 1. Extract English words (space-separated)
1678    for word in lower.split_whitespace() {
1679        let cleaned = word.trim_matches(|c: char| !c.is_alphanumeric()).to_string();
1680        if cleaned.len() >= 2 && !stop_words.contains(cleaned.as_str()) {
1681            keywords.insert(cleaned.clone());
1682        }
1683        // Keep technical short words
1684        if tech_patterns.contains(cleaned.as_str()) {
1685            keywords.insert(cleaned);
1686        }
1687    }
1688    
1689    // 2. Extract Chinese words/phrases (2-4 character sequences)
1690    // Chinese characters are typically 3 bytes in UTF-8
1691    let chinese_chars: Vec<char> = lower
1692        .chars()
1693        .filter(|c| *c >= '\u{4E00}' && *c <= '\u{9FFF}')  // Chinese Unicode range
1694        .collect();
1695    
1696    // Extract 2-4 character Chinese sequences
1697    for window_size in 2..=4 {
1698        if chinese_chars.len() >= window_size {
1699            for window in chinese_chars.windows(window_size) {
1700                let phrase: String = window.iter().collect();
1701                // Skip if contains stop words
1702                let has_stop = stop_words.iter().any(|sw| phrase.contains(sw));
1703                if !has_stop && phrase.len() >= window_size {
1704                    keywords.insert(phrase);
1705                }
1706            }
1707        }
1708    }
1709    
1710    // 3. Extract specific patterns (project names, file names, etc.)
1711    // Look for common project/file patterns
1712    let patterns = [
1713        // File paths
1714        r"[a-zA-Z_][a-zA-Z0-9_]*\.[a-zA-Z]{1,4}",  // file.ext
1715        r"[a-zA-Z_][a-zA-Z0-9_]*\.[a-zA-Z_][a-zA-Z0-9_]*",  // module.submodule
1716        // CamelCase/snake_case identifiers
1717        r"[A-Z][a-z]+[A-Z][a-zA-Z]*",  // CamelCase
1718        r"[a-z][a-z0-9]*_[a-z][a-z0-9_]*",  // snake_case
1719        // Numbers with units
1720        r"[0-9]+[kKmMgGtT][bB]?",  // 4K, 100MB
1721    ];
1722    
1723    for pattern in patterns {
1724        if let Ok(re) = regex::Regex::new(pattern) {
1725            for cap in re.find_iter(&lower) {
1726                keywords.insert(cap.as_str().to_string());
1727            }
1728        }
1729    }
1730    
1731    // Convert to vector and sort by length (prefer longer, more specific keywords)
1732    let mut result: Vec<String> = keywords.into_iter().collect();
1733    result.sort_by(|a, b| b.len().cmp(&a.len()));
1734    
1735    // Take top keywords (avoid too many)
1736    result.truncate(15);
1737    
1738    result
1739}
1740
1741/// Compute relevance score of a memory entry to context keywords.
1742/// Returns 0.0-1.0 where 1.0 means highly relevant.
1743fn compute_relevance(entry: &MemoryEntry, context_keywords: &[String]) -> f64 {
1744    if context_keywords.is_empty() {
1745        return 0.0;
1746    }
1747    
1748    let content_lower = entry.content.to_lowercase();
1749    
1750    // Count how many context keywords appear in this entry
1751    let matches = context_keywords
1752        .iter()
1753        .filter(|kw| content_lower.contains(kw.as_str()))
1754        .count();
1755    
1756    // Normalize by total keywords (0.0-1.0)
1757    let keyword_score = matches as f64 / context_keywords.len() as f64;
1758    
1759    // Boost for tag matches
1760    let tag_matches = entry.tags
1761        .iter()
1762        .filter(|tag| {
1763            let tag_lower = tag.to_lowercase();
1764            context_keywords.iter().any(|kw| tag_lower.contains(kw.as_str()))
1765        })
1766        .count();
1767    
1768    let tag_score = if tag_matches > 0 { 0.2 } else { 0.0 };
1769    
1770    // Combined score (capped at 1.0)
1771    (keyword_score + tag_score).min(1.0)
1772}
1773
/// Heuristically decide whether `new` contradicts or supersedes `old`.
///
/// Returns `true` as soon as one of these checks hits:
/// 1. `new` contains a replacement / abandonment phrase ("改用", "换成",
///    "switched to", "deprecated", ...). `old` is not consulted here.
/// 2. Both texts contain the same action verb ("使用", "using", ...), e.g.
///    "决定使用 PostgreSQL" vs "决定使用 MySQL".
/// 3. Both texts contain the same preference verb ("偏好", "prefer", ...).
///
/// All checks are plain, case-sensitive substring tests.
fn has_contradiction_signal(old: &str, new: &str) -> bool {
    // Phrases that announce a change of mind in the newer text.
    const REPLACEMENT_PHRASES: &[&str] = &[
        "改用", "换成", "替换", "改为", "切换到", "迁移到",
        "不再使用", "弃用", "放弃", "取消",
        "switched to", "replaced", "migrated to", "changed to",
        "no longer", "deprecated", "abandoned",
    ];

    // Verbs whose presence on both sides suggests two competing choices.
    // The original author's assumption is that identical statements were
    // already filtered out by the similarity check before this is called.
    const ACTION_VERBS: &[&str] = &[
        "决定使用", "选择使用", "采用", "使用",
        "decided to use", "chose", "using", "adopted",
    ];

    // Verbs that mark a stated preference.
    const PREFERENCE_VERBS: &[&str] = &["偏好", "喜欢", "prefer", "like"];

    let appears_in_both = |needle: &str| old.contains(needle) && new.contains(needle);

    REPLACEMENT_PHRASES.iter().any(|phrase| new.contains(*phrase))
        || ACTION_VERBS
            .iter()
            .chain(PREFERENCE_VERBS.iter())
            .any(|verb| appears_in_both(*verb))
}
1820
1821// ============================================================================
1822// AI-Based Memory Extraction
1823// ============================================================================
1824
/// Trait for memory extraction implementations.
///
/// Implementors must be `Send + Sync`. The trait goes through `async_trait`
/// so that `extract` can be an `async fn`.
#[async_trait::async_trait]
pub trait MemoryExtractor: Send + Sync {
    /// Extract memories from conversation text using AI.
    ///
    /// `text` is the conversation to analyse and `session_id` optionally
    /// identifies the session it came from. Returns the extracted entries, or
    /// an error when extraction fails.
    async fn extract(&self, text: &str, session_id: Option<&str>) -> Result<Vec<MemoryEntry>>;
    
    /// Get the model name used for extraction.
    fn model_name(&self) -> &str;
}
1834
/// AI-based memory extractor using a fast/cheap model.
///
/// Pairs a chat provider with the name of the model it represents.
pub struct AiMemoryExtractor {
    // Provider that receives the extraction request.
    provider: Box<dyn crate::providers::Provider>,
    // Model name reported by `model_name()`.
    model: String,
}
1840
1841impl AiMemoryExtractor {
1842    /// Create a new AI memory extractor.
1843    pub fn new(provider: Box<dyn crate::providers::Provider>, model: String) -> Self {
1844        Self { provider, model }
1845    }
1846}
1847
/// System prompt for memory extraction.
///
/// Written in Chinese. It lists the six memory categories (decision,
/// preference, solution, finding, technical, structure), asks for at most
/// five one-sentence memories, and requests a JSON reply of the form
/// `{"memories": [{"category", "content", "importance"}]}`, or
/// `{"memories": []}` when nothing is worth keeping.
///
/// The examples are shown inside Markdown code fences even though the last
/// line asks the model not to wrap its answer in one; `parse_memory_response`
/// strips such fences before parsing.
const MEMORY_EXTRACT_SYSTEM_PROMPT: &str = r#"你是一个记忆提取助手。你的任务是从对话中识别并提取值得长期记忆的关键信息。

记忆类型：
1. Decision（决策）: 项目或技术选型的决定，如"决定使用 PostgreSQL"
2. Preference（偏好）: 用户习惯或偏好，如"我喜欢用 vim"
3. Solution（解决方案）: 解决问题的具体方法，如"通过添加 middleware 修复 bug"
4. Finding（发现）: 重要发现或信息，如"API 端点在 /api/v2"
5. Technical（技术）: 技术栈或框架信息，如"使用 React Query 做数据获取"
6. Structure（结构）: 项目结构信息，如"入口文件是 src/index.ts"

提取原则：
- 只提取有价值、可复用的信息
- 避免提取临时性、一次性信息
- 避免提取过于具体的代码细节
- 每条记忆应简洁明确（一句话）
- 最多提取 5 条记忆

输出格式（严格 JSON）：
```json
{
  "memories": [
    {
      "category": "decision",
      "content": "决定使用 PostgreSQL 作为主数据库",
      "importance": 90
    },
    {
      "category": "preference", 
      "content": "用户偏好 TypeScript 而非 JavaScript",
      "importance": 70
    }
  ]
}
```

如果没有值得记忆的内容，返回：
```json
{"memories": []}
```

直接输出 JSON，不要加代码块包裹。"#;
1890
1891#[async_trait::async_trait]
1892impl MemoryExtractor for AiMemoryExtractor {
1893    async fn extract(&self, text: &str, session_id: Option<&str>) -> Result<Vec<MemoryEntry>> {
1894        use crate::providers::{ChatRequest, Message, MessageContent, Role};
1895        
1896        // Truncate text if too long (memory extraction focuses on key points)
1897        let truncated_text = if text.len() > 4000 {
1898            truncate_str(text, 4000)
1899        } else {
1900            text.to_string()
1901        };
1902        
1903        let request = ChatRequest {
1904            messages: vec![Message {
1905                role: Role::User,
1906                content: MessageContent::Text(format!(
1907                    "请从以下对话中提取值得记忆的关键信息：\n\n{}", 
1908                    truncated_text
1909                )),
1910            }],
1911            tools: vec![],  // No tools for memory extraction
1912            system: Some(MEMORY_EXTRACT_SYSTEM_PROMPT.to_string()),
1913            think: false,   // No extended thinking
1914            max_tokens: 512, // Short response
1915            server_tools: vec![],
1916            enable_caching: false,
1917        };
1918        
1919        let response = self.provider.chat(request).await?;
1920        
1921        // Extract text from response
1922        let response_text = response.content
1923            .iter()
1924            .filter_map(|block| {
1925                if let crate::providers::ContentBlock::Text { text } = block {
1926                    Some(text.clone())
1927                } else {
1928                    None
1929                }
1930            })
1931            .collect::<Vec<_>>()
1932            .join("");
1933        
1934        // Parse JSON response
1935        parse_memory_response(&response_text, session_id)
1936    }
1937    
1938    fn model_name(&self) -> &str {
1939        &self.model
1940    }
1941}
1942
1943/// Parse AI response into memory entries.
1944fn parse_memory_response(json_text: &str, session_id: Option<&str>) -> Result<Vec<MemoryEntry>> {
1945    // Clean up response (remove possible markdown code blocks)
1946    let cleaned = json_text
1947        .trim()
1948        .trim_start_matches("```json")
1949        .trim_start_matches("```")
1950        .trim_end_matches("```")
1951        .trim();
1952    
1953    // Parse JSON
1954    #[derive(serde::Deserialize)]
1955    struct MemoryResponse {
1956        memories: Vec<MemoryItem>,
1957    }
1958    
1959    #[derive(serde::Deserialize)]
1960    struct MemoryItem {
1961        category: String,
1962        content: String,
1963        #[serde(default)]
1964        importance: f64,
1965    }
1966    
1967    let parsed: MemoryResponse = serde_json::from_str(cleaned)?;
1968    
1969    // Convert to MemoryEntry
1970    let entries = parsed.memories
1971        .into_iter()
1972        .filter_map(|item| {
1973            // Parse category
1974            let category = match item.category.to_lowercase().as_str() {
1975                "decision" => MemoryCategory::Decision,
1976                "preference" => MemoryCategory::Preference,
1977                "solution" => MemoryCategory::Solution,
1978                "finding" => MemoryCategory::Finding,
1979                "technical" => MemoryCategory::Technical,
1980                "structure" => MemoryCategory::Structure,
1981                _ => return None,  // Skip unknown categories
1982            };
1983            
1984            // Skip too short content
1985            if item.content.len() < MIN_MEMORY_CONTENT_LENGTH {
1986                return None;
1987            }
1988            
1989            // Create entry with AI-suggested importance or default
1990            let mut entry = MemoryEntry::new(
1991                category,
1992                item.content,
1993                session_id.map(|s| s.to_string()),
1994            );
1995            
1996            // Override importance if AI suggested a value
1997            if item.importance > 0.0 {
1998                entry.importance = item.importance.clamp(0.0, 100.0);
1999            }
2000            
2001            Some(entry)
2002        })
2003        .collect();
2004    
2005    // Deduplicate and limit
2006    Ok(deduplicate_entries(entries))
2007}
2008
2009// ============================================================================
2010// AI-Based Keyword Extraction (for context-aware memory retrieval)
2011// ============================================================================
2012
/// System prompt for AI keyword extraction.
///
/// Written in Chinese. It asks for 3-10 meaningful keywords (technical
/// terms, project names, concepts) with stop words removed, in both
/// languages for mixed input, as `{"keywords": [...]}`; an empty array means
/// nothing meaningful was found. `parse_keyword_response` parses the reply
/// and strips any Markdown code fence the model adds.
const KEYWORD_EXTRACT_SYSTEM_PROMPT: &str = r#"你是一个关键词提取助手。你的任务是从用户输入中提取有意义的关键词，用于检索相关记忆。

提取原则：
1. 只提取有实际意义的词汇（技术名词、项目名、概念等）
2. 过滤掉常见的停用词（的、是、在、我、你、the、a、is 等）
3. 保留专有名词和技术术语
4. 中英文混合输入时，两种语言的关键词都提取
5. 提取 3-10 个关键词

输出格式（严格 JSON）：
```json
{
  "keywords": ["数据库", "PostgreSQL", "优化", "查询"]
}
```

如果没有有意义的关键词，返回：
```json
{"keywords": []}
```

直接输出 JSON，不要加代码块包裹。"#;
2036
2037/// Extract keywords from context using AI (for context-aware memory retrieval).
2038/// 
2039/// This is used when the rule-based keyword extraction produces too few results
2040/// or when the context is complex and needs better understanding.
2041pub async fn extract_keywords_with_ai(
2042    context: &str,
2043    provider: &dyn crate::providers::Provider,
2044) -> Result<Vec<String>> {
2045    use crate::providers::{ChatRequest, Message, MessageContent, Role};
2046    
2047    // Truncate if too long
2048    let truncated = if context.len() > 1000 {
2049        truncate_str(context, 1000)
2050    } else {
2051        context.to_string()
2052    };
2053    
2054    let request = ChatRequest {
2055        messages: vec![Message {
2056            role: Role::User,
2057            content: MessageContent::Text(format!(
2058                "请从以下文本中提取关键词：\n\n{}", 
2059                truncated
2060            )),
2061        }],
2062        tools: vec![],
2063        system: Some(KEYWORD_EXTRACT_SYSTEM_PROMPT.to_string()),
2064        think: false,
2065        max_tokens: 256,
2066        server_tools: vec![],
2067        enable_caching: false,
2068    };
2069    
2070    let response = provider.chat(request).await?;
2071    
2072    // Extract text from response
2073    let response_text = response.content
2074        .iter()
2075        .filter_map(|block| {
2076            if let crate::providers::ContentBlock::Text { text } = block {
2077                Some(text.clone())
2078            } else {
2079                None
2080            }
2081        })
2082        .collect::<Vec<_>>()
2083        .join("");
2084    
2085    // Parse JSON response
2086    parse_keyword_response(&response_text)
2087}
2088
2089/// Parse AI keyword extraction response.
2090fn parse_keyword_response(json_text: &str) -> Result<Vec<String>> {
2091    // Clean up response
2092    let cleaned = json_text
2093        .trim()
2094        .trim_start_matches("```json")
2095        .trim_start_matches("```")
2096        .trim_end_matches("```")
2097        .trim();
2098    
2099    #[derive(serde::Deserialize)]
2100    struct KeywordResponse {
2101        keywords: Vec<String>,
2102    }
2103    
2104    let parsed: KeywordResponse = serde_json::from_str(cleaned)?;
2105    
2106    // Filter out empty or too-short keywords
2107    Ok(parsed.keywords
2108        .into_iter()
2109        .filter(|k| k.len() >= 2)
2110        .collect())
2111}
2112
2113/// Extract keywords from context with hybrid approach.
2114/// 
2115/// Strategy:
2116/// 1. First use rule-based stop word filtering (fast, zero cost)
2117/// 2. If result is insufficient (too few keywords), fall back to AI extraction
2118/// 3. Behavior controlled by MEMORY_AI_KEYWORDS env var (auto/always/never)
2119pub async fn extract_keywords_hybrid(
2120    context: &str,
2121    fast_provider: Option<&dyn crate::providers::Provider>,
2122) -> Vec<String> {
2123    // Get AI keyword extraction mode from environment
2124    let mode = AiKeywordMode::from_env();
2125    
2126    // If mode is Never, skip AI entirely
2127    if mode == AiKeywordMode::Never {
2128        return extract_context_keywords(context);
2129    }
2130    
2131    // Step 1: Try rule-based extraction first (unless mode is Always)
2132    let keywords = if mode == AiKeywordMode::Always {
2133        Vec::new()  // Skip rule-based when Always mode
2134    } else {
2135        extract_context_keywords(context)
2136    };
2137    
2138    // Step 2: Check if we should use AI based on mode and keyword count
2139    if !mode.should_use_ai(keywords.len()) {
2140        return keywords;
2141    }
2142    
2143    // Step 3: If we should use AI and have a provider, do AI extraction
2144    if let Some(provider) = fast_provider {
2145        match extract_keywords_with_ai(context, provider).await {
2146            Ok(ai_keywords) if !ai_keywords.is_empty() => {
2147                log::debug!("AI extracted {} keywords: {:?}", ai_keywords.len(), ai_keywords);
2148                // In Auto mode, merge AI keywords with rule-based ones
2149                if mode == AiKeywordMode::Auto && !keywords.is_empty() {
2150                    let merged = keywords
2151                        .into_iter()
2152                        .chain(ai_keywords.into_iter())
2153                        .collect::<std::collections::HashSet<_>>();
2154                    return merged.into_iter().collect();
2155                }
2156                return ai_keywords;
2157            }
2158            Ok(_) => {
2159                log::debug!("AI returned no keywords, keeping rule-based results");
2160            }
2161            Err(e) => {
2162                log::warn!("AI keyword extraction failed: {}, keeping rule-based results", e);
2163            }
2164        }
2165    }
2166    
2167    // Return whatever we have (rule-based results)
2168    keywords
2169}
2170
2171// ============================================================================
2172// AI-Enhanced Memory Processing
2173// ============================================================================
2174
/// System prompt for AI memory summarization.
///
/// Written in Chinese. It asks the model to condense several related
/// memories into one sentence and reply with
/// `{"summary", "category", "importance"}`; an empty `summary` means nothing
/// is worth keeping. The field names match `MemorySummaryResult`, which
/// `AiMemoryProcessor::summarize_memories` deserialises the reply into.
const MEMORY_SUMMARY_SYSTEM_PROMPT: &str = r#"你是一个记忆摘要助手。你的任务是将多条相关记忆合并为一条精炼的摘要记忆。

摘要原则：
1. 保留核心信息，去除冗余细节
2. 使用简洁明确的一句话表达
3. 保留关键的技术名词和决策结论
4. 如果多条记忆主题相同，合并为一条综合性记忆
5. 优先保留高价值的决策和解决方案

输出格式（严格 JSON）：
```json
{
  "summary": "决定使用 PostgreSQL 作为主数据库，Redis 作为缓存层",
  "category": "decision",
  "importance": 90
}
```

如果没有值得保留的信息，返回：
```json
{"summary": "", "category": "", "importance": 0}
```

直接输出 JSON，不要加代码块包裹。"#;
2200
/// System prompt for AI conflict detection.
///
/// Written in Chinese. It asks the model to classify the relationship
/// between an old and a new memory as `direct_conflict`, `outdated_update`,
/// `supplement` or `no_conflict`, and to reply with
/// `{"conflict_type", "should_replace", "reason", "winner"}`. The field names
/// match `MemoryConflictResult`, which `AiMemoryProcessor::detect_conflict`
/// deserialises the reply into.
const MEMORY_CONFLICT_SYSTEM_PROMPT: &str = r#"你是一个记忆冲突检测助手。你的任务是判断两条记忆是否矛盾或需要更新。

冲突类型：
1. 直接矛盾：两条记忆结论相反（如"使用 PostgreSQL" vs "使用 MySQL"）
2. 过时更新：新记忆明确替换旧记忆（如"改用 Redis" 替换 "使用 Memcached"）
3. 补充关系：新记忆补充旧记忆（如"PostgreSQL 版本为 15" 补充 "使用 PostgreSQL"）
4. 无关关系：两条记忆主题不同，不冲突

输出格式（严格 JSON）：
```json
{
  "conflict_type": "direct_conflict",
  "should_replace": true,
  "reason": "两条记忆都是数据库选型决策，但选择了不同的数据库",
  "winner": "new"
}
```

conflict_type 可选值：
- "direct_conflict": 直接矛盾，需要选择一条
- "outdated_update": 过时更新，新记忆替换旧记忆
- "supplement": 补充关系，两者可共存
- "no_conflict": 无关关系，不冲突

should_replace: true 表示需要替换旧记忆，false 表示保留两者
winner: "new" 表示新记忆胜出，"old" 表示旧记忆胜出（仅在 direct_conflict 时有意义）

直接输出 JSON，不要加代码块包裹。"#;
2230
/// System prompt for AI memory quality assessment.
///
/// Written in Chinese. It asks the model to rate a memory on reuse value,
/// decision weight, timeliness and uniqueness, using a 0-100 rubric, and to
/// reply with
/// `{"quality_score", "reason", "should_keep", "suggested_category"}`. The
/// field names match `MemoryQualityResult`, which
/// `AiMemoryProcessor::assess_quality` deserialises the reply into.
const MEMORY_QUALITY_SYSTEM_PROMPT: &str = r#"你是一个记忆质量评估助手。你的任务是评估记忆的长期价值和重要程度。

评估维度：
1. 复用价值：这条信息在未来的对话中会被引用吗？
2. 决策权重：这是重要的项目决策还是次要细节？
3. 时效性：这条信息会很快过时吗？
4. 独特性：这条信息是否足够独特，不与其他记忆重叠？

评分标准：
- 90-100: 核心决策，长期有效，高复用价值（如数据库选型、框架选择）
- 70-89: 重要偏好或解决方案，中等复用价值
- 50-69: 有用的技术信息或发现，时效性中等
- 30-49: 一般性信息，复用价值较低
- 0-29: 过时或过于具体的细节，建议丢弃

输出格式（严格 JSON）：
```json
{
  "quality_score": 85,
  "reason": "这是核心的技术选型决策，长期有效，高复用价值",
  "should_keep": true,
  "suggested_category": "decision"
}
```

直接输出 JSON，不要加代码块包裹。"#;
2258
/// System prompt for AI memory merge.
///
/// Written in Chinese. It asks the model to merge similar or related
/// memories into one sentence and reply with
/// `{"merged_content", "category", "importance", "merged_from_count",
/// "summary_reason"}`; an empty `merged_content` with a count of 0 means the
/// memories should not be merged. The field names match `MemoryMergeResult`,
/// which `AiMemoryProcessor::merge_memories` deserialises the reply into.
const MEMORY_MERGE_SYSTEM_PROMPT: &str = r#"你是一个记忆合并助手。你的任务是将多条相似或相关的记忆合并为一条精炼的记忆。

合并原则：
1. 相同主题的记忆应合并为一条综合性记忆
2. 保留所有关键信息，去除重复内容
3. 使用简洁的一句话表达
4. 合并后的记忆应比原记忆更全面但更简洁
5. 如果记忆完全不相关，返回空结果表示不应合并

输出格式（严格 JSON）：
```json
{
  "merged_content": "使用 PostgreSQL 作为主数据库（版本15），Redis 作为缓存层，通过连接池优化性能",
  "category": "technical",
  "importance": 75,
  "merged_from_count": 3,
  "summary_reason": "三条记忆都与数据库和缓存技术栈相关，合并为一条综合性技术栈记忆"
}
```

如果不应合并，返回：
```json
{"merged_content": "", "category": "", "importance": 0, "merged_from_count": 0, "summary_reason": "记忆主题不同，不应合并"}
```

直接输出 JSON，不要加代码块包裹。"#;
2286
/// Result of AI memory summarization.
///
/// Deserialised from the model's JSON reply; all three fields are required.
#[derive(Debug, Clone, serde::Deserialize)]
pub struct MemorySummaryResult {
    /// One-sentence summary; empty when the model found nothing worth keeping.
    pub summary: String,
    /// Category name as returned by the model (e.g. "decision").
    pub category: String,
    /// Importance suggested by the model.
    pub importance: f64,
}
2294
/// Result of AI conflict detection.
///
/// Deserialised from the model's JSON reply.
#[derive(Debug, Clone, serde::Deserialize)]
pub struct MemoryConflictResult {
    /// Relationship label; the prompt offers "direct_conflict",
    /// "outdated_update", "supplement" and "no_conflict".
    pub conflict_type: String,
    /// `true` when the old memory should be replaced, `false` to keep both.
    pub should_replace: bool,
    /// Model's explanation for its verdict.
    pub reason: String,
    /// "new" or "old" per the prompt; may be absent from the reply.
    pub winner: Option<String>,
}
2303
/// Result of AI quality assessment.
///
/// Deserialised from the model's JSON reply.
#[derive(Debug, Clone, serde::Deserialize)]
pub struct MemoryQualityResult {
    /// Score on the prompt's 0-100 rubric.
    pub quality_score: f64,
    /// Model's explanation for the score.
    pub reason: String,
    /// Whether the model recommends keeping the memory.
    pub should_keep: bool,
    /// Category the model suggests for the memory; may be absent from the reply.
    pub suggested_category: Option<String>,
}
2312
/// Result of AI memory merge.
///
/// Deserialised from the model's JSON reply; all fields are required.
#[derive(Debug, Clone, serde::Deserialize)]
pub struct MemoryMergeResult {
    /// Merged memory text; empty when the memories should not be merged.
    pub merged_content: String,
    /// Category name as returned by the model (e.g. "technical").
    pub category: String,
    /// Importance suggested by the model.
    pub importance: f64,
    /// Number of memories the model reports having merged; 0 means "do not merge".
    pub merged_from_count: usize,
    /// Model's explanation of why it merged (or declined to).
    pub summary_reason: String,
}
2322
/// AI-enhanced memory processor.
/// Provides advanced memory operations using AI.
///
/// Pairs a chat provider with the name of the model it represents.
pub struct AiMemoryProcessor {
    // Provider that receives every request built by this processor.
    provider: Box<dyn crate::providers::Provider>,
    // Model name reported by `model_name()`.
    model: String,
}
2329
2330impl AiMemoryProcessor {
2331    /// Create a new AI memory processor.
2332    pub fn new(provider: Box<dyn crate::providers::Provider>, model: String) -> Self {
2333        Self { provider, model }
2334    }
2335    
2336    /// Summarize multiple memories into one concise memory.
2337    pub async fn summarize_memories(&self, memories: &[&MemoryEntry]) -> Result<Option<MemoryEntry>> {
2338        if memories.is_empty() {
2339            return Ok(None);
2340        }
2341        
2342        // Build input from memories
2343        let memories_text = memories
2344            .iter()
2345            .map(|m| format!("[{}] {}", m.category.display_name(), m.content))
2346            .collect::<Vec<_>>()
2347            .join("\n");
2348        
2349        let request = build_ai_request(
2350            MEMORY_SUMMARY_SYSTEM_PROMPT,
2351            &format!("请将以下记忆合并为一条精炼的摘要：\n\n{}", memories_text),
2352        );
2353        
2354        let response = self.provider.chat(request).await?;
2355        let response_text = extract_response_text(&response);
2356        
2357        let result: MemorySummaryResult = parse_json_response(&response_text)?;
2358        
2359        if result.summary.is_empty() {
2360            return Ok(None);
2361        }
2362        
2363        let category = parse_category(&result.category)?;
2364        let mut entry = MemoryEntry::new(category, result.summary, None);
2365        entry.importance = result.importance.clamp(0.0, 100.0);
2366        
2367        Ok(Some(entry))
2368    }
2369    
2370    /// Detect if two memories conflict using AI.
2371    pub async fn detect_conflict(&self, old: &MemoryEntry, new: &MemoryEntry) -> Result<MemoryConflictResult> {
2372        let input = format!(
2373            "旧记忆：[{}] {}\n新记忆：[{}] {}\n\n请判断这两条记忆是否存在冲突。",
2374            old.category.display_name(),
2375            old.content,
2376            new.category.display_name(),
2377            new.content
2378        );
2379        
2380        let request = build_ai_request(MEMORY_CONFLICT_SYSTEM_PROMPT, &input);
2381        let response = self.provider.chat(request).await?;
2382        let response_text = extract_response_text(&response);
2383        
2384        parse_json_response(&response_text)
2385    }
2386    
2387    /// Assess memory quality using AI.
2388    pub async fn assess_quality(&self, memory: &MemoryEntry) -> Result<MemoryQualityResult> {
2389        let input = format!(
2390            "记忆内容：[{}] {}\n\n请评估这条记忆的质量和长期价值。",
2391            memory.category.display_name(),
2392            memory.content
2393        );
2394        
2395        let request = build_ai_request(MEMORY_QUALITY_SYSTEM_PROMPT, &input);
2396        let response = self.provider.chat(request).await?;
2397        let response_text = extract_response_text(&response);
2398        
2399        parse_json_response(&response_text)
2400    }
2401    
2402    /// Merge multiple memories using AI.
2403    pub async fn merge_memories(&self, memories: &[&MemoryEntry]) -> Result<Option<MemoryEntry>> {
2404        if memories.len() < 2 {
2405            return Ok(None);
2406        }
2407        
2408        let memories_text = memories
2409            .iter()
2410            .map(|m| format!("[{}] {}", m.category.display_name(), m.content))
2411            .collect::<Vec<_>>()
2412            .join("\n");
2413        
2414        let request = build_ai_request(
2415            MEMORY_MERGE_SYSTEM_PROMPT,
2416            &format!("请判断以下记忆是否应该合并，如果应该则生成合并后的记忆：\n\n{}", memories_text),
2417        );
2418        
2419        let response = self.provider.chat(request).await?;
2420        let response_text = extract_response_text(&response);
2421        
2422        let result: MemoryMergeResult = parse_json_response(&response_text)?;
2423        
2424        if result.merged_content.is_empty() || result.merged_from_count == 0 {
2425            return Ok(None);
2426        }
2427        
2428        let category = parse_category(&result.category)?;
2429        let mut entry = MemoryEntry::new(category, result.merged_content, None);
2430        entry.importance = result.importance.clamp(0.0, 100.0);
2431        
2432        Ok(Some(entry))
2433    }
2434    
2435    /// Get the model name.
2436    pub fn model_name(&self) -> &str {
2437        &self.model
2438    }
2439}
2440
2441/// Build a standard AI request for memory processing.
2442fn build_ai_request(system_prompt: &str, user_input: &str) -> crate::providers::ChatRequest {
2443    use crate::providers::{ChatRequest, Message, MessageContent, Role};
2444    
2445    ChatRequest {
2446        messages: vec![Message {
2447            role: Role::User,
2448            content: MessageContent::Text(user_input.to_string()),
2449        }],
2450        tools: vec![],
2451        system: Some(system_prompt.to_string()),
2452        think: false,
2453        max_tokens: 512,
2454        server_tools: vec![],
2455        enable_caching: false,
2456    }
2457}
2458
2459/// Extract text from AI response.
2460fn extract_response_text(response: &crate::providers::ChatResponse) -> String {
2461    response.content
2462        .iter()
2463        .filter_map(|block| {
2464            if let crate::providers::ContentBlock::Text { text } = block {
2465                Some(text.clone())
2466            } else {
2467                None
2468            }
2469        })
2470        .collect::<Vec<_>>()
2471        .join("")
2472}
2473
2474/// Parse JSON response with cleanup.
2475fn parse_json_response<T: serde::de::DeserializeOwned>(json_text: &str) -> Result<T> {
2476    let cleaned = json_text
2477        .trim()
2478        .trim_start_matches("```json")
2479        .trim_start_matches("```")
2480        .trim_end_matches("```")
2481        .trim();
2482    
2483    serde_json::from_str(cleaned).map_err(|e| anyhow::anyhow!("JSON parse error: {}", e))
2484}
2485
2486/// Parse category string to MemoryCategory.
2487fn parse_category(s: &str) -> Result<MemoryCategory> {
2488    match s.to_lowercase().as_str() {
2489        "decision" | "决策" => Ok(MemoryCategory::Decision),
2490        "preference" | "偏好" => Ok(MemoryCategory::Preference),
2491        "solution" | "解决方案" => Ok(MemoryCategory::Solution),
2492        "finding" | "发现" => Ok(MemoryCategory::Finding),
2493        "technical" | "技术" => Ok(MemoryCategory::Technical),
2494        "structure" | "结构" => Ok(MemoryCategory::Structure),
2495        _ => anyhow::bail!("Unknown category: {}", s),
2496    }
2497}
2498
/// Configuration for AI-enhanced memory processing.
///
/// Four feature toggles plus three numeric thresholds. Ready-made presets
/// come from `Default`, `minimal()` and `aggressive()`; `from_env()` reads the
/// same settings from environment variables.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct AiMemoryConfig {
    /// Enable AI summarization.
    pub enable_summarization: bool,
    /// Enable AI conflict detection.
    pub enable_conflict_detection: bool,
    /// Enable AI quality assessment.
    pub enable_quality_assessment: bool,
    /// Enable AI memory merging.
    pub enable_merging: bool,
    /// Minimum memories to trigger summarization.
    pub summarize_threshold: usize,
    /// Quality threshold for keeping memories.
    pub quality_threshold: f64,
    /// Similarity threshold for merging.
    pub merge_similarity_threshold: f64,
}
2517
2518impl Default for AiMemoryConfig {
2519    fn default() -> Self {
2520        Self {
2521            enable_summarization: true,
2522            enable_conflict_detection: true,
2523            enable_quality_assessment: false,  // Optional, can be expensive
2524            enable_merging: true,
2525            summarize_threshold: 5,
2526            quality_threshold: 30.0,
2527            merge_similarity_threshold: 0.6,
2528        }
2529    }
2530}
2531
2532impl AiMemoryConfig {
2533    /// Create a minimal config (disable all AI features).
2534    pub fn minimal() -> Self {
2535        Self {
2536            enable_summarization: false,
2537            enable_conflict_detection: false,
2538            enable_quality_assessment: false,
2539            enable_merging: false,
2540            summarize_threshold: 10,
2541            quality_threshold: 20.0,
2542            merge_similarity_threshold: 0.8,
2543        }
2544    }
2545    
2546    /// Create an aggressive config (enable all AI features).
2547    pub fn aggressive() -> Self {
2548        Self {
2549            enable_summarization: true,
2550            enable_conflict_detection: true,
2551            enable_quality_assessment: true,
2552            enable_merging: true,
2553            summarize_threshold: 3,
2554            quality_threshold: 40.0,
2555            merge_similarity_threshold: 0.5,
2556        }
2557    }
2558    
2559    /// Parse from environment variable.
2560    pub fn from_env() -> Self {
2561        let enable_all = std::env::var("MEMORY_AI_ALL")
2562            .map(|v| v == "true" || v == "1")
2563            .unwrap_or(false);
2564        
2565        if enable_all {
2566            return Self::aggressive();
2567        }
2568        
2569        Self {
2570            enable_summarization: std::env::var("MEMORY_AI_SUMMARY")
2571                .map(|v| v != "false" && v != "0")
2572                .unwrap_or(true),
2573            enable_conflict_detection: std::env::var("MEMORY_AI_CONFLICT")
2574                .map(|v| v != "false" && v != "0")
2575                .unwrap_or(true),
2576            enable_quality_assessment: std::env::var("MEMORY_AI_QUALITY")
2577                .map(|v| v == "true" || v == "1")
2578                .unwrap_or(false),
2579            enable_merging: std::env::var("MEMORY_AI_MERGE")
2580                .map(|v| v != "false" && v != "0")
2581                .unwrap_or(true),
2582            summarize_threshold: std::env::var("MEMORY_SUMMARY_THRESHOLD")
2583                .and_then(|v| v.parse().map_err(|_| std::env::VarError::NotPresent))
2584                .unwrap_or(5),
2585            quality_threshold: std::env::var("MEMORY_QUALITY_THRESHOLD")
2586                .and_then(|v| v.parse().map_err(|_| std::env::VarError::NotPresent))
2587                .unwrap_or(30.0),
2588            merge_similarity_threshold: std::env::var("MEMORY_MERGE_THRESHOLD")
2589                .and_then(|v| v.parse().map_err(|_| std::env::VarError::NotPresent))
2590                .unwrap_or(0.6),
2591        }
2592    }
2593}
2594
2595/// Extended AutoMemory with AI-enhanced operations.
2596impl AutoMemory {
2597    /// Add memory with AI conflict detection.
2598    pub async fn add_memory_with_ai_conflict(
2599        &mut self,
2600        category: MemoryCategory,
2601        content: String,
2602        source_session: Option<String>,
2603        processor: Option<&AiMemoryProcessor>,
2604    ) -> Result<()> {
2605        // Check for duplicates first (rule-based, fast)
2606        if self.has_similar(&content) {
2607            return Ok(());
2608        }
2609        
2610        // Create new entry
2611        let new_entry = MemoryEntry::new(category, content.clone(), source_session);
2612        
2613        // Find potential conflicts (same category, similar topic)
2614        let potential_conflicts: Vec<(usize, &MemoryEntry)> = self.entries
2615            .iter()
2616            .enumerate()
2617            .filter(|(_, e)| {
2618                e.category == category && 
2619                Self::calculate_similarity(&e.content.to_lowercase(), &content.to_lowercase()) > 0.3
2620            })
2621            .collect();
2622        
2623        if let Some(processor) = processor {
2624            // Use AI to check each potential conflict
2625            for (idx, old_entry) in potential_conflicts {
2626                let result = processor.detect_conflict(old_entry, &new_entry).await?;
2627                
2628                if result.should_replace {
2629                    log::debug!("AI detected conflict: {} -> replacing '{}' with '{}'", 
2630                        result.conflict_type, old_entry.content, content);
2631                    self.entries.remove(idx);
2632                    self.invalidate_index();
2633                    break;
2634                }
2635            }
2636        } else {
2637            // Fallback to rule-based conflict detection
2638            if let Some(conflict_idx) = self.find_conflict(&content, category) {
2639                self.entries.remove(conflict_idx);
2640                self.invalidate_index();
2641            }
2642        }
2643        
2644        self.add(new_entry);
2645        Ok(())
2646    }
2647    
2648    /// Assess and filter memories by quality using AI.
2649    pub async fn assess_quality_with_ai(
2650        &mut self,
2651        processor: &AiMemoryProcessor,
2652        config: &AiMemoryConfig,
2653    ) -> Result<usize> {
2654        if !config.enable_quality_assessment {
2655            return Ok(0);
2656        }
2657        
2658        // Collect indices of non-manual entries first
2659        let indices_to_assess: Vec<usize> = self.entries
2660            .iter()
2661            .enumerate()
2662            .filter(|(_, entry)| !entry.is_manual)
2663            .map(|(i, _)| i)
2664            .collect();
2665        
2666        // Assess each entry and collect results
2667        let mut to_remove: Vec<usize> = Vec::new();
2668        let mut importance_updates: Vec<(usize, f64)> = Vec::new();
2669        
2670        for i in indices_to_assess {
2671            let entry = &self.entries[i];
2672            let result = processor.assess_quality(entry).await?;
2673            
2674            if !result.should_keep || result.quality_score < config.quality_threshold {
2675                log::debug!("AI quality assessment: removing '{}' (score: {:.1}, reason: {})",
2676                    entry.content, result.quality_score, result.reason);
2677                to_remove.push(i);
2678            } else {
2679                // Record importance update
2680                importance_updates.push((i, result.quality_score));
2681            }
2682        }
2683        
2684        // Apply importance updates
2685        for (i, score) in importance_updates {
2686            self.entries[i].importance = score;
2687        }
2688        
2689        let removed_count = to_remove.len();
2690        
2691        // Remove low-quality entries (in reverse order to preserve indices)
2692        for idx in to_remove.into_iter().rev() {
2693            self.entries.remove(idx);
2694        }
2695        
2696        if removed_count > 0 {
2697            self.invalidate_index();
2698            self.prune();
2699        }
2700        
2701        Ok(removed_count)
2702    }
2703    
2704    /// Merge similar memories using AI.
2705    pub async fn merge_similar_with_ai(
2706        &mut self,
2707        processor: &AiMemoryProcessor,
2708        config: &AiMemoryConfig,
2709    ) -> Result<usize> {
2710        if !config.enable_merging || self.entries.len() < 2 {
2711            return Ok(0);
2712        }
2713        
2714        let mut merged_count = 0;
2715        let mut to_remove: Vec<usize> = Vec::new();
2716        let mut new_entries: Vec<MemoryEntry> = Vec::new();
2717        
2718        // Find groups of similar memories
2719        let mut processed: std::collections::HashSet<usize> = std::collections::HashSet::new();
2720        
2721        for i in 0..self.entries.len() {
2722            if processed.contains(&i) {
2723                continue;
2724            }
2725            
2726            // Find similar entries to this one
2727            let mut similar_group: Vec<usize> = vec![i];
2728            
2729            for j in (i + 1)..self.entries.len() {
2730                if processed.contains(&j) {
2731                    continue;
2732                }
2733                
2734                let sim = Self::calculate_similarity(
2735                    &self.entries[i].content.to_lowercase(),
2736                    &self.entries[j].content.to_lowercase(),
2737                );
2738                
2739                if sim >= config.merge_similarity_threshold {
2740                    similar_group.push(j);
2741                }
2742            }
2743            
2744            // If we have a group, try to merge
2745            if similar_group.len() >= 2 {
2746                let group_entries: Vec<&MemoryEntry> = similar_group
2747                    .iter()
2748                    .map(|&idx| &self.entries[idx])
2749                    .collect();
2750                
2751                if let Some(merged) = processor.merge_memories(&group_entries).await? {
2752                    log::debug!("AI merged {} memories into: '{}'",
2753                        similar_group.len(), merged.content);
2754                    
2755                    new_entries.push(merged);
2756                    to_remove.extend(similar_group.iter().copied());
2757                    processed.extend(similar_group.iter().copied());
2758                    merged_count += similar_group.len() - 1;
2759                }
2760            }
2761        }
2762        
2763        // Remove merged entries (sorted and in reverse order)
2764        let mut sorted_remove: Vec<usize> = to_remove;
2765        sorted_remove.sort();
2766        for idx in sorted_remove.into_iter().rev() {
2767            self.entries.remove(idx);
2768        }
2769        
2770        // Add new merged entries
2771        for entry in new_entries {
2772            self.entries.push(entry);
2773        }
2774        
2775        if merged_count > 0 {
2776            self.invalidate_index();
2777            self.prune();
2778        }
2779        
2780        Ok(merged_count)
2781    }
2782    
2783    /// Generate AI-enhanced summary for prompt.
2784    pub async fn generate_ai_summary(
2785        &self,
2786        max_entries: usize,
2787        processor: Option<&AiMemoryProcessor>,
2788        config: Option<&AiMemoryConfig>,
2789    ) -> Result<String> {
2790        if self.entries.is_empty() {
2791            return Ok(String::new());
2792        }
2793        
2794        let default_config = AiMemoryConfig::default();
2795        let config = config.unwrap_or(&default_config);
2796        
2797        // If AI summarization is enabled and we have a processor
2798        if config.enable_summarization
2799            && let Some(processor) = processor
2800            && self.entries.len() >= config.summarize_threshold
2801        {
2802            
2803            // Group by category
2804            let mut by_category: HashMap<MemoryCategory, Vec<&MemoryEntry>> = HashMap::new();
2805            for entry in &self.entries {
2806                by_category.entry(entry.category).or_default().push(entry);
2807            }
2808            
2809            let mut summary = String::from("【跨会话记忆 (AI摘要)】\n\n");
2810            
2811            for (cat, entries) in by_category {
2812                if entries.is_empty() {
2813                    continue;
2814                }
2815                
2816                // Get top entries by importance
2817                let top_entries: Vec<&MemoryEntry> = entries
2818                    .iter()
2819                    .take(max_entries.min(entries.len()))
2820                    .copied()
2821                    .collect();
2822                
2823                // Try AI summarization for this category
2824                if let Some(ai_summary) = processor.summarize_memories(&top_entries).await? {
2825                    summary.push_str(&format!("{} {}:\n", cat.icon(), cat.display_name()));
2826                    summary.push_str(&format!("  {}\n\n", ai_summary.content));
2827                } else {
2828                    // Fallback to individual entries
2829                    summary.push_str(&format!("{} {}:\n", cat.icon(), cat.display_name()));
2830                    for entry in top_entries {
2831                        summary.push_str(&format!("  {}\n", entry.format_for_prompt()));
2832                    }
2833                    summary.push('\n');
2834                }
2835            }
2836            
2837            Ok(summary)
2838        } else {
2839            // Fallback to rule-based summary
2840            Ok(self.generate_contextual_summary("", max_entries))
2841        }
2842    }
2843}
2844
2845
2846
2847// ============================================================================
2848// Memory Detection (Fallback - Rule-based)
2849// ============================================================================
2850
2851/// Detect potential memory entries from conversation content.
2852/// This is the fallback method using rule-based detection (no AI).
2853/// For AI-based extraction, use AiMemoryExtractor.
2854pub fn detect_memories_fallback(text: &str, session_id: Option<&str>) -> Vec<MemoryEntry> {
2855    let mut entries = Vec::new();
2856    let text_lower = text.to_lowercase();
2857
2858    // Detection patterns for each category (filtered to avoid too generic keywords)
2859    let patterns: Vec<(MemoryCategory, Vec<&str>)> = vec![
2860        (MemoryCategory::Decision, vec![
2861            "决定", "决定使用", "选择使用", "采用", "decided to", "decision to", 
2862            "chose to", "adopted", "选定", "最终选择",
2863        ]),
2864        (MemoryCategory::Preference, vec![
2865            "我喜欢", "我偏好", "prefer to", "i prefer", "my preference is",
2866            "习惯用", "我习惯", "usually prefer", "偏好使用",
2867        ]),
2868        (MemoryCategory::Solution, vec![
2869            "修复了", "解决了", "fixed by", "solved by", "resolved by", 
2870            "通过添加", "通过修改", "通过删除", "解决方法是",
2871        ]),
2872        (MemoryCategory::Finding, vec![
2873            "发现", "注意到", "found that", "noticed that", "discovered", 
2874            "观察到", "api 端点", "位于", "located at", "关键发现",
2875        ]),
2876        (MemoryCategory::Technical, vec![
2877            "使用框架", "using framework", "built with", "基于", 
2878            "框架是", "技术栈", "依赖库",
2879        ]),
2880        (MemoryCategory::Structure, vec![
2881            "入口文件", "entry point is", "主文件是", "main file", 
2882            "配置文件", "config file", "核心文件",
2883        ]),
2884    ];
2885
2886    for (category, keywords) in patterns {
2887        for keyword in keywords {
2888            if text_lower.contains(keyword) {
2889                // Extract the relevant sentence or phrase
2890                let content = extract_memory_content(text, keyword);
2891                // Use higher threshold to avoid too generic content
2892                if !content.is_empty() && content.len() >= MIN_MEMORY_CONTENT_LENGTH {
2893                    let entry = MemoryEntry::new(
2894                        category,
2895                        content,
2896                        session_id.map(|s| s.to_string()),
2897                    );
2898                    entries.push(entry);
2899                }
2900            }
2901        }
2902    }
2903
2904    // Deduplicate by content similarity
2905    deduplicate_entries(entries)
2906}
2907
/// Detect memories from text using the rule-based fallback method.
///
/// Thin alias for `detect_memories_fallback`: `text` and `session_id` are
/// forwarded unchanged and that function's result is returned as-is.
/// This is kept for backward compatibility and for cases where AI is unavailable.
pub fn detect_memories_from_text(text: &str, session_id: Option<&str>) -> Vec<MemoryEntry> {
    detect_memories_fallback(text, session_id)
}
2913
2914/// Detect memories asynchronously using AI extractor.
2915/// Falls back to rule-based detection if AI fails or is unavailable.
2916pub async fn detect_memories_with_ai(
2917    text: &str,
2918    session_id: Option<&str>,
2919    extractor: Option<&dyn MemoryExtractor>,
2920) -> Result<Vec<MemoryEntry>> {
2921    if let Some(ai_extractor) = extractor {
2922        // Try AI extraction first
2923        match ai_extractor.extract(text, session_id).await {
2924            Ok(entries) if !entries.is_empty() => {
2925                return Ok(entries);
2926            }
2927            Ok(_) => {
2928                // AI returned empty, try fallback (silent)
2929            }
2930            Err(_) => {
2931                // AI extraction failed, try fallback (silent)
2932            }
2933        }
2934    }
2935    
2936    // Fallback to rule-based detection
2937    Ok(detect_memories_fallback(text, session_id))
2938}
2939
2940/// Deduplicate entries by content similarity.
2941/// Keeps longer (more detailed) entries when duplicates are found.
2942fn deduplicate_entries(entries: Vec<MemoryEntry>) -> Vec<MemoryEntry> {
2943    if entries.is_empty() {
2944        return entries;
2945    }
2946    
2947    // Sort by content length (longer first - keep more detailed entries)
2948    let mut sorted = entries;
2949    sorted.sort_by(|a, b| b.content.len().cmp(&a.content.len()));
2950    
2951    // Keep only unique entries
2952    let mut unique: Vec<MemoryEntry> = Vec::new();
2953    for entry in sorted {
2954        let entry_lower = entry.content.to_lowercase();
2955        
2956        // Check if already have similar entry
2957        let is_duplicate = unique.iter().any(|existing| {
2958            let existing_lower = existing.content.to_lowercase();
2959            
2960            // Exact match
2961            if existing_lower == entry_lower {
2962                return true;
2963            }
2964            
2965            // High similarity (same words mostly)
2966            let similarity = calculate_similarity(&existing_lower, &entry_lower);
2967            similarity >= 0.8
2968        });
2969        
2970        if !is_duplicate {
2971            unique.push(entry);
2972        }
2973        
2974        // Stop if we have enough entries
2975        if unique.len() >= MAX_DETECTED_ENTRIES {
2976            break;
2977        }
2978    }
2979    
2980    unique
2981}
2982
2983/// Extract memory content around a keyword.
2984fn extract_memory_content(text: &str, keyword: &str) -> String {
2985    let text_lower = text.to_lowercase();
2986    let keyword_lower = keyword.to_lowercase();
2987
2988    // Find keyword position
2989    let pos = match text_lower.find(&keyword_lower) {
2990        Some(p) => p,
2991        None => return String::new(),
2992    };
2993
2994    // Find sentence boundaries (sentence end markers)
2995    const SENTENCE_END_MARKERS: [char; 3] = ['.', '\n', '。'];
2996
2997    // For start: find the last sentence end marker before pos
2998    // Need to correctly find the next char boundary after multi-byte markers like '。'
2999    let start = text[..pos].rfind(SENTENCE_END_MARKERS)
3000        .map(|i| {
3001            // The marker char starts at byte position i
3002            // We need the byte position after this marker char
3003            // Use char_indices to find the next char's start position
3004            match text[i..].char_indices().nth(1) {
3005                Some((next_idx, _)) => i + next_idx,  // Next char starts at i + next_idx
3006                None => pos,  // Marker is at the end of prefix, start from keyword position
3007            }
3008        })
3009        .unwrap_or(0);
3010
3011    // For end: find the first sentence end marker after pos
3012    let end = text[pos..].find(SENTENCE_END_MARKERS)
3013        .map(|i| {
3014            let marker_pos = pos + i;
3015            // Find the byte position after the marker char
3016            match text[marker_pos..].char_indices().nth(1) {
3017                Some((next_idx, _)) => marker_pos + next_idx,
3018                None => text.len(),  // Marker at end of text
3019            }
3020        })
3021        .unwrap_or_else(|| {
3022            // No marker found: use MAX_MEMORY_CONTENT_LENGTH, but ensure valid UTF-8 boundary
3023            let max_end = pos + MAX_MEMORY_CONTENT_LENGTH;
3024            // Find the nearest valid char boundary
3025            if max_end >= text.len() {
3026                text.len()
3027            } else {
3028                // Walk backwards to find a valid boundary
3029                let mut boundary = max_end;
3030                while boundary > pos && !text.is_char_boundary(boundary) {
3031                    boundary -= 1;
3032                }
3033                boundary
3034            }
3035        });
3036
3037    // Ensure start and end are valid UTF-8 boundaries and start < end
3038    if start >= end || start > text.len() || end > text.len() {
3039        return String::new();
3040    }
3041
3042    let content = text[start..end].trim();
3043    
3044    // Quality check: reject content that looks like formatting output
3045    if is_low_quality_memory(content) {
3046        return String::new();
3047    }
3048    
3049    // Clean up and truncate
3050    if content.len() > MAX_MEMORY_CONTENT_LENGTH {
3051        truncate_str(content, MAX_MEMORY_CONTENT_LENGTH - 3)
3052    } else {
3053        content.to_string()
3054    }
3055}
3056
3057/// Check if extracted content is low quality (formatting artifacts, etc).
3058fn is_low_quality_memory(content: &str) -> bool {
3059    // Too short to be meaningful
3060    if content.len() < MIN_MEMORY_CONTENT_LENGTH {
3061        return true;
3062    }
3063    
3064    // Contains formatting characters (table borders, tree lines)
3065    let formatting_chars = ['│', '├', '└', '┌', '┐', '─', '═', '║', '╔', '╗', '╚', '╝'];
3066    if content.chars().any(|c| formatting_chars.contains(&c)) {
3067        return true;
3068    }
3069    
3070    // Starts with emoji (likely formatted output, not user intent)
3071    let first_char = content.chars().next().unwrap_or(' ');
3072    if !first_char.is_alphanumeric() && !first_char.is_ascii_punctuation() && first_char > '\u{FF}' {
3073        // Check if it's a common emoji prefix
3074        if content.starts_with("🎯") || content.starts_with("🔧") || content.starts_with("💡") ||
3075           content.starts_with("📚") || content.starts_with("🏗") || content.starts_with("👤") ||
3076           content.starts_with("⭐") || content.starts_with("📝") || content.starts_with("✅") ||
3077           content.starts_with("❌") || content.starts_with("⚠") {
3078            return true;
3079        }
3080    }
3081    
3082    // Contains memory system markers (self-referential)
3083    if content.contains("【自动记忆摘要】") || content.contains("[ACCUMULATED MEMORY]") ||
3084       content.contains("记忆统计") || content.contains("memory.json") {
3085        return true;
3086    }
3087    
3088    // Looks like a numbered list item without substance
3089    if content.starts_with("- ") && content.len() < 30 {
3090        return true;
3091    }
3092    
3093    // Contains mostly numbers/punctuation (likely code output)
3094    let alpha_count = content.chars().filter(|c| c.is_alphabetic()).count();
3095    let total_count = content.chars().count();
3096    if total_count > 0 && alpha_count < total_count / 4 {
3097        return true;
3098    }
3099    
3100    false
3101}
3102
3103// ============================================================================
3104// Rewind / Summarize Up To Here
3105// ============================================================================
3106
/// Result of a rewind/summarize operation.
///
/// Returned by `summarize_up_to`, which replaces the messages before a chosen
/// index with a single summary message and keeps the rest.
#[derive(Debug, Clone)]
pub struct RewindResult {
    /// Number of messages in the input list.
    pub original_count: usize,
    /// Number of messages in `new_messages`.
    pub new_count: usize,
    /// Index at which the rewind was applied (`0` means nothing was summarized).
    pub rewind_index: usize,
    /// Summary text generated for the removed messages; `None` when nothing
    /// was summarized.
    pub summary: Option<String>,
    /// New message list (summary message + kept messages).
    pub new_messages: Vec<Message>,
}
3121
3122/// Summarize messages up to a specific index, keeping recent ones.
3123/// Returns the new message list with summary + kept messages.
3124pub async fn summarize_up_to(
3125    messages: &[Message],
3126    index: usize,
3127    compressor: Option<&dyn crate::compress::Compressor>,
3128) -> Result<RewindResult> {
3129    if index >= messages.len() {
3130        anyhow::bail!("rewind index {} out of bounds (messages: {})", index, messages.len());
3131    }
3132
3133    if index == 0 {
3134        // Nothing to summarize, return original messages
3135        return Ok(RewindResult {
3136            original_count: messages.len(),
3137            new_count: messages.len(),
3138            rewind_index: 0,
3139            summary: None,
3140            new_messages: messages.to_vec(),
3141        });
3142    }
3143
3144    let to_summarize = &messages[..index];
3145    let to_keep = &messages[index..];
3146
3147    // Generate summary
3148    let summary = if let Some(comp) = compressor {
3149        // Use AI compressor
3150        let segment = comp.summarize(to_summarize, &crate::compress::CompressionConfig::default()).await?;
3151        Some(segment.summary)
3152    } else {
3153        // Fallback to simple summary
3154        Some(generate_simple_summary(to_summarize))
3155    };
3156
3157    // Build summary message
3158    let summary_msg = create_summary_message(&summary, to_summarize.len());
3159
3160    // New message list: summary + kept messages
3161    let new_messages: Vec<Message> = std::iter::once(summary_msg)
3162        .chain(to_keep.iter().cloned())
3163        .collect();
3164    
3165    let new_count = new_messages.len();
3166
3167    Ok(RewindResult {
3168        original_count: messages.len(),
3169        new_count,
3170        rewind_index: index,
3171        summary,
3172        new_messages,
3173    })
3174}
3175
3176/// Create a summary message for injection.
3177fn create_summary_message(summary: &Option<String>, original_count: usize) -> Message {
3178    let content = match summary {
3179        Some(s) => format!("[对话摘要 - 原 {} 条消息]\n\n{}", original_count, s),
3180        None => format!("[对话摘要 - 原 {} 条消息已压缩]", original_count),
3181    };
3182
3183    Message {
3184        role: crate::providers::Role::User,
3185        content: crate::providers::MessageContent::Text(content),
3186    }
3187}
3188
3189/// Generate a simple summary without AI.
3190fn generate_simple_summary(messages: &[Message]) -> String {
3191    let mut parts: Vec<String> = Vec::new();
3192    
3193    // Extract key points from each message
3194    for msg in messages {
3195        if msg.role == crate::providers::Role::User {
3196            let text = match &msg.content {
3197                crate::providers::MessageContent::Text(t) => t,
3198                _ => continue,
3199            };
3200            // Take first significant line
3201            let first_line = text.lines().next().unwrap_or("");
3202            if first_line.len() > 20 {
3203                parts.push(truncate_str(first_line, 100));
3204            }
3205        }
3206    }
3207
3208    if parts.is_empty() {
3209        "对话已压缩".to_string()
3210    } else if parts.len() <= 5 {
3211        parts.join(" | ")
3212    } else {
3213        format!("{} ... (共 {} 个话题)", parts[0], parts.len())
3214    }
3215}
3216
3217// ============================================================================
3218// Semantic Search
3219// ============================================================================
3220
/// Cosine similarity calculation utility.
/// Used for vector-based semantic search when embedding API is available.
pub struct SemanticUtils;

impl SemanticUtils {
    /// Calculate cosine similarity between two embeddings.
    ///
    /// ## Formula
    ///
    /// cos(A, B) = (A · B) / (|A| × |B|)
    ///
    /// Range of values:
    /// - 1.0 = identical direction
    /// - 0.0 = unrelated (orthogonal)
    /// - -1.0 = opposite direction
    ///
    /// Returns `0.0` when the slices differ in length, are empty, or either
    /// one has zero magnitude.
    pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
        if a.is_empty() || a.len() != b.len() {
            return 0.0;
        }

        // Euclidean length of a vector.
        let magnitude = |v: &[f32]| v.iter().map(|x| x * x).sum::<f32>().sqrt();

        let (norm_a, norm_b) = (magnitude(a), magnitude(b));
        if norm_a == 0.0 || norm_b == 0.0 {
            return 0.0;
        }

        let dot_product = a.iter().zip(b).map(|(x, y)| x * y).sum::<f32>();
        dot_product / (norm_a * norm_b)
    }
}
3252
3253
3254/// Semantic search without AI (using TF-IDF like approach).
3255/// 
3256/// ## TF-IDF 语义搜索
3257/// 
3258/// TF-IDF（Term Frequency-Inverse Document Frequency）是一种
3259/// 不需要 AI 模型的语义搜索方法。
3260/// 
3261/// ### 原理
3262/// 
3263/// 1. **TF（词频）**: 词在文档中出现的频率
3264///    TF(word, doc) = count(word in doc) / len(doc)
3265/// 
3266/// 2. **IDF（逆文档频率）**: 词在整个文档集合中的稀有程度
3267///    IDF(word) = log(total_docs / docs_containing_word)
3268/// 
3269/// 3. **TF-IDF**: TF × IDF
3270///    高 TF-IDF = 词在此文档中重要，但在其他文档中不常见
3271/// 
3272/// ### 示例
3273/// 
3274/// ```ignore
3275/// 文档1: "使用 PostgreSQL 数据库"
3276/// 文档2: "Redis 缓存配置"
3277/// 文档3: "数据库连接池设置"
3278/// 
3279/// 查询: "数据库"
3280/// 
3281/// TF-IDF("数据库", 文档1) = 1/3 × log(3/2) = 0.33 × 0.41 = 0.14
3282/// TF-IDF("数据库", 文档3) = 1/4 × log(3/2) = 0.25 × 0.41 = 0.10
3283/// 
3284/// 结果: 文档1 > 文档3 > 文档2
3285/// ```
3286pub struct TfIdfSearch {
3287    /// Word frequency in each document.
3288    doc_word_freq: HashMap<String, HashMap<String, f32>>,
3289    /// Total documents.
3290    total_docs: usize,
3291    /// IDF cache.
3292    idf_cache: HashMap<String, f32>,
3293}
3294
3295impl TfIdfSearch {
3296    /// Create a new TF-IDF search instance.
3297    pub fn new() -> Self {
3298        Self {
3299            doc_word_freq: HashMap::new(),
3300            total_docs: 0,
3301            idf_cache: HashMap::new(),
3302        }
3303    }
3304    
3305    /// Index all memories for TF-IDF search.
3306    pub fn index(&mut self, memory: &AutoMemory) {
3307        self.clear();
3308        self.total_docs = memory.entries.len();
3309        
3310        for entry in &memory.entries {
3311            let words = self.tokenize(&entry.content);
3312            let word_freq = self.compute_word_freq(&words);
3313            self.doc_word_freq.insert(entry.content.clone(), word_freq);
3314        }
3315        
3316        // Compute IDF for all words
3317        self.compute_idf();
3318    }
3319    
3320    /// Tokenize text into words.
3321    /// Supports both space-separated languages and CJK characters.
3322    fn tokenize(&self, text: &str) -> Vec<String> {
3323        let lower = text.to_lowercase();
3324        let mut tokens = Vec::new();
3325        
3326        // Split by whitespace first
3327        for word in lower.split_whitespace() {
3328            let trimmed = word.trim_matches(|c: char| !c.is_alphanumeric());
3329            if trimmed.len() > 1 {
3330                tokens.push(trimmed.to_string());
3331            }
3332            
3333            // For CJK characters, also add individual characters and bigrams
3334            let chars: Vec<char> = trimmed.chars().collect();
3335            let has_cjk = chars.iter().any(|c| Self::is_cjk(*c));
3336            
3337            if has_cjk {
3338                // Add individual CJK characters
3339                for c in &chars {
3340                    if Self::is_cjk(*c) {
3341                        tokens.push(c.to_string());
3342                    }
3343                }
3344                // Add bigrams for CJK
3345                for window in chars.windows(2) {
3346                    if Self::is_cjk(window[0]) || Self::is_cjk(window[1]) {
3347                        tokens.push(window.iter().collect::<String>());
3348                    }
3349                }
3350            }
3351        }
3352        
3353        tokens
3354    }
3355    
3356    /// Check if a character is CJK (Chinese/Japanese/Korean).
3357    fn is_cjk(c: char) -> bool {
3358        matches!(c,
3359            '\u{4E00}'..='\u{9FFF}' |   // CJK Unified Ideographs
3360            '\u{3400}'..='\u{4DBF}' |   // CJK Extension A
3361            '\u{F900}'..='\u{FAFF}' |   // CJK Compatibility Ideographs
3362            '\u{3000}'..='\u{303F}' |   // CJK Symbols and Punctuation
3363            '\u{3040}'..='\u{309F}' |   // Hiragana
3364            '\u{30A0}'..='\u{30FF}'     // Katakana
3365        )
3366    }
3367    
3368    /// Compute word frequency in a document.
3369    fn compute_word_freq(&self, words: &[String]) -> HashMap<String, f32> {
3370        let total = words.len() as f32;
3371        let mut freq = HashMap::new();
3372        
3373        for word in words {
3374            *freq.entry(word.clone()).or_insert(0.0) += 1.0;
3375        }
3376        
3377        // Normalize by total words
3378        for (_, count) in freq.iter_mut() {
3379            *count /= total;
3380        }
3381        
3382        freq
3383    }
3384    
3385    /// Compute IDF for all words.
3386    fn compute_idf(&mut self) {
3387        // Count documents containing each word
3388        let mut word_doc_count: HashMap<String, usize> = HashMap::new();
3389        
3390        for word_freq in &self.doc_word_freq {
3391            for word in word_freq.1.keys() {
3392                *word_doc_count.entry(word.clone()).or_insert(0) += 1;
3393            }
3394        }
3395        
3396        // Compute IDF
3397        for (word, count) in word_doc_count {
3398            let idf = (self.total_docs as f32 / count as f32).ln();
3399            self.idf_cache.insert(word, idf);
3400        }
3401    }
3402    
3403    /// Search using TF-IDF similarity.
3404    pub fn search(&self, query: &str, limit: Option<usize>) -> Vec<(String, f32)> {
3405        let query_words = self.tokenize(query);
3406        let query_freq = self.compute_word_freq(&query_words);
3407        
3408        let mut results: Vec<(String, f32)> = Vec::new();
3409        
3410        for (doc, doc_freq) in &self.doc_word_freq {
3411            // Compute TF-IDF dot product similarity
3412            let similarity = self.compute_similarity(&query_freq, doc_freq);
3413            
3414            if similarity > 0.0 {
3415                results.push((doc.clone(), similarity));
3416            }
3417        }
3418        
3419        // Sort by similarity
3420        results.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
3421        
3422        // Apply limit
3423        if let Some(max) = limit {
3424            results.into_iter().take(max).collect()
3425        } else {
3426            results
3427        }
3428    }
3429    
3430    /// Compute TF-IDF similarity between query and document.
3431    fn compute_similarity(&self, query_freq: &HashMap<String, f32>, doc_freq: &HashMap<String, f32>) -> f32 {
3432        let mut similarity = 0.0;
3433        
3434        for (word, tf_query) in query_freq {
3435            if let Some(tf_doc) = doc_freq.get(word)
3436                && let Some(idf) = self.idf_cache.get(word) {
3437                    // TF-IDF(query) × TF-IDF(doc)
3438                    similarity += tf_query * idf * tf_doc * idf;
3439                }
3440        }
3441        
3442        similarity
3443    }
3444    
3445    /// Clear all indices.
3446    pub fn clear(&mut self) {
3447        self.doc_word_freq.clear();
3448        self.idf_cache.clear();
3449        self.total_docs = 0;
3450    }
3451}
3452
3453impl Default for TfIdfSearch {
3454    fn default() -> Self {
3455        Self::new()
3456    }
3457}
3458
3459#[cfg(test)]
3460mod tests {
3461    use super::*;
3462
3463    #[test]
3464    fn test_memory_entry_creation() {
3465        let entry = MemoryEntry::new(
3466            MemoryCategory::Decision,
3467            "Decided to use PostgreSQL for database".to_string(),
3468            Some("session-123".to_string()),
3469        );
3470        assert_eq!(entry.category, MemoryCategory::Decision);
3471        assert_eq!(entry.importance, 90.0);
3472        assert!(!entry.is_manual);
3473    }
3474
3475    #[test]
3476    fn test_memory_reference_increase() {
3477        let mut entry = MemoryEntry::new(
3478            MemoryCategory::Finding,
3479            "API endpoint is at /api/v2".to_string(),
3480            None,
3481        );
3482        assert_eq!(entry.importance, 60.0);
3483        entry.mark_referenced();
3484        assert_eq!(entry.importance, 62.0);
3485        entry.mark_referenced();
3486        entry.mark_referenced();
3487        assert_eq!(entry.importance, 66.0);
3488    }
3489
3490    #[test]
3491    fn test_auto_memory_add_and_prune() {
3492        let mut memory = AutoMemory::new();
3493        memory.max_entries = 5;
3494
3495        for i in 0..10 {
3496            memory.add(MemoryEntry::new(
3497                MemoryCategory::Technical,
3498                format!("Note {}", i),
3499                None,
3500            ));
3501        }
3502
3503        // Should have pruned to max_entries
3504        assert!(memory.entries.len() <= memory.max_entries);
3505    }
3506
3507    #[test]
3508    fn test_duplicate_detection() {
3509        let mut memory = AutoMemory::new();
3510        memory.add_memory(
3511            MemoryCategory::Decision,
3512            "Use PostgreSQL".to_string(),
3513            None,
3514        );
3515        
3516        // Should not add duplicate
3517        memory.add_memory(
3518            MemoryCategory::Decision,
3519            "Use PostgreSQL".to_string(),
3520            None,
3521        );
3522        
3523        assert_eq!(memory.entries.len(), 1);
3524    }
3525
3526    #[test]
3527    fn test_memory_detection() {
3528        // Test decision detection
3529        let text = "我决定使用 React 作为前端框架";
3530        let entries = detect_memories_from_text(text, None);
3531        assert!(!entries.is_empty());
3532        assert_eq!(entries[0].category, MemoryCategory::Decision);
3533        
3534        // Test solution detection (with more specific pattern)
3535        let text2 = "解决了认证问题，通过添加 token refresh 机制";
3536        let entries2 = detect_memories_from_text(text2, None);
3537        assert!(!entries2.is_empty());
3538        assert_eq!(entries2[0].category, MemoryCategory::Solution);
3539        
3540        // Test preference detection
3541        let text3 = "我偏好使用 TypeScript 进行开发";
3542        let entries3 = detect_memories_from_text(text3, None);
3543        assert!(!entries3.is_empty());
3544        assert_eq!(entries3[0].category, MemoryCategory::Preference);
3545    }
3546
3547    #[test]
3548    fn test_category_importance() {
3549        assert!(MemoryCategory::Decision.default_importance() > MemoryCategory::Structure.default_importance());
3550        assert!(MemoryCategory::Solution.default_importance() > MemoryCategory::Technical.default_importance());
3551    }
3552
3553    #[test]
3554    fn test_top_n_entries() {
3555        let mut memory = AutoMemory::new();
3556        
3557        // Add entries with different importance
3558        memory.add(MemoryEntry::new(MemoryCategory::Decision, "Decision 1".into(), None));
3559        memory.add(MemoryEntry::new(MemoryCategory::Finding, "Finding 1".into(), None));
3560        memory.add(MemoryEntry::new(MemoryCategory::Structure, "Structure 1".into(), None));
3561
3562        let top = memory.top_n(2);
3563        assert_eq!(top.len(), 2);
3564        assert_eq!(top[0].category, MemoryCategory::Decision); // Highest importance
3565    }
3566
3567    #[test]
3568    fn test_similarity_calculation() {
3569        // Test exact match
3570        let sim = AutoMemory::calculate_similarity("hello world", "hello world");
3571        assert_eq!(sim, 1.0);
3572        
3573        // Test no match
3574        let sim = AutoMemory::calculate_similarity("hello world", "foo bar");
3575        assert_eq!(sim, 0.0);
3576        
3577        // Test partial match (50% overlap)
3578        let sim = AutoMemory::calculate_similarity("hello world", "hello there");
3579        assert!(sim > 0.0 && sim < 1.0);
3580        
3581        // Test empty input
3582        let sim = AutoMemory::calculate_similarity("", "hello");
3583        assert_eq!(sim, 0.0);
3584    }
3585    
3586    #[test]
3587    fn test_similarity_threshold() {
3588        let mut memory = AutoMemory::new();
3589        
3590        // Add a long enough entry (>= MIN_SIMILARITY_LENGTH)
3591        memory.add(MemoryEntry::new(
3592            MemoryCategory::Decision,
3593            "We decided to use PostgreSQL for our database system".to_string(),
3594            None,
3595        ));
3596        
3597        // Should not add similar entry
3598        memory.add_memory(
3599            MemoryCategory::Decision,
3600            "We decided to use PostgreSQL for our database backend".to_string(),
3601            None,
3602        );
3603        
3604        // Should have only 1 entry (similar detected)
3605        assert_eq!(memory.entries.len(), 1);
3606    }
3607    
3608    #[test]
3609    fn test_short_content_skipped() {
3610        let mut memory = AutoMemory::new();
3611        
3612        // Short content should be skipped by has_similar
3613        memory.add(MemoryEntry::new(
3614            MemoryCategory::Technical,
3615            "short".to_string(),  // Only 5 chars, below MIN_SIMILARITY_LENGTH
3616            None,
3617        ));
3618        
3619        // Another short entry should be added (not detected as similar)
3620        memory.add_memory(
3621            MemoryCategory::Technical,
3622            "brief".to_string(),
3623            None,
3624        );
3625        
3626        assert_eq!(memory.entries.len(), 2);
3627    }
3628    
3629    #[test]
3630    fn test_prune_preserves_manual() {
3631        let mut memory = AutoMemory::new();
3632        memory.max_entries = 3;
3633        
3634        // Add manual entry (should always be preserved)
3635        let mut manual = MemoryEntry::manual(MemoryCategory::Decision, "Manual decision".into());
3636        manual.importance = 10.0; // Low importance but manual
3637        memory.add(manual);
3638        
3639        // Add high importance auto entries
3640        for i in 0..5 {
3641            let entry = MemoryEntry::new(
3642                MemoryCategory::Decision,
3643                format!("Auto decision {}", i),
3644                None,
3645            );
3646            memory.add(entry);
3647        }
3648        
3649        // Manual entry should still exist after prune
3650        assert!(memory.entries.iter().any(|e| e.is_manual));
3651        assert!(memory.entries.len() <= memory.max_entries);
3652    }
3653    
3654    #[test]
3655    fn test_deduplicate_entries() {
3656        // Use more similar entries (should have similarity >= 0.8)
3657        let entries = vec![
3658            MemoryEntry::new(MemoryCategory::Decision, "We chose PostgreSQL database system for our backend".into(), None),
3659            MemoryEntry::new(MemoryCategory::Decision, "We chose PostgreSQL database system backend".into(), None),
3660            MemoryEntry::new(MemoryCategory::Decision, "Using Redis for caching layer".into(), None),
3661        ];
3662        
3663        let deduped = deduplicate_entries(entries);
3664        
3665        // Should deduplicate similar entries
3666        assert!(deduped.len() >= 1);
3667        assert!(deduped.len() <= 3);
3668        
3669        // Should keep longer (more detailed) entry when similar
3670        let pg_entries: Vec<_> = deduped.iter()
3671            .filter(|e| e.content.to_lowercase().contains("postgresql"))
3672            .collect();
3673        
3674        if pg_entries.len() == 1 {
3675            // Correctly deduplicated to one PostgreSQL entry
3676            // Should be the longer one
3677            assert!(pg_entries[0].content.contains("backend"));
3678        }
3679    }
3680    
3681    #[test]
3682    fn test_memory_detection_edge_cases() {
3683        // Empty input
3684        let entries = detect_memories_from_text("", None);
3685        assert!(entries.is_empty());
3686        
3687        // Very short input (below MIN_MEMORY_CONTENT_LENGTH)
3688        let entries = detect_memories_from_text("决定", None);
3689        assert!(entries.is_empty());
3690        
3691        // Input with only generic keywords
3692        let entries = detect_memories_from_text("使用", None);
3693        assert!(entries.is_empty());
3694        
3695        // Multiple matches in same text
3696        let text = "我决定使用React，解决了性能问题通过添加缓存机制";
3697        let entries = detect_memories_from_text(text, None);
3698        assert!(entries.len() <= MAX_DETECTED_ENTRIES);
3699    }
3700    
3701    #[test]
3702    fn test_importance_ceiling() {
3703        let mut entry = MemoryEntry::new(
3704            MemoryCategory::Decision,
3705            "Important decision".into(),
3706            None,
3707        );
3708        
3709        // Start at 90 (Decision default)
3710        assert_eq!(entry.importance, 90.0);
3711        
3712        // Reference many times
3713        for _ in 0..10 {
3714            entry.mark_referenced();
3715        }
3716        
3717        // Should cap at 100
3718        assert!(entry.importance <= 100.0);
3719    }
3720
3721    #[test]
3722    fn test_time_decay() {
3723        let mut memory = AutoMemory::new();
3724        memory.min_importance = 30.0;
3725        
3726        // Add manual entry (should never decay)
3727        let mut manual = MemoryEntry::manual(MemoryCategory::Decision, "Manual entry".into());
3728        manual.importance = 50.0;
3729        memory.add(manual);
3730        
3731        // Add auto entry with old reference date (simulate 60 days ago)
3732        let mut old_entry = MemoryEntry::new(
3733            MemoryCategory::Technical,
3734            "Old technical note".into(),
3735            None,
3736        );
3737        old_entry.importance = 60.0;
3738        // Set last_referenced to 60 days ago
3739        old_entry.last_referenced = Utc::now() - chrono::Duration::days(60);
3740        memory.add(old_entry);
3741        
3742        // Add recent entry (should not decay)
3743        let recent_entry = MemoryEntry::new(
3744            MemoryCategory::Finding,
3745            "Recent finding".into(),
3746            None,
3747        );
3748        memory.add(recent_entry);
3749        
3750        // Apply time decay
3751        memory.apply_time_decay();
3752        
3753        // Manual entry should not decay
3754        let manual_entry = memory.entries.iter().find(|e| e.is_manual);
3755        assert!(manual_entry.is_some());
3756        assert_eq!(manual_entry.unwrap().importance, 50.0);
3757        
3758        // Recent entry should not decay (still > 30 days threshold)
3759        let recent = memory.entries.iter().find(|e| e.content.contains("Recent"));
3760        assert!(recent.is_some());
3761        assert!(recent.unwrap().importance >= 60.0);  // Finding default
3762        
3763        // Old entry should have decayed
3764        let old = memory.entries.iter().find(|e| e.content.contains("Old"));
3765        if let Some(old_entry) = old {
3766            // Should have decayed (60 days - 30 days threshold = 30 days decay period)
3767            // Decay factor = 0.5^1 = 0.5, so importance = 60 * 0.5 = 30
3768            assert!(old_entry.importance < 60.0);
3769            // Should still be above minimum threshold
3770            assert!(old_entry.importance >= memory.min_importance * 0.5);
3771        }
3772    }
3773
3774    #[test]
3775    fn test_parse_memory_response() {
3776        // Test valid JSON response
3777        let json = r#"{"memories": [{"category": "decision", "content": "决定使用 PostgreSQL 作为数据库", "importance": 90}, {"category": "preference", "content": "我偏好 TypeScript 而非 JavaScript", "importance": 70}]}"#;
3778        let entries = parse_memory_response(json, None).unwrap();
3779        assert_eq!(entries.len(), 2);
3780        
3781        // Check both entries exist (order may change due to deduplication sorting)
3782        let has_decision = entries.iter().any(|e| e.category == MemoryCategory::Decision);
3783        let has_preference = entries.iter().any(|e| e.category == MemoryCategory::Preference);
3784        assert!(has_decision);
3785        assert!(has_preference);
3786        
3787        // Check importance values
3788        let decision_entry = entries.iter().find(|e| e.category == MemoryCategory::Decision);
3789        assert!(decision_entry.is_some());
3790        assert_eq!(decision_entry.unwrap().importance, 90.0);
3791        
3792        // Test empty response
3793        let empty_json = r#"{"memories": []}"#;
3794        let empty_entries = parse_memory_response(empty_json, None).unwrap();
3795        assert!(empty_entries.is_empty());
3796        
3797        // Test JSON with markdown code blocks
3798        let markdown_json = r#"```json
3799{"memories": [{"category": "solution", "content": "通过添加 middleware 修复认证问题", "importance": 85}]}
3800```"#;
3801        let markdown_entries = parse_memory_response(markdown_json, None).unwrap();
3802        assert_eq!(markdown_entries.len(), 1);
3803        assert_eq!(markdown_entries[0].category, MemoryCategory::Solution);
3804        
3805        // Test unknown category (should be skipped)
3806        let unknown_json = r#"{"memories": [{"category": "unknown", "content": "This should be skipped content", "importance": 50}]}"#;
3807        let unknown_entries = parse_memory_response(unknown_json, None).unwrap();
3808        assert!(unknown_entries.is_empty());
3809        
3810        // Test short content (should be skipped)
3811        let short_json = r#"{"memories": [{"category": "finding", "content": "short", "importance": 60}]}"#;
3812        let short_entries = parse_memory_response(short_json, None).unwrap();
3813        assert!(short_entries.is_empty());
3814    }
3815
3816    #[test]
3817    fn test_public_has_similar() {
3818        let mut memory = AutoMemory::new();
3819        
3820        // Add an entry
3821        memory.add(MemoryEntry::new(
3822            MemoryCategory::Decision,
3823            "We decided to use PostgreSQL for our main database system".to_string(),
3824            None,
3825        ));
3826        
3827        // Test exact match
3828        assert!(memory.has_similar("We decided to use PostgreSQL for our main database system"));
3829        
3830        // Test very similar content (high similarity > 0.7)
3831        // Original: "We decided to use PostgreSQL for our main database system"
3832        // Similar:  "We decided to use PostgreSQL for our main database backend"
3833        // Similarity = shared words / total unique words
3834        assert!(memory.has_similar("We decided to use PostgreSQL for our main database backend"));
3835        
3836        // Test moderately similar (should NOT match, < 0.7)
3837        assert!(!memory.has_similar("We decided to use Redis for caching"));
3838        
3839        // Test completely different content
3840        assert!(!memory.has_similar("The project uses React for frontend"));
3841        
3842        // Test short content (should return false)
3843        assert!(!memory.has_similar("short"));
3844    }
3845
3846    #[test]
3847    fn test_public_prune() {
3848        let mut memory = AutoMemory::new();
3849        memory.max_entries = 5;
3850        memory.min_importance = 30.0;
3851        
3852        // Add entries exceeding max
3853        for i in 0..10 {
3854            memory.add(MemoryEntry::new(
3855                MemoryCategory::Technical,
3856                format!("Technical note number {} with sufficient length", i),
3857                None,
3858            ));
3859        }
3860        
3861        // Manually prune
3862        memory.prune();
3863        
3864        // Should be within limit
3865        assert!(memory.entries.len() <= memory.max_entries);
3866    }
3867
3868    #[test]
3869    fn test_statistics() {
3870        let mut memory = AutoMemory::new();
3871        
3872        // Add various entries
3873        memory.add(MemoryEntry::new(MemoryCategory::Decision, "Decision one with enough content".to_string(), None));
3874        memory.add(MemoryEntry::new(MemoryCategory::Preference, "Preference for TypeScript over JavaScript".to_string(), None));
3875        memory.add(MemoryEntry::manual(MemoryCategory::Technical, "Manual technical note".to_string()));
3876        
3877        // Reference some entries
3878        memory.entries[0].mark_referenced();
3879        memory.entries[0].mark_referenced();
3880        memory.entries[0].mark_referenced();
3881        
3882        let stats = memory.generate_statistics();
3883        
3884        assert_eq!(stats.total, 3);
3885        assert_eq!(stats.manual, 1);
3886        assert_eq!(stats.auto, 2);
3887        assert_eq!(stats.highly_referenced, 1);  // First entry has 3 references
3888        assert!(stats.by_category.contains_key(&MemoryCategory::Decision));
3889        assert!(stats.by_category.contains_key(&MemoryCategory::Preference));
3890        assert!(stats.by_category.contains_key(&MemoryCategory::Technical));
3891        assert!(stats.avg_importance > 0.0);
3892    }
3893
3894    #[test]
3895    fn test_memory_config() {
3896        // Test default config
3897        let config = MemoryConfig::default();
3898        assert_eq!(config.max_entries, 100);
3899        assert_eq!(config.min_importance, 30.0);
3900        assert_eq!(config.decay_start_days, 30);
3901        assert_eq!(config.decay_rate, 0.5);
3902        
3903        // Test minimal config
3904        let minimal = MemoryConfig::minimal();
3905        assert_eq!(minimal.max_entries, 50);
3906        assert!(minimal.min_importance > config.min_importance);
3907        
3908        // Test archival config
3909        let archival = MemoryConfig::archival();
3910        assert_eq!(archival.max_entries, 500);
3911        assert!(archival.min_importance < config.min_importance);
3912        
3913        // Test with_max_entries
3914        let custom = MemoryConfig::with_max_entries(200);
3915        assert_eq!(custom.max_entries, 200);
3916        assert_eq!(custom.min_importance, 30.0);  // Other defaults preserved
3917    }
3918
3919    #[test]
3920    fn test_auto_memory_with_config() {
3921        let config = MemoryConfig::minimal();
3922        let mut memory = AutoMemory::with_config(config);
3923        
3924        assert_eq!(memory.max_entries, 50);
3925        assert_eq!(memory.min_importance, 50.0);
3926        
3927        // Add entries
3928        for i in 0..60 {
3929            memory.add(MemoryEntry::new(
3930                MemoryCategory::Technical,
3931                format!("Technical note {} with enough length for detection", i),
3932                None,
3933            ));
3934        }
3935        
3936        // Should prune to config limit
3937        assert!(memory.entries.len() <= 50);
3938    }
3939
3940    #[test]
3941    fn test_batch_add() {
3942        let mut memory = AutoMemory::new();
3943        
3944        // Batch add multiple entries
3945        let entries: Vec<MemoryEntry> = vec![
3946            MemoryEntry::new(MemoryCategory::Decision, "First decision with sufficient content".into(), None),
3947            MemoryEntry::new(MemoryCategory::Finding, "First finding with sufficient content".into(), None),
3948            MemoryEntry::new(MemoryCategory::Solution, "First solution with sufficient content".into(), None),
3949        ];
3950        
3951        memory.add_batch(entries);
3952        assert_eq!(memory.entries.len(), 3);
3953        
3954        // Batch add with duplicates
3955        let duplicate_entries: Vec<MemoryEntry> = vec![
3956            MemoryEntry::new(MemoryCategory::Decision, "First decision with sufficient content".into(), None),  // Duplicate
3957            MemoryEntry::new(MemoryCategory::Technical, "New technical note with sufficient content".into(), None),
3958        ];
3959        
3960        memory.add_batch(duplicate_entries);
3961        assert_eq!(memory.entries.len(), 4);  // Only 1 new entry added
3962    }
3963
3964    #[test]
3965    fn test_search_with_limit() {
3966        let mut memory = AutoMemory::new();
3967        
3968        // Add multiple entries with same keyword
3969        for i in 0..10 {
3970            memory.add(MemoryEntry::new(
3971                MemoryCategory::Technical,
3972                format!("PostgreSQL technical note {} with details", i),
3973                None,
3974            ));
3975        }
3976        
3977        // Search without limit
3978        let all = memory.search("postgresql");
3979        assert_eq!(all.len(), 10);
3980        
3981        // Search with limit
3982        let limited = memory.search_with_limit("postgresql", Some(5));
3983        assert_eq!(limited.len(), 5);
3984        
3985        // Should return highest importance first
3986        assert!(limited[0].importance >= limited[limited.len() - 1].importance);
3987    }
3988
3989    #[test]
3990    fn test_multi_keyword_search() {
3991        let mut memory = AutoMemory::new();
3992        
3993        memory.add(MemoryEntry::new(MemoryCategory::Decision, "Decided to use PostgreSQL".into(), None));
3994        memory.add(MemoryEntry::new(MemoryCategory::Technical, "Using Redis for caching".into(), None));
3995        memory.add(MemoryEntry::new(MemoryCategory::Solution, "Fixed by adding middleware".into(), None));
3996        
3997        // Search with multiple keywords
3998        let results = memory.search_multi(&["postgresql", "redis"]);
3999        assert_eq!(results.len(), 2);
4000        
4001        // Search with keyword that matches nothing
4002        let empty = memory.search_multi(&["mongodb"]);
4003        assert!(empty.is_empty());
4004    }
4005
4006    #[test]
4007    fn test_mark_referenced_with_increment() {
4008        let mut entry = MemoryEntry::new(
4009            MemoryCategory::Finding,
4010            "API endpoint location".into(),
4011            None,
4012        );
4013        
4014        assert_eq!(entry.importance, 60.0);
4015        
4016        // Custom increment
4017        entry.mark_referenced_with_increment(5.0);
4018        assert_eq!(entry.importance, 65.0);
4019        
4020        // Default increment (2.0)
4021        entry.mark_referenced();
4022        assert_eq!(entry.importance, 67.0);
4023        
4024        // Should cap at 100
4025        for _ in 0..20 {
4026            entry.mark_referenced_with_increment(10.0);
4027        }
4028        assert!(entry.importance <= 100.0);
4029    }
4030
4031    #[test]
4032    fn test_search_index() {
4033        let mut memory = AutoMemory::new();
4034        
4035        // Add multiple entries
4036        for i in 0..20 {
4037            memory.add(MemoryEntry::new(
4038                MemoryCategory::Technical,
4039                format!("PostgreSQL technical note {} with sufficient content length", i),
4040                None,
4041            ));
4042        }
4043        for i in 0..10 {
4044            memory.add(MemoryEntry::new(
4045                MemoryCategory::Decision,
4046                format!("Redis decision {} with sufficient content for testing", i),
4047                None,
4048            ));
4049        }
4050        
4051        // Rebuild index
4052        memory.rebuild_index();
4053        assert!(memory.search_index.is_some());
4054        
4055        // Test fast search
4056        let results = memory.search_fast("postgresql", Some(5));
4057        assert!(results.len() <= 5);
4058        assert!(results.iter().all(|e| e.content.to_lowercase().contains("postgresql")));
4059        
4060        // Test fast multi-keyword search
4061        let multi_results = memory.search_multi_fast(&["postgresql", "redis"]);
4062        assert!(multi_results.len() > 0);
4063        
4064        // Test fast category lookup
4065        let tech_entries = memory.by_category_fast(MemoryCategory::Technical);
4066        assert_eq!(tech_entries.len(), 20);
4067        
4068        let decision_entries = memory.by_category_fast(MemoryCategory::Decision);
4069        assert_eq!(decision_entries.len(), 10);
4070        
4071        // Test fast top_n
4072        let top = memory.top_n_fast(5);
4073        assert_eq!(top.len(), 5);
4074        // Results should be sorted by importance (Decision > Technical)
4075        assert!(top[0].importance >= top[top.len() - 1].importance);
4076    }
4077
4078    #[test]
4079    fn test_index_auto_rebuild() {
4080        let mut memory = AutoMemory::new();
4081        
4082        // Index should be None initially
4083        assert!(memory.search_index.is_none());
4084        
4085        // Fast search should auto-build index
4086        memory.add(MemoryEntry::new(
4087            MemoryCategory::Decision,
4088            "Test decision with sufficient content length".into(),
4089            None,
4090        ));
4091        
4092        let results = memory.search_fast("test", None);
4093        assert!(results.len() > 0);
4094        assert!(memory.search_index.is_some());  // Index auto-built
4095        
4096        // Modify memory should invalidate index
4097        memory.clear();
4098        assert!(memory.search_index.is_none());
4099        
4100        // Add new entry should rebuild on next search
4101        memory.add(MemoryEntry::new(
4102            MemoryCategory::Finding,
4103            "New finding with sufficient content".into(),
4104            None,
4105        ));
4106        let _ = memory.search_fast("finding", None);
4107        assert!(memory.search_index.is_some());
4108    }
4109
4110    #[test]
4111    fn test_cosine_similarity() {
4112        // Identical vectors
4113        let a = vec![1.0, 0.0, 0.0];
4114        let b = vec![1.0, 0.0, 0.0];
4115        assert_eq!(SemanticUtils::cosine_similarity(&a, &b), 1.0);
4116        
4117        // Orthogonal vectors (no similarity)
4118        let a = vec![1.0, 0.0, 0.0];
4119        let b = vec![0.0, 1.0, 0.0];
4120        assert!((SemanticUtils::cosine_similarity(&a, &b) - 0.0).abs() < 0.001);
4121        
4122        // Opposite vectors
4123        let a = vec![1.0, 0.0, 0.0];
4124        let b = vec![-1.0, 0.0, 0.0];
4125        assert!((SemanticUtils::cosine_similarity(&a, &b) - (-1.0)).abs() < 0.001);
4126        
4127        // Partial similarity
4128        let a = vec![1.0, 1.0, 0.0];
4129        let b = vec![1.0, 0.0, 0.0];
4130        let sim = SemanticUtils::cosine_similarity(&a, &b);
4131        assert!(sim > 0.0 && sim < 1.0);
4132        
4133        // Empty vectors
4134        let a: Vec<f32> = vec![];
4135        let b: Vec<f32> = vec![];
4136        assert_eq!(SemanticUtils::cosine_similarity(&a, &b), 0.0);
4137    }
4138
4139    #[test]
4140    fn test_tfidf_search() {
4141        let mut memory = AutoMemory::new();
4142        
4143        memory.add(MemoryEntry::new(MemoryCategory::Decision, "使用 PostgreSQL 作为主数据库系统".into(), None));
4144        memory.add(MemoryEntry::new(MemoryCategory::Technical, "Redis 缓存配置为 10 个连接".into(), None));
4145        memory.add(MemoryEntry::new(MemoryCategory::Solution, "通过添加 middleware 修复认证问题".into(), None));
4146        memory.add(MemoryEntry::new(MemoryCategory::Finding, "数据库连接池设置为 20".into(), None));
4147        
4148        let mut tfidf = TfIdfSearch::new();
4149        tfidf.index(&memory);
4150        
4151        // Search for "数据库" - should find PostgreSQL and 连接池 entries
4152        let results = tfidf.search("数据库", Some(5));
4153        assert!(!results.is_empty());
4154        // First result should contain "数据库"
4155        assert!(results[0].0.contains("数据库"));
4156        
4157        // Search for "Redis" - should find Redis entry
4158        let results = tfidf.search("redis", Some(5));
4159        assert!(!results.is_empty());
4160        assert!(results[0].0.to_lowercase().contains("redis"));
4161        
4162        // Search for something not in any entry
4163        let results = tfidf.search("mongodb", Some(5));
4164        assert!(results.is_empty());
4165    }
4166
4167    #[test]
4168    fn test_tfidf_ranking() {
4169        let mut memory = AutoMemory::new();
4170        
4171        // Add entries with varying relevance to "数据库"
4172        memory.add(MemoryEntry::new(MemoryCategory::Decision, "使用 PostgreSQL 数据库 作为主数据库".into(), None));
4173        memory.add(MemoryEntry::new(MemoryCategory::Technical, "数据库连接池配置".into(), None));
4174        memory.add(MemoryEntry::new(MemoryCategory::Solution, "修复了前端样式问题".into(), None));
4175        
4176        let mut tfidf = TfIdfSearch::new();
4177        tfidf.index(&memory);
4178        
4179        let results = tfidf.search("数据库", None);
4180        
4181        // Should rank entries with more "数据库" mentions higher
4182        if results.len() >= 2 {
4183            assert!(results[0].1 >= results[1].1);
4184        }
4185    }
4186
4187    #[test]
4188    fn test_conflict_detection() {
4189        let mut memory = AutoMemory::new();
4190        
4191        // Add initial decision
4192        memory.add_memory(
4193            MemoryCategory::Decision,
4194            "决定使用 PostgreSQL 作为主数据库".to_string(),
4195            None,
4196        );
4197        assert_eq!(memory.entries.len(), 1);
4198        assert!(memory.entries[0].content.contains("PostgreSQL"));
4199        
4200        // Add conflicting decision (same topic, different choice)
4201        memory.add_memory(
4202            MemoryCategory::Decision,
4203            "决定使用 MySQL 作为主数据库".to_string(),
4204            None,
4205        );
4206        
4207        // Should have replaced the old one
4208        assert_eq!(memory.entries.len(), 1);
4209        assert!(memory.entries[0].content.contains("MySQL"));
4210    }
4211
4212    #[test]
4213    fn test_conflict_with_change_signal() {
4214        let mut memory = AutoMemory::new();
4215        
4216        // Add initial preference
4217        memory.add_memory(
4218            MemoryCategory::Preference,
4219            "偏好使用 vim 编辑器".to_string(),
4220            None,
4221        );
4222        assert_eq!(memory.entries.len(), 1);
4223        
4224        // Add replacement with change signal
4225        memory.add_memory(
4226            MemoryCategory::Preference,
4227            "改用 vscode 编辑器，不再使用 vim".to_string(),
4228            None,
4229        );
4230        
4231        // Should have replaced
4232        assert_eq!(memory.entries.len(), 1);
4233        assert!(memory.entries[0].content.contains("vscode"));
4234    }
4235
4236    #[test]
4237    fn test_no_false_conflict() {
4238        let mut memory = AutoMemory::new();
4239        
4240        // Add two different decisions (different topics)
4241        memory.add_memory(
4242            MemoryCategory::Decision,
4243            "决定使用 PostgreSQL 作为主数据库".to_string(),
4244            None,
4245        );
4246        memory.add_memory(
4247            MemoryCategory::Decision,
4248            "决定使用 Redis 作为缓存系统".to_string(),
4249            None,
4250        );
4251        
4252        // Both should exist (different topics, no conflict)
4253        assert_eq!(memory.entries.len(), 2);
4254    }
4255
4256    #[test]
4257    fn test_contextual_summary() {
4258        let mut memory = AutoMemory::new();
4259        
4260        // Add various memories
4261        memory.add(MemoryEntry::new(MemoryCategory::Decision, "决定使用 PostgreSQL 作为主数据库".into(), None));
4262        memory.add(MemoryEntry::new(MemoryCategory::Technical, "前端使用 React 框架开发".into(), None));
4263        memory.add(MemoryEntry::new(MemoryCategory::Solution, "通过添加 Redis 缓存解决性能问题".into(), None));
4264        memory.add(MemoryEntry::new(MemoryCategory::Finding, "API 响应时间在 200ms 以内".into(), None));
4265        memory.add(MemoryEntry::new(MemoryCategory::Preference, "偏好使用 TypeScript 而非 JavaScript".into(), None));
4266        
4267        // Context about database - should prioritize database-related memories
4268        let db_summary = memory.generate_contextual_summary("数据库查询优化", 3);
4269        assert!(db_summary.contains("PostgreSQL"));
4270        
4271        // Context about frontend - should prioritize frontend-related memories
4272        let fe_summary = memory.generate_contextual_summary("React 组件开发", 3);
4273        assert!(fe_summary.contains("React"));
4274        
4275        // Empty context - should fall back to importance-based
4276        let empty_summary = memory.generate_contextual_summary("", 3);
4277        assert!(!empty_summary.is_empty());
4278    }
4279
4280    #[test]
4281    fn test_low_quality_memory_filter() {
4282        // Formatting artifacts should be rejected
4283        assert!(is_low_quality_memory("│  🎯 决策: 决定使用 PostgreSQL."));
4284        assert!(is_low_quality_memory("├── Structure: 入口文件是 main."));
4285        assert!(is_low_quality_memory("🔧 解决方案: 通过添加 middleware."));
4286        assert!(is_low_quality_memory("【自动记忆摘要】"));
4287        assert!(is_low_quality_memory("short"));
4288        
4289        // Real content should pass
4290        assert!(!is_low_quality_memory("决定使用 PostgreSQL 作为主数据库系统"));
4291        assert!(!is_low_quality_memory("通过添加 Redis 缓存层解决了性能问题"));
4292        assert!(!is_low_quality_memory("用户偏好使用 TypeScript 进行开发"));
4293    }
4294}
matrixcode_core/memory.rs

matrixcode_core/
memory.rs