Skip to main content

matrixcode_core/memory/
extractor.rs

1//! Memory extraction: AI-based and rule-based detection.
2
3use crate::truncate::truncate_chars;
4use anyhow::Result;
5use serde::Deserialize;
6
7use super::config::*;
8use super::entry::{MemoryCategory, MemoryEntry};
9use super::manager::AutoMemory;
10use super::conversation_pattern::{ConversationPattern, PatternType, PatternSource};
11use super::unified_extraction::{UnifiedExtractionResult, ExtractedKeywords};
12use crate::compress::FocusPoint;
13
14// ============================================================================
15// Memory Extractor Trait
16// ============================================================================
17
/// Trait for memory extraction implementations.
///
/// Implementors must be `Send + Sync`. The `#[async_trait::async_trait]`
/// attribute is what allows `extract` to be declared as an `async fn` here.
#[async_trait::async_trait]
pub trait MemoryExtractor: Send + Sync {
    /// Extract memories and focus points from conversation text using AI.
    ///
    /// # Arguments
    /// * `text` - conversation text to analyse.
    /// * `session_id` - optional session identifier; the implementation in this
    ///   file attaches it to every memory entry it creates.
    /// * `project_path` - optional project path; handled the same way as
    ///   `session_id` by the implementation in this file.
    ///
    /// # Errors
    /// Returns an error when extraction fails; which failures are possible
    /// depends on the implementation.
    async fn extract(
        &self,
        text: &str,
        session_id: Option<&str>,
        project_path: Option<&str>,
    ) -> Result<ExtractionResult>;

    /// Get the model name used for extraction.
    fn model_name(&self) -> &str;
}
32
/// Result of memory extraction (memories + focus points + conversation patterns).
///
/// All three collections may be empty; `detect_memories_smart` returns an
/// all-empty value when it skips detection.
#[derive(Debug, Clone)]
pub struct ExtractionResult {
    /// Memory entries produced by the extraction.
    pub memories: Vec<MemoryEntry>,
    /// Focus points reported for the conversation. The rule-based fallback
    /// path in this file always leaves this empty.
    pub focus_points: Vec<FocusPoint>,
    /// Extracted conversation patterns (reference and code patterns).
    pub conversation_patterns: Vec<ConversationPattern>,
}
41
/// AI-based memory extractor using a fast/cheap model.
///
/// Implements [`MemoryExtractor`] by sending the conversation text to
/// `provider` and parsing the JSON reply.
pub struct AiMemoryExtractor {
    /// Chat provider the extraction request is sent through.
    provider: Box<dyn crate::providers::Provider>,
    /// Model name this extractor was created with; returned by `model_name()`.
    model: String,
}
47
48impl AiMemoryExtractor {
49    /// Create a new AI memory extractor.
50    pub fn new(provider: Box<dyn crate::providers::Provider>, model: String) -> Self {
51        Self { provider, model }
52    }
53
54    /// Create a minimal extractor (for background tasks, uses simplified prompt).
55    /// This is more efficient for non-blocking background extraction.
56    pub fn new_minimal(model: String) -> Self {
57        // Create a minimal provider that uses the global config
58        // This is for background tasks, so we use a simplified approach
59        Self {
60            provider: crate::create_minimal_provider(&model),
61            model,
62        }
63    }
64}
65
/// System prompt (written in Chinese) used by `AiMemoryExtractor::extract`.
///
/// It describes the six memory categories (`decision`, `preference`,
/// `solution`, `finding`, `technical`, `structure`), what must not be stored,
/// the conversation-pattern rules, and the strict JSON reply format with the
/// top-level keys `memories`, `focus_points` and `conversation_patterns`.
/// That reply is deserialised by `parse_memory_response`, so the key names in
/// the example JSON must stay in sync with the structs declared there.
///
/// The text is sent to the model verbatim; editing it changes runtime behaviour.
const MEMORY_EXTRACT_SYSTEM_PROMPT: &str = r#"你是记忆提取助手。从对话中提取值得长期记忆的关键信息。

# 记忆类型

<types>
<type>
    <name>decision</name>
    <description>项目或技术选型的决定</description>
    <when_to_save>用户明确做出技术决策时</when_to_save>
    <body_structure>先写决策内容,然后 **Why:** 决策原因,**Context:** 适用场景</body_structure>
</type>
<type>
    <name>preference</name>
    <description>用户习惯或偏好</description>
    <when_to_save>用户表达"我喜欢/习惯/偏好"时</when_to_save>
    <body_structure>先写偏好内容,然后 **Why:** 偏好原因(如有)</body_structure>
</type>
<type>
    <name>solution</name>
    <description>解决问题的具体方法</description>
    <when_to_save>问题成功解决且方法可复用时</when_to_save>
    <body_structure>先写解决方案,然后 **Problem:** 解决的问题,**Key:** 关键步骤</body_structure>
</type>
<type>
    <name>finding</name>
    <description>重要发现或信息</description>
    <when_to_save>发现非显而易见的信息时</when_to_save>
</type>
<type>
    <name>technical</name>
    <description>技术栈或框架信息</description>
    <when_to_save>确认项目使用的技术时</when_to_save>
</type>
<type>
    <name>structure</name>
    <description>项目结构信息(重要!)</description>
    <when_to_save>发现关键模块位置、核心文件路径、代码组织方式时</when_to_save>
    <body_structure>先写结构描述,然后 **Location:** 具体路径,**Purpose:** 模块职责</body_structure>
    <example>"上下文压缩模块位于 packages/core/src/compress/。**Location:** packages/core/src/compress/compressor.rs 是核心入口,**Purpose:** 负责上下文 token 优化"</example>
</type>
</types>

# 不要保存什么到记忆中

- Git 历史、最近更改 — git log/blame 是权威来源
- 临时状态:进行中的任务、当前对话上下文
- 错误信息和调试细节 — 问题解决后无需保留
- 临时文件路径、临时变量名

# 重要:应该保存的结构信息

项目结构信息(structure 类型)应该保存,包括:
- 关键模块的位置(如 "compress 模块在 packages/core/src/compress/")
- 核心文件的功能(如 "agent/streaming.rs 负责流式响应处理")
- 常见问题的定位路径(如 "上下文大小判断在 compressor.rs 的 estimate_tokens 函数")
- 代码组织模式(如 "providers 模块实现了 Provider trait")

这些信息能大幅减少未来会话的探索时间!

# 对话模式提取

当对话文本较长时(超过500字符),还要提取对话中使用的模式:

1. **引用模式 (reference)**:用户如何引用之前的内容
   - 示例:"正如前面所说"、"接着刚才的话题"、"as mentioned"、"previously"

2. **代码模式 (code)**:对话中涉及的代码风格关键词
   - 示例:语言关键词(fn, function, class)、代码块标记(```)

模式提取规则:
- 只提取明确出现的模式,不要推测
- confidence 范围 0.0-1.0,越常见越低(常见模式置信度低)
- 只在文本 > 500 字符时提取模式

# 输出格式

严格 JSON:
{
  "memories": [
    {
      "category": "decision",
      "content": "采用 PostgreSQL 作为主数据库。**Why:** 性能要求和团队经验",
      "importance": 85,
      "keywords": ["PostgreSQL", "数据库", "database"],
      "tags": ["backend", "storage"]
    }
  ],
  "focus_points": [],
  "conversation_patterns": [
    {
      "pattern_type": "reference",
      "pattern": "正如我所说",
      "confidence": 0.8
    },
    {
      "pattern_type": "code",
      "pattern": "fn ",
      "confidence": 0.6
    }
  ]
}

关键词提取:3-5 个核心关键词(技术名词、项目名、关键概念)
标签提取:1-3 个分类标签(backend、frontend、config、auth 等)

只返回 JSON,不要其他解释。"#;
172
173#[async_trait::async_trait]
174impl MemoryExtractor for AiMemoryExtractor {
175    async fn extract(
176        &self,
177        text: &str,
178        session_id: Option<&str>,
179        project_path: Option<&str>,
180    ) -> Result<ExtractionResult> {
181        use crate::providers::{ChatRequest, Message, MessageContent, Role};
182
183        // Safely truncate to ~4000 chars respecting UTF-8 boundaries
184        let truncated = truncate_chars(text, 4000);
185
186        let request = ChatRequest {
187            messages: vec![Message {
188                role: Role::User,
189                content: MessageContent::Text(format!(
190                    "请从以下对话中提取值得记忆的关键信息和当前聚焦点:\n\n{}",
191                    truncated
192                )),
193            }],
194            tools: vec![],
195            system: Some(MEMORY_EXTRACT_SYSTEM_PROMPT.to_string()),
196            think: false,
197            max_tokens: 512,
198            server_tools: vec![],
199            enable_caching: false,
200        };
201
202        let response = self.provider.chat(request).await?;
203
204        let response_text = response
205            .content
206            .iter()
207            .filter_map(|b| {
208                if let crate::providers::ContentBlock::Text { text } = b {
209                    Some(text.clone())
210                } else {
211                    None
212                }
213            })
214            .collect::<Vec<_>>()
215            .join("");
216
217        parse_memory_response(&response_text, session_id, project_path)
218    }
219
220    fn model_name(&self) -> &str {
221        &self.model
222    }
223}
224
225fn parse_memory_response(
226    json_text: &str,
227    session_id: Option<&str>,
228    project_path: Option<&str>,
229) -> Result<ExtractionResult> {
230    let cleaned = json_text
231        .trim()
232        .trim_start_matches("```json")
233        .trim_start_matches("```")
234        .trim_end_matches("```")
235        .trim();
236
237    #[derive(Deserialize)]
238    struct MemoryResponse {
239        memories: Vec<MemoryItem>,
240        #[serde(default)]
241        focus_points: Vec<FocusPointItem>,
242        #[serde(default)]
243        conversation_patterns: Vec<ConversationPatternItem>,
244    }
245
246    #[derive(Deserialize)]
247    struct MemoryItem {
248        category: String,
249        content: String,
250        #[serde(default)]
251        importance: f64,
252        #[serde(default)]
253        keywords: Vec<String>,
254        #[serde(default)]
255        tags: Vec<String>,
256    }
257
258    #[derive(Deserialize)]
259    struct FocusPointItem {
260        topic: String,
261        #[serde(default)]
262        keywords: Vec<String>,
263        #[serde(default)]
264        entities: Vec<String>,
265        #[serde(default)]
266        core_question: Option<String>,
267        #[serde(default = "default_importance")]
268        importance: f32,
269        #[serde(default = "default_is_current")]
270        is_current: bool,
271    }
272
273    #[derive(Deserialize)]
274    struct ConversationPatternItem {
275        pattern_type: String,
276        pattern: String,
277        #[serde(default)]
278        confidence: f32,
279    }
280
281    fn default_importance() -> f32 { 0.7 }
282    fn default_is_current() -> bool { true }
283
284    let parsed: MemoryResponse = serde_json::from_str(cleaned)?;
285
286    // Parse memories
287    let entries = parsed
288        .memories
289        .into_iter()
290        .filter_map(|item| {
291            let category = match item.category.to_lowercase().as_str() {
292                "decision" => MemoryCategory::Decision,
293                "preference" => MemoryCategory::Preference,
294                "solution" => MemoryCategory::Solution,
295                "finding" => MemoryCategory::Finding,
296                "technical" => MemoryCategory::Technical,
297                "structure" => MemoryCategory::Structure,
298                _ => return None,
299            };
300
301            if item.content.len() < MIN_MEMORY_CONTENT_LENGTH {
302                return None;
303            }
304
305            let mut entry = MemoryEntry::new(
306                category,
307                item.content,
308                session_id.map(|s| s.to_string()),
309                project_path.map(|p| p.to_string()),
310            );
311            if item.importance > 0.0 {
312                entry.importance = item.importance.clamp(0.0, 100.0);
313            }
314            // Add AI-extracted keywords and tags with filtering
315            let valid_keywords: Vec<String> = item.keywords
316                .iter()
317                .filter(|k| k.len() >= 2 && !is_noise_word(k))
318                .cloned()
319                .collect();
320            
321            let valid_tags: Vec<String> = item.tags
322                .iter()
323                .filter(|t| t.len() >= 2 && !is_noise_word(t))
324                .cloned()
325                .collect();
326            
327            entry.tags.extend(valid_keywords);
328            entry.tags.extend(valid_tags);
329            entry.tags.dedup();
330            
331            // Limit tag count to avoid overwhelming
332            if entry.tags.len() > 10 {
333                entry.tags.truncate(10);
334            }
335
336            Some(entry)
337        })
338        .collect();
339
340    // Parse focus points
341    use chrono::Utc;
342    use crate::compress::FocusStatus;
343
344    let focus_points = parsed
345        .focus_points
346        .into_iter()
347        .map(|item| {
348            let mut focus = FocusPoint::new(
349                format!("focus-{}", Utc::now().timestamp()),
350                item.topic,
351                item.keywords,
352                item.entities,
353                item.core_question,
354                0,
355            );
356            focus.importance = item.importance.clamp(0.0, 1.0);
357            if !item.is_current {
358                focus.status = FocusStatus::Suspended;
359            }
360            focus
361        })
362        .collect();
363
364    // Parse conversation patterns
365    let conversation_patterns = parsed
366        .conversation_patterns
367        .into_iter()
368        .filter_map(|item| {
369            // Parse pattern type
370            let pattern_type = match item.pattern_type.to_lowercase().as_str() {
371                "reference" => PatternType::Reference,
372                "code" => PatternType::Code,
373                _ => return None, // Skip unknown pattern types
374            };
375
376            // Skip empty patterns
377            if item.pattern.trim().is_empty() {
378                return None;
379            }
380
381            // Create pattern with UserConversation source
382            let mut pattern = ConversationPattern::new(
383                pattern_type,
384                item.pattern,
385                PatternSource::UserConversation {
386                    example: String::new(), // Will be filled when pattern is used
387                },
388            );
389
390            // Set confidence (default to 0.5 if not specified or out of range)
391            pattern.confidence = if item.confidence > 0.0 {
392                item.confidence.clamp(0.0, 1.0)
393            } else {
394                0.5
395            };
396
397            Some(pattern)
398        })
399        .collect();
400
401    Ok(ExtractionResult {
402        memories: deduplicate_entries(entries),
403        focus_points,
404        conversation_patterns,
405    })
406}
407
408fn deduplicate_entries(entries: Vec<MemoryEntry>) -> Vec<MemoryEntry> {
409    let mut seen: Vec<String> = Vec::new();
410    entries
411        .into_iter()
412        .filter(|e| {
413            let content_lower = e.content.to_lowercase();
414            if seen.iter().any(|s| {
415                AutoMemory::calculate_similarity(s, &content_lower) >= SIMILARITY_THRESHOLD
416            }) {
417                false
418            } else {
419                seen.push(content_lower);
420                true
421            }
422        })
423        .take(MAX_DETECTED_ENTRIES)
424        .collect()
425}
426
427// ============================================================================
428// Rule-based Detection (uses KeywordsConfig)
429// ============================================================================
430
431/// Detect memories from text using hard-coded patterns.
432pub fn detect_memories_fallback(
433    text: &str,
434    session_id: Option<&str>,
435    project_path: Option<&str>,
436) -> Vec<MemoryEntry> {
437    let mut entries = Vec::new();
438    let text_lower = text.to_lowercase();
439
440    // Hard-coded patterns for each category
441    let patterns = [
442        (
443            MemoryCategory::Decision,
444            ["决定", "选择", "采用", "定下", "decided", "chose"],
445        ),
446        (
447            MemoryCategory::Preference,
448            ["偏好", "习惯", "喜欢", "首选", "prefer", "like"],
449        ),
450        (
451            MemoryCategory::Solution,
452            ["解决", "修复", "搞定", "改成", "fixed", "solved"],
453        ),
454        (
455            MemoryCategory::Finding,
456            ["发现", "原来", "原因", "定位", "found", "reason"],
457        ),
458        (
459            MemoryCategory::Technical,
460            ["技术栈", "框架", "用的", "基于", "stack", "using"],
461        ),
462        (
463            MemoryCategory::Structure,
464            ["入口", "主文件", "目录", "位于", "entry", "main"],
465        ),
466    ];
467
468    for (category, keywords) in patterns {
469        for keyword in keywords {
470            if text_lower.contains(&keyword.to_lowercase()) {
471                let content = extract_memory_content(text, keyword);
472                if !content.is_empty() && content.len() >= MIN_MEMORY_CONTENT_LENGTH {
473                    entries.push(MemoryEntry::new(
474                        category,
475                        content,
476                        session_id.map(|s| s.to_string()),
477                        project_path.map(|p| p.to_string()),
478                    ));
479                }
480            }
481        }
482    }
483
484    deduplicate_entries(entries)
485}
486
/// Detect memories from text (wrapper for fallback).
///
/// Forwards `text`, `session_id` and `project_path` unchanged to
/// [`detect_memories_fallback`] and returns its result.
pub fn detect_memories_from_text(
    text: &str,
    session_id: Option<&str>,
    project_path: Option<&str>,
) -> Vec<MemoryEntry> {
    detect_memories_fallback(text, session_id, project_path)
}
495
496/// Smart detection: AI-first with rule-based fallback.
497///
498/// Priority order:
499/// 1. AI extraction (if text > 200 chars and extractor available)
500/// 2. Rule-based fallback (if AI fails or text too short)
501pub async fn detect_memories_smart(
502    text: &str,
503    session_id: Option<&str>,
504    project_path: Option<&str>,
505    extractor: Option<&AiMemoryExtractor>,
506) -> ExtractionResult {
507    let mode = AiDetectionMode::from_env();
508    let text_len = text.len();
509
510    // Determine if we should try AI first
511    // Only use AI for text > 200 chars (avoid API overhead for short texts)
512    let should_try_ai = mode != AiDetectionMode::Never && extractor.is_some() && text_len > 200;
513
514    // Debug log: show method and model
515    let model_name = extractor.map(|e| e.model_name()).unwrap_or("none");
516    crate::debug::debug_log().memory_ai_detection(
517        model_name,
518        0, // Will update after detection
519        text_len,
520        should_try_ai,
521    );
522
523    if should_try_ai && let Some(ex) = extractor {
524        if let Ok(result) = ex.extract(text, session_id, project_path).await {
525            // AI succeeded - use AI results entirely (skip hardcoded rules)
526            // Debug log: AI result
527            crate::debug::debug_log().memory_ai_detection(
528                ex.model_name(),
529                result.memories.len(),
530                text_len,
531                true,
532            );
533            return result;
534        }
535        // AI failed - try rule-based fallback for critical memories only
536        log::warn!("AI memory extraction failed, trying rule-based fallback for critical memories");
537        
538        // Extract only the most critical memory types (avoid noise)
539        let critical_memories = detect_critical_memories(text, session_id, project_path);
540        
541        crate::debug::debug_log().memory_ai_detection(
542            "rule-fallback",
543            critical_memories.len(),
544            text_len,
545            false,
546        );
547        
548        return ExtractionResult {
549            memories: critical_memories,
550            focus_points: vec![],
551            conversation_patterns: vec![],
552        };
553    }
554
555    // For short texts (< 200 chars), skip detection entirely (per user request)
556    // No rule-based fallback
557    ExtractionResult {
558        memories: vec![],
559        focus_points: vec![],
560        conversation_patterns: vec![],
561    }
562}
563
564/// Detect critical memories using rule-based patterns (fallback when AI fails).
565/// Only extracts high-value memory types: structure, technical, decision.
566fn detect_critical_memories(
567    text: &str,
568    session_id: Option<&str>,
569    project_path: Option<&str>,
570) -> Vec<MemoryEntry> {
571    // Critical patterns with high importance
572    let critical_patterns = [
573        // Structure information (highest priority)
574        (MemoryCategory::Structure, ["位于", "入口", "模块", "packages/", "src/"], 85.0),
575        // Technical information (high priority)
576        (MemoryCategory::Technical, ["技术栈", "框架", "基于", "使用", ""], 80.0),
577        // Decision information (important)
578        (MemoryCategory::Decision, ["决定", "选择", "采用", "", ""], 75.0),
579    ];
580    
581    let mut entries = Vec::new();
582    let text_lower = text.to_lowercase();
583    
584    for (category, keywords, importance) in critical_patterns {
585        // Check if any keyword matches
586        for keyword in keywords {
587            if text_lower.contains(&keyword.to_lowercase()) {
588                let content = extract_memory_content(text, keyword);
589                
590                if !content.is_empty() && content.len() >= MIN_MEMORY_CONTENT_LENGTH {
591                    let mut entry = MemoryEntry::new(
592                        category,
593                        content,
594                        session_id.map(|s| s.to_string()),
595                        project_path.map(|p| p.to_string()),
596                    );
597                    // Set importance for critical memories
598                    entry.importance = importance;
599                    entries.push(entry);
600                    break; // Only one entry per category
601                }
602            }
603        }
604    }
605    
606    // Deduplicate and limit
607    deduplicate_entries(entries)
608}
609
610fn extract_memory_content(text: &str, keyword: &str) -> String {
611    let text_lower = text.to_lowercase();
612    let keyword_lower = keyword.to_lowercase();
613
614    let pos = match text_lower.find(&keyword_lower) {
615        Some(p) => p,
616        None => return String::new(),
617    };
618
619    // Improved sentence boundary detection
620    let start = find_sentence_start(text, pos);
621    let end = find_sentence_end(text, pos);
622
623    let sentence = text[start..end].trim();
624
625    // Clean and format content
626    let cleaned = clean_memory_content(sentence);
627
628    if cleaned.len() > MAX_MEMORY_CONTENT_LENGTH {
629        // Intelligent truncation: preserve key information
630        truncate_intelligently(&cleaned, MAX_MEMORY_CONTENT_LENGTH)
631    } else {
632        cleaned
633    }
634}
635
/// Find the byte offset at which the sentence containing `pos` starts.
///
/// `pos` is a byte offset into `text`. It is clamped to `text.len()` and moved
/// back to the nearest char boundary, so any value is safe to pass. The scan
/// then walks backwards one character at a time and stops at:
///
/// * the position right after a sentence terminator (`.`, `。`, newline, `!`,
///   `?` and their full-width forms `！` / `？`), or
/// * the position right before a Markdown code fence, so a fence that
///   precedes `pos` is kept with the sentence.
///
/// Returns `0` when no boundary is found. The result is always a char
/// boundary of `text`, so it can be used directly for slicing.
fn find_sentence_start(text: &str, pos: usize) -> usize {
    // Work in byte offsets throughout: `pos` comes from `str::find`, and
    // indexing a `Vec<char>` with it runs past the end on multi-byte text.
    let mut start = pos.min(text.len());
    while !text.is_char_boundary(start) {
        start -= 1;
    }

    while start > 0 {
        let head = &text[..start];
        let ch = match head.chars().next_back() {
            Some(c) => c,
            None => break,
        };

        // Sentence boundary markers (U+FF01 / U+FF1F are full-width '!' / '?').
        if matches!(ch, '.' | '。' | '\n' | '!' | '?' | '\u{FF01}' | '\u{FF1F}') {
            return start;
        }
        // Avoid splitting code blocks: include the fence that ends here.
        if head.ends_with("```") {
            return start - 3;
        }
        start -= ch.len_utf8();
    }
    0
}
658
/// Find the byte offset just past the end of the sentence containing `pos`.
///
/// `pos` is a byte offset into `text`. It is clamped to `text.len()` and moved
/// back to the nearest char boundary, so any value is safe to pass. The scan
/// then walks forward one character at a time and stops at:
///
/// * the position right after a sentence terminator (`.`, `。`, newline, `!`,
///   `?` and their full-width forms `！` / `？`), or
/// * the position right after a Markdown code fence that starts at the
///   current character.
///
/// Returns `text.len()` when no boundary is found. The result is always a
/// char boundary of `text`, so it can be used directly for slicing.
fn find_sentence_end(text: &str, pos: usize) -> usize {
    // Work in byte offsets throughout: the caller slices `text` with the
    // returned value, so a char index would cut multi-byte text mid-character.
    let mut from = pos.min(text.len());
    while !text.is_char_boundary(from) {
        from -= 1;
    }

    for (offset, ch) in text[from..].char_indices() {
        let idx = from + offset;

        // Sentence boundary markers (U+FF01 / U+FF1F are full-width '!' / '?').
        if matches!(ch, '.' | '。' | '\n' | '!' | '?' | '\u{FF01}' | '\u{FF1F}') {
            return idx + ch.len_utf8();
        }
        // Avoid splitting code blocks: include the fence that starts here.
        if text[idx..].starts_with("```") {
            return idx + 3;
        }
    }
    text.len()
}
681
/// Strip Markdown decoration from a memory sentence and collapse whitespace.
///
/// Steps, in this order:
/// 1. the labelled markers (`**Why:**`, `**Context:**`, `**Location:**`,
///    `**Purpose:**`, `**Problem:**`, `**Key:**`) become plain Chinese labels;
/// 2. every remaining `**` is removed;
/// 3. every backtick and `#` character is removed;
/// 4. runs of whitespace become a single space and the ends are trimmed.
fn clean_memory_content(content: &str) -> String {
    // Labelled Markdown marker -> plain-text label.
    let label_rewrites = [
        ("**Why:**", "原因:"),
        ("**Context:**", "场景:"),
        ("**Location:**", "位置:"),
        ("**Purpose:**", "功能:"),
        ("**Problem:**", "问题:"),
        ("**Key:**", "关键:"),
    ];

    let mut stripped = content.to_string();
    for &(marker, label) in label_rewrites.iter() {
        stripped = stripped.replace(marker, label);
    }

    // Bold markers go before the single-character markers: removing a
    // backtick first could join two lone asterisks into a new `**`.
    stripped = stripped.replace("**", "");
    stripped.retain(|c| c != '`' && c != '#');

    // Re-join the words with single spaces; this also trims both ends.
    let mut normalized = String::with_capacity(stripped.len());
    for word in stripped.split_whitespace() {
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(word);
    }
    normalized
}
704
/// Intelligent truncation: shorten `text` to at most `max_len` bytes while
/// preferring to keep key information (paths, file names, location labels).
///
/// `text` is returned unchanged when it already fits. Otherwise it is split
/// on whitespace and words are selected in two passes:
///
/// 1. words containing a priority marker (`位置:`, `Location:`, `功能:`,
///    `Purpose:`, `packages/`, `src/`, `.rs`, `.ts`, `.js`, `.py`);
/// 2. the remaining words, but only when pass 1 selected nothing or used less
///    than half of the budget.
///
/// A word is selected only if the joined result still fits in `max_len`
/// bytes. Selected words are emitted in their original order, joined by
/// single spaces, and repeated words are kept as separate occurrences.
///
/// When no word fits at all (for example a long CJK sentence without
/// spaces), `text` is cut at the last char boundary not exceeding `max_len`
/// bytes.
fn truncate_intelligently(text: &str, max_len: usize) -> String {
    if text.len() <= max_len {
        return text.to_string();
    }

    // Priority keywords to keep
    let priority_keywords = [
        "位置:", "Location:", "功能:", "Purpose:", "packages/", "src/", ".rs", ".ts", ".js", ".py",
    ];

    let parts: Vec<&str> = text.split_whitespace().collect();
    // Selection is tracked per position, not per value, so a repeated word is
    // not mistaken for one that was already taken and order can be restored.
    let mut selected = vec![false; parts.len()];
    let mut selected_count = 0usize;
    // Exact byte length of the selected words once joined with single spaces.
    let mut used = 0usize;

    // First pass: priority words.
    for (i, part) in parts.iter().enumerate() {
        if !priority_keywords.iter().any(|k| part.contains(*k)) {
            continue;
        }
        // A separator is only needed in front of the second and later words.
        let cost = part.len() + usize::from(selected_count > 0);
        if used + cost <= max_len {
            selected[i] = true;
            selected_count += 1;
            used += cost;
        }
    }

    // Second pass: fill up with ordinary words if there is plenty of room left.
    if selected_count == 0 || used < max_len / 2 {
        for (i, part) in parts.iter().enumerate() {
            if selected[i] {
                continue;
            }
            let cost = part.len() + usize::from(selected_count > 0);
            if used + cost <= max_len {
                selected[i] = true;
                selected_count += 1;
                used += cost;
            }
        }
    }

    if selected_count == 0 {
        // Last resort: plain cut. `max_len` is a byte budget, so back off to
        // a char boundary instead of counting characters.
        let mut cut = max_len;
        while !text.is_char_boundary(cut) {
            cut -= 1;
        }
        return text[..cut].to_string();
    }

    parts
        .iter()
        .zip(selected.iter())
        .filter(|&(_, &keep)| keep)
        .map(|(part, _)| *part)
        .collect::<Vec<&str>>()
        .join(" ")
}
750
751/// Infer category from content.
752pub fn infer_category_from_content(content: &str) -> MemoryCategory {
753    let lower = content.to_lowercase();
754
755    if lower.contains("决定")
756        || lower.contains("选择")
757        || lower.contains("采用")
758        || lower.contains("decided")
759    {
760        return MemoryCategory::Decision;
761    }
762    if lower.contains("喜欢")
763        || lower.contains("偏好")
764        || lower.contains("习惯")
765        || lower.contains("prefer")
766    {
767        return MemoryCategory::Preference;
768    }
769    if lower.contains("解决")
770        || lower.contains("修复")
771        || lower.contains("搞定")
772        || lower.contains("fixed")
773    {
774        return MemoryCategory::Solution;
775    }
776    if lower.contains("发现")
777        || lower.contains("原因")
778        || lower.contains("原来")
779        || lower.contains("found")
780    {
781        return MemoryCategory::Finding;
782    }
783    if lower.contains("技术")
784        || lower.contains("框架")
785        || lower.contains("库")
786        || lower.contains("tech")
787    {
788        return MemoryCategory::Technical;
789    }
790    if lower.contains("文件")
791        || lower.contains("目录")
792        || lower.contains("入口")
793        || lower.contains("file")
794    {
795        return MemoryCategory::Structure;
796    }
797
798    MemoryCategory::Finding // Default
799}
800
801// ============================================================================
802// Unified Extraction (One AI Call for All Information)
803// ============================================================================
804
/// Unified extraction system prompt for extracting all information in one call.
///
/// Written in Chinese and passed as the system prompt by
/// `UnifiedExtractor::extract_unified`. It asks the model for four sections
/// in one strict-JSON reply: `memories`, `focus_points`,
/// `conversation_patterns` and `focus_keywords` (with the groups
/// `transition`, `question`, `task`, `tech`).
///
/// The text is sent to the model verbatim; editing it changes runtime behaviour.
const UNIFIED_EXTRACTION_PROMPT: &str = r#"你是信息提取助手。从对话中一次性提取以下信息:

## 1. 长期记忆 (memories) - 最重要!
- decision: 技术决策(如"决定使用 PostgreSQL"、"采用 React 架构")
- preference: 用户偏好(如"我喜欢简洁的代码风格"、"习惯用 VS Code")
- solution: 解决方案(如"通过添加缓存解决了性能问题")
- finding: 重要发现(如"发现内存泄漏的原因")
- technical: 技术栈(如"项目使用 Rust + Tokio")
- structure: **项目结构信息(优先保存!)**(如"compress 模块在 packages/core/src/compress/"、"上下文判断逻辑在 compressor.rs:518")

## 结构信息的重要性

项目结构信息(structure 类型)能大幅减少未来会话的探索时间,必须保存:
- 关键模块位置:"Agent 循环在 packages/core/src/agent/run.rs"
- 核心文件功能:"streaming.rs 负责 API 流式响应处理"
- 问题定位路径:"上下文大小判断在 estimate_tokens 函数(compressor.rs:518-561)"
- 代码组织模式:"providers 模块实现了 Provider trait"

## 2. 当前焦点 (focus_points)
- topic: 当前讨论的主题
- keywords: 相关关键词
- entities: 涉及的文件/函数/类名
- core_question: 核心问题(可选)

## 3. 对话模式 (conversation_patterns)
- reference: 引用模式(如"正如前面所说"、"as mentioned"、"previously")
- code: 代码模式(如"fn ", "function", "```", "class ")

## 4. 焦点关键词 (focus_keywords)
- transition: 话题转换词(如"换个话题", "switching", "however", "等等")
- question: 提问词(如"怎么", "how", "为什么", "why", "请问")
- task: 任务词(如"帮我", "implement", "创建", "create", "修复")
- tech: 技术词(如"rust", "数据库", "api", "性能", "优化")

## 输出格式(严格 JSON)

```json
{
  "memories": [
    {
      "category": "structure",
      "content": "上下文压缩模块位于 packages/core/src/compress/。**Location:** compressor.rs:518-561 是 estimate_tokens 函数,**Purpose:** 计算上下文 token 数量",
      "importance": 80,
      "keywords": ["compress", "estimate_tokens", "context"],
      "tags": ["core", "context-management"]
    },
    {
      "category": "decision",
      "content": "采用 PostgreSQL 作为主数据库。**Why:** 性能要求",
      "importance": 85,
      "keywords": ["PostgreSQL", "数据库"],
      "tags": ["backend", "storage"]
    }
  ],
  "focus_points": [
    {
      "topic": "API 设计优化",
      "keywords": ["API", "REST", "性能"],
      "entities": ["api.rs", "handler"],
      "core_question": "如何优化 API 响应时间?",
      "importance": 0.8,
      "is_current": true
    }
  ],
  "conversation_patterns": [
    {
      "pattern_type": "reference",
      "pattern": "正如我所说",
      "confidence": 0.8
    },
    {
      "pattern_type": "code",
      "pattern": "fn ",
      "confidence": 0.6
    }
  ],
  "focus_keywords": {
    "transition": ["换个话题", "switching"],
    "question": ["怎么", "how"],
    "task": ["帮我", "implement"],
    "tech": ["rust", "性能"]
  }
}
```

## 规则
1. structure 类型的记忆优先级最高,发现就保存
2. 只提取明确出现的信息,不要推测
3. 如果某类信息没有,返回空数组/对象
4. importance 范围:memories 0-100,focus_points 0.0-1.0
5. confidence 范围:0.0-1.0,常见模式置信度较低
6. 关键词提取 3-5 个核心关键词
7. 只返回 JSON,不要其他解释"#;
899
/// Unified extraction prompt with focus selection.
/// This prompt includes existing focuses and asks AI to select or create focus.
///
/// Written in Chinese. The requested strict-JSON reply has the top-level keys
/// `focus_decision` (selected or new focus, confidence, `focus_type`,
/// topic-switch information, reasoning), `memories` and `focus_keywords`.
/// The prompt states that the list of existing focuses is supplied together
/// with the conversation text, so the caller is expected to add it to the
/// user message. NOTE(review): the code that uses this constant is not
/// visible in this part of the file.
///
/// The text is sent to the model verbatim; editing it changes runtime behaviour.
const UNIFIED_EXTRACTION_WITH_FOCUS_PROMPT: &str = r#"你是信息提取和焦点决策助手。从对话中一次性完成以下任务:

## 1. 焦点决策 (focus_decision) - 最重要!

你会收到当前已有的焦点列表。请判断:

### 选择现有焦点
如果最新对话与某个现有焦点匹配:
- selected_focus_id: 该焦点的 ID
- need_new_focus: false
- confidence: 匹配置信度 (0.0-1.0)

### 创建新焦点
如果没有任何现有焦点匹配:
- selected_focus_id: null
- need_new_focus: true
- new_focus_topic: 新焦点主题
- new_core_question: 核心问题
- confidence: 创建置信度

### 判断话题切换
- is_topic_switch: 是否从某焦点切换到另一焦点
- previous_focus_id: 切换前的焦点 ID(如果有)

### 焦点类型 (focus_type)
- problem_solving: 修复 bug、解决错误
- task_execution: 实现功能、完成任务
- knowledge_exploration: 学习、研究、探索
- decision_making: 技术选型、架构设计
- code_optimization: 性能优化、重构
- general: 一般对话

## 2. 长期记忆 (memories)
- decision: 技术决策
- preference: 用户偏好
- solution: 解决方案
- finding: 重要发现
- technical: 技术栈
- structure: 项目结构

## 3. 焦点关键词 (focus_keywords)
- transition: 话题转换词
- question: 提问词
- task: 任务词
- tech: 技术词

## 输出格式(严格 JSON)

```json
{
  "focus_decision": {
    "selected_focus_id": "focus-1",
    "need_new_focus": false,
    "new_focus_topic": null,
    "new_core_question": null,
    "confidence": 0.85,
    "focus_type": "code_optimization",
    "is_topic_switch": true,
    "previous_focus_id": "focus-2",
    "focus_keywords": ["API", "latency", "performance"],
    "related_entities": ["api.rs", "handle_request()"],
    "reasoning": "用户从数据库切换到 API 性能话题"
  },
  "memories": [...],
  "focus_keywords": {
    "transition": ["换个话题"],
    "question": ["怎么"],
    "task": ["优化"],
    "tech": ["api", "性能"]
  }
}
```

## 规则
1. focus_decision 是最重要的输出,必须仔细判断
2. 现有焦点列表会随对话文本一起提供
3. 如果现有焦点都不匹配,必须标记 need_new_focus=true
4. confidence 反映你对决策的确信程度
5. 只返回 JSON,不要其他解释"#;
981
/// Unified extractor that extracts all information in a single AI call.
///
/// This replaces the separate AiMemoryExtractor and FocusExtractor,
/// reducing API calls and providing consistent extraction.
pub struct UnifiedExtractor {
    /// Chat backend every extraction request is sent through.
    provider: Box<dyn crate::providers::Provider>,
    /// Model name supplied at construction; exposed via `model_name()`.
    model: String,
}
990
991impl UnifiedExtractor {
992    /// Create a new unified extractor.
993    pub fn new(provider: Box<dyn crate::providers::Provider>, model: String) -> Self {
994        Self { provider, model }
995    }
996
997    /// Create a minimal unified extractor for background tasks.
998    pub fn new_minimal(model: String) -> Self {
999        Self {
1000            provider: crate::create_minimal_provider(&model),
1001            model,
1002        }
1003    }
1004
1005    /// Extract all information from conversation text in a single AI call.
1006    pub async fn extract_unified(
1007        &self,
1008        text: &str,
1009        session_id: Option<&str>,
1010        project_path: Option<&str>,
1011    ) -> Result<UnifiedExtractionResult> {
1012        use crate::providers::{ChatRequest, Message, MessageContent, Role};
1013
1014        // Safely truncate to ~4000 chars respecting UTF-8 boundaries
1015        let truncated = truncate_chars(text, 4000);
1016
1017        let request = ChatRequest {
1018            messages: vec![Message {
1019                role: Role::User,
1020                content: MessageContent::Text(format!(
1021                    "请从以下对话中提取所有信息:\n\n{}",
1022                    truncated
1023                )),
1024            }],
1025            tools: vec![],
1026            system: Some(UNIFIED_EXTRACTION_PROMPT.to_string()),
1027            think: false,
1028            max_tokens: 1024, // Larger token limit for unified extraction
1029            server_tools: vec![],
1030            enable_caching: false,
1031        };
1032
1033        let response = self.provider.chat(request).await?;
1034
1035        let response_text = response
1036            .content
1037            .iter()
1038            .filter_map(|b| {
1039                if let crate::providers::ContentBlock::Text { text } = b {
1040                    Some(text.clone())
1041                } else {
1042                    None
1043                }
1044            })
1045            .collect::<Vec<_>>()
1046            .join("");
1047
1048        parse_unified_response(&response_text, session_id, project_path)
1049    }
1050
1051    /// Extract all information WITH focus selection in a single AI call.
1052    ///
1053    /// This method receives existing focuses and asks AI to select the best match
1054    /// or create a new focus if none matches. This ensures focus continuity.
1055    ///
1056    /// # Arguments
1057    /// * `text` - Conversation text to analyze
1058    /// * `existing_foci` - Current focus points from FocusManager (id, topic, keywords)
1059    /// * `session_id` - Optional session ID
1060    /// * `project_path` - Optional project path
1061    ///
1062    /// # Returns
1063    /// UnifiedExtractionResult with focus_decision field populated
1064    pub async fn extract_unified_with_foci(
1065        &self,
1066        text: &str,
1067        existing_foci: &[(&str, &str, &[String])], // (id, topic, keywords)
1068        session_id: Option<&str>,
1069        project_path: Option<&str>,
1070    ) -> Result<UnifiedExtractionResult> {
1071        use crate::providers::{ChatRequest, Message, MessageContent, Role};
1072
1073        // Safely truncate to ~4000 chars respecting UTF-8 boundaries
1074        let truncated = truncate_chars(text, 4000);
1075
1076        // Format existing focuses for AI
1077        let foci_text = if existing_foci.is_empty() {
1078            "(当前没有现有焦点)".to_string()
1079        } else {
1080            let mut foci_list = Vec::new();
1081            for (id, topic, keywords) in existing_foci {
1082                foci_list.push(format!(
1083                    "- ID: {}\n  主题: {}\n  关键词: {}",
1084                    id,
1085                    topic,
1086                    keywords.join(", ")
1087                ));
1088            }
1089            format!("现有焦点列表:\n{}", foci_list.join("\n"))
1090        };
1091
1092        let user_prompt = format!(
1093            "{}\n\n最新对话:\n{}\n\n请判断最新对话与现有焦点的匹配关系,并做出焦点决策。",
1094            foci_text,
1095            truncated
1096        );
1097
1098        let request = ChatRequest {
1099            messages: vec![Message {
1100                role: Role::User,
1101                content: MessageContent::Text(user_prompt),
1102            }],
1103            tools: vec![],
1104            system: Some(UNIFIED_EXTRACTION_WITH_FOCUS_PROMPT.to_string()),
1105            think: false,
1106            max_tokens: 1024,
1107            server_tools: vec![],
1108            enable_caching: false,
1109        };
1110
1111        let response = self.provider.chat(request).await?;
1112
1113        let response_text = response
1114            .content
1115            .iter()
1116            .filter_map(|b| {
1117                if let crate::providers::ContentBlock::Text { text } = b {
1118                    Some(text.clone())
1119                } else {
1120                    None
1121                }
1122            })
1123            .collect::<Vec<_>>()
1124            .join("");
1125
1126        parse_unified_response_with_focus(&response_text, session_id, project_path)
1127    }
1128
1129    /// Get the model name used for extraction.
1130    pub fn model_name(&self) -> &str {
1131        &self.model
1132    }
1133}
1134
1135/// Parse unified extraction response from AI.
1136fn parse_unified_response(
1137    json_text: &str,
1138    session_id: Option<&str>,
1139    project_path: Option<&str>,
1140) -> Result<UnifiedExtractionResult> {
1141    let cleaned = json_text
1142        .trim()
1143        .trim_start_matches("```json")
1144        .trim_start_matches("```")
1145        .trim_end_matches("```")
1146        .trim();
1147
1148    #[derive(Deserialize)]
1149    struct UnifiedResponse {
1150        #[serde(default)]
1151        memories: Vec<MemoryItem>,
1152        #[serde(default)]
1153        focus_points: Vec<FocusPointItem>,
1154        #[serde(default)]
1155        conversation_patterns: Vec<ConversationPatternItem>,
1156        #[serde(default)]
1157        focus_keywords: FocusKeywordsItem,
1158    }
1159
1160    #[derive(Deserialize, Default)]
1161    struct FocusKeywordsItem {
1162        #[serde(default)]
1163        transition: Vec<String>,
1164        #[serde(default)]
1165        question: Vec<String>,
1166        #[serde(default)]
1167        task: Vec<String>,
1168        #[serde(default)]
1169        tech: Vec<String>,
1170    }
1171
1172    #[derive(Deserialize)]
1173    struct MemoryItem {
1174        category: String,
1175        content: String,
1176        #[serde(default)]
1177        importance: f64,
1178        #[serde(default)]
1179        keywords: Vec<String>,
1180        #[serde(default)]
1181        tags: Vec<String>,
1182    }
1183
1184    #[derive(Deserialize)]
1185    struct FocusPointItem {
1186        topic: String,
1187        #[serde(default)]
1188        keywords: Vec<String>,
1189        #[serde(default)]
1190        entities: Vec<String>,
1191        #[serde(default)]
1192        core_question: Option<String>,
1193        #[serde(default = "default_importance")]
1194        importance: f32,
1195        #[serde(default = "default_is_current")]
1196        is_current: bool,
1197    }
1198
1199    #[derive(Deserialize)]
1200    struct ConversationPatternItem {
1201        pattern_type: String,
1202        pattern: String,
1203        #[serde(default)]
1204        confidence: f32,
1205    }
1206
1207    fn default_importance() -> f32 { 0.7 }
1208    fn default_is_current() -> bool { true }
1209
1210    let parsed: UnifiedResponse = serde_json::from_str(cleaned)?;
1211
1212    // Parse memories (reuse existing logic)
1213    let entries = parsed
1214        .memories
1215        .into_iter()
1216        .filter_map(|item| {
1217            let category = match item.category.to_lowercase().as_str() {
1218                "decision" => MemoryCategory::Decision,
1219                "preference" => MemoryCategory::Preference,
1220                "solution" => MemoryCategory::Solution,
1221                "finding" => MemoryCategory::Finding,
1222                "technical" => MemoryCategory::Technical,
1223                "structure" => MemoryCategory::Structure,
1224                _ => return None,
1225            };
1226
1227            if item.content.len() < MIN_MEMORY_CONTENT_LENGTH {
1228                return None;
1229            }
1230
1231            let mut entry = MemoryEntry::new(
1232                category,
1233                item.content,
1234                session_id.map(|s| s.to_string()),
1235                project_path.map(|p| p.to_string()),
1236            );
1237            if item.importance > 0.0 {
1238                entry.importance = item.importance.clamp(0.0, 100.0);
1239            }
1240            if !item.keywords.is_empty() {
1241                entry.tags.extend(item.keywords);
1242            }
1243            if !item.tags.is_empty() {
1244                entry.tags.extend(item.tags);
1245            }
1246            entry.tags.dedup();
1247
1248            Some(entry)
1249        })
1250        .collect();
1251
1252    // Parse focus points (reuse existing logic)
1253    use chrono::Utc;
1254    use crate::compress::FocusStatus;
1255
1256    let focus_points = parsed
1257        .focus_points
1258        .into_iter()
1259        .map(|item| {
1260            let mut focus = FocusPoint::new(
1261                format!("focus-{}", Utc::now().timestamp()),
1262                item.topic,
1263                item.keywords,
1264                item.entities,
1265                item.core_question,
1266                0,
1267            );
1268            focus.importance = item.importance.clamp(0.0, 1.0);
1269            if !item.is_current {
1270                focus.status = FocusStatus::Suspended;
1271            }
1272            focus
1273        })
1274        .collect();
1275
1276    // Parse conversation patterns (reuse existing logic)
1277    let conversation_patterns = parsed
1278        .conversation_patterns
1279        .into_iter()
1280        .filter_map(|item| {
1281            let pattern_type = match item.pattern_type.to_lowercase().as_str() {
1282                "reference" => PatternType::Reference,
1283                "code" => PatternType::Code,
1284                _ => return None,
1285            };
1286
1287            if item.pattern.trim().is_empty() {
1288                return None;
1289            }
1290
1291            let mut pattern = ConversationPattern::new(
1292                pattern_type,
1293                item.pattern,
1294                PatternSource::UserConversation {
1295                    example: String::new(),
1296                },
1297            );
1298
1299            pattern.confidence = if item.confidence > 0.0 {
1300                item.confidence.clamp(0.0, 1.0)
1301            } else {
1302                0.5
1303            };
1304
1305            Some(pattern)
1306        })
1307        .collect();
1308
1309    // Parse focus keywords
1310    let focus_keywords = ExtractedKeywords {
1311        transition: parsed.focus_keywords.transition,
1312        question: parsed.focus_keywords.question,
1313        task: parsed.focus_keywords.task,
1314        tech: parsed.focus_keywords.tech,
1315    };
1316
1317    Ok(UnifiedExtractionResult {
1318        memories: deduplicate_entries(entries),
1319        focus_points,
1320        conversation_patterns,
1321        focus_keywords,
1322        focus_decision: None, // Not populated in basic extraction
1323    })
1324}
1325
1326/// Parse unified extraction response with focus decision from AI.
1327fn parse_unified_response_with_focus(
1328    json_text: &str,
1329    session_id: Option<&str>,
1330    project_path: Option<&str>,
1331) -> Result<UnifiedExtractionResult> {
1332    let cleaned = json_text
1333        .trim()
1334        .trim_start_matches("```json")
1335        .trim_start_matches("```")
1336        .trim_end_matches("```")
1337        .trim();
1338
1339    #[derive(Deserialize)]
1340    struct UnifiedResponseWithFocus {
1341        #[serde(default)]
1342        focus_decision: Option<FocusDecisionItem>,
1343        #[serde(default)]
1344        memories: Vec<MemoryItem>,
1345        #[serde(default)]
1346        focus_keywords: FocusKeywordsItem,
1347    }
1348
1349    #[derive(Deserialize)]
1350    struct FocusDecisionItem {
1351        #[serde(default)]
1352        selected_focus_id: Option<String>,
1353        #[serde(default)]
1354        need_new_focus: bool,
1355        #[serde(default)]
1356        new_focus_topic: Option<String>,
1357        #[serde(default)]
1358        new_core_question: Option<String>,
1359        #[serde(default)]
1360        confidence: f32,
1361        #[serde(default)]
1362        focus_type: String,
1363        #[serde(default)]
1364        is_topic_switch: bool,
1365        #[serde(default)]
1366        previous_focus_id: Option<String>,
1367        #[serde(default)]
1368        focus_keywords: Vec<String>,
1369        #[serde(default)]
1370        related_entities: Vec<String>,
1371        #[serde(default)]
1372        reasoning: String,
1373    }
1374
1375    #[derive(Deserialize, Default)]
1376    struct FocusKeywordsItem {
1377        #[serde(default)]
1378        transition: Vec<String>,
1379        #[serde(default)]
1380        question: Vec<String>,
1381        #[serde(default)]
1382        task: Vec<String>,
1383        #[serde(default)]
1384        tech: Vec<String>,
1385    }
1386
1387    #[derive(Deserialize)]
1388    struct MemoryItem {
1389        category: String,
1390        content: String,
1391        #[serde(default)]
1392        importance: f64,
1393        #[serde(default)]
1394        keywords: Vec<String>,
1395        #[serde(default)]
1396        tags: Vec<String>,
1397    }
1398
1399    let parsed: UnifiedResponseWithFocus = serde_json::from_str(cleaned)?;
1400
1401    // Parse focus decision
1402    let focus_decision = parsed.focus_decision.map(|item| {
1403        use super::unified_extraction::{FocusDecision, FocusType};
1404
1405        let focus_type = match item.focus_type.to_lowercase().as_str() {
1406            "problem_solving" => FocusType::ProblemSolving,
1407            "task_execution" => FocusType::TaskExecution,
1408            "knowledge_exploration" => FocusType::KnowledgeExploration,
1409            "decision_making" => FocusType::DecisionMaking,
1410            "code_optimization" => FocusType::CodeOptimization,
1411            _ => FocusType::General,
1412        };
1413
1414        FocusDecision {
1415            selected_focus_id: item.selected_focus_id,
1416            need_new_focus: item.need_new_focus,
1417            new_focus_topic: item.new_focus_topic,
1418            new_core_question: item.new_core_question,
1419            confidence: item.confidence.clamp(0.0, 1.0),
1420            focus_type,
1421            is_topic_switch: item.is_topic_switch,
1422            previous_focus_id: item.previous_focus_id,
1423            focus_keywords: item.focus_keywords,
1424            related_entities: item.related_entities,
1425            reasoning: item.reasoning,
1426        }
1427    });
1428
1429    // Parse memories (reuse existing logic)
1430    let entries = parsed
1431        .memories
1432        .into_iter()
1433        .filter_map(|item| {
1434            let category = match item.category.to_lowercase().as_str() {
1435                "decision" => MemoryCategory::Decision,
1436                "preference" => MemoryCategory::Preference,
1437                "solution" => MemoryCategory::Solution,
1438                "finding" => MemoryCategory::Finding,
1439                "technical" => MemoryCategory::Technical,
1440                "structure" => MemoryCategory::Structure,
1441                _ => return None,
1442            };
1443
1444            if item.content.len() < MIN_MEMORY_CONTENT_LENGTH {
1445                return None;
1446            }
1447
1448            let mut entry = MemoryEntry::new(
1449                category,
1450                item.content,
1451                session_id.map(|s| s.to_string()),
1452                project_path.map(|p| p.to_string()),
1453            );
1454            if item.importance > 0.0 {
1455                entry.importance = item.importance.clamp(0.0, 100.0);
1456            }
1457            if !item.keywords.is_empty() {
1458                entry.tags.extend(item.keywords);
1459            }
1460            if !item.tags.is_empty() {
1461                entry.tags.extend(item.tags);
1462            }
1463            entry.tags.dedup();
1464
1465            Some(entry)
1466        })
1467        .collect();
1468
1469    // Parse focus keywords
1470    let focus_keywords = ExtractedKeywords {
1471        transition: parsed.focus_keywords.transition,
1472        question: parsed.focus_keywords.question,
1473        task: parsed.focus_keywords.task,
1474        tech: parsed.focus_keywords.tech,
1475    };
1476
1477    Ok(UnifiedExtractionResult {
1478        memories: deduplicate_entries(entries),
1479        focus_points: Vec::new(), // Not used in focus selection mode
1480        conversation_patterns: Vec::new(), // Not used in focus selection mode
1481        focus_keywords,
1482        focus_decision,
1483    })
1484}
1485
1486/// Smart unified extraction: AI-first with graceful fallback.
1487///
1488/// Uses UnifiedExtractor for single API call extraction.
1489pub async fn detect_unified_smart(
1490    text: &str,
1491    session_id: Option<&str>,
1492    project_path: Option<&str>,
1493    extractor: Option<&UnifiedExtractor>,
1494) -> UnifiedExtractionResult {
1495    let mode = AiDetectionMode::from_env();
1496    let text_len = text.len();
1497
1498    // Only use AI for text > 200 chars
1499    let should_try_ai = mode != AiDetectionMode::Never && extractor.is_some() && text_len > 200;
1500
1501    if should_try_ai && let Some(ex) = extractor {
1502        if let Ok(result) = ex.extract_unified(text, session_id, project_path).await {
1503            return result;
1504        }
1505        // AI failed - skip detection for this turn
1506        log::warn!("Unified extraction failed, skipping detection for this turn");
1507    }
1508
1509    // Return empty result for short texts or failed AI
1510    UnifiedExtractionResult::default()
1511}
1512
/// Report whether `word` is a noise word that should be filtered from tags.
///
/// The comparison is made against a fixed English/Chinese list after
/// lowercasing `word`; no trimming is performed.
fn is_noise_word(word: &str) -> bool {
    const NOISE_WORDS: &[&str] = &[
        // English noise words
        "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
        "have", "has", "had", "do", "does", "did", "will", "would", "could",
        "should", "may", "might", "must", "shall", "can", "need", "dare",
        "ought", "used", "to", "of", "in", "for", "on", "with", "at", "by",
        "from", "as", "into", "through", "during", "before", "after",
        "above", "below", "between", "under", "again", "further", "then",
        "once", "here", "there", "when", "where", "why", "how", "all", "each",
        "few", "more", "most", "other", "some", "such", "no", "nor", "not",
        "only", "own", "same", "so", "than", "too", "very", "just", "and",
        "but", "if", "or", "because", "until", "while", "although", "though",
        // Chinese noise words
        "的", "了", "在", "是", "我", "有", "和", "就", "不", "人", "都",
        "一", "个", "也", "很", "要", "这", "那", "他", "她", "它", "们",
        "为", "与", "以", "及", "或", "但", "如", "而", "因", "所", "能",
        "会", "可", "把", "被", "让", "给", "从", "到", "对", "向", "比",
        "等", "时", "地", "得", "着", "过", "来", "去", "上", "下", "里",
        "中", "外", "前", "后", "左", "右", "好", "多", "少", "大", "小",
        "高", "低", "长", "短", "快", "慢", "新", "旧", "早", "晚", "真",
        "假", "全", "每", "各", "哪", "什么", "怎么", "怎样", "如何",
        "为什么", "因为", "所以", "如果", "虽然", "但是", "然后", "接着",
        "最后", "开始", "结束", "一直", "总是", "有时", "常常", "经常",
    ];

    let lowered = word.to_lowercase();
    NOISE_WORDS
        .iter()
        .any(|candidate| *candidate == lowered.as_str())
}
1542
1543#[cfg(test)]
1544mod tests {
1545    use super::*;
1546
1547    // =========================================================================
1548    // Conversation Pattern Parsing Tests
1549    // =========================================================================
1550
1551    #[test]
1552    fn test_parse_memory_response_with_patterns() {
1553        let json = r#"{
1554            "memories": [],
1555            "focus_points": [],
1556            "conversation_patterns": [
1557                {
1558                    "pattern_type": "reference",
1559                    "pattern": "正如我所说",
1560                    "confidence": 0.8
1561                },
1562                {
1563                    "pattern_type": "code",
1564                    "pattern": "fn ",
1565                    "confidence": 0.6
1566                }
1567            ]
1568        }"#;
1569
1570        let result = parse_memory_response(json, None, None).unwrap();
1571        assert_eq!(result.memories.len(), 0);
1572        assert_eq!(result.focus_points.len(), 0);
1573        assert_eq!(result.conversation_patterns.len(), 2);
1574
1575        // Check first pattern (reference)
1576        let ref_pattern = &result.conversation_patterns[0];
1577        assert_eq!(ref_pattern.pattern_type, PatternType::Reference);
1578        assert_eq!(ref_pattern.pattern, "正如我所说");
1579        assert_eq!(ref_pattern.confidence, 0.8);
1580        assert!(ref_pattern.is_active);
1581
1582        // Check second pattern (code)
1583        let code_pattern = &result.conversation_patterns[1];
1584        assert_eq!(code_pattern.pattern_type, PatternType::Code);
1585        assert_eq!(code_pattern.pattern, "fn ");
1586        assert_eq!(code_pattern.confidence, 0.6);
1587    }
1588
1589    #[test]
1590    fn test_parse_memory_response_patterns_default_confidence() {
1591        let json = r#"{
1592            "memories": [],
1593            "focus_points": [],
1594            "conversation_patterns": [
1595                {
1596                    "pattern_type": "reference",
1597                    "pattern": "as mentioned"
1598                }
1599            ]
1600        }"#;
1601
1602        let result = parse_memory_response(json, None, None).unwrap();
1603        assert_eq!(result.conversation_patterns.len(), 1);
1604
1605        // Default confidence should be 0.5
1606        let pattern = &result.conversation_patterns[0];
1607        assert_eq!(pattern.confidence, 0.5);
1608    }
1609
1610    #[test]
1611    fn test_parse_memory_response_patterns_empty() {
1612        let json = r#"{
1613            "memories": [],
1614            "focus_points": []
1615        }"#;
1616
1617        let result = parse_memory_response(json, None, None).unwrap();
1618        assert_eq!(result.conversation_patterns.len(), 0);
1619    }
1620
1621    #[test]
1622    fn test_parse_memory_response_patterns_invalid_type() {
1623        let json = r#"{
1624            "memories": [],
1625            "focus_points": [],
1626            "conversation_patterns": [
1627                {
1628                    "pattern_type": "invalid_type",
1629                    "pattern": "test",
1630                    "confidence": 0.5
1631                },
1632                {
1633                    "pattern_type": "reference",
1634                    "pattern": "valid pattern",
1635                    "confidence": 0.7
1636                }
1637            ]
1638        }"#;
1639
1640        let result = parse_memory_response(json, None, None).unwrap();
1641        // Invalid pattern type should be skipped
1642        assert_eq!(result.conversation_patterns.len(), 1);
1643        assert_eq!(result.conversation_patterns[0].pattern, "valid pattern");
1644    }
1645
1646    #[test]
1647    fn test_parse_memory_response_patterns_empty_string() {
1648        let json = r#"{
1649            "memories": [],
1650            "focus_points": [],
1651            "conversation_patterns": [
1652                {
1653                    "pattern_type": "reference",
1654                    "pattern": "",
1655                    "confidence": 0.5
1656                },
1657                {
1658                    "pattern_type": "code",
1659                    "pattern": "   ",
1660                    "confidence": 0.5
1661                },
1662                {
1663                    "pattern_type": "reference",
1664                    "pattern": "valid",
1665                    "confidence": 0.8
1666                }
1667            ]
1668        }"#;
1669
1670        let result = parse_memory_response(json, None, None).unwrap();
1671        // Empty patterns should be skipped
1672        assert_eq!(result.conversation_patterns.len(), 1);
1673        assert_eq!(result.conversation_patterns[0].pattern, "valid");
1674    }
1675
1676    #[test]
1677    fn test_parse_memory_response_patterns_confidence_clamped() {
1678        let json = r#"{
1679            "memories": [],
1680            "focus_points": [],
1681            "conversation_patterns": [
1682                {
1683                    "pattern_type": "reference",
1684                    "pattern": "test1",
1685                    "confidence": 1.5
1686                },
1687                {
1688                    "pattern_type": "code",
1689                    "pattern": "test2",
1690                    "confidence": -0.3
1691                }
1692            ]
1693        }"#;
1694
1695        let result = parse_memory_response(json, None, None).unwrap();
1696        assert_eq!(result.conversation_patterns.len(), 2);
1697
1698        // Confidence should be clamped to [0.0, 1.0]
1699        assert_eq!(result.conversation_patterns[0].confidence, 1.0);
1700        // Negative confidence should use default 0.5 (since <= 0.0 triggers default)
1701        assert_eq!(result.conversation_patterns[1].confidence, 0.5);
1702    }
1703
1704    #[test]
1705    fn test_parse_memory_response_patterns_source() {
1706        let json = r#"{
1707            "memories": [],
1708            "focus_points": [],
1709            "conversation_patterns": [
1710                {
1711                    "pattern_type": "reference",
1712                    "pattern": "PR #123",
1713                    "confidence": 0.9
1714                }
1715            ]
1716        }"#;
1717
1718        let result = parse_memory_response(json, None, None).unwrap();
1719        let pattern = &result.conversation_patterns[0];
1720
1721        // Source should be UserConversation
1722        match &pattern.source {
1723            PatternSource::UserConversation { example } => {
1724                assert_eq!(example, "");
1725            }
1726            _ => panic!("Expected UserConversation source"),
1727        }
1728    }
1729
1730    #[test]
1731    fn test_parse_memory_response_backward_compatible() {
1732        // Old format without conversation_patterns should still work
1733        let json = r#"{
1734            "memories": [
1735                {
1736                    "category": "decision",
1737                    "content": "使用 Rust 作为主要语言",
1738                    "importance": 80,
1739                    "keywords": ["Rust"],
1740                    "tags": ["backend"]
1741                }
1742            ],
1743            "focus_points": [
1744                {
1745                    "topic": "API设计",
1746                    "keywords": ["API", "REST"],
1747                    "importance": 0.8
1748                }
1749            ]
1750        }"#;
1751
1752        let result = parse_memory_response(json, None, None).unwrap();
1753        assert_eq!(result.memories.len(), 1);
1754        assert_eq!(result.focus_points.len(), 1);
1755        assert_eq!(result.conversation_patterns.len(), 0);
1756
1757        // Verify memory content
1758        assert_eq!(result.memories[0].category, MemoryCategory::Decision);
1759        assert!(result.memories[0].content.contains("Rust"));
1760    }
1761
1762    #[test]
1763    fn test_parse_memory_response_with_code_block_markers() {
1764        // JSON wrapped in code block markers should still parse
1765        let json = r#"```json
1766{
1767    "memories": [],
1768    "focus_points": [],
1769    "conversation_patterns": [
1770        {
1771            "pattern_type": "code",
1772            "pattern": "```",
1773            "confidence": 0.7
1774        }
1775    ]
1776}
1777```"#;
1778
1779        let result = parse_memory_response(json, None, None).unwrap();
1780        assert_eq!(result.conversation_patterns.len(), 1);
1781        assert_eq!(result.conversation_patterns[0].pattern, "```");
1782    }
1783
1784    // =========================================================================
1785    // ExtractionResult Tests
1786    // =========================================================================
1787
1788    #[test]
1789    fn test_extraction_result_has_patterns_field() {
1790        let result = ExtractionResult {
1791            memories: vec![],
1792            focus_points: vec![],
1793            conversation_patterns: vec![
1794                ConversationPattern::new(
1795                    PatternType::Reference,
1796                    "test pattern",
1797                    PatternSource::Manual,
1798                ),
1799            ],
1800        };
1801
1802        assert_eq!(result.conversation_patterns.len(), 1);
1803    }
1804
1805    #[test]
1806    fn test_extraction_result_clone() {
1807        let result = ExtractionResult {
1808            memories: vec![],
1809            focus_points: vec![],
1810            conversation_patterns: vec![
1811                ConversationPattern::new(
1812                    PatternType::Code,
1813                    "fn test()",
1814                    PatternSource::Manual,
1815                ),
1816            ],
1817        };
1818
1819        let cloned = result.clone();
1820        assert_eq!(cloned.conversation_patterns.len(), 1);
1821        assert_eq!(cloned.conversation_patterns[0].pattern, "fn test()");
1822    }
1823
1824    #[test]
1825    fn test_extraction_result_empty_patterns() {
1826        // Test ExtractionResult with empty patterns
1827        let result = ExtractionResult {
1828            memories: vec![],
1829            focus_points: vec![],
1830            conversation_patterns: vec![],
1831        };
1832
1833        assert!(result.conversation_patterns.is_empty());
1834        assert!(result.memories.is_empty());
1835        assert!(result.focus_points.is_empty());
1836    }
1837
1838    // =========================================================================
1839    // AI Prompt Validation Tests
1840    // =========================================================================
1841
1842    #[test]
1843    fn test_memory_extract_prompt_contains_patterns_guidance() {
1844        // Verify the prompt includes conversation pattern extraction guidance
1845        assert!(
1846            MEMORY_EXTRACT_SYSTEM_PROMPT.contains("对话模式提取"),
1847            "Prompt should contain pattern extraction guidance"
1848        );
1849        assert!(
1850            MEMORY_EXTRACT_SYSTEM_PROMPT.contains("reference"),
1851            "Prompt should mention reference pattern type"
1852        );
1853        assert!(
1854            MEMORY_EXTRACT_SYSTEM_PROMPT.contains("code"),
1855            "Prompt should mention code pattern type"
1856        );
1857    }
1858
1859    #[test]
1860    fn test_memory_extract_prompt_contains_trigger_condition() {
1861        // Verify the prompt mentions >500 chars trigger condition
1862        assert!(
1863            MEMORY_EXTRACT_SYSTEM_PROMPT.contains("500"),
1864            "Prompt should mention 500 chars trigger condition"
1865        );
1866        assert!(
1867            MEMORY_EXTRACT_SYSTEM_PROMPT.contains("> 500") || MEMORY_EXTRACT_SYSTEM_PROMPT.contains("超过500"),
1868            "Prompt should specify > 500 chars condition"
1869        );
1870    }
1871
1872    #[test]
1873    fn test_memory_extract_prompt_contains_output_format() {
1874        // Verify the prompt shows correct JSON output format with patterns
1875        assert!(
1876            MEMORY_EXTRACT_SYSTEM_PROMPT.contains("conversation_patterns"),
1877            "Prompt should show conversation_patterns in output format"
1878        );
1879        assert!(
1880            MEMORY_EXTRACT_SYSTEM_PROMPT.contains("pattern_type"),
1881            "Prompt should show pattern_type field"
1882        );
1883        assert!(
1884            MEMORY_EXTRACT_SYSTEM_PROMPT.contains("confidence"),
1885            "Prompt should show confidence field"
1886        );
1887    }
1888
1889    // =========================================================================
1890    // Integration Tests - Combined Extraction
1891    // =========================================================================
1892
1893    #[test]
1894    fn test_parse_memory_response_full_integration() {
1895        // Test complete extraction with memories, focus_points, and patterns together
1896        let json = r#"{
1897            "memories": [
1898                {
1899                    "category": "decision",
1900                    "content": "使用 Rust 作为主要语言。**Why:** 性能要求",
1901                    "importance": 85,
1902                    "keywords": ["Rust"],
1903                    "tags": ["backend"]
1904                }
1905            ],
1906            "focus_points": [
1907                {
1908                    "topic": "API设计",
1909                    "keywords": ["API", "REST"],
1910                    "entities": ["User", "Order"],
1911                    "importance": 0.8
1912                }
1913            ],
1914            "conversation_patterns": [
1915                {
1916                    "pattern_type": "reference",
1917                    "pattern": "正如我所说",
1918                    "confidence": 0.9
1919                },
1920                {
1921                    "pattern_type": "code",
1922                    "pattern": "fn ",
1923                    "confidence": 0.7
1924                }
1925            ]
1926        }"#;
1927
1928        let result = parse_memory_response(json, Some("session-123"), Some("/project/path")).unwrap();
1929
1930        // Verify all three components
1931        assert_eq!(result.memories.len(), 1);
1932        assert_eq!(result.focus_points.len(), 1);
1933        assert_eq!(result.conversation_patterns.len(), 2);
1934
1935        // Check memory
1936        assert_eq!(result.memories[0].category, MemoryCategory::Decision);
1937        assert!(result.memories[0].content.contains("Rust"));
1938
1939        // Check focus point
1940        assert_eq!(result.focus_points[0].topic, "API设计");
1941
1942        // Check patterns
1943        assert_eq!(result.conversation_patterns[0].pattern_type, PatternType::Reference);
1944        assert_eq!(result.conversation_patterns[1].pattern_type, PatternType::Code);
1945    }
1946
1947    #[test]
1948    fn test_parse_memory_response_mixed_valid_invalid_patterns() {
1949        // Test with mix of valid and invalid patterns
1950        let json = r#"{
1951            "memories": [],
1952            "focus_points": [],
1953            "conversation_patterns": [
1954                {
1955                    "pattern_type": "reference",
1956                    "pattern": "valid pattern 1",
1957                    "confidence": 0.8
1958                },
1959                {
1960                    "pattern_type": "unknown_type",
1961                    "pattern": "should be skipped",
1962                    "confidence": 0.5
1963                },
1964                {
1965                    "pattern_type": "code",
1966                    "pattern": "fn valid",
1967                    "confidence": 0.6
1968                },
1969                {
1970                    "pattern_type": "reference",
1971                    "pattern": "",
1972                    "confidence": 0.9
1973                }
1974            ]
1975        }"#;
1976
1977        let result = parse_memory_response(json, None, None).unwrap();
1978
1979        // Should only have 2 valid patterns
1980        assert_eq!(result.conversation_patterns.len(), 2);
1981        assert_eq!(result.conversation_patterns[0].pattern, "valid pattern 1");
1982        assert_eq!(result.conversation_patterns[1].pattern, "fn valid");
1983    }
1984
1985    #[test]
1986    fn test_parse_memory_response_patterns_with_session_and_project() {
1987        // Verify session_id and project_path are passed through correctly
1988        // (patterns don't use them, but the function accepts them)
1989        let json = r#"{
1990            "memories": [
1991                {
1992                    "category": "technical",
1993                    "content": "Using PostgreSQL database",
1994                    "importance": 70,
1995                    "keywords": ["PostgreSQL"],
1996                    "tags": ["database"]
1997                }
1998            ],
1999            "focus_points": [],
2000            "conversation_patterns": [
2001                {
2002                    "pattern_type": "reference",
2003                    "pattern": "as mentioned",
2004                    "confidence": 0.7
2005                }
2006            ]
2007        }"#;
2008
2009        let result = parse_memory_response(json, Some("test-session"), Some("/test/project")).unwrap();
2010
2011        // Memory should have source_session and project_path
2012        assert_eq!(result.memories[0].source_session, Some("test-session".to_string()));
2013        assert_eq!(result.memories[0].project_path, Some("/test/project".to_string()));
2014
2015        // Pattern should be parsed correctly
2016        assert_eq!(result.conversation_patterns.len(), 1);
2017    }
2018
2019    #[test]
2020    fn test_parse_memory_response_all_pattern_types() {
2021        // Test both supported pattern types
2022        let json = r#"{
2023            "memories": [],
2024            "focus_points": [],
2025            "conversation_patterns": [
2026                {
2027                    "pattern_type": "reference",
2028                    "pattern": "previously discussed",
2029                    "confidence": 0.8
2030                },
2031                {
2032                    "pattern_type": "Reference",
2033                    "pattern": "case insensitive",
2034                    "confidence": 0.7
2035                },
2036                {
2037                    "pattern_type": "CODE",
2038                    "pattern": "function ",
2039                    "confidence": 0.6
2040                },
2041                {
2042                    "pattern_type": "code",
2043                    "pattern": "class ",
2044                    "confidence": 0.5
2045                }
2046            ]
2047        }"#;
2048
2049        let result = parse_memory_response(json, None, None).unwrap();
2050
2051        // All should be parsed (case-insensitive pattern_type)
2052        assert_eq!(result.conversation_patterns.len(), 4);
2053
2054        // Check types
2055        assert_eq!(result.conversation_patterns[0].pattern_type, PatternType::Reference);
2056        assert_eq!(result.conversation_patterns[1].pattern_type, PatternType::Reference);
2057        assert_eq!(result.conversation_patterns[2].pattern_type, PatternType::Code);
2058        assert_eq!(result.conversation_patterns[3].pattern_type, PatternType::Code);
2059    }
2060
2061    #[test]
2062    fn test_extraction_result_debug_trait() {
2063        // Test that ExtractionResult implements Debug
2064        let result = ExtractionResult {
2065            memories: vec![],
2066            focus_points: vec![],
2067            conversation_patterns: vec![
2068                ConversationPattern::new(
2069                    PatternType::Reference,
2070                    "test",
2071                    PatternSource::Manual,
2072                ),
2073            ],
2074        };
2075
2076        let debug_str = format!("{:?}", result);
2077        assert!(debug_str.contains("ExtractionResult"));
2078        assert!(debug_str.contains("conversation_patterns"));
2079    }
2080
2081    // =========================================================================
2082    // Unified Extraction Tests
2083    // =========================================================================
2084
2085    #[test]
2086    fn test_parse_unified_response_full() {
2087        let json = r#"{
2088            "memories": [
2089                {
2090                    "category": "decision",
2091                    "content": "使用 Rust 作为主要语言",
2092                    "importance": 85,
2093                    "keywords": ["Rust"],
2094                    "tags": ["backend"]
2095                }
2096            ],
2097            "focus_points": [
2098                {
2099                    "topic": "API设计",
2100                    "keywords": ["API", "REST"],
2101                    "entities": ["User", "Order"],
2102                    "core_question": "如何优化 API?",
2103                    "importance": 0.8,
2104                    "is_current": true
2105                }
2106            ],
2107            "conversation_patterns": [
2108                {
2109                    "pattern_type": "reference",
2110                    "pattern": "正如我所说",
2111                    "confidence": 0.8
2112                }
2113            ],
2114            "focus_keywords": {
2115                "transition": ["换个话题"],
2116                "question": ["怎么"],
2117                "task": ["帮我"],
2118                "tech": ["rust"]
2119            }
2120        }"#;
2121
2122        let result = parse_unified_response(json, Some("session-123"), Some("/project")).unwrap();
2123
2124        // Verify all components
2125        assert_eq!(result.memories.len(), 1);
2126        assert_eq!(result.memories[0].category, MemoryCategory::Decision);
2127        assert!(result.memories[0].content.contains("Rust"));
2128
2129        assert_eq!(result.focus_points.len(), 1);
2130        assert_eq!(result.focus_points[0].topic, "API设计");
2131
2132        assert_eq!(result.conversation_patterns.len(), 1);
2133        assert_eq!(result.conversation_patterns[0].pattern_type, PatternType::Reference);
2134
2135        assert!(!result.focus_keywords.is_empty());
2136        assert_eq!(result.focus_keywords.transition.len(), 1);
2137        assert_eq!(result.focus_keywords.question.len(), 1);
2138        assert_eq!(result.focus_keywords.task.len(), 1);
2139        assert_eq!(result.focus_keywords.tech.len(), 1);
2140    }
2141
2142    #[test]
2143    fn test_parse_unified_response_empty() {
2144        let json = r#"{
2145            "memories": [],
2146            "focus_points": [],
2147            "conversation_patterns": [],
2148            "focus_keywords": {
2149                "transition": [],
2150                "question": [],
2151                "task": [],
2152                "tech": []
2153            }
2154        }"#;
2155
2156        let result = parse_unified_response(json, None, None).unwrap();
2157
2158        assert!(result.memories.is_empty());
2159        assert!(result.focus_points.is_empty());
2160        assert!(result.conversation_patterns.is_empty());
2161        assert!(result.focus_keywords.is_empty());
2162    }
2163
2164    #[test]
2165    fn test_parse_unified_response_partial() {
2166        // Test with only memories (no focus_keywords)
2167        let json = r#"{
2168            "memories": [
2169                {
2170                    "category": "technical",
2171                    "content": "使用 PostgreSQL 作为主数据库存储",
2172                    "importance": 70
2173                }
2174            ]
2175        }"#;
2176
2177        let result = parse_unified_response(json, None, None).unwrap();
2178
2179        assert_eq!(result.memories.len(), 1);
2180        assert!(result.focus_points.is_empty());
2181        assert!(result.conversation_patterns.is_empty());
2182        assert!(result.focus_keywords.is_empty());
2183    }
2184
2185    #[test]
2186    fn test_parse_unified_response_with_code_block() {
2187        let json = r#"```json
2188{
2189    "memories": [],
2190    "focus_points": [],
2191    "conversation_patterns": [],
2192    "focus_keywords": {
2193        "transition": ["switching"],
2194        "question": [],
2195        "task": [],
2196        "tech": []
2197    }
2198}
2199```"#;
2200
2201        let result = parse_unified_response(json, None, None).unwrap();
2202
2203        assert_eq!(result.focus_keywords.transition.len(), 1);
2204        assert_eq!(result.focus_keywords.transition[0], "switching");
2205    }
2206
2207    #[test]
2208    fn test_unified_extraction_result_default() {
2209        let result = UnifiedExtractionResult::default();
2210        assert!(result.memories.is_empty());
2211        assert!(result.focus_points.is_empty());
2212        assert!(result.conversation_patterns.is_empty());
2213        assert!(result.focus_keywords.is_empty());
2214    }
2215
2216    #[test]
2217    fn test_unified_extraction_prompt_contains_all_sections() {
2218        // Verify the unified prompt contains all extraction sections
2219        assert!(UNIFIED_EXTRACTION_PROMPT.contains("长期记忆"));
2220        assert!(UNIFIED_EXTRACTION_PROMPT.contains("当前焦点"));
2221        assert!(UNIFIED_EXTRACTION_PROMPT.contains("对话模式"));
2222        assert!(UNIFIED_EXTRACTION_PROMPT.contains("焦点关键词"));
2223    }
2224
2225    #[test]
2226    fn test_unified_extraction_prompt_contains_keyword_categories() {
2227        assert!(UNIFIED_EXTRACTION_PROMPT.contains("transition"));
2228        assert!(UNIFIED_EXTRACTION_PROMPT.contains("question"));
2229        assert!(UNIFIED_EXTRACTION_PROMPT.contains("task"));
2230        assert!(UNIFIED_EXTRACTION_PROMPT.contains("tech"));
2231    }
2232
2233    #[test]
2234    fn test_parse_unified_response_keywords_merged() {
2235        let json = r#"{
2236            "memories": [],
2237            "focus_points": [],
2238            "conversation_patterns": [],
2239            "focus_keywords": {
2240                "transition": ["换个话题", "switching", "however"],
2241                "question": ["怎么", "how", "为什么"],
2242                "task": ["帮我", "implement", "创建"],
2243                "tech": ["rust", "数据库", "api"]
2244            }
2245        }"#;
2246
2247        let result = parse_unified_response(json, None, None).unwrap();
2248
2249        assert_eq!(result.focus_keywords.transition.len(), 3);
2250        assert_eq!(result.focus_keywords.question.len(), 3);
2251        assert_eq!(result.focus_keywords.task.len(), 3);
2252        assert_eq!(result.focus_keywords.tech.len(), 3);
2253        assert_eq!(result.focus_keywords.total_count(), 12);
2254    }
2255}