matrixcode_core/memory/
extractor.rs

1//! Memory extraction: AI-based and rule-based detection.
2
3use crate::truncate::truncate_chars;
4use anyhow::Result;
5use serde::Deserialize;
6
7use super::config::*;
8use super::entry::{MemoryCategory, MemoryEntry};
9use super::manager::AutoMemory;
10use super::conversation_pattern::{ConversationPattern, PatternType, PatternSource};
11use super::unified_extraction::{UnifiedExtractionResult, ExtractedKeywords};
12use crate::compress::FocusPoint;
13
14// ============================================================================
15// Memory Extractor Trait
16// ============================================================================
17
/// Trait for memory extraction implementations.
///
/// Implementors must be `Send + Sync`. The `#[async_trait]` attribute is what
/// allows the trait to declare an `async fn`.
#[async_trait::async_trait]
pub trait MemoryExtractor: Send + Sync {
    /// Extract memories and focus points from conversation text using AI.
    ///
    /// # Arguments
    /// * `text` - Conversation text to analyze
    /// * `session_id` - Optional session ID handed to the implementation
    /// * `project_path` - Optional project path handed to the implementation
    ///
    /// # Errors
    /// Returns an error when the implementation cannot produce an
    /// [`ExtractionResult`].
    async fn extract(
        &self,
        text: &str,
        session_id: Option<&str>,
        project_path: Option<&str>,
    ) -> Result<ExtractionResult>;

    /// Get the model name used for extraction.
    fn model_name(&self) -> &str;
}
32
/// Result of memory extraction (memories + focus points + conversation patterns).
#[derive(Debug, Clone)]
pub struct ExtractionResult {
    /// Extracted long-term memory entries.
    pub memories: Vec<MemoryEntry>,
    /// Extracted focus points.
    pub focus_points: Vec<FocusPoint>,
    /// Extracted conversation patterns (reference and code patterns).
    pub conversation_patterns: Vec<ConversationPattern>,
}
41
/// AI-based memory extractor using a fast/cheap model.
pub struct AiMemoryExtractor {
    /// Chat provider the extraction request is sent through.
    provider: Box<dyn crate::providers::Provider>,
    /// Model name, returned by `model_name()`.
    model: String,
}
47
48impl AiMemoryExtractor {
49    /// Create a new AI memory extractor.
50    pub fn new(provider: Box<dyn crate::providers::Provider>, model: String) -> Self {
51        Self { provider, model }
52    }
53
54    /// Create a minimal extractor (for background tasks, uses simplified prompt).
55    /// This is more efficient for non-blocking background extraction.
56    pub fn new_minimal(model: String) -> Self {
57        // Create a minimal provider that uses the global config
58        // This is for background tasks, so we use a simplified approach
59        Self {
60            provider: crate::create_minimal_provider(&model),
61            model,
62        }
63    }
64}
65
/// System prompt (in Chinese) sent with every `AiMemoryExtractor::extract` request.
///
/// It asks the model to reply with strict JSON containing the `memories`,
/// `focus_points` and `conversation_patterns` keys, which
/// `parse_memory_response` then deserializes.
const MEMORY_EXTRACT_SYSTEM_PROMPT: &str = r#"你是记忆提取助手。从对话中提取值得长期记忆的关键信息。

# 记忆类型

<types>
<type>
    <name>decision</name>
    <description>项目或技术选型的决定</description>
    <when_to_save>用户明确做出技术决策时</when_to_save>
    <body_structure>先写决策内容，然后 **Why:** 决策原因，**Context:** 适用场景</body_structure>
</type>
<type>
    <name>preference</name>
    <description>用户习惯或偏好</description>
    <when_to_save>用户表达"我喜欢/习惯/偏好"时</when_to_save>
    <body_structure>先写偏好内容，然后 **Why:** 偏好原因（如有）</body_structure>
</type>
<type>
    <name>solution</name>
    <description>解决问题的具体方法</description>
    <when_to_save>问题成功解决且方法可复用时</when_to_save>
    <body_structure>先写解决方案，然后 **Problem:** 解决的问题，**Key:** 关键步骤</body_structure>
</type>
<type>
    <name>finding</name>
    <description>重要发现或信息</description>
    <when_to_save>发现非显而易见的信息时</when_to_save>
</type>
<type>
    <name>technical</name>
    <description>技术栈或框架信息</description>
    <when_to_save>确认项目使用的技术时</when_to_save>
</type>
<type>
    <name>structure</name>
    <description>项目结构信息</description>
    <when_to_save>发现关键入口或核心文件时</when_to_save>
</type>
</types>

# 不要保存什么到记忆中

- 代码路径、文件名、目录结构 — 可从项目实时获取
- Git 历史、最近更改 — git log/blame 是权威来源
- 临时状态：进行中的任务、当前对话上下文
- 已在 CLAUDE.md/MATRIX.md 中记录的内容
- 错误信息和调试细节 — 问题解决后无需保留

这些排除规则即使当用户要求保存时也适用。
如果他们要求保存临时信息，问："有什么 surprising 或 non-obvious 的部分？"

# 对话模式提取

当对话文本较长时（超过500字符），还要提取对话中使用的模式：

1. **引用模式 (reference)**：用户如何引用之前的内容
   - 示例："正如前面所说"、"接着刚才的话题"、"as mentioned"、"previously"

2. **代码模式 (code)**：对话中涉及的代码风格关键词
   - 示例：语言关键词（fn, function, class）、代码块标记（```）

模式提取规则：
- 只提取明确出现的模式，不要推测
- confidence 范围 0.0-1.0，越常见越低（常见模式置信度低）
- 只在文本 > 500 字符时提取模式

# 输出格式

严格 JSON：
{
  "memories": [
    {
      "category": "decision",
      "content": "采用 PostgreSQL 作为主数据库。**Why:** 性能要求和团队经验",
      "importance": 85,
      "keywords": ["PostgreSQL", "数据库", "database"],
      "tags": ["backend", "storage"]
    }
  ],
  "focus_points": [],
  "conversation_patterns": [
    {
      "pattern_type": "reference",
      "pattern": "正如我所说",
      "confidence": 0.8
    },
    {
      "pattern_type": "code",
      "pattern": "fn ",
      "confidence": 0.6
    }
  ]
}

关键词提取：3-5 个核心关键词（技术名词、项目名、关键概念）
标签提取：1-3 个分类标签（backend、frontend、config、auth 等）

只返回 JSON，不要其他解释。"#;
164
165#[async_trait::async_trait]
166impl MemoryExtractor for AiMemoryExtractor {
167    async fn extract(
168        &self,
169        text: &str,
170        session_id: Option<&str>,
171        project_path: Option<&str>,
172    ) -> Result<ExtractionResult> {
173        use crate::providers::{ChatRequest, Message, MessageContent, Role};
174
175        // Safely truncate to ~4000 chars respecting UTF-8 boundaries
176        let truncated = truncate_chars(text, 4000);
177
178        let request = ChatRequest {
179            messages: vec![Message {
180                role: Role::User,
181                content: MessageContent::Text(format!(
182                    "请从以下对话中提取值得记忆的关键信息和当前聚焦点：\n\n{}",
183                    truncated
184                )),
185            }],
186            tools: vec![],
187            system: Some(MEMORY_EXTRACT_SYSTEM_PROMPT.to_string()),
188            think: false,
189            max_tokens: 512,
190            server_tools: vec![],
191            enable_caching: false,
192        };
193
194        let response = self.provider.chat(request).await?;
195
196        let response_text = response
197            .content
198            .iter()
199            .filter_map(|b| {
200                if let crate::providers::ContentBlock::Text { text } = b {
201                    Some(text.clone())
202                } else {
203                    None
204                }
205            })
206            .collect::<Vec<_>>()
207            .join("");
208
209        parse_memory_response(&response_text, session_id, project_path)
210    }
211
212    fn model_name(&self) -> &str {
213        &self.model
214    }
215}
216
217fn parse_memory_response(
218    json_text: &str,
219    session_id: Option<&str>,
220    project_path: Option<&str>,
221) -> Result<ExtractionResult> {
222    let cleaned = json_text
223        .trim()
224        .trim_start_matches("```json")
225        .trim_start_matches("```")
226        .trim_end_matches("```")
227        .trim();
228
229    #[derive(Deserialize)]
230    struct MemoryResponse {
231        memories: Vec<MemoryItem>,
232        #[serde(default)]
233        focus_points: Vec<FocusPointItem>,
234        #[serde(default)]
235        conversation_patterns: Vec<ConversationPatternItem>,
236    }
237
238    #[derive(Deserialize)]
239    struct MemoryItem {
240        category: String,
241        content: String,
242        #[serde(default)]
243        importance: f64,
244        #[serde(default)]
245        keywords: Vec<String>,
246        #[serde(default)]
247        tags: Vec<String>,
248    }
249
250    #[derive(Deserialize)]
251    struct FocusPointItem {
252        topic: String,
253        #[serde(default)]
254        keywords: Vec<String>,
255        #[serde(default)]
256        entities: Vec<String>,
257        #[serde(default)]
258        core_question: Option<String>,
259        #[serde(default = "default_importance")]
260        importance: f32,
261        #[serde(default = "default_is_current")]
262        is_current: bool,
263    }
264
265    #[derive(Deserialize)]
266    struct ConversationPatternItem {
267        pattern_type: String,
268        pattern: String,
269        #[serde(default)]
270        confidence: f32,
271    }
272
273    fn default_importance() -> f32 { 0.7 }
274    fn default_is_current() -> bool { true }
275
276    let parsed: MemoryResponse = serde_json::from_str(cleaned)?;
277
278    // Parse memories
279    let entries = parsed
280        .memories
281        .into_iter()
282        .filter_map(|item| {
283            let category = match item.category.to_lowercase().as_str() {
284                "decision" => MemoryCategory::Decision,
285                "preference" => MemoryCategory::Preference,
286                "solution" => MemoryCategory::Solution,
287                "finding" => MemoryCategory::Finding,
288                "technical" => MemoryCategory::Technical,
289                "structure" => MemoryCategory::Structure,
290                _ => return None,
291            };
292
293            if item.content.len() < MIN_MEMORY_CONTENT_LENGTH {
294                return None;
295            }
296
297            let mut entry = MemoryEntry::new(
298                category,
299                item.content,
300                session_id.map(|s| s.to_string()),
301                project_path.map(|p| p.to_string()),
302            );
303            if item.importance > 0.0 {
304                entry.importance = item.importance.clamp(0.0, 100.0);
305            }
306            // Add AI-extracted keywords and tags
307            if !item.keywords.is_empty() {
308                entry.tags.extend(item.keywords);
309            }
310            if !item.tags.is_empty() {
311                entry.tags.extend(item.tags);
312            }
313            entry.tags.dedup();
314
315            Some(entry)
316        })
317        .collect();
318
319    // Parse focus points
320    use chrono::Utc;
321    use crate::compress::FocusStatus;
322
323    let focus_points = parsed
324        .focus_points
325        .into_iter()
326        .map(|item| {
327            let mut focus = FocusPoint::new(
328                format!("focus-{}", Utc::now().timestamp()),
329                item.topic,
330                item.keywords,
331                item.entities,
332                item.core_question,
333                0,
334            );
335            focus.importance = item.importance.clamp(0.0, 1.0);
336            if !item.is_current {
337                focus.status = FocusStatus::Suspended;
338            }
339            focus
340        })
341        .collect();
342
343    // Parse conversation patterns
344    let conversation_patterns = parsed
345        .conversation_patterns
346        .into_iter()
347        .filter_map(|item| {
348            // Parse pattern type
349            let pattern_type = match item.pattern_type.to_lowercase().as_str() {
350                "reference" => PatternType::Reference,
351                "code" => PatternType::Code,
352                _ => return None, // Skip unknown pattern types
353            };
354
355            // Skip empty patterns
356            if item.pattern.trim().is_empty() {
357                return None;
358            }
359
360            // Create pattern with UserConversation source
361            let mut pattern = ConversationPattern::new(
362                pattern_type,
363                item.pattern,
364                PatternSource::UserConversation {
365                    example: String::new(), // Will be filled when pattern is used
366                },
367            );
368
369            // Set confidence (default to 0.5 if not specified or out of range)
370            pattern.confidence = if item.confidence > 0.0 {
371                item.confidence.clamp(0.0, 1.0)
372            } else {
373                0.5
374            };
375
376            Some(pattern)
377        })
378        .collect();
379
380    Ok(ExtractionResult {
381        memories: deduplicate_entries(entries),
382        focus_points,
383        conversation_patterns,
384    })
385}
386
387fn deduplicate_entries(entries: Vec<MemoryEntry>) -> Vec<MemoryEntry> {
388    let mut seen: Vec<String> = Vec::new();
389    entries
390        .into_iter()
391        .filter(|e| {
392            let content_lower = e.content.to_lowercase();
393            if seen.iter().any(|s| {
394                AutoMemory::calculate_similarity(s, &content_lower) >= SIMILARITY_THRESHOLD
395            }) {
396                false
397            } else {
398                seen.push(content_lower);
399                true
400            }
401        })
402        .take(MAX_DETECTED_ENTRIES)
403        .collect()
404}
405
406// ============================================================================
// Rule-based Detection (hard-coded keyword patterns)
408// ============================================================================
409
410/// Detect memories from text using hard-coded patterns.
411pub fn detect_memories_fallback(
412    text: &str,
413    session_id: Option<&str>,
414    project_path: Option<&str>,
415) -> Vec<MemoryEntry> {
416    let mut entries = Vec::new();
417    let text_lower = text.to_lowercase();
418
419    // Hard-coded patterns for each category
420    let patterns = [
421        (
422            MemoryCategory::Decision,
423            ["决定", "选择", "采用", "定下", "decided", "chose"],
424        ),
425        (
426            MemoryCategory::Preference,
427            ["偏好", "习惯", "喜欢", "首选", "prefer", "like"],
428        ),
429        (
430            MemoryCategory::Solution,
431            ["解决", "修复", "搞定", "改成", "fixed", "solved"],
432        ),
433        (
434            MemoryCategory::Finding,
435            ["发现", "原来", "原因", "定位", "found", "reason"],
436        ),
437        (
438            MemoryCategory::Technical,
439            ["技术栈", "框架", "用的", "基于", "stack", "using"],
440        ),
441        (
442            MemoryCategory::Structure,
443            ["入口", "主文件", "目录", "位于", "entry", "main"],
444        ),
445    ];
446
447    for (category, keywords) in patterns {
448        for keyword in keywords {
449            if text_lower.contains(&keyword.to_lowercase()) {
450                let content = extract_memory_content(text, keyword);
451                if !content.is_empty() && content.len() >= MIN_MEMORY_CONTENT_LENGTH {
452                    entries.push(MemoryEntry::new(
453                        category,
454                        content,
455                        session_id.map(|s| s.to_string()),
456                        project_path.map(|p| p.to_string()),
457                    ));
458                }
459            }
460        }
461    }
462
463    deduplicate_entries(entries)
464}
465
/// Detect memories from text (wrapper for fallback).
///
/// Delegates directly to [`detect_memories_fallback`] with the same arguments
/// and returns its result unchanged.
pub fn detect_memories_from_text(
    text: &str,
    session_id: Option<&str>,
    project_path: Option<&str>,
) -> Vec<MemoryEntry> {
    detect_memories_fallback(text, session_id, project_path)
}
474
475/// Smart detection: AI-first with rule-based fallback.
476///
477/// Priority order:
478/// 1. AI extraction (if text > 200 chars and extractor available)
479/// 2. Rule-based fallback (if AI fails or text too short)
480pub async fn detect_memories_smart(
481    text: &str,
482    session_id: Option<&str>,
483    project_path: Option<&str>,
484    extractor: Option<&AiMemoryExtractor>,
485) -> ExtractionResult {
486    let mode = AiDetectionMode::from_env();
487    let text_len = text.len();
488
489    // Determine if we should try AI first
490    // Only use AI for text > 200 chars (avoid API overhead for short texts)
491    let should_try_ai = mode != AiDetectionMode::Never && extractor.is_some() && text_len > 200;
492
493    // Debug log: show method and model
494    let model_name = extractor.map(|e| e.model_name()).unwrap_or("none");
495    crate::debug::debug_log().memory_ai_detection(
496        model_name,
497        0, // Will update after detection
498        text_len,
499        should_try_ai,
500    );
501
502    if should_try_ai && let Some(ex) = extractor {
503        if let Ok(result) = ex.extract(text, session_id, project_path).await {
504            // AI succeeded - use AI results entirely (skip hardcoded rules)
505            // Debug log: AI result
506            crate::debug::debug_log().memory_ai_detection(
507                ex.model_name(),
508                result.memories.len(),
509                text_len,
510                true,
511            );
512            return result;
513        }
514        // AI failed - log and skip rule-based fallback (per user request)
515        log::warn!("AI memory extraction failed, skipping detection for this turn");
516        return ExtractionResult {
517            memories: vec![],
518            focus_points: vec![],
519            conversation_patterns: vec![],
520        };
521    }
522
523    // For short texts (< 200 chars), skip detection entirely (per user request)
524    // No rule-based fallback
525    ExtractionResult {
526        memories: vec![],
527        focus_points: vec![],
528        conversation_patterns: vec![],
529    }
530}
531
532fn extract_memory_content(text: &str, keyword: &str) -> String {
533    let text_lower = text.to_lowercase();
534    let keyword_lower = keyword.to_lowercase();
535
536    let pos = match text_lower.find(&keyword_lower) {
537        Some(p) => p,
538        None => return String::new(),
539    };
540
541    // Find sentence containing the keyword
542    let start = text[..pos]
543        .rfind(['.', '。', '\n'])
544        .map(|i| i + 1)
545        .unwrap_or(0);
546
547    let end = text[pos..]
548        .find(['.', '。', '\n'])
549        .map(|i| pos + i + 1)
550        .unwrap_or(text.len());
551
552    let sentence = text[start..end].trim();
553
554    if sentence.len() > MAX_MEMORY_CONTENT_LENGTH {
555        sentence[..MAX_MEMORY_CONTENT_LENGTH].to_string()
556    } else {
557        sentence.to_string()
558    }
559}
560
561/// Infer category from content.
562pub fn infer_category_from_content(content: &str) -> MemoryCategory {
563    let lower = content.to_lowercase();
564
565    if lower.contains("决定")
566        || lower.contains("选择")
567        || lower.contains("采用")
568        || lower.contains("decided")
569    {
570        return MemoryCategory::Decision;
571    }
572    if lower.contains("喜欢")
573        || lower.contains("偏好")
574        || lower.contains("习惯")
575        || lower.contains("prefer")
576    {
577        return MemoryCategory::Preference;
578    }
579    if lower.contains("解决")
580        || lower.contains("修复")
581        || lower.contains("搞定")
582        || lower.contains("fixed")
583    {
584        return MemoryCategory::Solution;
585    }
586    if lower.contains("发现")
587        || lower.contains("原因")
588        || lower.contains("原来")
589        || lower.contains("found")
590    {
591        return MemoryCategory::Finding;
592    }
593    if lower.contains("技术")
594        || lower.contains("框架")
595        || lower.contains("库")
596        || lower.contains("tech")
597    {
598        return MemoryCategory::Technical;
599    }
600    if lower.contains("文件")
601        || lower.contains("目录")
602        || lower.contains("入口")
603        || lower.contains("file")
604    {
605        return MemoryCategory::Structure;
606    }
607
608    MemoryCategory::Finding // Default
609}
610
611// ============================================================================
612// Unified Extraction (One AI Call for All Information)
613// ============================================================================
614
/// Unified extraction system prompt for extracting all information in one call.
///
/// Written in Chinese and used as the system prompt of
/// `UnifiedExtractor::extract_unified`. It asks for strict JSON with the keys
/// `memories`, `focus_points`, `conversation_patterns` and `focus_keywords`.
const UNIFIED_EXTRACTION_PROMPT: &str = r#"你是信息提取助手。从对话中一次性提取以下信息：

## 1. 长期记忆 (memories)
- decision: 技术决策（如"决定使用 PostgreSQL"、"采用 React 架构"）
- preference: 用户偏好（如"我喜欢简洁的代码风格"、"习惯用 VS Code"）
- solution: 解决方案（如"通过添加缓存解决了性能问题"）
- finding: 重要发现（如"发现内存泄漏的原因"）
- technical: 技术栈（如"项目使用 Rust + Tokio"）
- structure: 项目结构（如"主入口是 src/main.rs"）

## 2. 当前焦点 (focus_points)
- topic: 当前讨论的主题
- keywords: 相关关键词
- entities: 涉及的文件/函数/类名
- core_question: 核心问题（可选）

## 3. 对话模式 (conversation_patterns)
- reference: 引用模式（如"正如前面所说"、"as mentioned"、"previously"）
- code: 代码模式（如"fn ", "function", "```", "class "）

## 4. 焦点关键词 (focus_keywords)
- transition: 话题转换词（如"换个话题", "switching", "however", "等等"）
- question: 提问词（如"怎么", "how", "为什么", "why", "请问"）
- task: 任务词（如"帮我", "implement", "创建", "create", "修复"）
- tech: 技术词（如"rust", "数据库", "api", "性能", "优化"）

## 输出格式（严格 JSON）

```json
{
  "memories": [
    {
      "category": "decision",
      "content": "采用 PostgreSQL 作为主数据库。**Why:** 性能要求",
      "importance": 85,
      "keywords": ["PostgreSQL", "数据库"],
      "tags": ["backend", "storage"]
    }
  ],
  "focus_points": [
    {
      "topic": "API 设计优化",
      "keywords": ["API", "REST", "性能"],
      "entities": ["api.rs", "handler"],
      "core_question": "如何优化 API 响应时间？",
      "importance": 0.8,
      "is_current": true
    }
  ],
  "conversation_patterns": [
    {
      "pattern_type": "reference",
      "pattern": "正如我所说",
      "confidence": 0.8
    },
    {
      "pattern_type": "code",
      "pattern": "fn ",
      "confidence": 0.6
    }
  ],
  "focus_keywords": {
    "transition": ["换个话题", "switching"],
    "question": ["怎么", "how"],
    "task": ["帮我", "implement"],
    "tech": ["rust", "性能"]
  }
}
```

## 规则
1. 只提取明确出现的信息，不要推测
2. 如果某类信息没有，返回空数组/对象
3. importance 范围：memories 0-100，focus_points 0.0-1.0
4. confidence 范围：0.0-1.0，常见模式置信度较低
5. 关键词提取 3-5 个核心关键词
6. 只返回 JSON，不要其他解释"#;
693
/// Unified extraction prompt with focus selection.
///
/// Written in Chinese and used as the system prompt of
/// `UnifiedExtractor::extract_unified_with_foci`, whose user message carries
/// the list of existing focuses. It asks the model to select one of them or
/// to request a new focus, and to answer with strict JSON containing
/// `focus_decision`, `memories` and `focus_keywords`.
const UNIFIED_EXTRACTION_WITH_FOCUS_PROMPT: &str = r#"你是信息提取和焦点决策助手。从对话中一次性完成以下任务：

## 1. 焦点决策 (focus_decision) - 最重要！

你会收到当前已有的焦点列表。请判断：

### 选择现有焦点
如果最新对话与某个现有焦点匹配：
- selected_focus_id: 该焦点的 ID
- need_new_focus: false
- confidence: 匹配置信度 (0.0-1.0)

### 创建新焦点
如果没有任何现有焦点匹配：
- selected_focus_id: null
- need_new_focus: true
- new_focus_topic: 新焦点主题
- new_core_question: 核心问题
- confidence: 创建置信度

### 判断话题切换
- is_topic_switch: 是否从某焦点切换到另一焦点
- previous_focus_id: 切换前的焦点 ID（如果有）

### 焦点类型 (focus_type)
- problem_solving: 修复 bug、解决错误
- task_execution: 实现功能、完成任务
- knowledge_exploration: 学习、研究、探索
- decision_making: 技术选型、架构设计
- code_optimization: 性能优化、重构
- general: 一般对话

## 2. 长期记忆 (memories)
- decision: 技术决策
- preference: 用户偏好
- solution: 解决方案
- finding: 重要发现
- technical: 技术栈
- structure: 项目结构

## 3. 焦点关键词 (focus_keywords)
- transition: 话题转换词
- question: 提问词
- task: 任务词
- tech: 技术词

## 输出格式（严格 JSON）

```json
{
  "focus_decision": {
    "selected_focus_id": "focus-1",
    "need_new_focus": false,
    "new_focus_topic": null,
    "new_core_question": null,
    "confidence": 0.85,
    "focus_type": "code_optimization",
    "is_topic_switch": true,
    "previous_focus_id": "focus-2",
    "focus_keywords": ["API", "latency", "performance"],
    "related_entities": ["api.rs", "handle_request()"],
    "reasoning": "用户从数据库切换到 API 性能话题"
  },
  "memories": [...],
  "focus_keywords": {
    "transition": ["换个话题"],
    "question": ["怎么"],
    "task": ["优化"],
    "tech": ["api", "性能"]
  }
}
```

## 规则
1. focus_decision 是最重要的输出，必须仔细判断
2. 现有焦点列表会随对话文本一起提供
3. 如果现有焦点都不匹配，必须标记 need_new_focus=true
4. confidence 反映你对决策的确信程度
5. 只返回 JSON，不要其他解释"#;
775
/// Unified extractor that extracts all information in a single AI call.
///
/// This replaces the separate AiMemoryExtractor and FocusExtractor,
/// reducing API calls and providing consistent extraction.
pub struct UnifiedExtractor {
    /// Chat provider the extraction requests are sent through.
    provider: Box<dyn crate::providers::Provider>,
    /// Model name, returned by `model_name()`.
    model: String,
}
784
785impl UnifiedExtractor {
786    /// Create a new unified extractor.
787    pub fn new(provider: Box<dyn crate::providers::Provider>, model: String) -> Self {
788        Self { provider, model }
789    }
790
791    /// Create a minimal unified extractor for background tasks.
792    pub fn new_minimal(model: String) -> Self {
793        Self {
794            provider: crate::create_minimal_provider(&model),
795            model,
796        }
797    }
798
799    /// Extract all information from conversation text in a single AI call.
800    pub async fn extract_unified(
801        &self,
802        text: &str,
803        session_id: Option<&str>,
804        project_path: Option<&str>,
805    ) -> Result<UnifiedExtractionResult> {
806        use crate::providers::{ChatRequest, Message, MessageContent, Role};
807
808        // Safely truncate to ~4000 chars respecting UTF-8 boundaries
809        let truncated = truncate_chars(text, 4000);
810
811        let request = ChatRequest {
812            messages: vec![Message {
813                role: Role::User,
814                content: MessageContent::Text(format!(
815                    "请从以下对话中提取所有信息：\n\n{}",
816                    truncated
817                )),
818            }],
819            tools: vec![],
820            system: Some(UNIFIED_EXTRACTION_PROMPT.to_string()),
821            think: false,
822            max_tokens: 1024, // Larger token limit for unified extraction
823            server_tools: vec![],
824            enable_caching: false,
825        };
826
827        let response = self.provider.chat(request).await?;
828
829        let response_text = response
830            .content
831            .iter()
832            .filter_map(|b| {
833                if let crate::providers::ContentBlock::Text { text } = b {
834                    Some(text.clone())
835                } else {
836                    None
837                }
838            })
839            .collect::<Vec<_>>()
840            .join("");
841
842        parse_unified_response(&response_text, session_id, project_path)
843    }
844
845    /// Extract all information WITH focus selection in a single AI call.
846    ///
847    /// This method receives existing focuses and asks AI to select the best match
848    /// or create a new focus if none matches. This ensures focus continuity.
849    ///
850    /// # Arguments
851    /// * `text` - Conversation text to analyze
852    /// * `existing_foci` - Current focus points from FocusManager (id, topic, keywords)
853    /// * `session_id` - Optional session ID
854    /// * `project_path` - Optional project path
855    ///
856    /// # Returns
857    /// UnifiedExtractionResult with focus_decision field populated
858    pub async fn extract_unified_with_foci(
859        &self,
860        text: &str,
861        existing_foci: &[(&str, &str, &[String])], // (id, topic, keywords)
862        session_id: Option<&str>,
863        project_path: Option<&str>,
864    ) -> Result<UnifiedExtractionResult> {
865        use crate::providers::{ChatRequest, Message, MessageContent, Role};
866
867        // Safely truncate to ~4000 chars respecting UTF-8 boundaries
868        let truncated = truncate_chars(text, 4000);
869
870        // Format existing focuses for AI
871        let foci_text = if existing_foci.is_empty() {
872            "（当前没有现有焦点）".to_string()
873        } else {
874            let mut foci_list = Vec::new();
875            for (id, topic, keywords) in existing_foci {
876                foci_list.push(format!(
877                    "- ID: {}\n  主题: {}\n  关键词: {}",
878                    id,
879                    topic,
880                    keywords.join(", ")
881                ));
882            }
883            format!("现有焦点列表：\n{}", foci_list.join("\n"))
884        };
885
886        let user_prompt = format!(
887            "{}\n\n最新对话：\n{}\n\n请判断最新对话与现有焦点的匹配关系，并做出焦点决策。",
888            foci_text,
889            truncated
890        );
891
892        let request = ChatRequest {
893            messages: vec![Message {
894                role: Role::User,
895                content: MessageContent::Text(user_prompt),
896            }],
897            tools: vec![],
898            system: Some(UNIFIED_EXTRACTION_WITH_FOCUS_PROMPT.to_string()),
899            think: false,
900            max_tokens: 1024,
901            server_tools: vec![],
902            enable_caching: false,
903        };
904
905        let response = self.provider.chat(request).await?;
906
907        let response_text = response
908            .content
909            .iter()
910            .filter_map(|b| {
911                if let crate::providers::ContentBlock::Text { text } = b {
912                    Some(text.clone())
913                } else {
914                    None
915                }
916            })
917            .collect::<Vec<_>>()
918            .join("");
919
920        parse_unified_response_with_focus(&response_text, session_id, project_path)
921    }
922
923    /// Get the model name used for extraction.
924    pub fn model_name(&self) -> &str {
925        &self.model
926    }
927}
928
929/// Parse unified extraction response from AI.
930fn parse_unified_response(
931    json_text: &str,
932    session_id: Option<&str>,
933    project_path: Option<&str>,
934) -> Result<UnifiedExtractionResult> {
935    let cleaned = json_text
936        .trim()
937        .trim_start_matches("```json")
938        .trim_start_matches("```")
939        .trim_end_matches("```")
940        .trim();
941
942    #[derive(Deserialize)]
943    struct UnifiedResponse {
944        #[serde(default)]
945        memories: Vec<MemoryItem>,
946        #[serde(default)]
947        focus_points: Vec<FocusPointItem>,
948        #[serde(default)]
949        conversation_patterns: Vec<ConversationPatternItem>,
950        #[serde(default)]
951        focus_keywords: FocusKeywordsItem,
952    }
953
954    #[derive(Deserialize, Default)]
955    struct FocusKeywordsItem {
956        #[serde(default)]
957        transition: Vec<String>,
958        #[serde(default)]
959        question: Vec<String>,
960        #[serde(default)]
961        task: Vec<String>,
962        #[serde(default)]
963        tech: Vec<String>,
964    }
965
966    #[derive(Deserialize)]
967    struct MemoryItem {
968        category: String,
969        content: String,
970        #[serde(default)]
971        importance: f64,
972        #[serde(default)]
973        keywords: Vec<String>,
974        #[serde(default)]
975        tags: Vec<String>,
976    }
977
978    #[derive(Deserialize)]
979    struct FocusPointItem {
980        topic: String,
981        #[serde(default)]
982        keywords: Vec<String>,
983        #[serde(default)]
984        entities: Vec<String>,
985        #[serde(default)]
986        core_question: Option<String>,
987        #[serde(default = "default_importance")]
988        importance: f32,
989        #[serde(default = "default_is_current")]
990        is_current: bool,
991    }
992
993    #[derive(Deserialize)]
994    struct ConversationPatternItem {
995        pattern_type: String,
996        pattern: String,
997        #[serde(default)]
998        confidence: f32,
999    }
1000
1001    fn default_importance() -> f32 { 0.7 }
1002    fn default_is_current() -> bool { true }
1003
1004    let parsed: UnifiedResponse = serde_json::from_str(cleaned)?;
1005
1006    // Parse memories (reuse existing logic)
1007    let entries = parsed
1008        .memories
1009        .into_iter()
1010        .filter_map(|item| {
1011            let category = match item.category.to_lowercase().as_str() {
1012                "decision" => MemoryCategory::Decision,
1013                "preference" => MemoryCategory::Preference,
1014                "solution" => MemoryCategory::Solution,
1015                "finding" => MemoryCategory::Finding,
1016                "technical" => MemoryCategory::Technical,
1017                "structure" => MemoryCategory::Structure,
1018                _ => return None,
1019            };
1020
1021            if item.content.len() < MIN_MEMORY_CONTENT_LENGTH {
1022                return None;
1023            }
1024
1025            let mut entry = MemoryEntry::new(
1026                category,
1027                item.content,
1028                session_id.map(|s| s.to_string()),
1029                project_path.map(|p| p.to_string()),
1030            );
1031            if item.importance > 0.0 {
1032                entry.importance = item.importance.clamp(0.0, 100.0);
1033            }
1034            if !item.keywords.is_empty() {
1035                entry.tags.extend(item.keywords);
1036            }
1037            if !item.tags.is_empty() {
1038                entry.tags.extend(item.tags);
1039            }
1040            entry.tags.dedup();
1041
1042            Some(entry)
1043        })
1044        .collect();
1045
1046    // Parse focus points (reuse existing logic)
1047    use chrono::Utc;
1048    use crate::compress::FocusStatus;
1049
1050    let focus_points = parsed
1051        .focus_points
1052        .into_iter()
1053        .map(|item| {
1054            let mut focus = FocusPoint::new(
1055                format!("focus-{}", Utc::now().timestamp()),
1056                item.topic,
1057                item.keywords,
1058                item.entities,
1059                item.core_question,
1060                0,
1061            );
1062            focus.importance = item.importance.clamp(0.0, 1.0);
1063            if !item.is_current {
1064                focus.status = FocusStatus::Suspended;
1065            }
1066            focus
1067        })
1068        .collect();
1069
1070    // Parse conversation patterns (reuse existing logic)
1071    let conversation_patterns = parsed
1072        .conversation_patterns
1073        .into_iter()
1074        .filter_map(|item| {
1075            let pattern_type = match item.pattern_type.to_lowercase().as_str() {
1076                "reference" => PatternType::Reference,
1077                "code" => PatternType::Code,
1078                _ => return None,
1079            };
1080
1081            if item.pattern.trim().is_empty() {
1082                return None;
1083            }
1084
1085            let mut pattern = ConversationPattern::new(
1086                pattern_type,
1087                item.pattern,
1088                PatternSource::UserConversation {
1089                    example: String::new(),
1090                },
1091            );
1092
1093            pattern.confidence = if item.confidence > 0.0 {
1094                item.confidence.clamp(0.0, 1.0)
1095            } else {
1096                0.5
1097            };
1098
1099            Some(pattern)
1100        })
1101        .collect();
1102
1103    // Parse focus keywords
1104    let focus_keywords = ExtractedKeywords {
1105        transition: parsed.focus_keywords.transition,
1106        question: parsed.focus_keywords.question,
1107        task: parsed.focus_keywords.task,
1108        tech: parsed.focus_keywords.tech,
1109    };
1110
1111    Ok(UnifiedExtractionResult {
1112        memories: deduplicate_entries(entries),
1113        focus_points,
1114        conversation_patterns,
1115        focus_keywords,
1116        focus_decision: None, // Not populated in basic extraction
1117    })
1118}
1119
1120/// Parse unified extraction response with focus decision from AI.
1121fn parse_unified_response_with_focus(
1122    json_text: &str,
1123    session_id: Option<&str>,
1124    project_path: Option<&str>,
1125) -> Result<UnifiedExtractionResult> {
1126    let cleaned = json_text
1127        .trim()
1128        .trim_start_matches("```json")
1129        .trim_start_matches("```")
1130        .trim_end_matches("```")
1131        .trim();
1132
1133    #[derive(Deserialize)]
1134    struct UnifiedResponseWithFocus {
1135        #[serde(default)]
1136        focus_decision: Option<FocusDecisionItem>,
1137        #[serde(default)]
1138        memories: Vec<MemoryItem>,
1139        #[serde(default)]
1140        focus_keywords: FocusKeywordsItem,
1141    }
1142
1143    #[derive(Deserialize)]
1144    struct FocusDecisionItem {
1145        #[serde(default)]
1146        selected_focus_id: Option<String>,
1147        #[serde(default)]
1148        need_new_focus: bool,
1149        #[serde(default)]
1150        new_focus_topic: Option<String>,
1151        #[serde(default)]
1152        new_core_question: Option<String>,
1153        #[serde(default)]
1154        confidence: f32,
1155        #[serde(default)]
1156        focus_type: String,
1157        #[serde(default)]
1158        is_topic_switch: bool,
1159        #[serde(default)]
1160        previous_focus_id: Option<String>,
1161        #[serde(default)]
1162        focus_keywords: Vec<String>,
1163        #[serde(default)]
1164        related_entities: Vec<String>,
1165        #[serde(default)]
1166        reasoning: String,
1167    }
1168
1169    #[derive(Deserialize, Default)]
1170    struct FocusKeywordsItem {
1171        #[serde(default)]
1172        transition: Vec<String>,
1173        #[serde(default)]
1174        question: Vec<String>,
1175        #[serde(default)]
1176        task: Vec<String>,
1177        #[serde(default)]
1178        tech: Vec<String>,
1179    }
1180
1181    #[derive(Deserialize)]
1182    struct MemoryItem {
1183        category: String,
1184        content: String,
1185        #[serde(default)]
1186        importance: f64,
1187        #[serde(default)]
1188        keywords: Vec<String>,
1189        #[serde(default)]
1190        tags: Vec<String>,
1191    }
1192
1193    let parsed: UnifiedResponseWithFocus = serde_json::from_str(cleaned)?;
1194
1195    // Parse focus decision
1196    let focus_decision = parsed.focus_decision.map(|item| {
1197        use super::unified_extraction::{FocusDecision, FocusType};
1198
1199        let focus_type = match item.focus_type.to_lowercase().as_str() {
1200            "problem_solving" => FocusType::ProblemSolving,
1201            "task_execution" => FocusType::TaskExecution,
1202            "knowledge_exploration" => FocusType::KnowledgeExploration,
1203            "decision_making" => FocusType::DecisionMaking,
1204            "code_optimization" => FocusType::CodeOptimization,
1205            _ => FocusType::General,
1206        };
1207
1208        FocusDecision {
1209            selected_focus_id: item.selected_focus_id,
1210            need_new_focus: item.need_new_focus,
1211            new_focus_topic: item.new_focus_topic,
1212            new_core_question: item.new_core_question,
1213            confidence: item.confidence.clamp(0.0, 1.0),
1214            focus_type,
1215            is_topic_switch: item.is_topic_switch,
1216            previous_focus_id: item.previous_focus_id,
1217            focus_keywords: item.focus_keywords,
1218            related_entities: item.related_entities,
1219            reasoning: item.reasoning,
1220        }
1221    });
1222
1223    // Parse memories (reuse existing logic)
1224    let entries = parsed
1225        .memories
1226        .into_iter()
1227        .filter_map(|item| {
1228            let category = match item.category.to_lowercase().as_str() {
1229                "decision" => MemoryCategory::Decision,
1230                "preference" => MemoryCategory::Preference,
1231                "solution" => MemoryCategory::Solution,
1232                "finding" => MemoryCategory::Finding,
1233                "technical" => MemoryCategory::Technical,
1234                "structure" => MemoryCategory::Structure,
1235                _ => return None,
1236            };
1237
1238            if item.content.len() < MIN_MEMORY_CONTENT_LENGTH {
1239                return None;
1240            }
1241
1242            let mut entry = MemoryEntry::new(
1243                category,
1244                item.content,
1245                session_id.map(|s| s.to_string()),
1246                project_path.map(|p| p.to_string()),
1247            );
1248            if item.importance > 0.0 {
1249                entry.importance = item.importance.clamp(0.0, 100.0);
1250            }
1251            if !item.keywords.is_empty() {
1252                entry.tags.extend(item.keywords);
1253            }
1254            if !item.tags.is_empty() {
1255                entry.tags.extend(item.tags);
1256            }
1257            entry.tags.dedup();
1258
1259            Some(entry)
1260        })
1261        .collect();
1262
1263    // Parse focus keywords
1264    let focus_keywords = ExtractedKeywords {
1265        transition: parsed.focus_keywords.transition,
1266        question: parsed.focus_keywords.question,
1267        task: parsed.focus_keywords.task,
1268        tech: parsed.focus_keywords.tech,
1269    };
1270
1271    Ok(UnifiedExtractionResult {
1272        memories: deduplicate_entries(entries),
1273        focus_points: Vec::new(), // Not used in focus selection mode
1274        conversation_patterns: Vec::new(), // Not used in focus selection mode
1275        focus_keywords,
1276        focus_decision,
1277    })
1278}
1279
1280/// Smart unified extraction: AI-first with graceful fallback.
1281///
1282/// Uses UnifiedExtractor for single API call extraction.
1283pub async fn detect_unified_smart(
1284    text: &str,
1285    session_id: Option<&str>,
1286    project_path: Option<&str>,
1287    extractor: Option<&UnifiedExtractor>,
1288) -> UnifiedExtractionResult {
1289    let mode = AiDetectionMode::from_env();
1290    let text_len = text.len();
1291
1292    // Only use AI for text > 200 chars
1293    let should_try_ai = mode != AiDetectionMode::Never && extractor.is_some() && text_len > 200;
1294
1295    if should_try_ai && let Some(ex) = extractor {
1296        if let Ok(result) = ex.extract_unified(text, session_id, project_path).await {
1297            return result;
1298        }
1299        // AI failed - skip detection for this turn
1300        log::warn!("Unified extraction failed, skipping detection for this turn");
1301    }
1302
1303    // Return empty result for short texts or failed AI
1304    UnifiedExtractionResult::default()
1305}
1306
1307#[cfg(test)]
1308mod tests {
1309    use super::*;
1310
1311    // =========================================================================
1312    // Conversation Pattern Parsing Tests
1313    // =========================================================================
1314
1315    #[test]
1316    fn test_parse_memory_response_with_patterns() {
1317        let json = r#"{
1318            "memories": [],
1319            "focus_points": [],
1320            "conversation_patterns": [
1321                {
1322                    "pattern_type": "reference",
1323                    "pattern": "正如我所说",
1324                    "confidence": 0.8
1325                },
1326                {
1327                    "pattern_type": "code",
1328                    "pattern": "fn ",
1329                    "confidence": 0.6
1330                }
1331            ]
1332        }"#;
1333
1334        let result = parse_memory_response(json, None, None).unwrap();
1335        assert_eq!(result.memories.len(), 0);
1336        assert_eq!(result.focus_points.len(), 0);
1337        assert_eq!(result.conversation_patterns.len(), 2);
1338
1339        // Check first pattern (reference)
1340        let ref_pattern = &result.conversation_patterns[0];
1341        assert_eq!(ref_pattern.pattern_type, PatternType::Reference);
1342        assert_eq!(ref_pattern.pattern, "正如我所说");
1343        assert_eq!(ref_pattern.confidence, 0.8);
1344        assert!(ref_pattern.is_active);
1345
1346        // Check second pattern (code)
1347        let code_pattern = &result.conversation_patterns[1];
1348        assert_eq!(code_pattern.pattern_type, PatternType::Code);
1349        assert_eq!(code_pattern.pattern, "fn ");
1350        assert_eq!(code_pattern.confidence, 0.6);
1351    }
1352
1353    #[test]
1354    fn test_parse_memory_response_patterns_default_confidence() {
1355        let json = r#"{
1356            "memories": [],
1357            "focus_points": [],
1358            "conversation_patterns": [
1359                {
1360                    "pattern_type": "reference",
1361                    "pattern": "as mentioned"
1362                }
1363            ]
1364        }"#;
1365
1366        let result = parse_memory_response(json, None, None).unwrap();
1367        assert_eq!(result.conversation_patterns.len(), 1);
1368
1369        // Default confidence should be 0.5
1370        let pattern = &result.conversation_patterns[0];
1371        assert_eq!(pattern.confidence, 0.5);
1372    }
1373
1374    #[test]
1375    fn test_parse_memory_response_patterns_empty() {
1376        let json = r#"{
1377            "memories": [],
1378            "focus_points": []
1379        }"#;
1380
1381        let result = parse_memory_response(json, None, None).unwrap();
1382        assert_eq!(result.conversation_patterns.len(), 0);
1383    }
1384
1385    #[test]
1386    fn test_parse_memory_response_patterns_invalid_type() {
1387        let json = r#"{
1388            "memories": [],
1389            "focus_points": [],
1390            "conversation_patterns": [
1391                {
1392                    "pattern_type": "invalid_type",
1393                    "pattern": "test",
1394                    "confidence": 0.5
1395                },
1396                {
1397                    "pattern_type": "reference",
1398                    "pattern": "valid pattern",
1399                    "confidence": 0.7
1400                }
1401            ]
1402        }"#;
1403
1404        let result = parse_memory_response(json, None, None).unwrap();
1405        // Invalid pattern type should be skipped
1406        assert_eq!(result.conversation_patterns.len(), 1);
1407        assert_eq!(result.conversation_patterns[0].pattern, "valid pattern");
1408    }
1409
1410    #[test]
1411    fn test_parse_memory_response_patterns_empty_string() {
1412        let json = r#"{
1413            "memories": [],
1414            "focus_points": [],
1415            "conversation_patterns": [
1416                {
1417                    "pattern_type": "reference",
1418                    "pattern": "",
1419                    "confidence": 0.5
1420                },
1421                {
1422                    "pattern_type": "code",
1423                    "pattern": "   ",
1424                    "confidence": 0.5
1425                },
1426                {
1427                    "pattern_type": "reference",
1428                    "pattern": "valid",
1429                    "confidence": 0.8
1430                }
1431            ]
1432        }"#;
1433
1434        let result = parse_memory_response(json, None, None).unwrap();
1435        // Empty patterns should be skipped
1436        assert_eq!(result.conversation_patterns.len(), 1);
1437        assert_eq!(result.conversation_patterns[0].pattern, "valid");
1438    }
1439
1440    #[test]
1441    fn test_parse_memory_response_patterns_confidence_clamped() {
1442        let json = r#"{
1443            "memories": [],
1444            "focus_points": [],
1445            "conversation_patterns": [
1446                {
1447                    "pattern_type": "reference",
1448                    "pattern": "test1",
1449                    "confidence": 1.5
1450                },
1451                {
1452                    "pattern_type": "code",
1453                    "pattern": "test2",
1454                    "confidence": -0.3
1455                }
1456            ]
1457        }"#;
1458
1459        let result = parse_memory_response(json, None, None).unwrap();
1460        assert_eq!(result.conversation_patterns.len(), 2);
1461
1462        // Confidence should be clamped to [0.0, 1.0]
1463        assert_eq!(result.conversation_patterns[0].confidence, 1.0);
1464        // Negative confidence should use default 0.5 (since <= 0.0 triggers default)
1465        assert_eq!(result.conversation_patterns[1].confidence, 0.5);
1466    }
1467
1468    #[test]
1469    fn test_parse_memory_response_patterns_source() {
1470        let json = r#"{
1471            "memories": [],
1472            "focus_points": [],
1473            "conversation_patterns": [
1474                {
1475                    "pattern_type": "reference",
1476                    "pattern": "PR #123",
1477                    "confidence": 0.9
1478                }
1479            ]
1480        }"#;
1481
1482        let result = parse_memory_response(json, None, None).unwrap();
1483        let pattern = &result.conversation_patterns[0];
1484
1485        // Source should be UserConversation
1486        match &pattern.source {
1487            PatternSource::UserConversation { example } => {
1488                assert_eq!(example, "");
1489            }
1490            _ => panic!("Expected UserConversation source"),
1491        }
1492    }
1493
1494    #[test]
1495    fn test_parse_memory_response_backward_compatible() {
1496        // Old format without conversation_patterns should still work
1497        let json = r#"{
1498            "memories": [
1499                {
1500                    "category": "decision",
1501                    "content": "使用 Rust 作为主要语言",
1502                    "importance": 80,
1503                    "keywords": ["Rust"],
1504                    "tags": ["backend"]
1505                }
1506            ],
1507            "focus_points": [
1508                {
1509                    "topic": "API设计",
1510                    "keywords": ["API", "REST"],
1511                    "importance": 0.8
1512                }
1513            ]
1514        }"#;
1515
1516        let result = parse_memory_response(json, None, None).unwrap();
1517        assert_eq!(result.memories.len(), 1);
1518        assert_eq!(result.focus_points.len(), 1);
1519        assert_eq!(result.conversation_patterns.len(), 0);
1520
1521        // Verify memory content
1522        assert_eq!(result.memories[0].category, MemoryCategory::Decision);
1523        assert!(result.memories[0].content.contains("Rust"));
1524    }
1525
1526    #[test]
1527    fn test_parse_memory_response_with_code_block_markers() {
1528        // JSON wrapped in code block markers should still parse
1529        let json = r#"```json
1530{
1531    "memories": [],
1532    "focus_points": [],
1533    "conversation_patterns": [
1534        {
1535            "pattern_type": "code",
1536            "pattern": "```",
1537            "confidence": 0.7
1538        }
1539    ]
1540}
1541```"#;
1542
1543        let result = parse_memory_response(json, None, None).unwrap();
1544        assert_eq!(result.conversation_patterns.len(), 1);
1545        assert_eq!(result.conversation_patterns[0].pattern, "```");
1546    }
1547
1548    // =========================================================================
1549    // ExtractionResult Tests
1550    // =========================================================================
1551
1552    #[test]
1553    fn test_extraction_result_has_patterns_field() {
1554        let result = ExtractionResult {
1555            memories: vec![],
1556            focus_points: vec![],
1557            conversation_patterns: vec![
1558                ConversationPattern::new(
1559                    PatternType::Reference,
1560                    "test pattern",
1561                    PatternSource::Manual,
1562                ),
1563            ],
1564        };
1565
1566        assert_eq!(result.conversation_patterns.len(), 1);
1567    }
1568
1569    #[test]
1570    fn test_extraction_result_clone() {
1571        let result = ExtractionResult {
1572            memories: vec![],
1573            focus_points: vec![],
1574            conversation_patterns: vec![
1575                ConversationPattern::new(
1576                    PatternType::Code,
1577                    "fn test()",
1578                    PatternSource::Manual,
1579                ),
1580            ],
1581        };
1582
1583        let cloned = result.clone();
1584        assert_eq!(cloned.conversation_patterns.len(), 1);
1585        assert_eq!(cloned.conversation_patterns[0].pattern, "fn test()");
1586    }
1587
1588    #[test]
1589    fn test_extraction_result_empty_patterns() {
1590        // Test ExtractionResult with empty patterns
1591        let result = ExtractionResult {
1592            memories: vec![],
1593            focus_points: vec![],
1594            conversation_patterns: vec![],
1595        };
1596
1597        assert!(result.conversation_patterns.is_empty());
1598        assert!(result.memories.is_empty());
1599        assert!(result.focus_points.is_empty());
1600    }
1601
1602    // =========================================================================
1603    // AI Prompt Validation Tests
1604    // =========================================================================
1605
1606    #[test]
1607    fn test_memory_extract_prompt_contains_patterns_guidance() {
1608        // Verify the prompt includes conversation pattern extraction guidance
1609        assert!(
1610            MEMORY_EXTRACT_SYSTEM_PROMPT.contains("对话模式提取"),
1611            "Prompt should contain pattern extraction guidance"
1612        );
1613        assert!(
1614            MEMORY_EXTRACT_SYSTEM_PROMPT.contains("reference"),
1615            "Prompt should mention reference pattern type"
1616        );
1617        assert!(
1618            MEMORY_EXTRACT_SYSTEM_PROMPT.contains("code"),
1619            "Prompt should mention code pattern type"
1620        );
1621    }
1622
1623    #[test]
1624    fn test_memory_extract_prompt_contains_trigger_condition() {
1625        // Verify the prompt mentions >500 chars trigger condition
1626        assert!(
1627            MEMORY_EXTRACT_SYSTEM_PROMPT.contains("500"),
1628            "Prompt should mention 500 chars trigger condition"
1629        );
1630        assert!(
1631            MEMORY_EXTRACT_SYSTEM_PROMPT.contains("> 500") || MEMORY_EXTRACT_SYSTEM_PROMPT.contains("超过500"),
1632            "Prompt should specify > 500 chars condition"
1633        );
1634    }
1635
1636    #[test]
1637    fn test_memory_extract_prompt_contains_output_format() {
1638        // Verify the prompt shows correct JSON output format with patterns
1639        assert!(
1640            MEMORY_EXTRACT_SYSTEM_PROMPT.contains("conversation_patterns"),
1641            "Prompt should show conversation_patterns in output format"
1642        );
1643        assert!(
1644            MEMORY_EXTRACT_SYSTEM_PROMPT.contains("pattern_type"),
1645            "Prompt should show pattern_type field"
1646        );
1647        assert!(
1648            MEMORY_EXTRACT_SYSTEM_PROMPT.contains("confidence"),
1649            "Prompt should show confidence field"
1650        );
1651    }
1652
1653    // =========================================================================
1654    // Integration Tests - Combined Extraction
1655    // =========================================================================
1656
1657    #[test]
1658    fn test_parse_memory_response_full_integration() {
1659        // Test complete extraction with memories, focus_points, and patterns together
1660        let json = r#"{
1661            "memories": [
1662                {
1663                    "category": "decision",
1664                    "content": "使用 Rust 作为主要语言。**Why:** 性能要求",
1665                    "importance": 85,
1666                    "keywords": ["Rust"],
1667                    "tags": ["backend"]
1668                }
1669            ],
1670            "focus_points": [
1671                {
1672                    "topic": "API设计",
1673                    "keywords": ["API", "REST"],
1674                    "entities": ["User", "Order"],
1675                    "importance": 0.8
1676                }
1677            ],
1678            "conversation_patterns": [
1679                {
1680                    "pattern_type": "reference",
1681                    "pattern": "正如我所说",
1682                    "confidence": 0.9
1683                },
1684                {
1685                    "pattern_type": "code",
1686                    "pattern": "fn ",
1687                    "confidence": 0.7
1688                }
1689            ]
1690        }"#;
1691
1692        let result = parse_memory_response(json, Some("session-123"), Some("/project/path")).unwrap();
1693
1694        // Verify all three components
1695        assert_eq!(result.memories.len(), 1);
1696        assert_eq!(result.focus_points.len(), 1);
1697        assert_eq!(result.conversation_patterns.len(), 2);
1698
1699        // Check memory
1700        assert_eq!(result.memories[0].category, MemoryCategory::Decision);
1701        assert!(result.memories[0].content.contains("Rust"));
1702
1703        // Check focus point
1704        assert_eq!(result.focus_points[0].topic, "API设计");
1705
1706        // Check patterns
1707        assert_eq!(result.conversation_patterns[0].pattern_type, PatternType::Reference);
1708        assert_eq!(result.conversation_patterns[1].pattern_type, PatternType::Code);
1709    }
1710
1711    #[test]
1712    fn test_parse_memory_response_mixed_valid_invalid_patterns() {
1713        // Test with mix of valid and invalid patterns
1714        let json = r#"{
1715            "memories": [],
1716            "focus_points": [],
1717            "conversation_patterns": [
1718                {
1719                    "pattern_type": "reference",
1720                    "pattern": "valid pattern 1",
1721                    "confidence": 0.8
1722                },
1723                {
1724                    "pattern_type": "unknown_type",
1725                    "pattern": "should be skipped",
1726                    "confidence": 0.5
1727                },
1728                {
1729                    "pattern_type": "code",
1730                    "pattern": "fn valid",
1731                    "confidence": 0.6
1732                },
1733                {
1734                    "pattern_type": "reference",
1735                    "pattern": "",
1736                    "confidence": 0.9
1737                }
1738            ]
1739        }"#;
1740
1741        let result = parse_memory_response(json, None, None).unwrap();
1742
1743        // Should only have 2 valid patterns
1744        assert_eq!(result.conversation_patterns.len(), 2);
1745        assert_eq!(result.conversation_patterns[0].pattern, "valid pattern 1");
1746        assert_eq!(result.conversation_patterns[1].pattern, "fn valid");
1747    }
1748
1749    #[test]
1750    fn test_parse_memory_response_patterns_with_session_and_project() {
1751        // Verify session_id and project_path are passed through correctly
1752        // (patterns don't use them, but the function accepts them)
1753        let json = r#"{
1754            "memories": [
1755                {
1756                    "category": "technical",
1757                    "content": "Using PostgreSQL database",
1758                    "importance": 70,
1759                    "keywords": ["PostgreSQL"],
1760                    "tags": ["database"]
1761                }
1762            ],
1763            "focus_points": [],
1764            "conversation_patterns": [
1765                {
1766                    "pattern_type": "reference",
1767                    "pattern": "as mentioned",
1768                    "confidence": 0.7
1769                }
1770            ]
1771        }"#;
1772
1773        let result = parse_memory_response(json, Some("test-session"), Some("/test/project")).unwrap();
1774
1775        // Memory should have source_session and project_path
1776        assert_eq!(result.memories[0].source_session, Some("test-session".to_string()));
1777        assert_eq!(result.memories[0].project_path, Some("/test/project".to_string()));
1778
1779        // Pattern should be parsed correctly
1780        assert_eq!(result.conversation_patterns.len(), 1);
1781    }
1782
1783    #[test]
1784    fn test_parse_memory_response_all_pattern_types() {
1785        // Test both supported pattern types
1786        let json = r#"{
1787            "memories": [],
1788            "focus_points": [],
1789            "conversation_patterns": [
1790                {
1791                    "pattern_type": "reference",
1792                    "pattern": "previously discussed",
1793                    "confidence": 0.8
1794                },
1795                {
1796                    "pattern_type": "Reference",
1797                    "pattern": "case insensitive",
1798                    "confidence": 0.7
1799                },
1800                {
1801                    "pattern_type": "CODE",
1802                    "pattern": "function ",
1803                    "confidence": 0.6
1804                },
1805                {
1806                    "pattern_type": "code",
1807                    "pattern": "class ",
1808                    "confidence": 0.5
1809                }
1810            ]
1811        }"#;
1812
1813        let result = parse_memory_response(json, None, None).unwrap();
1814
1815        // All should be parsed (case-insensitive pattern_type)
1816        assert_eq!(result.conversation_patterns.len(), 4);
1817
1818        // Check types
1819        assert_eq!(result.conversation_patterns[0].pattern_type, PatternType::Reference);
1820        assert_eq!(result.conversation_patterns[1].pattern_type, PatternType::Reference);
1821        assert_eq!(result.conversation_patterns[2].pattern_type, PatternType::Code);
1822        assert_eq!(result.conversation_patterns[3].pattern_type, PatternType::Code);
1823    }
1824
1825    #[test]
1826    fn test_extraction_result_debug_trait() {
1827        // Test that ExtractionResult implements Debug
1828        let result = ExtractionResult {
1829            memories: vec![],
1830            focus_points: vec![],
1831            conversation_patterns: vec![
1832                ConversationPattern::new(
1833                    PatternType::Reference,
1834                    "test",
1835                    PatternSource::Manual,
1836                ),
1837            ],
1838        };
1839
1840        let debug_str = format!("{:?}", result);
1841        assert!(debug_str.contains("ExtractionResult"));
1842        assert!(debug_str.contains("conversation_patterns"));
1843    }
1844
1845    // =========================================================================
1846    // Unified Extraction Tests
1847    // =========================================================================
1848
1849    #[test]
1850    fn test_parse_unified_response_full() {
1851        let json = r#"{
1852            "memories": [
1853                {
1854                    "category": "decision",
1855                    "content": "使用 Rust 作为主要语言",
1856                    "importance": 85,
1857                    "keywords": ["Rust"],
1858                    "tags": ["backend"]
1859                }
1860            ],
1861            "focus_points": [
1862                {
1863                    "topic": "API设计",
1864                    "keywords": ["API", "REST"],
1865                    "entities": ["User", "Order"],
1866                    "core_question": "如何优化 API？",
1867                    "importance": 0.8,
1868                    "is_current": true
1869                }
1870            ],
1871            "conversation_patterns": [
1872                {
1873                    "pattern_type": "reference",
1874                    "pattern": "正如我所说",
1875                    "confidence": 0.8
1876                }
1877            ],
1878            "focus_keywords": {
1879                "transition": ["换个话题"],
1880                "question": ["怎么"],
1881                "task": ["帮我"],
1882                "tech": ["rust"]
1883            }
1884        }"#;
1885
1886        let result = parse_unified_response(json, Some("session-123"), Some("/project")).unwrap();
1887
1888        // Verify all components
1889        assert_eq!(result.memories.len(), 1);
1890        assert_eq!(result.memories[0].category, MemoryCategory::Decision);
1891        assert!(result.memories[0].content.contains("Rust"));
1892
1893        assert_eq!(result.focus_points.len(), 1);
1894        assert_eq!(result.focus_points[0].topic, "API设计");
1895
1896        assert_eq!(result.conversation_patterns.len(), 1);
1897        assert_eq!(result.conversation_patterns[0].pattern_type, PatternType::Reference);
1898
1899        assert!(!result.focus_keywords.is_empty());
1900        assert_eq!(result.focus_keywords.transition.len(), 1);
1901        assert_eq!(result.focus_keywords.question.len(), 1);
1902        assert_eq!(result.focus_keywords.task.len(), 1);
1903        assert_eq!(result.focus_keywords.tech.len(), 1);
1904    }
1905
1906    #[test]
1907    fn test_parse_unified_response_empty() {
1908        let json = r#"{
1909            "memories": [],
1910            "focus_points": [],
1911            "conversation_patterns": [],
1912            "focus_keywords": {
1913                "transition": [],
1914                "question": [],
1915                "task": [],
1916                "tech": []
1917            }
1918        }"#;
1919
1920        let result = parse_unified_response(json, None, None).unwrap();
1921
1922        assert!(result.memories.is_empty());
1923        assert!(result.focus_points.is_empty());
1924        assert!(result.conversation_patterns.is_empty());
1925        assert!(result.focus_keywords.is_empty());
1926    }
1927
1928    #[test]
1929    fn test_parse_unified_response_partial() {
1930        // Test with only memories (no focus_keywords)
1931        let json = r#"{
1932            "memories": [
1933                {
1934                    "category": "technical",
1935                    "content": "使用 PostgreSQL 作为主数据库存储",
1936                    "importance": 70
1937                }
1938            ]
1939        }"#;
1940
1941        let result = parse_unified_response(json, None, None).unwrap();
1942
1943        assert_eq!(result.memories.len(), 1);
1944        assert!(result.focus_points.is_empty());
1945        assert!(result.conversation_patterns.is_empty());
1946        assert!(result.focus_keywords.is_empty());
1947    }
1948
1949    #[test]
1950    fn test_parse_unified_response_with_code_block() {
1951        let json = r#"```json
1952{
1953    "memories": [],
1954    "focus_points": [],
1955    "conversation_patterns": [],
1956    "focus_keywords": {
1957        "transition": ["switching"],
1958        "question": [],
1959        "task": [],
1960        "tech": []
1961    }
1962}
1963```"#;
1964
1965        let result = parse_unified_response(json, None, None).unwrap();
1966
1967        assert_eq!(result.focus_keywords.transition.len(), 1);
1968        assert_eq!(result.focus_keywords.transition[0], "switching");
1969    }
1970
1971    #[test]
1972    fn test_unified_extraction_result_default() {
1973        let result = UnifiedExtractionResult::default();
1974        assert!(result.memories.is_empty());
1975        assert!(result.focus_points.is_empty());
1976        assert!(result.conversation_patterns.is_empty());
1977        assert!(result.focus_keywords.is_empty());
1978    }
1979
1980    #[test]
1981    fn test_unified_extraction_prompt_contains_all_sections() {
1982        // Verify the unified prompt contains all extraction sections
1983        assert!(UNIFIED_EXTRACTION_PROMPT.contains("长期记忆"));
1984        assert!(UNIFIED_EXTRACTION_PROMPT.contains("当前焦点"));
1985        assert!(UNIFIED_EXTRACTION_PROMPT.contains("对话模式"));
1986        assert!(UNIFIED_EXTRACTION_PROMPT.contains("焦点关键词"));
1987    }
1988
1989    #[test]
1990    fn test_unified_extraction_prompt_contains_keyword_categories() {
1991        assert!(UNIFIED_EXTRACTION_PROMPT.contains("transition"));
1992        assert!(UNIFIED_EXTRACTION_PROMPT.contains("question"));
1993        assert!(UNIFIED_EXTRACTION_PROMPT.contains("task"));
1994        assert!(UNIFIED_EXTRACTION_PROMPT.contains("tech"));
1995    }
1996
1997    #[test]
1998    fn test_parse_unified_response_keywords_merged() {
1999        let json = r#"{
2000            "memories": [],
2001            "focus_points": [],
2002            "conversation_patterns": [],
2003            "focus_keywords": {
2004                "transition": ["换个话题", "switching", "however"],
2005                "question": ["怎么", "how", "为什么"],
2006                "task": ["帮我", "implement", "创建"],
2007                "tech": ["rust", "数据库", "api"]
2008            }
2009        }"#;
2010
2011        let result = parse_unified_response(json, None, None).unwrap();
2012
2013        assert_eq!(result.focus_keywords.transition.len(), 3);
2014        assert_eq!(result.focus_keywords.question.len(), 3);
2015        assert_eq!(result.focus_keywords.task.len(), 3);
2016        assert_eq!(result.focus_keywords.tech.len(), 3);
2017        assert_eq!(result.focus_keywords.total_count(), 12);
2018    }
2019}
matrixcode_core/memory/extractor.rs

matrixcode_core/memory/
extractor.rs