1use crate::truncate::truncate_chars;
4use anyhow::Result;
5use serde::Deserialize;
6
7use super::config::*;
8use super::entry::{MemoryCategory, MemoryEntry};
9use super::manager::AutoMemory;
10use super::conversation_pattern::{ConversationPattern, PatternType, PatternSource};
11use super::unified_extraction::{UnifiedExtractionResult, ExtractedKeywords};
12use crate::compress::FocusPoint;
13
14#[async_trait::async_trait]
20pub trait MemoryExtractor: Send + Sync {
21 async fn extract(
23 &self,
24 text: &str,
25 session_id: Option<&str>,
26 project_path: Option<&str>,
27 ) -> Result<ExtractionResult>;
28
29 fn model_name(&self) -> &str;
31}
32
33#[derive(Debug, Clone)]
35pub struct ExtractionResult {
36 pub memories: Vec<MemoryEntry>,
37 pub focus_points: Vec<FocusPoint>,
38 pub conversation_patterns: Vec<ConversationPattern>,
40}
41
42pub struct AiMemoryExtractor {
44 provider: Box<dyn crate::providers::Provider>,
45 model: String,
46}
47
48impl AiMemoryExtractor {
49 pub fn new(provider: Box<dyn crate::providers::Provider>, model: String) -> Self {
51 Self { provider, model }
52 }
53
54 pub fn new_minimal(model: String) -> Self {
57 Self {
60 provider: crate::create_minimal_provider(&model),
61 model,
62 }
63 }
64}
65
66const MEMORY_EXTRACT_SYSTEM_PROMPT: &str = r#"你是记忆提取助手。从对话中提取值得长期记忆的关键信息。
67
68# 记忆类型
69
70<types>
71<type>
72 <name>decision</name>
73 <description>项目或技术选型的决定</description>
74 <when_to_save>用户明确做出技术决策时</when_to_save>
75 <body_structure>先写决策内容,然后 **Why:** 决策原因,**Context:** 适用场景</body_structure>
76</type>
77<type>
78 <name>preference</name>
79 <description>用户习惯或偏好</description>
80 <when_to_save>用户表达"我喜欢/习惯/偏好"时</when_to_save>
81 <body_structure>先写偏好内容,然后 **Why:** 偏好原因(如有)</body_structure>
82</type>
83<type>
84 <name>solution</name>
85 <description>解决问题的具体方法</description>
86 <when_to_save>问题成功解决且方法可复用时</when_to_save>
87 <body_structure>先写解决方案,然后 **Problem:** 解决的问题,**Key:** 关键步骤</body_structure>
88</type>
89<type>
90 <name>finding</name>
91 <description>重要发现或信息</description>
92 <when_to_save>发现非显而易见的信息时</when_to_save>
93</type>
94<type>
95 <name>technical</name>
96 <description>技术栈或框架信息</description>
97 <when_to_save>确认项目使用的技术时</when_to_save>
98</type>
99<type>
100 <name>structure</name>
101 <description>项目结构信息(重要!)</description>
102 <when_to_save>发现关键模块位置、核心文件路径、代码组织方式时</when_to_save>
103 <body_structure>先写结构描述,然后 **Location:** 具体路径,**Purpose:** 模块职责</body_structure>
104 <example>"上下文压缩模块位于 packages/core/src/compress/。**Location:** packages/core/src/compress/compressor.rs 是核心入口,**Purpose:** 负责上下文 token 优化"</example>
105</type>
106</types>
107
108# 不要保存什么到记忆中
109
110- Git 历史、最近更改 — git log/blame 是权威来源
111- 临时状态:进行中的任务、当前对话上下文
112- 错误信息和调试细节 — 问题解决后无需保留
113- 临时文件路径、临时变量名
114
115# 重要:应该保存的结构信息
116
117项目结构信息(structure 类型)应该保存,包括:
118- 关键模块的位置(如 "compress 模块在 packages/core/src/compress/")
119- 核心文件的功能(如 "agent/streaming.rs 负责流式响应处理")
120- 常见问题的定位路径(如 "上下文大小判断在 compressor.rs 的 estimate_tokens 函数")
121- 代码组织模式(如 "providers 模块实现了 Provider trait")
122
123这些信息能大幅减少未来会话的探索时间!
124
125# 对话模式提取
126
127当对话文本较长时(超过500字符),还要提取对话中使用的模式:
128
1291. **引用模式 (reference)**:用户如何引用之前的内容
130 - 示例:"正如前面所说"、"接着刚才的话题"、"as mentioned"、"previously"
131
1322. **代码模式 (code)**:对话中涉及的代码风格关键词
133 - 示例:语言关键词(fn, function, class)、代码块标记(```)
134
135模式提取规则:
136- 只提取明确出现的模式,不要推测
137- confidence 范围 0.0-1.0,越常见越低(常见模式置信度低)
138- 只在文本 > 500 字符时提取模式
139
140# 输出格式
141
142严格 JSON:
143{
144 "memories": [
145 {
146 "category": "decision",
147 "content": "采用 PostgreSQL 作为主数据库。**Why:** 性能要求和团队经验",
148 "importance": 85,
149 "keywords": ["PostgreSQL", "数据库", "database"],
150 "tags": ["backend", "storage"]
151 }
152 ],
153 "focus_points": [],
154 "conversation_patterns": [
155 {
156 "pattern_type": "reference",
157 "pattern": "正如我所说",
158 "confidence": 0.8
159 },
160 {
161 "pattern_type": "code",
162 "pattern": "fn ",
163 "confidence": 0.6
164 }
165 ]
166}
167
168关键词提取:3-5 个核心关键词(技术名词、项目名、关键概念)
169标签提取:1-3 个分类标签(backend、frontend、config、auth 等)
170
171只返回 JSON,不要其他解释。"#;
172
173#[async_trait::async_trait]
174impl MemoryExtractor for AiMemoryExtractor {
175 async fn extract(
176 &self,
177 text: &str,
178 session_id: Option<&str>,
179 project_path: Option<&str>,
180 ) -> Result<ExtractionResult> {
181 use crate::providers::{ChatRequest, Message, MessageContent, Role};
182
183 let truncated = truncate_chars(text, 4000);
185
186 let request = ChatRequest {
187 messages: vec![Message {
188 role: Role::User,
189 content: MessageContent::Text(format!(
190 "请从以下对话中提取值得记忆的关键信息和当前聚焦点:\n\n{}",
191 truncated
192 )),
193 }],
194 tools: vec![],
195 system: Some(MEMORY_EXTRACT_SYSTEM_PROMPT.to_string()),
196 think: false,
197 max_tokens: 512,
198 server_tools: vec![],
199 enable_caching: false,
200 };
201
202 let response = self.provider.chat(request).await?;
203
204 let response_text = response
205 .content
206 .iter()
207 .filter_map(|b| {
208 if let crate::providers::ContentBlock::Text { text } = b {
209 Some(text.clone())
210 } else {
211 None
212 }
213 })
214 .collect::<Vec<_>>()
215 .join("");
216
217 parse_memory_response(&response_text, session_id, project_path)
218 }
219
220 fn model_name(&self) -> &str {
221 &self.model
222 }
223}
224
225fn parse_memory_response(
226 json_text: &str,
227 session_id: Option<&str>,
228 project_path: Option<&str>,
229) -> Result<ExtractionResult> {
230 let cleaned = json_text
231 .trim()
232 .trim_start_matches("```json")
233 .trim_start_matches("```")
234 .trim_end_matches("```")
235 .trim();
236
237 #[derive(Deserialize)]
238 struct MemoryResponse {
239 memories: Vec<MemoryItem>,
240 #[serde(default)]
241 focus_points: Vec<FocusPointItem>,
242 #[serde(default)]
243 conversation_patterns: Vec<ConversationPatternItem>,
244 }
245
246 #[derive(Deserialize)]
247 struct MemoryItem {
248 category: String,
249 content: String,
250 #[serde(default)]
251 importance: f64,
252 #[serde(default)]
253 keywords: Vec<String>,
254 #[serde(default)]
255 tags: Vec<String>,
256 }
257
258 #[derive(Deserialize)]
259 struct FocusPointItem {
260 topic: String,
261 #[serde(default)]
262 keywords: Vec<String>,
263 #[serde(default)]
264 entities: Vec<String>,
265 #[serde(default)]
266 core_question: Option<String>,
267 #[serde(default = "default_importance")]
268 importance: f32,
269 #[serde(default = "default_is_current")]
270 is_current: bool,
271 }
272
273 #[derive(Deserialize)]
274 struct ConversationPatternItem {
275 pattern_type: String,
276 pattern: String,
277 #[serde(default)]
278 confidence: f32,
279 }
280
281 fn default_importance() -> f32 { 0.7 }
282 fn default_is_current() -> bool { true }
283
284 let parsed: MemoryResponse = serde_json::from_str(cleaned)?;
285
286 let entries = parsed
288 .memories
289 .into_iter()
290 .filter_map(|item| {
291 let category = match item.category.to_lowercase().as_str() {
292 "decision" => MemoryCategory::Decision,
293 "preference" => MemoryCategory::Preference,
294 "solution" => MemoryCategory::Solution,
295 "finding" => MemoryCategory::Finding,
296 "technical" => MemoryCategory::Technical,
297 "structure" => MemoryCategory::Structure,
298 _ => return None,
299 };
300
301 if item.content.len() < MIN_MEMORY_CONTENT_LENGTH {
302 return None;
303 }
304
305 let mut entry = MemoryEntry::new(
306 category,
307 item.content,
308 session_id.map(|s| s.to_string()),
309 project_path.map(|p| p.to_string()),
310 );
311 if item.importance > 0.0 {
312 entry.importance = item.importance.clamp(0.0, 100.0);
313 }
314 let valid_keywords: Vec<String> = item.keywords
316 .iter()
317 .filter(|k| k.len() >= 2 && !is_noise_word(k))
318 .cloned()
319 .collect();
320
321 let valid_tags: Vec<String> = item.tags
322 .iter()
323 .filter(|t| t.len() >= 2 && !is_noise_word(t))
324 .cloned()
325 .collect();
326
327 entry.tags.extend(valid_keywords);
328 entry.tags.extend(valid_tags);
329 entry.tags.dedup();
330
331 if entry.tags.len() > 10 {
333 entry.tags.truncate(10);
334 }
335
336 Some(entry)
337 })
338 .collect();
339
340 use chrono::Utc;
342 use crate::compress::FocusStatus;
343
344 let focus_points = parsed
345 .focus_points
346 .into_iter()
347 .map(|item| {
348 let mut focus = FocusPoint::new(
349 format!("focus-{}", Utc::now().timestamp()),
350 item.topic,
351 item.keywords,
352 item.entities,
353 item.core_question,
354 0,
355 );
356 focus.importance = item.importance.clamp(0.0, 1.0);
357 if !item.is_current {
358 focus.status = FocusStatus::Suspended;
359 }
360 focus
361 })
362 .collect();
363
364 let conversation_patterns = parsed
366 .conversation_patterns
367 .into_iter()
368 .filter_map(|item| {
369 let pattern_type = match item.pattern_type.to_lowercase().as_str() {
371 "reference" => PatternType::Reference,
372 "code" => PatternType::Code,
373 _ => return None, };
375
376 if item.pattern.trim().is_empty() {
378 return None;
379 }
380
381 let mut pattern = ConversationPattern::new(
383 pattern_type,
384 item.pattern,
385 PatternSource::UserConversation {
386 example: String::new(), },
388 );
389
390 pattern.confidence = if item.confidence > 0.0 {
392 item.confidence.clamp(0.0, 1.0)
393 } else {
394 0.5
395 };
396
397 Some(pattern)
398 })
399 .collect();
400
401 Ok(ExtractionResult {
402 memories: deduplicate_entries(entries),
403 focus_points,
404 conversation_patterns,
405 })
406}
407
408fn deduplicate_entries(entries: Vec<MemoryEntry>) -> Vec<MemoryEntry> {
409 let mut seen: Vec<String> = Vec::new();
410 entries
411 .into_iter()
412 .filter(|e| {
413 let content_lower = e.content.to_lowercase();
414 if seen.iter().any(|s| {
415 AutoMemory::calculate_similarity(s, &content_lower) >= SIMILARITY_THRESHOLD
416 }) {
417 false
418 } else {
419 seen.push(content_lower);
420 true
421 }
422 })
423 .take(MAX_DETECTED_ENTRIES)
424 .collect()
425}
426
427pub fn detect_memories_fallback(
433 text: &str,
434 session_id: Option<&str>,
435 project_path: Option<&str>,
436) -> Vec<MemoryEntry> {
437 let mut entries = Vec::new();
438 let text_lower = text.to_lowercase();
439
440 let patterns = [
442 (
443 MemoryCategory::Decision,
444 ["决定", "选择", "采用", "定下", "decided", "chose"],
445 ),
446 (
447 MemoryCategory::Preference,
448 ["偏好", "习惯", "喜欢", "首选", "prefer", "like"],
449 ),
450 (
451 MemoryCategory::Solution,
452 ["解决", "修复", "搞定", "改成", "fixed", "solved"],
453 ),
454 (
455 MemoryCategory::Finding,
456 ["发现", "原来", "原因", "定位", "found", "reason"],
457 ),
458 (
459 MemoryCategory::Technical,
460 ["技术栈", "框架", "用的", "基于", "stack", "using"],
461 ),
462 (
463 MemoryCategory::Structure,
464 ["入口", "主文件", "目录", "位于", "entry", "main"],
465 ),
466 ];
467
468 for (category, keywords) in patterns {
469 for keyword in keywords {
470 if text_lower.contains(&keyword.to_lowercase()) {
471 let content = extract_memory_content(text, keyword);
472 if !content.is_empty() && content.len() >= MIN_MEMORY_CONTENT_LENGTH {
473 entries.push(MemoryEntry::new(
474 category,
475 content,
476 session_id.map(|s| s.to_string()),
477 project_path.map(|p| p.to_string()),
478 ));
479 }
480 }
481 }
482 }
483
484 deduplicate_entries(entries)
485}
486
487pub fn detect_memories_from_text(
489 text: &str,
490 session_id: Option<&str>,
491 project_path: Option<&str>,
492) -> Vec<MemoryEntry> {
493 detect_memories_fallback(text, session_id, project_path)
494}
495
496pub async fn detect_memories_smart(
502 text: &str,
503 session_id: Option<&str>,
504 project_path: Option<&str>,
505 extractor: Option<&AiMemoryExtractor>,
506) -> ExtractionResult {
507 let mode = AiDetectionMode::from_env();
508 let text_len = text.len();
509
510 let should_try_ai = mode != AiDetectionMode::Never && extractor.is_some() && text_len > 200;
513
514 let model_name = extractor.map(|e| e.model_name()).unwrap_or("none");
516 crate::debug::debug_log().memory_ai_detection(
517 model_name,
518 0, text_len,
520 should_try_ai,
521 );
522
523 if should_try_ai && let Some(ex) = extractor {
524 if let Ok(result) = ex.extract(text, session_id, project_path).await {
525 crate::debug::debug_log().memory_ai_detection(
528 ex.model_name(),
529 result.memories.len(),
530 text_len,
531 true,
532 );
533 return result;
534 }
535 log::warn!("AI memory extraction failed, trying rule-based fallback for critical memories");
537
538 let critical_memories = detect_critical_memories(text, session_id, project_path);
540
541 crate::debug::debug_log().memory_ai_detection(
542 "rule-fallback",
543 critical_memories.len(),
544 text_len,
545 false,
546 );
547
548 return ExtractionResult {
549 memories: critical_memories,
550 focus_points: vec![],
551 conversation_patterns: vec![],
552 };
553 }
554
555 ExtractionResult {
558 memories: vec![],
559 focus_points: vec![],
560 conversation_patterns: vec![],
561 }
562}
563
564fn detect_critical_memories(
567 text: &str,
568 session_id: Option<&str>,
569 project_path: Option<&str>,
570) -> Vec<MemoryEntry> {
571 let critical_patterns = [
573 (MemoryCategory::Structure, ["位于", "入口", "模块", "packages/", "src/"], 85.0),
575 (MemoryCategory::Technical, ["技术栈", "框架", "基于", "使用", ""], 80.0),
577 (MemoryCategory::Decision, ["决定", "选择", "采用", "", ""], 75.0),
579 ];
580
581 let mut entries = Vec::new();
582 let text_lower = text.to_lowercase();
583
584 for (category, keywords, importance) in critical_patterns {
585 for keyword in keywords {
587 if text_lower.contains(&keyword.to_lowercase()) {
588 let content = extract_memory_content(text, keyword);
589
590 if !content.is_empty() && content.len() >= MIN_MEMORY_CONTENT_LENGTH {
591 let mut entry = MemoryEntry::new(
592 category,
593 content,
594 session_id.map(|s| s.to_string()),
595 project_path.map(|p| p.to_string()),
596 );
597 entry.importance = importance;
599 entries.push(entry);
600 break; }
602 }
603 }
604 }
605
606 deduplicate_entries(entries)
608}
609
610fn extract_memory_content(text: &str, keyword: &str) -> String {
611 let text_lower = text.to_lowercase();
612 let keyword_lower = keyword.to_lowercase();
613
614 let pos = match text_lower.find(&keyword_lower) {
615 Some(p) => p,
616 None => return String::new(),
617 };
618
619 let start = find_sentence_start(text, pos);
621 let end = find_sentence_end(text, pos);
622
623 let sentence = text[start..end].trim();
624
625 let cleaned = clean_memory_content(sentence);
627
628 if cleaned.len() > MAX_MEMORY_CONTENT_LENGTH {
629 truncate_intelligently(&cleaned, MAX_MEMORY_CONTENT_LENGTH)
631 } else {
632 cleaned
633 }
634}
635
/// Returns the byte offset at which the sentence containing byte offset `pos`
/// starts.
///
/// Scans backwards from `pos` and stops
/// * right after the nearest sentence terminator (the terminator itself is
///   excluded), or
/// * at the start of a Markdown code fence (the three backticks are included).
///
/// Returns `0` when neither is found. `pos` is clamped to `text.len()` and
/// rounded down to a character boundary, so the result can always be used to
/// slice `text`.
fn find_sentence_start(text: &str, pos: usize) -> usize {
    // ASCII and CJK sentence terminators. The earlier code compared against
    // '!' and '?' twice; the second pair is taken to be the full-width forms.
    const TERMINATORS: [char; 7] = ['.', '。', '\n', '!', '?', '\u{FF01}', '\u{FF1F}'];

    let mut start = pos.min(text.len());
    while !text.is_char_boundary(start) {
        start -= 1;
    }

    while start > 0 {
        let head = &text[..start];
        let Some(prev) = head.chars().next_back() else {
            break;
        };
        if TERMINATORS.contains(&prev) {
            return start;
        }
        if head.ends_with("```") {
            return start - 3;
        }
        // Step back one whole character, not one byte.
        start -= prev.len_utf8();
    }
    0
}
658
/// Returns the byte offset just past the end of the sentence containing byte
/// offset `pos`.
///
/// Scans forwards from `pos` and stops
/// * right after the nearest sentence terminator (the terminator is included),
///   or
/// * right after a Markdown code fence (the three backticks are included).
///
/// Returns `text.len()` when neither is found. `pos` is clamped to
/// `text.len()` and rounded down to a character boundary, so the result can
/// always be used to slice `text`.
fn find_sentence_end(text: &str, pos: usize) -> usize {
    // ASCII and CJK sentence terminators. The earlier code compared against
    // '!' and '?' twice; the second pair is taken to be the full-width forms.
    const TERMINATORS: [char; 7] = ['.', '。', '\n', '!', '?', '\u{FF01}', '\u{FF1F}'];

    let mut from = pos.min(text.len());
    while !text.is_char_boundary(from) {
        from -= 1;
    }

    for (relative, ch) in text[from..].char_indices() {
        let at = from + relative;
        if TERMINATORS.contains(&ch) {
            return at + ch.len_utf8();
        }
        if text[at..].starts_with("```") {
            return at + 3;
        }
    }
    text.len()
}
681
/// Normalises a raw sentence for storage.
///
/// The bold English section labels are replaced by their Chinese
/// counterparts, remaining Markdown markers (`**`, backticks, `#`) are
/// removed, and every run of whitespace is collapsed to a single space with
/// no leading or trailing whitespace.
fn clean_memory_content(content: &str) -> String {
    // Labels must be rewritten before the bare "**" marker is stripped.
    const LABELS: [(&str, &str); 6] = [
        ("**Why:**", "原因:"),
        ("**Context:**", "场景:"),
        ("**Location:**", "位置:"),
        ("**Purpose:**", "功能:"),
        ("**Problem:**", "问题:"),
        ("**Key:**", "关键:"),
    ];
    const MARKERS: [&str; 3] = ["**", "`", "#"];

    let mut buffer = content.to_string();
    for (label, replacement) in LABELS {
        buffer = buffer.replace(label, replacement);
    }
    for marker in MARKERS {
        buffer = buffer.replace(marker, "");
    }

    let words: Vec<&str> = buffer.split_whitespace().collect();
    words.join(" ")
}
704
/// Shortens `text` to at most `max_len` bytes while favouring informative words.
///
/// Text that already fits is returned unchanged. Otherwise the text is split
/// on whitespace and words are selected in two passes:
/// 1. words containing a priority marker (location/purpose labels, path
///    fragments, source-file extensions), as long as they fit;
/// 2. if those fill less than half of the budget, the remaining words in
///    order, as long as they fit.
///
/// The selected words are joined with single spaces in their ORIGINAL order.
/// When not even one word fits (e.g. CJK text without spaces), the text is cut
/// at the last character boundary not exceeding `max_len` bytes.
fn truncate_intelligently(text: &str, max_len: usize) -> String {
    const PRIORITY_MARKERS: [&str; 10] = [
        "位置:", "Location:", "功能:", "Purpose:", "packages/", "src/", ".rs", ".ts", ".js", ".py",
    ];

    if text.len() <= max_len {
        return text.to_string();
    }

    let parts: Vec<&str> = text.split_whitespace().collect();
    // Selection is tracked per index (not per value) so repeated words are not
    // mistaken for "already taken" and the original order can be restored.
    let mut selected = vec![false; parts.len()];
    // Byte length of the joined output so far, separators included.
    let mut used = 0usize;

    // Bytes needed to append `part`: the word plus a separator unless it is first.
    let cost = |used: usize, part: &str| {
        if used == 0 {
            part.len()
        } else {
            part.len() + 1
        }
    };

    for (index, part) in parts.iter().enumerate() {
        if PRIORITY_MARKERS.iter().any(|marker| part.contains(marker)) {
            let needed = cost(used, part);
            if used + needed <= max_len {
                selected[index] = true;
                used += needed;
            }
        }
    }

    if used == 0 || used < max_len / 2 {
        for (index, part) in parts.iter().enumerate() {
            if selected[index] {
                continue;
            }
            let needed = cost(used, part);
            if used + needed <= max_len {
                selected[index] = true;
                used += needed;
            }
        }
    }

    if used == 0 {
        // `max_len` is a byte budget: cut on a character boundary instead of
        // taking `max_len` characters, which could be several times too long.
        let mut cut = max_len.min(text.len());
        while !text.is_char_boundary(cut) {
            cut -= 1;
        }
        return text[..cut].to_string();
    }

    parts
        .iter()
        .zip(selected.iter())
        .filter(|(_, &keep)| keep)
        .map(|(part, _)| *part)
        .collect::<Vec<_>>()
        .join(" ")
}
750
751pub fn infer_category_from_content(content: &str) -> MemoryCategory {
753 let lower = content.to_lowercase();
754
755 if lower.contains("决定")
756 || lower.contains("选择")
757 || lower.contains("采用")
758 || lower.contains("decided")
759 {
760 return MemoryCategory::Decision;
761 }
762 if lower.contains("喜欢")
763 || lower.contains("偏好")
764 || lower.contains("习惯")
765 || lower.contains("prefer")
766 {
767 return MemoryCategory::Preference;
768 }
769 if lower.contains("解决")
770 || lower.contains("修复")
771 || lower.contains("搞定")
772 || lower.contains("fixed")
773 {
774 return MemoryCategory::Solution;
775 }
776 if lower.contains("发现")
777 || lower.contains("原因")
778 || lower.contains("原来")
779 || lower.contains("found")
780 {
781 return MemoryCategory::Finding;
782 }
783 if lower.contains("技术")
784 || lower.contains("框架")
785 || lower.contains("库")
786 || lower.contains("tech")
787 {
788 return MemoryCategory::Technical;
789 }
790 if lower.contains("文件")
791 || lower.contains("目录")
792 || lower.contains("入口")
793 || lower.contains("file")
794 {
795 return MemoryCategory::Structure;
796 }
797
798 MemoryCategory::Finding }
800
801const UNIFIED_EXTRACTION_PROMPT: &str = r#"你是信息提取助手。从对话中一次性提取以下信息:
807
808## 1. 长期记忆 (memories) - 最重要!
809- decision: 技术决策(如"决定使用 PostgreSQL"、"采用 React 架构")
810- preference: 用户偏好(如"我喜欢简洁的代码风格"、"习惯用 VS Code")
811- solution: 解决方案(如"通过添加缓存解决了性能问题")
812- finding: 重要发现(如"发现内存泄漏的原因")
813- technical: 技术栈(如"项目使用 Rust + Tokio")
814- structure: **项目结构信息(优先保存!)**(如"compress 模块在 packages/core/src/compress/"、"上下文判断逻辑在 compressor.rs:518")
815
816## 结构信息的重要性
817
818项目结构信息(structure 类型)能大幅减少未来会话的探索时间,必须保存:
819- 关键模块位置:"Agent 循环在 packages/core/src/agent/run.rs"
820- 核心文件功能:"streaming.rs 负责 API 流式响应处理"
821- 问题定位路径:"上下文大小判断在 estimate_tokens 函数(compressor.rs:518-561)"
822- 代码组织模式:"providers 模块实现了 Provider trait"
823
824## 2. 当前焦点 (focus_points)
825- topic: 当前讨论的主题
826- keywords: 相关关键词
827- entities: 涉及的文件/函数/类名
828- core_question: 核心问题(可选)
829
830## 3. 对话模式 (conversation_patterns)
831- reference: 引用模式(如"正如前面所说"、"as mentioned"、"previously")
832- code: 代码模式(如"fn ", "function", "```", "class ")
833
834## 4. 焦点关键词 (focus_keywords)
835- transition: 话题转换词(如"换个话题", "switching", "however", "等等")
836- question: 提问词(如"怎么", "how", "为什么", "why", "请问")
837- task: 任务词(如"帮我", "implement", "创建", "create", "修复")
838- tech: 技术词(如"rust", "数据库", "api", "性能", "优化")
839
840## 输出格式(严格 JSON)
841
842```json
843{
844 "memories": [
845 {
846 "category": "structure",
847 "content": "上下文压缩模块位于 packages/core/src/compress/。**Location:** compressor.rs:518-561 是 estimate_tokens 函数,**Purpose:** 计算上下文 token 数量",
848 "importance": 80,
849 "keywords": ["compress", "estimate_tokens", "context"],
850 "tags": ["core", "context-management"]
851 },
852 {
853 "category": "decision",
854 "content": "采用 PostgreSQL 作为主数据库。**Why:** 性能要求",
855 "importance": 85,
856 "keywords": ["PostgreSQL", "数据库"],
857 "tags": ["backend", "storage"]
858 }
859 ],
860 "focus_points": [
861 {
862 "topic": "API 设计优化",
863 "keywords": ["API", "REST", "性能"],
864 "entities": ["api.rs", "handler"],
865 "core_question": "如何优化 API 响应时间?",
866 "importance": 0.8,
867 "is_current": true
868 }
869 ],
870 "conversation_patterns": [
871 {
872 "pattern_type": "reference",
873 "pattern": "正如我所说",
874 "confidence": 0.8
875 },
876 {
877 "pattern_type": "code",
878 "pattern": "fn ",
879 "confidence": 0.6
880 }
881 ],
882 "focus_keywords": {
883 "transition": ["换个话题", "switching"],
884 "question": ["怎么", "how"],
885 "task": ["帮我", "implement"],
886 "tech": ["rust", "性能"]
887 }
888}
889```
890
891## 规则
8921. structure 类型的记忆优先级最高,发现就保存
8932. 只提取明确出现的信息,不要推测
8943. 如果某类信息没有,返回空数组/对象
8954. importance 范围:memories 0-100,focus_points 0.0-1.0
8965. confidence 范围:0.0-1.0,常见模式置信度较低
8976. 关键词提取 3-5 个核心关键词
8987. 只返回 JSON,不要其他解释"#;
899
900const UNIFIED_EXTRACTION_WITH_FOCUS_PROMPT: &str = r#"你是信息提取和焦点决策助手。从对话中一次性完成以下任务:
903
904## 1. 焦点决策 (focus_decision) - 最重要!
905
906你会收到当前已有的焦点列表。请判断:
907
908### 选择现有焦点
909如果最新对话与某个现有焦点匹配:
910- selected_focus_id: 该焦点的 ID
911- need_new_focus: false
912- confidence: 匹配置信度 (0.0-1.0)
913
914### 创建新焦点
915如果没有任何现有焦点匹配:
916- selected_focus_id: null
917- need_new_focus: true
918- new_focus_topic: 新焦点主题
919- new_core_question: 核心问题
920- confidence: 创建置信度
921
922### 判断话题切换
923- is_topic_switch: 是否从某焦点切换到另一焦点
924- previous_focus_id: 切换前的焦点 ID(如果有)
925
926### 焦点类型 (focus_type)
927- problem_solving: 修复 bug、解决错误
928- task_execution: 实现功能、完成任务
929- knowledge_exploration: 学习、研究、探索
930- decision_making: 技术选型、架构设计
931- code_optimization: 性能优化、重构
932- general: 一般对话
933
934## 2. 长期记忆 (memories)
935- decision: 技术决策
936- preference: 用户偏好
937- solution: 解决方案
938- finding: 重要发现
939- technical: 技术栈
940- structure: 项目结构
941
942## 3. 焦点关键词 (focus_keywords)
943- transition: 话题转换词
944- question: 提问词
945- task: 任务词
946- tech: 技术词
947
948## 输出格式(严格 JSON)
949
950```json
951{
952 "focus_decision": {
953 "selected_focus_id": "focus-1",
954 "need_new_focus": false,
955 "new_focus_topic": null,
956 "new_core_question": null,
957 "confidence": 0.85,
958 "focus_type": "code_optimization",
959 "is_topic_switch": true,
960 "previous_focus_id": "focus-2",
961 "focus_keywords": ["API", "latency", "performance"],
962 "related_entities": ["api.rs", "handle_request()"],
963 "reasoning": "用户从数据库切换到 API 性能话题"
964 },
965 "memories": [...],
966 "focus_keywords": {
967 "transition": ["换个话题"],
968 "question": ["怎么"],
969 "task": ["优化"],
970 "tech": ["api", "性能"]
971 }
972}
973```
974
975## 规则
9761. focus_decision 是最重要的输出,必须仔细判断
9772. 现有焦点列表会随对话文本一起提供
9783. 如果现有焦点都不匹配,必须标记 need_new_focus=true
9794. confidence 反映你对决策的确信程度
9805. 只返回 JSON,不要其他解释"#;
981
982pub struct UnifiedExtractor {
987 provider: Box<dyn crate::providers::Provider>,
988 model: String,
989}
990
991impl UnifiedExtractor {
992 pub fn new(provider: Box<dyn crate::providers::Provider>, model: String) -> Self {
994 Self { provider, model }
995 }
996
997 pub fn new_minimal(model: String) -> Self {
999 Self {
1000 provider: crate::create_minimal_provider(&model),
1001 model,
1002 }
1003 }
1004
1005 pub async fn extract_unified(
1007 &self,
1008 text: &str,
1009 session_id: Option<&str>,
1010 project_path: Option<&str>,
1011 ) -> Result<UnifiedExtractionResult> {
1012 use crate::providers::{ChatRequest, Message, MessageContent, Role};
1013
1014 let truncated = truncate_chars(text, 4000);
1016
1017 let request = ChatRequest {
1018 messages: vec![Message {
1019 role: Role::User,
1020 content: MessageContent::Text(format!(
1021 "请从以下对话中提取所有信息:\n\n{}",
1022 truncated
1023 )),
1024 }],
1025 tools: vec![],
1026 system: Some(UNIFIED_EXTRACTION_PROMPT.to_string()),
1027 think: false,
1028 max_tokens: 1024, server_tools: vec![],
1030 enable_caching: false,
1031 };
1032
1033 let response = self.provider.chat(request).await?;
1034
1035 let response_text = response
1036 .content
1037 .iter()
1038 .filter_map(|b| {
1039 if let crate::providers::ContentBlock::Text { text } = b {
1040 Some(text.clone())
1041 } else {
1042 None
1043 }
1044 })
1045 .collect::<Vec<_>>()
1046 .join("");
1047
1048 parse_unified_response(&response_text, session_id, project_path)
1049 }
1050
1051 pub async fn extract_unified_with_foci(
1065 &self,
1066 text: &str,
1067 existing_foci: &[(&str, &str, &[String])], session_id: Option<&str>,
1069 project_path: Option<&str>,
1070 ) -> Result<UnifiedExtractionResult> {
1071 use crate::providers::{ChatRequest, Message, MessageContent, Role};
1072
1073 let truncated = truncate_chars(text, 4000);
1075
1076 let foci_text = if existing_foci.is_empty() {
1078 "(当前没有现有焦点)".to_string()
1079 } else {
1080 let mut foci_list = Vec::new();
1081 for (id, topic, keywords) in existing_foci {
1082 foci_list.push(format!(
1083 "- ID: {}\n 主题: {}\n 关键词: {}",
1084 id,
1085 topic,
1086 keywords.join(", ")
1087 ));
1088 }
1089 format!("现有焦点列表:\n{}", foci_list.join("\n"))
1090 };
1091
1092 let user_prompt = format!(
1093 "{}\n\n最新对话:\n{}\n\n请判断最新对话与现有焦点的匹配关系,并做出焦点决策。",
1094 foci_text,
1095 truncated
1096 );
1097
1098 let request = ChatRequest {
1099 messages: vec![Message {
1100 role: Role::User,
1101 content: MessageContent::Text(user_prompt),
1102 }],
1103 tools: vec![],
1104 system: Some(UNIFIED_EXTRACTION_WITH_FOCUS_PROMPT.to_string()),
1105 think: false,
1106 max_tokens: 1024,
1107 server_tools: vec![],
1108 enable_caching: false,
1109 };
1110
1111 let response = self.provider.chat(request).await?;
1112
1113 let response_text = response
1114 .content
1115 .iter()
1116 .filter_map(|b| {
1117 if let crate::providers::ContentBlock::Text { text } = b {
1118 Some(text.clone())
1119 } else {
1120 None
1121 }
1122 })
1123 .collect::<Vec<_>>()
1124 .join("");
1125
1126 parse_unified_response_with_focus(&response_text, session_id, project_path)
1127 }
1128
1129 pub fn model_name(&self) -> &str {
1131 &self.model
1132 }
1133}
1134
1135fn parse_unified_response(
1137 json_text: &str,
1138 session_id: Option<&str>,
1139 project_path: Option<&str>,
1140) -> Result<UnifiedExtractionResult> {
1141 let cleaned = json_text
1142 .trim()
1143 .trim_start_matches("```json")
1144 .trim_start_matches("```")
1145 .trim_end_matches("```")
1146 .trim();
1147
1148 #[derive(Deserialize)]
1149 struct UnifiedResponse {
1150 #[serde(default)]
1151 memories: Vec<MemoryItem>,
1152 #[serde(default)]
1153 focus_points: Vec<FocusPointItem>,
1154 #[serde(default)]
1155 conversation_patterns: Vec<ConversationPatternItem>,
1156 #[serde(default)]
1157 focus_keywords: FocusKeywordsItem,
1158 }
1159
1160 #[derive(Deserialize, Default)]
1161 struct FocusKeywordsItem {
1162 #[serde(default)]
1163 transition: Vec<String>,
1164 #[serde(default)]
1165 question: Vec<String>,
1166 #[serde(default)]
1167 task: Vec<String>,
1168 #[serde(default)]
1169 tech: Vec<String>,
1170 }
1171
1172 #[derive(Deserialize)]
1173 struct MemoryItem {
1174 category: String,
1175 content: String,
1176 #[serde(default)]
1177 importance: f64,
1178 #[serde(default)]
1179 keywords: Vec<String>,
1180 #[serde(default)]
1181 tags: Vec<String>,
1182 }
1183
1184 #[derive(Deserialize)]
1185 struct FocusPointItem {
1186 topic: String,
1187 #[serde(default)]
1188 keywords: Vec<String>,
1189 #[serde(default)]
1190 entities: Vec<String>,
1191 #[serde(default)]
1192 core_question: Option<String>,
1193 #[serde(default = "default_importance")]
1194 importance: f32,
1195 #[serde(default = "default_is_current")]
1196 is_current: bool,
1197 }
1198
1199 #[derive(Deserialize)]
1200 struct ConversationPatternItem {
1201 pattern_type: String,
1202 pattern: String,
1203 #[serde(default)]
1204 confidence: f32,
1205 }
1206
1207 fn default_importance() -> f32 { 0.7 }
1208 fn default_is_current() -> bool { true }
1209
1210 let parsed: UnifiedResponse = serde_json::from_str(cleaned)?;
1211
1212 let entries = parsed
1214 .memories
1215 .into_iter()
1216 .filter_map(|item| {
1217 let category = match item.category.to_lowercase().as_str() {
1218 "decision" => MemoryCategory::Decision,
1219 "preference" => MemoryCategory::Preference,
1220 "solution" => MemoryCategory::Solution,
1221 "finding" => MemoryCategory::Finding,
1222 "technical" => MemoryCategory::Technical,
1223 "structure" => MemoryCategory::Structure,
1224 _ => return None,
1225 };
1226
1227 if item.content.len() < MIN_MEMORY_CONTENT_LENGTH {
1228 return None;
1229 }
1230
1231 let mut entry = MemoryEntry::new(
1232 category,
1233 item.content,
1234 session_id.map(|s| s.to_string()),
1235 project_path.map(|p| p.to_string()),
1236 );
1237 if item.importance > 0.0 {
1238 entry.importance = item.importance.clamp(0.0, 100.0);
1239 }
1240 if !item.keywords.is_empty() {
1241 entry.tags.extend(item.keywords);
1242 }
1243 if !item.tags.is_empty() {
1244 entry.tags.extend(item.tags);
1245 }
1246 entry.tags.dedup();
1247
1248 Some(entry)
1249 })
1250 .collect();
1251
1252 use chrono::Utc;
1254 use crate::compress::FocusStatus;
1255
1256 let focus_points = parsed
1257 .focus_points
1258 .into_iter()
1259 .map(|item| {
1260 let mut focus = FocusPoint::new(
1261 format!("focus-{}", Utc::now().timestamp()),
1262 item.topic,
1263 item.keywords,
1264 item.entities,
1265 item.core_question,
1266 0,
1267 );
1268 focus.importance = item.importance.clamp(0.0, 1.0);
1269 if !item.is_current {
1270 focus.status = FocusStatus::Suspended;
1271 }
1272 focus
1273 })
1274 .collect();
1275
1276 let conversation_patterns = parsed
1278 .conversation_patterns
1279 .into_iter()
1280 .filter_map(|item| {
1281 let pattern_type = match item.pattern_type.to_lowercase().as_str() {
1282 "reference" => PatternType::Reference,
1283 "code" => PatternType::Code,
1284 _ => return None,
1285 };
1286
1287 if item.pattern.trim().is_empty() {
1288 return None;
1289 }
1290
1291 let mut pattern = ConversationPattern::new(
1292 pattern_type,
1293 item.pattern,
1294 PatternSource::UserConversation {
1295 example: String::new(),
1296 },
1297 );
1298
1299 pattern.confidence = if item.confidence > 0.0 {
1300 item.confidence.clamp(0.0, 1.0)
1301 } else {
1302 0.5
1303 };
1304
1305 Some(pattern)
1306 })
1307 .collect();
1308
1309 let focus_keywords = ExtractedKeywords {
1311 transition: parsed.focus_keywords.transition,
1312 question: parsed.focus_keywords.question,
1313 task: parsed.focus_keywords.task,
1314 tech: parsed.focus_keywords.tech,
1315 };
1316
1317 Ok(UnifiedExtractionResult {
1318 memories: deduplicate_entries(entries),
1319 focus_points,
1320 conversation_patterns,
1321 focus_keywords,
1322 focus_decision: None, })
1324}
1325
1326fn parse_unified_response_with_focus(
1328 json_text: &str,
1329 session_id: Option<&str>,
1330 project_path: Option<&str>,
1331) -> Result<UnifiedExtractionResult> {
1332 let cleaned = json_text
1333 .trim()
1334 .trim_start_matches("```json")
1335 .trim_start_matches("```")
1336 .trim_end_matches("```")
1337 .trim();
1338
1339 #[derive(Deserialize)]
1340 struct UnifiedResponseWithFocus {
1341 #[serde(default)]
1342 focus_decision: Option<FocusDecisionItem>,
1343 #[serde(default)]
1344 memories: Vec<MemoryItem>,
1345 #[serde(default)]
1346 focus_keywords: FocusKeywordsItem,
1347 }
1348
1349 #[derive(Deserialize)]
1350 struct FocusDecisionItem {
1351 #[serde(default)]
1352 selected_focus_id: Option<String>,
1353 #[serde(default)]
1354 need_new_focus: bool,
1355 #[serde(default)]
1356 new_focus_topic: Option<String>,
1357 #[serde(default)]
1358 new_core_question: Option<String>,
1359 #[serde(default)]
1360 confidence: f32,
1361 #[serde(default)]
1362 focus_type: String,
1363 #[serde(default)]
1364 is_topic_switch: bool,
1365 #[serde(default)]
1366 previous_focus_id: Option<String>,
1367 #[serde(default)]
1368 focus_keywords: Vec<String>,
1369 #[serde(default)]
1370 related_entities: Vec<String>,
1371 #[serde(default)]
1372 reasoning: String,
1373 }
1374
1375 #[derive(Deserialize, Default)]
1376 struct FocusKeywordsItem {
1377 #[serde(default)]
1378 transition: Vec<String>,
1379 #[serde(default)]
1380 question: Vec<String>,
1381 #[serde(default)]
1382 task: Vec<String>,
1383 #[serde(default)]
1384 tech: Vec<String>,
1385 }
1386
1387 #[derive(Deserialize)]
1388 struct MemoryItem {
1389 category: String,
1390 content: String,
1391 #[serde(default)]
1392 importance: f64,
1393 #[serde(default)]
1394 keywords: Vec<String>,
1395 #[serde(default)]
1396 tags: Vec<String>,
1397 }
1398
1399 let parsed: UnifiedResponseWithFocus = serde_json::from_str(cleaned)?;
1400
1401 let focus_decision = parsed.focus_decision.map(|item| {
1403 use super::unified_extraction::{FocusDecision, FocusType};
1404
1405 let focus_type = match item.focus_type.to_lowercase().as_str() {
1406 "problem_solving" => FocusType::ProblemSolving,
1407 "task_execution" => FocusType::TaskExecution,
1408 "knowledge_exploration" => FocusType::KnowledgeExploration,
1409 "decision_making" => FocusType::DecisionMaking,
1410 "code_optimization" => FocusType::CodeOptimization,
1411 _ => FocusType::General,
1412 };
1413
1414 FocusDecision {
1415 selected_focus_id: item.selected_focus_id,
1416 need_new_focus: item.need_new_focus,
1417 new_focus_topic: item.new_focus_topic,
1418 new_core_question: item.new_core_question,
1419 confidence: item.confidence.clamp(0.0, 1.0),
1420 focus_type,
1421 is_topic_switch: item.is_topic_switch,
1422 previous_focus_id: item.previous_focus_id,
1423 focus_keywords: item.focus_keywords,
1424 related_entities: item.related_entities,
1425 reasoning: item.reasoning,
1426 }
1427 });
1428
1429 let entries = parsed
1431 .memories
1432 .into_iter()
1433 .filter_map(|item| {
1434 let category = match item.category.to_lowercase().as_str() {
1435 "decision" => MemoryCategory::Decision,
1436 "preference" => MemoryCategory::Preference,
1437 "solution" => MemoryCategory::Solution,
1438 "finding" => MemoryCategory::Finding,
1439 "technical" => MemoryCategory::Technical,
1440 "structure" => MemoryCategory::Structure,
1441 _ => return None,
1442 };
1443
1444 if item.content.len() < MIN_MEMORY_CONTENT_LENGTH {
1445 return None;
1446 }
1447
1448 let mut entry = MemoryEntry::new(
1449 category,
1450 item.content,
1451 session_id.map(|s| s.to_string()),
1452 project_path.map(|p| p.to_string()),
1453 );
1454 if item.importance > 0.0 {
1455 entry.importance = item.importance.clamp(0.0, 100.0);
1456 }
1457 if !item.keywords.is_empty() {
1458 entry.tags.extend(item.keywords);
1459 }
1460 if !item.tags.is_empty() {
1461 entry.tags.extend(item.tags);
1462 }
1463 entry.tags.dedup();
1464
1465 Some(entry)
1466 })
1467 .collect();
1468
1469 let focus_keywords = ExtractedKeywords {
1471 transition: parsed.focus_keywords.transition,
1472 question: parsed.focus_keywords.question,
1473 task: parsed.focus_keywords.task,
1474 tech: parsed.focus_keywords.tech,
1475 };
1476
1477 Ok(UnifiedExtractionResult {
1478 memories: deduplicate_entries(entries),
1479 focus_points: Vec::new(), conversation_patterns: Vec::new(), focus_keywords,
1482 focus_decision,
1483 })
1484}
1485
1486pub async fn detect_unified_smart(
1490 text: &str,
1491 session_id: Option<&str>,
1492 project_path: Option<&str>,
1493 extractor: Option<&UnifiedExtractor>,
1494) -> UnifiedExtractionResult {
1495 let mode = AiDetectionMode::from_env();
1496 let text_len = text.len();
1497
1498 let should_try_ai = mode != AiDetectionMode::Never && extractor.is_some() && text_len > 200;
1500
1501 if should_try_ai && let Some(ex) = extractor {
1502 if let Ok(result) = ex.extract_unified(text, session_id, project_path).await {
1503 return result;
1504 }
1505 log::warn!("Unified extraction failed, skipping detection for this turn");
1507 }
1508
1509 UnifiedExtractionResult::default()
1511}
1512
/// Reports whether `word` is one of the built-in English or Chinese stop
/// words.
///
/// The comparison is made against the lowercased form of `word`, so English
/// entries match regardless of case. The input is not trimmed: surrounding
/// whitespace prevents a match.
fn is_noise_word(word: &str) -> bool {
    /// English function words, auxiliaries, prepositions and conjunctions.
    const ENGLISH: &[&str] = &[
        "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
        "have", "has", "had", "do", "does", "did", "will", "would", "could",
        "should", "may", "might", "must", "shall", "can", "need", "dare",
        "ought", "used", "to", "of", "in", "for", "on", "with", "at", "by",
        "from", "as", "into", "through", "during", "before", "after",
        "above", "below", "between", "under", "again", "further", "then",
        "once", "here", "there", "when", "where", "why", "how", "all", "each",
        "few", "more", "most", "other", "some", "such", "no", "nor", "not",
        "only", "own", "same", "so", "than", "too", "very", "just", "and",
        "but", "if", "or", "because", "until", "while", "although", "though",
    ];

    /// Chinese single-character particles and common multi-character
    /// connectives and adverbs.
    const CHINESE: &[&str] = &[
        "的", "了", "在", "是", "我", "有", "和", "就", "不", "人", "都",
        "一", "个", "也", "很", "要", "这", "那", "他", "她", "它", "们",
        "为", "与", "以", "及", "或", "但", "如", "而", "因", "所", "能",
        "会", "可", "把", "被", "让", "给", "从", "到", "对", "向", "比",
        "等", "时", "地", "得", "着", "过", "来", "去", "上", "下", "里",
        "中", "外", "前", "后", "左", "右", "好", "多", "少", "大", "小",
        "高", "低", "长", "短", "快", "慢", "新", "旧", "早", "晚", "真",
        "假", "全", "每", "各", "哪", "什么", "怎么", "怎样", "如何",
        "为什么", "因为", "所以", "如果", "虽然", "但是", "然后", "接着",
        "最后", "开始", "结束", "一直", "总是", "有时", "常常", "经常",
    ];

    let lowered = word.to_lowercase();
    ENGLISH
        .iter()
        .chain(CHINESE)
        .any(|candidate| *candidate == lowered)
}
1542
#[cfg(test)]
mod tests {
    //! Unit tests for the response parsers, the extraction result types and
    //! the prompt constants defined in this file.

    use super::*;

    /// Builds an `ExtractionResult` that carries only the given patterns.
    fn result_with_patterns(patterns: Vec<ConversationPattern>) -> ExtractionResult {
        ExtractionResult {
            memories: Vec::new(),
            focus_points: Vec::new(),
            conversation_patterns: patterns,
        }
    }

    // ---- parse_memory_response: conversation patterns ----

    /// Two well-formed patterns come back with their type, text and confidence.
    #[test]
    fn test_parse_memory_response_with_patterns() {
        let payload = r#"{
            "memories": [],
            "focus_points": [],
            "conversation_patterns": [
                {
                    "pattern_type": "reference",
                    "pattern": "正如我所说",
                    "confidence": 0.8
                },
                {
                    "pattern_type": "code",
                    "pattern": "fn ",
                    "confidence": 0.6
                }
            ]
        }"#;

        let parsed = parse_memory_response(payload, None, None).unwrap();
        assert_eq!(parsed.memories.len(), 0);
        assert_eq!(parsed.focus_points.len(), 0);

        let patterns = &parsed.conversation_patterns;
        assert_eq!(patterns.len(), 2);

        let reference = &patterns[0];
        assert_eq!(reference.pattern_type, PatternType::Reference);
        assert_eq!(reference.pattern, "正如我所说");
        assert_eq!(reference.confidence, 0.8);
        assert!(reference.is_active);

        let code = &patterns[1];
        assert_eq!(code.pattern_type, PatternType::Code);
        assert_eq!(code.pattern, "fn ");
        assert_eq!(code.confidence, 0.6);
    }

    /// A pattern without a `confidence` key is expected to report 0.5.
    #[test]
    fn test_parse_memory_response_patterns_default_confidence() {
        let payload = r#"{
            "memories": [],
            "focus_points": [],
            "conversation_patterns": [
                {
                    "pattern_type": "reference",
                    "pattern": "as mentioned"
                }
            ]
        }"#;

        let parsed = parse_memory_response(payload, None, None).unwrap();
        let patterns = &parsed.conversation_patterns;
        assert_eq!(patterns.len(), 1);
        assert_eq!(patterns[0].confidence, 0.5);
    }

    /// A reply without a `conversation_patterns` key yields no patterns.
    #[test]
    fn test_parse_memory_response_patterns_empty() {
        let payload = r#"{
            "memories": [],
            "focus_points": []
        }"#;

        let parsed = parse_memory_response(payload, None, None).unwrap();
        assert_eq!(parsed.conversation_patterns.len(), 0);
    }

    /// A pattern with an unrecognised `pattern_type` is skipped.
    #[test]
    fn test_parse_memory_response_patterns_invalid_type() {
        let payload = r#"{
            "memories": [],
            "focus_points": [],
            "conversation_patterns": [
                {
                    "pattern_type": "invalid_type",
                    "pattern": "test",
                    "confidence": 0.5
                },
                {
                    "pattern_type": "reference",
                    "pattern": "valid pattern",
                    "confidence": 0.7
                }
            ]
        }"#;

        let parsed = parse_memory_response(payload, None, None).unwrap();
        let patterns = &parsed.conversation_patterns;
        assert_eq!(patterns.len(), 1);
        assert_eq!(patterns[0].pattern, "valid pattern");
    }

    /// Patterns whose text is empty or whitespace-only are skipped.
    #[test]
    fn test_parse_memory_response_patterns_empty_string() {
        let payload = r#"{
            "memories": [],
            "focus_points": [],
            "conversation_patterns": [
                {
                    "pattern_type": "reference",
                    "pattern": "",
                    "confidence": 0.5
                },
                {
                    "pattern_type": "code",
                    "pattern": "   ",
                    "confidence": 0.5
                },
                {
                    "pattern_type": "reference",
                    "pattern": "valid",
                    "confidence": 0.8
                }
            ]
        }"#;

        let parsed = parse_memory_response(payload, None, None).unwrap();
        let patterns = &parsed.conversation_patterns;
        assert_eq!(patterns.len(), 1);
        assert_eq!(patterns[0].pattern, "valid");
    }

    /// Out-of-range confidences: 1.5 is expected back as 1.0 and -0.3 as 0.5.
    #[test]
    fn test_parse_memory_response_patterns_confidence_clamped() {
        let payload = r#"{
            "memories": [],
            "focus_points": [],
            "conversation_patterns": [
                {
                    "pattern_type": "reference",
                    "pattern": "test1",
                    "confidence": 1.5
                },
                {
                    "pattern_type": "code",
                    "pattern": "test2",
                    "confidence": -0.3
                }
            ]
        }"#;

        let parsed = parse_memory_response(payload, None, None).unwrap();
        let patterns = &parsed.conversation_patterns;
        assert_eq!(patterns.len(), 2);

        assert_eq!(patterns[0].confidence, 1.0);
        assert_eq!(patterns[1].confidence, 0.5);
    }

    /// Parsed patterns carry a `UserConversation` source with an empty example.
    #[test]
    fn test_parse_memory_response_patterns_source() {
        let payload = r#"{
            "memories": [],
            "focus_points": [],
            "conversation_patterns": [
                {
                    "pattern_type": "reference",
                    "pattern": "PR #123",
                    "confidence": 0.9
                }
            ]
        }"#;

        let parsed = parse_memory_response(payload, None, None).unwrap();
        let first = &parsed.conversation_patterns[0];

        let PatternSource::UserConversation { example } = &first.source else {
            panic!("Expected UserConversation source");
        };
        assert_eq!(example, "");
    }

    /// A reply in the older shape (no `conversation_patterns`) still parses.
    #[test]
    fn test_parse_memory_response_backward_compatible() {
        let payload = r#"{
            "memories": [
                {
                    "category": "decision",
                    "content": "使用 Rust 作为主要语言",
                    "importance": 80,
                    "keywords": ["Rust"],
                    "tags": ["backend"]
                }
            ],
            "focus_points": [
                {
                    "topic": "API设计",
                    "keywords": ["API", "REST"],
                    "importance": 0.8
                }
            ]
        }"#;

        let parsed = parse_memory_response(payload, None, None).unwrap();
        assert_eq!(parsed.memories.len(), 1);
        assert_eq!(parsed.focus_points.len(), 1);
        assert_eq!(parsed.conversation_patterns.len(), 0);

        let memory = &parsed.memories[0];
        assert_eq!(memory.category, MemoryCategory::Decision);
        assert!(memory.content.contains("Rust"));
    }

    /// A fenced reply parses, and a pattern that is itself a fence survives.
    #[test]
    fn test_parse_memory_response_with_code_block_markers() {
        let payload = r#"```json
{
  "memories": [],
  "focus_points": [],
  "conversation_patterns": [
    {
      "pattern_type": "code",
      "pattern": "```",
      "confidence": 0.7
    }
  ]
}
```"#;

        let parsed = parse_memory_response(payload, None, None).unwrap();
        let patterns = &parsed.conversation_patterns;
        assert_eq!(patterns.len(), 1);
        assert_eq!(patterns[0].pattern, "```");
    }

    // ---- ExtractionResult ----

    /// `ExtractionResult` exposes a `conversation_patterns` field.
    #[test]
    fn test_extraction_result_has_patterns_field() {
        let built = result_with_patterns(vec![ConversationPattern::new(
            PatternType::Reference,
            "test pattern",
            PatternSource::Manual,
        )]);

        assert_eq!(built.conversation_patterns.len(), 1);
    }

    /// Cloning an `ExtractionResult` copies its patterns.
    #[test]
    fn test_extraction_result_clone() {
        let original = result_with_patterns(vec![ConversationPattern::new(
            PatternType::Code,
            "fn test()",
            PatternSource::Manual,
        )]);

        let copy = original.clone();
        assert_eq!(copy.conversation_patterns.len(), 1);
        assert_eq!(copy.conversation_patterns[0].pattern, "fn test()");
    }

    /// An `ExtractionResult` may be entirely empty.
    #[test]
    fn test_extraction_result_empty_patterns() {
        let built = result_with_patterns(Vec::new());

        assert!(built.conversation_patterns.is_empty());
        assert!(built.memories.is_empty());
        assert!(built.focus_points.is_empty());
    }

    // ---- MEMORY_EXTRACT_SYSTEM_PROMPT ----

    /// The memory prompt contains the pattern-extraction guidance.
    #[test]
    fn test_memory_extract_prompt_contains_patterns_guidance() {
        let expectations = [
            ("对话模式提取", "Prompt should contain pattern extraction guidance"),
            ("reference", "Prompt should mention reference pattern type"),
            ("code", "Prompt should mention code pattern type"),
        ];

        for (needle, message) in expectations {
            assert!(MEMORY_EXTRACT_SYSTEM_PROMPT.contains(needle), "{}", message);
        }
    }

    /// The memory prompt states the 500-character trigger condition.
    #[test]
    fn test_memory_extract_prompt_contains_trigger_condition() {
        assert!(
            MEMORY_EXTRACT_SYSTEM_PROMPT.contains("500"),
            "Prompt should mention 500 chars trigger condition"
        );

        let states_threshold = ["> 500", "超过500"]
            .into_iter()
            .any(|phrase| MEMORY_EXTRACT_SYSTEM_PROMPT.contains(phrase));
        assert!(states_threshold, "Prompt should specify > 500 chars condition");
    }

    /// The memory prompt shows the pattern fields in its output format.
    #[test]
    fn test_memory_extract_prompt_contains_output_format() {
        let expectations = [
            (
                "conversation_patterns",
                "Prompt should show conversation_patterns in output format",
            ),
            ("pattern_type", "Prompt should show pattern_type field"),
            ("confidence", "Prompt should show confidence field"),
        ];

        for (needle, message) in expectations {
            assert!(MEMORY_EXTRACT_SYSTEM_PROMPT.contains(needle), "{}", message);
        }
    }

    // ---- parse_memory_response: combined replies ----

    /// Memories, focus points and patterns are all parsed from one reply.
    #[test]
    fn test_parse_memory_response_full_integration() {
        let payload = r#"{
            "memories": [
                {
                    "category": "decision",
                    "content": "使用 Rust 作为主要语言。**Why:** 性能要求",
                    "importance": 85,
                    "keywords": ["Rust"],
                    "tags": ["backend"]
                }
            ],
            "focus_points": [
                {
                    "topic": "API设计",
                    "keywords": ["API", "REST"],
                    "entities": ["User", "Order"],
                    "importance": 0.8
                }
            ],
            "conversation_patterns": [
                {
                    "pattern_type": "reference",
                    "pattern": "正如我所说",
                    "confidence": 0.9
                },
                {
                    "pattern_type": "code",
                    "pattern": "fn ",
                    "confidence": 0.7
                }
            ]
        }"#;

        let parsed =
            parse_memory_response(payload, Some("session-123"), Some("/project/path")).unwrap();

        assert_eq!(parsed.memories.len(), 1);
        assert_eq!(parsed.focus_points.len(), 1);
        assert_eq!(parsed.conversation_patterns.len(), 2);

        let memory = &parsed.memories[0];
        assert_eq!(memory.category, MemoryCategory::Decision);
        assert!(memory.content.contains("Rust"));

        assert_eq!(parsed.focus_points[0].topic, "API设计");

        let patterns = &parsed.conversation_patterns;
        assert_eq!(patterns[0].pattern_type, PatternType::Reference);
        assert_eq!(patterns[1].pattern_type, PatternType::Code);
    }

    /// Only the valid patterns of a mixed list are kept, in their input order.
    #[test]
    fn test_parse_memory_response_mixed_valid_invalid_patterns() {
        let payload = r#"{
            "memories": [],
            "focus_points": [],
            "conversation_patterns": [
                {
                    "pattern_type": "reference",
                    "pattern": "valid pattern 1",
                    "confidence": 0.8
                },
                {
                    "pattern_type": "unknown_type",
                    "pattern": "should be skipped",
                    "confidence": 0.5
                },
                {
                    "pattern_type": "code",
                    "pattern": "fn valid",
                    "confidence": 0.6
                },
                {
                    "pattern_type": "reference",
                    "pattern": "",
                    "confidence": 0.9
                }
            ]
        }"#;

        let parsed = parse_memory_response(payload, None, None).unwrap();
        let patterns = &parsed.conversation_patterns;

        assert_eq!(patterns.len(), 2);
        assert_eq!(patterns[0].pattern, "valid pattern 1");
        assert_eq!(patterns[1].pattern, "fn valid");
    }

    /// Session id and project path are stamped onto the parsed memories.
    #[test]
    fn test_parse_memory_response_patterns_with_session_and_project() {
        let payload = r#"{
            "memories": [
                {
                    "category": "technical",
                    "content": "Using PostgreSQL database",
                    "importance": 70,
                    "keywords": ["PostgreSQL"],
                    "tags": ["database"]
                }
            ],
            "focus_points": [],
            "conversation_patterns": [
                {
                    "pattern_type": "reference",
                    "pattern": "as mentioned",
                    "confidence": 0.7
                }
            ]
        }"#;

        let parsed =
            parse_memory_response(payload, Some("test-session"), Some("/test/project")).unwrap();

        let memory = &parsed.memories[0];
        assert_eq!(memory.source_session, Some("test-session".to_string()));
        assert_eq!(memory.project_path, Some("/test/project".to_string()));

        assert_eq!(parsed.conversation_patterns.len(), 1);
    }

    /// `pattern_type` is matched case-insensitively for both known types.
    #[test]
    fn test_parse_memory_response_all_pattern_types() {
        let payload = r#"{
            "memories": [],
            "focus_points": [],
            "conversation_patterns": [
                {
                    "pattern_type": "reference",
                    "pattern": "previously discussed",
                    "confidence": 0.8
                },
                {
                    "pattern_type": "Reference",
                    "pattern": "case insensitive",
                    "confidence": 0.7
                },
                {
                    "pattern_type": "CODE",
                    "pattern": "function ",
                    "confidence": 0.6
                },
                {
                    "pattern_type": "code",
                    "pattern": "class ",
                    "confidence": 0.5
                }
            ]
        }"#;

        let parsed = parse_memory_response(payload, None, None).unwrap();
        let patterns = &parsed.conversation_patterns;

        assert_eq!(patterns.len(), 4);

        assert_eq!(patterns[0].pattern_type, PatternType::Reference);
        assert_eq!(patterns[1].pattern_type, PatternType::Reference);
        assert_eq!(patterns[2].pattern_type, PatternType::Code);
        assert_eq!(patterns[3].pattern_type, PatternType::Code);
    }

    /// The `Debug` output names the struct and its pattern field.
    #[test]
    fn test_extraction_result_debug_trait() {
        let built = result_with_patterns(vec![ConversationPattern::new(
            PatternType::Reference,
            "test",
            PatternSource::Manual,
        )]);

        let rendered = format!("{:?}", built);
        assert!(rendered.contains("ExtractionResult"));
        assert!(rendered.contains("conversation_patterns"));
    }

    // ---- parse_unified_response ----

    /// A reply with every section populated is parsed in full.
    #[test]
    fn test_parse_unified_response_full() {
        let payload = r#"{
            "memories": [
                {
                    "category": "decision",
                    "content": "使用 Rust 作为主要语言",
                    "importance": 85,
                    "keywords": ["Rust"],
                    "tags": ["backend"]
                }
            ],
            "focus_points": [
                {
                    "topic": "API设计",
                    "keywords": ["API", "REST"],
                    "entities": ["User", "Order"],
                    "core_question": "如何优化 API?",
                    "importance": 0.8,
                    "is_current": true
                }
            ],
            "conversation_patterns": [
                {
                    "pattern_type": "reference",
                    "pattern": "正如我所说",
                    "confidence": 0.8
                }
            ],
            "focus_keywords": {
                "transition": ["换个话题"],
                "question": ["怎么"],
                "task": ["帮我"],
                "tech": ["rust"]
            }
        }"#;

        let parsed = parse_unified_response(payload, Some("session-123"), Some("/project")).unwrap();

        assert_eq!(parsed.memories.len(), 1);
        assert_eq!(parsed.memories[0].category, MemoryCategory::Decision);
        assert!(parsed.memories[0].content.contains("Rust"));

        assert_eq!(parsed.focus_points.len(), 1);
        assert_eq!(parsed.focus_points[0].topic, "API设计");

        assert_eq!(parsed.conversation_patterns.len(), 1);
        assert_eq!(
            parsed.conversation_patterns[0].pattern_type,
            PatternType::Reference
        );

        let keywords = &parsed.focus_keywords;
        assert!(!keywords.is_empty());
        assert_eq!(keywords.transition.len(), 1);
        assert_eq!(keywords.question.len(), 1);
        assert_eq!(keywords.task.len(), 1);
        assert_eq!(keywords.tech.len(), 1);
    }

    /// A reply with every section present but empty yields an empty result.
    #[test]
    fn test_parse_unified_response_empty() {
        let payload = r#"{
            "memories": [],
            "focus_points": [],
            "conversation_patterns": [],
            "focus_keywords": {
                "transition": [],
                "question": [],
                "task": [],
                "tech": []
            }
        }"#;

        let parsed = parse_unified_response(payload, None, None).unwrap();

        assert!(parsed.memories.is_empty());
        assert!(parsed.focus_points.is_empty());
        assert!(parsed.conversation_patterns.is_empty());
        assert!(parsed.focus_keywords.is_empty());
    }

    /// Sections missing from the reply default to empty.
    #[test]
    fn test_parse_unified_response_partial() {
        let payload = r#"{
            "memories": [
                {
                    "category": "technical",
                    "content": "使用 PostgreSQL 作为主数据库存储",
                    "importance": 70
                }
            ]
        }"#;

        let parsed = parse_unified_response(payload, None, None).unwrap();

        assert_eq!(parsed.memories.len(), 1);
        assert!(parsed.focus_points.is_empty());
        assert!(parsed.conversation_patterns.is_empty());
        assert!(parsed.focus_keywords.is_empty());
    }

    /// A unified reply wrapped in a Markdown code fence still parses.
    #[test]
    fn test_parse_unified_response_with_code_block() {
        let payload = r#"```json
{
  "memories": [],
  "focus_points": [],
  "conversation_patterns": [],
  "focus_keywords": {
    "transition": ["switching"],
    "question": [],
    "task": [],
    "tech": []
  }
}
```"#;

        let parsed = parse_unified_response(payload, None, None).unwrap();
        let transitions = &parsed.focus_keywords.transition;

        assert_eq!(transitions.len(), 1);
        assert_eq!(transitions[0], "switching");
    }

    /// The default `UnifiedExtractionResult` is empty in every section.
    #[test]
    fn test_unified_extraction_result_default() {
        let empty = UnifiedExtractionResult::default();
        assert!(empty.memories.is_empty());
        assert!(empty.focus_points.is_empty());
        assert!(empty.conversation_patterns.is_empty());
        assert!(empty.focus_keywords.is_empty());
    }

    // ---- UNIFIED_EXTRACTION_PROMPT ----

    /// The unified prompt names all four extraction sections.
    #[test]
    fn test_unified_extraction_prompt_contains_all_sections() {
        let prompt = UNIFIED_EXTRACTION_PROMPT;
        assert!(prompt.contains("长期记忆"));
        assert!(prompt.contains("当前焦点"));
        assert!(prompt.contains("对话模式"));
        assert!(prompt.contains("焦点关键词"));
    }

    /// The unified prompt names all four keyword categories.
    #[test]
    fn test_unified_extraction_prompt_contains_keyword_categories() {
        let prompt = UNIFIED_EXTRACTION_PROMPT;
        assert!(prompt.contains("transition"));
        assert!(prompt.contains("question"));
        assert!(prompt.contains("task"));
        assert!(prompt.contains("tech"));
    }

    /// Mixed Chinese and English keywords are all kept in each category.
    #[test]
    fn test_parse_unified_response_keywords_merged() {
        let payload = r#"{
            "memories": [],
            "focus_points": [],
            "conversation_patterns": [],
            "focus_keywords": {
                "transition": ["换个话题", "switching", "however"],
                "question": ["怎么", "how", "为什么"],
                "task": ["帮我", "implement", "创建"],
                "tech": ["rust", "数据库", "api"]
            }
        }"#;

        let parsed = parse_unified_response(payload, None, None).unwrap();
        let keywords = &parsed.focus_keywords;

        assert_eq!(keywords.transition.len(), 3);
        assert_eq!(keywords.question.len(), 3);
        assert_eq!(keywords.task.len(), 3);
        assert_eq!(keywords.tech.len(), 3);
        assert_eq!(keywords.total_count(), 12);
    }
}