1use agent_diva_memory::{
4 sync_diary_entry_to_sqlite, DiaryEntry, DiaryPartition, DiaryStore, FileDiaryStore,
5 MemoryDomain, MemoryScope, MemorySourceRef,
6};
7use regex::Regex;
8use std::path::Path;
9
/// Keywords whose presence suggests an analytical, diary-worthy exchange.
///
/// Matching is done with substring search against lower-cased text, so every
/// entry here is written in lower case.
const POSITIVE_KEYWORDS: &[&str] = &[
    // Chinese: structure and artefacts.
    "架构", "模块", "目录", "文档",
    // Chinese: design and planning vocabulary.
    "设计", "实现", "方案", "计划", "阶段", "下一步", "建议",
    // Chinese: repository and analysis vocabulary.
    "仓库", "工作区", "分析", "验证",
    // English: structure and artefacts.
    "memory", "architecture", "module", "workspace", "document", "docs",
    // English: design and planning vocabulary.
    "design", "implement", "implementation", "plan", "phase", "next step", "recommend",
];
/// Keywords typical of greetings and small talk; they count against treating
/// an exchange as diary material.
const NEGATIVE_KEYWORDS: &[&str] = &[
    // Chinese greetings and pleasantries.
    "你好", "谢谢", "晚安", "早上好", "哈哈", "抱歉",
    // English greetings and pleasantries.
    "hello", "thanks",
];
50
/// Stateless heuristic extractor that turns an analytical assistant reply into
/// a rational diary entry.
///
/// The type carries no data, so it is `Copy` and can be passed around by value.
#[derive(Debug, Default, Clone, Copy)]
pub struct RationalDiaryExtractor;
54
55impl RationalDiaryExtractor {
56 pub fn extract(&self, user_input: &str, assistant_output: &str) -> Option<DiaryEntry> {
57 let trimmed = assistant_output.trim();
58 if trimmed.chars().count() < 120 {
59 return None;
60 }
61
62 let lower = format!("{}\n{}", user_input.to_lowercase(), trimmed.to_lowercase());
63 let positive_hits = POSITIVE_KEYWORDS
64 .iter()
65 .filter(|keyword| lower.contains(&keyword.to_lowercase()))
66 .count();
67 let negative_hits = NEGATIVE_KEYWORDS
68 .iter()
69 .filter(|keyword| lower.contains(&keyword.to_lowercase()))
70 .count();
71 let source_refs = extract_source_refs(trimmed);
72 let has_structure =
73 trimmed.contains('\n') || trimmed.contains("1.") || trimmed.contains("- ");
74
75 if positive_hits < 2
76 || !has_structure
77 || (negative_hits > positive_hits && source_refs.is_empty())
78 {
79 return None;
80 }
81
82 let mut entry = DiaryEntry::new(
83 DiaryPartition::Rational,
84 detect_domain(trimmed),
85 MemoryScope::Workspace,
86 derive_title(user_input),
87 derive_summary(trimmed),
88 trimmed.to_string(),
89 );
90 entry.source_refs = source_refs;
91 entry.tags = derive_tags(trimmed);
92 entry.observations =
93 extract_bullets_after_heading(trimmed, &["观察", "发现", "observations"]);
94 entry.confirmed = extract_bullets_after_heading(trimmed, &["确认", "已确认", "confirmed"]);
95 entry.unknowns =
96 extract_bullets_after_heading(trimmed, &["未知", "待确认", "风险", "unknown"]);
97 entry.next_steps =
98 extract_bullets_after_heading(trimmed, &["下一步", "建议", "next", "follow-up"]);
99 if entry.observations.is_empty() {
100 entry.observations = collect_generic_bullets(trimmed, 3);
101 }
102 if entry.next_steps.is_empty() {
103 entry.next_steps = collect_next_steps(trimmed);
104 }
105 entry.confidence = ((positive_hits as f32) / 6.0).clamp(0.55, 0.95);
106 Some(entry)
107 }
108
109 pub fn persist_if_relevant<P: AsRef<Path>>(
110 &self,
111 workspace: P,
112 user_input: &str,
113 assistant_output: &str,
114 ) -> agent_diva_core::Result<bool> {
115 let Some(entry) = self.extract(user_input, assistant_output) else {
116 return Ok(false);
117 };
118 let workspace = workspace.as_ref();
119 let store = FileDiaryStore::new(workspace);
120 store.append_entry(&entry)?;
121 sync_diary_entry_to_sqlite(workspace, &entry)?;
122 Ok(true)
123 }
124}
125
126fn detect_domain(content: &str) -> MemoryDomain {
127 let lower = content.to_lowercase();
128 if lower.contains("文档")
129 || lower.contains("docs")
130 || lower.contains("readme")
131 || lower.contains("architecture")
132 {
133 MemoryDomain::Workspace
134 } else if lower.contains("任务")
135 || lower.contains("todo")
136 || lower.contains("下一步")
137 || lower.contains("plan")
138 {
139 MemoryDomain::Task
140 } else {
141 MemoryDomain::DiaryRational
142 }
143}
144
/// Derives a short entry title from the user's input.
///
/// Takes the first line that still has content after trimming whitespace and
/// surrounding `#` characters, limited to 60 characters. Blank or `#`-only
/// lines are skipped, so input that starts with an empty line still gets a
/// meaningful title. Returns `"Rational diary note"` when no line qualifies.
fn derive_title(user_input: &str) -> String {
    const FALLBACK_TITLE: &str = "Rational diary note";
    const MAX_TITLE_CHARS: usize = 60;

    user_input
        .lines()
        .map(|line| line.trim().trim_matches('#').trim())
        .find(|line| !line.is_empty())
        .map_or_else(
            || FALLBACK_TITLE.to_string(),
            // Truncate by characters, not bytes, so multi-byte text is never split.
            |line| line.chars().take(MAX_TITLE_CHARS).collect::<String>(),
        )
}
159
/// Produces a summary of at most 140 characters.
///
/// The summary is the first line that, after trimming whitespace and leading
/// `-` characters, is non-empty and does not start with `#`. When no such line
/// exists, the start of `content` itself is used.
fn derive_summary(content: &str) -> String {
    let first_body_line = content
        .lines()
        .map(|raw| raw.trim().trim_start_matches('-').trim())
        .find(|candidate| !(candidate.is_empty() || candidate.starts_with('#')));

    first_body_line
        .unwrap_or(content)
        .chars()
        .take(140)
        .collect()
}
169
/// Maps keywords found in `content` (case-insensitive) to a de-duplicated list
/// of tags, in the order of the rule table below.
fn derive_tags(content: &str) -> Vec<String> {
    // (needle to search for, tag to emit)
    const TAG_RULES: [(&str, &str); 9] = [
        ("架构", "architecture"),
        ("architecture", "architecture"),
        ("文档", "docs"),
        ("docs", "docs"),
        ("memory", "memory"),
        ("设计", "design"),
        ("plan", "plan"),
        ("下一步", "next-step"),
        ("workspace", "workspace"),
    ];

    let haystack = content.to_lowercase();
    let mut selected: Vec<&str> = Vec::with_capacity(TAG_RULES.len());
    for &(needle, tag) in TAG_RULES.iter() {
        if !haystack.contains(needle) {
            continue;
        }
        // Several needles map to the same tag; emit each tag only once.
        if !selected.contains(&tag) {
            selected.push(tag);
        }
    }
    selected.into_iter().map(String::from).collect()
}
190
191fn extract_source_refs(content: &str) -> Vec<MemorySourceRef> {
192 let Some(regex) = Regex::new(r"`([^`\n]+(?:/[^`\n]+)+)`").ok() else {
193 return Vec::new();
194 };
195
196 let mut refs = Vec::new();
197 for capture in regex.captures_iter(content) {
198 let Some(path_match) = capture.get(1) else {
199 continue;
200 };
201 refs.push(MemorySourceRef {
202 path: Some(path_match.as_str().to_string()),
203 section: None,
204 note: None,
205 });
206 }
207 refs.truncate(8);
208 refs
209}
210
211fn extract_bullets_after_heading(content: &str, headings: &[&str]) -> Vec<String> {
212 let mut capture = false;
213 let mut items = Vec::new();
214 for line in content.lines() {
215 let trimmed = line.trim();
216 let lower = trimmed.to_lowercase();
217 let heading_match = headings
218 .iter()
219 .any(|heading| lower.contains(&heading.to_lowercase()));
220 if heading_match
221 && (trimmed.starts_with('#') || trimmed.ends_with(':') || trimmed.ends_with(':'))
222 {
223 capture = true;
224 continue;
225 }
226 if capture {
227 if trimmed.starts_with("##") || trimmed.starts_with("###") {
228 break;
229 }
230 if let Some(value) = bullet_value(trimmed) {
231 items.push(value.to_string());
232 }
233 }
234 }
235 items
236}
237
238fn collect_generic_bullets(content: &str, limit: usize) -> Vec<String> {
239 let mut items = Vec::new();
240 for line in content.lines() {
241 if let Some(value) = bullet_value(line.trim()) {
242 items.push(value.to_string());
243 if items.len() >= limit {
244 break;
245 }
246 }
247 }
248 items
249}
250
/// Picks up to three lines that mention a next step or suggestion.
///
/// A line qualifies when it contains "建议", "下一步", or "next" (the last
/// ignoring case). Leading `-` and then `*` markers are stripped, and lines
/// that become empty are dropped.
fn collect_next_steps(content: &str) -> Vec<String> {
    content
        .lines()
        .map(str::trim)
        .filter(|line| {
            line.contains("建议")
                || line.contains("下一步")
                || line.to_lowercase().contains("next")
        })
        .map(|line| {
            line.trim_start_matches('-')
                .trim_start_matches('*')
                .trim()
        })
        .filter(|step| !step.is_empty())
        .take(3)
        .map(str::to_string)
        .collect()
}
271}
272
/// Returns the text of a list item, or `None` when `line` is not a list item.
///
/// Recognised markers are `"- "`, `"* "`, and an ordered marker made of one to
/// nine ASCII digits followed by `". "` (for example `"3. "` or `"12. "`).
/// The caller is expected to pass a line with leading whitespace removed.
fn bullet_value(line: &str) -> Option<&str> {
    if let Some(rest) = line
        .strip_prefix("- ")
        .or_else(|| line.strip_prefix("* "))
    {
        return Some(rest.trim());
    }

    // Ordered list item: a run of ASCII digits, then ". ", then the text.
    let digits = line.bytes().take_while(u8::is_ascii_digit).count();
    if digits == 0 || digits > 9 {
        return None;
    }
    // ASCII digits are one byte each, so `digits` is always a char boundary.
    let rest = line[digits..].strip_prefix(". ")?;
    if rest.is_empty() {
        // A bare marker with nothing after it is not treated as an item.
        None
    } else {
        Some(rest.trim())
    }
}
286
#[cfg(test)]
mod tests {
    use super::*;
    use agent_diva_memory::MemoryStore;
    use tempfile::TempDir;

    /// Analytical reply with source references and next steps.
    const ARCHITECTURE_REPLY: &str = r#"
## 架构分析
- 观察:`agent-diva-core/src/memory/mod.rs` 当前只暴露 MEMORY.md 相关能力。
- 已确认:`agent-diva-agent/src/context.rs` 只注入长期记忆,不注入 diary。
- 下一步:先增加 diary store,再挂接提取策略。
- 建议:保留 MEMORY.md 兼容路径。
"#;

    /// Implementation-plan reply used for the persistence round trip.
    const IMPLEMENTATION_REPLY: &str = r#"
## 实现方案
- 观察:`agent-diva-core/src/memory/manager.rs` 目前没有 diary 路径 helper。
- 建议:增加 `memory/diary/rational/YYYY-MM-DD.md` 落盘。
- 下一步:在 loop turn 中挂接提取器。
- 设计:保持 MEMORY.md 行为不变。
"#;

    /// Reply describing collaboration rules.
    const COLLABORATION_REPLY: &str = r#"
## 协作规则
- 已确认:用户偏好中文回复,并希望答案尽量简洁直接。
- 已确认:所有回复必须以前缀 [I strictly follow the rules] 开头。
- 已确认:这个 agent 应该先说明动作再改文件,不要自作主张提交代码。
- 建议:后续把这些规则写成可检索的结构化 memory。
"#;

    // An analytical reply yields a workspace-scoped rational entry.
    #[test]
    fn test_extracts_analysis_entry() {
        let extracted = RationalDiaryExtractor
            .extract("请分析当前记忆架构并给出下一步方案", ARCHITECTURE_REPLY)
            .unwrap();

        assert_eq!(extracted.partition, DiaryPartition::Rational);
        assert_eq!(extracted.scope, MemoryScope::Workspace);
        assert!(!extracted.source_refs.is_empty());
        assert!(!extracted.next_steps.is_empty());
    }

    // A short greeting is not turned into an entry.
    #[test]
    fn test_skips_casual_reply() {
        let greeting = "你好,今天过得怎么样?谢谢你。";
        let outcome = RationalDiaryExtractor.extract("打个招呼", greeting);
        assert!(outcome.is_none());
    }

    // A relevant reply is written to the file diary and mirrored to SQLite.
    #[test]
    fn test_persist_if_relevant() {
        let workspace = TempDir::new().unwrap();

        let written = RationalDiaryExtractor
            .persist_if_relevant(workspace.path(), "请给出记忆实现方案", IMPLEMENTATION_REPLY)
            .unwrap();
        assert!(written);

        let diary = FileDiaryStore::new(workspace.path());
        let days = diary.list_days(DiaryPartition::Rational).unwrap();
        assert_eq!(days.len(), 1);
        let entries = diary.load_day(&days[0], DiaryPartition::Rational).unwrap();
        assert_eq!(entries.len(), 1);

        let sqlite = agent_diva_memory::SqliteMemoryStore::new(workspace.path()).unwrap();
        let records = sqlite.list_records().unwrap();
        assert!(records.iter().any(|record| record.id.starts_with("diary:")));
    }

    // Persisting collaboration rules also produces records in other domains.
    #[test]
    fn test_persist_if_relevant_derives_structured_memory_records() {
        let workspace = TempDir::new().unwrap();

        let written = RationalDiaryExtractor
            .persist_if_relevant(workspace.path(), "请整理我们的协作规则", COLLABORATION_REPLY)
            .unwrap();
        assert!(written);

        let sqlite = agent_diva_memory::SqliteMemoryStore::new(workspace.path()).unwrap();
        let records = sqlite.list_records().unwrap();
        assert!(records.iter().any(|record| record.id.starts_with("diary:")));

        for expected in [
            MemoryDomain::Relationship,
            MemoryDomain::SelfModel,
            MemoryDomain::SoulSignal,
        ] {
            assert!(records.iter().any(|record| record.domain == expected));
        }
    }
}