Skip to main content

agent_diva_agent/
diary.rs

1//! Rational diary extraction policy for Phase A memory writes.
2
3use agent_diva_memory::{
4    sync_diary_entry_to_sqlite, DiaryEntry, DiaryPartition, DiaryStore, FileDiaryStore,
5    MemoryDomain, MemoryScope, MemorySourceRef,
6};
7use regex::Regex;
8use std::path::Path;
9
/// Topic markers that count in favour of recording a reply as a diary entry.
///
/// Entries are compared by substring containment against lower-cased text,
/// so every keyword is written in lower case.
///
/// NOTE(review): any text containing "implementation" also contains
/// "implement", so that single word contributes two hits.
const POSITIVE_KEYWORDS: &[&str] = &[
    // Chinese markers.
    "架构", "模块", "目录", "文档", "设计",
    "实现", "方案", "计划", "阶段", "下一步",
    "建议", "仓库", "工作区", "分析", "验证",
    // English markers.
    "memory", "architecture", "module", "workspace",
    "document", "docs", "design",
    "implement", "implementation",
    "plan", "phase", "next step", "recommend",
];
/// Small-talk markers (greetings, thanks, apologies) that count against
/// recording a reply as a diary entry.
///
/// Like the positive list, these are matched as lower-case substrings.
const NEGATIVE_KEYWORDS: &[&str] = &[
    // Chinese markers.
    "你好", "谢谢", "晚安", "早上好", "哈哈", "抱歉",
    // English markers.
    "hello", "thanks",
];
50
51/// Minimal extractor for analysis-oriented diary entries.
52#[derive(Debug, Default)]
53pub struct RationalDiaryExtractor;
54
55impl RationalDiaryExtractor {
56    pub fn extract(&self, user_input: &str, assistant_output: &str) -> Option<DiaryEntry> {
57        let trimmed = assistant_output.trim();
58        if trimmed.chars().count() < 120 {
59            return None;
60        }
61
62        let lower = format!("{}\n{}", user_input.to_lowercase(), trimmed.to_lowercase());
63        let positive_hits = POSITIVE_KEYWORDS
64            .iter()
65            .filter(|keyword| lower.contains(&keyword.to_lowercase()))
66            .count();
67        let negative_hits = NEGATIVE_KEYWORDS
68            .iter()
69            .filter(|keyword| lower.contains(&keyword.to_lowercase()))
70            .count();
71        let source_refs = extract_source_refs(trimmed);
72        let has_structure =
73            trimmed.contains('\n') || trimmed.contains("1.") || trimmed.contains("- ");
74
75        if positive_hits < 2
76            || !has_structure
77            || (negative_hits > positive_hits && source_refs.is_empty())
78        {
79            return None;
80        }
81
82        let mut entry = DiaryEntry::new(
83            DiaryPartition::Rational,
84            detect_domain(trimmed),
85            MemoryScope::Workspace,
86            derive_title(user_input),
87            derive_summary(trimmed),
88            trimmed.to_string(),
89        );
90        entry.source_refs = source_refs;
91        entry.tags = derive_tags(trimmed);
92        entry.observations =
93            extract_bullets_after_heading(trimmed, &["观察", "发现", "observations"]);
94        entry.confirmed = extract_bullets_after_heading(trimmed, &["确认", "已确认", "confirmed"]);
95        entry.unknowns =
96            extract_bullets_after_heading(trimmed, &["未知", "待确认", "风险", "unknown"]);
97        entry.next_steps =
98            extract_bullets_after_heading(trimmed, &["下一步", "建议", "next", "follow-up"]);
99        if entry.observations.is_empty() {
100            entry.observations = collect_generic_bullets(trimmed, 3);
101        }
102        if entry.next_steps.is_empty() {
103            entry.next_steps = collect_next_steps(trimmed);
104        }
105        entry.confidence = ((positive_hits as f32) / 6.0).clamp(0.55, 0.95);
106        Some(entry)
107    }
108
109    pub fn persist_if_relevant<P: AsRef<Path>>(
110        &self,
111        workspace: P,
112        user_input: &str,
113        assistant_output: &str,
114    ) -> agent_diva_core::Result<bool> {
115        let Some(entry) = self.extract(user_input, assistant_output) else {
116            return Ok(false);
117        };
118        let workspace = workspace.as_ref();
119        let store = FileDiaryStore::new(workspace);
120        store.append_entry(&entry)?;
121        sync_diary_entry_to_sqlite(workspace, &entry)?;
122        Ok(true)
123    }
124}
125
126fn detect_domain(content: &str) -> MemoryDomain {
127    let lower = content.to_lowercase();
128    if lower.contains("文档")
129        || lower.contains("docs")
130        || lower.contains("readme")
131        || lower.contains("architecture")
132    {
133        MemoryDomain::Workspace
134    } else if lower.contains("任务")
135        || lower.contains("todo")
136        || lower.contains("下一步")
137        || lower.contains("plan")
138    {
139        MemoryDomain::Task
140    } else {
141        MemoryDomain::DiaryRational
142    }
143}
144
/// Derives a short entry title from the user's message.
///
/// Each line is trimmed, stripped of leading and trailing `#` characters and
/// trimmed again. The first line that is non-empty after that cleaning
/// becomes the title, truncated to 60 characters. Leading blank or
/// heading-marker-only lines are skipped, so a message that starts with an
/// empty line still gets a meaningful title.
///
/// Returns `"Rational diary note"` when no line yields any text.
fn derive_title(user_input: &str) -> String {
    const FALLBACK_TITLE: &str = "Rational diary note";
    const MAX_TITLE_CHARS: usize = 60;

    user_input
        .lines()
        .map(|line| line.trim().trim_matches('#').trim())
        .find(|candidate| !candidate.is_empty())
        // Truncate by characters, not bytes, so multi-byte text stays valid.
        .map(|candidate| candidate.chars().take(MAX_TITLE_CHARS).collect())
        .unwrap_or_else(|| FALLBACK_TITLE.to_string())
}
159
/// Produces a one-line summary of at most 140 characters.
///
/// The summary is the first line that, after trimming whitespace and any
/// leading `-` characters, is non-empty and does not start with `#`. When no
/// such line exists the raw content itself is truncated instead.
fn derive_summary(content: &str) -> String {
    const MAX_SUMMARY_CHARS: usize = 140;

    let first_body_line = content
        .lines()
        .map(|raw| raw.trim().trim_start_matches('-').trim())
        .find(|text| !text.is_empty() && !text.starts_with('#'));

    first_body_line
        .unwrap_or(content)
        .chars()
        .take(MAX_SUMMARY_CHARS)
        .collect()
}
169
/// Maps keywords found in the content to a de-duplicated list of tags.
///
/// Matching is a case-insensitive substring test. Tags appear in rule order,
/// and a tag reachable through several keywords is emitted only once.
fn derive_tags(content: &str) -> Vec<String> {
    // (keyword to look for, tag to emit)
    const TAG_RULES: [(&str, &str); 9] = [
        ("架构", "architecture"),
        ("architecture", "architecture"),
        ("文档", "docs"),
        ("docs", "docs"),
        ("memory", "memory"),
        ("设计", "design"),
        ("plan", "plan"),
        ("下一步", "next-step"),
        ("workspace", "workspace"),
    ];

    let haystack = content.to_lowercase();
    let mut tags: Vec<String> = Vec::new();
    for (needle, tag) in TAG_RULES {
        if !haystack.contains(needle) {
            continue;
        }
        if tags.iter().all(|seen| seen.as_str() != tag) {
            tags.push(tag.to_owned());
        }
    }
    tags
}
190
191fn extract_source_refs(content: &str) -> Vec<MemorySourceRef> {
192    let Some(regex) = Regex::new(r"`([^`\n]+(?:/[^`\n]+)+)`").ok() else {
193        return Vec::new();
194    };
195
196    let mut refs = Vec::new();
197    for capture in regex.captures_iter(content) {
198        let Some(path_match) = capture.get(1) else {
199            continue;
200        };
201        refs.push(MemorySourceRef {
202            path: Some(path_match.as_str().to_string()),
203            section: None,
204            note: None,
205        });
206    }
207    refs.truncate(8);
208    refs
209}
210
211fn extract_bullets_after_heading(content: &str, headings: &[&str]) -> Vec<String> {
212    let mut capture = false;
213    let mut items = Vec::new();
214    for line in content.lines() {
215        let trimmed = line.trim();
216        let lower = trimmed.to_lowercase();
217        let heading_match = headings
218            .iter()
219            .any(|heading| lower.contains(&heading.to_lowercase()));
220        if heading_match
221            && (trimmed.starts_with('#') || trimmed.ends_with(':') || trimmed.ends_with(':'))
222        {
223            capture = true;
224            continue;
225        }
226        if capture {
227            if trimmed.starts_with("##") || trimmed.starts_with("###") {
228                break;
229            }
230            if let Some(value) = bullet_value(trimmed) {
231                items.push(value.to_string());
232            }
233        }
234    }
235    items
236}
237
238fn collect_generic_bullets(content: &str, limit: usize) -> Vec<String> {
239    let mut items = Vec::new();
240    for line in content.lines() {
241        if let Some(value) = bullet_value(line.trim()) {
242            items.push(value.to_string());
243            if items.len() >= limit {
244                break;
245            }
246        }
247    }
248    items
249}
250
/// Fallback next-step finder: returns up to three lines that mention a
/// next-step keyword (`建议`, `下一步`, or `next` in any letter case).
///
/// Each matching line is trimmed, then stripped of leading `-` characters,
/// then of leading `*` characters, and trimmed again. Lines left empty by
/// that cleaning are dropped.
fn collect_next_steps(content: &str) -> Vec<String> {
    const MAX_STEPS: usize = 3;

    content
        .lines()
        .map(str::trim)
        .filter(|line| {
            line.contains("建议") || line.contains("下一步") || line.to_lowercase().contains("next")
        })
        .map(|line| line.trim_start_matches('-').trim_start_matches('*').trim())
        .filter(|step| !step.is_empty())
        .take(MAX_STEPS)
        .map(String::from)
        .collect()
}
272
/// Returns the text of a list item, or `None` when `line` is not one.
///
/// Recognised markers, which must appear at the very start of `line`:
/// - `- ` and `* ` (unordered items);
/// - one to nine ASCII digits followed by `. ` (ordered items), so `10. x`
///   is accepted as well as `1. x`.
///
/// The returned text is trimmed. An ordered marker with nothing after it
/// (`"1. "`) is not treated as an item.
fn bullet_value(line: &str) -> Option<&str> {
    // CommonMark caps ordered-list numbers at nine digits.
    const MAX_MARKER_DIGITS: usize = 9;

    if let Some(rest) = line.strip_prefix("- ").or_else(|| line.strip_prefix("* ")) {
        return Some(rest.trim());
    }

    let after_digits = line.trim_start_matches(|c: char| c.is_ascii_digit());
    let digit_count = line.len() - after_digits.len();
    if digit_count == 0 || digit_count > MAX_MARKER_DIGITS {
        return None;
    }

    let rest = after_digits.strip_prefix(". ")?;
    if rest.is_empty() {
        None
    } else {
        Some(rest.trim())
    }
}
286
#[cfg(test)]
mod tests {
    use super::*;
    use agent_diva_memory::MemoryStore;
    use tempfile::TempDir;

    /// A structured architecture analysis that cites files yields an entry.
    #[test]
    fn test_extracts_analysis_entry() {
        let reply = r#"
## 架构分析
- 观察:`agent-diva-core/src/memory/mod.rs` 当前只暴露 MEMORY.md 相关能力。
- 已确认:`agent-diva-agent/src/context.rs` 只注入长期记忆,不注入 diary。
- 下一步:先增加 diary store,再挂接提取策略。
- 建议:保留 MEMORY.md 兼容路径。
"#;
        let diarist = RationalDiaryExtractor;

        let entry = diarist
            .extract("请分析当前记忆架构并给出下一步方案", reply)
            .unwrap();

        assert_eq!(entry.partition, DiaryPartition::Rational);
        assert_eq!(entry.scope, MemoryScope::Workspace);
        assert!(!entry.source_refs.is_empty());
        assert!(!entry.next_steps.is_empty());
    }

    /// Short small talk produces no entry.
    #[test]
    fn test_skips_casual_reply() {
        let reply = "你好,今天过得怎么样?谢谢你。";
        let diarist = RationalDiaryExtractor;

        let outcome = diarist.extract("打个招呼", reply);

        assert!(outcome.is_none());
    }

    /// A relevant turn is written to the file diary and mirrored into SQLite.
    #[test]
    fn test_persist_if_relevant() {
        let reply = r#"
## 实现方案
- 观察:`agent-diva-core/src/memory/manager.rs` 目前没有 diary 路径 helper。
- 建议:增加 `memory/diary/rational/YYYY-MM-DD.md` 落盘。
- 下一步:在 loop turn 中挂接提取器。
- 设计:保持 MEMORY.md 行为不变。
"#;
        let sandbox = TempDir::new().unwrap();
        let diarist = RationalDiaryExtractor;

        let written = diarist
            .persist_if_relevant(sandbox.path(), "请给出记忆实现方案", reply)
            .unwrap();
        assert!(written);

        // File-backed diary: exactly one day holding exactly one entry.
        let diary = FileDiaryStore::new(sandbox.path());
        let days = diary.list_days(DiaryPartition::Rational).unwrap();
        assert_eq!(days.len(), 1);
        let entries = diary.load_day(&days[0], DiaryPartition::Rational).unwrap();
        assert_eq!(entries.len(), 1);

        // SQLite mirror: at least one record whose id starts with `diary:`.
        let sqlite = agent_diva_memory::SqliteMemoryStore::new(sandbox.path()).unwrap();
        let records = sqlite.list_records().unwrap();
        assert!(records.iter().any(|row| row.id.starts_with("diary:")));
    }

    /// Persisting collaboration rules also yields relationship, self-model
    /// and soul-signal records alongside the diary record.
    #[test]
    fn test_persist_if_relevant_derives_structured_memory_records() {
        let reply = r#"
## 协作规则
- 已确认:用户偏好中文回复,并希望答案尽量简洁直接。
- 已确认:所有回复必须以前缀 [I strictly follow the rules] 开头。
- 已确认:这个 agent 应该先说明动作再改文件,不要自作主张提交代码。
- 建议:后续把这些规则写成可检索的结构化 memory。
"#;
        let sandbox = TempDir::new().unwrap();
        let diarist = RationalDiaryExtractor;

        let written = diarist
            .persist_if_relevant(sandbox.path(), "请整理我们的协作规则", reply)
            .unwrap();
        assert!(written);

        let sqlite = agent_diva_memory::SqliteMemoryStore::new(sandbox.path()).unwrap();
        let records = sqlite.list_records().unwrap();
        assert!(records.iter().any(|row| row.id.starts_with("diary:")));

        for expected in [
            MemoryDomain::Relationship,
            MemoryDomain::SelfModel,
            MemoryDomain::SoulSignal,
        ] {
            assert!(records.iter().any(|row| row.domain == expected));
        }
    }
}