Skip to main content

spool/
memory_importer.rs

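//! Transcript → candidate memory extractor.
//!
//! Heuristically mines Claude Code / Codex session messages for fragments
//! "worth turning into a memory" and emits `TranscriptCandidate`s; the caller
//! converts each one into a `ProposeMemoryRequest` and submits it through
//! `LifecycleService::propose_ai`, after which it enters the review queue in
//! the `Candidate` state.
//!
//! The heuristics are deliberately conservative for now: better to miss
//! something ambiguous than to flood the queue with noisy candidates.
//! Matching is a substring search over the lowercased message content.
//! Three kinds of signal are covered:
//! 1. User feedback / corrections, user messages only
//!    (e.g. "不要每次", "记住", "prefer", "please always", "please don't")
//!    → memory_type="preference"
//! 2. Decisions / settled approaches
//!    (e.g. "决定", "就这么定", "decided to", "chose")
//!    → memory_type="decision"
//! 3. Incident / pitfall retrospectives
//!    (e.g. "出过问题", "踩坑", "regressed", "incident", "root cause was")
//!    → memory_type="incident"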
15
16use crate::desktop::DesktopSessionMessage;
17use crate::domain::memory_lifecycle::MemoryScope;
18use crate::lifecycle_service::LifecycleService;
19use crate::lifecycle_store::{ProposeMemoryRequest, TransitionMetadata};
20use crate::session_sources::{load_provider_messages, load_provider_sessions, raw_session_id};
21use serde::Serialize;
22use std::path::Path;
23use ts_rs::TS;
24
/// A memory candidate mined from a single session message.
///
/// Built by `extract_from_messages` and converted into a
/// `ProposeMemoryRequest` by `into_propose_request`. The type is also
/// exported to the frontend's generated TypeScript types.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, TS)]
#[ts(export, export_to = "../frontend/src/lib/types/generated/")]
pub struct TranscriptCandidate {
    /// First non-blank line of the message, cut to 80 characters (plus `…` when cut).
    pub title: String,
    /// Normalized message content, cut to 320 characters (plus `…` when cut).
    pub summary: String,
    /// String form of `signal`: "preference", "decision" or "incident".
    pub memory_type: String,
    /// Scope chosen from the signal by `default_scope_for`.
    pub scope: MemoryScope,
    /// Session reference the candidate came from, e.g. `claude:<id>` or `codex:<id>`.
    pub source_ref: String,
    /// Provenance markers: the session reference followed by `role:timestamp`
    /// of the message that matched.
    pub evidence_refs: Vec<String>,
    /// Which heuristic matched the message.
    pub signal: CandidateSignal,
}
36
37#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, TS)]
38#[serde(rename_all = "snake_case")]
39#[ts(export, export_to = "../frontend/src/lib/types/generated/")]
40pub enum CandidateSignal {
41    Preference,
42    Decision,
43    Incident,
44}
45
46impl CandidateSignal {
47    pub fn memory_type(self) -> &'static str {
48        match self {
49            Self::Preference => "preference",
50            Self::Decision => "decision",
51            Self::Incident => "incident",
52        }
53    }
54}
55
56/// 构建一次 session 的候选集合。
57///
58/// `session_ref` 形如 `claude:<id>` 或 `codex:<id>`,会作为 `source_ref`
59/// 写入候选;`evidence_refs` 额外追加命中消息的 role:timestamp 方便审核溯源。
60pub fn extract_from_messages(
61    session_ref: &str,
62    messages: &[DesktopSessionMessage],
63) -> Vec<TranscriptCandidate> {
64    let mut out = Vec::new();
65    for msg in messages {
66        let Some(signal) = classify(msg) else {
67            continue;
68        };
69        let trimmed = normalize(&msg.content);
70        if trimmed.is_empty() {
71            continue;
72        }
73        let summary = truncate_chars(&trimmed, 320);
74        let title = truncate_chars(&first_line(&trimmed), 80);
75        let evidence = format!("{}:{}", msg.role, msg.timestamp);
76        out.push(TranscriptCandidate {
77            title,
78            summary,
79            memory_type: signal.memory_type().to_string(),
80            scope: default_scope_for(signal),
81            source_ref: session_ref.to_string(),
82            evidence_refs: vec![session_ref.to_string(), evidence],
83            signal,
84        });
85    }
86    dedupe_by_summary(out)
87}
88
89impl TranscriptCandidate {
90    pub fn into_propose_request(self, actor: Option<String>) -> ProposeMemoryRequest {
91        ProposeMemoryRequest {
92            title: self.title,
93            summary: self.summary,
94            memory_type: self.memory_type,
95            scope: self.scope,
96            source_ref: self.source_ref,
97            project_id: None,
98            user_id: None,
99            sensitivity: None,
100            metadata: TransitionMetadata {
101                actor,
102                reason: Some("imported from session transcript".to_string()),
103                evidence_refs: self.evidence_refs,
104            },
105            entities: Vec::new(),
106            tags: Vec::new(),
107            triggers: Vec::new(),
108            related_files: Vec::new(),
109            related_records: Vec::new(),
110            supersedes: None,
111            applies_to: Vec::new(),
112            valid_until: None,
113        }
114    }
115}
116
117#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, TS)]
118#[serde(rename_all = "snake_case")]
119#[ts(export, export_to = "../frontend/src/lib/types/generated/")]
120pub enum ImportProvider {
121    Claude,
122    Codex,
123}
124
125impl ImportProvider {
126    pub fn as_str(self) -> &'static str {
127        match self {
128            Self::Claude => "claude",
129            Self::Codex => "codex",
130        }
131    }
132
133    pub fn parse(value: &str) -> anyhow::Result<Self> {
134        match value {
135            "claude" => Ok(Self::Claude),
136            "codex" => Ok(Self::Codex),
137            other => Err(anyhow::anyhow!(
138                "unsupported importer provider: {other} (expected: claude | codex)"
139            )),
140        }
141    }
142}
143
/// Result of one `import_session` run.
///
/// Exported to the frontend's generated TypeScript types.
#[derive(Debug, Clone, Serialize, TS)]
#[ts(export, export_to = "../frontend/src/lib/types/generated/")]
pub struct ImportSessionResponse {
    /// `<provider>:<session_id>` reference of the imported session.
    pub session_ref: String,
    /// `total_messages` as reported by the loaded message page.
    pub total_messages: usize,
    /// Number of entries in `candidates`.
    pub candidate_count: usize,
    /// Echo of the `apply` argument; `true` even when there were no candidates to write.
    pub applied: bool,
    /// Record ids returned by `propose_ai`, in candidate order; empty on a dry run.
    pub applied_record_ids: Vec<String>,
    /// The extracted candidates, returned for dry runs and applied runs alike.
    pub candidates: Vec<TranscriptCandidate>,
}
154
155/// 驱动一次 session → candidate 流水。
156///
157/// - `provider` + `session_id` 对齐 `session_sources::load_provider_sessions`
158///   产出的 `DesktopSessionItem { provider, session_id }` 契约。
159///   `session_id` 只接 raw id(不带 `<provider>:` 前缀)。
160/// - `apply=false` 是 dry run,只返回候选,不写 ledger。
161/// - `apply=true` 会把每个候选走 `LifecycleService::propose_ai`,record_id 回传。
162/// - `actor` 作为写入候选时的 provenance actor。
163pub fn import_session(
164    config_path: &Path,
165    provider: ImportProvider,
166    session_id: &str,
167    apply: bool,
168    actor: Option<String>,
169) -> anyhow::Result<ImportSessionResponse> {
170    let sessions = load_provider_sessions(None)?;
171    let raw = session_id.to_string();
172    let provider_str = provider.as_str();
173    let target = sessions
174        .into_iter()
175        .find(|s| s.provider == provider_str && raw_session_id(&s.session_id) == raw)
176        .ok_or_else(|| {
177            anyhow::anyhow!(
178                "session not found: provider={} session_id={} (先用桌面端或 memory browse 列表确认)",
179                provider_str,
180                raw,
181            )
182        })?;
183
184    let messages = load_provider_messages(&target, 0, 0)?;
185    let session_ref = format!("{}:{}", provider_str, raw);
186    let candidates = extract_from_messages(&session_ref, &messages.messages);
187    let candidate_count = candidates.len();
188
189    let mut applied_record_ids = Vec::new();
190    if apply && !candidates.is_empty() {
191        let service = LifecycleService::new();
192        for candidate in &candidates {
193            let request = candidate.clone().into_propose_request(actor.clone());
194            let result = service.propose_ai(config_path, request)?;
195            applied_record_ids.push(result.entry.record_id);
196        }
197    }
198
199    Ok(ImportSessionResponse {
200        session_ref,
201        total_messages: messages.total_messages,
202        candidate_count,
203        applied: apply,
204        applied_record_ids,
205        candidates,
206    })
207}
208
209fn classify(msg: &DesktopSessionMessage) -> Option<CandidateSignal> {
210    let content = msg.content.to_lowercase();
211    if msg.role == "user" && contains_any(&content, PREFERENCE_KEYS) {
212        return Some(CandidateSignal::Preference);
213    }
214    if contains_any(&content, DECISION_KEYS) {
215        return Some(CandidateSignal::Decision);
216    }
217    if contains_any(&content, INCIDENT_KEYS) {
218        return Some(CandidateSignal::Incident);
219    }
220    None
221}
222
223fn default_scope_for(signal: CandidateSignal) -> MemoryScope {
224    match signal {
225        CandidateSignal::Preference => MemoryScope::User,
226        CandidateSignal::Decision => MemoryScope::Project,
227        CandidateSignal::Incident => MemoryScope::Project,
228    }
229}
230
/// Substrings that mark a message as a standing preference or correction.
///
/// `classify` searches for them in the lowercased content of messages whose
/// role is `"user"`, so the English entries must stay lowercase.
///
/// NOTE(review): "i want" and "i like" look broad for a deliberately
/// conservative heuristic (e.g. "I want you to fix this test") — confirm they
/// are intended.
const PREFERENCE_KEYS: &[&str] = &[
    "不要每次",
    "别每次",
    "不用每次",
    "记住",
    "以后都",
    "默认都",
    "我喜欢",
    "我倾向",
    "我偏好",
    "下次请",
    "以后请",
    "prefer",
    "please always",
    "please don't",
    "stop doing",
    "from now on",
    "never do",
    "always use",
    "i like",
    "i want",
    "don't ever",
];
254
/// Substrings that mark a message as recording a decision.
///
/// `classify` searches for them in the lowercased content of messages from
/// any role, after the preference check, so the English entries must stay
/// lowercase.
const DECISION_KEYS: &[&str] = &[
    "决定",
    "就这么定",
    "敲定",
    "定了",
    "选型",
    "方案就",
    "确认用",
    "最终选",
    "decided to",
    "we'll go with",
    "chose",
    "final decision",
    "going with",
    "let's use",
    "settled on",
];
272
/// Substrings that mark a message as an incident or pitfall retrospective.
///
/// `classify` searches for them in the lowercased content of messages from
/// any role, after the preference and decision checks, so the English entries
/// must stay lowercase.
///
/// NOTE(review): because matching is by substring, "incident" also matches
/// "incidentally", and "caused by" matches the "Caused by:" lines of pasted
/// error chains — confirm this noise level is acceptable.
const INCIDENT_KEYS: &[&str] = &[
    "踩过坑",
    "出过问题",
    "之前翻过",
    "回归了",
    "上次挂掉",
    "踩坑",
    "bug 是因为",
    "根因是",
    "regressed",
    "broke production",
    "incident",
    "postmortem",
    "root caused to",
    "root cause was",
    "caused by",
    "lesson learned",
];
291
/// Returns `true` when `haystack` contains at least one of `needles` as a
/// substring; an empty `needles` slice never matches.
fn contains_any(haystack: &str, needles: &[&str]) -> bool {
    for needle in needles {
        if haystack.contains(*needle) {
            return true;
        }
    }
    false
}
295
/// Trims surrounding whitespace and converts Windows line endings (`\r\n`)
/// to `\n`. A lone `\r` is left untouched.
fn normalize(content: &str) -> String {
    let stripped = content.trim();
    let segments: Vec<&str> = stripped.split("\r\n").collect();
    segments.join("\n")
}
299
/// Returns the first line of `content` that is not blank, with surrounding
/// whitespace removed, or an empty string when every line is blank.
fn first_line(content: &str) -> String {
    for line in content.lines() {
        let stripped = line.trim();
        if !stripped.is_empty() {
            return stripped.to_string();
        }
    }
    String::new()
}
308
/// Cuts `value` down to at most `max_chars` characters (Unicode scalar values).
///
/// Returns `value` unchanged when it already fits. Otherwise keeps the first
/// `max_chars` characters and appends a single `…`, so a truncated result is
/// `max_chars + 1` characters long.
fn truncate_chars(value: &str, max_chars: usize) -> String {
    // `nth(max_chars)` yields the byte offset of the first character past the
    // limit, or `None` when the string fits. This avoids copying the whole
    // input into a `Vec<char>` and guarantees the slice ends on a char boundary.
    match value.char_indices().nth(max_chars) {
        None => value.to_string(),
        Some((cut, _)) => {
            let mut truncated = String::with_capacity(cut + '…'.len_utf8());
            truncated.push_str(&value[..cut]);
            truncated.push('…');
            truncated
        }
    }
}
318
319fn dedupe_by_summary(candidates: Vec<TranscriptCandidate>) -> Vec<TranscriptCandidate> {
320    let mut seen = std::collections::BTreeSet::new();
321    let mut out = Vec::with_capacity(candidates.len());
322    for candidate in candidates {
323        let key = (candidate.memory_type.clone(), candidate.summary.clone());
324        if seen.insert(key) {
325            out.push(candidate);
326        }
327    }
328    out
329}
330
// Unit tests for the extraction heuristics, the request conversion, provider
// parsing and the missing-session error path of `import_session`.
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a non-truncated session message with the given role, timestamp and content.
    fn msg(role: &str, ts: &str, content: &str) -> DesktopSessionMessage {
        DesktopSessionMessage {
            role: role.to_string(),
            timestamp: ts.to_string(),
            content: content.to_string(),
            truncated: false,
        }
    }

    // A user message containing preference keywords becomes a user-scoped
    // preference candidate carrying the session ref and a role:timestamp marker.
    #[test]
    fn user_preference_is_captured_as_preference() {
        let out = extract_from_messages(
            "claude:abc",
            &[msg(
                "user",
                "2026-04-18T12:00Z",
                "以后都别每次都问我是否继续,默认自主推进",
            )],
        );
        assert_eq!(out.len(), 1);
        assert_eq!(out[0].signal, CandidateSignal::Preference);
        assert_eq!(out[0].scope, MemoryScope::User);
        assert_eq!(out[0].memory_type, "preference");
        assert_eq!(out[0].source_ref, "claude:abc");
        assert!(
            out[0]
                .evidence_refs
                .contains(&"user:2026-04-18T12:00Z".to_string())
        );
    }

    // Decision keywords match for non-user roles too and yield a
    // project-scoped decision candidate.
    #[test]
    fn assistant_decision_is_captured_as_decision() {
        let out = extract_from_messages(
            "codex:xyz",
            &[msg(
                "assistant",
                "2026-04-18T12:30Z",
                "I've decided to use React + shadcn/ui for the desktop UI.",
            )],
        );
        assert_eq!(out.len(), 1);
        assert_eq!(out[0].signal, CandidateSignal::Decision);
        assert_eq!(out[0].memory_type, "decision");
        assert_eq!(out[0].scope, MemoryScope::Project);
    }

    // Incident wording in a user message with no preference or decision
    // keyword is classified as an incident.
    #[test]
    fn incident_wording_is_captured_as_incident() {
        let out = extract_from_messages(
            "claude:abc",
            &[msg(
                "user",
                "2026-04-18T12:45Z",
                "这个地方之前翻过,上次 mock 的测试过了但 prod 挂掉了,要走真实数据库",
            )],
        );
        assert_eq!(out.len(), 1);
        assert_eq!(out[0].signal, CandidateSignal::Incident);
    }

    // Ordinary requests and acknowledgements contain no keyword and produce
    // no candidates.
    #[test]
    fn non_signal_messages_are_skipped() {
        let out = extract_from_messages(
            "claude:abc",
            &[
                msg("user", "t1", "把这个函数改成小写好吗"),
                msg("assistant", "t2", "好的,已经改了"),
            ],
        );
        assert!(out.is_empty());
    }

    // Two identical messages share memory type and summary, so only one
    // candidate survives deduplication.
    #[test]
    fn duplicates_collapse_by_summary() {
        let m = msg("user", "t", "请以后都记住:不要每次都问我确认");
        let out = extract_from_messages("claude:abc", &[m.clone(), m]);
        assert_eq!(out.len(), 1);
    }

    // Over-long content is cut to 320 (summary) and 80 (title) characters,
    // each followed by one ellipsis character.
    #[test]
    fn long_content_is_truncated_in_summary_and_title() {
        let long = "不要每次都问了, ".repeat(80);
        let out = extract_from_messages("claude:abc", &[msg("user", "t", &long)]);
        assert_eq!(out.len(), 1);
        let summary_chars: Vec<char> = out[0].summary.chars().collect();
        assert!(summary_chars.len() <= 321); // 320 + ellipsis
        let title_chars: Vec<char> = out[0].title.chars().collect();
        assert!(title_chars.len() <= 81);
    }

    // The request metadata carries the given actor, the fixed import reason
    // and the candidate's evidence refs.
    #[test]
    fn into_propose_request_fills_metadata_with_actor_and_reason() {
        let candidate = extract_from_messages("claude:abc", &[msg("user", "t", "prefer 中文回复")])
            .into_iter()
            .next()
            .unwrap();
        let request = candidate.into_propose_request(Some("spool-importer".to_string()));
        assert_eq!(request.metadata.actor.as_deref(), Some("spool-importer"));
        assert_eq!(
            request.metadata.reason.as_deref(),
            Some("imported from session transcript"),
        );
        assert!(
            request
                .metadata
                .evidence_refs
                .contains(&"claude:abc".to_string())
        );
    }

    // Known provider names parse; anything else is rejected.
    #[test]
    fn import_provider_parse_rejects_unknown() {
        assert!(matches!(
            ImportProvider::parse("claude").unwrap(),
            ImportProvider::Claude
        ));
        assert!(matches!(
            ImportProvider::parse("codex").unwrap(),
            ImportProvider::Codex
        ));
        assert!(ImportProvider::parse("opencode").is_err());
    }

    // A session id that matches no listed session yields a "session not found" error.
    // NOTE(review): `import_session` calls `load_provider_sessions(None)`, so
    // this test presumably reads whatever session stores exist on the host —
    // confirm it stays hermetic in CI.
    #[test]
    fn import_session_errors_when_provider_session_missing() {
        use tempfile::TempDir;
        let tmp = TempDir::new().unwrap();
        let config = tmp.path().join("spool.toml");
        std::fs::write(&config, "# empty stub for importer test\n").unwrap();
        let err = import_session(
            &config,
            ImportProvider::Claude,
            "definitely-not-a-real-session-id-zzz",
            false,
            None,
        )
        .expect_err("should error for missing session");
        let text = format!("{err}");
        assert!(
            text.contains("session not found"),
            "unexpected error: {text}"
        );
    }
}
479}