Skip to main content

anamnesis_adapter_claude_code/
lib.rs

//! Anamnesis adapter for Claude Code.
//!
//! Data sources (see `docs/BLUEPRINT.md §6.8`):
//!
//!   ~/.claude/projects/<hash>/*.jsonl          — conversation history
//!   ~/.claude/projects/<hash>/memory/MEMORY.md — index (NOT imported)
//!   ~/.claude/projects/<hash>/memory/*.md      — typed memory files
//!
//! Mapping rules:
//!   - `memory/*.md` frontmatter `type` → `Kind` / `Scope`
//!       * user      → Kind::Fact      / Scope::User
//!       * feedback  → Kind::Feedback  / Scope::User
//!       * project   → Kind::Fact      / Scope::Project
//!       * reference → Kind::Reference / Scope::User
//!   - Each JSONL session → one `Kind::Episode` record (Scope::Session).
//!
//! Module layout:
//!   detector    — `SourceDetector` impl (metadata-only discovery)
//!   scanner     — filesystem walker (no content reads)
//!   frontmatter — minimal YAML frontmatter parser
//!   normalizer  — `RawRecord` → `AnamnesisRecord`
22
23#![forbid(unsafe_code)]
24#![warn(missing_docs)]
25
26pub mod detector;
27pub mod frontmatter;
28pub mod normalizer;
29pub mod scanner;
30pub mod session;
31
32use std::path::PathBuf;
33use std::sync::Arc;
34
35use anamnesis_core::adapter::{HealthStatus, MemoryAdapter, RawRecord, ScanOpts};
36use anamnesis_core::error::{Error, Result};
37use anamnesis_core::model::{AnamnesisRecord, SourceDescriptor};
38use async_trait::async_trait;
39use futures::stream::{self, BoxStream, StreamExt};
40
41pub use detector::ClaudeCodeDetector;
42
/// Identifier this adapter reports in its `SourceDescriptor` and in
/// adapter error values. Treat it as stable: other code refers to it.
pub const ADAPTER_ID: &'static str = "claude-code";
45
/// Configuration for the Claude Code adapter.
///
/// Cheap to clone and comparable by value, so callers (and tests) can
/// check whether two adapters were built from the same settings.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ClaudeCodeConfig {
    /// Root directory containing per-project subfolders.
    pub projects_root: PathBuf,
    /// Optional instance discriminator, echoed in the adapter's
    /// descriptor and forwarded to the normalizer.
    pub instance: Option<String>,
}
54
55/// The adapter.
56pub struct ClaudeCodeAdapter {
57    config: Arc<ClaudeCodeConfig>,
58}
59
60impl ClaudeCodeAdapter {
61    /// Build a new adapter from config.
62    pub fn new(config: ClaudeCodeConfig) -> Self {
63        Self {
64            config: Arc::new(config),
65        }
66    }
67}
68
69#[async_trait]
70impl MemoryAdapter for ClaudeCodeAdapter {
71    fn descriptor(&self) -> SourceDescriptor {
72        SourceDescriptor {
73            adapter: ADAPTER_ID.into(),
74            instance: self.config.instance.clone(),
75            version: env!("CARGO_PKG_VERSION").into(),
76        }
77    }
78
79    fn scan<'a>(&'a self, opts: ScanOpts) -> BoxStream<'a, Result<RawRecord>> {
80        // Round-19 (§-1.5 PR-4a): stream files lazily and honor
81        // `opts.since` / `opts.full`. We still pre-walk the directory
82        // tree (the walk itself is cheap; what was expensive was reading
83        // every file into memory before yielding the first record).
84        // True per-file laziness happens inside `stream_raw_records`,
85        // which yields one `RawRecord` at a time and only reads the
86        // file body on demand.
87        let cfg = (*self.config).clone();
88        Box::pin(stream_raw_records(cfg, opts).map(Ok))
89    }
90
91    fn normalize(&self, raw: RawRecord) -> Result<Vec<AnamnesisRecord>> {
92        normalizer::normalize(raw, self.config.instance.as_deref())
93    }
94
95    async fn health(&self) -> HealthStatus {
96        let exists = self.config.projects_root.exists();
97        HealthStatus {
98            ok: exists,
99            detail: if exists {
100                format!("projects_root: {}", self.config.projects_root.display())
101            } else {
102                format!(
103                    "projects_root not found: {}",
104                    self.config.projects_root.display()
105                )
106            },
107        }
108    }
109}
110
111/// Whether the file at `path` is "newer than the threshold" for an
112/// incremental scan. `since == None` (the default / `--full` case) means
113/// "no filter, always include".
114///
115/// On a metadata-read failure we conservatively INCLUDE the file
116/// (return `true`): the importer's per-record raw_hash fast-path is a
117/// safety net — a re-emitted unchanged record is a no-op upsert. False
118/// positives are cheap; a false negative would silently drop user data.
119fn passes_since_filter(
120    path: &std::path::Path,
121    since: Option<chrono::DateTime<chrono::Utc>>,
122) -> bool {
123    let Some(threshold) = since else { return true };
124    match file_mtime(path) {
125        Some(mtime) => mtime > threshold,
126        None => {
127            tracing::debug!(
128                path = %path.display(),
129                "no mtime available; conservatively including in incremental scan"
130            );
131            true
132        }
133    }
134}
135
136/// Walk every project under `projects_root` and **stream** one
137/// `RawRecord` per memory / session file. Files that can't be read are
138/// skipped (the caller logs to `import_errors`). Lazy IO — the file
139/// body is read inside the per-item closure, not up-front.
140///
141/// Round-19 (§-1.5 PR-4a): if `opts.since` is set, files whose mtime is
142/// at or before `since` are skipped without reading the body.
143/// `opts.full` overrides this back to "yield everything".
144fn stream_raw_records(cfg: ClaudeCodeConfig, opts: ScanOpts) -> BoxStream<'static, RawRecord> {
145    let scans = match scanner::scan_projects_root(&cfg.projects_root) {
146        Ok(s) => s,
147        Err(e) => {
148            tracing::warn!(
149                error = %e,
150                root = %cfg.projects_root.display(),
151                "scan_projects_root failed; emitting zero records"
152            );
153            return Box::pin(stream::iter(Vec::<RawRecord>::new()));
154        }
155    };
156
157    // Flatten into a single (kind, path) list while preserving the
158    // existing order (memory files first per project, then sessions).
159    // PR-4b will push this flattening inside the scanner itself; for
160    // PR-4a we only fix the IO-per-file laziness.
161    enum FileKind {
162        Memory,
163        Session,
164    }
165    let mut work: Vec<(FileKind, std::path::PathBuf)> = Vec::new();
166    for proj in scans {
167        for mem in proj.memory_files {
168            work.push((FileKind::Memory, mem));
169        }
170        for sess in proj.jsonl_files {
171            work.push((FileKind::Session, sess));
172        }
173    }
174
175    // Apply `since` filter via per-file mtime BEFORE reading bodies.
176    let since = if opts.full { None } else { opts.since };
177    let instance = cfg.instance.clone();
178    let stream = stream::iter(work).filter_map(move |(kind, path)| {
179        let instance = instance.clone();
180        async move {
181            if !passes_since_filter(&path, since) {
182                return None;
183            }
184            match std::fs::read_to_string(&path) {
185                Ok(body) => {
186                    let mtime = file_mtime(&path);
187                    let raw = match kind {
188                        FileKind::Memory => {
189                            normalizer::raw_memory(&path, body, mtime, instance.as_deref())
190                        }
191                        FileKind::Session => {
192                            normalizer::raw_session(&path, &body, mtime, instance.as_deref())
193                        }
194                    };
195                    Some(raw)
196                }
197                Err(e) => {
198                    tracing::warn!(
199                        path = %path.display(),
200                        error = %e,
201                        "skipping unreadable file"
202                    );
203                    None
204                }
205            }
206        }
207    });
208    Box::pin(stream)
209}
210
211/// Read a file's modification time as `DateTime<Utc>`. Returns `None`
212/// when `metadata()` fails or the platform doesn't expose mtime — the
213/// normalizer falls back to `captured_at` in that case.
214fn file_mtime(path: &std::path::Path) -> Option<chrono::DateTime<chrono::Utc>> {
215    let meta = std::fs::metadata(path).ok()?;
216    let m = meta.modified().ok()?;
217    Some(chrono::DateTime::<chrono::Utc>::from(m))
218}
219
220/// Convenience: read a single memory file into a `RawRecord` (used by
221/// the importer when re-importing one file outside the streaming scan).
222pub fn read_memory_file(path: &std::path::Path, instance: Option<&str>) -> Result<RawRecord> {
223    let body = std::fs::read_to_string(path).map_err(|e| Error::Adapter {
224        adapter: ADAPTER_ID.into(),
225        message: format!("read {}: {e}", path.display()),
226    })?;
227    let mtime = file_mtime(path);
228    Ok(normalizer::raw_memory(path, body, mtime, instance))
229}
230
#[cfg(test)]
mod tests {
    use super::*;
    use anamnesis_core::adapter::MemoryAdapter;
    use anamnesis_core::Kind;
    use futures::StreamExt;
    use std::fs;

    /// Process-wide counter so each fixture gets its own directory even
    /// when tests run in parallel.
    static NONCE: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);

    /// Create a fresh, empty scratch directory unique to this process
    /// and call.
    ///
    /// The name is derived from the pid and a counter, so a directory
    /// left behind by an earlier run with a recycled pid could otherwise
    /// be picked up again with stale fixture files in it. Any such
    /// leftover is removed first.
    fn tmp_dir() -> std::path::PathBuf {
        let n = NONCE.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
        let pid = std::process::id();
        let p = std::env::temp_dir().join(format!("anamnesis-adapter-{pid}-{n}"));
        // Best-effort cleanup: the directory normally does not exist.
        let _ = fs::remove_dir_all(&p);
        fs::create_dir_all(&p).unwrap();
        p
    }

    /// Write `content` to `p`, creating parent directories as needed.
    fn touch(p: &std::path::Path, content: &str) {
        if let Some(parent) = p.parent() {
            fs::create_dir_all(parent).unwrap();
        }
        fs::write(p, content).unwrap();
    }

    /// Build a projects root with one project containing two typed
    /// memory files, a `MEMORY.md` index and one JSONL session.
    fn realistic_fixture() -> std::path::PathBuf {
        let root = tmp_dir();
        let proj = root.join("project-abc");
        touch(
            &proj.join("memory").join("user_role.md"),
            "---\nname: senior-dev\ndescription: 10y rust\nmetadata:\n  type: user\n---\n\nuser is senior",
        );
        touch(
            &proj.join("memory").join("feedback_tests.md"),
            "---\nname: no-mocks\nmetadata:\n  type: feedback\n---\n\nuse real DB",
        );
        touch(&proj.join("memory").join("MEMORY.md"), "index");
        touch(
            &proj.join("session-1.jsonl"),
            "{\"role\":\"user\",\"content\":\"hi\"}\n{\"role\":\"assistant\",\"content\":\"hello\"}\n",
        );
        root
    }

    #[tokio::test]
    async fn descriptor_is_stable() {
        let a = ClaudeCodeAdapter::new(ClaudeCodeConfig {
            projects_root: "/tmp/nonexistent".into(),
            instance: Some("default".into()),
        });
        let d = a.descriptor();
        assert_eq!(d.adapter, ADAPTER_ID);
        assert_eq!(d.instance.as_deref(), Some("default"));
    }

    #[tokio::test]
    async fn scan_empty_when_root_missing() {
        let a = ClaudeCodeAdapter::new(ClaudeCodeConfig {
            projects_root: "/tmp/definitely-not-here".into(),
            instance: None,
        });
        let count = a.scan(ScanOpts::default()).collect::<Vec<_>>().await.len();
        assert_eq!(count, 0);
    }

    #[tokio::test]
    async fn scan_emits_memory_and_session_artifacts() {
        let root = realistic_fixture();
        let a = ClaudeCodeAdapter::new(ClaudeCodeConfig {
            projects_root: root,
            instance: Some("default".into()),
        });
        let items: Vec<_> = a
            .scan(ScanOpts::default())
            .collect::<Vec<_>>()
            .await
            .into_iter()
            .filter_map(|r| r.ok())
            .collect();
        assert_eq!(items.len(), 3, "2 memory + 1 session (MEMORY.md excluded)");
        let kinds: Vec<&str> = items
            .iter()
            .map(|r| r.payload["payload_kind"].as_str().unwrap())
            .collect();
        assert_eq!(kinds.iter().filter(|k| **k == "memory_md").count(), 2,);
        assert_eq!(kinds.iter().filter(|k| **k == "session_jsonl").count(), 1,);
    }

    #[tokio::test]
    async fn scan_then_normalize_produces_correct_record_kinds() {
        let root = realistic_fixture();
        let a = ClaudeCodeAdapter::new(ClaudeCodeConfig {
            projects_root: root,
            instance: Some("default".into()),
        });
        // Counters are named after the `Kind` they tally.
        let mut fact = 0;
        let mut feedback = 0;
        let mut episode = 0;
        let raws: Vec<_> = a
            .scan(ScanOpts::default())
            .collect::<Vec<_>>()
            .await
            .into_iter()
            .filter_map(|r| r.ok())
            .collect();
        for raw in raws {
            for record in a.normalize(raw).unwrap() {
                match record.kind {
                    Kind::Fact => fact += 1,
                    Kind::Feedback => feedback += 1,
                    Kind::Episode => episode += 1,
                    _ => {}
                }
            }
        }
        assert_eq!(fact, 1, "user_role.md should produce Kind::Fact");
        assert_eq!(feedback, 1);
        assert_eq!(episode, 1);
    }

    /// Scan with default options, normalize everything, and return the
    /// resulting record ids in sorted order.
    async fn collect_ids(adapter: &ClaudeCodeAdapter) -> Vec<anamnesis_core::RecordId> {
        let raws: Vec<_> = adapter
            .scan(ScanOpts::default())
            .collect::<Vec<_>>()
            .await
            .into_iter()
            .filter_map(|r| r.ok())
            .collect();
        let mut ids = Vec::new();
        for raw in raws {
            for record in adapter.normalize(raw).unwrap() {
                ids.push(record.id);
            }
        }
        ids.sort_by(|a, b| a.0.cmp(&b.0));
        ids
    }

    #[tokio::test]
    async fn import_is_idempotent_across_scan_runs() {
        let root = realistic_fixture();
        let a = ClaudeCodeAdapter::new(ClaudeCodeConfig {
            projects_root: root,
            instance: Some("default".into()),
        });
        let a_ids = collect_ids(&a).await;
        let b_ids = collect_ids(&a).await;
        assert_eq!(a_ids, b_ids, "two scans must produce identical record ids");
    }

    #[tokio::test]
    async fn health_reports_path_existence() {
        let a = ClaudeCodeAdapter::new(ClaudeCodeConfig {
            projects_root: "/tmp/never".into(),
            instance: None,
        });
        let h = a.health().await;
        assert!(!h.ok);
        assert!(h.detail.contains("not found"));
    }

    /// Round-19 (§-1.5 PR-4a): the adapter must skip files whose mtime
    /// is at or before `opts.since`. Build a fixture with two memory
    /// files, pin one mtime before the cutoff and the other after it,
    /// then scan with `since` set between them. Only the newer file
    /// should be emitted.
    #[tokio::test]
    async fn scan_since_filters_files_by_mtime() {
        use filetime::FileTime;
        let root = tmp_dir();
        let proj = root.join("proj-pr4");

        touch(
            &proj.join("memory").join("old.md"),
            "---\ntype: fact\n---\nold content",
        );
        touch(
            &proj.join("memory").join("new.md"),
            "---\ntype: fact\n---\nnew content",
        );

        // Pin both mtimes so the outcome does not depend on the wall
        // clock: old.md → ~2023-11-14, new.md → ~2027-01-15.
        let old_path = proj.join("memory").join("old.md");
        filetime::set_file_mtime(&old_path, FileTime::from_unix_time(1_700_000_000, 0)).unwrap();
        let new_path = proj.join("memory").join("new.md");
        filetime::set_file_mtime(&new_path, FileTime::from_unix_time(1_800_000_000, 0)).unwrap();

        // Cutoff sits AFTER the old file but BEFORE the new file.
        let cutoff = chrono::DateTime::<chrono::Utc>::from_timestamp(1_750_000_000, 0).unwrap();

        let adapter = ClaudeCodeAdapter::new(ClaudeCodeConfig {
            projects_root: root,
            instance: Some("default".into()),
        });

        let raws: Vec<_> = adapter
            .scan(ScanOpts {
                since: Some(cutoff),
                full: false,
            })
            .collect::<Vec<_>>()
            .await
            .into_iter()
            .filter_map(|r| r.ok())
            .collect();

        assert_eq!(
            raws.len(),
            1,
            "since-filter should drop the old.md file; got: {raws:?}"
        );
        assert!(
            raws[0]
                .native_path
                .as_deref()
                .unwrap_or("")
                .ends_with("new.md"),
            "the surviving record must be new.md; got native_path={:?}",
            raws[0].native_path,
        );
    }

    /// `opts.full = true` must override `opts.since` — the contract that
    /// keeps `--full` honest.
    #[tokio::test]
    async fn scan_full_overrides_since_filter() {
        use filetime::FileTime;
        let root = tmp_dir();
        let proj = root.join("proj-pr4-full");
        touch(
            &proj.join("memory").join("old.md"),
            "---\ntype: fact\n---\nold",
        );
        touch(
            &proj.join("memory").join("new.md"),
            "---\ntype: fact\n---\nnew",
        );
        // old.md is pushed before the cutoff; with `full` it must still
        // be emitted.
        let old_path = proj.join("memory").join("old.md");
        filetime::set_file_mtime(&old_path, FileTime::from_unix_time(1_700_000_000, 0)).unwrap();

        let cutoff = chrono::DateTime::<chrono::Utc>::from_timestamp(1_750_000_000, 0).unwrap();
        let adapter = ClaudeCodeAdapter::new(ClaudeCodeConfig {
            projects_root: root,
            instance: Some("default".into()),
        });

        let raws: Vec<_> = adapter
            .scan(ScanOpts {
                since: Some(cutoff),
                full: true, // → ignore `since`
            })
            .collect::<Vec<_>>()
            .await
            .into_iter()
            .filter_map(|r| r.ok())
            .collect();

        assert_eq!(
            raws.len(),
            2,
            "--full must override --since; expected both files, got: {raws:?}"
        );
    }
}