Skip to main content

ai_memory/cli/commands/
export_reflections.rs

1// Copyright 2026 AlphaOne LLC
2// SPDX-License-Identifier: Apache-2.0
3
4//! v0.7.0 QW-1 — file-backed reflection chain export.
5//!
6//! Ships the `ai-memory export-reflections` CLI subcommand so an
7//! operator can `cat ~/.ai-memory/reflections/<namespace>/<id>.md`
8//! and read what the substrate has synthesised without learning SQL.
9//!
10//! # Wire shape
11//!
12//! ```bash
13//! ai-memory export-reflections \
14//!     --namespace team/alpha \
15//!     --out-dir ~/.ai-memory/reflections \
16//!     --format md \
17//!     --since 2026-05-01T00:00:00Z \
18//!     --quiet
19//! ```
20//!
21//! The substrate is the source of truth: the SQL row is authoritative,
22//! the file on disk is a derived artefact. Operators may freely
23//! delete / regenerate the directory at any time.
24
25use std::fs;
26use std::path::{Path, PathBuf};
27
28use anyhow::{Context, Result};
29use chrono::Utc;
30use clap::Args;
31use serde::Serialize;
32
33use crate::cli::CliOutput;
34use crate::db;
35use crate::models::{Memory, MemoryKind};
36
/// Reflection-export subdirectory under `~/.ai-memory` — shared with the
/// post-reflect auto-export hook (#1558 batch 6).
///
/// `resolve_out_dir` appends this to the home-directory name when no
/// explicit `--out-dir` is supplied.
pub(crate) const REFLECTIONS_SUBDIR: &str = "reflections";
40
/// CLI args for `ai-memory export-reflections`.
// NOTE: clap's derive turns the `///` comments on the fields below into
// `--help` text, so treat them as user-facing strings when editing.
#[derive(Args, Debug, Clone)]
pub struct ExportReflectionsArgs {
    /// Restrict the export to reflections under this namespace.
    /// When omitted, every reflection memory is exported (one
    /// subdirectory per namespace under `--out-dir`).
    #[arg(long, value_name = "NS")]
    pub namespace: Option<String>,

    /// Output directory root. Defaults to `~/.ai-memory/reflections/`.
    /// The directory is created if it does not exist.
    // `None` is resolved to the default location by `resolve_out_dir`.
    #[arg(long, value_name = "PATH")]
    pub out_dir: Option<PathBuf>,

    /// Export format. `md` (default) writes a YAML-frontmatter
    /// markdown file per reflection. `json` writes a structured
    /// JSON envelope per reflection.
    // Kept as a free-form string here; `run` validates it through
    // `parse_format`, which also accepts `markdown` and ignores case.
    #[arg(long, default_value = "md", value_name = "FMT")]
    pub format: String,

    /// Only export reflections created at or after this RFC3339
    /// instant. Pre-existing reflections are skipped.
    // Forwarded verbatim to `db::list` by `collect_reflections`; this
    // module does not parse or validate the timestamp itself.
    #[arg(long, value_name = "RFC3339")]
    pub since: Option<String>,

    /// Suppress per-file output; only emit the final count line.
    #[arg(long, default_value_t = false)]
    pub quiet: bool,
}
70
/// Counters for one export-reflections run.
///
/// `run` builds one of these internally and reports `written` on its
/// final output line; the value itself is not part of `run`'s return
/// type (`Result<i32>`).
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ExportSummary {
    /// Reflections written to disk.
    pub written: usize,
    /// Reflections matched but skipped (already present, etc.).
    /// Nothing in this file increments it yet; `run` leaves it at the
    /// `Default` value of 0.
    pub skipped: usize,
}
80
/// The "json" format envelope. Mirrors the fields surfaced in the
/// markdown frontmatter so the two outputs carry the same provenance.
///
/// One difference from the markdown form: `reflects_on` holds only the
/// target ids (see `render_json`), whereas the markdown frontmatter also
/// lists each edge's attestation level and creation time.
///
/// All string fields borrow from the `Memory` being rendered, so the
/// envelope is built and serialised without copying the content.
#[derive(Debug, Serialize)]
struct JsonEnvelope<'a> {
    memory_id: &'a str,
    namespace: &'a str,
    title: &'a str,
    reflection_depth: i32,
    // Row-level summary label, as computed by `summarise_attest_level`.
    attest_level: &'a str,
    created_at: &'a str,
    // Empty string when the memory's metadata has no `agent_id`.
    agent_id: &'a str,
    // Target ids of the outbound `reflects_on` edges.
    reflects_on: Vec<String>,
    content: &'a str,
}
95
96/// Dispatch entry-point called from `daemon_runtime::run`.
97///
98/// # Errors
99///
100/// Propagates DB / I/O errors. Returns `Ok(0)` on success, `Ok(non-zero)`
101/// for non-fatal anomalies (e.g. unsupported format) so the harness can
102/// map the exit code without `Err` unwinding tripping the post-run
103/// WAL checkpoint.
104pub fn run(db_path: &Path, args: &ExportReflectionsArgs, out: &mut CliOutput<'_>) -> Result<i32> {
105    let format = parse_format(&args.format)?;
106    let out_dir = resolve_out_dir(args.out_dir.as_deref())?;
107    fs::create_dir_all(&out_dir)
108        .with_context(|| format!("creating out-dir {}", out_dir.display()))?;
109
110    let conn = db::open(db_path)?;
111    let mut summary = ExportSummary::default();
112
113    let reflections = collect_reflections(&conn, args.namespace.as_deref(), args.since.as_deref())?;
114    for mem in &reflections {
115        let edges = collect_outbound_reflects_on(&conn, &mem.id)?;
116        let attest_level = summarise_attest_level(&edges);
117        let payload = render_payload(mem, &edges, attest_level, format);
118
119        let ns_dir = out_dir.join(sanitise_namespace_for_path(&mem.namespace));
120        fs::create_dir_all(&ns_dir)
121            .with_context(|| format!("creating namespace dir {}", ns_dir.display()))?;
122        let filename = format!("{}.{}", mem.id, format.extension());
123        let path = ns_dir.join(&filename);
124        fs::write(&path, payload).with_context(|| crate::errors::msg::writing(path.display()))?;
125        summary.written += 1;
126        if !args.quiet {
127            writeln!(out.stdout, "wrote {}", path.display())?;
128        }
129    }
130    writeln!(
131        out.stdout,
132        "exported {} reflection(s) to {}",
133        summary.written,
134        out_dir.display()
135    )?;
136    let _ = summary.skipped; // reserved for future "skip-existing" mode.
137    Ok(0)
138}
139
/// One outbound `reflects_on` edge — the substrate-side projection that
/// drives both the markdown body and the JSON envelope.
///
/// Populated by `collect_outbound_reflects_on` from one `memory_links`
/// row; every field is carried as the string read from SQLite.
#[derive(Debug, Clone)]
pub(crate) struct ReflectsOnEdge {
    // Id of the memory this reflection reflects on (the link's target).
    pub target_id: String,
    // Attestation label for the edge; the query substitutes `unsigned`
    // when the column is NULL.
    pub attest_level: String,
    // Creation time of the link row; edges are returned oldest first.
    pub created_at: String,
}
148
149/// Visible-for-testing: parse `--format` into the enum.
150pub(crate) fn parse_format(spec: &str) -> Result<ExportFormat> {
151    match spec.to_lowercase().as_str() {
152        "md" | "markdown" => Ok(ExportFormat::Markdown),
153        "json" => Ok(ExportFormat::Json),
154        other => anyhow::bail!("unsupported export format '{other}' (expected 'md' or 'json')"),
155    }
156}
157
/// The file formats this exporter can emit.
///
/// Public (rather than crate-private) because the substrate-side hook
/// at `crate::hooks::post_reflect::auto_export` stores one in
/// `AutoExportConfig.format`, and a field's type must be at least as
/// visible as the field itself.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ExportFormat {
    Markdown,
    Json,
}

impl ExportFormat {
    /// File extension (without the leading dot) used for this format.
    pub(crate) fn extension(self) -> &'static str {
        let ext = match self {
            ExportFormat::Json => "json",
            ExportFormat::Markdown => "md",
        };
        ext
    }
}
176
177/// Resolve `--out-dir` (or the canonical default) to an absolute path.
178///
179/// Default = `${HOME}/.ai-memory/reflections/`. Falls back to
180/// `./.ai-memory/reflections/` (relative to CWD) when `HOME` is
181/// unavailable — typical in CI containers and the test harness, where
182/// writing to a project-local relative path is the only valid choice.
183pub(crate) fn resolve_out_dir(explicit: Option<&Path>) -> Result<PathBuf> {
184    if let Some(p) = explicit {
185        return Ok(p.to_path_buf());
186    }
187    if let Some(home) = std::env::var_os("HOME") {
188        return Ok(PathBuf::from(home)
189            .join(crate::AI_MEMORY_HOME_DIR_NAME)
190            .join(REFLECTIONS_SUBDIR));
191    }
192    Ok(PathBuf::from(crate::AI_MEMORY_HOME_DIR_NAME).join(REFLECTIONS_SUBDIR))
193}
194
/// Namespace → safe, always-relative filesystem path.
///
/// * `/` separates components, so `team/alpha` becomes nested subdirs;
///   empty components (leading, trailing or doubled slashes) are dropped.
/// * Within a component, ASCII alphanumerics, `-`, `_` and `.` are kept;
///   every other character is replaced with `_`.
/// * A component made up solely of dots (`.`, `..`, `...`) has its dots
///   replaced with `_` as well. Without this, a namespace such as
///   `../../etc` would survive sanitisation and let the caller's
///   `out_dir.join(..)` climb out of the export root.
/// * If nothing is left, the result is `_unnamed`.
///
/// The substrate already validates namespace strings on the write path,
/// so this is defence-in-depth — but it must hold on its own, because
/// the returned path is joined directly onto the output directory.
pub(crate) fn sanitise_namespace_for_path(ns: &str) -> PathBuf {
    let mut buf = PathBuf::new();
    for component in ns.split('/').filter(|part| !part.is_empty()) {
        // `.` is allowed inside names (`v1.2`), but a dots-only component
        // is a traversal token and must not reach the filesystem as-is.
        let dots_only = component.chars().all(|c| c == '.');
        let cleaned: String = component
            .chars()
            .map(|c| match c {
                '.' if dots_only => '_',
                '-' | '_' | '.' => c,
                c if c.is_ascii_alphanumeric() => c,
                _ => '_',
            })
            .collect();
        buf.push(cleaned);
    }
    if buf.as_os_str().is_empty() {
        buf.push("_unnamed");
    }
    buf
}
222
223/// Read every reflection-kind memory matching the supplied filters.
224///
225/// The query is namespace-scoped on the SQL side (cheap) and
226/// memory-kind / since-filtered in Rust (correct over `Memory`'s
227/// model-level fields rather than column-level oddities).
228fn collect_reflections(
229    conn: &rusqlite::Connection,
230    namespace: Option<&str>,
231    since: Option<&str>,
232) -> Result<Vec<Memory>> {
233    // The substrate's `db::list` already understands the namespace +
234    // since filters; we layer the memory_kind filter on top in Rust so
235    // we don't need to thread a new column-filter into the substrate
236    // signature for a CLI-side cosmetic export.
237    let now = Utc::now().to_rfc3339();
238    let _ = now; // future: time-bounded resume
239    let rows = db::list(
240        conn,
241        namespace,
242        None,
243        i32::MAX as usize,
244        0,
245        None,
246        since,
247        None,
248        None,
249        None,
250    )?;
251    Ok(rows
252        .into_iter()
253        .filter(|m| matches!(m.memory_kind, MemoryKind::Reflection))
254        .collect())
255}
256
257/// Read all `reflects_on` outbound edges (this memory → its sources)
258/// with their attestation level.
259fn collect_outbound_reflects_on(
260    conn: &rusqlite::Connection,
261    memory_id: &str,
262) -> Result<Vec<ReflectsOnEdge>> {
263    let mut stmt = conn.prepare(
264        "SELECT target_id, COALESCE(attest_level, 'unsigned'), created_at \
265         FROM memory_links \
266         WHERE source_id = ?1 AND relation = 'reflects_on' \
267         ORDER BY created_at ASC",
268    )?;
269    let rows = stmt.query_map(rusqlite::params![memory_id], |row| {
270        Ok(ReflectsOnEdge {
271            target_id: row.get(0)?,
272            attest_level: row.get(1)?,
273            created_at: row.get(2)?,
274        })
275    })?;
276    Ok(rows.collect::<rusqlite::Result<Vec<_>>>()?)
277}
278
279/// Summarise the per-edge attestation into one row-level label for
280/// the frontmatter. Promotion order: `signed > peer_attested >
281/// self_signed > unsigned`. No outbound edges → `"unsigned"`.
282pub(crate) fn summarise_attest_level(edges: &[ReflectsOnEdge]) -> &'static str {
283    let mut best = 0u8;
284    for e in edges {
285        let rank: u8 = match e.attest_level.as_str() {
286            "signed" => 3,
287            s if s == crate::models::AttestLevel::PeerAttested.as_str() => 2,
288            s if s == crate::models::AttestLevel::SelfSigned.as_str() => 1,
289            _ => 0,
290        };
291        if rank > best {
292            best = rank;
293        }
294    }
295    match best {
296        3 => "signed",
297        2 => crate::models::AttestLevel::PeerAttested.as_str(),
298        1 => crate::models::AttestLevel::SelfSigned.as_str(),
299        _ => crate::models::AttestLevel::Unsigned.as_str(),
300    }
301}
302
303/// Read `metadata.agent_id` off a reflection memory. Returns the empty
304/// string when the field is missing — the canonical "unknown" shape
305/// for downstream readers (`grep -v "^agent_id: $"` still finds rows).
306pub(crate) fn agent_id_of(mem: &Memory) -> &str {
307    mem.metadata
308        .get("agent_id")
309        .and_then(serde_json::Value::as_str)
310        .unwrap_or("")
311}
312
313/// Render the export payload (md or json) as a UTF-8 `String`.
314pub(crate) fn render_payload(
315    mem: &Memory,
316    edges: &[ReflectsOnEdge],
317    attest_level: &str,
318    format: ExportFormat,
319) -> String {
320    match format {
321        ExportFormat::Markdown => render_markdown(mem, edges, attest_level),
322        ExportFormat::Json => render_json(mem, edges, attest_level),
323    }
324}
325
326/// Render the YAML-frontmatter markdown body. Frontmatter fields are
327/// emitted in a stable order (memory_id, namespace, reflection_depth,
328/// attest_level, created_at, agent_id) followed by a sequence of
329/// `reflects_on` edges. The body of the reflection follows after a
330/// blank line, exactly as it was stored.
331fn render_markdown(mem: &Memory, edges: &[ReflectsOnEdge], attest_level: &str) -> String {
332    let agent_id = agent_id_of(mem);
333    let mut out = String::with_capacity(256 + mem.content.len());
334    out.push_str("---\n");
335    out.push_str(&format!("memory_id: {}\n", mem.id));
336    out.push_str(&format!("namespace: {}\n", yaml_scalar(&mem.namespace)));
337    out.push_str(&format!("title: {}\n", yaml_scalar(&mem.title)));
338    out.push_str(&format!("reflection_depth: {}\n", mem.reflection_depth));
339    out.push_str(&format!("attest_level: {attest_level}\n"));
340    out.push_str(&format!("created_at: {}\n", mem.created_at));
341    out.push_str(&format!("agent_id: {}\n", yaml_scalar(agent_id)));
342    out.push_str("reflects_on:\n");
343    if edges.is_empty() {
344        out.push_str("  []\n");
345    } else {
346        for e in edges {
347            out.push_str(&format!(
348                "  - target_id: {}\n    attest_level: {}\n    created_at: {}\n",
349                e.target_id, e.attest_level, e.created_at,
350            ));
351        }
352    }
353    out.push_str("---\n\n");
354    out.push_str(&mem.content);
355    if !mem.content.ends_with('\n') {
356        out.push('\n');
357    }
358    out
359}
360
361/// Render the JSON envelope.
362fn render_json(mem: &Memory, edges: &[ReflectsOnEdge], attest_level: &str) -> String {
363    let agent_id = agent_id_of(mem);
364    let env = JsonEnvelope {
365        memory_id: &mem.id,
366        namespace: &mem.namespace,
367        title: &mem.title,
368        reflection_depth: mem.reflection_depth,
369        attest_level,
370        created_at: &mem.created_at,
371        agent_id,
372        reflects_on: edges.iter().map(|e| e.target_id.clone()).collect(),
373        content: &mem.content,
374    };
375    // `to_string_pretty` keeps the JSON human-readable when the
376    // operator inspects it; `to_string` would land everything on one
377    // line, which is hostile to `git diff` and to `cat`.
378    serde_json::to_string_pretty(&env).unwrap_or_else(|e| format!("{{\"error\": \"{e}\"}}"))
379}
380
/// Render `s` as a single-line YAML scalar for the frontmatter.
///
/// The value is emitted bare unless it could be misread, in which case
/// it is wrapped in double quotes. Quoting is triggered when `s`:
///
/// * is empty;
/// * starts with a YAML indicator (`-` `?` `*` `&` `!` `|` `>` `'` `"`
///   `%` `@` `` ` ``) or a flow indicator (`[` `]` `{` `}` `,`);
/// * starts or ends with whitespace (a bare scalar would lose it);
/// * contains `:` or `#` anywhere;
/// * contains any control character (newline, tab, …).
///
/// Inside the quotes, `\` and `"` are backslash-escaped and control
/// characters are written as `\n`, `\r`, `\t` or `\uXXXX`, so a quoted
/// value always stays on one line. A raw newline would otherwise split
/// the frontmatter row and break line-oriented readers such as `grep`.
///
/// The check is deliberately conservative rather than a full YAML
/// emitter: it does not quote values a YAML parser would type as
/// booleans, nulls or numbers.
fn yaml_scalar(s: &str) -> String {
    let needs_quote = s.is_empty()
        || s.starts_with([
            '-', '?', '*', '&', '!', '|', '>', '\'', '"', '%', '@', '`', '[', ']', '{', '}', ',',
        ])
        || s.starts_with(char::is_whitespace)
        || s.ends_with(char::is_whitespace)
        || s.contains([':', '#'])
        || s.chars().any(char::is_control);
    if !needs_quote {
        return s.to_string();
    }

    let mut quoted = String::with_capacity(s.len() + 2);
    quoted.push('"');
    for c in s.chars() {
        match c {
            '\\' => quoted.push_str("\\\\"),
            '"' => quoted.push_str("\\\""),
            '\n' => quoted.push_str("\\n"),
            '\r' => quoted.push_str("\\r"),
            '\t' => quoted.push_str("\\t"),
            // Remaining control characters are rare; the generic
            // four-digit escape keeps them visible and on one line.
            c if c.is_control() => quoted.push_str(&format!("\\u{:04X}", c as u32)),
            c => quoted.push(c),
        }
    }
    quoted.push('"');
    quoted
}
400
// Unit tests for the export helpers. Pure renderers are exercised
// directly; only `collect_reflections` needs a real (temporary) database.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::Tier;
    use chrono::Utc;
    use tempfile::TempDir;

    // Opens a database in a fresh temp directory. The `TempDir` is
    // returned alongside the connection so the caller keeps it alive for
    // as long as the connection is in use.
    fn fresh_db() -> (rusqlite::Connection, TempDir) {
        let dir = TempDir::new().expect("tempdir");
        let path = dir.path().join("ai-memory.db");
        let conn = db::open(&path).expect("db::open");
        (conn, dir)
    }

    // Builds a reflection-kind `Memory` with a random id, the current
    // time as its timestamps, and `agent_id` stored under
    // `metadata.agent_id`. All other fields get fixed placeholder values.
    fn make_reflection(ns: &str, depth: i32, title: &str, body: &str, agent_id: &str) -> Memory {
        let now = Utc::now().to_rfc3339();
        Memory {
            id: uuid::Uuid::new_v4().to_string(),
            tier: Tier::Mid,
            namespace: ns.to_string(),
            title: title.to_string(),
            content: body.to_string(),
            tags: vec![],
            priority: 5,
            confidence: 1.0,
            source: "test".to_string(),
            access_count: 0,
            created_at: now.clone(),
            updated_at: now,
            last_accessed_at: None,
            expires_at: None,
            metadata: serde_json::json!({"agent_id": agent_id}),
            reflection_depth: depth,
            memory_kind: MemoryKind::Reflection,
            entity_id: None,
            persona_version: None,
            citations: Vec::new(),
            source_uri: None,
            source_span: None,
            confidence_source: crate::models::ConfidenceSource::CallerProvided,
            confidence_signals: None,
            confidence_decayed_at: None,
            version: 1,
        }
    }

    // `--format` parsing: both markdown spellings, case-insensitivity,
    // and rejection of an unknown format.
    #[test]
    fn parse_format_accepts_md_and_json() {
        assert_eq!(parse_format("md").unwrap(), ExportFormat::Markdown);
        assert_eq!(parse_format("markdown").unwrap(), ExportFormat::Markdown);
        assert_eq!(parse_format("MD").unwrap(), ExportFormat::Markdown);
        assert_eq!(parse_format("json").unwrap(), ExportFormat::Json);
        assert!(parse_format("yaml").is_err());
    }

    // Namespace → path mapping: slashes nest, unsafe characters become
    // `_`, and the empty namespace gets a placeholder directory name.
    #[test]
    fn sanitise_namespace_handles_slashes_and_weird_chars() {
        let p = sanitise_namespace_for_path("team/alpha");
        assert_eq!(p, PathBuf::from("team").join("alpha"));
        let p2 = sanitise_namespace_for_path("evil:ns?with*bits");
        assert_eq!(p2, PathBuf::from("evil_ns_with_bits"));
        let p3 = sanitise_namespace_for_path("");
        assert_eq!(p3, PathBuf::from("_unnamed"));
    }

    // The summary label is the strongest level found on any edge; each
    // assertion steps one rung up the ladder.
    #[test]
    fn summarise_attest_level_promotes_to_highest() {
        let mk = |s: &str| ReflectsOnEdge {
            target_id: "x".into(),
            attest_level: s.into(),
            created_at: "2026-01-01".into(),
        };
        assert_eq!(summarise_attest_level(&[]), "unsigned");
        assert_eq!(
            summarise_attest_level(&[mk("unsigned"), mk("unsigned")]),
            "unsigned"
        );
        assert_eq!(
            summarise_attest_level(&[mk("unsigned"), mk("self_signed")]),
            "self_signed"
        );
        assert_eq!(
            summarise_attest_level(&[mk("self_signed"), mk("peer_attested")]),
            "peer_attested"
        );
        assert_eq!(
            summarise_attest_level(&[mk("peer_attested"), mk("signed")]),
            "signed"
        );
    }

    // Happy-path markdown: frontmatter rows, per-edge rows and the body
    // all appear, with safe values left unquoted.
    #[test]
    fn render_markdown_carries_frontmatter_and_edges() {
        let mem = make_reflection(
            "team/alpha",
            2,
            "lesson learned",
            "Body line.\n",
            "agent-without-colon",
        );
        let edges = vec![
            ReflectsOnEdge {
                target_id: "src-1".into(),
                attest_level: "unsigned".into(),
                created_at: "2026-01-01T00:00:00Z".into(),
            },
            ReflectsOnEdge {
                target_id: "src-2".into(),
                attest_level: "signed".into(),
                created_at: "2026-01-02T00:00:00Z".into(),
            },
        ];
        let s = render_markdown(&mem, &edges, "signed");
        assert!(s.starts_with("---\n"));
        assert!(s.contains(&format!("memory_id: {}\n", mem.id)));
        assert!(s.contains("namespace: team/alpha\n"));
        assert!(s.contains("reflection_depth: 2\n"));
        assert!(s.contains("attest_level: signed\n"));
        // Bare scalar (no quotes) when value has no YAML-unsafe chars.
        assert!(s.contains("agent_id: agent-without-colon\n"));
        assert!(s.contains("  - target_id: src-1\n"));
        assert!(s.contains("    attest_level: signed\n"));
        assert!(s.ends_with("Body line.\n"));
    }

    #[test]
    fn render_markdown_quotes_agent_id_with_colon() {
        // `ai:test` style ids contain a `:` and must be quoted on the
        // YAML wire so a downstream YAML parser doesn't misread the
        // remainder as a nested mapping value.
        let mem = make_reflection("ns", 1, "t", "body", "ai:bot");
        let s = render_markdown(&mem, &[], "unsigned");
        assert!(s.contains("agent_id: \"ai:bot\"\n"));
    }

    // Quoting of title / namespace / empty agent id, plus the `[]` form
    // used when a reflection has no outbound edges.
    #[test]
    fn render_markdown_quotes_yaml_unsafe_strings() {
        let mut mem = make_reflection("global", 1, "weird: title", "body", "");
        mem.namespace = "weird:ns".into();
        let s = render_markdown(&mem, &[], "unsigned");
        // Title carries a colon — must be quoted.
        assert!(s.contains("title: \"weird: title\"\n"));
        // Namespace carries a colon — quoted on the frontmatter row,
        // even though `sanitise_namespace_for_path` would replace it
        // on disk.
        assert!(s.contains("namespace: \"weird:ns\"\n"));
        // Empty agent_id quoted as "" (so grep finds the row).
        assert!(s.contains("agent_id: \"\"\n"));
        // No edges → bracket form.
        assert!(s.contains("reflects_on:\n  []\n"));
    }

    // The JSON output must parse, and each envelope field must carry the
    // value from the memory; `reflects_on` is a list of bare target ids.
    #[test]
    fn render_json_emits_pretty_envelope() {
        let mem = make_reflection("ns", 1, "t", "body content\n", "ai:bot");
        let edges = vec![ReflectsOnEdge {
            target_id: "src".into(),
            attest_level: "self_signed".into(),
            created_at: "2026-01-01".into(),
        }];
        let s = render_json(&mem, &edges, "self_signed");
        let parsed: serde_json::Value = serde_json::from_str(&s).unwrap();
        assert_eq!(parsed["memory_id"].as_str().unwrap(), mem.id);
        assert_eq!(parsed["namespace"].as_str().unwrap(), "ns");
        assert_eq!(parsed["reflection_depth"].as_i64().unwrap(), 1);
        assert_eq!(parsed["attest_level"].as_str().unwrap(), "self_signed");
        assert_eq!(parsed["agent_id"].as_str().unwrap(), "ai:bot");
        assert_eq!(parsed["reflects_on"].as_array().unwrap().len(), 1);
        assert_eq!(parsed["reflects_on"][0].as_str().unwrap(), "src");
        assert!(parsed["content"].as_str().unwrap().contains("body content"));
    }

    // An explicit `--out-dir` is returned unchanged, whatever `HOME` is.
    #[test]
    fn resolve_out_dir_explicit_overrides_default() {
        let p = resolve_out_dir(Some(Path::new("/tmp/some-path"))).unwrap();
        assert_eq!(p, PathBuf::from("/tmp/some-path"));
    }

    // Two memories share a namespace but differ in kind; only the
    // reflection may come back.
    #[test]
    fn collect_reflections_filters_observations() {
        let (conn, _g) = fresh_db();
        // Reflection
        let r = make_reflection("ns-r", 1, "rfl", "rfl body", "ai:a");
        db::insert(&conn, &r).unwrap();
        // Observation (same namespace)
        let mut obs = make_reflection("ns-r", 0, "obs", "obs body", "ai:a");
        obs.memory_kind = MemoryKind::Observation;
        obs.reflection_depth = 0;
        db::insert(&conn, &obs).unwrap();

        let collected = collect_reflections(&conn, Some("ns-r"), None).unwrap();
        assert_eq!(collected.len(), 1);
        assert!(matches!(collected[0].memory_kind, MemoryKind::Reflection));
    }

    // Metadata without an `agent_id` key maps to the empty string.
    #[test]
    fn agent_id_of_returns_empty_when_absent() {
        let mut mem = make_reflection("n", 1, "t", "c", "");
        mem.metadata = serde_json::json!({});
        assert_eq!(agent_id_of(&mem), "");
    }
}