Skip to main content

batuta/agent/
instructions.rs

//! Project-instruction loading with `@import` syntax + user-level fallback
//! (PMAT-CODE-MEMORY-PARITY-001).
//!
//! Mirrors Claude Code's CLAUDE.md memory mechanism. Two extensions over
//! the legacy single-file project-only loader:
//!
//! 1. **`@<path>` inline imports** inside CLAUDE.md / APR.md content.
//!    A line starting with `@` followed by a relative or absolute path
//!    is replaced with that file's contents (transitively, with depth
//!    limit). Mirrors Claude Code's `@./CONVENTIONS.md` syntax. Missing
//!    or unreadable imports leave the line verbatim and record a
//!    warning (Poka-Yoke — no silent partial expansion).
//!
//! 2. **User-level layer**: a user-global CLAUDE.md / APR.md is looked
//!    up in `$APR_CONFIG/` when that variable is set (no other location
//!    is consulted in that case), otherwise in `~/.config/apr/` and then
//!    `~/.claude/` (Claude-Code cross-compat). Within each location
//!    `APR.md` (apr-native filename) takes precedence over `CLAUDE.md`.
//!    The user-global file is the fallback when no project-level file
//!    exists in `cwd`; when both exist, `load_layered_instructions`
//!    places the user-global content first and the project content
//!    after it.
//!
//! Pure-function design: no terminal I/O, no caller-state mutation. Any
//! warnings are returned via an `&mut Vec<String>` so the caller can
//! decide where they go (REPL stderr vs CCPA trace).
24
25use std::path::{Path, PathBuf};
26
/// Upper bound on nested `@import` expansion.
///
/// Most documents import a single flat file (for example `@CONVENTIONS.md`).
/// Four hops still covers a chain such as CLAUDE.md → conventions.md →
/// security.md → boilerplate.md, while stopping an import loop before it
/// can eat the context budget.
pub const MAX_IMPORT_DEPTH: usize = 4;
32
/// Candidate file names for project-level instructions, highest priority
/// first. `APR.md` is the apr-native name; `CLAUDE.md` is the cross-compat one.
pub const PROJECT_FILENAMES: &[&str] = &["APR.md", "CLAUDE.md"];

/// Locate the project-level instructions file directly inside `cwd`.
///
/// Candidates are probed in [`PROJECT_FILENAMES`] order; the first one that
/// exists as a file is returned. `None` means no candidate is present.
pub fn find_project_instructions(cwd: &Path) -> Option<PathBuf> {
    for name in PROJECT_FILENAMES {
        let candidate = cwd.join(name);
        if candidate.is_file() {
            return Some(candidate);
        }
    }
    None
}
42
43/// Find the first existing user-global instructions file. Search order:
44/// `$APR_CONFIG/<name>` → `~/.config/apr/<name>` → `~/.claude/<name>`,
45/// trying each `name` from [`PROJECT_FILENAMES`] within each layer
46/// before moving to the next layer.
47pub fn find_user_global_instructions() -> Option<PathBuf> {
48    for layer in user_global_search_dirs() {
49        for fname in PROJECT_FILENAMES {
50            let p = layer.join(fname);
51            if p.is_file() {
52                return Some(p);
53            }
54        }
55    }
56    None
57}
58
59/// User-global instruction search directories, in priority order.
60///
61/// * If `$APR_CONFIG` is set, that's the **only** location consulted —
62///   setting the env var is treated as an explicit opt-out of the
63///   default lookup chain (Poka-Yoke; tests + sandboxed runs need this
64///   so host-level CLAUDE.md can't leak in).
65/// * Otherwise, search XDG `~/.config/apr/` first, then `~/.claude/`
66///   (Claude-Code cross-compat).
67fn user_global_search_dirs() -> Vec<PathBuf> {
68    if let Ok(custom) = std::env::var("APR_CONFIG") {
69        if !custom.is_empty() {
70            return vec![PathBuf::from(custom)];
71        }
72    }
73    let mut out = Vec::new();
74    if let Some(cfg) = dirs::config_dir() {
75        out.push(cfg.join("apr"));
76    }
77    if let Some(home) = dirs::home_dir() {
78        out.push(home.join(".claude"));
79    }
80    out
81}
82
83/// Expand `@<path>` import lines in `content` recursively. Each
84/// imported file's contents replace the import line (relative paths
85/// resolved against `base_dir`).
86///
87/// Behavior:
88/// * Only lines whose **trimmed start** is `@` followed by a non-
89///   whitespace path token are imports. This means a line like
90///   `Talk to noah@paiml.com` is NOT an import — `@` must lead.
91/// * Imports are resolved relative to the importing file's directory,
92///   not `cwd`, matching Claude Code's `@./conventions.md` semantics.
93/// * Recursion depth is capped at [`MAX_IMPORT_DEPTH`]. Cycles or
94///   over-deep chains leave the import line verbatim and emit a
95///   warning. So does any I/O failure (Poka-Yoke).
96pub fn expand_imports(content: &str, base_dir: &Path, warnings: &mut Vec<String>) -> String {
97    expand_imports_inner(content, base_dir, 0, warnings)
98}
99
100fn expand_imports_inner(
101    content: &str,
102    base_dir: &Path,
103    depth: usize,
104    warnings: &mut Vec<String>,
105) -> String {
106    let mut out = String::with_capacity(content.len());
107    for line in content.lines() {
108        if let Some(import_path) = parse_import_line(line) {
109            if depth >= MAX_IMPORT_DEPTH {
110                warnings.push(format!(
111                    "@{import_path}: import depth limit ({MAX_IMPORT_DEPTH}) exceeded; line kept verbatim"
112                ));
113                out.push_str(line);
114                out.push('\n');
115                continue;
116            }
117            let resolved = resolve_import_path(import_path, base_dir);
118            match std::fs::read_to_string(&resolved) {
119                Ok(body) => {
120                    let next_base = resolved
121                        .parent()
122                        .map(Path::to_path_buf)
123                        .unwrap_or_else(|| base_dir.to_path_buf());
124                    let expanded = expand_imports_inner(&body, &next_base, depth + 1, warnings);
125                    out.push_str(&expanded);
126                    if !out.ends_with('\n') {
127                        out.push('\n');
128                    }
129                }
130                Err(e) => {
131                    warnings.push(format!("@{import_path}: {e}"));
132                    out.push_str(line);
133                    out.push('\n');
134                }
135            }
136        } else {
137            out.push_str(line);
138            out.push('\n');
139        }
140    }
141    out
142}
143
/// If `line` is an `@<path>` import directive, return the path. The
/// `@` must be at the start of the trimmed line (not mid-line) so an
/// inline mention like `email noah@paiml.com` is preserved.
///
/// The path token starts immediately after the `@` and runs until the
/// first whitespace, so paths with spaces are NOT supported (matches
/// Claude Code's grammar). A bare `@`, or an `@` followed by whitespace
/// (`@ note to self`), is not an import and yields `None`.
fn parse_import_line(line: &str) -> Option<&str> {
    let after_at = line.trim_start().strip_prefix('@')?;
    // `split` (unlike `split_whitespace`) does not skip leading whitespace,
    // so `@ foo` produces an empty first token and is rejected instead of
    // being mistaken for an import of `foo`.
    after_at
        .split(char::is_whitespace)
        .next()
        .filter(|path| !path.is_empty())
}
160
161/// Resolve `import_path` relative to `base_dir`. Absolute paths and
162/// `~`-prefixed paths are expanded; otherwise the result is
163/// `base_dir.join(import_path)`.
164fn resolve_import_path(import_path: &str, base_dir: &Path) -> PathBuf {
165    if let Some(rest) = import_path.strip_prefix("~/") {
166        if let Some(home) = dirs::home_dir() {
167            return home.join(rest);
168        }
169    }
170    let p = Path::new(import_path);
171    if p.is_absolute() {
172        p.to_path_buf()
173    } else {
174        base_dir.join(p)
175    }
176}
177
/// Truncate `content` to at most `max_bytes` bytes of original text, cut
/// on a UTF-8 char boundary, then append a
/// `...\n(truncated from N bytes)` annotation (`N` is the original length).
///
/// * `max_bytes == 0` returns `None` (caller skips loading entirely).
/// * Content that already fits is returned unchanged, without annotation.
/// * The annotation itself is not counted against `max_bytes`.
pub fn truncate_to_budget(content: String, max_bytes: usize) -> Option<String> {
    if max_bytes == 0 {
        return None;
    }
    if content.len() <= max_bytes {
        return Some(content);
    }
    // Walk back from the budget to the nearest char boundary. This keeps
    // the prefix within `max_bytes` even when a multi-byte char straddles
    // the limit; index 0 is always a boundary, so the loop terminates.
    let mut end = max_bytes;
    while !content.is_char_boundary(end) {
        end -= 1;
    }
    Some(format!("{}...\n(truncated from {} bytes)", &content[..end], content.len()))
}
196
197/// Load layered project + user-global instructions with `@import`
198/// expansion. Returns `None` if `max_bytes` is 0 or no file exists at
199/// any layer.
200///
201/// Layering: when both a user-global file AND a project file exist,
202/// the user-global content is concatenated FIRST (under a
203/// `## User-global instructions` heading) and the project content
204/// appears AFTER (matching Claude Code's "later layer wins context-
205/// wise but earlier layers still inform the model"). Either layer
206/// can be missing.
207pub fn load_layered_instructions(
208    cwd: &Path,
209    max_bytes: usize,
210    warnings: &mut Vec<String>,
211) -> Option<String> {
212    if max_bytes == 0 {
213        return None;
214    }
215    let mut accumulated = String::new();
216
217    if let Some(user_path) = find_user_global_instructions() {
218        if let Ok(body) = std::fs::read_to_string(&user_path) {
219            let user_dir = user_path.parent().unwrap_or(Path::new("."));
220            let expanded = expand_imports(&body, user_dir, warnings);
221            accumulated.push_str("## User-global instructions (");
222            accumulated.push_str(&user_path.display().to_string());
223            accumulated.push_str(")\n\n");
224            accumulated.push_str(&expanded);
225            if !accumulated.ends_with("\n\n") {
226                accumulated.push('\n');
227            }
228        }
229    }
230
231    if let Some(project_path) = find_project_instructions(cwd) {
232        if let Ok(body) = std::fs::read_to_string(&project_path) {
233            let project_dir = project_path.parent().unwrap_or(cwd);
234            let expanded = expand_imports(&body, project_dir, warnings);
235            if !accumulated.is_empty() {
236                accumulated.push_str("\n## Project instructions (");
237                accumulated.push_str(&project_path.display().to_string());
238                accumulated.push_str(")\n\n");
239            }
240            accumulated.push_str(&expanded);
241        }
242    }
243
244    if accumulated.is_empty() {
245        return None;
246    }
247    truncate_to_budget(accumulated, max_bytes)
248}
249
#[cfg(test)]
mod tests {
    use super::*;
    use std::ffi::OsString;
    use std::fs;

    /// Write `body` to `path`, creating any missing parent directories.
    fn write(path: &Path, body: &str) {
        if let Some(p) = path.parent() {
            fs::create_dir_all(p).expect("mkdir");
        }
        fs::write(path, body).expect("write");
    }

    // ── parse_import_line ──────────────────────────────────────────

    #[test]
    fn import_line_simple() {
        assert_eq!(parse_import_line("@./CONVENTIONS.md"), Some("./CONVENTIONS.md"));
    }

    #[test]
    fn import_line_strips_indent() {
        assert_eq!(parse_import_line("  @abs/path.md"), Some("abs/path.md"));
    }

    #[test]
    fn import_line_stops_at_whitespace() {
        // anything after the path is ignored — Claude Code grammar
        assert_eq!(parse_import_line("@./README.md trailing comment"), Some("./README.md"));
    }

    #[test]
    fn import_line_email_not_an_import() {
        assert_eq!(parse_import_line("Email noah@paiml.com"), None);
    }

    #[test]
    fn import_line_bare_at_is_not() {
        assert_eq!(parse_import_line("@"), None);
        assert_eq!(parse_import_line("@   "), None);
    }

    #[test]
    fn import_line_inline_at_ignored() {
        // Claude Code only considers leading `@`. An inline `@` mid-line
        // is just text.
        assert_eq!(parse_import_line("see @./foo.md inline"), None);
    }

    // ── resolve_import_path ────────────────────────────────────────

    #[test]
    fn resolve_relative_against_base() {
        let p = resolve_import_path("./conventions.md", Path::new("/tmp/proj"));
        assert_eq!(p, Path::new("/tmp/proj/./conventions.md"));
    }

    #[test]
    fn resolve_absolute_passes_through() {
        let p = resolve_import_path("/abs/file.md", Path::new("/tmp/proj"));
        assert_eq!(p, Path::new("/abs/file.md"));
    }

    #[test]
    fn resolve_tilde_expands_home() {
        let p = resolve_import_path("~/CONVENTIONS.md", Path::new("/tmp/proj"));
        // Only checkable when the platform reports a home directory.
        if let Some(home) = dirs::home_dir() {
            assert_eq!(p, home.join("CONVENTIONS.md"));
        }
    }

    // ── expand_imports ─────────────────────────────────────────────

    #[test]
    fn expand_no_imports_returns_unchanged_modulo_newlines() {
        let mut warns = Vec::new();
        let out = expand_imports("hello\nworld\n", Path::new("/tmp"), &mut warns);
        assert_eq!(out, "hello\nworld\n");
        assert!(warns.is_empty());
    }

    #[test]
    fn expand_single_import() {
        let dir = tempfile::tempdir().expect("tempdir");
        let imp = dir.path().join("conv.md");
        write(&imp, "## Conventions\n- camelCase\n");
        let body = format!("Top-level\n@{}\nBottom\n", imp.display());
        let mut warns = Vec::new();
        let out = expand_imports(&body, dir.path(), &mut warns);
        assert!(out.contains("Top-level"));
        assert!(out.contains("## Conventions"));
        assert!(out.contains("camelCase"));
        assert!(out.contains("Bottom"));
        assert!(warns.is_empty());
    }

    #[test]
    fn expand_relative_import_against_base() {
        let dir = tempfile::tempdir().expect("tempdir");
        let imp = dir.path().join("conv.md");
        write(&imp, "imported-body");
        let body = "@./conv.md\n";
        let mut warns = Vec::new();
        let out = expand_imports(body, dir.path(), &mut warns);
        assert!(out.contains("imported-body"));
    }

    #[test]
    fn expand_missing_import_keeps_line_and_warns() {
        let dir = tempfile::tempdir().expect("tempdir");
        let body = "@./not-there.md\n";
        let mut warns = Vec::new();
        let out = expand_imports(body, dir.path(), &mut warns);
        assert!(out.contains("@./not-there.md"));
        assert_eq!(warns.len(), 1);
        assert!(warns[0].contains("not-there.md"));
    }

    #[test]
    fn expand_recursive_imports() {
        // a.md @-imports b.md which @-imports c.md (3 levels).
        let dir = tempfile::tempdir().expect("tempdir");
        let a = dir.path().join("a.md");
        let b = dir.path().join("b.md");
        let c = dir.path().join("c.md");
        write(&a, "AAA\n@./b.md\n");
        write(&b, "BBB\n@./c.md\n");
        write(&c, "CCC\n");
        let mut warns = Vec::new();
        let out = expand_imports(&fs::read_to_string(&a).unwrap(), dir.path(), &mut warns);
        assert!(out.contains("AAA"));
        assert!(out.contains("BBB"));
        assert!(out.contains("CCC"));
        assert!(warns.is_empty());
    }

    #[test]
    fn expand_recursive_path_resolves_against_importing_file() {
        // Sub-dir import: imported file lives elsewhere; its own
        // imports must resolve against ITS directory, not the
        // top-level one.
        let dir = tempfile::tempdir().expect("tempdir");
        let sub = dir.path().join("sub");
        let outer = dir.path().join("outer.md");
        let mid = sub.join("mid.md");
        let leaf = sub.join("leaf.md");
        write(&outer, "TOP\n@./sub/mid.md\n");
        write(&mid, "MID\n@./leaf.md\n"); // relative to sub/, not to dir/
        write(&leaf, "LEAF\n");
        let mut warns = Vec::new();
        let out = expand_imports(&fs::read_to_string(&outer).unwrap(), dir.path(), &mut warns);
        assert!(out.contains("TOP"));
        assert!(out.contains("MID"));
        assert!(out.contains("LEAF"), "leaf.md should resolve relative to sub/, got: {out:?}");
    }

    #[test]
    fn expand_depth_limit_prevents_cycle_blowup() {
        // a.md @-imports b.md, b.md @-imports a.md → cycle. The depth
        // limit caps the recursion; the line that would trigger the
        // (depth+1)-th expansion is kept verbatim with a warning.
        let dir = tempfile::tempdir().expect("tempdir");
        let a = dir.path().join("a.md");
        let b = dir.path().join("b.md");
        write(&a, "@./b.md\n");
        write(&b, "@./a.md\n");
        let mut warns = Vec::new();
        let out = expand_imports(&fs::read_to_string(&a).unwrap(), dir.path(), &mut warns);
        // Stays bounded — output is non-empty but doesn't blow up.
        assert!(out.len() < 100_000);
        // The depth-limit warning fires.
        assert!(warns.iter().any(|w| w.contains("depth limit")), "warns: {warns:?}");
    }

    // ── truncate_to_budget ─────────────────────────────────────────

    #[test]
    fn truncate_zero_budget_yields_none() {
        assert!(truncate_to_budget("xxx".into(), 0).is_none());
    }

    #[test]
    fn truncate_under_budget_passthrough() {
        let s = truncate_to_budget("short".into(), 100).expect("kept");
        assert_eq!(s, "short");
    }

    #[test]
    fn truncate_over_budget_appends_annotation() {
        let big = "x".repeat(500);
        let s = truncate_to_budget(big, 100).expect("truncated");
        assert!(s.starts_with("x"));
        assert!(s.contains("truncated from 500 bytes"));
    }

    #[test]
    fn truncate_respects_utf8_boundary() {
        // 'é' is a 2-byte char starting at byte 99, so it straddles the
        // 100-byte budget and must not be split.
        let s = format!("{}é", "a".repeat(99));
        let truncated = truncate_to_budget(s, 100).expect("truncated");
        // Reaching this point means no char-boundary panic occurred. The
        // ASCII prefix must survive and the annotation must be present.
        assert!(truncated.starts_with(&"a".repeat(99)));
        assert!(truncated.contains("truncated from"));
    }

    // ── find_user_global_instructions / load_layered_instructions ──
    //
    // The tests below mutate the process-wide `APR_CONFIG` env var, and
    // cargo runs `#[test]` functions in parallel by default. Without
    // serialization two tests can corrupt each other's view of the env
    // (test A sets, test B reads, test A removes).
    //
    // `AprConfigGuard` holds a module-wide mutex for as long as the
    // override is active and undoes the override in `Drop`. The variable
    // is therefore restored even when an assertion or `expect` panics
    // mid-test, and a value that was set before the test is put back
    // rather than removed.
    struct AprConfigGuard {
        // Value of `APR_CONFIG` before the override, if it was set.
        previous: Option<OsString>,
        // Declared last so the lock is released only after `drop` has
        // restored the environment.
        _lock: std::sync::MutexGuard<'static, ()>,
    }

    impl AprConfigGuard {
        /// Take the env lock and point `APR_CONFIG` at `dir`.
        fn set(dir: &Path) -> Self {
            static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
            // A poisoned lock only means an earlier test panicked; the
            // guard's own `Drop` already restored the env, so continue.
            let lock = LOCK.lock().unwrap_or_else(|e| e.into_inner());
            let previous = std::env::var_os("APR_CONFIG");
            std::env::set_var("APR_CONFIG", dir);
            Self { previous, _lock: lock }
        }
    }

    impl Drop for AprConfigGuard {
        fn drop(&mut self) {
            match self.previous.take() {
                Some(value) => std::env::set_var("APR_CONFIG", value),
                None => std::env::remove_var("APR_CONFIG"),
            }
        }
    }

    #[test]
    fn user_global_honors_apr_config_env_first() {
        let dir = tempfile::tempdir().expect("tempdir");
        write(&dir.path().join("CLAUDE.md"), "user-global-content");
        let _env = AprConfigGuard::set(dir.path());
        let p = find_user_global_instructions().expect("found");
        assert_eq!(p, dir.path().join("CLAUDE.md"));
    }

    #[test]
    fn user_global_prefers_apr_md_over_claude_md_within_layer() {
        let dir = tempfile::tempdir().expect("tempdir");
        write(&dir.path().join("APR.md"), "apr-version");
        write(&dir.path().join("CLAUDE.md"), "claude-version");
        let _env = AprConfigGuard::set(dir.path());
        let p = find_user_global_instructions().expect("found");
        assert_eq!(p, dir.path().join("APR.md"), "APR.md wins over CLAUDE.md within a layer");
    }

    #[test]
    fn load_layered_returns_none_when_nothing_to_load() {
        let cfg = tempfile::tempdir().expect("cfg");
        let proj = tempfile::tempdir().expect("proj");
        let _env = AprConfigGuard::set(cfg.path());
        let mut warns = Vec::new();
        let out = load_layered_instructions(proj.path(), 4096, &mut warns);
        assert!(out.is_none());
    }

    #[test]
    fn load_layered_concatenates_user_global_then_project() {
        let cfg = tempfile::tempdir().expect("cfg");
        let proj = tempfile::tempdir().expect("proj");
        write(&cfg.path().join("CLAUDE.md"), "USER-GLOBAL-BODY\n");
        write(&proj.path().join("CLAUDE.md"), "PROJECT-BODY\n");
        let _env = AprConfigGuard::set(cfg.path());
        let mut warns = Vec::new();
        let out = load_layered_instructions(proj.path(), 65536, &mut warns).expect("loaded");
        let user_idx = out.find("USER-GLOBAL-BODY").expect("user-global present");
        let proj_idx = out.find("PROJECT-BODY").expect("project present");
        assert!(
            user_idx < proj_idx,
            "user-global must come before project so project wins context-wise"
        );
        assert!(out.contains("User-global instructions"));
        assert!(out.contains("Project instructions"));
    }

    #[test]
    fn load_layered_resolves_imports_in_each_layer() {
        let cfg = tempfile::tempdir().expect("cfg");
        let proj = tempfile::tempdir().expect("proj");
        // user-global: imports a sibling file
        write(&cfg.path().join("CLAUDE.md"), "USER\n@./shared.md\n");
        write(&cfg.path().join("shared.md"), "USER-SHARED\n");
        // project: imports a sibling file
        write(&proj.path().join("CLAUDE.md"), "PROJ\n@./conv.md\n");
        write(&proj.path().join("conv.md"), "PROJ-CONV\n");
        let _env = AprConfigGuard::set(cfg.path());
        let mut warns = Vec::new();
        let out = load_layered_instructions(proj.path(), 65536, &mut warns).expect("loaded");
        assert!(out.contains("USER-SHARED"));
        assert!(out.contains("PROJ-CONV"));
        assert!(warns.is_empty(), "no warnings expected, got: {warns:?}");
    }
}