Skip to main content

caliban_memory/
project_walk.rs

//! Ancestor walk: discover `CLAUDE.md` / `AGENTS.md` / `.caliban.md` files
//! upward from cwd to git/fs root, with inode-based dedupe and gitignore-style
//! excludes.
//!
//! Part of ADR 0036 (CLAUDE.md ancestor walk + `@`-imports). See
//! `docs/superpowers/specs/2026-05-24-claudemd-ancestry-design.md` for the
//! design.
8
9use std::collections::BTreeSet;
10use std::path::{Path, PathBuf};
11
12use globset::GlobSet;
13
/// Boundary at which the ancestor walk terminates.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum WalkStop {
    /// Terminate at the first directory that has a `.git` entry.
    GitRoot,
    /// Terminate at the filesystem root (`/`).
    FsRoot,
    /// Terminate at whichever of the two boundaries comes first (default).
    #[default]
    Both,
}

impl WalkStop {
    /// Leniently parse a `WalkStop` from a configuration string.
    ///
    /// Surrounding whitespace is trimmed and ASCII case is ignored. Accepted
    /// spellings:
    ///
    /// * `"git_root"`, `"gitroot"`, `"git"` → [`WalkStop::GitRoot`]
    /// * `"fs_root"`, `"fsroot"`, `"fs"` → [`WalkStop::FsRoot`]
    ///
    /// Everything else — including `"both"`, the empty string, and any
    /// unrecognized input — yields [`WalkStop::Both`]. This never fails.
    #[must_use]
    pub fn parse(s: &str) -> Self {
        /// True when `token` equals one of `aliases`, ignoring ASCII case.
        fn is_one_of(token: &str, aliases: &[&str]) -> bool {
            aliases
                .iter()
                .any(|alias| token.eq_ignore_ascii_case(alias))
        }

        let token = s.trim();
        if is_one_of(token, &["git_root", "gitroot", "git"]) {
            Self::GitRoot
        } else if is_one_of(token, &["fs_root", "fsroot", "fs"]) {
            Self::FsRoot
        } else {
            Self::Both
        }
    }
}
39
/// File names probed in every directory the ancestor walk visits.
///
/// The slice order is also the order in which hits from a single directory
/// are reported: most-specific → most-general.
pub const ANCESTRY_FILENAMES: &[&str] = &[
    ".caliban.md",
    "CLAUDE.md",
    "AGENTS.md",
];
43
44/// Walk up the directory tree starting at `cwd`, returning every CLAUDE.md /
45/// AGENTS.md / `.caliban.md` discovered along the way. Results are returned
46/// in **broad → narrow** order (closest to the root first, cwd last).
47///
48/// `excludes` is a gitignore-style glob set evaluated against the path
49/// **relative to `cwd`** (the workspace root for that walk).
50///
51/// Duplicate files reached via symlinks are dropped via inode-based dedupe
52/// (or by canonical-path equality on platforms where `MetadataExt::ino` isn't
53/// available).
54#[must_use]
55pub fn walk_ancestors(cwd: &Path, stop: WalkStop, excludes: &GlobSet) -> Vec<PathBuf> {
56    // First pass: collect per-directory files in narrow → broad order.
57    // Within each directory we keep the source order from `ANCESTRY_FILENAMES`
58    // (most-specific → most-general). Reversing the dir list at the end then
59    // gives broad → narrow ordering across directories without disturbing the
60    // within-directory order.
61    let mut per_dir: Vec<Vec<PathBuf>> = Vec::new();
62    let mut seen: BTreeSet<InodeKey> = BTreeSet::new();
63    let mut current: Option<PathBuf> = Some(cwd.to_path_buf());
64
65    while let Some(dir) = current {
66        let mut dir_hits = Vec::new();
67        for name in ANCESTRY_FILENAMES {
68            let candidate = dir.join(name);
69            if !candidate.is_file() {
70                continue;
71            }
72            // Inode-based dedupe (catches symlinks pointing to the same file).
73            let key = inode_key(&candidate);
74            if !seen.insert(key) {
75                continue;
76            }
77            // Excludes evaluated relative to the workspace root (= cwd).
78            let rel = candidate.strip_prefix(cwd).unwrap_or(&candidate);
79            if excludes.is_match(rel) {
80                continue;
81            }
82            dir_hits.push(candidate);
83        }
84        if !dir_hits.is_empty() {
85            per_dir.push(dir_hits);
86        }
87
88        if reached_stop(&dir, stop) {
89            break;
90        }
91
92        match dir.parent() {
93            Some(parent) if parent != dir => current = Some(parent.to_path_buf()),
94            _ => break,
95        }
96    }
97
98    // Reverse dir order (broad → narrow) but preserve within-dir order.
99    per_dir.reverse();
100    per_dir.into_iter().flatten().collect()
101}
102
103/// True when `dir` is a stop boundary for the given walk-stop mode.
104fn reached_stop(dir: &Path, stop: WalkStop) -> bool {
105    match stop {
106        WalkStop::GitRoot => dir.join(".git").exists(),
107        WalkStop::FsRoot => dir.parent().is_none(),
108        WalkStop::Both => dir.join(".git").exists() || dir.parent().is_none(),
109    }
110}
111
/// Identity of a file for dedupe purposes. `(dev, inode)` is preferred on
/// Unix; the canonicalized path is used on platforms without inode info or
/// when the metadata lookup fails.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
enum InodeKey {
    /// `(device, inode)` taken from `MetadataExt::dev()` and
    /// `MetadataExt::ino()`.
    Inode(u64, u64),
    /// Path fallback: canonicalized when possible, otherwise as given.
    Path(PathBuf),
}

/// Computes the dedupe key for `path`.
///
/// Tries the platform identity first and falls back to the canonicalized
/// path. If canonicalization fails as well, the path is used unchanged.
fn inode_key(path: &Path) -> InodeKey {
    platform_identity(path).unwrap_or_else(|| {
        let resolved = std::fs::canonicalize(path).unwrap_or_else(|_| path.to_path_buf());
        InodeKey::Path(resolved)
    })
}

/// Unix: `(dev, inode)` from `std::fs::metadata`, or `None` if that call
/// fails.
#[cfg(unix)]
fn platform_identity(path: &Path) -> Option<InodeKey> {
    use std::os::unix::fs::MetadataExt;

    std::fs::metadata(path)
        .ok()
        .map(|md| InodeKey::Inode(md.dev(), md.ino()))
}

/// Non-Unix: no inode information is used; always defers to the path
/// fallback.
#[cfg(not(unix))]
fn platform_identity(_path: &Path) -> Option<InodeKey> {
    None
}
137
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Glob set that excludes nothing.
    fn empty_globset() -> GlobSet {
        GlobSet::empty()
    }

    /// Builds an exclude set from `patterns`; panics on an invalid glob.
    fn excludes(patterns: &[&str]) -> GlobSet {
        let mut b = globset::GlobSetBuilder::new();
        for p in patterns {
            b.add(globset::Glob::new(p).unwrap());
        }
        b.build().unwrap()
    }

    #[test]
    fn walk_stop_parse_accepts_aliases_and_defaults_to_both() {
        for s in ["git_root", "gitroot", "git", " GIT_ROOT "] {
            assert_eq!(WalkStop::parse(s), WalkStop::GitRoot, "input {s:?}");
        }
        for s in ["fs_root", "fsroot", "fs", "Fs_Root"] {
            assert_eq!(WalkStop::parse(s), WalkStop::FsRoot, "input {s:?}");
        }
        for s in ["both", "", "nonsense"] {
            assert_eq!(WalkStop::parse(s), WalkStop::Both, "input {s:?}");
        }
    }

    #[test]
    fn walk_from_subdir_discovers_parent_claude_md() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        fs::create_dir_all(root.join(".git")).unwrap();
        fs::write(root.join("CLAUDE.md"), "ROOT").unwrap();
        let sub = root.join("a").join("b");
        fs::create_dir_all(&sub).unwrap();

        let found = walk_ancestors(&sub, WalkStop::GitRoot, &empty_globset());
        assert_eq!(found.len(), 1, "expected one CLAUDE.md");
        assert_eq!(
            found[0].canonicalize().unwrap(),
            root.join("CLAUDE.md").canonicalize().unwrap(),
        );
    }

    #[test]
    fn walk_concatenation_order_is_broad_to_narrow() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        fs::create_dir_all(root.join(".git")).unwrap();
        fs::write(root.join("CLAUDE.md"), "ROOT").unwrap();
        let mid = root.join("mid");
        let leaf = mid.join("leaf");
        fs::create_dir_all(&leaf).unwrap();
        fs::write(mid.join("CLAUDE.md"), "MID").unwrap();
        fs::write(leaf.join("CLAUDE.md"), "LEAF").unwrap();

        let found = walk_ancestors(&leaf, WalkStop::GitRoot, &empty_globset());
        assert_eq!(found.len(), 3);
        let bodies: Vec<_> = found
            .iter()
            .map(|p| fs::read_to_string(p).unwrap())
            .collect();
        assert_eq!(bodies, vec!["ROOT", "MID", "LEAF"]);
    }

    #[cfg(unix)]
    #[test]
    fn walk_dedupes_by_inode_when_symlink_targets_ancestor() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        fs::create_dir_all(root.join(".git")).unwrap();
        fs::write(root.join("CLAUDE.md"), "ROOT").unwrap();
        let sub = root.join("sub");
        fs::create_dir_all(&sub).unwrap();
        // Symlink sub/CLAUDE.md → root/CLAUDE.md (same inode).
        std::os::unix::fs::symlink(root.join("CLAUDE.md"), sub.join("CLAUDE.md")).unwrap();

        let found = walk_ancestors(&sub, WalkStop::GitRoot, &empty_globset());
        assert_eq!(found.len(), 1, "symlink should be deduped: {found:?}");
    }

    #[test]
    fn walk_loads_both_claude_md_and_agents_md_in_same_dir() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        fs::create_dir_all(root.join(".git")).unwrap();
        fs::write(root.join("CLAUDE.md"), "C").unwrap();
        fs::write(root.join("AGENTS.md"), "A").unwrap();
        fs::write(root.join(".caliban.md"), "K").unwrap();

        let found = walk_ancestors(root, WalkStop::GitRoot, &empty_globset());
        let names: Vec<_> = found
            .iter()
            .map(|p| p.file_name().and_then(|s| s.to_str()).unwrap().to_string())
            .collect();
        // Only one directory is visited, so the result is exactly the
        // within-directory order (most-specific first).
        assert_eq!(names, vec![".caliban.md", "CLAUDE.md", "AGENTS.md"]);
    }

    #[test]
    fn walk_honors_excludes_relative_to_cwd() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        fs::create_dir_all(root.join(".git")).unwrap();
        fs::write(root.join("CLAUDE.md"), "ROOT").unwrap();
        let vendor = root.join("vendor");
        fs::create_dir_all(&vendor).unwrap();
        fs::write(vendor.join("CLAUDE.md"), "VENDOR").unwrap();

        // Without excludes, a walk from vendor finds vendor/CLAUDE.md and
        // root/CLAUDE.md. The exclude is relative to cwd (= vendor), so the
        // bare pattern `CLAUDE.md` matches vendor's own file. The root file
        // is not under cwd (strip_prefix fails), so it is matched by its
        // full path and the glob does not apply to it.
        let g = excludes(&["CLAUDE.md"]);
        let found = walk_ancestors(&vendor, WalkStop::GitRoot, &g);

        // Assert the exact result: checking only that the vendor file is
        // absent would also pass if the walk returned nothing at all.
        assert_eq!(
            found,
            vec![root.join("CLAUDE.md")],
            "only the vendor file should be excluded"
        );
    }

    #[test]
    fn walk_excludes_via_workspace_relative_pattern() {
        // cwd is the workspace root, so the root's own CLAUDE.md has the
        // cwd-relative path `CLAUDE.md` and is matched by that bare pattern.
        // The walk only goes UP from cwd; files nested below cwd are never
        // visited here, so patterns such as `node_modules/**` are not
        // exercised by this test.
        let tmp = TempDir::new().unwrap();
        let root = tmp.path();
        fs::create_dir_all(root.join(".git")).unwrap();
        fs::write(root.join("CLAUDE.md"), "ROOT").unwrap();

        let g = excludes(&["CLAUDE.md"]); // matches the root's CLAUDE.md
        let found = walk_ancestors(root, WalkStop::GitRoot, &g);
        assert!(
            found.is_empty(),
            "excluded file should be skipped: {found:?}"
        );
    }

    #[test]
    fn walk_stops_at_git_root() {
        let tmp = TempDir::new().unwrap();
        let outer = tmp.path();
        let inner = outer.join("inner");
        let leaf = inner.join("a").join("b");
        fs::create_dir_all(&leaf).unwrap();
        fs::create_dir_all(inner.join(".git")).unwrap();
        fs::write(inner.join("CLAUDE.md"), "INNER").unwrap();
        // OUTER has a CLAUDE.md but it's outside the git root — must NOT be loaded.
        fs::write(outer.join("CLAUDE.md"), "OUTER").unwrap();

        let found = walk_ancestors(&leaf, WalkStop::GitRoot, &empty_globset());
        assert_eq!(found.len(), 1);
        let body = fs::read_to_string(&found[0]).unwrap();
        assert_eq!(body, "INNER");
    }

    #[test]
    fn walk_with_fs_root_continues_past_git_root() {
        let tmp = TempDir::new().unwrap();
        let outer = tmp.path();
        let inner = outer.join("inner");
        let leaf = inner.join("a").join("b");
        fs::create_dir_all(&leaf).unwrap();
        fs::create_dir_all(inner.join(".git")).unwrap();
        fs::write(inner.join("CLAUDE.md"), "INNER").unwrap();
        fs::write(outer.join("CLAUDE.md"), "OUTER").unwrap();

        let found = walk_ancestors(&leaf, WalkStop::FsRoot, &empty_globset());

        // The walk runs up to the filesystem root, so directories above the
        // temp dir may contribute files of their own. Only the relative
        // position of the two files created here is asserted.
        let outer_file = outer.join("CLAUDE.md");
        let inner_file = inner.join("CLAUDE.md");
        let outer_pos = found
            .iter()
            .position(|p| *p == outer_file)
            .expect("file above the git root should be loaded with FsRoot");
        let inner_pos = found
            .iter()
            .position(|p| *p == inner_file)
            .expect("file at the git root should be loaded");
        assert!(
            outer_pos < inner_pos,
            "expected broad → narrow order: {found:?}"
        );
    }
}