Skip to main content

mermaid_cli/app/
instructions.rs

1//! MERMAID.md project-instructions loader (Step 5h).
2//!
3//! On session start, walk UP from the current working directory looking
4//! for `MERMAID.md`. Stop at the git root (any directory containing a
5//! `.git` entry) or at `$HOME`, whichever is reached first. Load the
6//! nearest file (single file wins — no merging up the tree); cap at
7//! `MAX_INSTRUCTIONS_BYTES`; pass the content to the model as a dynamic
8//! suffix on the system prompt.
9//!
10//! Auto-reload: before every model call, `refresh()` stats the loaded
11//! file's path and compares mtime. If the mtime moved, re-read; if the
12//! file is gone, drop the instructions. One stat per turn is
13//! microseconds — no need for a filesystem watcher.
14
15use std::path::{Path, PathBuf};
16use std::time::SystemTime;
17
18use crate::constants::{INSTRUCTIONS_TRUNCATION_MARKER, MAX_INSTRUCTIONS_BYTES};
19
/// Filename Mermaid looks for in each directory it inspects. Single
/// canonical name in Step 5h — alternatives like `AGENTS.md` /
/// `CLAUDE.md` deferred.
const INSTRUCTIONS_FILENAME: &str = "MERMAID.md";

/// Hard cap on how many directories `find_mermaid_md` inspects (the
/// starting directory counts as the first) before giving up. Keeps the
/// upward walk bounded even for pathologically deep paths.
const MAX_WALK_DEPTH: usize = 32;
27
/// Snapshot of a MERMAID.md file as it looked the last time it was read.
///
/// Stored on `App` and `NonInteractiveRunner` so the per-turn
/// auto-reload check has a baseline to compare the on-disk file against.
#[derive(Clone, Debug)]
pub struct LoadedInstructions {
    /// Path the content was read from.
    pub path: PathBuf,
    /// Text handed to the model. When the file exceeded the size cap this
    /// is the clipped body with the truncation marker appended, so the
    /// model can see that something was elided.
    pub content: String,
    /// Modification time observed when the file was read; the next
    /// `stat()` is compared against this to decide whether to re-read.
    pub mtime: SystemTime,
    /// Size in bytes of the file as read, before any truncation.
    pub byte_len: usize,
    /// `true` when the file was larger than `MAX_INSTRUCTIONS_BYTES` and
    /// `content` was clipped and suffixed with the marker.
    pub truncated: bool,
}

impl LoadedInstructions {
    /// Rough token estimate for status messages.
    ///
    /// Uses the common "about four bytes per token" rule of thumb on the
    /// (possibly truncated) `content`; precise enough for user-facing
    /// display, not for budgeting.
    pub fn approx_tokens(&self) -> usize {
        const BYTES_PER_TOKEN: usize = 4;
        self.content.len() / BYTES_PER_TOKEN
    }
}
55
/// What a `refresh()` call did. Callers use this to decide whether to
/// print a status line telling the user that their context changed.
#[derive(Debug, Eq, PartialEq)]
pub enum ReloadOutcome {
    /// Nothing to report: the loaded file is unchanged, or there was no
    /// file before and there still is none.
    Unchanged,
    /// No file was loaded before and one was just discovered — covers
    /// the "user created MERMAID.md mid-session" case.
    LoadedFirst { tokens: usize },
    /// The previously loaded file changed on disk and was re-read.
    Reloaded {
        old_tokens: usize,
        new_tokens: usize,
    },
    /// A file had been loaded, but it can no longer be read from disk.
    Removed,
}
73
74/// Walk UP from `start` looking for `MERMAID.md`. Stops at the first of:
75/// - a directory containing `.git` (the git root)
76/// - `$HOME` (don't search above the user's home)
77/// - filesystem root
78/// - `MAX_WALK_DEPTH` levels (symlink-loop guard)
79///
80/// Returns the absolute path of the first MERMAID.md found, or `None`
81/// if no MERMAID.md exists in the bounded walk.
82pub fn find_mermaid_md(start: &Path) -> Option<PathBuf> {
83    let home = std::env::var_os("HOME").map(PathBuf::from);
84    let mut current = start.to_path_buf();
85    for _ in 0..MAX_WALK_DEPTH {
86        // Check this directory for MERMAID.md.
87        let candidate = current.join(INSTRUCTIONS_FILENAME);
88        if candidate.is_file() {
89            return Some(candidate);
90        }
91        // Stop at the git root (the .git entry itself ends the walk;
92        // most projects vendor a single MERMAID.md at the repo root).
93        if current.join(".git").exists() {
94            return None;
95        }
96        // Stop at $HOME — don't search the user's home directory or
97        // anything above it. Avoids accidentally picking up a
98        // long-forgotten MERMAID.md from a sibling project.
99        if let Some(ref h) = home
100            && current == *h
101        {
102            return None;
103        }
104        // Move up one level. If we're at the filesystem root, stop.
105        match current.parent() {
106            Some(parent) if parent != current => current = parent.to_path_buf(),
107            _ => return None,
108        }
109    }
110    None
111}
112
113/// Read the file at `path`, truncate to `MAX_INSTRUCTIONS_BYTES` if
114/// oversized, and return a `LoadedInstructions`. Returns `None` if the
115/// file can't be read or doesn't exist.
116pub fn load_from_path(path: &Path) -> Option<LoadedInstructions> {
117    let metadata = std::fs::metadata(path).ok()?;
118    let mtime = metadata.modified().ok()?;
119    let raw = std::fs::read_to_string(path).ok()?;
120    let byte_len = raw.len();
121    let (content, truncated) = if byte_len > MAX_INSTRUCTIONS_BYTES {
122        // Char-boundary-safe truncation. `floor_char_boundary` stabilized
123        // in Rust 1.91.0 — matches the crate MSRV pinned in `Cargo.toml`.
124        let cut = raw.floor_char_boundary(MAX_INSTRUCTIONS_BYTES);
125        let mut clipped = raw[..cut].to_string();
126        clipped.push_str(INSTRUCTIONS_TRUNCATION_MARKER);
127        (clipped, true)
128    } else {
129        (raw, false)
130    };
131    Some(LoadedInstructions {
132        path: path.to_path_buf(),
133        content,
134        mtime,
135        byte_len,
136        truncated,
137    })
138}
139
140/// Per-turn auto-reload check. Compares the previously-loaded mtime to
141/// the current mtime on disk; reloads only when they differ. The hot
142/// path (file unchanged) is one `stat()` syscall — no I/O.
143///
144/// `cwd` is used to re-discover MERMAID.md when `current` is `None`
145/// (handles "user created the file mid-session" by re-running the walk).
146pub fn refresh(
147    current: Option<LoadedInstructions>,
148    cwd: &Path,
149) -> (Option<LoadedInstructions>, ReloadOutcome) {
150    match current {
151        Some(prior) => {
152            // Stat the previously-loaded path to detect edits or removal.
153            let metadata = std::fs::metadata(&prior.path);
154            match metadata.and_then(|m| m.modified()) {
155                Ok(new_mtime) if new_mtime == prior.mtime => {
156                    // Hot path: no change.
157                    (Some(prior), ReloadOutcome::Unchanged)
158                },
159                Ok(_) => {
160                    // mtime changed — re-read.
161                    let old_tokens = prior.approx_tokens();
162                    let path = prior.path.clone();
163                    match load_from_path(&path) {
164                        Some(reloaded) => {
165                            let new_tokens = reloaded.approx_tokens();
166                            (
167                                Some(reloaded),
168                                ReloadOutcome::Reloaded {
169                                    old_tokens,
170                                    new_tokens,
171                                },
172                            )
173                        },
174                        None => {
175                            // mtime moved but read failed (race or
176                            // permission) — treat as removed for safety.
177                            (None, ReloadOutcome::Removed)
178                        },
179                    }
180                },
181                Err(_) => {
182                    // File is gone or no longer accessible.
183                    (None, ReloadOutcome::Removed)
184                },
185            }
186        },
187        None => {
188            // No prior load — re-walk in case the user created
189            // MERMAID.md after session start.
190            match find_mermaid_md(cwd).and_then(|p| load_from_path(&p)) {
191                Some(loaded) => {
192                    let tokens = loaded.approx_tokens();
193                    (Some(loaded), ReloadOutcome::LoadedFirst { tokens })
194                },
195                None => (None, ReloadOutcome::Unchanged),
196            }
197        },
198    }
199}
200
#[cfg(test)]
mod tests {
    //! Filesystem-backed tests for discovery, loading and auto-reload.
    //! Every fixture lives inside a per-test directory under the system
    //! temp dir; nothing is written outside of it.

    use super::*;
    use std::fs;
    use std::sync::{Mutex, MutexGuard};
    use std::time::Duration;

    /// Tests touch the filesystem; serialize them so concurrent test
    /// runs don't see each other's temp files.
    static FS_LOCK: Mutex<()> = Mutex::new(());

    /// Take the filesystem lock, recovering it if an earlier test
    /// panicked while holding it (a poisoned lock guards no data here).
    fn fs_guard() -> MutexGuard<'static, ()> {
        FS_LOCK.lock().unwrap_or_else(|poisoned| poisoned.into_inner())
    }

    /// Create (or recreate, if a previous run left it behind) an empty
    /// per-test directory under the system temp dir.
    fn temp_dir(name: &str) -> PathBuf {
        let p = std::env::temp_dir().join(format!("mermaid_instructions_test_{}", name));
        let _ = fs::remove_dir_all(&p);
        fs::create_dir_all(&p).expect("create temp dir");
        p
    }

    #[test]
    fn find_mermaid_md_finds_in_cwd() {
        let _lock = fs_guard();
        let dir = temp_dir("cwd");
        fs::write(dir.join("MERMAID.md"), "rules").unwrap();
        let found = find_mermaid_md(&dir).expect("should find");
        assert_eq!(found, dir.join("MERMAID.md"));
        let _ = fs::remove_dir_all(&dir);
    }

    #[test]
    fn find_mermaid_md_walks_up_to_git_root() {
        let _lock = fs_guard();
        let root = temp_dir("walkup");
        fs::create_dir(root.join(".git")).unwrap();
        fs::write(root.join("MERMAID.md"), "root rules").unwrap();
        let sub = root.join("subdir/deeper");
        fs::create_dir_all(&sub).unwrap();
        let found = find_mermaid_md(&sub).expect("should walk up");
        assert_eq!(found, root.join("MERMAID.md"));
        let _ = fs::remove_dir_all(&root);
    }

    #[test]
    fn find_mermaid_md_stops_at_git_root_without_file() {
        let _lock = fs_guard();
        // Layout: <outer>/MERMAID.md sits ABOVE the git root at
        // <outer>/repo. Keeping the outside file within this test's own
        // directory means nothing leaks into the shared temp dir, even
        // if an assertion below panics.
        let outer = temp_dir("git_no_md");
        fs::write(outer.join("MERMAID.md"), "outside").unwrap();
        let root = outer.join("repo");
        fs::create_dir_all(root.join(".git")).unwrap();
        let sub = root.join("subdir");
        fs::create_dir_all(&sub).unwrap();
        let found = find_mermaid_md(&sub);
        assert!(found.is_none(), "walk must stop at .git boundary");
        let _ = fs::remove_dir_all(&outer);
    }

    #[test]
    fn find_mermaid_md_returns_none_if_absent() {
        let _lock = fs_guard();
        let dir = temp_dir("absent");
        // No MERMAID.md here. Without a boundary the walk would continue
        // up the tree and could find an unrelated file, so plant a .git
        // to make the result deterministic.
        fs::create_dir(dir.join(".git")).unwrap();
        let found = find_mermaid_md(&dir);
        assert!(found.is_none());
        let _ = fs::remove_dir_all(&dir);
    }

    #[test]
    fn load_from_path_truncates_oversized_file() {
        let _lock = fs_guard();
        let dir = temp_dir("oversized");
        let path = dir.join("MERMAID.md");
        // Write 50 KB — over the 40 KB cap.
        let big = "a".repeat(50_000);
        fs::write(&path, &big).unwrap();
        let loaded = load_from_path(&path).expect("load");
        assert!(loaded.truncated);
        assert_eq!(loaded.byte_len, 50_000); // original size preserved
        assert!(loaded.content.ends_with(INSTRUCTIONS_TRUNCATION_MARKER));
        // ASCII input, so the cut lands exactly on the cap.
        assert_eq!(
            loaded.content.len(),
            MAX_INSTRUCTIONS_BYTES + INSTRUCTIONS_TRUNCATION_MARKER.len()
        );
        let _ = fs::remove_dir_all(&dir);
    }

    #[test]
    fn load_from_path_returns_none_when_missing() {
        let _lock = fs_guard();
        let dir = temp_dir("missing");
        assert!(load_from_path(&dir.join("nope.md")).is_none());
        let _ = fs::remove_dir_all(&dir);
    }

    #[test]
    fn refresh_returns_unchanged_when_mtime_stable() {
        let _lock = fs_guard();
        let dir = temp_dir("stable");
        let path = dir.join("MERMAID.md");
        fs::write(&path, "v1").unwrap();
        let prior = load_from_path(&path).unwrap();
        let (after, outcome) = refresh(Some(prior), &dir);
        assert_eq!(outcome, ReloadOutcome::Unchanged);
        assert!(after.is_some());
        let _ = fs::remove_dir_all(&dir);
    }

    #[test]
    fn refresh_returns_reloaded_on_content_change() {
        let _lock = fs_guard();
        let dir = temp_dir("changed");
        let path = dir.join("MERMAID.md");
        fs::write(&path, "v1").unwrap();
        let prior = load_from_path(&path).unwrap();
        fs::write(&path, "v2 longer content here").unwrap();
        // Move the mtime forward explicitly instead of sleeping past the
        // filesystem's timestamp granularity: faster, and deterministic
        // even on filesystems with coarse timestamps.
        let bumped = prior.mtime + Duration::from_secs(10);
        fs::OpenOptions::new()
            .write(true)
            .open(&path)
            .and_then(|file| file.set_modified(bumped))
            .expect("set mtime");
        let (after, outcome) = refresh(Some(prior), &dir);
        assert!(matches!(outcome, ReloadOutcome::Reloaded { .. }));
        assert_eq!(after.unwrap().content, "v2 longer content here");
        let _ = fs::remove_dir_all(&dir);
    }

    #[test]
    fn refresh_returns_removed_when_file_deleted() {
        let _lock = fs_guard();
        let dir = temp_dir("removed");
        let path = dir.join("MERMAID.md");
        fs::write(&path, "v1").unwrap();
        let prior = load_from_path(&path).unwrap();
        fs::remove_file(&path).unwrap();
        let (after, outcome) = refresh(Some(prior), &dir);
        assert_eq!(outcome, ReloadOutcome::Removed);
        assert!(after.is_none());
        let _ = fs::remove_dir_all(&dir);
    }

    #[test]
    fn refresh_returns_loaded_first_on_initial_discovery() {
        let _lock = fs_guard();
        let dir = temp_dir("first");
        // Plant .git so the walk stays inside `dir`.
        fs::create_dir(dir.join(".git")).unwrap();
        // No prior load. Call refresh — should discover the new file.
        fs::write(dir.join("MERMAID.md"), "fresh").unwrap();
        let (after, outcome) = refresh(None, &dir);
        assert!(matches!(outcome, ReloadOutcome::LoadedFirst { .. }));
        assert_eq!(after.unwrap().content, "fresh");
        let _ = fs::remove_dir_all(&dir);
    }
}