Skip to main content

zeph_subagent/
memory.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Persistent per-agent memory backed by `MEMORY.md` files on the local filesystem.
5//!
6//! Each sub-agent with a [`MemoryScope`] gets an isolated directory on first spawn.
7//! The first 200 lines of `MEMORY.md` are injected into the system prompt so the agent
8//! can recall information across sessions.
9//!
10//! Security guarantees:
//! - Agent names are validated against `AGENT_NAME_RE` before being used as a
//!   directory component, to prevent path traversal.
13//! - `MEMORY.md` is canonicalized and boundary-checked before reading (symlink escape guard).
14//! - Files larger than 256 KiB or containing null bytes are rejected.
15//! - `<agent-memory>` tags in file content are escaped to prevent prompt injection.
16
17use std::io::Read as _;
18use std::path::{Path, PathBuf};
19use std::sync::LazyLock;
20
21use regex::Regex;
22
23use super::def::{AGENT_NAME_RE, MemoryScope};
24use super::error::SubAgentError;
25
/// Case-insensitive regex matching `<agent-memory>` and `</agent-memory>` tag variants.
///
/// Matches any letter case (`(?i)`), with optional whitespace directly after `<` / `</`
/// and directly before `>`. Those two whitespace runs are captured as groups 1 and 2.
/// Whitespace between `<` and `/`, and tags carrying attributes, are not matched.
///
/// The pattern is a fixed literal, so the `unwrap` can only fire if that literal is invalid.
static MEMORY_TAG_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"(?i)</?(\s*)agent-memory(\s*)>").unwrap());

/// Maximum allowed size for MEMORY.md in bytes (256 KiB — same cap as instruction files).
const MAX_MEMORY_SIZE: u64 = 256 * 1024;

/// Number of lines to inject from MEMORY.md into the system prompt.
const MEMORY_INJECT_LINES: usize = 200;
37
38/// Resolve the memory directory path for a given scope and agent name.
39///
40/// Agent name is validated against the same regex enforced in `parse_with_path`.
41/// This prevents path traversal via crafted names (e.g., `../../../etc`).
42///
43/// | Scope | Directory |
44/// |-------|-----------|
45/// | `User` | `~/.zeph/agent-memory/<name>/` |
46/// | `Project` | `.zeph/agent-memory/<name>/` (relative to CWD) |
47/// | `Local` | `.zeph/agent-memory-local/<name>/` (relative to CWD) |
48///
49/// # Errors
50///
51/// Returns [`SubAgentError::Invalid`] if the agent name fails validation.
52/// Returns [`SubAgentError::Memory`] if the home directory is unavailable (`User` scope).
53///
54/// # Examples
55///
56/// ```rust,no_run
57/// use zeph_subagent::memory::resolve_memory_dir;
58/// use zeph_config::MemoryScope;
59///
60/// // Path traversal names are rejected.
61/// assert!(resolve_memory_dir(MemoryScope::Project, "../etc").is_err());
62/// // Valid names produce a usable path (relative to the current working directory).
63/// let path = resolve_memory_dir(MemoryScope::Project, "my-agent").unwrap();
64/// assert!(path.ends_with(".zeph/agent-memory/my-agent"));
65/// ```
66pub fn resolve_memory_dir(scope: MemoryScope, agent_name: &str) -> Result<PathBuf, SubAgentError> {
67    if !AGENT_NAME_RE.is_match(agent_name) {
68        return Err(SubAgentError::Invalid(format!(
69            "agent name '{agent_name}' is not valid for memory directory (must match \
70             ^[a-zA-Z0-9][a-zA-Z0-9_-]{{0,63}}$)"
71        )));
72    }
73
74    let dir = match scope {
75        MemoryScope::User => {
76            let home = dirs::home_dir().ok_or_else(|| SubAgentError::Memory {
77                name: agent_name.to_owned(),
78                reason: "home directory unavailable".to_owned(),
79            })?;
80            home.join(".zeph").join("agent-memory").join(agent_name)
81        }
82        MemoryScope::Project => {
83            let cwd = std::env::current_dir().map_err(|e| SubAgentError::Memory {
84                name: agent_name.to_owned(),
85                reason: format!("cannot determine working directory: {e}"),
86            })?;
87            cwd.join(".zeph").join("agent-memory").join(agent_name)
88        }
89        MemoryScope::Local => {
90            let cwd = std::env::current_dir().map_err(|e| SubAgentError::Memory {
91                name: agent_name.to_owned(),
92                reason: format!("cannot determine working directory: {e}"),
93            })?;
94            cwd.join(".zeph")
95                .join("agent-memory-local")
96                .join(agent_name)
97        }
98        _ => {
99            let home = dirs::home_dir().ok_or_else(|| SubAgentError::Memory {
100                name: agent_name.to_owned(),
101                reason: "home directory unavailable".to_owned(),
102            })?;
103            home.join(".zeph").join("agent-memory").join(agent_name)
104        }
105    };
106    Ok(dir)
107}
108
109/// Ensure the memory directory exists, creating it if necessary.
110///
111/// Returns the absolute path to the directory. Logs at `debug` level when the
112/// directory is newly created.
113///
114/// # Errors
115///
116/// Returns [`SubAgentError::Invalid`] if the agent name is invalid.
117/// Returns [`SubAgentError::Memory`] if the directory cannot be created.
118pub fn ensure_memory_dir(scope: MemoryScope, agent_name: &str) -> Result<PathBuf, SubAgentError> {
119    let dir = resolve_memory_dir(scope, agent_name)?;
120    // create_dir_all is idempotent — no need for a prior exists() check (REV-MED-02).
121    std::fs::create_dir_all(&dir).map_err(|e| SubAgentError::Memory {
122        name: agent_name.to_owned(),
123        reason: format!("cannot create memory directory '{}': {e}", dir.display()),
124    })?;
125    tracing::debug!(
126        agent = agent_name,
127        scope = ?scope,
128        path = %dir.display(),
129        "ensured agent memory directory"
130    );
131
132    // Warn for Local scope if .gitignore likely does not cover the directory.
133    if scope == MemoryScope::Local {
134        check_gitignore_for_local(&dir);
135    }
136
137    Ok(dir)
138}
139
140/// Reads `MEMORY.md` from the given directory and returns the first 200 lines.
141///
142/// Returns `None` if the file does not exist or is empty.
143///
144/// Security:
145/// - Canonicalizes the path and verifies it stays within `dir` (symlink boundary).
146/// - Opens the canonical path after the boundary check (no TOCTOU window).
147/// - Rejects files larger than 256 KiB.
148/// - Rejects files containing null bytes.
149pub fn load_memory_content(dir: &Path) -> Option<String> {
150    let memory_path = dir.join("MEMORY.md");
151
152    // Canonicalize to resolve any symlinks before opening.
153    let canonical = std::fs::canonicalize(&memory_path).ok()?;
154
155    // Boundary check: MEMORY.md must be within the memory directory.
156    // REV-LOW-01: canonicalize dir separately (can't derive from canonical — symlink
157    // target's parent differs from the original dir when symlink escapes boundary).
158    let canonical_dir = std::fs::canonicalize(dir).ok()?;
159    if !canonical.starts_with(&canonical_dir) {
160        tracing::warn!(
161            path = %canonical.display(),
162            boundary = %canonical_dir.display(),
163            "MEMORY.md escapes memory directory boundary via symlink, skipping"
164        );
165        return None;
166    }
167
168    // Open the canonical path — no TOCTOU window for symlink swap after this point.
169    // Read content via the same handle to avoid re-opening (REV-CRIT-01).
170    let mut file = std::fs::File::open(&canonical).ok()?;
171    let meta = file.metadata().ok()?;
172
173    if !meta.is_file() {
174        return None;
175    }
176    if meta.len() > MAX_MEMORY_SIZE {
177        tracing::warn!(
178            path = %canonical.display(),
179            size = meta.len(),
180            limit = MAX_MEMORY_SIZE,
181            "MEMORY.md exceeds 256 KiB size limit, skipping"
182        );
183        return None;
184    }
185
186    let mut content = String::with_capacity(usize::try_from(meta.len()).unwrap_or(0));
187    file.read_to_string(&mut content).ok()?;
188
189    // Security: reject files with null bytes (potential binary or injection attack).
190    if content.contains('\0') {
191        tracing::warn!(
192            path = %canonical.display(),
193            "MEMORY.md contains null bytes, skipping"
194        );
195        return None;
196    }
197
198    if content.trim().is_empty() {
199        return None;
200    }
201
202    // Truncate to the first MEMORY_INJECT_LINES lines without full Vec allocation (REV-MED-01).
203    let mut line_count = 0usize;
204    let mut byte_offset = 0usize;
205    let mut truncated = false;
206    for line in content.lines() {
207        line_count += 1;
208        if line_count > MEMORY_INJECT_LINES {
209            truncated = true;
210            break;
211        }
212        byte_offset += line.len() + 1; // +1 for newline
213    }
214
215    let result = if truncated {
216        let head = content[..byte_offset.min(content.len())].trim_end_matches('\n');
217        format!(
218            "{head}\n\n[... truncated at {MEMORY_INJECT_LINES} lines. \
219             See full file at {}]",
220            dir.join("MEMORY.md").display()
221        )
222    } else {
223        content
224    };
225
226    Some(result)
227}
228
229/// Escape `<agent-memory>` and `</agent-memory>` tags from memory content.
230///
231/// Handles case variations (`</AGENT-MEMORY>`, `</Agent-Memory >`) via case-insensitive
232/// regex. Prevents prompt injection: an agent writing the closing tag to MEMORY.md would
233/// otherwise escape the `<agent-memory>` wrapper and inject arbitrary system prompt text.
234///
235/// Trust model note: MEMORY.md is written by the agent itself, unlike user-written
236/// instruction files. Agent-written content requires stricter escaping.
237#[must_use]
238pub fn escape_memory_content(content: &str) -> String {
239    MEMORY_TAG_RE
240        .replace_all(content, "<\\/$1agent-memory$2>")
241        .into_owned()
242}
243
244/// Check if `.zeph/agent-memory-local/` appears in `.gitignore` and warn if not.
245///
246/// This is best-effort — only checks the project-root `.gitignore`.
247fn check_gitignore_for_local(memory_dir: &Path) {
248    // Walk up to find .gitignore (at most 5 levels up from memory dir).
249    let mut current = memory_dir;
250    for _ in 0..5 {
251        let Some(parent) = current.parent() else {
252            break;
253        };
254        current = parent;
255        let gitignore = current.join(".gitignore");
256        if gitignore.exists() {
257            if std::fs::read_to_string(&gitignore).is_ok_and(|c| c.contains("agent-memory-local")) {
258                return;
259            }
260            tracing::warn!(
261                "local agent memory directory is not in .gitignore — \
262                 sensitive data may be committed. Add '.zeph/agent-memory-local/' to .gitignore"
263            );
264            return;
265        }
266    }
267}
268
#[cfg(test)]
mod tests {
    #![allow(clippy::format_collect)]

    use std::sync::{Mutex, MutexGuard, PoisonError};

    use super::*;

    /// Serializes tests that change the process-wide current working directory.
    ///
    /// The test harness runs tests on parallel threads, but the CWD is shared by the
    /// whole process. Without this lock one test could capture another test's
    /// temporary directory as its "original" directory and later fail to restore it.
    static CWD_LOCK: Mutex<()> = Mutex::new(());

    /// RAII guard that switches the CWD and restores it on drop, including on panic.
    struct CwdGuard {
        original: PathBuf,
        // Held for the guard's lifetime; released only after the CWD is restored.
        _lock: MutexGuard<'static, ()>,
    }

    impl CwdGuard {
        /// Take the CWD lock, remember the current directory, and switch to `dir`.
        fn enter(dir: &Path) -> Self {
            // A poisoned lock only means another CWD test panicked; its guard has
            // already restored the directory, so continuing is safe.
            let lock = CWD_LOCK.lock().unwrap_or_else(PoisonError::into_inner);
            let original = std::env::current_dir().unwrap();
            std::env::set_current_dir(dir).unwrap();
            Self {
                original,
                _lock: lock,
            }
        }
    }

    impl Drop for CwdGuard {
        fn drop(&mut self) {
            // Never panic here: a panic while unwinding would abort the test binary.
            let _ = std::env::set_current_dir(&self.original);
        }
    }

    // ── resolve_memory_dir ────────────────────────────────────────────────────

    #[test]
    fn resolve_project_scope_returns_correct_path() {
        let dir = resolve_memory_dir(MemoryScope::Project, "my-agent").unwrap();
        assert!(dir.ends_with(".zeph/agent-memory/my-agent"));
    }

    #[test]
    fn resolve_local_scope_returns_correct_path() {
        let dir = resolve_memory_dir(MemoryScope::Local, "my-agent").unwrap();
        assert!(dir.ends_with(".zeph/agent-memory-local/my-agent"));
    }

    #[test]
    fn resolve_user_scope_returns_home_path() {
        if dirs::home_dir().is_none() {
            return; // Skip in environments without home dir.
        }
        let dir = resolve_memory_dir(MemoryScope::User, "my-agent").unwrap();
        assert!(dir.ends_with(".zeph/agent-memory/my-agent"));
        assert!(dir.starts_with(dirs::home_dir().unwrap()));
    }

    #[test]
    fn resolve_rejects_path_traversal_name() {
        let err = resolve_memory_dir(MemoryScope::Project, "../etc/passwd").unwrap_err();
        assert!(matches!(err, SubAgentError::Invalid(_)));
    }

    #[test]
    fn resolve_rejects_slash_in_name() {
        let err = resolve_memory_dir(MemoryScope::Project, "a/b").unwrap_err();
        assert!(matches!(err, SubAgentError::Invalid(_)));
    }

    #[test]
    fn resolve_rejects_empty_name() {
        let err = resolve_memory_dir(MemoryScope::Project, "").unwrap_err();
        assert!(matches!(err, SubAgentError::Invalid(_)));
    }

    #[test]
    fn resolve_rejects_whitespace_only_name() {
        let err = resolve_memory_dir(MemoryScope::Project, "   ").unwrap_err();
        assert!(matches!(err, SubAgentError::Invalid(_)));
    }

    #[test]
    fn resolve_accepts_single_char_name() {
        resolve_memory_dir(MemoryScope::Project, "a").unwrap();
    }

    #[test]
    fn resolve_accepts_64_char_name() {
        let name = "a".repeat(64);
        resolve_memory_dir(MemoryScope::Project, &name).unwrap();
    }

    #[test]
    fn resolve_rejects_65_char_name() {
        let name = "a".repeat(65);
        let err = resolve_memory_dir(MemoryScope::Project, &name).unwrap_err();
        assert!(matches!(err, SubAgentError::Invalid(_)));
    }

    #[test]
    fn resolve_rejects_unicode_cyrillic() {
        // Cyrillic 'а' (U+0430) looks like Latin 'a' but is not ASCII.
        let err = resolve_memory_dir(MemoryScope::Project, "\u{0430}gent").unwrap_err();
        assert!(matches!(err, SubAgentError::Invalid(_)));
    }

    #[test]
    fn resolve_rejects_fullwidth_slash() {
        // Full-width solidus U+FF0F.
        let err = resolve_memory_dir(MemoryScope::Project, "a\u{FF0F}b").unwrap_err();
        assert!(matches!(err, SubAgentError::Invalid(_)));
    }

    // ── ensure_memory_dir ────────────────────────────────────────────────────

    #[test]
    fn ensure_creates_directory_for_project_scope() {
        let tmp = tempfile::tempdir().unwrap();
        // Declared after `tmp` so it drops first: the CWD is restored before the
        // temporary directory is deleted.
        let _cwd = CwdGuard::enter(tmp.path());

        let result = ensure_memory_dir(MemoryScope::Project, "test-agent").unwrap();
        assert!(result.exists());
        assert!(result.ends_with(".zeph/agent-memory/test-agent"));
    }

    #[test]
    fn ensure_idempotent_when_directory_exists() {
        let tmp = tempfile::tempdir().unwrap();
        // Declared after `tmp` so it drops first: the CWD is restored before the
        // temporary directory is deleted.
        let _cwd = CwdGuard::enter(tmp.path());

        let dir1 = ensure_memory_dir(MemoryScope::Project, "idempotent-agent").unwrap();
        let dir2 = ensure_memory_dir(MemoryScope::Project, "idempotent-agent").unwrap();
        assert_eq!(dir1, dir2);
    }

    // ── load_memory_content ───────────────────────────────────────────────────

    #[test]
    fn load_returns_none_when_no_file() {
        let tmp = tempfile::tempdir().unwrap();
        assert!(load_memory_content(tmp.path()).is_none());
    }

    #[test]
    fn load_returns_content_when_file_exists() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("MEMORY.md"), "# Notes\nkey: value\n").unwrap();
        let content = load_memory_content(tmp.path()).unwrap();
        assert!(content.contains("key: value"));
    }

    #[test]
    fn load_truncates_at_200_lines() {
        let tmp = tempfile::tempdir().unwrap();
        let mut lines = String::new();
        for i in 0..300 {
            use std::fmt::Write as _;
            writeln!(&mut lines, "line {i}").unwrap();
        }
        std::fs::write(tmp.path().join("MEMORY.md"), &lines).unwrap();
        let content = load_memory_content(tmp.path()).unwrap();
        let line_count = content.lines().count();
        // Truncated content has 200 data lines + 1 blank separator + 1 marker line.
        assert!(line_count <= 202, "expected <= 202 lines, got {line_count}");
        assert!(content.contains("truncated at 200 lines"));
    }

    #[test]
    fn load_rejects_null_bytes() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("MEMORY.md"), "valid\0content").unwrap();
        assert!(load_memory_content(tmp.path()).is_none());
    }

    #[test]
    fn load_returns_none_for_empty_file() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("MEMORY.md"), "").unwrap();
        assert!(load_memory_content(tmp.path()).is_none());
    }

    #[test]
    #[cfg(unix)]
    fn load_rejects_symlink_escape() {
        let tmp = tempfile::tempdir().unwrap();
        let outside = tempfile::tempdir().unwrap();
        let target = outside.path().join("secret.md");
        std::fs::write(&target, "secret content").unwrap();

        let link = tmp.path().join("MEMORY.md");
        std::os::unix::fs::symlink(&target, &link).unwrap();

        // The symlink points outside the tmp directory — should be rejected.
        assert!(load_memory_content(tmp.path()).is_none());
    }

    #[test]
    fn load_returns_none_for_whitespace_only_file() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("MEMORY.md"), "   \n\n   \n").unwrap();
        assert!(load_memory_content(tmp.path()).is_none());
    }

    #[test]
    fn load_rejects_file_over_size_cap() {
        let tmp = tempfile::tempdir().unwrap();
        // 257 KiB of content — exceeds the 256 KiB limit.
        let content = "x".repeat(257 * 1024);
        std::fs::write(tmp.path().join("MEMORY.md"), content).unwrap();
        assert!(load_memory_content(tmp.path()).is_none());
    }

    // ── escape_memory_content ─────────────────────────────────────────────────

    #[test]
    fn escape_replaces_closing_tag_lowercase() {
        let content = "safe content </agent-memory> more content";
        let escaped = escape_memory_content(content);
        assert!(!escaped.contains("</agent-memory>"));
    }

    #[test]
    fn escape_replaces_closing_tag_uppercase() {
        let content = "safe </AGENT-MEMORY> content";
        let escaped = escape_memory_content(content);
        assert!(!escaped.to_lowercase().contains("</agent-memory>"));
    }

    #[test]
    fn escape_replaces_closing_tag_mixed_case() {
        let content = "safe </Agent-Memory> content";
        let escaped = escape_memory_content(content);
        assert!(!escaped.to_lowercase().contains("</agent-memory>"));
    }

    #[test]
    fn escape_replaces_opening_tag() {
        let content = "before <agent-memory> injection attempt";
        let escaped = escape_memory_content(content);
        // Opening tag must also be escaped to prevent nested boundaries.
        assert!(!escaped.contains("<agent-memory>"));
    }

    #[test]
    fn escape_leaves_normal_content_unchanged() {
        let content = "# Notes\nThis is safe content.";
        assert_eq!(escape_memory_content(content), content);
    }
}