Skip to main content

zeph_subagent/
memory.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Persistent per-agent memory backed by `MEMORY.md` files on the local filesystem.
5//!
6//! Each sub-agent with a [`MemoryScope`] gets an isolated directory on first spawn.
7//! The first 200 lines of `MEMORY.md` are injected into the system prompt so the agent
8//! can recall information across sessions.
9//!
10//! Security guarantees:
11//! - Directory paths are validated against `AGENT_NAME_RE`
12//!   to prevent path traversal.
13//! - `MEMORY.md` is canonicalized and boundary-checked before reading (symlink escape guard).
14//! - Files larger than 256 KiB or containing null bytes are rejected.
15//! - `<agent-memory>` tags in file content are escaped to prevent prompt injection.
16
17use std::io::Read as _;
18use std::path::{Path, PathBuf};
19use std::sync::LazyLock;
20
21use regex::Regex;
22
23use super::def::{AGENT_NAME_RE, MemoryScope};
24use super::error::SubAgentError;
25
26/// Case-insensitive regex matching any variant of `<agent-memory>` or `</agent-memory>` tags.
27///
28/// Handles uppercase, mixed-case, and whitespace variants to prevent prompt injection bypass.
29static MEMORY_TAG_RE: LazyLock<Regex> =
30    LazyLock::new(|| Regex::new(r"(?i)</?(\s*)agent-memory(\s*)>").unwrap());
31
32/// Maximum allowed size for MEMORY.md (256 KiB — same cap as instruction files).
33const MAX_MEMORY_SIZE: u64 = 256 * 1024;
34
35/// Number of lines to inject from MEMORY.md into the system prompt.
36const MEMORY_INJECT_LINES: usize = 200;
37
38/// Resolve the memory directory path for a given scope and agent name.
39///
40/// Agent name is validated against the same regex enforced in `parse_with_path`.
41/// This prevents path traversal via crafted names (e.g., `../../../etc`).
42///
43/// | Scope | Directory |
44/// |-------|-----------|
45/// | `User` | `~/.zeph/agent-memory/<name>/` |
46/// | `Project` | `.zeph/agent-memory/<name>/` (relative to CWD) |
47/// | `Local` | `.zeph/agent-memory-local/<name>/` (relative to CWD) |
48///
49/// # Errors
50///
51/// Returns [`SubAgentError::Invalid`] if the agent name fails validation.
52/// Returns [`SubAgentError::Memory`] if the home directory is unavailable (`User` scope).
53///
54/// # Examples
55///
56/// ```rust,no_run
57/// use zeph_subagent::memory::resolve_memory_dir;
58/// use zeph_config::MemoryScope;
59///
60/// // Path traversal names are rejected.
61/// assert!(resolve_memory_dir(MemoryScope::Project, "../etc").is_err());
62/// // Valid names produce a usable path (relative to the current working directory).
63/// let path = resolve_memory_dir(MemoryScope::Project, "my-agent").unwrap();
64/// assert!(path.ends_with(".zeph/agent-memory/my-agent"));
65/// ```
66pub fn resolve_memory_dir(scope: MemoryScope, agent_name: &str) -> Result<PathBuf, SubAgentError> {
67    if !AGENT_NAME_RE.is_match(agent_name) {
68        return Err(SubAgentError::Invalid(format!(
69            "agent name '{agent_name}' is not valid for memory directory (must match \
70             ^[a-zA-Z0-9][a-zA-Z0-9_-]{{0,63}}$)"
71        )));
72    }
73
74    let dir = match scope {
75        MemoryScope::User => {
76            let home = dirs::home_dir().ok_or_else(|| SubAgentError::Memory {
77                name: agent_name.to_owned(),
78                reason: "home directory unavailable".to_owned(),
79            })?;
80            home.join(".zeph").join("agent-memory").join(agent_name)
81        }
82        MemoryScope::Project => {
83            let cwd = std::env::current_dir().map_err(|e| SubAgentError::Memory {
84                name: agent_name.to_owned(),
85                reason: format!("cannot determine working directory: {e}"),
86            })?;
87            cwd.join(".zeph").join("agent-memory").join(agent_name)
88        }
89        MemoryScope::Local => {
90            let cwd = std::env::current_dir().map_err(|e| SubAgentError::Memory {
91                name: agent_name.to_owned(),
92                reason: format!("cannot determine working directory: {e}"),
93            })?;
94            cwd.join(".zeph")
95                .join("agent-memory-local")
96                .join(agent_name)
97        }
98    };
99    Ok(dir)
100}
101
102/// Ensure the memory directory exists, creating it if necessary.
103///
104/// Returns the absolute path to the directory. Logs at `debug` level when the
105/// directory is newly created.
106///
107/// # Errors
108///
109/// Returns [`SubAgentError::Invalid`] if the agent name is invalid.
110/// Returns [`SubAgentError::Memory`] if the directory cannot be created.
111pub fn ensure_memory_dir(scope: MemoryScope, agent_name: &str) -> Result<PathBuf, SubAgentError> {
112    let dir = resolve_memory_dir(scope, agent_name)?;
113    // create_dir_all is idempotent — no need for a prior exists() check (REV-MED-02).
114    std::fs::create_dir_all(&dir).map_err(|e| SubAgentError::Memory {
115        name: agent_name.to_owned(),
116        reason: format!("cannot create memory directory '{}': {e}", dir.display()),
117    })?;
118    tracing::debug!(
119        agent = agent_name,
120        scope = ?scope,
121        path = %dir.display(),
122        "ensured agent memory directory"
123    );
124
125    // Warn for Local scope if .gitignore likely does not cover the directory.
126    if scope == MemoryScope::Local {
127        check_gitignore_for_local(&dir);
128    }
129
130    Ok(dir)
131}
132
133/// Reads `MEMORY.md` from the given directory and returns the first 200 lines.
134///
135/// Returns `None` if the file does not exist or is empty.
136///
137/// Security:
138/// - Canonicalizes the path and verifies it stays within `dir` (symlink boundary).
139/// - Opens the canonical path after the boundary check (no TOCTOU window).
140/// - Rejects files larger than 256 KiB.
141/// - Rejects files containing null bytes.
142pub fn load_memory_content(dir: &Path) -> Option<String> {
143    let memory_path = dir.join("MEMORY.md");
144
145    // Canonicalize to resolve any symlinks before opening.
146    let canonical = std::fs::canonicalize(&memory_path).ok()?;
147
148    // Boundary check: MEMORY.md must be within the memory directory.
149    // REV-LOW-01: canonicalize dir separately (can't derive from canonical — symlink
150    // target's parent differs from the original dir when symlink escapes boundary).
151    let canonical_dir = std::fs::canonicalize(dir).ok()?;
152    if !canonical.starts_with(&canonical_dir) {
153        tracing::warn!(
154            path = %canonical.display(),
155            boundary = %canonical_dir.display(),
156            "MEMORY.md escapes memory directory boundary via symlink, skipping"
157        );
158        return None;
159    }
160
161    // Open the canonical path — no TOCTOU window for symlink swap after this point.
162    // Read content via the same handle to avoid re-opening (REV-CRIT-01).
163    let mut file = std::fs::File::open(&canonical).ok()?;
164    let meta = file.metadata().ok()?;
165
166    if !meta.is_file() {
167        return None;
168    }
169    if meta.len() > MAX_MEMORY_SIZE {
170        tracing::warn!(
171            path = %canonical.display(),
172            size = meta.len(),
173            limit = MAX_MEMORY_SIZE,
174            "MEMORY.md exceeds 256 KiB size limit, skipping"
175        );
176        return None;
177    }
178
179    let mut content = String::with_capacity(usize::try_from(meta.len()).unwrap_or(0));
180    file.read_to_string(&mut content).ok()?;
181
182    // Security: reject files with null bytes (potential binary or injection attack).
183    if content.contains('\0') {
184        tracing::warn!(
185            path = %canonical.display(),
186            "MEMORY.md contains null bytes, skipping"
187        );
188        return None;
189    }
190
191    if content.trim().is_empty() {
192        return None;
193    }
194
195    // Truncate to the first MEMORY_INJECT_LINES lines without full Vec allocation (REV-MED-01).
196    let mut line_count = 0usize;
197    let mut byte_offset = 0usize;
198    let mut truncated = false;
199    for line in content.lines() {
200        line_count += 1;
201        if line_count > MEMORY_INJECT_LINES {
202            truncated = true;
203            break;
204        }
205        byte_offset += line.len() + 1; // +1 for newline
206    }
207
208    let result = if truncated {
209        let head = content[..byte_offset.min(content.len())].trim_end_matches('\n');
210        format!(
211            "{head}\n\n[... truncated at {MEMORY_INJECT_LINES} lines. \
212             See full file at {}]",
213            dir.join("MEMORY.md").display()
214        )
215    } else {
216        content
217    };
218
219    Some(result)
220}
221
222/// Escape `<agent-memory>` and `</agent-memory>` tags from memory content.
223///
224/// Handles case variations (`</AGENT-MEMORY>`, `</Agent-Memory >`) via case-insensitive
225/// regex. Prevents prompt injection: an agent writing the closing tag to MEMORY.md would
226/// otherwise escape the `<agent-memory>` wrapper and inject arbitrary system prompt text.
227///
228/// Trust model note: MEMORY.md is written by the agent itself, unlike user-written
229/// instruction files. Agent-written content requires stricter escaping.
230#[must_use]
231pub fn escape_memory_content(content: &str) -> String {
232    MEMORY_TAG_RE
233        .replace_all(content, "<\\/$1agent-memory$2>")
234        .into_owned()
235}
236
237/// Check if `.zeph/agent-memory-local/` appears in `.gitignore` and warn if not.
238///
239/// This is best-effort — only checks the project-root `.gitignore`.
240fn check_gitignore_for_local(memory_dir: &Path) {
241    // Walk up to find .gitignore (at most 5 levels up from memory dir).
242    let mut current = memory_dir;
243    for _ in 0..5 {
244        let Some(parent) = current.parent() else {
245            break;
246        };
247        current = parent;
248        let gitignore = current.join(".gitignore");
249        if gitignore.exists() {
250            if std::fs::read_to_string(&gitignore).is_ok_and(|c| c.contains("agent-memory-local")) {
251                return;
252            }
253            tracing::warn!(
254                "local agent memory directory is not in .gitignore — \
255                 sensitive data may be committed. Add '.zeph/agent-memory-local/' to .gitignore"
256            );
257            return;
258        }
259    }
260}
261
#[cfg(test)]
mod tests {
    #![allow(clippy::format_collect)]

    use super::*;

    /// Serializes every test that changes the process-wide current directory.
    ///
    /// The test harness runs tests on multiple threads, and the current
    /// directory is shared by the whole process. Without this lock two such
    /// tests could record each other's temporary directory as the "original"
    /// one and restore into a directory that has already been deleted.
    static CWD_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

    /// Switches the current directory for the lifetime of the guard and
    /// restores the previous one on drop, including when the test panics.
    struct CwdGuard {
        original: std::path::PathBuf,
        // Held until after `Drop::drop` has restored the directory.
        _lock: std::sync::MutexGuard<'static, ()>,
    }

    impl CwdGuard {
        fn enter(dir: &std::path::Path) -> Self {
            // A poisoned lock only means another test panicked; the guarded
            // state (the cwd) was still restored by that test's guard.
            let lock = CWD_LOCK
                .lock()
                .unwrap_or_else(std::sync::PoisonError::into_inner);
            let original = std::env::current_dir().unwrap();
            std::env::set_current_dir(dir).unwrap();
            Self {
                original,
                _lock: lock,
            }
        }
    }

    impl Drop for CwdGuard {
        fn drop(&mut self) {
            // Best effort: never panic inside drop.
            let _ = std::env::set_current_dir(&self.original);
        }
    }

    // ── resolve_memory_dir ────────────────────────────────────────────────────

    #[test]
    fn resolve_project_scope_returns_correct_path() {
        let dir = resolve_memory_dir(MemoryScope::Project, "my-agent").unwrap();
        assert!(dir.ends_with(".zeph/agent-memory/my-agent"));
    }

    #[test]
    fn resolve_local_scope_returns_correct_path() {
        let dir = resolve_memory_dir(MemoryScope::Local, "my-agent").unwrap();
        assert!(dir.ends_with(".zeph/agent-memory-local/my-agent"));
    }

    #[test]
    fn resolve_user_scope_returns_home_path() {
        let Some(home) = dirs::home_dir() else {
            return; // Skip in environments without home dir.
        };
        let dir = resolve_memory_dir(MemoryScope::User, "my-agent").unwrap();
        assert!(dir.ends_with(".zeph/agent-memory/my-agent"));
        assert!(dir.starts_with(home));
    }

    #[test]
    fn resolve_rejects_path_traversal_name() {
        let err = resolve_memory_dir(MemoryScope::Project, "../etc/passwd").unwrap_err();
        assert!(matches!(err, SubAgentError::Invalid(_)));
    }

    #[test]
    fn resolve_rejects_slash_in_name() {
        let err = resolve_memory_dir(MemoryScope::Project, "a/b").unwrap_err();
        assert!(matches!(err, SubAgentError::Invalid(_)));
    }

    #[test]
    fn resolve_rejects_empty_name() {
        let err = resolve_memory_dir(MemoryScope::Project, "").unwrap_err();
        assert!(matches!(err, SubAgentError::Invalid(_)));
    }

    #[test]
    fn resolve_rejects_whitespace_only_name() {
        let err = resolve_memory_dir(MemoryScope::Project, "   ").unwrap_err();
        assert!(matches!(err, SubAgentError::Invalid(_)));
    }

    #[test]
    fn resolve_accepts_single_char_name() {
        resolve_memory_dir(MemoryScope::Project, "a").unwrap();
    }

    #[test]
    fn resolve_accepts_64_char_name() {
        let name = "a".repeat(64);
        resolve_memory_dir(MemoryScope::Project, &name).unwrap();
    }

    #[test]
    fn resolve_rejects_65_char_name() {
        let name = "a".repeat(65);
        let err = resolve_memory_dir(MemoryScope::Project, &name).unwrap_err();
        assert!(matches!(err, SubAgentError::Invalid(_)));
    }

    #[test]
    fn resolve_rejects_unicode_cyrillic() {
        // Cyrillic 'а' (U+0430) looks like Latin 'a' but is not ASCII.
        let err = resolve_memory_dir(MemoryScope::Project, "\u{0430}gent").unwrap_err();
        assert!(matches!(err, SubAgentError::Invalid(_)));
    }

    #[test]
    fn resolve_rejects_fullwidth_slash() {
        // Full-width solidus U+FF0F.
        let err = resolve_memory_dir(MemoryScope::Project, "a\u{FF0F}b").unwrap_err();
        assert!(matches!(err, SubAgentError::Invalid(_)));
    }

    // ── ensure_memory_dir ────────────────────────────────────────────────────

    #[test]
    fn ensure_creates_directory_for_project_scope() {
        let tmp = tempfile::tempdir().unwrap();
        // Declared after `tmp` so it is dropped first: the cwd is restored
        // before the temporary directory is removed.
        let _cwd = CwdGuard::enter(tmp.path());

        let result = ensure_memory_dir(MemoryScope::Project, "test-agent").unwrap();
        assert!(result.exists());
        assert!(result.ends_with(".zeph/agent-memory/test-agent"));
    }

    #[test]
    fn ensure_idempotent_when_directory_exists() {
        let tmp = tempfile::tempdir().unwrap();
        // Declared after `tmp` so it is dropped first (see above).
        let _cwd = CwdGuard::enter(tmp.path());

        let dir1 = ensure_memory_dir(MemoryScope::Project, "idempotent-agent").unwrap();
        let dir2 = ensure_memory_dir(MemoryScope::Project, "idempotent-agent").unwrap();
        assert_eq!(dir1, dir2);
    }

    // ── load_memory_content ───────────────────────────────────────────────────

    #[test]
    fn load_returns_none_when_no_file() {
        let tmp = tempfile::tempdir().unwrap();
        assert!(load_memory_content(tmp.path()).is_none());
    }

    #[test]
    fn load_returns_content_when_file_exists() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("MEMORY.md"), "# Notes\nkey: value\n").unwrap();
        let content = load_memory_content(tmp.path()).unwrap();
        assert!(content.contains("key: value"));
    }

    #[test]
    fn load_truncates_at_200_lines() {
        use std::fmt::Write as _;

        let tmp = tempfile::tempdir().unwrap();
        let mut lines = String::new();
        for i in 0..300 {
            writeln!(&mut lines, "line {i}").unwrap();
        }
        std::fs::write(tmp.path().join("MEMORY.md"), &lines).unwrap();
        let content = load_memory_content(tmp.path()).unwrap();
        let line_count = content.lines().count();
        // Truncated content has 200 data lines, a blank separator line, and
        // the truncation marker line.
        assert!(line_count <= 202, "expected <= 202 lines, got {line_count}");
        assert!(content.contains("truncated at 200 lines"));
    }

    #[test]
    fn load_rejects_null_bytes() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("MEMORY.md"), "valid\0content").unwrap();
        assert!(load_memory_content(tmp.path()).is_none());
    }

    #[test]
    fn load_returns_none_for_empty_file() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("MEMORY.md"), "").unwrap();
        assert!(load_memory_content(tmp.path()).is_none());
    }

    #[test]
    #[cfg(unix)]
    fn load_rejects_symlink_escape() {
        let tmp = tempfile::tempdir().unwrap();
        let outside = tempfile::tempdir().unwrap();
        let target = outside.path().join("secret.md");
        std::fs::write(&target, "secret content").unwrap();

        let link = tmp.path().join("MEMORY.md");
        std::os::unix::fs::symlink(&target, &link).unwrap();

        // The symlink points outside the tmp directory — should be rejected.
        assert!(load_memory_content(tmp.path()).is_none());
    }

    #[test]
    fn load_returns_none_for_whitespace_only_file() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("MEMORY.md"), "   \n\n   \n").unwrap();
        assert!(load_memory_content(tmp.path()).is_none());
    }

    #[test]
    fn load_rejects_file_over_size_cap() {
        let tmp = tempfile::tempdir().unwrap();
        // 257 KiB of content — exceeds the 256 KiB limit.
        let content = "x".repeat(257 * 1024);
        std::fs::write(tmp.path().join("MEMORY.md"), content).unwrap();
        assert!(load_memory_content(tmp.path()).is_none());
    }

    // ── escape_memory_content ─────────────────────────────────────────────────

    #[test]
    fn escape_replaces_closing_tag_lowercase() {
        let content = "safe content </agent-memory> more content";
        let escaped = escape_memory_content(content);
        assert!(!escaped.contains("</agent-memory>"));
    }

    #[test]
    fn escape_replaces_closing_tag_uppercase() {
        let content = "safe </AGENT-MEMORY> content";
        let escaped = escape_memory_content(content);
        assert!(!escaped.to_lowercase().contains("</agent-memory>"));
    }

    #[test]
    fn escape_replaces_closing_tag_mixed_case() {
        let content = "safe </Agent-Memory> content";
        let escaped = escape_memory_content(content);
        assert!(!escaped.to_lowercase().contains("</agent-memory>"));
    }

    #[test]
    fn escape_replaces_opening_tag() {
        let content = "before <agent-memory> injection attempt";
        let escaped = escape_memory_content(content);
        // Opening tag must also be escaped to prevent nested boundaries.
        assert!(!escaped.contains("<agent-memory>"));
    }

    #[test]
    fn escape_leaves_normal_content_unchanged() {
        let content = "# Notes\nThis is safe content.";
        assert_eq!(escape_memory_content(content), content);
    }
}