Skip to main content

zeph_subagent/
memory.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use std::io::Read as _;
5use std::path::{Path, PathBuf};
6use std::sync::LazyLock;
7
8use regex::Regex;
9
10use super::def::{AGENT_NAME_RE, MemoryScope};
11use super::error::SubAgentError;
12
/// Case-insensitive regex matching any variant of `<agent-memory>` or `</agent-memory>` tags.
///
/// Handles uppercase, mixed-case, and whitespace variants to prevent prompt injection bypass.
///
/// Shape of a match: `<`, an optional `/`, optional whitespace (capture group 1), the
/// literal `agent-memory`, optional whitespace (capture group 2), then `>`. Whitespace
/// between `<` and `/` is not part of the pattern.
// The pattern is a fixed literal, so the `unwrap()` can only fail if the literal itself
// is edited into an invalid regex.
static MEMORY_TAG_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"(?i)</?(\s*)agent-memory(\s*)>").unwrap());

/// Maximum allowed size for MEMORY.md in bytes (256 KiB — same cap as instruction files).
const MAX_MEMORY_SIZE: u64 = 256 * 1024;

/// Number of lines to inject from MEMORY.md into the system prompt.
///
/// Files with more lines than this are cut and get a truncation marker appended.
const MEMORY_INJECT_LINES: usize = 200;
24
25/// Resolve the memory directory path for a given scope and agent name.
26///
27/// Agent name is validated against the same regex enforced in `parse_with_path`.
28/// This prevents path traversal via crafted names (e.g., `../../../etc`).
29///
30/// # Errors
31///
32/// Returns [`SubAgentError::Invalid`] if the agent name fails validation.
33/// Returns [`SubAgentError::Memory`] if the home directory is unavailable (`User` scope).
34pub fn resolve_memory_dir(scope: MemoryScope, agent_name: &str) -> Result<PathBuf, SubAgentError> {
35    if !AGENT_NAME_RE.is_match(agent_name) {
36        return Err(SubAgentError::Invalid(format!(
37            "agent name '{agent_name}' is not valid for memory directory (must match \
38             ^[a-zA-Z0-9][a-zA-Z0-9_-]{{0,63}}$)"
39        )));
40    }
41
42    let dir = match scope {
43        MemoryScope::User => {
44            let home = dirs::home_dir().ok_or_else(|| SubAgentError::Memory {
45                name: agent_name.to_owned(),
46                reason: "home directory unavailable".to_owned(),
47            })?;
48            home.join(".zeph").join("agent-memory").join(agent_name)
49        }
50        MemoryScope::Project => {
51            let cwd = std::env::current_dir().map_err(|e| SubAgentError::Memory {
52                name: agent_name.to_owned(),
53                reason: format!("cannot determine working directory: {e}"),
54            })?;
55            cwd.join(".zeph").join("agent-memory").join(agent_name)
56        }
57        MemoryScope::Local => {
58            let cwd = std::env::current_dir().map_err(|e| SubAgentError::Memory {
59                name: agent_name.to_owned(),
60                reason: format!("cannot determine working directory: {e}"),
61            })?;
62            cwd.join(".zeph")
63                .join("agent-memory-local")
64                .join(agent_name)
65        }
66    };
67    Ok(dir)
68}
69
70/// Ensure the memory directory exists, creating it if necessary.
71///
72/// Returns the absolute path to the directory. Logs at `debug` level when the
73/// directory is newly created.
74///
75/// # Errors
76///
77/// Returns [`SubAgentError::Invalid`] if the agent name is invalid.
78/// Returns [`SubAgentError::Memory`] if the directory cannot be created.
79pub fn ensure_memory_dir(scope: MemoryScope, agent_name: &str) -> Result<PathBuf, SubAgentError> {
80    let dir = resolve_memory_dir(scope, agent_name)?;
81    // create_dir_all is idempotent — no need for a prior exists() check (REV-MED-02).
82    std::fs::create_dir_all(&dir).map_err(|e| SubAgentError::Memory {
83        name: agent_name.to_owned(),
84        reason: format!("cannot create memory directory '{}': {e}", dir.display()),
85    })?;
86    tracing::debug!(
87        agent = agent_name,
88        scope = ?scope,
89        path = %dir.display(),
90        "ensured agent memory directory"
91    );
92
93    // Warn for Local scope if .gitignore likely does not cover the directory.
94    if scope == MemoryScope::Local {
95        check_gitignore_for_local(&dir);
96    }
97
98    Ok(dir)
99}
100
101/// Reads `MEMORY.md` from the given directory and returns the first 200 lines.
102///
103/// Returns `None` if the file does not exist or is empty.
104///
105/// Security:
106/// - Canonicalizes the path and verifies it stays within `dir` (symlink boundary).
107/// - Opens the canonical path after the boundary check (no TOCTOU window).
108/// - Rejects files larger than 256 KiB.
109/// - Rejects files containing null bytes.
110pub fn load_memory_content(dir: &Path) -> Option<String> {
111    let memory_path = dir.join("MEMORY.md");
112
113    // Canonicalize to resolve any symlinks before opening.
114    let canonical = std::fs::canonicalize(&memory_path).ok()?;
115
116    // Boundary check: MEMORY.md must be within the memory directory.
117    // REV-LOW-01: canonicalize dir separately (can't derive from canonical — symlink
118    // target's parent differs from the original dir when symlink escapes boundary).
119    let canonical_dir = std::fs::canonicalize(dir).ok()?;
120    if !canonical.starts_with(&canonical_dir) {
121        tracing::warn!(
122            path = %canonical.display(),
123            boundary = %canonical_dir.display(),
124            "MEMORY.md escapes memory directory boundary via symlink, skipping"
125        );
126        return None;
127    }
128
129    // Open the canonical path — no TOCTOU window for symlink swap after this point.
130    // Read content via the same handle to avoid re-opening (REV-CRIT-01).
131    let mut file = std::fs::File::open(&canonical).ok()?;
132    let meta = file.metadata().ok()?;
133
134    if !meta.is_file() {
135        return None;
136    }
137    if meta.len() > MAX_MEMORY_SIZE {
138        tracing::warn!(
139            path = %canonical.display(),
140            size = meta.len(),
141            limit = MAX_MEMORY_SIZE,
142            "MEMORY.md exceeds 256 KiB size limit, skipping"
143        );
144        return None;
145    }
146
147    let mut content = String::with_capacity(usize::try_from(meta.len()).unwrap_or(0));
148    file.read_to_string(&mut content).ok()?;
149
150    // Security: reject files with null bytes (potential binary or injection attack).
151    if content.contains('\0') {
152        tracing::warn!(
153            path = %canonical.display(),
154            "MEMORY.md contains null bytes, skipping"
155        );
156        return None;
157    }
158
159    if content.trim().is_empty() {
160        return None;
161    }
162
163    // Truncate to the first MEMORY_INJECT_LINES lines without full Vec allocation (REV-MED-01).
164    let mut line_count = 0usize;
165    let mut byte_offset = 0usize;
166    let mut truncated = false;
167    for line in content.lines() {
168        line_count += 1;
169        if line_count > MEMORY_INJECT_LINES {
170            truncated = true;
171            break;
172        }
173        byte_offset += line.len() + 1; // +1 for newline
174    }
175
176    let result = if truncated {
177        let head = content[..byte_offset.min(content.len())].trim_end_matches('\n');
178        format!(
179            "{head}\n\n[... truncated at {MEMORY_INJECT_LINES} lines. \
180             See full file at {}]",
181            dir.join("MEMORY.md").display()
182        )
183    } else {
184        content
185    };
186
187    Some(result)
188}
189
190/// Escape `<agent-memory>` and `</agent-memory>` tags from memory content.
191///
192/// Handles case variations (`</AGENT-MEMORY>`, `</Agent-Memory >`) via case-insensitive
193/// regex. Prevents prompt injection: an agent writing the closing tag to MEMORY.md would
194/// otherwise escape the `<agent-memory>` wrapper and inject arbitrary system prompt text.
195///
196/// Trust model note: MEMORY.md is written by the agent itself, unlike user-written
197/// instruction files. Agent-written content requires stricter escaping.
198#[must_use]
199pub fn escape_memory_content(content: &str) -> String {
200    MEMORY_TAG_RE
201        .replace_all(content, "<\\/$1agent-memory$2>")
202        .into_owned()
203}
204
205/// Check if `.zeph/agent-memory-local/` appears in `.gitignore` and warn if not.
206///
207/// This is best-effort — only checks the project-root `.gitignore`.
208fn check_gitignore_for_local(memory_dir: &Path) {
209    // Walk up to find .gitignore (at most 5 levels up from memory dir).
210    let mut current = memory_dir;
211    for _ in 0..5 {
212        let Some(parent) = current.parent() else {
213            break;
214        };
215        current = parent;
216        let gitignore = current.join(".gitignore");
217        if gitignore.exists() {
218            if std::fs::read_to_string(&gitignore).is_ok_and(|c| c.contains("agent-memory-local")) {
219                return;
220            }
221            tracing::warn!(
222                "local agent memory directory is not in .gitignore — \
223                 sensitive data may be committed. Add '.zeph/agent-memory-local/' to .gitignore"
224            );
225            return;
226        }
227    }
228}
229
#[cfg(test)]
mod tests {
    #![allow(clippy::format_collect)]

    use super::*;

    /// Serializes every test that changes the process-wide working directory.
    ///
    /// The test harness runs tests on several threads, and the working directory is shared
    /// by all of them.
    static CWD_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

    /// Switches the working directory for the lifetime of the guard and restores the
    /// previous one on drop, including when the test panics.
    struct CwdGuard {
        original: PathBuf,
        // Declared last so the lock is released only after the directory is restored.
        _lock: std::sync::MutexGuard<'static, ()>,
    }

    impl CwdGuard {
        fn enter(dir: &Path) -> Self {
            // A poisoned lock only means another test panicked; the guard itself is still usable.
            let lock = CWD_LOCK
                .lock()
                .unwrap_or_else(std::sync::PoisonError::into_inner);
            let original = std::env::current_dir().unwrap();
            std::env::set_current_dir(dir).unwrap();
            Self {
                original,
                _lock: lock,
            }
        }
    }

    impl Drop for CwdGuard {
        fn drop(&mut self) {
            // Never panic in drop; a failed restore must not mask the test's own failure.
            let _ = std::env::set_current_dir(&self.original);
        }
    }

    // ── resolve_memory_dir ────────────────────────────────────────────────────

    #[test]
    fn resolve_project_scope_returns_correct_path() {
        let dir = resolve_memory_dir(MemoryScope::Project, "my-agent").unwrap();
        assert!(dir.ends_with(".zeph/agent-memory/my-agent"));
    }

    #[test]
    fn resolve_local_scope_returns_correct_path() {
        let dir = resolve_memory_dir(MemoryScope::Local, "my-agent").unwrap();
        assert!(dir.ends_with(".zeph/agent-memory-local/my-agent"));
    }

    #[test]
    fn resolve_user_scope_returns_home_path() {
        let Some(home) = dirs::home_dir() else {
            return; // Skip in environments without home dir.
        };
        let dir = resolve_memory_dir(MemoryScope::User, "my-agent").unwrap();
        assert!(dir.ends_with(".zeph/agent-memory/my-agent"));
        assert!(dir.starts_with(home));
    }

    #[test]
    fn resolve_rejects_path_traversal_name() {
        let err = resolve_memory_dir(MemoryScope::Project, "../etc/passwd").unwrap_err();
        assert!(matches!(err, SubAgentError::Invalid(_)));
    }

    #[test]
    fn resolve_rejects_slash_in_name() {
        let err = resolve_memory_dir(MemoryScope::Project, "a/b").unwrap_err();
        assert!(matches!(err, SubAgentError::Invalid(_)));
    }

    #[test]
    fn resolve_rejects_empty_name() {
        let err = resolve_memory_dir(MemoryScope::Project, "").unwrap_err();
        assert!(matches!(err, SubAgentError::Invalid(_)));
    }

    #[test]
    fn resolve_rejects_whitespace_only_name() {
        let err = resolve_memory_dir(MemoryScope::Project, "   ").unwrap_err();
        assert!(matches!(err, SubAgentError::Invalid(_)));
    }

    #[test]
    fn resolve_accepts_single_char_name() {
        resolve_memory_dir(MemoryScope::Project, "a").unwrap();
    }

    #[test]
    fn resolve_accepts_64_char_name() {
        let name = "a".repeat(64);
        resolve_memory_dir(MemoryScope::Project, &name).unwrap();
    }

    #[test]
    fn resolve_rejects_65_char_name() {
        let name = "a".repeat(65);
        let err = resolve_memory_dir(MemoryScope::Project, &name).unwrap_err();
        assert!(matches!(err, SubAgentError::Invalid(_)));
    }

    #[test]
    fn resolve_rejects_unicode_cyrillic() {
        // Cyrillic 'а' (U+0430) looks like Latin 'a' but is not ASCII.
        let err = resolve_memory_dir(MemoryScope::Project, "\u{0430}gent").unwrap_err();
        assert!(matches!(err, SubAgentError::Invalid(_)));
    }

    #[test]
    fn resolve_rejects_fullwidth_slash() {
        // Full-width solidus U+FF0F.
        let err = resolve_memory_dir(MemoryScope::Project, "a\u{FF0F}b").unwrap_err();
        assert!(matches!(err, SubAgentError::Invalid(_)));
    }

    // ── ensure_memory_dir ────────────────────────────────────────────────────

    #[test]
    fn ensure_creates_directory_for_project_scope() {
        let tmp = tempfile::tempdir().unwrap();
        // Declared after `tmp`, so the working directory is restored before `tmp` is removed.
        let _cwd = CwdGuard::enter(tmp.path());

        let result = ensure_memory_dir(MemoryScope::Project, "test-agent").unwrap();
        assert!(result.exists());
        assert!(result.ends_with(".zeph/agent-memory/test-agent"));
    }

    #[test]
    fn ensure_idempotent_when_directory_exists() {
        let tmp = tempfile::tempdir().unwrap();
        let _cwd = CwdGuard::enter(tmp.path());

        let dir1 = ensure_memory_dir(MemoryScope::Project, "idempotent-agent").unwrap();
        let dir2 = ensure_memory_dir(MemoryScope::Project, "idempotent-agent").unwrap();
        assert_eq!(dir1, dir2);
    }

    // ── load_memory_content ───────────────────────────────────────────────────

    #[test]
    fn load_returns_none_when_no_file() {
        let tmp = tempfile::tempdir().unwrap();
        assert!(load_memory_content(tmp.path()).is_none());
    }

    #[test]
    fn load_returns_content_when_file_exists() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("MEMORY.md"), "# Notes\nkey: value\n").unwrap();
        let content = load_memory_content(tmp.path()).unwrap();
        assert!(content.contains("key: value"));
    }

    #[test]
    fn load_truncates_at_200_lines() {
        use std::fmt::Write as _;

        let tmp = tempfile::tempdir().unwrap();
        let mut lines = String::new();
        for i in 0..300 {
            writeln!(&mut lines, "line {i}").unwrap();
        }
        std::fs::write(tmp.path().join("MEMORY.md"), &lines).unwrap();
        let content = load_memory_content(tmp.path()).unwrap();
        let line_count = content.lines().count();
        // 200 data lines + 1 blank separator line + 1 truncation marker line.
        assert_eq!(line_count, 202, "expected 202 lines, got {line_count}");
        assert!(content.contains("line 199\n"));
        assert!(!content.contains("line 200"));
        assert!(content.contains("truncated at 200 lines"));
    }

    #[test]
    fn load_rejects_null_bytes() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("MEMORY.md"), "valid\0content").unwrap();
        assert!(load_memory_content(tmp.path()).is_none());
    }

    #[test]
    fn load_returns_none_for_empty_file() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("MEMORY.md"), "").unwrap();
        assert!(load_memory_content(tmp.path()).is_none());
    }

    #[test]
    #[cfg(unix)]
    fn load_rejects_symlink_escape() {
        let tmp = tempfile::tempdir().unwrap();
        let outside = tempfile::tempdir().unwrap();
        let target = outside.path().join("secret.md");
        std::fs::write(&target, "secret content").unwrap();

        let link = tmp.path().join("MEMORY.md");
        std::os::unix::fs::symlink(&target, &link).unwrap();

        // The symlink points outside the tmp directory — should be rejected.
        assert!(load_memory_content(tmp.path()).is_none());
    }

    #[test]
    fn load_returns_none_for_whitespace_only_file() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("MEMORY.md"), "   \n\n   \n").unwrap();
        assert!(load_memory_content(tmp.path()).is_none());
    }

    #[test]
    fn load_rejects_file_over_size_cap() {
        let tmp = tempfile::tempdir().unwrap();
        // 257 KiB of content — exceeds the 256 KiB limit.
        let content = "x".repeat(257 * 1024);
        std::fs::write(tmp.path().join("MEMORY.md"), content).unwrap();
        assert!(load_memory_content(tmp.path()).is_none());
    }

    // ── escape_memory_content ─────────────────────────────────────────────────

    #[test]
    fn escape_replaces_closing_tag_lowercase() {
        let content = "safe content </agent-memory> more content";
        let escaped = escape_memory_content(content);
        assert!(!escaped.contains("</agent-memory>"));
    }

    #[test]
    fn escape_replaces_closing_tag_uppercase() {
        let content = "safe </AGENT-MEMORY> content";
        let escaped = escape_memory_content(content);
        assert!(!escaped.to_lowercase().contains("</agent-memory>"));
    }

    #[test]
    fn escape_replaces_closing_tag_mixed_case() {
        let content = "safe </Agent-Memory> content";
        let escaped = escape_memory_content(content);
        assert!(!escaped.to_lowercase().contains("</agent-memory>"));
    }

    #[test]
    fn escape_replaces_opening_tag() {
        let content = "before <agent-memory> injection attempt";
        let escaped = escape_memory_content(content);
        // Opening tag must also be escaped to prevent nested boundaries.
        assert!(!escaped.contains("<agent-memory>"));
    }

    #[test]
    fn escape_leaves_normal_content_unchanged() {
        let content = "# Notes\nThis is safe content.";
        assert_eq!(escape_memory_content(content), content);
    }
}