Skip to main content

agent_code_lib/memory/
scanner.rs

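//! Memory file scanner and relevance-based selection.
//!
//! Scans the memory directory for `.md` files (excluding `MEMORY.md`), reads
//! their frontmatter headers, and selects relevant memories by matching
//! keywords from recent text against each memory's name and description
//! (falling back to the filename when no frontmatter was parsed).
//! Scans are capped at 200 files and returned newest-first.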
6
7use std::path::{Path, PathBuf};
8use std::time::SystemTime;
9
10use super::types::MemoryMeta;
11
/// Maximum number of memory files returned by a scan. Files are ordered
/// newest first, so anything beyond this count is the oldest and is dropped.
const MAX_MEMORY_FILES: usize = 200;

/// Maximum number of memories surfaced per turn by relevance selection.
const MAX_RELEVANT_PER_TURN: usize = 5;

/// Maximum number of lines searched for the closing `---` frontmatter
/// delimiter in each file.
const MAX_FRONTMATTER_LINES: usize = 30;
20
/// A scanned memory file header (metadata only, not full content).
#[derive(Debug, Clone)]
pub struct MemoryHeader {
    /// File name component of `path` (for example `prefs.md`).
    pub filename: String,
    /// Path of the memory file as yielded by the directory scan.
    pub path: PathBuf,
    /// Last-modification time taken from the directory entry's metadata;
    /// scans are ordered by this value, newest first.
    pub modified: SystemTime,
    /// Parsed frontmatter, or `None` when no usable frontmatter was found.
    pub meta: Option<MemoryMeta>,
}
29
30/// Scan the memory directory for all .md files (excluding MEMORY.md).
31/// Returns headers sorted by modification time (newest first), capped at 200.
32pub fn scan_memory_files(memory_dir: &Path) -> Vec<MemoryHeader> {
33    if !memory_dir.is_dir() {
34        return Vec::new();
35    }
36
37    let mut headers: Vec<MemoryHeader> = std::fs::read_dir(memory_dir)
38        .ok()
39        .into_iter()
40        .flatten()
41        .flatten()
42        .filter(|entry| {
43            let path = entry.path();
44            path.is_file()
45                && path.extension().is_some_and(|e| e == "md")
46                && path.file_name().is_some_and(|n| n != "MEMORY.md")
47        })
48        .filter_map(|entry| {
49            let path = entry.path();
50            let modified = entry.metadata().ok()?.modified().ok()?;
51            let meta = read_frontmatter_only(&path);
52            let filename = path.file_name()?.to_str()?.to_string();
53            Some(MemoryHeader {
54                filename,
55                path,
56                modified,
57                meta,
58            })
59        })
60        .collect();
61
62    // Sort newest first.
63    headers.sort_by(|a, b| b.modified.cmp(&a.modified));
64
65    // Cap at max.
66    headers.truncate(MAX_MEMORY_FILES);
67
68    headers
69}
70
71/// Read only the YAML frontmatter from a memory file (first 30 lines).
72fn read_frontmatter_only(path: &Path) -> Option<MemoryMeta> {
73    let content = std::fs::read_to_string(path).ok()?;
74    let trimmed = content.trim_start();
75
76    if !trimmed.starts_with("---") {
77        return None;
78    }
79
80    let after_first = &trimmed[3..];
81    let closing = after_first
82        .lines()
83        .take(MAX_FRONTMATTER_LINES)
84        .position(|line| line.trim() == "---")?;
85
86    let yaml_lines: Vec<&str> = after_first.lines().take(closing).collect();
87    let yaml = yaml_lines.join("\n");
88
89    parse_simple_yaml(&yaml)
90}
91
92/// Simple YAML parser for memory frontmatter (key: value pairs).
93fn parse_simple_yaml(yaml: &str) -> Option<MemoryMeta> {
94    let mut name = String::new();
95    let mut description = String::new();
96    let mut memory_type = None;
97
98    for line in yaml.lines() {
99        let line = line.trim();
100        if line.is_empty() || line.starts_with('#') {
101            continue;
102        }
103        if let Some((key, value)) = line.split_once(':') {
104            let key = key.trim();
105            let value = value.trim().trim_matches('"').trim_matches('\'');
106            match key {
107                "name" => name = value.to_string(),
108                "description" => description = value.to_string(),
109                "type" => {
110                    memory_type = match value {
111                        "user" => Some(super::types::MemoryType::User),
112                        "feedback" => Some(super::types::MemoryType::Feedback),
113                        "project" => Some(super::types::MemoryType::Project),
114                        "reference" => Some(super::types::MemoryType::Reference),
115                        _ => None,
116                    };
117                }
118                _ => {}
119            }
120        }
121    }
122
123    if name.is_empty() && description.is_empty() {
124        return None;
125    }
126
127    Some(MemoryMeta {
128        name,
129        description,
130        memory_type,
131    })
132}
133
134/// Select the most relevant memories for a given conversation context.
135///
136/// Uses keyword matching on memory descriptions against the user's
137/// recent messages. Returns up to MAX_RELEVANT_PER_TURN file paths.
138pub fn select_relevant(
139    headers: &[MemoryHeader],
140    recent_text: &str,
141    already_surfaced: &std::collections::HashSet<PathBuf>,
142) -> Vec<PathBuf> {
143    if headers.is_empty() || recent_text.is_empty() {
144        return Vec::new();
145    }
146
147    let words: Vec<&str> = recent_text
148        .split_whitespace()
149        .filter(|w| w.len() > 3) // Skip short words.
150        .collect();
151
152    let mut scored: Vec<(&MemoryHeader, usize)> = headers
153        .iter()
154        .filter(|h| !already_surfaced.contains(&h.path))
155        .map(|h| {
156            let desc = h
157                .meta
158                .as_ref()
159                .map(|m| format!("{} {}", m.name, m.description))
160                .unwrap_or_else(|| h.filename.clone())
161                .to_lowercase();
162
163            let score: usize = words
164                .iter()
165                .filter(|w| desc.contains(&w.to_lowercase()))
166                .count();
167
168            (h, score)
169        })
170        .filter(|(_, score)| *score > 0)
171        .collect();
172
173    scored.sort_by(|a, b| b.1.cmp(&a.1));
174    scored.truncate(MAX_RELEVANT_PER_TURN);
175
176    scored.iter().map(|(h, _)| h.path.clone()).collect()
177}
178
#[cfg(test)]
mod tests {
    use super::*;

    /// Write `name` into `dir` as a memory file with the given frontmatter
    /// (placed between `---` delimiters) and body.
    fn write_memory_file(dir: &std::path::Path, name: &str, frontmatter: &str, body: &str) {
        let content = format!("---\n{frontmatter}\n---\n\n{body}");
        std::fs::write(dir.join(name), content).unwrap();
    }

    /// File name component of a path, so assertions do not depend on the
    /// randomly named temp directory that contains it.
    fn file_name_of(path: &std::path::Path) -> Option<&str> {
        path.file_name().and_then(|name| name.to_str())
    }

    #[test]
    fn test_scan_empty_dir() {
        let dir = tempfile::tempdir().unwrap();
        let headers = scan_memory_files(dir.path());
        assert!(headers.is_empty());
    }

    #[test]
    fn test_scan_finds_md_files() {
        let dir = tempfile::tempdir().unwrap();
        write_memory_file(
            dir.path(),
            "prefs.md",
            "name: Preferences\ndescription: User prefs\ntype: user",
            "I prefer Rust",
        );
        write_memory_file(
            dir.path(),
            "project.md",
            "name: Project\ndescription: Project info\ntype: project",
            "Working on agent-code",
        );
        // Non-md file should be ignored.
        std::fs::write(dir.path().join("notes.txt"), "not a memory").unwrap();
        // MEMORY.md should be ignored.
        std::fs::write(dir.path().join("MEMORY.md"), "index").unwrap();

        let headers = scan_memory_files(dir.path());
        assert_eq!(headers.len(), 2);

        // Check which files were kept, not just how many.
        let mut names: Vec<&str> = headers.iter().map(|h| h.filename.as_str()).collect();
        names.sort_unstable();
        assert_eq!(names, ["prefs.md", "project.md"]);
    }

    #[test]
    fn test_scan_parses_frontmatter() {
        let dir = tempfile::tempdir().unwrap();
        write_memory_file(
            dir.path(),
            "test.md",
            "name: My Memory\ndescription: test description\ntype: feedback",
            "content here",
        );

        let headers = scan_memory_files(dir.path());
        assert_eq!(headers.len(), 1);
        let meta = headers[0].meta.as_ref().unwrap();
        assert_eq!(meta.name, "My Memory");
        assert_eq!(meta.description, "test description");
        assert!(matches!(
            meta.memory_type,
            Some(super::super::types::MemoryType::Feedback)
        ));
    }

    #[test]
    fn test_select_relevant_by_keyword() {
        let dir = tempfile::tempdir().unwrap();
        write_memory_file(
            dir.path(),
            "rust.md",
            "name: Rust Prefs\ndescription: rust programming preferences\ntype: user",
            "I like Rust",
        );
        write_memory_file(
            dir.path(),
            "python.md",
            "name: Python Prefs\ndescription: python programming preferences\ntype: user",
            "I like Python",
        );

        let headers = scan_memory_files(dir.path());
        let surfaced = std::collections::HashSet::new();

        // rust.md matches two keywords ("rust", "programming") and python.md
        // only one, so rust.md must rank first. Compare file names rather
        // than whole paths: the temp directory name could contain "rust".
        let results = select_relevant(&headers, "tell me about rust programming", &surfaced);
        assert_eq!(
            results.first().and_then(|p| file_name_of(p)),
            Some("rust.md")
        );
    }

    #[test]
    fn test_select_relevant_skips_surfaced() {
        let dir = tempfile::tempdir().unwrap();
        write_memory_file(
            dir.path(),
            "rust.md",
            "name: Rust\ndescription: rust preferences\ntype: user",
            "content",
        );

        let headers = scan_memory_files(dir.path());
        let mut surfaced = std::collections::HashSet::new();
        surfaced.insert(dir.path().join("rust.md"));

        // Already surfaced — should not be returned.
        let results = select_relevant(&headers, "rust programming", &surfaced);
        assert!(results.is_empty());
    }

    #[test]
    fn test_select_relevant_max_limit() {
        let dir = tempfile::tempdir().unwrap();
        for i in 0..10 {
            write_memory_file(
                dir.path(),
                &format!("topic{i}.md"),
                &format!("name: Topic {i}\ndescription: matching keyword stuff\ntype: user"),
                "content about keyword",
            );
        }

        let headers = scan_memory_files(dir.path());
        assert_eq!(headers.len(), 10);

        // All ten memories match, so the result is cut to exactly the cap.
        let surfaced = std::collections::HashSet::new();
        let results = select_relevant(&headers, "keyword matching stuff topic", &surfaced);
        assert_eq!(results.len(), MAX_RELEVANT_PER_TURN);
    }
}