Skip to main content

ralph_core/
memory_parser.rs

1//! Markdown parser for the memories file format.
2//!
3//! Parses `.ralph/agent/memories.md` into a vector of `Memory` structs.
4//! The format uses:
5//! - `## Section` headers to denote memory types
6//! - `### mem-{id}` headers for individual memories
7//! - `> content` blockquotes for memory content
8//! - `<!-- tags: ... | created: ... -->` HTML comments for metadata
9
10use regex::Regex;
11use std::sync::LazyLock;
12
13use crate::memory::{Memory, MemoryType};
14
15/// Regex to match section headers like `## Patterns`
16static SECTION_RE: LazyLock<Regex> =
17    LazyLock::new(|| Regex::new(r"^## (Patterns|Decisions|Fixes|Context)").unwrap());
18
19/// Regex to match memory ID headers like `### mem-1737372000-a1b2`
20static MEMORY_ID_RE: LazyLock<Regex> =
21    LazyLock::new(|| Regex::new(r"^### (mem-\d+-[0-9a-f]{4})").unwrap());
22
23/// Regex to match blockquote content lines like `> content`
24static CONTENT_RE: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^> (.+)$").unwrap());
25
26/// Regex to match metadata HTML comments like `<!-- tags: a, b | created: 2025-01-20 -->`
27static METADATA_RE: LazyLock<Regex> = LazyLock::new(|| {
28    Regex::new(r"<!-- tags: ([^|]*) \| created: (\d{4}-\d{2}-\d{2}) -->").unwrap()
29});
30
31/// Parse a memories markdown file into a vector of Memory structs.
32///
33/// # Arguments
34/// * `markdown` - The contents of a `.ralph/agent/memories.md` file
35///
36/// # Returns
37/// A vector of parsed memories. Malformed memory blocks are skipped.
38///
39/// # Example
40/// ```
41/// use ralph_core::memory_parser::parse_memories;
42///
43/// let markdown = "# Memories\n\n## Patterns\n\n### mem-1737372000-a1b2\n> Uses barrel exports\n<!-- tags: imports, structure | created: 2025-01-20 -->\n";
44///
45/// let memories = parse_memories(markdown);
46/// assert_eq!(memories.len(), 1);
47/// assert_eq!(memories[0].content, "Uses barrel exports");
48/// ```
49pub fn parse_memories(markdown: &str) -> Vec<Memory> {
50    let mut memories = Vec::new();
51    let mut current_type = MemoryType::Pattern;
52    let mut current_id: Option<String> = None;
53    let mut current_content: Vec<String> = Vec::new();
54    let mut current_tags: Vec<String> = Vec::new();
55    let mut current_created: Option<String> = None;
56
57    for line in markdown.lines() {
58        if let Some(caps) = SECTION_RE.captures(line) {
59            // Flush any pending memory before switching sections
60            flush_memory(
61                &mut memories,
62                &mut current_id,
63                current_type,
64                &mut current_content,
65                &mut current_tags,
66                &mut current_created,
67            );
68            current_type = MemoryType::from_section(&caps[1]).unwrap_or(MemoryType::Pattern);
69        } else if let Some(caps) = MEMORY_ID_RE.captures(line) {
70            // Flush any pending memory before starting a new one
71            flush_memory(
72                &mut memories,
73                &mut current_id,
74                current_type,
75                &mut current_content,
76                &mut current_tags,
77                &mut current_created,
78            );
79            current_id = Some(caps[1].to_string());
80        } else if let Some(caps) = CONTENT_RE.captures(line) {
81            current_content.push(caps[1].to_string());
82        } else if let Some(caps) = METADATA_RE.captures(line) {
83            current_tags = caps[1]
84                .split(',')
85                .map(|s| s.trim().to_string())
86                .filter(|s| !s.is_empty())
87                .collect();
88            current_created = Some(caps[2].to_string());
89        }
90    }
91
92    // Flush any remaining memory
93    flush_memory(
94        &mut memories,
95        &mut current_id,
96        current_type,
97        &mut current_content,
98        &mut current_tags,
99        &mut current_created,
100    );
101
102    memories
103}
104
105/// Helper to finalize and push a memory if we have enough data.
106fn flush_memory(
107    memories: &mut Vec<Memory>,
108    current_id: &mut Option<String>,
109    current_type: MemoryType,
110    current_content: &mut Vec<String>,
111    current_tags: &mut Vec<String>,
112    current_created: &mut Option<String>,
113) {
114    if let Some(id) = current_id.take()
115        && !current_content.is_empty()
116    {
117        memories.push(Memory {
118            id,
119            memory_type: current_type,
120            content: current_content.join("\n"),
121            tags: std::mem::take(current_tags),
122            created: current_created
123                .take()
124                .unwrap_or_else(|| chrono::Utc::now().format("%Y-%m-%d").to_string()),
125        });
126    }
127    current_content.clear();
128}
129
/// Unit tests for `parse_memories`, each driven by an inline markdown fixture.
#[cfg(test)]
mod tests {
    use super::*;

    /// A single well-formed block yields one memory with every field populated.
    #[test]
    fn test_parse_single_memory() {
        let markdown = r"# Memories

## Patterns

### mem-1737372000-a1b2
> Uses barrel exports for modules
<!-- tags: imports, structure | created: 2025-01-20 -->
";

        let memories = parse_memories(markdown);
        assert_eq!(memories.len(), 1);

        let mem = &memories[0];
        assert_eq!(mem.id, "mem-1737372000-a1b2");
        assert_eq!(mem.memory_type, MemoryType::Pattern);
        assert_eq!(mem.content, "Uses barrel exports for modules");
        assert_eq!(mem.tags, vec!["imports", "structure"]);
        assert_eq!(mem.created, "2025-01-20");
    }

    /// Each of the four section headers maps to its own memory type.
    #[test]
    fn test_parse_multiple_sections() {
        let markdown = r"# Memories

## Patterns

### mem-1737372000-a1b2
> Uses barrel exports
<!-- tags: imports | created: 2025-01-20 -->

## Decisions

### mem-1737372100-c3d4
> Chose Postgres over SQLite
<!-- tags: database | created: 2025-01-20 -->

## Fixes

### mem-1737372200-e5f6
> ECONNREFUSED means start docker
<!-- tags: docker, debugging | created: 2025-01-21 -->

## Context

### mem-1737372300-a7b8
> ralph-core is the shared library
<!-- tags: architecture | created: 2025-01-21 -->
";

        let memories = parse_memories(markdown);
        assert_eq!(memories.len(), 4);

        assert_eq!(memories[0].memory_type, MemoryType::Pattern);
        assert_eq!(memories[1].memory_type, MemoryType::Decision);
        assert_eq!(memories[2].memory_type, MemoryType::Fix);
        assert_eq!(memories[3].memory_type, MemoryType::Context);
    }

    /// Consecutive blockquote lines are joined with newlines.
    #[test]
    fn test_parse_multiline_content() {
        let markdown = r"# Memories

## Patterns

### mem-1737372000-a1b2
> First line of content
> Second line of content
> Third line of content
<!-- tags: multiline | created: 2025-01-20 -->
";

        let memories = parse_memories(markdown);
        assert_eq!(memories.len(), 1);
        assert_eq!(
            memories[0].content,
            "First line of content\nSecond line of content\nThird line of content"
        );
    }

    /// A block without a metadata comment gets no tags and today's UTC date.
    #[test]
    fn test_parse_missing_metadata_uses_defaults() {
        let markdown = r"# Memories

## Patterns

### mem-1737372000-a1b2
> Some content without metadata
";

        // Sample the date on both sides of the call so the test cannot fail
        // spuriously when it happens to run across UTC midnight.
        let before = chrono::Utc::now().format("%Y-%m-%d").to_string();
        let memories = parse_memories(markdown);
        let after = chrono::Utc::now().format("%Y-%m-%d").to_string();
        assert_eq!(memories.len(), 1);

        let mem = &memories[0];
        assert_eq!(mem.content, "Some content without metadata");
        assert!(mem.tags.is_empty());
        assert!(
            mem.created == before || mem.created == after,
            "created = {}, expected {} or {}",
            mem.created,
            before,
            after
        );
    }

    /// A header whose ID does not follow `mem-{digits}-{4 hex}` produces no memory.
    #[test]
    fn test_parse_ignores_malformed_blocks() {
        let markdown = r"# Memories

## Patterns

### mem-1737372000-a1b2
> Valid memory
<!-- tags: valid | created: 2025-01-20 -->

### mem-invalid-format
> This has an invalid ID format and will be skipped

### mem-1737372100-c3d4
> Another valid memory
<!-- tags: also-valid | created: 2025-01-21 -->
";

        let memories = parse_memories(markdown);
        // `mem-invalid-format` does not match the memory ID pattern, so no
        // memory is created for it; only the two valid IDs come back.
        assert_eq!(memories.len(), 2);
        assert_eq!(memories[0].id, "mem-1737372000-a1b2");
        assert_eq!(memories[1].id, "mem-1737372100-c3d4");
    }

    /// Empty input yields no memories.
    #[test]
    fn test_parse_empty_file() {
        let memories = parse_memories("");
        assert!(memories.is_empty());
    }

    /// A metadata comment with a blank tag list yields an empty tag vector.
    #[test]
    fn test_parse_empty_tags() {
        let markdown = r"# Memories

## Patterns

### mem-1737372000-a1b2
> Content with empty tags
<!-- tags:  | created: 2025-01-20 -->
";

        let memories = parse_memories(markdown);
        assert_eq!(memories.len(), 1);
        assert!(memories[0].tags.is_empty());
    }

    /// A block with an ID and metadata but no blockquote content is dropped.
    #[test]
    fn test_parse_memory_without_content_is_skipped() {
        let markdown = r"# Memories

## Patterns

### mem-1737372000-a1b2
<!-- tags: no-content | created: 2025-01-20 -->

### mem-1737372100-c3d4
> This one has content
<!-- tags: valid | created: 2025-01-20 -->
";

        let memories = parse_memories(markdown);
        // Memory without content should be skipped
        assert_eq!(memories.len(), 1);
        assert_eq!(memories[0].id, "mem-1737372100-c3d4");
    }
}