// ricecoder_storage/markdown_config/parser.rs
use crate::markdown_config::error::{MarkdownConfigError, MarkdownConfigResult};
use crate::markdown_config::types::ParsedMarkdown;
use std::path::Path;

/// Stateless parser that splits a markdown document into an optional
/// frontmatter section (delimited by `---` markers) and the remaining body.
///
/// The type carries no data, so cloning it is free and a single value can be
/// reused for any number of documents.
#[derive(Debug, Clone)]
pub struct MarkdownParser;

11impl MarkdownParser {
12 pub fn new() -> Self {
14 Self
15 }
16
17 pub fn parse(&self, content: &str) -> MarkdownConfigResult<ParsedMarkdown> {
28 self.parse_with_context(content, None)
29 }
30
31 pub fn parse_with_context(
33 &self,
34 content: &str,
35 file_path: Option<&Path>,
36 ) -> MarkdownConfigResult<ParsedMarkdown> {
37 let trimmed = content.trim();
38
39 if !trimmed.starts_with("---") {
41 return Ok(ParsedMarkdown::new(None, content.to_string()));
43 }
44
45 let rest = &trimmed[3..]; let closing_delimiter_pos = rest.find("---");
48
49 match closing_delimiter_pos {
50 Some(pos) => {
51 let frontmatter = rest[..pos].trim().to_string();
53 let body_start = pos + 3; let body = rest[body_start..].trim().to_string();
55
56 if frontmatter.is_empty() {
58 let msg = match file_path {
59 Some(path) => format!(
60 "Frontmatter cannot be empty in {}",
61 path.display()
62 ),
63 None => "Frontmatter cannot be empty".to_string(),
64 };
65 return Err(MarkdownConfigError::parse_error(msg));
66 }
67
68 Ok(ParsedMarkdown::new(Some(frontmatter), body))
69 }
70 None => {
71 let msg = match file_path {
73 Some(path) => format!(
74 "Unclosed frontmatter in {}: found opening '---' but no closing '---'",
75 path.display()
76 ),
77 None => "Unclosed frontmatter: found opening '---' but no closing '---'"
78 .to_string(),
79 };
80 Err(MarkdownConfigError::parse_error(msg))
81 }
82 }
83 }
84
85}
86
87impl Default for MarkdownParser {
88 fn default() -> Self {
89 Self::new()
90 }
91}
92
#[cfg(test)]
mod tests {
    use super::*;

    /// Frontmatter and body are split at the delimiter lines and trimmed.
    #[test]
    fn test_parse_with_frontmatter() {
        let parser = MarkdownParser::new();
        let content = r#"---
name: test-agent
description: A test agent
---
# Test Content
This is the body"#;

        let result = parser.parse(content).unwrap();
        assert_eq!(
            result.frontmatter,
            Some("name: test-agent\ndescription: A test agent".to_string())
        );
        assert_eq!(result.content, "# Test Content\nThis is the body");
    }

    /// Content that does not start with `---` is returned unchanged.
    #[test]
    fn test_parse_without_frontmatter() {
        let parser = MarkdownParser::new();
        let content = "# Test Content\nThis is the body";

        let result = parser.parse(content).unwrap();
        assert_eq!(result.frontmatter, None);
        assert_eq!(result.content, "# Test Content\nThis is the body");
    }

    /// Two delimiters with nothing between them are rejected.
    #[test]
    fn test_parse_empty_frontmatter() {
        let parser = MarkdownParser::new();
        let content = r#"---
---
# Test Content"#;

        let result = parser.parse(content);
        assert!(result.is_err());
    }

    /// An opening delimiter without a closing one is rejected.
    #[test]
    fn test_parse_unclosed_frontmatter() {
        let parser = MarkdownParser::new();
        let content = r#"---
name: test
# Test Content"#;

        let result = parser.parse(content);
        assert!(result.is_err());
    }

    /// Leading whitespace around delimiters and body is tolerated.
    #[test]
    fn test_parse_with_whitespace() {
        let parser = MarkdownParser::new();
        let content = r#"  ---
name: test
  ---
  # Content"#;

        let result = parser.parse(content).unwrap();
        assert_eq!(result.frontmatter, Some("name: test".to_string()));
        assert_eq!(result.content, "# Content");
    }

    #[test]
    fn test_parse_multiline_frontmatter() {
        let parser = MarkdownParser::new();
        let content = r#"---
name: test-agent
description: A test agent
model: gpt-4
temperature: 0.7
---
# Test Content"#;

        let result = parser.parse(content).unwrap();
        assert!(result.frontmatter.is_some());
        let fm = result.frontmatter.unwrap();
        assert!(fm.contains("name: test-agent"));
        assert!(fm.contains("model: gpt-4"));
        assert_eq!(result.content, "# Test Content");
    }

    /// A document that ends right after the closing delimiter has an empty body.
    #[test]
    fn test_parse_empty_body() {
        let parser = MarkdownParser::new();
        let content = r#"---
name: test
---"#;

        let result = parser.parse(content).unwrap();
        assert_eq!(result.frontmatter, Some("name: test".to_string()));
        assert_eq!(result.content, "");
    }

    #[test]
    fn test_parse_complex_yaml_frontmatter() {
        let parser = MarkdownParser::new();
        let content = r#"---
name: complex-agent
description: Complex agent
model: gpt-4
temperature: 0.7
max_tokens: 2000
tools:
  - tool1
  - tool2
---
# Complex Content
With multiple lines
And formatting"#;

        let result = parser.parse(content).unwrap();
        assert!(result.frontmatter.is_some());
        let fm = result.frontmatter.unwrap();
        assert!(fm.contains("tools:"));
        assert!(fm.contains("- tool1"));
        assert!(fm.contains("- tool2"));
    }

    #[test]
    fn test_parse_frontmatter_with_special_characters() {
        let parser = MarkdownParser::new();
        let content = r#"---
name: test-agent
description: "Agent with special chars: @#$%^&*()"
---
Content"#;

        let result = parser.parse(content).unwrap();
        assert!(result.frontmatter.is_some());
        assert!(result.frontmatter.unwrap().contains("@#$%^&*()"));
    }

    /// Quotes are passed through untouched; the parser does not interpret YAML.
    #[test]
    fn test_parse_frontmatter_with_quotes() {
        let parser = MarkdownParser::new();
        let content = r#"---
name: "test-agent"
description: 'Single quoted'
---
Content"#;

        let result = parser.parse(content).unwrap();
        assert_eq!(
            result.frontmatter,
            Some("name: \"test-agent\"\ndescription: 'Single quoted'".to_string())
        );
        assert_eq!(result.content, "Content");
    }

    #[test]
    fn test_parse_body_with_code_blocks() {
        let parser = MarkdownParser::new();
        let content = r#"---
name: test
---
# Content

```rust
fn main() {
    println!("Hello");
}
```

More content"#;

        let result = parser.parse(content).unwrap();
        assert!(result.content.contains("```rust"));
        assert!(result.content.contains("fn main()"));
    }

    /// A `---` appearing after the closing delimiter stays in the body.
    #[test]
    fn test_parse_body_with_frontmatter_like_content() {
        let parser = MarkdownParser::new();
        let content = r#"---
name: test
---
# Content

This mentions --- but it's in the body
So it should be fine"#;

        let result = parser.parse(content).unwrap();
        assert_eq!(result.frontmatter, Some("name: test".to_string()));
        assert!(result.content.contains("---"));
    }

    /// The file path passed as context shows up in the error message.
    #[test]
    fn test_parse_with_context_error_message() {
        let parser = MarkdownParser::new();
        let content = r#"---
---
Content"#;
        let path = Path::new("test.agent.md");

        let result = parser.parse_with_context(content, Some(path));
        assert!(result.is_err());
        let error_msg = result.unwrap_err().to_string();
        assert!(error_msg.contains("test.agent.md"));
    }

    /// Parsing the same input twice yields equal results.
    #[test]
    fn test_parse_consistency() {
        let parser = MarkdownParser::new();
        let content = r#"---
name: test-agent
description: Test
---
Body content"#;

        let result1 = parser.parse(content).unwrap();
        let result2 = parser.parse(content).unwrap();

        assert_eq!(result1, result2);
    }

    /// A lone opening delimiter is an error, not an empty document.
    #[test]
    fn test_parse_only_frontmatter_delimiter() {
        let parser = MarkdownParser::new();
        let content = "---";

        let result = parser.parse(content);
        assert!(result.is_err());
    }

    /// Only the first closing delimiter ends the frontmatter.
    #[test]
    fn test_parse_multiple_delimiters_in_body() {
        let parser = MarkdownParser::new();
        let content = r#"---
name: test
---
First section
---
Second section
---
Third section"#;

        let result = parser.parse(content).unwrap();
        assert_eq!(result.frontmatter, Some("name: test".to_string()));
        assert!(result.content.contains("First section"));
        assert!(result.content.contains("Second section"));
        assert!(result.content.contains("Third section"));
    }

    #[test]
    fn test_parse_very_long_frontmatter() {
        let parser = MarkdownParser::new();
        let mut frontmatter = String::from("---\n");
        for i in 0..100 {
            frontmatter.push_str(&format!("field{}: value{}\n", i, i));
        }
        frontmatter.push_str("---\nBody");

        let result = parser.parse(&frontmatter).unwrap();
        assert!(result.frontmatter.is_some());
        assert!(result.frontmatter.unwrap().contains("field99"));
        assert_eq!(result.content, "Body");
    }

    #[test]
    fn test_parse_very_long_body() {
        let parser = MarkdownParser::new();
        let mut body = String::from("# Content\n");
        for i in 0..1000 {
            body.push_str(&format!("Line {}\n", i));
        }
        let content = format!("---\nname: test\n---\n{}", body);

        let result = parser.parse(&content).unwrap();
        assert!(result.content.contains("Line 999"));
    }

    #[test]
    fn test_parse_unicode_content() {
        let parser = MarkdownParser::new();
        let content = r#"---
name: 测试代理
description: 日本語のテスト
---
# 内容
Ελληνικά
العربية"#;

        let result = parser.parse(content).unwrap();
        assert!(result.frontmatter.unwrap().contains("测试代理"));
        assert!(result.content.contains("Ελληνικά"));
    }

    /// CRLF line endings do not leak `\r` into the parsed parts.
    #[test]
    fn test_parse_windows_line_endings() {
        let parser = MarkdownParser::new();
        let content = "---\r\nname: test\r\n---\r\nBody";

        let result = parser.parse(content).unwrap();
        assert_eq!(result.frontmatter, Some("name: test".to_string()));
        assert_eq!(result.content, "Body");
    }

    #[test]
    fn test_parse_mixed_line_endings() {
        let parser = MarkdownParser::new();
        let content = "---\nname: test\r\n---\nBody";

        let result = parser.parse(content).unwrap();
        assert_eq!(result.frontmatter, Some("name: test".to_string()));
        assert_eq!(result.content, "Body");
    }

    /// Tabs inside the frontmatter are preserved verbatim.
    #[test]
    fn test_parse_tabs_in_frontmatter() {
        let parser = MarkdownParser::new();
        let content = "---\nname:\ttest\n---\nBody";

        let result = parser.parse(content).unwrap();
        assert_eq!(result.frontmatter, Some("name:\ttest".to_string()));
        assert_eq!(result.content, "Body");
    }

    #[test]
    fn test_parse_empty_content() {
        let parser = MarkdownParser::new();
        let content = "";

        let result = parser.parse(content).unwrap();
        assert_eq!(result.frontmatter, None);
        assert_eq!(result.content, "");
    }

    /// Whitespace-only input has no frontmatter and is returned as-is.
    #[test]
    fn test_parse_only_whitespace() {
        let parser = MarkdownParser::new();
        let content = "  \n  \n  ";

        let result = parser.parse(content).unwrap();
        assert_eq!(result.frontmatter, None);
        assert_eq!(result.content, content);
    }
}