agentic_navigation_guide/
parser.rs

1//! Parser for navigation guide markdown files
2
3use crate::errors::{Result, SyntaxError};
4use crate::types::{FilesystemItem, NavigationGuide, NavigationGuideLine};
5use regex::Regex;
6
7/// Parser for navigation guide markdown content
8pub struct Parser {
9    /// Regular expression for detecting list items
10    list_item_regex: Regex,
11    /// Regular expression for parsing path and comment
12    path_comment_regex: Regex,
13}
14
15impl Parser {
16    /// Create a new parser instance
17    pub fn new() -> Self {
18        Self {
19            list_item_regex: Regex::new(r"^(\s*)-\s+(.+)$").unwrap(),
20            path_comment_regex: Regex::new(r"^([^#]+?)(?:\s*#\s*(.*))?$").unwrap(),
21        }
22    }
23
24    /// Parse navigation guide content from a markdown string
25    pub fn parse(&self, content: &str) -> Result<NavigationGuide> {
26        // Find the guide block
27        let (prologue, guide_content, epilogue, line_offset) = self.extract_guide_block(content)?;
28
29        // Parse the guide content
30        let items = self.parse_guide_content(&guide_content, line_offset)?;
31
32        Ok(NavigationGuide {
33            items,
34            prologue,
35            epilogue,
36        })
37    }
38
39    /// Extract the guide block from the markdown content
40    fn extract_guide_block(
41        &self,
42        content: &str,
43    ) -> Result<(Option<String>, String, Option<String>, usize)> {
44        let lines: Vec<&str> = content.lines().collect();
45        let mut start_idx = None;
46        let mut end_idx = None;
47
48        // Find the opening and closing markers
49        for (idx, line) in lines.iter().enumerate() {
50            if line.trim() == "<agentic-navigation-guide>" {
51                if start_idx.is_some() {
52                    return Err(SyntaxError::MultipleGuideBlocks { line: idx + 1 }.into());
53                }
54                start_idx = Some(idx);
55            } else if line.trim() == "</agentic-navigation-guide>" {
56                end_idx = Some(idx);
57                break;
58            }
59        }
60
61        // Validate markers
62        let start = start_idx.ok_or(SyntaxError::MissingOpeningMarker { line: 1 })?;
63        let end = end_idx.ok_or(SyntaxError::MissingClosingMarker { line: lines.len() })?;
64
65        // Extract prologue, guide content, and epilogue
66        let prologue = if start > 0 {
67            Some(lines[..start].join("\n"))
68        } else {
69            None
70        };
71
72        let guide_content = lines[start + 1..end].join("\n");
73
74        let epilogue = if end + 1 < lines.len() {
75            Some(lines[end + 1..].join("\n"))
76        } else {
77            None
78        };
79
80        // Calculate line offset: prologue lines + opening tag line
81        let line_offset = start + 1;
82
83        Ok((prologue, guide_content, epilogue, line_offset))
84    }
85
86    /// Parse the guide content into navigation guide lines
87    fn parse_guide_content(
88        &self,
89        content: &str,
90        line_offset: usize,
91    ) -> Result<Vec<NavigationGuideLine>> {
92        if content.trim().is_empty() {
93            return Err(SyntaxError::EmptyGuideBlock.into());
94        }
95
96        let mut items = Vec::new();
97        let mut indent_size = None;
98        let lines: Vec<&str> = content.lines().collect();
99
100        for (idx, line) in lines.iter().enumerate() {
101            // Calculate the actual line number in the file
102            let line_number = idx + 1 + line_offset;
103
104            // Check for blank lines
105            if line.trim().is_empty() {
106                return Err(SyntaxError::BlankLineInGuide { line: line_number }.into());
107            }
108
109            // Parse the list item
110            if let Some(captures) = self.list_item_regex.captures(line) {
111                let indent = captures.get(1).unwrap().as_str().len();
112                let content = captures.get(2).unwrap().as_str();
113
114                // Determine indent size from first indented item
115                if indent > 0 && indent_size.is_none() {
116                    indent_size = Some(indent);
117                }
118
119                // Validate indentation
120                let indent_level = if indent == 0 {
121                    0
122                } else if let Some(size) = indent_size {
123                    if indent % size != 0 {
124                        return Err(
125                            SyntaxError::InvalidIndentationLevel { line: line_number }.into()
126                        );
127                    }
128                    indent / size
129                } else {
130                    // First indented item
131                    1
132                };
133
134                // Parse path and comment
135                let (path, comment) = self.parse_path_comment(content, line_number)?;
136
137                // Determine item type
138                let item = if path == "..." {
139                    FilesystemItem::Placeholder { comment }
140                } else if path.ends_with('/') {
141                    FilesystemItem::Directory {
142                        path: path.trim_end_matches('/').to_string(),
143                        comment,
144                        children: Vec::new(),
145                    }
146                } else {
147                    // Could be a file or symlink - we'll treat as file for now
148                    FilesystemItem::File { path, comment }
149                };
150
151                items.push(NavigationGuideLine {
152                    line_number,
153                    indent_level,
154                    item,
155                });
156            } else {
157                return Err(SyntaxError::InvalidListFormat { line: line_number }.into());
158            }
159        }
160
161        // Build the hierarchy
162        let hierarchical_items = self.build_hierarchy(items)?;
163
164        Ok(hierarchical_items)
165    }
166
167    /// Parse path and optional comment from item content
168    fn parse_path_comment(
169        &self,
170        content: &str,
171        line_number: usize,
172    ) -> Result<(String, Option<String>)> {
173        if let Some(captures) = self.path_comment_regex.captures(content) {
174            let path = captures.get(1).unwrap().as_str().trim().to_string();
175            let comment = captures.get(2).map(|m| m.as_str().trim().to_string());
176
177            // Validate path
178            if path.is_empty() {
179                return Err(SyntaxError::InvalidPathFormat {
180                    line: line_number,
181                    path: String::new(),
182                }
183                .into());
184            }
185
186            // Check for special directories (but allow "..." placeholder)
187            if path == "..." {
188                // Allowed as placeholder
189            } else if path == "." || path == ".." || path == "./" || path == "../" {
190                return Err(SyntaxError::InvalidSpecialDirectory {
191                    line: line_number,
192                    path,
193                }
194                .into());
195            }
196
197            Ok((path, comment))
198        } else {
199            Err(SyntaxError::InvalidPathFormat {
200                line: line_number,
201                path: content.to_string(),
202            }
203            .into())
204        }
205    }
206
207    /// Build a hierarchical structure from flat list items
208    fn build_hierarchy(&self, items: Vec<NavigationGuideLine>) -> Result<Vec<NavigationGuideLine>> {
209        if items.is_empty() {
210            return Ok(Vec::new());
211        }
212
213        // First pass: organize items by their parent-child relationships
214        let mut result: Vec<NavigationGuideLine> = Vec::new();
215        let mut parent_indices: Vec<Option<usize>> = vec![None; items.len()];
216
217        // Find parent index for each item
218        for i in 0..items.len() {
219            let current_level = items[i].indent_level;
220
221            if current_level == 0 {
222                parent_indices[i] = None; // Root item
223            } else {
224                // Find the nearest preceding directory at level current_level - 1
225                let mut parent_found = false;
226                for j in (0..i).rev() {
227                    if items[j].indent_level == current_level - 1 && items[j].is_directory() {
228                        parent_indices[i] = Some(j);
229                        parent_found = true;
230                        break;
231                    } else if items[j].indent_level < current_level - 1 {
232                        // Gone too far up the hierarchy
233                        break;
234                    }
235                }
236
237                if !parent_found {
238                    return Err(SyntaxError::InvalidIndentationLevel {
239                        line: items[i].line_number,
240                    }
241                    .into());
242                }
243            }
244        }
245
246        // Second pass: build the tree
247        // We need to process items in reverse order to ensure children are complete before adding to parents
248        let mut processed_items: Vec<Option<NavigationGuideLine>> =
249            items.into_iter().map(Some).collect();
250
251        // Process from last to first
252        for i in (0..processed_items.len()).rev() {
253            if let Some(item) = processed_items[i].take() {
254                if let Some(parent_idx) = parent_indices[i] {
255                    // Add this item to its parent's children
256                    if let Some(ref mut parent) = processed_items[parent_idx] {
257                        match &mut parent.item {
258                            FilesystemItem::Directory { children, .. } => {
259                                // Insert at the beginning to maintain order
260                                children.insert(0, item);
261                            }
262                            _ => {
263                                return Err(SyntaxError::InvalidIndentationLevel {
264                                    line: item.line_number,
265                                }
266                                .into());
267                            }
268                        }
269                    }
270                } else {
271                    // Root item - add to result
272                    result.insert(0, item);
273                }
274            }
275        }
276
277        Ok(result)
278    }
279}
280
281impl Default for Parser {
282    fn default() -> Self {
283        Self::new()
284    }
285}
286
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input` with a fresh parser and panics if parsing fails.
    fn parse_ok(input: &str) -> NavigationGuide {
        Parser::new().parse(input).unwrap()
    }

    /// A directory with one nested file plus a root-level file.
    #[test]
    fn test_parse_minimal_guide() {
        let guide = parse_ok(
            r#"<agentic-navigation-guide>
- src/
  - main.rs
- Cargo.toml
</agentic-navigation-guide>"#,
        );

        // src/ and Cargo.toml at root level
        assert_eq!(guide.items.len(), 2);

        let src = &guide.items[0];
        assert!(src.is_directory());
        assert_eq!(src.path(), "src");

        // src/ must own main.rs as its only child
        let nested = src.children().expect("src/ should have children");
        assert_eq!(nested.len(), 1);
        assert_eq!(nested[0].path(), "main.rs");
    }

    /// A closing marker without an opening marker is reported as such.
    #[test]
    fn test_missing_opening_marker() {
        let input = r#"- src/
</agentic-navigation-guide>"#;

        let outcome = Parser::new().parse(input);
        assert!(matches!(
            outcome,
            Err(crate::errors::AppError::Syntax(
                SyntaxError::MissingOpeningMarker { .. }
            ))
        ));
    }

    /// Text after `#` is captured as the item's comment.
    #[test]
    fn test_parse_with_comments() {
        let guide = parse_ok(
            r#"<agentic-navigation-guide>
- src/ # source code
- Cargo.toml # project manifest
</agentic-navigation-guide>"#,
        );

        assert_eq!(guide.items.len(), 2);
        assert_eq!(guide.items[0].comment(), Some("source code"));
        assert_eq!(guide.items[1].comment(), Some("project manifest"));
    }

    /// Trailing whitespace after a path does not become part of the path.
    #[test]
    fn test_trailing_whitespace_allowed() {
        let guide = parse_ok(
            r#"<agentic-navigation-guide>
- foo.rs  
- bar.rs          
- baz/     
  - qux.rs      
</agentic-navigation-guide>"#,
        );

        assert_eq!(guide.items.len(), 3);
        assert_eq!(guide.items[0].path(), "foo.rs");
        assert_eq!(guide.items[1].path(), "bar.rs");
        assert_eq!(guide.items[2].path(), "baz");

        let nested = guide.items[2]
            .children()
            .expect("baz/ should have children");
        assert_eq!(nested.len(), 1);
        assert_eq!(nested[0].path(), "qux.rs");
    }

    /// `...` entries parse as placeholders, with or without a comment.
    #[test]
    fn test_parse_placeholder() {
        let guide = parse_ok(
            r#"<agentic-navigation-guide>
- src/
  - main.rs
  - ... # other source files
- docs/
  - README.md
  - ...
</agentic-navigation-guide>"#,
        );

        // src/ and docs/ at root level
        assert_eq!(guide.items.len(), 2);

        // src/ holds main.rs followed by a commented placeholder
        let src_children = guide.items[0]
            .children()
            .expect("src/ should have children");
        assert_eq!(src_children.len(), 2);
        assert_eq!(src_children[0].path(), "main.rs");
        assert!(src_children[1].is_placeholder());
        assert_eq!(src_children[1].comment(), Some("other source files"));

        // docs/ holds README.md followed by a bare placeholder
        let docs_children = guide.items[1]
            .children()
            .expect("docs/ should have children");
        assert_eq!(docs_children.len(), 2);
        assert_eq!(docs_children[0].path(), "README.md");
        assert!(docs_children[1].is_placeholder());
        assert_eq!(docs_children[1].comment(), None);
    }
}