agentic_navigation_guide/
parser.rs

1//! Parser for navigation guide markdown files
2
3use crate::errors::{Result, SyntaxError};
4use crate::types::{FilesystemItem, NavigationGuide, NavigationGuideLine};
5use regex::Regex;
6
7/// Parser for navigation guide markdown content
8pub struct Parser {
9    /// Regular expression for detecting list items
10    list_item_regex: Regex,
11    /// Regular expression for parsing path and comment
12    path_comment_regex: Regex,
13}
14
15impl Parser {
16    /// Create a new parser instance
17    pub fn new() -> Self {
18        Self {
19            list_item_regex: Regex::new(r"^(\s*)-\s+(.+)$").unwrap(),
20            path_comment_regex: Regex::new(r"^([^#]+?)(?:\s*#\s*(.*))?$").unwrap(),
21        }
22    }
23
24    /// Parse navigation guide content from a markdown string
25    pub fn parse(&self, content: &str) -> Result<NavigationGuide> {
26        // Find the guide block
27        let (prologue, guide_content, epilogue, line_offset) = self.extract_guide_block(content)?;
28
29        // Parse the guide content
30        let items = self.parse_guide_content(&guide_content, line_offset)?;
31
32        Ok(NavigationGuide {
33            items,
34            prologue,
35            epilogue,
36        })
37    }
38
39    /// Extract the guide block from the markdown content
40    fn extract_guide_block(
41        &self,
42        content: &str,
43    ) -> Result<(Option<String>, String, Option<String>, usize)> {
44        let lines: Vec<&str> = content.lines().collect();
45        let mut start_idx = None;
46        let mut end_idx = None;
47
48        // Find the opening and closing markers
49        for (idx, line) in lines.iter().enumerate() {
50            if line.trim() == "<agentic-navigation-guide>" {
51                if start_idx.is_some() {
52                    return Err(SyntaxError::MultipleGuideBlocks { line: idx + 1 }.into());
53                }
54                start_idx = Some(idx);
55            } else if line.trim() == "</agentic-navigation-guide>" {
56                end_idx = Some(idx);
57                break;
58            }
59        }
60
61        // Validate markers
62        let start = start_idx.ok_or(SyntaxError::MissingOpeningMarker { line: 1 })?;
63        let end = end_idx.ok_or(SyntaxError::MissingClosingMarker { line: lines.len() })?;
64
65        // Extract prologue, guide content, and epilogue
66        let prologue = if start > 0 {
67            Some(lines[..start].join("\n"))
68        } else {
69            None
70        };
71
72        let guide_content = lines[start + 1..end].join("\n");
73
74        let epilogue = if end + 1 < lines.len() {
75            Some(lines[end + 1..].join("\n"))
76        } else {
77            None
78        };
79
80        // Calculate line offset: prologue lines + opening tag line
81        let line_offset = start + 1;
82
83        Ok((prologue, guide_content, epilogue, line_offset))
84    }
85
86    /// Parse the guide content into navigation guide lines
87    fn parse_guide_content(&self, content: &str, line_offset: usize) -> Result<Vec<NavigationGuideLine>> {
88        if content.trim().is_empty() {
89            return Err(SyntaxError::EmptyGuideBlock.into());
90        }
91
92        let mut items = Vec::new();
93        let mut indent_size = None;
94        let lines: Vec<&str> = content.lines().collect();
95
96        for (idx, line) in lines.iter().enumerate() {
97            // Calculate the actual line number in the file
98            let line_number = idx + 1 + line_offset;
99
100            // Check for blank lines
101            if line.trim().is_empty() {
102                return Err(SyntaxError::BlankLineInGuide { line: line_number }.into());
103            }
104
105            // Parse the list item
106            if let Some(captures) = self.list_item_regex.captures(line) {
107                let indent = captures.get(1).unwrap().as_str().len();
108                let content = captures.get(2).unwrap().as_str();
109
110                // Determine indent size from first indented item
111                if indent > 0 && indent_size.is_none() {
112                    indent_size = Some(indent);
113                }
114
115                // Validate indentation
116                let indent_level = if indent == 0 {
117                    0
118                } else if let Some(size) = indent_size {
119                    if indent % size != 0 {
120                        return Err(
121                            SyntaxError::InvalidIndentationLevel { line: line_number }.into()
122                        );
123                    }
124                    indent / size
125                } else {
126                    // First indented item
127                    1
128                };
129
130                // Parse path and comment
131                let (path, comment) = self.parse_path_comment(content, line_number)?;
132
133                // Determine item type
134                let item = if path.ends_with('/') {
135                    FilesystemItem::Directory {
136                        path: path.trim_end_matches('/').to_string(),
137                        comment,
138                        children: Vec::new(),
139                    }
140                } else {
141                    // Could be a file or symlink - we'll treat as file for now
142                    FilesystemItem::File { path, comment }
143                };
144
145                items.push(NavigationGuideLine {
146                    line_number,
147                    indent_level,
148                    item,
149                });
150            } else {
151                return Err(SyntaxError::InvalidListFormat { line: line_number }.into());
152            }
153        }
154
155        // Build the hierarchy
156        let hierarchical_items = self.build_hierarchy(items)?;
157
158        Ok(hierarchical_items)
159    }
160
161    /// Parse path and optional comment from item content
162    fn parse_path_comment(
163        &self,
164        content: &str,
165        line_number: usize,
166    ) -> Result<(String, Option<String>)> {
167        if let Some(captures) = self.path_comment_regex.captures(content) {
168            let path = captures.get(1).unwrap().as_str().trim().to_string();
169            let comment = captures.get(2).map(|m| m.as_str().trim().to_string());
170
171            // Validate path
172            if path.is_empty() {
173                return Err(SyntaxError::InvalidPathFormat {
174                    line: line_number,
175                    path: String::new(),
176                }
177                .into());
178            }
179
180            // Check for special directories
181            if path == "." || path == ".." || path == "./" || path == "../" {
182                return Err(SyntaxError::InvalidSpecialDirectory {
183                    line: line_number,
184                    path,
185                }
186                .into());
187            }
188
189            Ok((path, comment))
190        } else {
191            Err(SyntaxError::InvalidPathFormat {
192                line: line_number,
193                path: content.to_string(),
194            }
195            .into())
196        }
197    }
198
199    /// Build a hierarchical structure from flat list items
200    fn build_hierarchy(&self, items: Vec<NavigationGuideLine>) -> Result<Vec<NavigationGuideLine>> {
201        if items.is_empty() {
202            return Ok(Vec::new());
203        }
204
205        // First pass: organize items by their parent-child relationships
206        let mut result: Vec<NavigationGuideLine> = Vec::new();
207        let mut parent_indices: Vec<Option<usize>> = vec![None; items.len()];
208
209        // Find parent index for each item
210        for i in 0..items.len() {
211            let current_level = items[i].indent_level;
212
213            if current_level == 0 {
214                parent_indices[i] = None; // Root item
215            } else {
216                // Find the nearest preceding directory at level current_level - 1
217                let mut parent_found = false;
218                for j in (0..i).rev() {
219                    if items[j].indent_level == current_level - 1 && items[j].is_directory() {
220                        parent_indices[i] = Some(j);
221                        parent_found = true;
222                        break;
223                    } else if items[j].indent_level < current_level - 1 {
224                        // Gone too far up the hierarchy
225                        break;
226                    }
227                }
228
229                if !parent_found {
230                    return Err(SyntaxError::InvalidIndentationLevel {
231                        line: items[i].line_number,
232                    }
233                    .into());
234                }
235            }
236        }
237
238        // Second pass: build the tree
239        // We need to process items in reverse order to ensure children are complete before adding to parents
240        let mut processed_items: Vec<Option<NavigationGuideLine>> =
241            items.into_iter().map(Some).collect();
242
243        // Process from last to first
244        for i in (0..processed_items.len()).rev() {
245            if let Some(item) = processed_items[i].take() {
246                if let Some(parent_idx) = parent_indices[i] {
247                    // Add this item to its parent's children
248                    if let Some(ref mut parent) = processed_items[parent_idx] {
249                        match &mut parent.item {
250                            FilesystemItem::Directory { children, .. } => {
251                                // Insert at the beginning to maintain order
252                                children.insert(0, item);
253                            }
254                            _ => {
255                                return Err(SyntaxError::InvalidIndentationLevel {
256                                    line: item.line_number,
257                                }
258                                .into());
259                            }
260                        }
261                    }
262                } else {
263                    // Root item - add to result
264                    result.insert(0, item);
265                }
266            }
267        }
268
269        Ok(result)
270    }
271}
272
273impl Default for Parser {
274    fn default() -> Self {
275        Self::new()
276    }
277}
278
#[cfg(test)]
mod tests {
    use super::*;

    /// A directory with one nested file plus a root-level file parses into
    /// two roots, the first of which owns the nested file.
    #[test]
    fn test_parse_minimal_guide() {
        let content = concat!(
            "<agentic-navigation-guide>\n",
            "- src/\n",
            "  - main.rs\n",
            "- Cargo.toml\n",
            "</agentic-navigation-guide>",
        );

        let guide = Parser::new().parse(content).unwrap();
        assert_eq!(guide.items.len(), 2); // src/ and Cargo.toml at root level

        // src/ is a directory whose stored path has no trailing slash.
        let src_item = &guide.items[0];
        assert!(src_item.is_directory());
        assert_eq!(src_item.path(), "src");

        // main.rs is nested under src/.
        let children = src_item.children().expect("src/ should have children");
        assert_eq!(children.len(), 1);
        assert_eq!(children[0].path(), "main.rs");
    }

    /// A closing marker without an opening marker is reported as a missing
    /// opening marker.
    #[test]
    fn test_missing_opening_marker() {
        let content = concat!("- src/\n", "</agentic-navigation-guide>");

        let result = Parser::new().parse(content);
        assert!(matches!(
            result,
            Err(crate::errors::AppError::Syntax(
                SyntaxError::MissingOpeningMarker { .. }
            ))
        ));
    }

    /// Text after `#` is exposed as the item's comment.
    #[test]
    fn test_parse_with_comments() {
        let content = concat!(
            "<agentic-navigation-guide>\n",
            "- src/ # source code\n",
            "- Cargo.toml # project manifest\n",
            "</agentic-navigation-guide>",
        );

        let guide = Parser::new().parse(content).unwrap();
        assert_eq!(guide.items.len(), 2);
        assert_eq!(guide.items[0].comment(), Some("source code"));
        assert_eq!(guide.items[1].comment(), Some("project manifest"));
    }

    /// Trailing whitespace after a path is ignored. The fixture spells the
    /// trailing spaces out in string literals so they cannot be lost to an
    /// editor that strips line endings.
    #[test]
    fn test_trailing_whitespace_allowed() {
        let content = concat!(
            "<agentic-navigation-guide>\n",
            "- foo.rs  \n",
            "- bar.rs          \n",
            "- baz/     \n",
            "  - qux.rs      \n",
            "</agentic-navigation-guide>",
        );

        let guide = Parser::new().parse(content).unwrap();
        assert_eq!(guide.items.len(), 3);
        assert_eq!(guide.items[0].path(), "foo.rs");
        assert_eq!(guide.items[1].path(), "bar.rs");
        assert_eq!(guide.items[2].path(), "baz");

        let children = guide.items[2]
            .children()
            .expect("baz/ should have children");
        assert_eq!(children.len(), 1);
        assert_eq!(children[0].path(), "qux.rs");
    }
}