agentic_navigation_guide/
parser.rs

1//! Parser for navigation guide markdown files
2
3use crate::errors::{Result, SyntaxError};
4use crate::types::{FilesystemItem, NavigationGuide, NavigationGuideLine};
5use regex::Regex;
6
7/// Parser for navigation guide markdown content
8pub struct Parser {
9    /// Regular expression for detecting list items
10    list_item_regex: Regex,
11    /// Regular expression for parsing path and comment
12    path_comment_regex: Regex,
13}
14
15impl Parser {
16    /// Create a new parser instance
17    pub fn new() -> Self {
18        Self {
19            list_item_regex: Regex::new(r"^(\s*)-\s+(.+)$").unwrap(),
20            path_comment_regex: Regex::new(r"^([^#]+?)(?:\s*#\s*(.*))?$").unwrap(),
21        }
22    }
23
24    /// Parse navigation guide content from a markdown string
25    pub fn parse(&self, content: &str) -> Result<NavigationGuide> {
26        // Find the guide block
27        let (prologue, guide_content, epilogue, line_offset) = self.extract_guide_block(content)?;
28
29        // Parse the guide content
30        let items = self.parse_guide_content(&guide_content, line_offset)?;
31
32        Ok(NavigationGuide {
33            items,
34            prologue,
35            epilogue,
36        })
37    }
38
39    /// Extract the guide block from the markdown content
40    fn extract_guide_block(
41        &self,
42        content: &str,
43    ) -> Result<(Option<String>, String, Option<String>, usize)> {
44        let lines: Vec<&str> = content.lines().collect();
45        let mut start_idx = None;
46        let mut end_idx = None;
47
48        // Find the opening and closing markers
49        for (idx, line) in lines.iter().enumerate() {
50            if line.trim() == "<agentic-navigation-guide>" {
51                if start_idx.is_some() {
52                    return Err(SyntaxError::MultipleGuideBlocks { line: idx + 1 }.into());
53                }
54                start_idx = Some(idx);
55            } else if line.trim() == "</agentic-navigation-guide>" {
56                end_idx = Some(idx);
57                break;
58            }
59        }
60
61        // Validate markers
62        let start = start_idx.ok_or(SyntaxError::MissingOpeningMarker { line: 1 })?;
63        let end = end_idx.ok_or(SyntaxError::MissingClosingMarker { line: lines.len() })?;
64
65        // Extract prologue, guide content, and epilogue
66        let prologue = if start > 0 {
67            Some(lines[..start].join("\n"))
68        } else {
69            None
70        };
71
72        let guide_content = lines[start + 1..end].join("\n");
73
74        let epilogue = if end + 1 < lines.len() {
75            Some(lines[end + 1..].join("\n"))
76        } else {
77            None
78        };
79
80        // Calculate line offset: prologue lines + opening tag line
81        let line_offset = start + 1;
82
83        Ok((prologue, guide_content, epilogue, line_offset))
84    }
85
86    /// Parse the guide content into navigation guide lines
87    fn parse_guide_content(
88        &self,
89        content: &str,
90        line_offset: usize,
91    ) -> Result<Vec<NavigationGuideLine>> {
92        if content.trim().is_empty() {
93            return Err(SyntaxError::EmptyGuideBlock.into());
94        }
95
96        let mut items = Vec::new();
97        let mut indent_size = None;
98        let lines: Vec<&str> = content.lines().collect();
99
100        for (idx, line) in lines.iter().enumerate() {
101            // Calculate the actual line number in the file
102            let line_number = idx + 1 + line_offset;
103
104            // Check for blank lines
105            if line.trim().is_empty() {
106                return Err(SyntaxError::BlankLineInGuide { line: line_number }.into());
107            }
108
109            // Parse the list item
110            if let Some(captures) = self.list_item_regex.captures(line) {
111                let indent = captures.get(1).unwrap().as_str().len();
112                let content = captures.get(2).unwrap().as_str();
113
114                // Determine indent size from first indented item
115                if indent > 0 && indent_size.is_none() {
116                    indent_size = Some(indent);
117                }
118
119                // Validate indentation
120                let indent_level = if indent == 0 {
121                    0
122                } else if let Some(size) = indent_size {
123                    if indent % size != 0 {
124                        return Err(
125                            SyntaxError::InvalidIndentationLevel { line: line_number }.into()
126                        );
127                    }
128                    indent / size
129                } else {
130                    // First indented item
131                    1
132                };
133
134                // Parse path and comment
135                let (path, comment) = self.parse_path_comment(content, line_number)?;
136
137                // Determine item type
138                let item = if path.ends_with('/') {
139                    FilesystemItem::Directory {
140                        path: path.trim_end_matches('/').to_string(),
141                        comment,
142                        children: Vec::new(),
143                    }
144                } else {
145                    // Could be a file or symlink - we'll treat as file for now
146                    FilesystemItem::File { path, comment }
147                };
148
149                items.push(NavigationGuideLine {
150                    line_number,
151                    indent_level,
152                    item,
153                });
154            } else {
155                return Err(SyntaxError::InvalidListFormat { line: line_number }.into());
156            }
157        }
158
159        // Build the hierarchy
160        let hierarchical_items = self.build_hierarchy(items)?;
161
162        Ok(hierarchical_items)
163    }
164
165    /// Parse path and optional comment from item content
166    fn parse_path_comment(
167        &self,
168        content: &str,
169        line_number: usize,
170    ) -> Result<(String, Option<String>)> {
171        if let Some(captures) = self.path_comment_regex.captures(content) {
172            let path = captures.get(1).unwrap().as_str().trim().to_string();
173            let comment = captures.get(2).map(|m| m.as_str().trim().to_string());
174
175            // Validate path
176            if path.is_empty() {
177                return Err(SyntaxError::InvalidPathFormat {
178                    line: line_number,
179                    path: String::new(),
180                }
181                .into());
182            }
183
184            // Check for special directories
185            if path == "." || path == ".." || path == "./" || path == "../" {
186                return Err(SyntaxError::InvalidSpecialDirectory {
187                    line: line_number,
188                    path,
189                }
190                .into());
191            }
192
193            Ok((path, comment))
194        } else {
195            Err(SyntaxError::InvalidPathFormat {
196                line: line_number,
197                path: content.to_string(),
198            }
199            .into())
200        }
201    }
202
203    /// Build a hierarchical structure from flat list items
204    fn build_hierarchy(&self, items: Vec<NavigationGuideLine>) -> Result<Vec<NavigationGuideLine>> {
205        if items.is_empty() {
206            return Ok(Vec::new());
207        }
208
209        // First pass: organize items by their parent-child relationships
210        let mut result: Vec<NavigationGuideLine> = Vec::new();
211        let mut parent_indices: Vec<Option<usize>> = vec![None; items.len()];
212
213        // Find parent index for each item
214        for i in 0..items.len() {
215            let current_level = items[i].indent_level;
216
217            if current_level == 0 {
218                parent_indices[i] = None; // Root item
219            } else {
220                // Find the nearest preceding directory at level current_level - 1
221                let mut parent_found = false;
222                for j in (0..i).rev() {
223                    if items[j].indent_level == current_level - 1 && items[j].is_directory() {
224                        parent_indices[i] = Some(j);
225                        parent_found = true;
226                        break;
227                    } else if items[j].indent_level < current_level - 1 {
228                        // Gone too far up the hierarchy
229                        break;
230                    }
231                }
232
233                if !parent_found {
234                    return Err(SyntaxError::InvalidIndentationLevel {
235                        line: items[i].line_number,
236                    }
237                    .into());
238                }
239            }
240        }
241
242        // Second pass: build the tree
243        // We need to process items in reverse order to ensure children are complete before adding to parents
244        let mut processed_items: Vec<Option<NavigationGuideLine>> =
245            items.into_iter().map(Some).collect();
246
247        // Process from last to first
248        for i in (0..processed_items.len()).rev() {
249            if let Some(item) = processed_items[i].take() {
250                if let Some(parent_idx) = parent_indices[i] {
251                    // Add this item to its parent's children
252                    if let Some(ref mut parent) = processed_items[parent_idx] {
253                        match &mut parent.item {
254                            FilesystemItem::Directory { children, .. } => {
255                                // Insert at the beginning to maintain order
256                                children.insert(0, item);
257                            }
258                            _ => {
259                                return Err(SyntaxError::InvalidIndentationLevel {
260                                    line: item.line_number,
261                                }
262                                .into());
263                            }
264                        }
265                    }
266                } else {
267                    // Root item - add to result
268                    result.insert(0, item);
269                }
270            }
271        }
272
273        Ok(result)
274    }
275}
276
277impl Default for Parser {
278    fn default() -> Self {
279        Self::new()
280    }
281}
282
283#[cfg(test)]
284mod tests {
285    use super::*;
286
287    #[test]
288    fn test_parse_minimal_guide() {
289        let content = r#"<agentic-navigation-guide>
290- src/
291  - main.rs
292- Cargo.toml
293</agentic-navigation-guide>"#;
294
295        let parser = Parser::new();
296        let guide = parser.parse(content).unwrap();
297        assert_eq!(guide.items.len(), 2); // src/ and Cargo.toml at root level
298
299        // Check that src/ contains main.rs as a child
300        let src_item = &guide.items[0];
301        assert!(src_item.is_directory());
302        assert_eq!(src_item.path(), "src");
303
304        if let Some(children) = src_item.children() {
305            assert_eq!(children.len(), 1);
306            assert_eq!(children[0].path(), "main.rs");
307        } else {
308            panic!("src/ should have children");
309        }
310    }
311
312    #[test]
313    fn test_missing_opening_marker() {
314        let content = r#"- src/
315</agentic-navigation-guide>"#;
316
317        let parser = Parser::new();
318        let result = parser.parse(content);
319        assert!(matches!(
320            result,
321            Err(crate::errors::AppError::Syntax(
322                SyntaxError::MissingOpeningMarker { .. }
323            ))
324        ));
325    }
326
327    #[test]
328    fn test_parse_with_comments() {
329        let content = r#"<agentic-navigation-guide>
330- src/ # source code
331- Cargo.toml # project manifest
332</agentic-navigation-guide>"#;
333
334        let parser = Parser::new();
335        let guide = parser.parse(content).unwrap();
336        assert_eq!(guide.items.len(), 2);
337        assert_eq!(guide.items[0].comment(), Some("source code"));
338        assert_eq!(guide.items[1].comment(), Some("project manifest"));
339    }
340
341    #[test]
342    fn test_trailing_whitespace_allowed() {
343        let content = r#"<agentic-navigation-guide>
344- foo.rs  
345- bar.rs          
346- baz/     
347  - qux.rs      
348</agentic-navigation-guide>"#;
349
350        let parser = Parser::new();
351        let guide = parser.parse(content).unwrap();
352        assert_eq!(guide.items.len(), 3);
353        assert_eq!(guide.items[0].path(), "foo.rs");
354        assert_eq!(guide.items[1].path(), "bar.rs");
355        assert_eq!(guide.items[2].path(), "baz");
356
357        if let Some(children) = guide.items[2].children() {
358            assert_eq!(children.len(), 1);
359            assert_eq!(children[0].path(), "qux.rs");
360        } else {
361            panic!("baz/ should have children");
362        }
363    }
364}