agentic_navigation_guide/
parser.rs

1//! Parser for navigation guide markdown files
2
3use crate::errors::{Result, SyntaxError};
4use crate::types::{FilesystemItem, NavigationGuide, NavigationGuideLine};
5use regex::Regex;
6
7/// Parser for navigation guide markdown content
8pub struct Parser {
9    /// Regular expression for detecting list items
10    list_item_regex: Regex,
11    /// Regular expression for parsing path and comment
12    path_comment_regex: Regex,
13}
14
15impl Parser {
16    /// Create a new parser instance
17    pub fn new() -> Self {
18        Self {
19            list_item_regex: Regex::new(r"^(\s*)-\s+(.+)$").unwrap(),
20            path_comment_regex: Regex::new(r"^([^#]+?)(?:\s*#\s*(.*))?$").unwrap(),
21        }
22    }
23
24    /// Parse navigation guide content from a markdown string
25    pub fn parse(&self, content: &str) -> Result<NavigationGuide> {
26        // Find the guide block
27        let (prologue, guide_content, epilogue, line_offset, ignore) =
28            self.extract_guide_block(content)?;
29
30        // Parse the guide content
31        let items = self.parse_guide_content(&guide_content, line_offset)?;
32
33        Ok(NavigationGuide {
34            items,
35            prologue,
36            epilogue,
37            ignore,
38        })
39    }
40
41    /// Extract the guide block from the markdown content
42    #[allow(clippy::type_complexity)]
43    fn extract_guide_block(
44        &self,
45        content: &str,
46    ) -> Result<(Option<String>, String, Option<String>, usize, bool)> {
47        let lines: Vec<&str> = content.lines().collect();
48        let mut start_idx = None;
49        let mut end_idx = None;
50        let mut ignore = false;
51
52        // Find the opening and closing markers
53        for (idx, line) in lines.iter().enumerate() {
54            let trimmed = line.trim();
55
56            // Check for opening tag with or without attributes
57            if trimmed.starts_with("<agentic-navigation-guide") && trimmed.ends_with(">") {
58                if start_idx.is_some() {
59                    return Err(SyntaxError::MultipleGuideBlocks { line: idx + 1 }.into());
60                }
61                start_idx = Some(idx);
62
63                // Parse ignore attribute if present
64                ignore = self.parse_ignore_attribute(trimmed);
65            } else if trimmed == "</agentic-navigation-guide>" {
66                end_idx = Some(idx);
67                break;
68            }
69        }
70
71        // Validate markers
72        let start = start_idx.ok_or(SyntaxError::MissingOpeningMarker { line: 1 })?;
73        let end = end_idx.ok_or(SyntaxError::MissingClosingMarker { line: lines.len() })?;
74
75        // Extract prologue, guide content, and epilogue
76        let prologue = if start > 0 {
77            Some(lines[..start].join("\n"))
78        } else {
79            None
80        };
81
82        let guide_content = lines[start + 1..end].join("\n");
83
84        let epilogue = if end + 1 < lines.len() {
85            Some(lines[end + 1..].join("\n"))
86        } else {
87            None
88        };
89
90        // Calculate line offset: prologue lines + opening tag line
91        let line_offset = start + 1;
92
93        Ok((prologue, guide_content, epilogue, line_offset, ignore))
94    }
95
96    /// Parse the ignore attribute from the opening tag
97    /// Supports both `ignore=true` and `ignore="true"` formats
98    fn parse_ignore_attribute(&self, tag: &str) -> bool {
99        // Check for ignore=true or ignore="true"
100        if tag.contains("ignore=true") || tag.contains("ignore=\"true\"") {
101            return true;
102        }
103        false
104    }
105
106    /// Parse the guide content into navigation guide lines
107    fn parse_guide_content(
108        &self,
109        content: &str,
110        line_offset: usize,
111    ) -> Result<Vec<NavigationGuideLine>> {
112        if content.trim().is_empty() {
113            return Err(SyntaxError::EmptyGuideBlock.into());
114        }
115
116        let mut items = Vec::new();
117        let mut indent_size = None;
118        let lines: Vec<&str> = content.lines().collect();
119
120        for (idx, line) in lines.iter().enumerate() {
121            // Calculate the actual line number in the file
122            let line_number = idx + 1 + line_offset;
123
124            // Check for blank lines
125            if line.trim().is_empty() {
126                return Err(SyntaxError::BlankLineInGuide { line: line_number }.into());
127            }
128
129            // Parse the list item
130            if let Some(captures) = self.list_item_regex.captures(line) {
131                let indent = captures.get(1).unwrap().as_str().len();
132                let content = captures.get(2).unwrap().as_str();
133
134                // Determine indent size from first indented item
135                if indent > 0 && indent_size.is_none() {
136                    indent_size = Some(indent);
137                }
138
139                // Validate indentation
140                let indent_level = if indent == 0 {
141                    0
142                } else if let Some(size) = indent_size {
143                    if indent % size != 0 {
144                        return Err(
145                            SyntaxError::InvalidIndentationLevel { line: line_number }.into()
146                        );
147                    }
148                    indent / size
149                } else {
150                    // First indented item
151                    1
152                };
153
154                // Parse path and comment
155                let (path, comment) = self.parse_path_comment(content, line_number)?;
156
157                // Determine item type
158                let item = if path == "..." {
159                    FilesystemItem::Placeholder { comment }
160                } else if path.ends_with('/') {
161                    FilesystemItem::Directory {
162                        path: path.trim_end_matches('/').to_string(),
163                        comment,
164                        children: Vec::new(),
165                    }
166                } else {
167                    // Could be a file or symlink - we'll treat as file for now
168                    FilesystemItem::File { path, comment }
169                };
170
171                items.push(NavigationGuideLine {
172                    line_number,
173                    indent_level,
174                    item,
175                });
176            } else {
177                return Err(SyntaxError::InvalidListFormat { line: line_number }.into());
178            }
179        }
180
181        // Build the hierarchy
182        let hierarchical_items = self.build_hierarchy(items)?;
183
184        Ok(hierarchical_items)
185    }
186
187    /// Parse path and optional comment from item content
188    fn parse_path_comment(
189        &self,
190        content: &str,
191        line_number: usize,
192    ) -> Result<(String, Option<String>)> {
193        if let Some(captures) = self.path_comment_regex.captures(content) {
194            let path = captures.get(1).unwrap().as_str().trim().to_string();
195            let comment = captures.get(2).map(|m| m.as_str().trim().to_string());
196
197            // Validate path
198            if path.is_empty() {
199                return Err(SyntaxError::InvalidPathFormat {
200                    line: line_number,
201                    path: String::new(),
202                }
203                .into());
204            }
205
206            // Check for special directories (but allow "..." placeholder)
207            if path == "..." {
208                // Allowed as placeholder
209            } else if path == "." || path == ".." || path == "./" || path == "../" {
210                return Err(SyntaxError::InvalidSpecialDirectory {
211                    line: line_number,
212                    path,
213                }
214                .into());
215            }
216
217            Ok((path, comment))
218        } else {
219            Err(SyntaxError::InvalidPathFormat {
220                line: line_number,
221                path: content.to_string(),
222            }
223            .into())
224        }
225    }
226
227    /// Build a hierarchical structure from flat list items
228    fn build_hierarchy(&self, items: Vec<NavigationGuideLine>) -> Result<Vec<NavigationGuideLine>> {
229        if items.is_empty() {
230            return Ok(Vec::new());
231        }
232
233        // First pass: organize items by their parent-child relationships
234        let mut result: Vec<NavigationGuideLine> = Vec::new();
235        let mut parent_indices: Vec<Option<usize>> = vec![None; items.len()];
236
237        // Find parent index for each item
238        for i in 0..items.len() {
239            let current_level = items[i].indent_level;
240
241            if current_level == 0 {
242                parent_indices[i] = None; // Root item
243            } else {
244                // Find the nearest preceding directory at level current_level - 1
245                let mut parent_found = false;
246                for j in (0..i).rev() {
247                    if items[j].indent_level == current_level - 1 && items[j].is_directory() {
248                        parent_indices[i] = Some(j);
249                        parent_found = true;
250                        break;
251                    } else if items[j].indent_level < current_level - 1 {
252                        // Gone too far up the hierarchy
253                        break;
254                    }
255                }
256
257                if !parent_found {
258                    return Err(SyntaxError::InvalidIndentationLevel {
259                        line: items[i].line_number,
260                    }
261                    .into());
262                }
263            }
264        }
265
266        // Second pass: build the tree
267        // We need to process items in reverse order to ensure children are complete before adding to parents
268        let mut processed_items: Vec<Option<NavigationGuideLine>> =
269            items.into_iter().map(Some).collect();
270
271        // Process from last to first
272        for i in (0..processed_items.len()).rev() {
273            if let Some(item) = processed_items[i].take() {
274                if let Some(parent_idx) = parent_indices[i] {
275                    // Add this item to its parent's children
276                    if let Some(ref mut parent) = processed_items[parent_idx] {
277                        match &mut parent.item {
278                            FilesystemItem::Directory { children, .. } => {
279                                // Insert at the beginning to maintain order
280                                children.insert(0, item);
281                            }
282                            _ => {
283                                return Err(SyntaxError::InvalidIndentationLevel {
284                                    line: item.line_number,
285                                }
286                                .into());
287                            }
288                        }
289                    }
290                } else {
291                    // Root item - add to result
292                    result.insert(0, item);
293                }
294            }
295        }
296
297        Ok(result)
298    }
299}
300
301impl Default for Parser {
302    fn default() -> Self {
303        Self::new()
304    }
305}
306
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `content` with a fresh parser, panicking if the guide is rejected.
    fn parse_ok(content: &str) -> NavigationGuide {
        Parser::new().parse(content).unwrap()
    }

    #[test]
    fn test_parse_minimal_guide() {
        let guide = parse_ok(
            r#"<agentic-navigation-guide>
- src/
  - main.rs
- Cargo.toml
</agentic-navigation-guide>"#,
        );

        // Only src/ and Cargo.toml live at the root level.
        assert_eq!(guide.items.len(), 2);

        // src/ must be a directory holding main.rs.
        let src = &guide.items[0];
        assert!(src.is_directory());
        assert_eq!(src.path(), "src");

        let src_children = src.children().expect("src/ should have children");
        assert_eq!(src_children.len(), 1);
        assert_eq!(src_children[0].path(), "main.rs");
    }

    #[test]
    fn test_missing_opening_marker() {
        let outcome = Parser::new().parse("- src/\n</agentic-navigation-guide>");

        assert!(matches!(
            outcome,
            Err(crate::errors::AppError::Syntax(
                SyntaxError::MissingOpeningMarker { .. }
            ))
        ));
    }

    #[test]
    fn test_parse_with_comments() {
        let guide = parse_ok(
            r#"<agentic-navigation-guide>
- src/ # source code
- Cargo.toml # project manifest
</agentic-navigation-guide>"#,
        );

        assert_eq!(guide.items.len(), 2);
        assert_eq!(guide.items[0].comment(), Some("source code"));
        assert_eq!(guide.items[1].comment(), Some("project manifest"));
    }

    #[test]
    fn test_trailing_whitespace_allowed() {
        // Built line by line so the trailing spaces stay visible and cannot be
        // stripped by an editor.
        let content = concat!(
            "<agentic-navigation-guide>\n",
            "- foo.rs  \n",
            "- bar.rs          \n",
            "- baz/     \n",
            "  - qux.rs      \n",
            "</agentic-navigation-guide>",
        );

        let guide = parse_ok(content);
        assert_eq!(guide.items.len(), 3);

        let root_paths: Vec<_> = guide.items.iter().map(|entry| entry.path()).collect();
        assert_eq!(root_paths, ["foo.rs", "bar.rs", "baz"]);

        let baz_children = guide.items[2]
            .children()
            .expect("baz/ should have children");
        assert_eq!(baz_children.len(), 1);
        assert_eq!(baz_children[0].path(), "qux.rs");
    }

    #[test]
    fn test_parse_placeholder() {
        let guide = parse_ok(
            r#"<agentic-navigation-guide>
- src/
  - main.rs
  - ... # other source files
- docs/
  - README.md
  - ...
</agentic-navigation-guide>"#,
        );

        // src/ and docs/ are the only root entries.
        assert_eq!(guide.items.len(), 2);

        // src/ holds main.rs followed by a commented placeholder.
        let src_children = guide.items[0]
            .children()
            .expect("src/ should have children");
        assert_eq!(src_children.len(), 2);
        assert_eq!(src_children[0].path(), "main.rs");
        assert!(src_children[1].is_placeholder());
        assert_eq!(src_children[1].comment(), Some("other source files"));

        // docs/ holds README.md followed by a bare placeholder.
        let docs_children = guide.items[1]
            .children()
            .expect("docs/ should have children");
        assert_eq!(docs_children.len(), 2);
        assert_eq!(docs_children[0].path(), "README.md");
        assert!(docs_children[1].is_placeholder());
        assert_eq!(docs_children[1].comment(), None);
    }

    #[test]
    fn test_parse_ignore_attribute_unquoted() {
        let guide = parse_ok(
            r#"<agentic-navigation-guide ignore=true>
- src/
  - main.rs
- Cargo.toml
</agentic-navigation-guide>"#,
        );

        assert!(guide.ignore);
        assert_eq!(guide.items.len(), 2);
    }

    #[test]
    fn test_parse_ignore_attribute_quoted() {
        let guide = parse_ok(
            r#"<agentic-navigation-guide ignore="true">
- src/
  - main.rs
- Cargo.toml
</agentic-navigation-guide>"#,
        );

        assert!(guide.ignore);
        assert_eq!(guide.items.len(), 2);
    }

    #[test]
    fn test_parse_without_ignore_attribute() {
        let guide = parse_ok(
            r#"<agentic-navigation-guide>
- src/
  - main.rs
- Cargo.toml
</agentic-navigation-guide>"#,
        );

        assert!(!guide.ignore);
        assert_eq!(guide.items.len(), 2);
    }

    #[test]
    fn test_parse_ignore_attribute_with_spaces() {
        let guide = parse_ok(
            r#"<agentic-navigation-guide  ignore=true  >
- src/
  - main.rs
</agentic-navigation-guide>"#,
        );

        assert!(guide.ignore);
        assert_eq!(guide.items.len(), 1);
    }
}