Skip to main content

agm_core/parser/
mod.rs

//! Parser: converts raw `.agm` text into an unvalidated AST.
//!
//! The parser is hand-written and line-oriented. Each line is classified
//! (scalar field, list, block, node declaration, comment, blank), and
//! indentation tracking determines field boundaries.
6
7pub mod fields;
8pub mod header;
9pub mod lexer;
10pub mod mem;
11pub mod node;
12pub mod sidecar;
13pub mod state;
14pub mod structured;
15
16pub use lexer::{Line, LineKind, classify_line, lex};
17
18use crate::error::{AgmError, ErrorCode, ErrorLocation};
19use crate::model::file::AgmFile;
20
21/// Result type for parser operations.
22pub type ParseResult<T> = Result<T, Vec<AgmError>>;
23
24/// Parses raw AGM source text into an unvalidated `AgmFile`.
25pub fn parse(input: &str) -> ParseResult<AgmFile> {
26    let lines = lex(input)?;
27    let mut pos = 0;
28    let mut errors = Vec::new();
29
30    let header = header::parse_header(&lines, &mut pos, &mut errors);
31
32    let mut nodes = Vec::new();
33    while pos < lines.len() {
34        match &lines[pos].kind {
35            LineKind::Blank | LineKind::Comment | LineKind::TestExpectHeader(_) => {
36                pos += 1;
37            }
38            LineKind::NodeDeclaration(_) => {
39                nodes.push(node::parse_node(&lines, &mut pos, &mut errors));
40            }
41            _ => {
42                errors.push(AgmError::new(
43                    ErrorCode::P003,
44                    format!("Unexpected content at line {}", lines[pos].number),
45                    ErrorLocation::new(None, Some(lines[pos].number), None),
46                ));
47                pos += 1;
48            }
49        }
50    }
51
52    if nodes.is_empty() {
53        errors.push(AgmError::new(
54            ErrorCode::P008,
55            "Empty file (no nodes)",
56            ErrorLocation::new(None, None, None),
57        ));
58    }
59
60    if errors.iter().any(|e| e.is_error()) {
61        Err(errors)
62    } else {
63        Ok(AgmFile { header, nodes })
64    }
65}
66
67// ---------------------------------------------------------------------------
68// Tests
69// ---------------------------------------------------------------------------
70
#[cfg(test)]
mod tests {
    use super::*;
    use crate::error::ErrorCode;
    use crate::model::fields::{FieldValue, NodeType, Priority};

    // -----------------------------------------------------------------------
    // Shared fixtures and helpers
    // -----------------------------------------------------------------------

    /// Builds a document with the three required header fields and a single
    /// node that carries `type` and `summary`.
    fn minimal_valid(node_id: &str, node_type: &str) -> String {
        format!(
            "agm: 1.0\npackage: test.pkg\nversion: 0.1.0\n\nnode {node_id}\ntype: {node_type}\nsummary: A test node\n"
        )
    }

    /// True when at least one diagnostic in `errors` carries `code`.
    fn errors_contain(errors: &[AgmError], code: ErrorCode) -> bool {
        errors.iter().any(|diag| diag.code == code)
    }

    /// True when a `P001` diagnostic mentions `quoted_field` in its message.
    fn p001_mentions(errors: &[AgmError], quoted_field: &str) -> bool {
        errors
            .iter()
            .any(|diag| diag.code == ErrorCode::P001 && diag.message.contains(quoted_field))
    }

    /// Parses `src` and unwraps the file; a failure is reported at the caller.
    #[track_caller]
    fn parse_ok(src: &str) -> AgmFile {
        parse(src).unwrap()
    }

    /// Parses `src` and unwraps the diagnostics; a success is reported at the caller.
    #[track_caller]
    fn parse_errs(src: &str) -> Vec<AgmError> {
        parse(src).unwrap_err()
    }

    // -----------------------------------------------------------------------
    // A: Minimal valid files
    // -----------------------------------------------------------------------

    #[test]
    fn test_parse_minimal_valid_file_returns_ok() {
        let outcome = parse(&minimal_valid("test.node", "facts"));
        assert!(outcome.is_ok(), "expected Ok, got: {:?}", outcome);
        let parsed = outcome.unwrap();
        assert_eq!(parsed.nodes.len(), 1);
        assert_eq!(parsed.nodes[0].id, "test.node");
    }

    #[test]
    fn test_parse_minimal_header_and_empty_node_returns_ok() {
        // A node with neither `type` nor `summary` is still accepted here;
        // flagging the missing fields is left to the validator.
        let outcome = parse("agm: 1.0\npackage: test.pkg\nversion: 0.1.0\n\nnode bare.node\n");
        assert!(outcome.is_ok(), "expected Ok, got: {:?}", outcome);
        let parsed = outcome.unwrap();
        assert_eq!(parsed.nodes.len(), 1);
        assert_eq!(parsed.nodes[0].id, "bare.node");
    }

    #[test]
    fn test_parse_multiple_nodes_returns_all() {
        let parsed = parse_ok(
            "agm: 1.0\npackage: test.pkg\nversion: 0.1.0\n\n\
             node a.one\ntype: facts\nsummary: one\n\n\
             node a.two\ntype: rules\nsummary: two\n\n\
             node a.three\ntype: workflow\nsummary: three\n",
        );
        assert_eq!(parsed.nodes.len(), 3);
        // Nodes must come back in declaration order.
        for (idx, expected_id) in ["a.one", "a.two", "a.three"].into_iter().enumerate() {
            assert_eq!(parsed.nodes[idx].id, expected_id);
        }
    }

    // -----------------------------------------------------------------------
    // B: Header validation
    // -----------------------------------------------------------------------

    #[test]
    fn test_parse_agm_valid_format_accepted() {
        let parsed = parse_ok(&minimal_valid("n.node", "facts"));
        assert_eq!(parsed.header.agm, "1.0");
    }

    #[test]
    fn test_parse_agm_invalid_format_returns_p001() {
        let diags = parse_errs(
            "agm: latest\npackage: test.pkg\nversion: 0.1.0\n\nnode n.node\ntype: facts\nsummary: s\n",
        );
        assert!(errors_contain(&diags, ErrorCode::P001));
    }

    #[test]
    fn test_parse_agm_three_part_version_returns_p001() {
        let diags = parse_errs(
            "agm: 1.0.0\npackage: test.pkg\nversion: 0.1.0\n\nnode n.node\ntype: facts\nsummary: s\n",
        );
        assert!(errors_contain(&diags, ErrorCode::P001));
    }

    #[test]
    fn test_parse_package_valid_dotted_accepted() {
        let parsed = parse_ok(&minimal_valid("n.node", "facts"));
        assert_eq!(parsed.header.package, "test.pkg");
    }

    #[test]
    fn test_parse_package_uppercase_returns_p001() {
        let diags = parse_errs(
            "agm: 1.0\npackage: Test.pkg\nversion: 0.1.0\n\nnode n.node\ntype: facts\nsummary: s\n",
        );
        assert!(errors_contain(&diags, ErrorCode::P001));
    }

    #[test]
    fn test_parse_package_with_hyphen_returns_p001() {
        let diags = parse_errs(
            "agm: 1.0\npackage: test-pkg\nversion: 0.1.0\n\nnode n.node\ntype: facts\nsummary: s\n",
        );
        assert!(errors_contain(&diags, ErrorCode::P001));
    }

    #[test]
    fn test_parse_version_valid_semver_accepted() {
        let parsed = parse_ok(&minimal_valid("n.node", "facts"));
        assert_eq!(parsed.header.version, "0.1.0");
    }

    #[test]
    fn test_parse_version_invalid_semver_returns_p001() {
        let diags = parse_errs(
            "agm: 1.0\npackage: test.pkg\nversion: not-a-version\n\nnode n.node\ntype: facts\nsummary: s\n",
        );
        assert!(errors_contain(&diags, ErrorCode::P001));
    }

    // -----------------------------------------------------------------------
    // C: Missing required header fields
    // -----------------------------------------------------------------------

    #[test]
    fn test_parse_missing_agm_returns_p001() {
        let diags =
            parse_errs("package: test.pkg\nversion: 0.1.0\n\nnode n.node\ntype: facts\nsummary: s\n");
        assert!(errors_contain(&diags, ErrorCode::P001));
        assert!(p001_mentions(&diags, "'agm'"));
    }

    #[test]
    fn test_parse_missing_package_returns_p001() {
        let diags =
            parse_errs("agm: 1.0\nversion: 0.1.0\n\nnode n.node\ntype: facts\nsummary: s\n");
        assert!(errors_contain(&diags, ErrorCode::P001));
        assert!(p001_mentions(&diags, "'package'"));
    }

    #[test]
    fn test_parse_missing_version_returns_p001() {
        let diags =
            parse_errs("agm: 1.0\npackage: test.pkg\n\nnode n.node\ntype: facts\nsummary: s\n");
        assert!(errors_contain(&diags, ErrorCode::P001));
        assert!(p001_mentions(&diags, "'version'"));
    }

    // -----------------------------------------------------------------------
    // D: Imports
    // -----------------------------------------------------------------------

    #[test]
    fn test_parse_imports_inline_with_constraints() {
        let parsed = parse_ok(
            "agm: 1.0\npackage: test.pkg\nversion: 0.1.0\n\
             imports: [shared.security@^1.0.0, core.utils]\n\n\
             node n.node\ntype: facts\nsummary: s\n",
        );
        let imports = parsed.header.imports.unwrap();
        assert_eq!(imports.len(), 2);

        let (first, second) = (&imports[0], &imports[1]);
        assert_eq!(first.package, "shared.security");
        assert_eq!(first.version_constraint.as_deref(), Some("^1.0.0"));
        assert_eq!(second.package, "core.utils");
    }

    #[test]
    fn test_parse_imports_indented_list() {
        let parsed = parse_ok(
            "agm: 1.0\npackage: test.pkg\nversion: 0.1.0\n\
             imports:\n  - shared.security@^1.0.0\n  - core.utils\n\n\
             node n.node\ntype: facts\nsummary: s\n",
        );
        let imports = parsed.header.imports.unwrap();
        assert_eq!(imports.len(), 2);
        assert_eq!(imports[0].package, "shared.security");
    }

    #[test]
    fn test_parse_imports_empty_list() {
        let parsed = parse_ok(
            "agm: 1.0\npackage: test.pkg\nversion: 0.1.0\n\
             imports: []\n\n\
             node n.node\ntype: facts\nsummary: s\n",
        );
        let imports = parsed.header.imports.unwrap();
        assert_eq!(imports.len(), 0);
    }

    #[test]
    fn test_parse_imports_invalid_entry_returns_error() {
        let src = "agm: 1.0\npackage: test.pkg\nversion: 0.1.0\n\
             imports: [@bad]\n\n\
             node n.node\ntype: facts\nsummary: s\n";
        // Either outcome is accepted: a failure must carry P001, while a
        // success must have dropped the malformed entry from the import list.
        match parse(src) {
            Err(diags) => {
                assert!(errors_contain(&diags, ErrorCode::P001));
            }
            Ok(parsed) => {
                let imports = parsed.header.imports.unwrap();
                assert_eq!(imports.len(), 0); // bad entry was rejected
            }
        }
    }

    // -----------------------------------------------------------------------
    // E: Scalar fields
    // -----------------------------------------------------------------------

    #[test]
    fn test_parse_node_scalar_type_and_summary() {
        let parsed = parse_ok(&minimal_valid("auth.login", "workflow"));
        let node = &parsed.nodes[0];
        assert_eq!(node.node_type, NodeType::Workflow);
        assert_eq!(node.summary, "A test node");
    }

    #[test]
    fn test_parse_node_scalar_priority_valid() {
        let parsed = parse_ok(
            "agm: 1.0\npackage: test.pkg\nversion: 0.1.0\n\n\
             node n.node\ntype: facts\nsummary: s\npriority: critical\n",
        );
        assert_eq!(parsed.nodes[0].priority, Some(Priority::Critical));
    }

    #[test]
    fn test_parse_header_scalar_title_and_owner() {
        let parsed = parse_ok(
            "agm: 1.0\npackage: test.pkg\nversion: 0.1.0\n\
             title: My Package\nowner: team@example.com\n\n\
             node n.node\ntype: facts\nsummary: s\n",
        );
        let header = &parsed.header;
        assert_eq!(header.title.as_deref(), Some("My Package"));
        assert_eq!(header.owner.as_deref(), Some("team@example.com"));
    }

    // -----------------------------------------------------------------------
    // F: Inline lists
    // -----------------------------------------------------------------------

    #[test]
    fn test_parse_node_inline_list_tags() {
        let parsed = parse_ok(
            "agm: 1.0\npackage: test.pkg\nversion: 0.1.0\n\n\
             node n.node\ntype: facts\nsummary: s\ntags: [auth, security]\n",
        );
        let expected = ["auth".to_owned(), "security".to_owned()];
        assert_eq!(parsed.nodes[0].tags.as_deref(), Some(&expected[..]));
    }

    #[test]
    fn test_parse_node_inline_list_depends() {
        let parsed = parse_ok(
            "agm: 1.0\npackage: test.pkg\nversion: 0.1.0\n\n\
             node n.node\ntype: workflow\nsummary: s\ndepends: [a.one, a.two]\n",
        );
        let expected = ["a.one".to_owned(), "a.two".to_owned()];
        assert_eq!(parsed.nodes[0].depends.as_deref(), Some(&expected[..]));
    }

    // -----------------------------------------------------------------------
    // G: Indented lists
    // -----------------------------------------------------------------------

    #[test]
    fn test_parse_node_indented_list_items() {
        let parsed = parse_ok(
            "agm: 1.0\npackage: test.pkg\nversion: 0.1.0\n\n\
             node n.node\ntype: facts\nsummary: s\nitems:\n  - item one\n  - item two\n",
        );
        let expected = ["item one".to_owned(), "item two".to_owned()];
        assert_eq!(parsed.nodes[0].items.as_deref(), Some(&expected[..]));
    }

    #[test]
    fn test_parse_node_indented_list_with_blanks_between() {
        // A blank line between two entries must not end the list.
        let parsed = parse_ok(
            "agm: 1.0\npackage: test.pkg\nversion: 0.1.0\n\n\
             node n.node\ntype: facts\nsummary: s\nitems:\n  - item one\n\n  - item two\n",
        );
        let items = parsed.nodes[0].items.as_ref().unwrap();
        assert_eq!(items.len(), 2);
    }

    // -----------------------------------------------------------------------
    // H: Block fields
    // -----------------------------------------------------------------------

    #[test]
    fn test_parse_node_block_detail() {
        let parsed = parse_ok(
            "agm: 1.0\npackage: test.pkg\nversion: 0.1.0\n\n\
             node n.node\ntype: facts\nsummary: s\ndetail:\n  This is the detail.\n",
        );
        assert_eq!(parsed.nodes[0].detail.as_deref(), Some("This is the detail."));
    }

    #[test]
    fn test_parse_node_block_preserves_internal_blanks() {
        let parsed = parse_ok(
            "agm: 1.0\npackage: test.pkg\nversion: 0.1.0\n\n\
             node n.node\ntype: facts\nsummary: s\ndetail:\n  line one\n\n  line two\n",
        );
        let block = parsed.nodes[0].detail.as_deref().unwrap();
        assert!(block.contains('\n'), "expected internal newline");
        assert!(block.contains("line one"));
        assert!(block.contains("line two"));
    }

    #[test]
    fn test_parse_node_block_strips_base_indent() {
        let parsed = parse_ok(
            "agm: 1.0\npackage: test.pkg\nversion: 0.1.0\n\n\
             node n.node\ntype: facts\nsummary: s\ndetail:\n    four spaces\n    second line\n",
        );
        let block = parsed.nodes[0].detail.as_deref().unwrap();
        assert!(!block.starts_with(' '), "leading spaces not stripped");
        assert!(block.starts_with("four"));
    }

    // -----------------------------------------------------------------------
    // I: Node boundaries and IDs
    // -----------------------------------------------------------------------

    #[test]
    fn test_parse_two_nodes_boundary_correct() {
        let parsed = parse_ok(
            "agm: 1.0\npackage: test.pkg\nversion: 0.1.0\n\n\
             node a.one\ntype: facts\nsummary: first\n\n\
             node a.two\ntype: rules\nsummary: second\n",
        );
        assert_eq!(parsed.nodes.len(), 2);
        for (idx, expected_id) in ["a.one", "a.two"].into_iter().enumerate() {
            assert_eq!(parsed.nodes[idx].id, expected_id);
        }
    }

    #[test]
    fn test_parse_node_id_dotted_valid() {
        let parsed = parse_ok(
            "agm: 1.0\npackage: test.pkg\nversion: 0.1.0\n\n\
             node billing.invoice.create\ntype: facts\nsummary: s\n",
        );
        assert_eq!(parsed.nodes[0].id, "billing.invoice.create");
    }

    #[test]
    fn test_parse_node_id_uppercase_returns_p002() {
        let diags = parse_errs(
            "agm: 1.0\npackage: test.pkg\nversion: 0.1.0\n\n\
             node Auth.Login\ntype: facts\nsummary: s\n",
        );
        assert!(errors_contain(&diags, ErrorCode::P002));
    }

    #[test]
    fn test_parse_node_id_empty_returns_p002() {
        let diags = parse_errs(
            "agm: 1.0\npackage: test.pkg\nversion: 0.1.0\n\nnode\ntype: facts\nsummary: s\n",
        );
        assert!(errors_contain(&diags, ErrorCode::P002));
    }

    // -----------------------------------------------------------------------
    // J: Duplicate fields
    // -----------------------------------------------------------------------

    #[test]
    fn test_parse_node_duplicate_scalar_returns_p006() {
        let diags = parse_errs(
            "agm: 1.0\npackage: test.pkg\nversion: 0.1.0\n\n\
             node n.node\ntype: facts\nsummary: first\nsummary: second\n",
        );
        assert!(errors_contain(&diags, ErrorCode::P006));
    }

    #[test]
    fn test_parse_node_duplicate_list_returns_p006() {
        let diags = parse_errs(
            "agm: 1.0\npackage: test.pkg\nversion: 0.1.0\n\n\
             node n.node\ntype: facts\nsummary: s\ntags: [a]\ntags: [b]\n",
        );
        assert!(errors_contain(&diags, ErrorCode::P006));
    }

    #[test]
    fn test_parse_header_duplicate_field_returns_p006() {
        let diags = parse_errs(
            "agm: 1.0\npackage: test.pkg\nversion: 0.1.0\ntitle: A\ntitle: B\n\n\
             node n.node\ntype: facts\nsummary: s\n",
        );
        assert!(errors_contain(&diags, ErrorCode::P006));
    }

    // -----------------------------------------------------------------------
    // K: Unknown fields
    // -----------------------------------------------------------------------

    #[test]
    fn test_parse_node_unknown_scalar_stored_in_extra() {
        let parsed = parse_ok(
            "agm: 1.0\npackage: test.pkg\nversion: 0.1.0\n\n\
             node n.node\ntype: facts\nsummary: s\ncustom_field: some value\n",
        );
        let expected = FieldValue::Scalar("some value".to_owned());
        assert_eq!(
            parsed.nodes[0].extra_fields.get("custom_field"),
            Some(&expected)
        );
    }

    #[test]
    fn test_parse_node_unknown_list_stored_in_extra() {
        let parsed = parse_ok(
            "agm: 1.0\npackage: test.pkg\nversion: 0.1.0\n\n\
             node n.node\ntype: facts\nsummary: s\ncustom_list: [x, y]\n",
        );
        let expected = FieldValue::List(vec!["x".to_owned(), "y".to_owned()]);
        assert_eq!(
            parsed.nodes[0].extra_fields.get("custom_list"),
            Some(&expected)
        );
    }

    // -----------------------------------------------------------------------
    // L: Comments and blanks
    // -----------------------------------------------------------------------

    #[test]
    fn test_parse_comments_between_fields_skipped() {
        let parsed = parse_ok(
            "agm: 1.0\n# a comment\npackage: test.pkg\n# another\nversion: 0.1.0\n\n\
             node n.node\n# comment inside node\ntype: facts\nsummary: s\n",
        );
        assert_eq!(parsed.nodes[0].summary, "s");
    }

    #[test]
    fn test_parse_blanks_between_nodes_skipped() {
        let parsed = parse_ok(
            "agm: 1.0\npackage: test.pkg\nversion: 0.1.0\n\n\n\n\
             node a.one\ntype: facts\nsummary: first\n\n\n\
             node a.two\ntype: rules\nsummary: second\n",
        );
        assert_eq!(parsed.nodes.len(), 2);
    }

    // -----------------------------------------------------------------------
    // M: Spans
    // -----------------------------------------------------------------------

    #[test]
    fn test_parse_node_span_correct_single_node() {
        let parsed = parse_ok(
            "agm: 1.0\npackage: test.pkg\nversion: 0.1.0\n\n\
             node n.node\ntype: facts\nsummary: s\n",
        );
        let only = &parsed.nodes[0].span;
        assert!(only.start_line > 0);
        assert!(only.end_line >= only.start_line);
    }

    #[test]
    fn test_parse_node_span_correct_multiple_nodes() {
        let parsed = parse_ok(
            "agm: 1.0\npackage: test.pkg\nversion: 0.1.0\n\n\
             node a.one\ntype: facts\nsummary: first\n\n\
             node a.two\ntype: rules\nsummary: second\n",
        );
        let (earlier, later) = (&parsed.nodes[0].span, &parsed.nodes[1].span);
        // The first node must start before, and end before, the second begins.
        assert!(earlier.start_line < later.start_line);
        assert!(earlier.end_line < later.start_line);
    }

    // -----------------------------------------------------------------------
    // N: Edge cases
    // -----------------------------------------------------------------------

    #[test]
    fn test_parse_no_nodes_returns_p008() {
        let diags = parse_errs("agm: 1.0\npackage: test.pkg\nversion: 0.1.0\n");
        assert!(errors_contain(&diags, ErrorCode::P008));
    }

    #[test]
    fn test_parse_structured_field_parsed_into_typed_field() {
        // `verify:` is a structured field: it must land in `node.verify` and
        // must not be stored among the unknown `extra_fields`.
        let parsed = parse_ok(
            "agm: 1.0\npackage: test.pkg\nversion: 0.1.0\n\n\
             node n.node\ntype: facts\nsummary: s\nverify:\n  - type: command\n    run: cargo check\n",
        );
        let node = &parsed.nodes[0];
        assert!(!node.extra_fields.contains_key("verify"));
        assert!(node.verify.is_some());
    }

    #[test]
    fn test_parse_body_marker_assigns_to_detail() {
        let parsed = parse_ok(
            "agm: 1.0\npackage: test.pkg\nversion: 0.1.0\n\n\
             node n.node\ntype: facts\nsummary: s\nbody: |\n  This is body text.\n",
        );
        assert_eq!(parsed.nodes[0].detail.as_deref(), Some("This is body text."));
    }
}