Skip to main content

agm_core/parser/
state.rs

1//! Parser for `.agm.state` sidecar files.
2
3use std::collections::BTreeMap;
4use std::str::FromStr;
5
6use crate::error::{AgmError, ErrorCode, ErrorLocation};
7use crate::model::execution::ExecutionStatus;
8use crate::model::state::{NodeState, StateFile};
9use crate::parser::ParseResult;
10use crate::parser::sidecar::{SidecarLineKind, lex_sidecar};
11
12// ---------------------------------------------------------------------------
13// parse_state
14// ---------------------------------------------------------------------------
15
16/// Parses raw `.agm.state` text into a [`StateFile`].
17///
18/// Returns `Err(Vec<AgmError>)` if any Error-severity diagnostics are
19/// produced (missing required headers, bad enum values, duplicate node IDs).
20/// Warnings (unknown fields) are returned inside the `Ok` payload via the
21/// diagnostic approach — since `ParseResult<StateFile>` only carries errors
22/// we collect warnings as `AgmError` with `Severity::Warning` but still
23/// return `Ok` as long as no errors are present.
24///
25/// To keep the API consistent with the rest of the codebase, warnings are
26/// currently accumulated but not returned (they would be returned via a
27/// `DiagnosticCollection` in a higher-level API). Fatal errors always cause
28/// `Err`.
29pub fn parse_state(input: &str) -> ParseResult<StateFile> {
30    let lines = lex_sidecar(input)?;
31    let mut pos = 0;
32    let mut errors: Vec<AgmError> = Vec::new();
33
34    // ------------------------------------------------------------------
35    // 1. Consume header lines (# key: value)
36    // ------------------------------------------------------------------
37    let mut format_version: Option<String> = None;
38    let mut package: Option<String> = None;
39    let mut version: Option<String> = None;
40    let mut session_id: Option<String> = None;
41    let mut started_at: Option<String> = None;
42    let mut updated_at: Option<String> = None;
43
44    while pos < lines.len() {
45        match &lines[pos].kind {
46            SidecarLineKind::Blank | SidecarLineKind::Comment(_) => {
47                pos += 1;
48            }
49            SidecarLineKind::Header(key, value) => {
50                match key.as_str() {
51                    "agm.state" => format_version = Some(value.clone()),
52                    "package" => package = Some(value.clone()),
53                    "version" => version = Some(value.clone()),
54                    "session_id" => session_id = Some(value.clone()),
55                    "started_at" => started_at = Some(value.clone()),
56                    "updated_at" => updated_at = Some(value.clone()),
57                    _ => {
58                        // unknown header — emit warning but continue
59                        errors.push(AgmError::new(
60                            ErrorCode::P009,
61                            format!("Unknown header field '{}' in state file", key),
62                            ErrorLocation::new(None, Some(lines[pos].number), None),
63                        ));
64                    }
65                }
66                pos += 1;
67            }
68            // Once we hit a non-header line, headers are done
69            _ => break,
70        }
71    }
72
73    // Validate required headers
74    for (field, present) in [
75        ("agm.state", format_version.is_some()),
76        ("package", package.is_some()),
77        ("version", version.is_some()),
78        ("session_id", session_id.is_some()),
79        ("started_at", started_at.is_some()),
80        ("updated_at", updated_at.is_some()),
81    ] {
82        if !present {
83            errors.push(AgmError::new(
84                ErrorCode::P001,
85                format!("Missing required header field '{field}' in state file"),
86                ErrorLocation::new(None, Some(1), None),
87            ));
88        }
89    }
90
91    // ------------------------------------------------------------------
92    // 2. Parse node blocks
93    // ------------------------------------------------------------------
94    let mut nodes: BTreeMap<String, NodeState> = BTreeMap::new();
95
96    while pos < lines.len() {
97        match &lines[pos].kind {
98            SidecarLineKind::Blank | SidecarLineKind::Comment(_) => {
99                pos += 1;
100            }
101            SidecarLineKind::BlockDecl(keyword, node_id) if keyword == "state" => {
102                let node_id = node_id.clone();
103                let line_num = lines[pos].number;
104                pos += 1;
105
106                // Collect fields for this block
107                let mut exec_status: Option<ExecutionStatus> = None;
108                let mut executed_by: Option<String> = None;
109                let mut executed_at: Option<String> = None;
110                let mut execution_log: Option<String> = None;
111                let mut retry_count: u32 = 0;
112
113                while pos < lines.len() {
114                    match &lines[pos].kind {
115                        SidecarLineKind::Blank => break,
116                        SidecarLineKind::Comment(_) => {
117                            pos += 1;
118                        }
119                        SidecarLineKind::BlockDecl(_, _) => break,
120                        SidecarLineKind::Field(key, value) => {
121                            let field_line = lines[pos].number;
122                            match key.as_str() {
123                                "execution_status" => match ExecutionStatus::from_str(value) {
124                                    Ok(s) => exec_status = Some(s),
125                                    Err(_) => {
126                                        errors.push(AgmError::new(
127                                            ErrorCode::P003,
128                                            format!(
129                                                "Invalid execution_status value '{}' in node '{}'",
130                                                value, node_id
131                                            ),
132                                            ErrorLocation::new(None, Some(field_line), None),
133                                        ));
134                                    }
135                                },
136                                "executed_by" => {
137                                    executed_by = if value.is_empty() {
138                                        None
139                                    } else {
140                                        Some(value.clone())
141                                    };
142                                }
143                                "executed_at" => {
144                                    executed_at = if value.is_empty() {
145                                        None
146                                    } else {
147                                        Some(value.clone())
148                                    };
149                                }
150                                "execution_log" => {
151                                    execution_log = if value.is_empty() {
152                                        None
153                                    } else {
154                                        Some(value.clone())
155                                    };
156                                }
157                                "retry_count" => match value.parse::<u32>() {
158                                    Ok(n) => retry_count = n,
159                                    Err(_) => {
160                                        errors.push(AgmError::new(
161                                            ErrorCode::P003,
162                                            format!(
163                                                "Invalid retry_count value '{}' in node '{}'",
164                                                value, node_id
165                                            ),
166                                            ErrorLocation::new(None, Some(field_line), None),
167                                        ));
168                                    }
169                                },
170                                unknown => {
171                                    errors.push(AgmError::new(
172                                        ErrorCode::P009,
173                                        format!(
174                                            "Unknown field '{}' in state block '{}'",
175                                            unknown, node_id
176                                        ),
177                                        ErrorLocation::new(None, Some(field_line), None),
178                                    ));
179                                }
180                            }
181                            pos += 1;
182                        }
183                        SidecarLineKind::Header(_, _) | SidecarLineKind::Continuation(_) => {
184                            pos += 1;
185                        }
186                    }
187                }
188
189                // execution_status is required
190                let final_status = match exec_status {
191                    Some(s) => s,
192                    None => {
193                        errors.push(AgmError::new(
194                            ErrorCode::P001,
195                            format!("Missing required field 'execution_status' in state block '{node_id}'"),
196                            ErrorLocation::new(None, Some(line_num), None),
197                        ));
198                        ExecutionStatus::Pending // placeholder
199                    }
200                };
201
202                // Duplicate node ID check
203                use std::collections::btree_map::Entry;
204                match nodes.entry(node_id.clone()) {
205                    Entry::Occupied(_) => {
206                        errors.push(AgmError::new(
207                            ErrorCode::P006,
208                            format!("Duplicate node ID '{node_id}' in state file"),
209                            ErrorLocation::new(None, Some(line_num), None),
210                        ));
211                    }
212                    Entry::Vacant(slot) => {
213                        slot.insert(NodeState {
214                            execution_status: final_status,
215                            executed_by,
216                            executed_at,
217                            execution_log,
218                            retry_count,
219                        });
220                    }
221                }
222            }
223            SidecarLineKind::BlockDecl(keyword, _) => {
224                // BlockDecl with unexpected keyword
225                errors.push(AgmError::new(
226                    ErrorCode::P003,
227                    format!(
228                        "Unexpected block keyword '{}' in state file (expected 'state')",
229                        keyword
230                    ),
231                    ErrorLocation::new(None, Some(lines[pos].number), None),
232                ));
233                pos += 1;
234            }
235            SidecarLineKind::Field(key, _) => {
236                // Field outside a block
237                errors.push(AgmError::new(
238                    ErrorCode::P003,
239                    format!("Field '{}' outside of a 'state' block", key),
240                    ErrorLocation::new(None, Some(lines[pos].number), None),
241                ));
242                pos += 1;
243            }
244            SidecarLineKind::Continuation(_) => {
245                pos += 1;
246            }
247            SidecarLineKind::Header(_, _) => {
248                pos += 1;
249            }
250        }
251    }
252
253    // ------------------------------------------------------------------
254    // 3. Return
255    // ------------------------------------------------------------------
256    if errors.iter().any(|e| e.is_error()) {
257        Err(errors)
258    } else {
259        Ok(StateFile {
260            format_version: format_version.unwrap_or_default(),
261            package: package.unwrap_or_default(),
262            version: version.unwrap_or_default(),
263            session_id: session_id.unwrap_or_default(),
264            started_at: started_at.unwrap_or_default(),
265            updated_at: updated_at.unwrap_or_default(),
266            nodes,
267        })
268    }
269}
270
271// ---------------------------------------------------------------------------
272// Tests
273// ---------------------------------------------------------------------------
274
#[cfg(test)]
mod tests {
    use super::*;
    use crate::error::ErrorCode;

    /// Header-only state file: all six required headers, no node blocks.
    const MINIMAL_STATE: &str = concat!(
        "# agm.state: 1.0\n",
        "# package: test.pkg\n",
        "# version: 0.1.0\n",
        "# session_id: run-001\n",
        "# started_at: 2026-04-08T10:00:00Z\n",
        "# updated_at: 2026-04-08T10:00:00Z\n",
    );

    /// Complete state file: headers followed by two node blocks.
    const FULL_STATE: &str = concat!(
        "# agm.state: 1.0\n",
        "# package: acme.migration\n",
        "# version: 1.0.0\n",
        "# session_id: run-2026-04-08-153200\n",
        "# started_at: 2026-04-08T15:32:00Z\n",
        "# updated_at: 2026-04-08T15:35:00Z\n",
        "\n",
        "state migration.025.data\n",
        "execution_status: ready\n",
        "retry_count: 0\n",
        "\n",
        "state migration.025.schema\n",
        "execution_status: completed\n",
        "executed_by: shell-agent\n",
        "executed_at: 2026-04-08T15:30:00Z\n",
        "retry_count: 0\n",
        "execution_log: .agm/logs/migration.025.schema.log\n",
    );

    /// Appends `blocks` to the minimal headers, separated by one blank line.
    fn with_blocks(blocks: &str) -> String {
        format!("{MINIMAL_STATE}\n{blocks}")
    }

    /// Returns the minimal headers with the `# <key>:` line removed.
    fn without_header(key: &str) -> String {
        let prefix = format!("# {key}:");
        MINIMAL_STATE
            .lines()
            .filter(|line| !line.starts_with(prefix.as_str()))
            .map(|line| format!("{line}\n"))
            .collect()
    }

    /// True when at least one diagnostic carries `code`.
    fn has_code(errors: &[AgmError], code: ErrorCode) -> bool {
        errors.iter().any(|e| e.code == code)
    }

    /// True when a diagnostic carries `code` and its message contains `needle`.
    fn has_code_mentioning(errors: &[AgmError], code: ErrorCode, needle: &str) -> bool {
        errors
            .iter()
            .any(|e| e.code == code && e.message.contains(needle))
    }

    /// A: headers only — parses successfully with an empty node map.
    #[test]
    fn test_parse_state_minimal_valid_returns_ok() {
        let result = parse_state(MINIMAL_STATE);
        assert!(result.is_ok(), "expected Ok, got: {:?}", result);

        let sf = result.unwrap();
        assert_eq!(sf.format_version, "1.0");
        assert_eq!(sf.package, "test.pkg");
        assert_eq!(sf.version, "0.1.0");
        assert_eq!(sf.session_id, "run-001");
        assert!(sf.nodes.is_empty());
    }

    /// B: full file — both node blocks and their fields are captured.
    #[test]
    fn test_parse_state_full_valid_returns_nodes() {
        let sf = parse_state(FULL_STATE).unwrap();
        assert_eq!(sf.nodes.len(), 2);

        let data = &sf.nodes["migration.025.data"];
        assert_eq!(data.execution_status, ExecutionStatus::Ready);

        let schema = &sf.nodes["migration.025.schema"];
        assert_eq!(schema.execution_status, ExecutionStatus::Completed);
        assert_eq!(schema.executed_by.as_deref(), Some("shell-agent"));
        assert_eq!(
            schema.execution_log.as_deref(),
            Some(".agm/logs/migration.025.schema.log")
        );
    }

    /// C: a missing `agm.state` header is reported as P001.
    #[test]
    fn test_parse_state_missing_agm_state_header_returns_p001() {
        let input = without_header("agm.state");
        let errors = parse_state(&input).unwrap_err();
        assert!(has_code(&errors, ErrorCode::P001));
        assert!(has_code_mentioning(&errors, ErrorCode::P001, "agm.state"));
    }

    /// C: a missing `package` header is reported as P001.
    #[test]
    fn test_parse_state_missing_package_header_returns_p001() {
        let input = without_header("package");
        let errors = parse_state(&input).unwrap_err();
        assert!(has_code_mentioning(&errors, ErrorCode::P001, "package"));
    }

    /// C: a missing `session_id` header is reported as P001.
    #[test]
    fn test_parse_state_missing_session_id_returns_p001() {
        let input = without_header("session_id");
        let errors = parse_state(&input).unwrap_err();
        assert!(has_code_mentioning(&errors, ErrorCode::P001, "session_id"));
    }

    /// D: declaring the same node twice is reported as P006.
    #[test]
    fn test_parse_state_duplicate_node_id_returns_p006() {
        let input = with_blocks(concat!(
            "state dup.node\n",
            "execution_status: pending\n",
            "retry_count: 0\n",
            "\n",
            "state dup.node\n",
            "execution_status: ready\n",
            "retry_count: 0\n",
        ));
        let errors = parse_state(&input).unwrap_err();
        assert!(has_code(&errors, ErrorCode::P006));
    }

    /// E: an unrecognised `execution_status` value is reported as P003.
    #[test]
    fn test_parse_state_bad_status_returns_p003() {
        let input = with_blocks(concat!(
            "state bad.node\n",
            "execution_status: running\n",
            "retry_count: 0\n",
        ));
        let errors = parse_state(&input).unwrap_err();
        assert!(has_code(&errors, ErrorCode::P003));
        assert!(has_code_mentioning(&errors, ErrorCode::P003, "running"));
    }

    /// F: an unknown field only yields a P009 warning, so parsing still
    /// succeeds.
    #[test]
    fn test_parse_state_unknown_field_returns_p009_warning() {
        let input = with_blocks(concat!(
            "state known.node\n",
            "execution_status: pending\n",
            "retry_count: 0\n",
            "mystery_field: some value\n",
        ));
        // Warnings must not turn the result into Err.
        let result = parse_state(&input);
        assert!(
            result.is_ok(),
            "expected Ok with warnings, got: {:?}",
            result
        );
    }

    /// G: a non-numeric `retry_count` is reported as P003.
    #[test]
    fn test_parse_state_bad_retry_count_returns_p003() {
        let input = with_blocks(concat!(
            "state node.one\n",
            "execution_status: pending\n",
            "retry_count: not-a-number\n",
        ));
        let errors = parse_state(&input).unwrap_err();
        assert!(has_code(&errors, ErrorCode::P003));
    }

    /// H: every execution status keyword maps to its enum variant.
    #[test]
    fn test_parse_state_all_statuses_parsed_correctly() {
        let statuses = [
            ("pending", ExecutionStatus::Pending),
            ("ready", ExecutionStatus::Ready),
            ("in_progress", ExecutionStatus::InProgress),
            ("completed", ExecutionStatus::Completed),
            ("failed", ExecutionStatus::Failed),
            ("blocked", ExecutionStatus::Blocked),
            ("skipped", ExecutionStatus::Skipped),
        ];

        for (status_str, expected) in &statuses {
            let block =
                format!("state test.node\nexecution_status: {status_str}\nretry_count: 0\n");
            let sf = match parse_state(&with_blocks(&block)) {
                Ok(sf) => sf,
                Err(_) => panic!("failed for status {status_str}"),
            };
            assert_eq!(sf.nodes["test.node"].execution_status, *expected);
        }
    }

    /// I: comments after the headers and inside a block are ignored.
    #[test]
    fn test_parse_state_comments_are_ignored() {
        let input = format!(
            "{MINIMAL_STATE}{}",
            concat!(
                "# This is a comment\n",
                "\n",
                "state n.one\n",
                "# comment inside block\n",
                "execution_status: pending\n",
                "retry_count: 0\n",
            )
        );
        let sf = parse_state(&input).unwrap();
        assert_eq!(sf.nodes.len(), 1);
    }

    /// J: several blank lines between blocks are accepted.
    #[test]
    fn test_parse_state_blank_lines_between_blocks_ok() {
        let input = with_blocks(concat!(
            "state n.one\n",
            "execution_status: pending\n",
            "retry_count: 0\n",
            "\n",
            "\n",
            "state n.two\n",
            "execution_status: ready\n",
            "retry_count: 0\n",
        ));
        let sf = parse_state(&input).unwrap();
        assert_eq!(sf.nodes.len(), 2);
    }

    /// K: header timestamp values are stored verbatim.
    #[test]
    fn test_parse_state_header_timestamps_preserved() {
        let sf = parse_state(FULL_STATE).unwrap();
        assert_eq!(sf.started_at, "2026-04-08T15:32:00Z");
        assert_eq!(sf.updated_at, "2026-04-08T15:35:00Z");
    }

    /// L: optional block fields are `None` when they are not given.
    #[test]
    fn test_parse_state_optional_fields_absent_when_not_specified() {
        let input = with_blocks(concat!(
            "state n.minimal\n",
            "execution_status: pending\n",
            "retry_count: 0\n",
        ));
        let sf = parse_state(&input).unwrap();
        let node = &sf.nodes["n.minimal"];
        assert!(node.executed_by.is_none());
        assert!(node.executed_at.is_none());
        assert!(node.execution_log.is_none());
    }

    /// M: empty input has no headers and is therefore rejected.
    #[test]
    fn test_parse_state_empty_input_returns_error() {
        assert!(parse_state("").is_err());
    }

    /// N: `retry_count` falls back to 0 when the field is absent.
    #[test]
    fn test_parse_state_retry_count_defaults_to_zero() {
        let input = with_blocks(concat!(
            "state n.one\n",
            "execution_status: pending\n",
        ));
        let sf = parse_state(&input).unwrap();
        assert_eq!(sf.nodes["n.one"].retry_count, 0);
    }

    /// O: each missing header gets its own P001 diagnostic.
    #[test]
    fn test_parse_state_multiple_missing_headers_all_reported() {
        // Only the agm.state header is present; the other five are missing.
        let errors = parse_state("# agm.state: 1.0\n").unwrap_err();
        let p001_count = errors
            .iter()
            .filter(|e| e.code == ErrorCode::P001)
            .count();
        assert!(
            p001_count >= 5,
            "expected at least 5 P001 errors, got {p001_count}"
        );
    }
}
632            p001_count >= 5,
633            "expected at least 5 P001 errors, got {p001_count}"
634        );
635    }
636}