Skip to main content

agm_core/parser/
fields.rs

1//! Shared field-parsing helpers used by both header and node parsers.
2
3use std::collections::HashSet;
4
5use crate::error::{AgmError, ErrorCode, ErrorLocation};
6use crate::model::imports::ImportEntry;
7
8use super::lexer::{Line, LineKind};
9
10// ---------------------------------------------------------------------------
11// FieldTracker
12// ---------------------------------------------------------------------------
13
/// Remembers which field names have already been encountered so that a
/// repeated field can be reported as a duplicate (P006).
pub(crate) struct FieldTracker {
    seen: HashSet<String>,
}

impl FieldTracker {
    /// Creates a tracker that has not recorded any field name yet.
    pub(crate) fn new() -> Self {
        let seen = HashSet::new();
        Self { seen }
    }

    /// Records `field_name` and reports whether it had been recorded before.
    ///
    /// Returns `false` the first time a name is passed in and `true` on every
    /// later call with the same name.
    pub(crate) fn track(&mut self, field_name: &str) -> bool {
        if self.seen.contains(field_name) {
            return true;
        }
        self.seen.insert(field_name.to_owned());
        false
    }
}
31
32// ---------------------------------------------------------------------------
33// Structured field detection
34// ---------------------------------------------------------------------------
35
/// Names of the fields whose bodies are handled by the structured-field
/// parser (Step 6).
pub(crate) const STRUCTURED_FIELD_NAMES: &[&str] = &[
    "code",
    "code_blocks",
    "verify",
    "agent_context",
    "parallel_groups",
    "memory",
    "load_profiles",
];

/// Reports whether `name` is one of [`STRUCTURED_FIELD_NAMES`].
///
/// The comparison is an exact, case-sensitive string match.
pub(crate) fn is_structured_field(name: &str) -> bool {
    STRUCTURED_FIELD_NAMES.iter().any(|&known| known == name)
}
50
51// ---------------------------------------------------------------------------
52// parse_indented_list
53// ---------------------------------------------------------------------------
54
55/// Consumes consecutive `ListItem` lines from `lines` starting at `*pos`.
56///
57/// Blank lines between items are skipped if followed by more `ListItem`s.
58/// Returns the collected item strings.
59pub(crate) fn parse_indented_list(lines: &[Line], pos: &mut usize) -> Vec<String> {
60    let mut items = Vec::new();
61    while *pos < lines.len() {
62        match &lines[*pos].kind {
63            LineKind::ListItem(value) => {
64                items.push(value.clone());
65                *pos += 1;
66            }
67            // Comments inside indented lists are skipped (spec S16.7).
68            LineKind::Comment | LineKind::TestExpectHeader(_) => {
69                *pos += 1;
70            }
71            LineKind::Blank => {
72                // Peek ahead: only skip blank if a ListItem or Comment follows.
73                let mut lookahead = *pos + 1;
74                while lookahead < lines.len() {
75                    match &lines[lookahead].kind {
76                        LineKind::Blank => lookahead += 1,
77                        LineKind::Comment | LineKind::TestExpectHeader(_) => {
78                            lookahead += 1;
79                        }
80                        LineKind::ListItem(_) => break,
81                        _ => {
82                            // No more list items — stop consuming.
83                            return items;
84                        }
85                    }
86                }
87                if lookahead < lines.len() {
88                    if let LineKind::ListItem(_) = &lines[lookahead].kind {
89                        *pos += 1; // skip the blank
90                        continue;
91                    }
92                }
93                break;
94            }
95            _ => break,
96        }
97    }
98    items
99}
100
101// ---------------------------------------------------------------------------
102// parse_block
103// ---------------------------------------------------------------------------
104
105/// Consumes consecutive `IndentedLine` and `ListItem` lines (and blank lines
106/// within) that form a block body.
107///
108/// Strips `base_indent` leading spaces from each raw line.
109/// Trailing empty lines are removed before returning.
110pub(crate) fn parse_block(lines: &[Line], pos: &mut usize) -> String {
111    // Determine base indent from the first non-blank indented line.
112    let base_indent = {
113        let mut base = 0usize;
114        let mut i = *pos;
115        while i < lines.len() {
116            match &lines[i].kind {
117                LineKind::IndentedLine(_) | LineKind::ListItem(_) => {
118                    base = lines[i].indent;
119                    break;
120                }
121                LineKind::Blank => {
122                    i += 1;
123                }
124                _ => break,
125            }
126        }
127        base
128    };
129
130    let mut parts: Vec<String> = Vec::new();
131
132    while *pos < lines.len() {
133        match &lines[*pos].kind {
134            LineKind::IndentedLine(_) | LineKind::ListItem(_) => {
135                // Strip base_indent leading spaces from the raw line.
136                let raw = &lines[*pos].raw;
137                let stripped = if raw.len() >= base_indent {
138                    raw[base_indent..].to_owned()
139                } else {
140                    raw.trim_start().to_owned()
141                };
142                parts.push(stripped);
143                *pos += 1;
144            }
145            LineKind::Blank => {
146                // Peek ahead to see if more block content follows.
147                let mut lookahead = *pos + 1;
148                while lookahead < lines.len() {
149                    match &lines[lookahead].kind {
150                        LineKind::Blank => lookahead += 1,
151                        LineKind::IndentedLine(_) | LineKind::ListItem(_) => break,
152                        _ => {
153                            return finish_block(parts);
154                        }
155                    }
156                }
157                if lookahead < lines.len() {
158                    match &lines[lookahead].kind {
159                        LineKind::IndentedLine(_) | LineKind::ListItem(_) => {
160                            parts.push(String::new()); // represent blank line
161                            *pos += 1;
162                            continue;
163                        }
164                        _ => {}
165                    }
166                }
167                break;
168            }
169            _ => break,
170        }
171    }
172
173    finish_block(parts)
174}
175
176/// Joins block lines with `\n`, trims trailing empty lines.
177fn finish_block(mut parts: Vec<String>) -> String {
178    // Trim trailing empty strings.
179    while parts.last().is_some_and(|s| s.is_empty()) {
180        parts.pop();
181    }
182    parts.join("\n")
183}
184
185// ---------------------------------------------------------------------------
186// parse_imports
187// ---------------------------------------------------------------------------
188
189/// Parses a list of raw import strings into `ImportEntry` values.
190///
191/// Invalid entries emit a P001 error but do not stop parsing.
192pub(crate) fn parse_imports(
193    items: &[String],
194    line_number: usize,
195    errors: &mut Vec<AgmError>,
196) -> Vec<crate::model::imports::ImportEntry> {
197    let mut result = Vec::new();
198    for item in items {
199        match item.parse::<ImportEntry>() {
200            Ok(entry) => result.push(entry),
201            Err(_) => {
202                errors.push(AgmError::new(
203                    ErrorCode::P001,
204                    format!("Invalid import entry: {item:?}"),
205                    ErrorLocation::new(None, Some(line_number), None),
206                ));
207            }
208        }
209    }
210    result
211}
212
213// ---------------------------------------------------------------------------
214// collect_structured_raw
215// ---------------------------------------------------------------------------
216
217/// Consumes all lines with indent > 0 (including blank lines within the block).
218///
219/// Used as a stub for structured fields that will be parsed in Step 6.
220/// Returns the raw text joined with `\n`.
221pub(crate) fn collect_structured_raw(lines: &[Line], pos: &mut usize) -> String {
222    let mut parts: Vec<String> = Vec::new();
223
224    while *pos < lines.len() {
225        match &lines[*pos].kind {
226            LineKind::ScalarField(_, _)
227            | LineKind::InlineListField(_, _)
228            | LineKind::FieldStart(_)
229            | LineKind::ListItem(_)
230            | LineKind::IndentedLine(_)
231            | LineKind::BodyMarker => {
232                if lines[*pos].indent > 0
233                    || matches!(
234                        &lines[*pos].kind,
235                        LineKind::ListItem(_) | LineKind::IndentedLine(_)
236                    )
237                {
238                    parts.push(lines[*pos].raw.clone());
239                    *pos += 1;
240                } else {
241                    break;
242                }
243            }
244            LineKind::Blank => {
245                // Peek ahead to see if indented content follows.
246                let mut lookahead = *pos + 1;
247                while lookahead < lines.len() {
248                    if matches!(&lines[lookahead].kind, LineKind::Blank) {
249                        lookahead += 1;
250                    } else {
251                        break;
252                    }
253                }
254                let has_more = lookahead < lines.len()
255                    && matches!(
256                        &lines[lookahead].kind,
257                        LineKind::ScalarField(_, _)
258                            | LineKind::InlineListField(_, _)
259                            | LineKind::FieldStart(_)
260                            | LineKind::ListItem(_)
261                            | LineKind::IndentedLine(_)
262                    )
263                    && lines[lookahead].indent > 0;
264
265                if has_more {
266                    parts.push(lines[*pos].raw.clone());
267                    *pos += 1;
268                } else {
269                    break;
270                }
271            }
272            _ => break,
273        }
274    }
275
276    parts.join("\n")
277}
278
279// ---------------------------------------------------------------------------
280// skip_field_body
281// ---------------------------------------------------------------------------
282
283/// Like `collect_structured_raw` but discards the content.
284///
285/// Used for duplicate field bodies.
286pub(crate) fn skip_field_body(lines: &[Line], pos: &mut usize) {
287    collect_structured_raw(lines, pos);
288}
289
290// ---------------------------------------------------------------------------
291// Tests
292// ---------------------------------------------------------------------------
293
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::lexer::lex;

    // -- FieldTracker -------------------------------------------------------

    #[test]
    fn test_field_tracker_new_not_duplicate() {
        let first_sighting = FieldTracker::new().track("summary");
        assert!(!first_sighting);
    }

    #[test]
    fn test_field_tracker_second_call_is_duplicate() {
        let mut tracker = FieldTracker::new();
        let _first = tracker.track("summary");
        let second = tracker.track("summary");
        assert!(second);
    }

    #[test]
    fn test_field_tracker_different_fields_not_duplicate() {
        let mut tracker = FieldTracker::new();
        let _first = tracker.track("summary");
        let other = tracker.track("detail");
        assert!(!other);
    }

    // -- is_structured_field ------------------------------------------------

    #[test]
    fn test_is_structured_field_known_returns_true() {
        for name in ["code", "verify", "memory"] {
            assert!(is_structured_field(name));
        }
    }

    #[test]
    fn test_is_structured_field_unknown_returns_false() {
        for name in ["summary", "detail"] {
            assert!(!is_structured_field(name));
        }
    }

    // -- parse_indented_list ------------------------------------------------

    #[test]
    fn test_parse_indented_list_basic() {
        let lexed = lex("  - item1\n  - item2\n  - item3\n").unwrap();
        let mut cursor = 0;
        let parsed = parse_indented_list(&lexed, &mut cursor);
        assert_eq!(parsed, ["item1", "item2", "item3"]);
        assert_eq!(cursor, 3);
    }

    #[test]
    fn test_parse_indented_list_stops_at_non_list() {
        let lexed = lex("  - item1\nsummary: foo\n").unwrap();
        let mut cursor = 0;
        let parsed = parse_indented_list(&lexed, &mut cursor);
        assert_eq!(parsed, ["item1"]);
        assert_eq!(cursor, 1);
    }

    // -- parse_block --------------------------------------------------------

    #[test]
    fn test_parse_block_basic() {
        let lexed = lex("  This is block text.\n  Second line.\n").unwrap();
        let mut cursor = 0;
        let body = parse_block(&lexed, &mut cursor);
        assert_eq!(body, "This is block text.\nSecond line.");
    }

    #[test]
    fn test_parse_block_strips_base_indent() {
        let lexed = lex("    indented four\n    second line\n").unwrap();
        let mut cursor = 0;
        let body = parse_block(&lexed, &mut cursor);
        assert_eq!(body, "indented four\nsecond line");
    }
}