Skip to main content

agm_core/parser/
sidecar.rs

//! Shared sidecar line classifier and lexer.
//!
//! Used by both the state parser (`parser/state.rs`) and the mem parser
//! (`parser/mem.rs`). Do NOT modify `lexer.rs` — this is a separate lexer
//! for sidecar file formats.
6
7use crate::error::{AgmError, ErrorCode, ErrorLocation};
8
9// ---------------------------------------------------------------------------
10// SidecarLineKind
11// ---------------------------------------------------------------------------
12
/// Classification of a single line in a sidecar file.
///
/// Every payload is an owned `String`, so the type is fully comparable
/// (`Eq`) and can be matched against expected values in tests and parsers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SidecarLineKind {
    /// A commented header line: `# key: value` (key, value).
    Header(String, String),
    /// A block declaration: `state <id>` or `entry <key>` (keyword, identifier).
    BlockDecl(String, String),
    /// A key-value field: `key: value` (value may be empty).
    Field(String, String),
    /// An indented continuation line (2+ leading spaces); the payload is the
    /// line with exactly the first 2 spaces stripped, so deeper indentation
    /// is preserved.
    Continuation(String),
    /// A blank or whitespace-only line.
    Blank,
    /// A comment that does not match the `# key: value` header pattern.
    /// Also used for lines that match no other classification.
    Comment(String),
}

// ---------------------------------------------------------------------------
// SidecarLine
// ---------------------------------------------------------------------------

/// A classified line with its original position and raw content.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SidecarLine {
    /// What the line was classified as.
    pub kind: SidecarLineKind,
    /// 1-based line number within the lexed input.
    pub number: usize,
    /// The line text as it appeared in the input, without its line terminator.
    pub raw: String,
}
41
42// ---------------------------------------------------------------------------
43// classify_sidecar_line
44// ---------------------------------------------------------------------------
45
46/// Classifies a single raw line from a sidecar file.
47///
48/// Priority order:
49/// 1. Empty/whitespace -> [`SidecarLineKind::Blank`]
50/// 2. Starts with `"# "` -> Header if matches `# key: value`, else Comment
51/// 3. Starts with `"#"` alone -> Comment("")
52/// 4. Matches `^(state|entry) (.+)$` -> [`SidecarLineKind::BlockDecl`]
53/// 5. Starts with 2+ spaces -> [`SidecarLineKind::Continuation`] (2 spaces stripped)
54/// 6. Matches `^([a-z_][a-z0-9_]*): (.*)$` or `^([a-z_][a-z0-9_]*):$` -> Field
55#[must_use]
56pub fn classify_sidecar_line(line: &str) -> SidecarLineKind {
57    // 1. Blank
58    if line.trim().is_empty() {
59        return SidecarLineKind::Blank;
60    }
61
62    // 2 & 3. Comment / Header
63    if let Some(rest_of_hash) = line.strip_prefix('#') {
64        let after_hash = if let Some(stripped) = line.strip_prefix("# ") {
65            stripped
66        } else {
67            // bare `#` or `##...`
68            return SidecarLineKind::Comment(rest_of_hash.trim_start().to_owned());
69        };
70
71        // Try to parse as `key: value` header
72        if let Some(colon_pos) = after_hash.find(": ") {
73            let key = &after_hash[..colon_pos];
74            let value = &after_hash[colon_pos + 2..];
75            if is_header_key(key) {
76                return SidecarLineKind::Header(key.to_owned(), value.to_owned());
77            }
78        }
79        // Also handle `key:` with empty value (no trailing space)
80        if let Some(key) = after_hash.strip_suffix(':') {
81            if is_header_key(key) {
82                return SidecarLineKind::Header(key.to_owned(), String::new());
83            }
84        }
85        return SidecarLineKind::Comment(after_hash.to_owned());
86    }
87
88    // 4. BlockDecl: `state <id>` or `entry <key>`
89    if let Some(rest) = line.strip_prefix("state ") {
90        let id = rest.trim_end();
91        if !id.is_empty() {
92            return SidecarLineKind::BlockDecl("state".to_owned(), id.to_owned());
93        }
94    }
95    if let Some(rest) = line.strip_prefix("entry ") {
96        let id = rest.trim_end();
97        if !id.is_empty() {
98            return SidecarLineKind::BlockDecl("entry".to_owned(), id.to_owned());
99        }
100    }
101
102    // 5. Continuation (2+ leading spaces)
103    if let Some(stripped) = line.strip_prefix("  ") {
104        return SidecarLineKind::Continuation(stripped.to_owned());
105    }
106
107    // 6. Field: `key: value` or `key:`
108    if let Some(colon_pos) = line.find(':') {
109        let key = &line[..colon_pos];
110        if is_field_key(key) {
111            let rest = &line[colon_pos + 1..];
112            let value = if let Some(stripped) = rest.strip_prefix(' ') {
113                stripped.to_owned()
114            } else {
115                rest.to_owned()
116            };
117            return SidecarLineKind::Field(key.to_owned(), value);
118        }
119    }
120
121    // Fallback: treat as a comment / unknown
122    SidecarLineKind::Comment(line.to_owned())
123}
124
/// Reports whether `s` is a well-formed header key, i.e. matches
/// `[a-z][a-z0-9_.]*`.
///
/// The empty string is rejected. Only ASCII is accepted: every byte of a
/// multi-byte UTF-8 character falls outside the allowed ranges.
fn is_header_key(s: &str) -> bool {
    match s.as_bytes().split_first() {
        None => false,
        Some((head, tail)) => {
            head.is_ascii_lowercase()
                && tail
                    .iter()
                    .all(|b| matches!(*b, b'a'..=b'z' | b'0'..=b'9' | b'_' | b'.'))
        }
    }
}
134
/// Reports whether `s` is a well-formed field key, i.e. matches
/// `[a-z_][a-z0-9_]*`.
///
/// The empty string is rejected. Digits are allowed everywhere except in the
/// first position.
fn is_field_key(s: &str) -> bool {
    !s.is_empty()
        && s.chars().enumerate().all(|(position, ch)| {
            ch == '_' || ch.is_ascii_lowercase() || (position > 0 && ch.is_ascii_digit())
        })
}
144
145// ---------------------------------------------------------------------------
146// lex_sidecar
147// ---------------------------------------------------------------------------
148
149/// Tokenises every line of a sidecar file.
150///
151/// Returns a `Vec<SidecarLine>` on success. Returns `Err(Vec<AgmError>)` if
152/// any line contains a tab character (error P004).
153pub fn lex_sidecar(input: &str) -> Result<Vec<SidecarLine>, Vec<AgmError>> {
154    let mut lines = Vec::new();
155    let mut errors = Vec::new();
156
157    for (index, raw) in input.lines().enumerate() {
158        let number = index + 1;
159
160        // P004: tabs not allowed
161        if raw.contains('\t') {
162            errors.push(AgmError::new(
163                ErrorCode::P004,
164                format!("Tab character in indentation at line {number} (spaces required)"),
165                ErrorLocation::new(None, Some(number), None),
166            ));
167            continue;
168        }
169
170        let kind = classify_sidecar_line(raw);
171        lines.push(SidecarLine {
172            kind,
173            number,
174            raw: raw.to_owned(),
175        });
176    }
177
178    if errors.is_empty() {
179        Ok(lines)
180    } else {
181        Err(errors)
182    }
183}
184
185// ---------------------------------------------------------------------------
186// Tests
187// ---------------------------------------------------------------------------
188
#[cfg(test)]
mod tests {
    use super::*;

    // Small constructors so each case spells out only the strings that matter.

    fn header(key: &str, value: &str) -> SidecarLineKind {
        SidecarLineKind::Header(key.to_owned(), value.to_owned())
    }

    fn block(keyword: &str, id: &str) -> SidecarLineKind {
        SidecarLineKind::BlockDecl(keyword.to_owned(), id.to_owned())
    }

    fn field(key: &str, value: &str) -> SidecarLineKind {
        SidecarLineKind::Field(key.to_owned(), value.to_owned())
    }

    fn continuation(text: &str) -> SidecarLineKind {
        SidecarLineKind::Continuation(text.to_owned())
    }

    fn comment(text: &str) -> SidecarLineKind {
        SidecarLineKind::Comment(text.to_owned())
    }

    // -----------------------------------------------------------------------
    // A: Blank lines
    // -----------------------------------------------------------------------

    #[test]
    fn test_classify_empty_string_is_blank() {
        assert_eq!(classify_sidecar_line(""), SidecarLineKind::Blank);
    }

    #[test]
    fn test_classify_whitespace_only_is_blank() {
        assert_eq!(classify_sidecar_line("   "), SidecarLineKind::Blank);
    }

    // -----------------------------------------------------------------------
    // B: Header lines
    // -----------------------------------------------------------------------

    #[test]
    fn test_classify_hash_key_value_is_header() {
        let kind = classify_sidecar_line("# agm.state: 1.0");
        assert_eq!(kind, header("agm.state", "1.0"));
    }

    #[test]
    fn test_classify_header_package() {
        let kind = classify_sidecar_line("# package: test.pkg");
        assert_eq!(kind, header("package", "test.pkg"));
    }

    #[test]
    fn test_classify_header_session_id() {
        let kind = classify_sidecar_line("# session_id: run-001");
        assert_eq!(kind, header("session_id", "run-001"));
    }

    #[test]
    fn test_classify_comment_no_colon() {
        let kind = classify_sidecar_line("# just a comment");
        assert_eq!(kind, comment("just a comment"));
    }

    #[test]
    fn test_classify_comment_uppercase_key_not_header() {
        // An uppercase letter disqualifies the key, so the line stays a comment.
        let kind = classify_sidecar_line("# Package: test.pkg");
        assert_eq!(kind, comment("Package: test.pkg"));
    }

    #[test]
    fn test_classify_bare_hash_is_comment() {
        let kind = classify_sidecar_line("#");
        assert_eq!(kind, comment(""));
    }

    // -----------------------------------------------------------------------
    // C: BlockDecl lines
    // -----------------------------------------------------------------------

    #[test]
    fn test_classify_state_block_decl() {
        let kind = classify_sidecar_line("state migration.025.data");
        assert_eq!(kind, block("state", "migration.025.data"));
    }

    #[test]
    fn test_classify_entry_block_decl() {
        let kind = classify_sidecar_line("entry project.db_version");
        assert_eq!(kind, block("entry", "project.db_version"));
    }

    // -----------------------------------------------------------------------
    // D: Field lines
    // -----------------------------------------------------------------------

    #[test]
    fn test_classify_field_with_value() {
        let kind = classify_sidecar_line("execution_status: completed");
        assert_eq!(kind, field("execution_status", "completed"));
    }

    #[test]
    fn test_classify_field_empty_value() {
        let kind = classify_sidecar_line("execution_log:");
        assert_eq!(kind, field("execution_log", ""));
    }

    #[test]
    fn test_classify_field_retry_count() {
        let kind = classify_sidecar_line("retry_count: 0");
        assert_eq!(kind, field("retry_count", "0"));
    }

    // -----------------------------------------------------------------------
    // E: Continuation lines
    // -----------------------------------------------------------------------

    #[test]
    fn test_classify_two_spaces_is_continuation() {
        let kind = classify_sidecar_line("  continuation content");
        assert_eq!(kind, continuation("continuation content"));
    }

    #[test]
    fn test_classify_four_spaces_is_continuation_strips_two() {
        // Only the first two spaces are removed; the remaining two survive.
        let kind = classify_sidecar_line("    deeper content");
        assert_eq!(kind, continuation("  deeper content"));
    }

    // -----------------------------------------------------------------------
    // F: lex_sidecar
    // -----------------------------------------------------------------------

    #[test]
    fn test_lex_sidecar_simple_input_returns_ok() {
        let input =
            "# agm.state: 1.0\n# package: test.pkg\n\nstate node.one\nexecution_status: pending\n";
        let lines = lex_sidecar(input).unwrap();
        assert_eq!(lines.len(), 5);
        assert_eq!(lines[0].kind, header("agm.state", "1.0"));
        assert_eq!(lines[2].kind, SidecarLineKind::Blank);
    }

    #[test]
    fn test_lex_sidecar_tab_returns_error_p004() {
        let input = "# agm.state: 1.0\n\texecution_status: pending\n";
        let errors = lex_sidecar(input).unwrap_err();
        let first = errors.first().expect("at least one error must be reported");
        assert_eq!(first.code, ErrorCode::P004);
    }

    #[test]
    fn test_lex_sidecar_line_numbers_start_at_one() {
        let input = "# agm.state: 1.0\n# package: test\n";
        let lines = lex_sidecar(input).unwrap();
        let numbers: Vec<usize> = lines.iter().take(2).map(|line| line.number).collect();
        assert_eq!(numbers, vec![1, 2]);
    }
}