Skip to main content

serde_structprop/
parse.rs

//! Parser for the structprop format.
//!
//! This module contains the [`Value`] type that represents a parsed structprop
//! document and the [`parse()`] function that converts a raw `&str` into a
//! [`Value::Object`] tree.
//!
//! # Grammar (informal)
//!
//! ```text
//! document   = assignment*
//! assignment = TERM '=' value
//!            | TERM '{' assignment* '}'
//! value      = TERM
//!            | '{' (TERM | '{' assignment* '}')* '}'
//! ```
16
17use crate::error::{Error, Result};
18use crate::lexer::{tokenize, Token};
19use indexmap::IndexMap;
20
21// ---------------------------------------------------------------------------
22// Public types
23// ---------------------------------------------------------------------------
24
25/// A node in the structprop value tree produced by [`parse()`].
26///
27/// The tree maps directly onto structprop's three syntactic forms:
28///
29/// | Structprop syntax | Variant |
30/// |---|---|
31/// | `key = value` | [`Value::Scalar`] |
32/// | `key = { a b c }` | [`Value::Array`] of [`Value::Scalar`]s |
33/// | `key = { { k = v } { k = v } }` | [`Value::Array`] of [`Value::Object`]s |
34/// | `key { … }` | [`Value::Object`] |
35///
36/// Scalar strings are stored verbatim (no coercion at parse time); numeric
37/// or boolean coercion is performed lazily via the [`Value::as_bool`],
38/// [`Value::as_i64`], and [`Value::as_f64`] helpers.  Duplicate keys within
39/// any object block are detected and rejected during parsing.
40#[derive(Debug, Clone, PartialEq)]
41pub enum Value {
42    /// A bare or quoted string token, stored as-is (no coercion applied).
43    ///
44    /// Use [`Value::as_bool`], [`Value::as_i64`], or [`Value::as_f64`] to
45    /// attempt type coercion, or [`Value::is_null`] to test for `null`.
46    Scalar(String),
47
48    /// An ordered list of values, corresponding to `key = { … }` syntax.
49    ///
50    /// Array items may be [`Value::Scalar`]s (bare terms) or
51    /// [`Value::Object`]s (written as `{ key = val … }` inline sub-objects).
52    /// Duplicate keys within a sub-object are rejected at parse time.
53    Array(Vec<Value>),
54
55    /// An ordered map from string keys to values, corresponding to either a
56    /// `key { … }` block or the implicit top-level document object.
57    ///
58    /// Key insertion order is preserved via [`IndexMap`].
59    Object(IndexMap<String, Value>),
60}
61
62// ---------------------------------------------------------------------------
63// Public entry point
64// ---------------------------------------------------------------------------
65
66/// Parse a structprop document from `input` and return the top-level
67/// [`Value::Object`].
68///
69/// # Errors
70///
71/// Returns [`Error::Parse`] if the input contains unexpected tokens or
72/// violates the structprop grammar.  The error message includes the 1-indexed
73/// line number where the problem was detected.
74///
75/// # Examples
76///
77/// ```
78/// use serde_structprop::parse::{parse, Value};
79///
80/// let v = parse("port = 8080\n").unwrap();
81/// if let Value::Object(map) = v {
82///     assert_eq!(map["port"].as_i64(), Some(8080));
83/// }
84/// ```
85pub fn parse(input: &str) -> Result<Value> {
86    let tokens = tokenize(input)?;
87    let mut pos = 0usize;
88    let map = parse_object(
89        &tokens, &mut pos, /*top_level=*/ true, /*open_line=*/ 0,
90    )?;
91    Ok(Value::Object(map))
92}
93
94// ---------------------------------------------------------------------------
95// Internal parser helpers
96// ---------------------------------------------------------------------------
97
98/// Return a reference to the token at `pos` without advancing.
99fn peek(tokens: &[(Token, u32)], pos: usize) -> &Token {
100    tokens.get(pos).map_or(&Token::Eof, |(tok, _)| tok)
101}
102
103/// Format a token as a human-readable string for error messages.
104fn token_display(tok: Option<&Token>) -> String {
105    match tok {
106        Some(Token::Term(s)) => format!("'{s}'"),
107        Some(Token::Eq) => "'='".to_owned(),
108        Some(Token::Open) => "'{{'".to_owned(),
109        Some(Token::Close) => "'}}'".to_owned(),
110        Some(Token::Eof) | None => "end of input".to_owned(),
111    }
112}
113
114/// Return the source line of the token at `pos`.
115fn line_at(tokens: &[(Token, u32)], pos: usize) -> u32 {
116    tokens.get(pos).map_or(0, |&(_, line)| line)
117}
118
/// Step the token cursor `pos` forward to the next token.
fn advance(pos: &mut usize) {
    *pos += 1;
}
123
124/// Consume the next token, asserting it is a [`Token::Term`], and return its
125/// string value.
126///
127/// # Errors
128///
129/// Returns [`Error::Parse`] with a line number if the next token is not a term.
130fn expect_term(tokens: &[(Token, u32)], pos: &mut usize) -> Result<String> {
131    let line = line_at(tokens, *pos);
132    match tokens.get(*pos) {
133        Some((Token::Term(s), _)) => {
134            let s = s.clone();
135            advance(pos);
136            Ok(s)
137        }
138        other => {
139            let tok = other.map(|(t, _)| t);
140            Err(Error::Parse(format!(
141                "line {line}: expected a key or value, got {}",
142                token_display(tok)
143            )))
144        }
145    }
146}
147
148/// Parse a sequence of assignments into an [`IndexMap`].
149///
150/// * If `top_level` is `true`, parsing stops at [`Token::Eof`].
151/// * If `top_level` is `false`, parsing stops at `}` (which is consumed).
152///   `open_line` must be the source line of the opening `{` so that the EOF
153///   error can point back to where the block started.
154///
155/// # Errors
156///
157/// Returns [`Error::Parse`] on malformed input.
158fn parse_object(
159    tokens: &[(Token, u32)],
160    pos: &mut usize,
161    top_level: bool,
162    open_line: u32,
163) -> Result<IndexMap<String, Value>> {
164    let mut map = IndexMap::new();
165
166    loop {
167        let line = line_at(tokens, *pos);
168        match peek(tokens, *pos) {
169            Token::Eof => {
170                if top_level {
171                    break;
172                }
173                return Err(Error::Parse(format!(
174                    "line {line}: unexpected EOF inside object opened on line {open_line}"
175                )));
176            }
177            Token::Close => {
178                if top_level {
179                    return Err(Error::Parse(format!("line {line}: unexpected '}}'")));
180                }
181                advance(pos); // consume '}'
182                break;
183            }
184            Token::Term(_) => {
185                let key = expect_term(tokens, pos)?;
186                let after_line = line_at(tokens, *pos);
187                match peek(tokens, *pos) {
188                    Token::Eq => {
189                        advance(pos); // consume '='
190                        let val = parse_value(tokens, pos)?;
191                        if map.contains_key(&key) {
192                            return Err(Error::Parse(format!(
193                                "line {after_line}: duplicate key '{key}'"
194                            )));
195                        }
196                        map.insert(key, val);
197                    }
198                    Token::Open => {
199                        let open_line = line_at(tokens, *pos);
200                        advance(pos); // consume '{'
201                        let sub = parse_object(tokens, pos, /*top_level=*/ false, open_line)?;
202                        if map.contains_key(&key) {
203                            return Err(Error::Parse(format!(
204                                "line {after_line}: duplicate key '{key}'"
205                            )));
206                        }
207                        map.insert(key, Value::Object(sub));
208                    }
209                    other => {
210                        return Err(Error::Parse(format!(
211                            "line {after_line}: expected '=' or '{{' after key '{key}', got {}",
212                            token_display(Some(other))
213                        )));
214                    }
215                }
216            }
217            other => {
218                return Err(Error::Parse(format!(
219                    "line {line}: unexpected {}",
220                    token_display(Some(other))
221                )));
222            }
223        }
224    }
225
226    Ok(map)
227}
228
229/// Parse a single value: either a scalar term or a `{ … }` block.
230///
231/// # Errors
232///
233/// Returns [`Error::Parse`] on unexpected tokens.
234fn parse_value(tokens: &[(Token, u32)], pos: &mut usize) -> Result<Value> {
235    let line = line_at(tokens, *pos);
236    match peek(tokens, *pos) {
237        Token::Open => {
238            let open_line = line_at(tokens, *pos);
239            advance(pos); // consume '{'
240            parse_array_or_object_list(tokens, pos, open_line)
241        }
242        Token::Term(_) => {
243            let s = expect_term(tokens, pos)?;
244            Ok(Value::Scalar(s))
245        }
246        other => Err(Error::Parse(format!(
247            "line {line}: expected a value, got {}",
248            token_display(Some(other))
249        ))),
250    }
251}
252
253/// Parse the body of a `{ … }` block that follows `=`.
254///
255/// The block may contain:
256/// - A list of scalar terms → [`Value::Array`] of [`Value::Scalar`]s.
257/// - A list of `{ … }` sub-objects → [`Value::Array`] of [`Value::Object`]s.
258/// - A mix of both.
259///
260/// `open_line` is the source line of the opening `{` and is used in EOF
261/// error messages to point back to where the block started.
262///
263/// # Errors
264///
265/// Returns [`Error::Parse`] on unexpected tokens or premature EOF.
266fn parse_array_or_object_list(
267    tokens: &[(Token, u32)],
268    pos: &mut usize,
269    open_line: u32,
270) -> Result<Value> {
271    let mut items: Vec<Value> = Vec::new();
272
273    loop {
274        let line = line_at(tokens, *pos);
275        match peek(tokens, *pos) {
276            Token::Close => {
277                advance(pos); // consume '}'
278                break;
279            }
280            Token::Eof => {
281                return Err(Error::Parse(format!(
282                    "line {line}: unexpected EOF inside array opened on line {open_line}"
283                )));
284            }
285            Token::Open => {
286                // A nested object literal inside an array: { key = val … }
287                let inner_open_line = line_at(tokens, *pos);
288                advance(pos); // consume '{'
289                let sub = parse_object(tokens, pos, /*top_level=*/ false, inner_open_line)?;
290                items.push(Value::Object(sub));
291            }
292            Token::Term(_) => {
293                // Peek ahead: `term =` inside an array means the caller wrote
294                // a key-value assignment directly in a list body, which is not
295                // valid.  Catch it here so we can name the key and suggest the
296                // correct syntax before consuming the term.
297                if matches!(tokens.get(*pos + 1), Some((Token::Eq, _))) {
298                    let key = match tokens.get(*pos) {
299                        Some((Token::Term(s), _)) => s.clone(),
300                        _ => "?".to_owned(),
301                    };
302                    return Err(Error::Parse(format!(
303                        "line {line}: '{key} = ...' is not valid inside an array; \
304                         wrap it in braces for a nested object: '{{ {key} = ... }}'"
305                    )));
306                }
307                let s = expect_term(tokens, pos)?;
308                items.push(Value::Scalar(s));
309            }
310            Token::Eq => {
311                return Err(Error::Parse(format!(
312                    "line {line}: unexpected '=' inside array"
313                )));
314            }
315        }
316    }
317
318    Ok(Value::Array(items))
319}
320
321// ---------------------------------------------------------------------------
322// Scalar coercion helpers
323// ---------------------------------------------------------------------------
324
325impl Value {
326    /// Try to interpret this [`Value::Scalar`] as a `bool`.
327    ///
328    /// Returns `Some(true)` for the literal string `"true"`, `Some(false)` for
329    /// `"false"`, and `None` for any other value or non-scalar variant.
330    ///
331    /// This mirrors the Python implementation's `json.loads` coercion.
332    #[must_use]
333    pub fn as_bool(&self) -> Option<bool> {
334        if let Value::Scalar(s) = self {
335            match s.as_str() {
336                "true" => Some(true),
337                "false" => Some(false),
338                _ => None,
339            }
340        } else {
341            None
342        }
343    }
344
345    /// Try to interpret this [`Value::Scalar`] as an `i64`.
346    ///
347    /// Returns `Some(n)` if the string parses as a signed 64-bit integer, or
348    /// `None` otherwise.
349    #[must_use]
350    pub fn as_i64(&self) -> Option<i64> {
351        if let Value::Scalar(s) = self {
352            s.parse().ok()
353        } else {
354            None
355        }
356    }
357
358    /// Try to interpret this [`Value::Scalar`] as an `f64`.
359    ///
360    /// Returns `Some(n)` if the string parses as a 64-bit float, or `None`
361    /// otherwise.
362    #[must_use]
363    pub fn as_f64(&self) -> Option<f64> {
364        if let Value::Scalar(s) = self {
365            s.parse().ok()
366        } else {
367            None
368        }
369    }
370
371    /// Returns `true` if this value is the scalar string `"null"`.
372    ///
373    /// Used by the deserializer to map structprop's `null` token to
374    /// [`Option::None`].
375    #[must_use]
376    pub fn is_null(&self) -> bool {
377        matches!(self, Value::Scalar(s) if s == "null")
378    }
379
380    /// Return the inner string of a [`Value::Scalar`], or `None` for other
381    /// variants.
382    ///
383    /// This complements [`Value::as_bool`], [`Value::as_i64`], and
384    /// [`Value::as_f64`] for cases where the raw string value is needed.
385    ///
386    /// # Examples
387    ///
388    /// ```
389    /// use serde_structprop::parse::{parse, Value};
390    ///
391    /// let v = parse("greeting = hello\n").unwrap();
392    /// if let Value::Object(map) = v {
393    ///     assert_eq!(map["greeting"].as_str(), Some("hello"));
394    /// }
395    /// ```
396    #[must_use]
397    pub fn as_str(&self) -> Option<&str> {
398        if let Value::Scalar(s) = self {
399            Some(s)
400        } else {
401            None
402        }
403    }
404
405    /// Returns a short human-readable name for the variant, used in error
406    /// messages.
407    #[must_use]
408    pub fn type_name(&self) -> &'static str {
409        match self {
410            Value::Scalar(_) => "scalar",
411            Value::Array(_) => "array",
412            Value::Object(_) => "object",
413        }
414    }
415}
416
417// ---------------------------------------------------------------------------
418// Tests
419// ---------------------------------------------------------------------------
420
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `input`, which is expected to be valid, and return the document.
    fn parse_ok(input: &str) -> Value {
        parse(input).unwrap()
    }

    /// Parse `input`, which is expected to be invalid, and return the error
    /// rendered as text.
    fn parse_err(input: &str) -> String {
        parse(input).unwrap_err().to_string()
    }

    #[test]
    fn simple_kv() {
        match parse_ok("key = value\n") {
            Value::Object(map) => {
                assert_eq!(map["key"], Value::Scalar("value".into()));
            }
            _ => panic!("expected object"),
        }
    }

    #[test]
    fn nested_object() {
        let doc = parse_ok("db {\n  host = localhost\n  port = 5432\n}\n");
        let map = match doc {
            Value::Object(map) => map,
            _ => panic!("expected object"),
        };
        match &map["db"] {
            Value::Object(db) => {
                assert_eq!(db["host"], Value::Scalar("localhost".into()));
                assert_eq!(db["port"], Value::Scalar("5432".into()));
            }
            _ => panic!("expected nested object"),
        }
    }

    #[test]
    fn array_of_scalars() {
        let expected = Value::Array(vec![
            Value::Scalar("Table1".into()),
            Value::Scalar("Table2".into()),
        ]);
        match parse_ok("tables = { Table1 Table2 }\n") {
            Value::Object(map) => assert_eq!(map["tables"], expected),
            _ => panic!("expected object"),
        }
    }

    #[test]
    fn number_scalar() {
        if let Value::Object(map) = parse_ok("port = 8080\n") {
            assert_eq!(map["port"].as_i64(), Some(8080));
        }
    }

    #[test]
    fn bool_scalar() {
        if let Value::Object(map) = parse_ok("enabled = true\n") {
            assert_eq!(map["enabled"].as_bool(), Some(true));
        }
    }

    #[test]
    fn error_includes_line_number() {
        let err = parse_err("good = ok\nbad = {\n");
        assert!(
            err.contains("line "),
            "expected a line number in error: {err}"
        );
    }

    #[test]
    fn unterminated_object_reports_opening_brace_line() {
        // `bad {` opens on line 2 and is never closed, so the message must
        // name line 2 as the place where the block started.
        let err = parse_err("good = ok\nbad {\n  key = value\n");
        assert!(
            err.contains("opened on line 2"),
            "expected opening brace line in error: {err}"
        );
    }

    #[test]
    fn unterminated_array_reports_opening_brace_line() {
        // The list opens on line 1 and input ends before its closing brace.
        let err = parse_err("list = {\n  item1\n");
        assert!(
            err.contains("opened on line 1"),
            "expected opening brace line in error: {err}"
        );
    }

    #[test]
    fn deeply_nested_unterminated_object_reports_correct_opening_line() {
        // `outer {` opens on line 1 and `inner {` on line 2; neither is
        // closed.  End of input is hit while inside `inner`, so the message
        // must reference line 2, the innermost unclosed block.
        let err = parse_err("outer {\n  inner {\n    key = value\n");
        assert!(
            err.contains("opened on line 2"),
            "expected inner opening brace line in error: {err}"
        );
    }

    #[test]
    fn kv_inside_array_suggests_fix() {
        // Writing an assignment directly in a list body is an easy mistake;
        // the message must name the key and show the brace-wrapped form.
        let err = parse_err("list = {\n  subkey = nested\n}\n");
        assert!(
            err.contains("'subkey = ...' is not valid inside an array"),
            "expected actionable hint in error: {err}"
        );
        assert!(
            err.contains("{ subkey = ... }"),
            "expected brace-wrap hint in error: {err}"
        );
    }

    #[test]
    fn token_display_uses_human_readable_names() {
        // A leading `=` with no key before it must be shown as `'='`, not as
        // the variant name `Eq`.
        let err = parse_err("= value\n");
        assert!(
            err.contains("'='") || err.contains("end of input"),
            "error should use human-readable token names: {err}"
        );
    }
}
557}