Skip to main content

serde_structprop/
parse.rs

//! Parser for the structprop format.
//!
//! This module contains the [`Value`] type that represents a parsed structprop
//! document and the [`parse()`] function that converts a raw `&str` into a
//! [`Value::Object`] tree.
//!
//! # Grammar (informal)
//!
//! ```text
//! document   = assignment*
//! assignment = TERM '=' value
//!            | TERM '{' assignment* '}'
//! value      = TERM
//!            | '{' (TERM | '{' assignment* '}')* '}'
//! ```
16
17use crate::error::{Error, Result};
18use crate::lexer::{tokenize, Token};
19use indexmap::IndexMap;
20
21// ---------------------------------------------------------------------------
22// Public types
23// ---------------------------------------------------------------------------
24
25/// A node in the structprop value tree produced by [`parse()`].
26///
27/// The tree maps directly onto structprop's three syntactic forms:
28///
29/// | Structprop syntax | Variant |
30/// |---|---|
31/// | `key = value` | [`Value::Scalar`] |
32/// | `key = { a b c }` | [`Value::Array`] of [`Value::Scalar`]s |
33/// | `key = { { k = v } { k = v } }` | [`Value::Array`] of [`Value::Object`]s |
34/// | `key { … }` | [`Value::Object`] |
35///
36/// Scalar strings are stored verbatim (no coercion at parse time); numeric
37/// or boolean coercion is performed lazily via the [`Value::as_bool`],
38/// [`Value::as_i64`], and [`Value::as_f64`] helpers.  Duplicate keys within
39/// any object block are detected and rejected during parsing.
40#[derive(Debug, Clone, PartialEq)]
41pub enum Value {
42    /// A bare or quoted string token, stored as-is (no coercion applied).
43    ///
44    /// Use [`Value::as_bool`], [`Value::as_i64`], or [`Value::as_f64`] to
45    /// attempt type coercion, or [`Value::is_null`] to test for `null`.
46    Scalar(String),
47
48    /// An ordered list of values, corresponding to `key = { … }` syntax.
49    ///
50    /// Array items may be [`Value::Scalar`]s (bare terms) or
51    /// [`Value::Object`]s (written as `{ key = val … }` inline sub-objects).
52    /// Duplicate keys within a sub-object are rejected at parse time.
53    Array(Vec<Value>),
54
55    /// An ordered map from string keys to values, corresponding to either a
56    /// `key { … }` block or the implicit top-level document object.
57    ///
58    /// Key insertion order is preserved via [`IndexMap`].
59    Object(IndexMap<String, Value>),
60}
61
62// ---------------------------------------------------------------------------
63// Public entry point
64// ---------------------------------------------------------------------------
65
66/// Parse a structprop document from `input` and return the top-level
67/// [`Value::Object`].
68///
69/// # Errors
70///
71/// Returns [`Error::Parse`] if the input contains unexpected tokens or
72/// violates the structprop grammar.  The error message includes the 1-indexed
73/// line number where the problem was detected.
74///
75/// # Examples
76///
77/// ```
78/// use serde_structprop::parse::{parse, Value};
79///
80/// let v = parse("port = 8080\n").unwrap();
81/// if let Value::Object(map) = v {
82///     assert_eq!(map["port"].as_i64(), Some(8080));
83/// }
84/// ```
85pub fn parse(input: &str) -> Result<Value> {
86    let tokens = tokenize(input)?;
87    let mut pos = 0usize;
88    let map = parse_object(&tokens, &mut pos, /*top_level=*/ true)?;
89    Ok(Value::Object(map))
90}
91
92// ---------------------------------------------------------------------------
93// Internal parser helpers
94// ---------------------------------------------------------------------------
95
96/// Return a reference to the token at `pos` without advancing.
97fn peek(tokens: &[(Token, u32)], pos: usize) -> &Token {
98    tokens.get(pos).map_or(&Token::Eof, |(tok, _)| tok)
99}
100
101/// Format a token as a human-readable string for error messages.
102fn token_display(tok: Option<&Token>) -> String {
103    match tok {
104        Some(Token::Term(s)) => format!("'{s}'"),
105        Some(Token::Eq) => "'='".to_owned(),
106        Some(Token::Open) => "'{{'".to_owned(),
107        Some(Token::Close) => "'}}'".to_owned(),
108        Some(Token::Eof) | None => "end of input".to_owned(),
109    }
110}
111
112/// Return the source line of the token at `pos`.
113fn line_at(tokens: &[(Token, u32)], pos: usize) -> u32 {
114    tokens.get(pos).map_or(0, |&(_, line)| line)
115}
116
/// Move the cursor forward past the current token.
fn advance(pos: &mut usize) {
    let next = *pos + 1;
    *pos = next;
}
121
122/// Consume the next token, asserting it is a [`Token::Term`], and return its
123/// string value.
124///
125/// # Errors
126///
127/// Returns [`Error::Parse`] with a line number if the next token is not a term.
128fn expect_term(tokens: &[(Token, u32)], pos: &mut usize) -> Result<String> {
129    let line = line_at(tokens, *pos);
130    match tokens.get(*pos) {
131        Some((Token::Term(s), _)) => {
132            let s = s.clone();
133            advance(pos);
134            Ok(s)
135        }
136        other => {
137            let tok = other.map(|(t, _)| t);
138            Err(Error::Parse(format!(
139                "line {line}: expected a key or value, got {}",
140                token_display(tok)
141            )))
142        }
143    }
144}
145
146/// Parse a sequence of assignments into an [`IndexMap`].
147///
148/// * If `top_level` is `true`, parsing stops at [`Token::Eof`].
149/// * If `top_level` is `false`, parsing stops at `}` (which is consumed).
150///
151/// # Errors
152///
153/// Returns [`Error::Parse`] on malformed input.
154fn parse_object(
155    tokens: &[(Token, u32)],
156    pos: &mut usize,
157    top_level: bool,
158) -> Result<IndexMap<String, Value>> {
159    let mut map = IndexMap::new();
160
161    loop {
162        let line = line_at(tokens, *pos);
163        match peek(tokens, *pos) {
164            Token::Eof => {
165                if top_level {
166                    break;
167                }
168                return Err(Error::Parse(format!(
169                    "line {line}: unexpected EOF inside object"
170                )));
171            }
172            Token::Close => {
173                if top_level {
174                    return Err(Error::Parse(format!("line {line}: unexpected '}}'")));
175                }
176                advance(pos); // consume '}'
177                break;
178            }
179            Token::Term(_) => {
180                let key = expect_term(tokens, pos)?;
181                let after_line = line_at(tokens, *pos);
182                match peek(tokens, *pos) {
183                    Token::Eq => {
184                        advance(pos); // consume '='
185                        let val = parse_value(tokens, pos)?;
186                        if map.contains_key(&key) {
187                            return Err(Error::Parse(format!(
188                                "line {after_line}: duplicate key '{key}'"
189                            )));
190                        }
191                        map.insert(key, val);
192                    }
193                    Token::Open => {
194                        advance(pos); // consume '{'
195                        let sub = parse_object(tokens, pos, /*top_level=*/ false)?;
196                        if map.contains_key(&key) {
197                            return Err(Error::Parse(format!(
198                                "line {after_line}: duplicate key '{key}'"
199                            )));
200                        }
201                        map.insert(key, Value::Object(sub));
202                    }
203                    other => {
204                        return Err(Error::Parse(format!(
205                            "line {after_line}: expected '=' or '{{' after key '{key}', got {}",
206                            token_display(Some(other))
207                        )));
208                    }
209                }
210            }
211            other => {
212                return Err(Error::Parse(format!(
213                    "line {line}: unexpected {}",
214                    token_display(Some(other))
215                )));
216            }
217        }
218    }
219
220    Ok(map)
221}
222
223/// Parse a single value: either a scalar term or a `{ … }` block.
224///
225/// # Errors
226///
227/// Returns [`Error::Parse`] on unexpected tokens.
228fn parse_value(tokens: &[(Token, u32)], pos: &mut usize) -> Result<Value> {
229    let line = line_at(tokens, *pos);
230    match peek(tokens, *pos) {
231        Token::Open => {
232            advance(pos); // consume '{'
233            parse_array_or_object_list(tokens, pos)
234        }
235        Token::Term(_) => {
236            let s = expect_term(tokens, pos)?;
237            Ok(Value::Scalar(s))
238        }
239        other => Err(Error::Parse(format!(
240            "line {line}: expected a value, got {}",
241            token_display(Some(other))
242        ))),
243    }
244}
245
246/// Parse the body of a `{ … }` block that follows `=`.
247///
248/// The block may contain:
249/// - A list of scalar terms → [`Value::Array`] of [`Value::Scalar`]s.
250/// - A list of `{ … }` sub-objects → [`Value::Array`] of [`Value::Object`]s.
251/// - A mix of both.
252///
253/// # Errors
254///
255/// Returns [`Error::Parse`] on unexpected tokens or premature EOF.
256fn parse_array_or_object_list(tokens: &[(Token, u32)], pos: &mut usize) -> Result<Value> {
257    let mut items: Vec<Value> = Vec::new();
258
259    loop {
260        let line = line_at(tokens, *pos);
261        match peek(tokens, *pos) {
262            Token::Close => {
263                advance(pos); // consume '}'
264                break;
265            }
266            Token::Eof => {
267                return Err(Error::Parse(format!(
268                    "line {line}: unexpected EOF inside array"
269                )));
270            }
271            Token::Open => {
272                // A nested object literal inside an array: { key = val … }
273                advance(pos); // consume '{'
274                let sub = parse_object(tokens, pos, /*top_level=*/ false)?;
275                items.push(Value::Object(sub));
276            }
277            Token::Term(_) => {
278                // Peek ahead: `term =` inside an array means the caller wrote
279                // a key-value assignment directly in a list body, which is not
280                // valid.  Catch it here so we can name the key and suggest the
281                // correct syntax before consuming the term.
282                if matches!(tokens.get(*pos + 1), Some((Token::Eq, _))) {
283                    let key = match tokens.get(*pos) {
284                        Some((Token::Term(s), _)) => s.clone(),
285                        _ => "?".to_owned(),
286                    };
287                    return Err(Error::Parse(format!(
288                        "line {line}: '{key} = ...' is not valid inside an array; \
289                         wrap it in braces for a nested object: '{{ {key} = ... }}'"
290                    )));
291                }
292                let s = expect_term(tokens, pos)?;
293                items.push(Value::Scalar(s));
294            }
295            Token::Eq => {
296                return Err(Error::Parse(format!(
297                    "line {line}: unexpected '=' inside array"
298                )));
299            }
300        }
301    }
302
303    Ok(Value::Array(items))
304}
305
306// ---------------------------------------------------------------------------
307// Scalar coercion helpers
308// ---------------------------------------------------------------------------
309
310impl Value {
311    /// Try to interpret this [`Value::Scalar`] as a `bool`.
312    ///
313    /// Returns `Some(true)` for the literal string `"true"`, `Some(false)` for
314    /// `"false"`, and `None` for any other value or non-scalar variant.
315    ///
316    /// This mirrors the Python implementation's `json.loads` coercion.
317    #[must_use]
318    pub fn as_bool(&self) -> Option<bool> {
319        if let Value::Scalar(s) = self {
320            match s.as_str() {
321                "true" => Some(true),
322                "false" => Some(false),
323                _ => None,
324            }
325        } else {
326            None
327        }
328    }
329
330    /// Try to interpret this [`Value::Scalar`] as an `i64`.
331    ///
332    /// Returns `Some(n)` if the string parses as a signed 64-bit integer, or
333    /// `None` otherwise.
334    #[must_use]
335    pub fn as_i64(&self) -> Option<i64> {
336        if let Value::Scalar(s) = self {
337            s.parse().ok()
338        } else {
339            None
340        }
341    }
342
343    /// Try to interpret this [`Value::Scalar`] as an `f64`.
344    ///
345    /// Returns `Some(n)` if the string parses as a 64-bit float, or `None`
346    /// otherwise.
347    #[must_use]
348    pub fn as_f64(&self) -> Option<f64> {
349        if let Value::Scalar(s) = self {
350            s.parse().ok()
351        } else {
352            None
353        }
354    }
355
356    /// Returns `true` if this value is the scalar string `"null"`.
357    ///
358    /// Used by the deserializer to map structprop's `null` token to
359    /// [`Option::None`].
360    #[must_use]
361    pub fn is_null(&self) -> bool {
362        matches!(self, Value::Scalar(s) if s == "null")
363    }
364
365    /// Return the inner string of a [`Value::Scalar`], or `None` for other
366    /// variants.
367    ///
368    /// This complements [`Value::as_bool`], [`Value::as_i64`], and
369    /// [`Value::as_f64`] for cases where the raw string value is needed.
370    ///
371    /// # Examples
372    ///
373    /// ```
374    /// use serde_structprop::parse::{parse, Value};
375    ///
376    /// let v = parse("greeting = hello\n").unwrap();
377    /// if let Value::Object(map) = v {
378    ///     assert_eq!(map["greeting"].as_str(), Some("hello"));
379    /// }
380    /// ```
381    #[must_use]
382    pub fn as_str(&self) -> Option<&str> {
383        if let Value::Scalar(s) = self {
384            Some(s)
385        } else {
386            None
387        }
388    }
389
390    /// Returns a short human-readable name for the variant, used in error
391    /// messages.
392    #[must_use]
393    pub fn type_name(&self) -> &'static str {
394        match self {
395            Value::Scalar(_) => "scalar",
396            Value::Array(_) => "array",
397            Value::Object(_) => "object",
398        }
399    }
400}
401
402// ---------------------------------------------------------------------------
403// Tests
404// ---------------------------------------------------------------------------
405
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `input` and return the top-level map.
    ///
    /// Panics on a parse error or a non-object result, so no test can pass
    /// vacuously by skipping its assertions.
    fn parse_top_level(input: &str) -> indexmap::IndexMap<String, Value> {
        match parse(input) {
            Ok(Value::Object(map)) => map,
            Ok(other) => panic!("expected object, got {}", other.type_name()),
            Err(err) => panic!("parse failed: {err}"),
        }
    }

    #[test]
    fn simple_kv() {
        let map = parse_top_level("key = value\n");
        assert_eq!(map["key"], Value::Scalar("value".into()));
    }

    #[test]
    fn nested_object() {
        let map = parse_top_level("db {\n  host = localhost\n  port = 5432\n}\n");
        match &map["db"] {
            Value::Object(db) => {
                assert_eq!(db["host"], Value::Scalar("localhost".into()));
                assert_eq!(db["port"], Value::Scalar("5432".into()));
            }
            other => panic!("expected nested object, got {}", other.type_name()),
        }
    }

    #[test]
    fn array_of_scalars() {
        let map = parse_top_level("tables = { Table1 Table2 }\n");
        assert_eq!(
            map["tables"],
            Value::Array(vec![
                Value::Scalar("Table1".into()),
                Value::Scalar("Table2".into()),
            ])
        );
    }

    #[test]
    fn number_scalar() {
        let map = parse_top_level("port = 8080\n");
        assert_eq!(map["port"].as_i64(), Some(8080));
    }

    #[test]
    fn bool_scalar() {
        let map = parse_top_level("enabled = true\n");
        assert_eq!(map["enabled"].as_bool(), Some(true));
    }

    #[test]
    fn error_includes_line_number() {
        let input = "good = ok\nbad = {\n";
        let err = parse(input).unwrap_err().to_string();
        assert!(
            err.contains("line "),
            "expected a line number in error: {err}"
        );
    }

    #[test]
    fn kv_inside_array_suggests_fix() {
        // `subkey = nested` inside an array body is the most common mistake;
        // the error should name the key and tell the user how to fix it.
        let input = "list = {\n  subkey = nested\n}\n";
        let err = parse(input).unwrap_err().to_string();
        assert!(
            err.contains("'subkey = ...' is not valid inside an array"),
            "expected actionable hint in error: {err}"
        );
        assert!(
            err.contains("{ subkey = ... }"),
            "expected brace-wrap hint in error: {err}"
        );
    }

    #[test]
    fn token_display_uses_human_readable_names() {
        // A leading `=` with no preceding key should say `'='`, not `Eq`.
        let input = "= value\n";
        let err = parse(input).unwrap_err().to_string();
        assert!(
            err.contains("'='") || err.contains("end of input"),
            "error should use human-readable token names: {err}"
        );
    }
}
505}