serde_structprop/parse.rs
1//! Parser for the structprop format.
2//!
3//! This module contains the [`Value`] type that represents a parsed structprop
4//! document and the [`parse()`] function that converts a raw `&str` into a
5//! [`Value::Object`] tree.
6//!
7//! # Grammar (informal)
8//!
9//! ```text
10//! document = assignment*
11//! assignment = TERM '=' value
12//! | TERM '{' assignment* '}'
13//! value = TERM
14//! | '{' (TERM | '{' assignment* '}')* '}'
15//! ```
16
17use crate::error::{Error, Result};
18use crate::lexer::{tokenize, Token};
19use indexmap::IndexMap;
20
21// ---------------------------------------------------------------------------
22// Public types
23// ---------------------------------------------------------------------------
24
/// A node in the structprop value tree produced by [`parse()`].
///
/// The tree maps onto structprop's syntactic forms as follows:
///
/// | Structprop syntax | Variant |
/// |---|---|
/// | `key = value` | [`Value::Scalar`] |
/// | `key = { a b c }` | [`Value::Array`] of [`Value::Scalar`]s |
/// | `key = { { k = v } { k = v } }` | [`Value::Array`] of [`Value::Object`]s |
/// | `key { … }` | [`Value::Object`] |
///
/// Scalar strings are stored verbatim (no coercion at parse time); numeric
/// or boolean coercion is performed lazily via the [`Value::as_bool`],
/// [`Value::as_i64`], and [`Value::as_f64`] helpers. Duplicate keys within
/// any object block are detected and rejected during parsing.
#[derive(Debug, Clone, PartialEq)]
pub enum Value {
    /// A single term token, stored as-is (no coercion applied).
    ///
    /// Use [`Value::as_bool`], [`Value::as_i64`], or [`Value::as_f64`] to
    /// attempt type coercion, [`Value::as_str`] to borrow the raw text, or
    /// [`Value::is_null`] to test for `null`.
    Scalar(String),

    /// An ordered list of values, corresponding to `key = { … }` syntax.
    ///
    /// Array items may be [`Value::Scalar`]s (bare terms) or
    /// [`Value::Object`]s (written as `{ key = val … }` inline sub-objects);
    /// both kinds may appear in the same array. Duplicate keys within a
    /// sub-object are rejected at parse time.
    Array(Vec<Value>),

    /// An ordered map from string keys to values, corresponding to either a
    /// `key { … }` block, an inline `{ key = val … }` array element, or the
    /// implicit top-level document object.
    ///
    /// Key insertion order is preserved via [`IndexMap`].
    Object(IndexMap<String, Value>),
}
61
62// ---------------------------------------------------------------------------
63// Public entry point
64// ---------------------------------------------------------------------------
65
66/// Parse a structprop document from `input` and return the top-level
67/// [`Value::Object`].
68///
69/// # Errors
70///
71/// Returns [`Error::Parse`] if the input contains unexpected tokens or
72/// violates the structprop grammar. The error message includes the 1-indexed
73/// line number where the problem was detected.
74///
75/// # Examples
76///
77/// ```
78/// use serde_structprop::parse::{parse, Value};
79///
80/// let v = parse("port = 8080\n").unwrap();
81/// if let Value::Object(map) = v {
82/// assert_eq!(map["port"].as_i64(), Some(8080));
83/// }
84/// ```
85pub fn parse(input: &str) -> Result<Value> {
86 let tokens = tokenize(input)?;
87 let mut pos = 0usize;
88 let map = parse_object(
89 &tokens, &mut pos, /*top_level=*/ true, /*open_line=*/ 0,
90 )?;
91 Ok(Value::Object(map))
92}
93
94// ---------------------------------------------------------------------------
95// Internal parser helpers
96// ---------------------------------------------------------------------------
97
98/// Return a reference to the token at `pos` without advancing.
99fn peek(tokens: &[(Token, u32)], pos: usize) -> &Token {
100 tokens.get(pos).map_or(&Token::Eof, |(tok, _)| tok)
101}
102
103/// Format a token as a human-readable string for error messages.
104fn token_display(tok: Option<&Token>) -> String {
105 match tok {
106 Some(Token::Term(s)) => format!("'{s}'"),
107 Some(Token::Eq) => "'='".to_owned(),
108 Some(Token::Open) => "'{{'".to_owned(),
109 Some(Token::Close) => "'}}'".to_owned(),
110 Some(Token::Eof) | None => "end of input".to_owned(),
111 }
112}
113
114/// Return the source line of the token at `pos`.
115fn line_at(tokens: &[(Token, u32)], pos: usize) -> u32 {
116 tokens.get(pos).map_or(0, |&(_, line)| line)
117}
118
/// Advance the position cursor by one token.
///
/// No bounds check is performed here: the cursor may move past the end of
/// the token list, which `peek` reports as [`Token::Eof`].
fn advance(pos: &mut usize) {
    *pos += 1;
}
123
124/// Consume the next token, asserting it is a [`Token::Term`], and return its
125/// string value.
126///
127/// # Errors
128///
129/// Returns [`Error::Parse`] with a line number if the next token is not a term.
130fn expect_term(tokens: &[(Token, u32)], pos: &mut usize) -> Result<String> {
131 let line = line_at(tokens, *pos);
132 match tokens.get(*pos) {
133 Some((Token::Term(s), _)) => {
134 let s = s.clone();
135 advance(pos);
136 Ok(s)
137 }
138 other => {
139 let tok = other.map(|(t, _)| t);
140 Err(Error::Parse(format!(
141 "line {line}: expected a key or value, got {}",
142 token_display(tok)
143 )))
144 }
145 }
146}
147
148/// Parse a sequence of assignments into an [`IndexMap`].
149///
150/// * If `top_level` is `true`, parsing stops at [`Token::Eof`].
151/// * If `top_level` is `false`, parsing stops at `}` (which is consumed).
152/// `open_line` must be the source line of the opening `{` so that the EOF
153/// error can point back to where the block started.
154///
155/// # Errors
156///
157/// Returns [`Error::Parse`] on malformed input.
158fn parse_object(
159 tokens: &[(Token, u32)],
160 pos: &mut usize,
161 top_level: bool,
162 open_line: u32,
163) -> Result<IndexMap<String, Value>> {
164 let mut map = IndexMap::new();
165
166 loop {
167 let line = line_at(tokens, *pos);
168 match peek(tokens, *pos) {
169 Token::Eof => {
170 if top_level {
171 break;
172 }
173 return Err(Error::Parse(format!(
174 "line {line}: unexpected EOF inside object opened on line {open_line}"
175 )));
176 }
177 Token::Close => {
178 if top_level {
179 return Err(Error::Parse(format!("line {line}: unexpected '}}'")));
180 }
181 advance(pos); // consume '}'
182 break;
183 }
184 Token::Term(_) => {
185 let key = expect_term(tokens, pos)?;
186 let after_line = line_at(tokens, *pos);
187 match peek(tokens, *pos) {
188 Token::Eq => {
189 advance(pos); // consume '='
190 let val = parse_value(tokens, pos)?;
191 if map.contains_key(&key) {
192 return Err(Error::Parse(format!(
193 "line {after_line}: duplicate key '{key}'"
194 )));
195 }
196 map.insert(key, val);
197 }
198 Token::Open => {
199 let open_line = line_at(tokens, *pos);
200 advance(pos); // consume '{'
201 let sub = parse_object(tokens, pos, /*top_level=*/ false, open_line)?;
202 if map.contains_key(&key) {
203 return Err(Error::Parse(format!(
204 "line {after_line}: duplicate key '{key}'"
205 )));
206 }
207 map.insert(key, Value::Object(sub));
208 }
209 other => {
210 return Err(Error::Parse(format!(
211 "line {after_line}: expected '=' or '{{' after key '{key}', got {}",
212 token_display(Some(other))
213 )));
214 }
215 }
216 }
217 other => {
218 return Err(Error::Parse(format!(
219 "line {line}: unexpected {}",
220 token_display(Some(other))
221 )));
222 }
223 }
224 }
225
226 Ok(map)
227}
228
229/// Parse a single value: either a scalar term or a `{ … }` block.
230///
231/// # Errors
232///
233/// Returns [`Error::Parse`] on unexpected tokens.
234fn parse_value(tokens: &[(Token, u32)], pos: &mut usize) -> Result<Value> {
235 let line = line_at(tokens, *pos);
236 match peek(tokens, *pos) {
237 Token::Open => {
238 let open_line = line_at(tokens, *pos);
239 advance(pos); // consume '{'
240 parse_array_or_object_list(tokens, pos, open_line)
241 }
242 Token::Term(_) => {
243 let s = expect_term(tokens, pos)?;
244 Ok(Value::Scalar(s))
245 }
246 other => Err(Error::Parse(format!(
247 "line {line}: expected a value, got {}",
248 token_display(Some(other))
249 ))),
250 }
251}
252
253/// Parse the body of a `{ … }` block that follows `=`.
254///
255/// The block may contain:
256/// - A list of scalar terms → [`Value::Array`] of [`Value::Scalar`]s.
257/// - A list of `{ … }` sub-objects → [`Value::Array`] of [`Value::Object`]s.
258/// - A mix of both.
259///
260/// `open_line` is the source line of the opening `{` and is used in EOF
261/// error messages to point back to where the block started.
262///
263/// # Errors
264///
265/// Returns [`Error::Parse`] on unexpected tokens or premature EOF.
266fn parse_array_or_object_list(
267 tokens: &[(Token, u32)],
268 pos: &mut usize,
269 open_line: u32,
270) -> Result<Value> {
271 let mut items: Vec<Value> = Vec::new();
272
273 loop {
274 let line = line_at(tokens, *pos);
275 match peek(tokens, *pos) {
276 Token::Close => {
277 advance(pos); // consume '}'
278 break;
279 }
280 Token::Eof => {
281 return Err(Error::Parse(format!(
282 "line {line}: unexpected EOF inside array opened on line {open_line}"
283 )));
284 }
285 Token::Open => {
286 // A nested object literal inside an array: { key = val … }
287 let inner_open_line = line_at(tokens, *pos);
288 advance(pos); // consume '{'
289 let sub = parse_object(tokens, pos, /*top_level=*/ false, inner_open_line)?;
290 items.push(Value::Object(sub));
291 }
292 Token::Term(_) => {
293 // Peek ahead: `term =` inside an array means the caller wrote
294 // a key-value assignment directly in a list body, which is not
295 // valid. Catch it here so we can name the key and suggest the
296 // correct syntax before consuming the term.
297 if matches!(tokens.get(*pos + 1), Some((Token::Eq, _))) {
298 let key = match tokens.get(*pos) {
299 Some((Token::Term(s), _)) => s.clone(),
300 _ => "?".to_owned(),
301 };
302 return Err(Error::Parse(format!(
303 "line {line}: '{key} = ...' is not valid inside an array; \
304 wrap it in braces for a nested object: '{{ {key} = ... }}'"
305 )));
306 }
307 let s = expect_term(tokens, pos)?;
308 items.push(Value::Scalar(s));
309 }
310 Token::Eq => {
311 return Err(Error::Parse(format!(
312 "line {line}: unexpected '=' inside array"
313 )));
314 }
315 }
316 }
317
318 Ok(Value::Array(items))
319}
320
321// ---------------------------------------------------------------------------
322// Scalar coercion helpers
323// ---------------------------------------------------------------------------
324
325impl Value {
326 /// Try to interpret this [`Value::Scalar`] as a `bool`.
327 ///
328 /// Returns `Some(true)` for the literal string `"true"`, `Some(false)` for
329 /// `"false"`, and `None` for any other value or non-scalar variant.
330 ///
331 /// This mirrors the Python implementation's `json.loads` coercion.
332 #[must_use]
333 pub fn as_bool(&self) -> Option<bool> {
334 if let Value::Scalar(s) = self {
335 match s.as_str() {
336 "true" => Some(true),
337 "false" => Some(false),
338 _ => None,
339 }
340 } else {
341 None
342 }
343 }
344
345 /// Try to interpret this [`Value::Scalar`] as an `i64`.
346 ///
347 /// Returns `Some(n)` if the string parses as a signed 64-bit integer, or
348 /// `None` otherwise.
349 #[must_use]
350 pub fn as_i64(&self) -> Option<i64> {
351 if let Value::Scalar(s) = self {
352 s.parse().ok()
353 } else {
354 None
355 }
356 }
357
358 /// Try to interpret this [`Value::Scalar`] as an `f64`.
359 ///
360 /// Returns `Some(n)` if the string parses as a 64-bit float, or `None`
361 /// otherwise.
362 #[must_use]
363 pub fn as_f64(&self) -> Option<f64> {
364 if let Value::Scalar(s) = self {
365 s.parse().ok()
366 } else {
367 None
368 }
369 }
370
371 /// Returns `true` if this value is the scalar string `"null"`.
372 ///
373 /// Used by the deserializer to map structprop's `null` token to
374 /// [`Option::None`].
375 #[must_use]
376 pub fn is_null(&self) -> bool {
377 matches!(self, Value::Scalar(s) if s == "null")
378 }
379
380 /// Return the inner string of a [`Value::Scalar`], or `None` for other
381 /// variants.
382 ///
383 /// This complements [`Value::as_bool`], [`Value::as_i64`], and
384 /// [`Value::as_f64`] for cases where the raw string value is needed.
385 ///
386 /// # Examples
387 ///
388 /// ```
389 /// use serde_structprop::parse::{parse, Value};
390 ///
391 /// let v = parse("greeting = hello\n").unwrap();
392 /// if let Value::Object(map) = v {
393 /// assert_eq!(map["greeting"].as_str(), Some("hello"));
394 /// }
395 /// ```
396 #[must_use]
397 pub fn as_str(&self) -> Option<&str> {
398 if let Value::Scalar(s) = self {
399 Some(s)
400 } else {
401 None
402 }
403 }
404
405 /// Returns a short human-readable name for the variant, used in error
406 /// messages.
407 #[must_use]
408 pub fn type_name(&self) -> &'static str {
409 match self {
410 Value::Scalar(_) => "scalar",
411 Value::Array(_) => "array",
412 Value::Object(_) => "object",
413 }
414 }
415}
416
417// ---------------------------------------------------------------------------
418// Tests
419// ---------------------------------------------------------------------------
420
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `input` and return the top-level map.
    ///
    /// Panics (failing the test) if parsing fails or the root is not an
    /// object, so no test can pass vacuously by skipping its assertions.
    fn parse_map(input: &str) -> IndexMap<String, Value> {
        match parse(input) {
            Ok(Value::Object(map)) => map,
            Ok(other) => panic!("expected object, got {}", other.type_name()),
            Err(err) => panic!("unexpected parse error: {err}"),
        }
    }

    /// Parse `input`, which must be rejected, and return the error text.
    fn parse_err(input: &str) -> String {
        parse(input).unwrap_err().to_string()
    }

    #[test]
    fn simple_kv() {
        let map = parse_map("key = value\n");
        assert_eq!(map["key"], Value::Scalar("value".into()));
    }

    #[test]
    fn nested_object() {
        let map = parse_map("db {\n host = localhost\n port = 5432\n}\n");
        match &map["db"] {
            Value::Object(db) => {
                assert_eq!(db["host"], Value::Scalar("localhost".into()));
                assert_eq!(db["port"], Value::Scalar("5432".into()));
            }
            other => panic!("expected nested object, got {}", other.type_name()),
        }
    }

    #[test]
    fn array_of_scalars() {
        let map = parse_map("tables = { Table1 Table2 }\n");
        assert_eq!(
            map["tables"],
            Value::Array(vec![
                Value::Scalar("Table1".into()),
                Value::Scalar("Table2".into()),
            ])
        );
    }

    #[test]
    fn number_scalar() {
        let map = parse_map("port = 8080\n");
        assert_eq!(map["port"].as_i64(), Some(8080));
    }

    #[test]
    fn bool_scalar() {
        let map = parse_map("enabled = true\n");
        assert_eq!(map["enabled"].as_bool(), Some(true));
    }

    #[test]
    fn duplicate_key_is_rejected() {
        // The same key assigned twice in one object must be an error that
        // names the offending key.
        let err = parse_err("a = 1\na = 2\n");
        assert!(
            err.contains("duplicate key 'a'"),
            "expected duplicate-key error: {err}"
        );
    }

    #[test]
    fn error_includes_line_number() {
        let err = parse_err("good = ok\nbad = {\n");
        assert!(
            err.contains("line "),
            "expected a line number in error: {err}"
        );
    }

    #[test]
    fn unterminated_object_reports_opening_brace_line() {
        // The opening `{` is on line 2; EOF is reached on line 3.
        // The error should name the line where the block was opened.
        let err = parse_err("good = ok\nbad {\n key = value\n");
        assert!(
            err.contains("opened on line 2"),
            "expected opening brace line in error: {err}"
        );
    }

    #[test]
    fn unterminated_array_reports_opening_brace_line() {
        // The opening `{` is on line 1; EOF is reached on line 2.
        let err = parse_err("list = {\n item1\n");
        assert!(
            err.contains("opened on line 1"),
            "expected opening brace line in error: {err}"
        );
    }

    #[test]
    fn deeply_nested_unterminated_object_reports_correct_opening_line() {
        // The outer block's `{` is on line 1. The inner block's `{` is on
        // line 2. Neither block is closed, so the parser hits EOF while
        // inside the inner object. The error should reference line 2 (the
        // opening of the innermost unclosed block).
        let err = parse_err("outer {\n inner {\n key = value\n");
        assert!(
            err.contains("opened on line 2"),
            "expected inner opening brace line in error: {err}"
        );
    }

    #[test]
    fn kv_inside_array_suggests_fix() {
        // `subkey = nested` inside an array body is the most common mistake;
        // the error should name the key and tell the user how to fix it.
        let err = parse_err("list = {\n subkey = nested\n}\n");
        assert!(
            err.contains("'subkey = ...' is not valid inside an array"),
            "expected actionable hint in error: {err}"
        );
        assert!(
            err.contains("{ subkey = ... }"),
            "expected brace-wrap hint in error: {err}"
        );
    }

    #[test]
    fn token_display_uses_human_readable_names() {
        // A leading `=` with no preceding key should say `'='`, not `Eq`.
        let err = parse_err("= value\n");
        assert!(
            err.contains("'='") || err.contains("end of input"),
            "error should use human-readable token names: {err}"
        );
    }
}
557}