serde_structprop/parse.rs
1//! Parser for the structprop format.
2//!
3//! This module contains the [`Value`] type that represents a parsed structprop
4//! document and the [`parse()`] function that converts a raw `&str` into a
5//! [`Value::Object`] tree.
6//!
7//! # Grammar (informal)
8//!
9//! ```text
10//! document = assignment*
11//! assignment = TERM '=' value
12//! | TERM '{' assignment* '}'
13//! value = TERM
14//! | '{' (TERM | '{' assignment* '}')* '}'
15//! ```
16
17use crate::error::{Error, Result};
18use crate::lexer::{tokenize, Token};
19use indexmap::IndexMap;
20
21// ---------------------------------------------------------------------------
22// Public types
23// ---------------------------------------------------------------------------
24
25/// A node in the structprop value tree produced by [`parse()`].
26///
27/// The tree maps directly onto structprop's three syntactic forms:
28///
29/// | Structprop syntax | Variant |
30/// |---|---|
31/// | `key = value` | [`Value::Scalar`] |
32/// | `key = { a b c }` | [`Value::Array`] of [`Value::Scalar`]s |
33/// | `key = { { k = v } { k = v } }` | [`Value::Array`] of [`Value::Object`]s |
34/// | `key { … }` | [`Value::Object`] |
35///
36/// Scalar strings are stored verbatim (no coercion at parse time); numeric
37/// or boolean coercion is performed lazily via the [`Value::as_bool`],
38/// [`Value::as_i64`], and [`Value::as_f64`] helpers. Duplicate keys within
39/// any object block are detected and rejected during parsing.
40#[derive(Debug, Clone, PartialEq)]
41pub enum Value {
42 /// A bare or quoted string token, stored as-is (no coercion applied).
43 ///
44 /// Use [`Value::as_bool`], [`Value::as_i64`], or [`Value::as_f64`] to
45 /// attempt type coercion, or [`Value::is_null`] to test for `null`.
46 Scalar(String),
47
48 /// An ordered list of values, corresponding to `key = { … }` syntax.
49 ///
50 /// Array items may be [`Value::Scalar`]s (bare terms) or
51 /// [`Value::Object`]s (written as `{ key = val … }` inline sub-objects).
52 /// Duplicate keys within a sub-object are rejected at parse time.
53 Array(Vec<Value>),
54
55 /// An ordered map from string keys to values, corresponding to either a
56 /// `key { … }` block or the implicit top-level document object.
57 ///
58 /// Key insertion order is preserved via [`IndexMap`].
59 Object(IndexMap<String, Value>),
60}
61
62// ---------------------------------------------------------------------------
63// Public entry point
64// ---------------------------------------------------------------------------
65
66/// Parse a structprop document from `input` and return the top-level
67/// [`Value::Object`].
68///
69/// # Errors
70///
71/// Returns [`Error::Parse`] if the input contains unexpected tokens or
72/// violates the structprop grammar. The error message includes the 1-indexed
73/// line number where the problem was detected.
74///
75/// # Examples
76///
77/// ```
78/// use serde_structprop::parse::{parse, Value};
79///
80/// let v = parse("port = 8080\n").unwrap();
81/// if let Value::Object(map) = v {
82/// assert_eq!(map["port"].as_i64(), Some(8080));
83/// }
84/// ```
85pub fn parse(input: &str) -> Result<Value> {
86 let tokens = tokenize(input)?;
87 let mut pos = 0usize;
88 let map = parse_object(&tokens, &mut pos, /*top_level=*/ true)?;
89 Ok(Value::Object(map))
90}
91
92// ---------------------------------------------------------------------------
93// Internal parser helpers
94// ---------------------------------------------------------------------------
95
96/// Return a reference to the token at `pos` without advancing.
97fn peek(tokens: &[(Token, u32)], pos: usize) -> &Token {
98 tokens.get(pos).map_or(&Token::Eof, |(tok, _)| tok)
99}
100
101/// Format a token as a human-readable string for error messages.
102fn token_display(tok: Option<&Token>) -> String {
103 match tok {
104 Some(Token::Term(s)) => format!("'{s}'"),
105 Some(Token::Eq) => "'='".to_owned(),
106 Some(Token::Open) => "'{{'".to_owned(),
107 Some(Token::Close) => "'}}'".to_owned(),
108 Some(Token::Eof) | None => "end of input".to_owned(),
109 }
110}
111
112/// Return the source line of the token at `pos`.
113fn line_at(tokens: &[(Token, u32)], pos: usize) -> u32 {
114 tokens.get(pos).map_or(0, |&(_, line)| line)
115}
116
/// Move the cursor forward past exactly one token.
fn advance(pos: &mut usize) {
    let next = *pos + 1;
    *pos = next;
}
121
122/// Consume the next token, asserting it is a [`Token::Term`], and return its
123/// string value.
124///
125/// # Errors
126///
127/// Returns [`Error::Parse`] with a line number if the next token is not a term.
128fn expect_term(tokens: &[(Token, u32)], pos: &mut usize) -> Result<String> {
129 let line = line_at(tokens, *pos);
130 match tokens.get(*pos) {
131 Some((Token::Term(s), _)) => {
132 let s = s.clone();
133 advance(pos);
134 Ok(s)
135 }
136 other => {
137 let tok = other.map(|(t, _)| t);
138 Err(Error::Parse(format!(
139 "line {line}: expected a key or value, got {}",
140 token_display(tok)
141 )))
142 }
143 }
144}
145
146/// Parse a sequence of assignments into an [`IndexMap`].
147///
148/// * If `top_level` is `true`, parsing stops at [`Token::Eof`].
149/// * If `top_level` is `false`, parsing stops at `}` (which is consumed).
150///
151/// # Errors
152///
153/// Returns [`Error::Parse`] on malformed input.
154fn parse_object(
155 tokens: &[(Token, u32)],
156 pos: &mut usize,
157 top_level: bool,
158) -> Result<IndexMap<String, Value>> {
159 let mut map = IndexMap::new();
160
161 loop {
162 let line = line_at(tokens, *pos);
163 match peek(tokens, *pos) {
164 Token::Eof => {
165 if top_level {
166 break;
167 }
168 return Err(Error::Parse(format!(
169 "line {line}: unexpected EOF inside object"
170 )));
171 }
172 Token::Close => {
173 if top_level {
174 return Err(Error::Parse(format!("line {line}: unexpected '}}'")));
175 }
176 advance(pos); // consume '}'
177 break;
178 }
179 Token::Term(_) => {
180 let key = expect_term(tokens, pos)?;
181 let after_line = line_at(tokens, *pos);
182 match peek(tokens, *pos) {
183 Token::Eq => {
184 advance(pos); // consume '='
185 let val = parse_value(tokens, pos)?;
186 if map.contains_key(&key) {
187 return Err(Error::Parse(format!(
188 "line {after_line}: duplicate key '{key}'"
189 )));
190 }
191 map.insert(key, val);
192 }
193 Token::Open => {
194 advance(pos); // consume '{'
195 let sub = parse_object(tokens, pos, /*top_level=*/ false)?;
196 if map.contains_key(&key) {
197 return Err(Error::Parse(format!(
198 "line {after_line}: duplicate key '{key}'"
199 )));
200 }
201 map.insert(key, Value::Object(sub));
202 }
203 other => {
204 return Err(Error::Parse(format!(
205 "line {after_line}: expected '=' or '{{' after key '{key}', got {}",
206 token_display(Some(other))
207 )));
208 }
209 }
210 }
211 other => {
212 return Err(Error::Parse(format!(
213 "line {line}: unexpected {}",
214 token_display(Some(other))
215 )));
216 }
217 }
218 }
219
220 Ok(map)
221}
222
223/// Parse a single value: either a scalar term or a `{ … }` block.
224///
225/// # Errors
226///
227/// Returns [`Error::Parse`] on unexpected tokens.
228fn parse_value(tokens: &[(Token, u32)], pos: &mut usize) -> Result<Value> {
229 let line = line_at(tokens, *pos);
230 match peek(tokens, *pos) {
231 Token::Open => {
232 advance(pos); // consume '{'
233 parse_array_or_object_list(tokens, pos)
234 }
235 Token::Term(_) => {
236 let s = expect_term(tokens, pos)?;
237 Ok(Value::Scalar(s))
238 }
239 other => Err(Error::Parse(format!(
240 "line {line}: expected a value, got {}",
241 token_display(Some(other))
242 ))),
243 }
244}
245
246/// Parse the body of a `{ … }` block that follows `=`.
247///
248/// The block may contain:
249/// - A list of scalar terms → [`Value::Array`] of [`Value::Scalar`]s.
250/// - A list of `{ … }` sub-objects → [`Value::Array`] of [`Value::Object`]s.
251/// - A mix of both.
252///
253/// # Errors
254///
255/// Returns [`Error::Parse`] on unexpected tokens or premature EOF.
256fn parse_array_or_object_list(tokens: &[(Token, u32)], pos: &mut usize) -> Result<Value> {
257 let mut items: Vec<Value> = Vec::new();
258
259 loop {
260 let line = line_at(tokens, *pos);
261 match peek(tokens, *pos) {
262 Token::Close => {
263 advance(pos); // consume '}'
264 break;
265 }
266 Token::Eof => {
267 return Err(Error::Parse(format!(
268 "line {line}: unexpected EOF inside array"
269 )));
270 }
271 Token::Open => {
272 // A nested object literal inside an array: { key = val … }
273 advance(pos); // consume '{'
274 let sub = parse_object(tokens, pos, /*top_level=*/ false)?;
275 items.push(Value::Object(sub));
276 }
277 Token::Term(_) => {
278 // Peek ahead: `term =` inside an array means the caller wrote
279 // a key-value assignment directly in a list body, which is not
280 // valid. Catch it here so we can name the key and suggest the
281 // correct syntax before consuming the term.
282 if matches!(tokens.get(*pos + 1), Some((Token::Eq, _))) {
283 let key = match tokens.get(*pos) {
284 Some((Token::Term(s), _)) => s.clone(),
285 _ => "?".to_owned(),
286 };
287 return Err(Error::Parse(format!(
288 "line {line}: '{key} = ...' is not valid inside an array; \
289 wrap it in braces for a nested object: '{{ {key} = ... }}'"
290 )));
291 }
292 let s = expect_term(tokens, pos)?;
293 items.push(Value::Scalar(s));
294 }
295 Token::Eq => {
296 return Err(Error::Parse(format!(
297 "line {line}: unexpected '=' inside array"
298 )));
299 }
300 }
301 }
302
303 Ok(Value::Array(items))
304}
305
306// ---------------------------------------------------------------------------
307// Scalar coercion helpers
308// ---------------------------------------------------------------------------
309
310impl Value {
311 /// Try to interpret this [`Value::Scalar`] as a `bool`.
312 ///
313 /// Returns `Some(true)` for the literal string `"true"`, `Some(false)` for
314 /// `"false"`, and `None` for any other value or non-scalar variant.
315 ///
316 /// This mirrors the Python implementation's `json.loads` coercion.
317 #[must_use]
318 pub fn as_bool(&self) -> Option<bool> {
319 if let Value::Scalar(s) = self {
320 match s.as_str() {
321 "true" => Some(true),
322 "false" => Some(false),
323 _ => None,
324 }
325 } else {
326 None
327 }
328 }
329
330 /// Try to interpret this [`Value::Scalar`] as an `i64`.
331 ///
332 /// Returns `Some(n)` if the string parses as a signed 64-bit integer, or
333 /// `None` otherwise.
334 #[must_use]
335 pub fn as_i64(&self) -> Option<i64> {
336 if let Value::Scalar(s) = self {
337 s.parse().ok()
338 } else {
339 None
340 }
341 }
342
343 /// Try to interpret this [`Value::Scalar`] as an `f64`.
344 ///
345 /// Returns `Some(n)` if the string parses as a 64-bit float, or `None`
346 /// otherwise.
347 #[must_use]
348 pub fn as_f64(&self) -> Option<f64> {
349 if let Value::Scalar(s) = self {
350 s.parse().ok()
351 } else {
352 None
353 }
354 }
355
356 /// Returns `true` if this value is the scalar string `"null"`.
357 ///
358 /// Used by the deserializer to map structprop's `null` token to
359 /// [`Option::None`].
360 #[must_use]
361 pub fn is_null(&self) -> bool {
362 matches!(self, Value::Scalar(s) if s == "null")
363 }
364
365 /// Return the inner string of a [`Value::Scalar`], or `None` for other
366 /// variants.
367 ///
368 /// This complements [`Value::as_bool`], [`Value::as_i64`], and
369 /// [`Value::as_f64`] for cases where the raw string value is needed.
370 ///
371 /// # Examples
372 ///
373 /// ```
374 /// use serde_structprop::parse::{parse, Value};
375 ///
376 /// let v = parse("greeting = hello\n").unwrap();
377 /// if let Value::Object(map) = v {
378 /// assert_eq!(map["greeting"].as_str(), Some("hello"));
379 /// }
380 /// ```
381 #[must_use]
382 pub fn as_str(&self) -> Option<&str> {
383 if let Value::Scalar(s) = self {
384 Some(s)
385 } else {
386 None
387 }
388 }
389
390 /// Returns a short human-readable name for the variant, used in error
391 /// messages.
392 #[must_use]
393 pub fn type_name(&self) -> &'static str {
394 match self {
395 Value::Scalar(_) => "scalar",
396 Value::Array(_) => "array",
397 Value::Object(_) => "object",
398 }
399 }
400}
401
402// ---------------------------------------------------------------------------
403// Tests
404// ---------------------------------------------------------------------------
405
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `input` and return its top-level map.
    ///
    /// Panics (failing the test) on a parse error or a non-object result, so
    /// no test can pass vacuously by skipping its assertions.
    fn root(input: &str) -> IndexMap<String, Value> {
        match parse(input) {
            Ok(Value::Object(map)) => map,
            Ok(other) => panic!("expected top-level object, got {}", other.type_name()),
            Err(err) => panic!("parse failed: {err}"),
        }
    }

    #[test]
    fn simple_kv() {
        let map = root("key = value\n");
        assert_eq!(map["key"], Value::Scalar("value".into()));
    }

    #[test]
    fn nested_object() {
        let map = root("db {\n host = localhost\n port = 5432\n}\n");
        match &map["db"] {
            Value::Object(db) => {
                assert_eq!(db["host"], Value::Scalar("localhost".into()));
                assert_eq!(db["port"], Value::Scalar("5432".into()));
            }
            other => panic!("expected nested object, got {}", other.type_name()),
        }
    }

    #[test]
    fn array_of_scalars() {
        let map = root("tables = { Table1 Table2 }\n");
        assert_eq!(
            map["tables"],
            Value::Array(vec![
                Value::Scalar("Table1".into()),
                Value::Scalar("Table2".into()),
            ])
        );
    }

    #[test]
    fn array_of_objects() {
        let map = root("items = { { k = v } { k = w } }\n");
        let entry = |text: &str| {
            let mut obj = IndexMap::new();
            obj.insert("k".to_owned(), Value::Scalar(text.to_owned()));
            Value::Object(obj)
        };
        assert_eq!(map["items"], Value::Array(vec![entry("v"), entry("w")]));
    }

    #[test]
    fn number_scalar() {
        let map = root("port = 8080\n");
        assert_eq!(map["port"].as_i64(), Some(8080));
    }

    #[test]
    fn bool_scalar() {
        let map = root("enabled = true\n");
        assert_eq!(map["enabled"].as_bool(), Some(true));
    }

    #[test]
    fn duplicate_key_is_rejected() {
        let err = parse("a = 1\na = 2\n").unwrap_err().to_string();
        assert!(
            err.contains("duplicate key 'a'"),
            "expected duplicate-key error: {err}"
        );
    }

    #[test]
    fn error_includes_line_number() {
        let input = "good = ok\nbad = {\n";
        let err = parse(input).unwrap_err().to_string();
        assert!(
            err.contains("line "),
            "expected a line number in error: {err}"
        );
    }

    #[test]
    fn kv_inside_array_suggests_fix() {
        // `subkey = nested` inside an array body is the most common mistake;
        // the error should name the key and tell the user how to fix it.
        let input = "list = {\n subkey = nested\n}\n";
        let err = parse(input).unwrap_err().to_string();
        assert!(
            err.contains("'subkey = ...' is not valid inside an array"),
            "expected actionable hint in error: {err}"
        );
        assert!(
            err.contains("{ subkey = ... }"),
            "expected brace-wrap hint in error: {err}"
        );
    }

    #[test]
    fn token_display_uses_human_readable_names() {
        // A leading `=` with no preceding key should say `'='`, not `Eq`.
        let input = "= value\n";
        let err = parse(input).unwrap_err().to_string();
        assert!(
            err.contains("'='") || err.contains("end of input"),
            "error should use human-readable token names: {err}"
        );
    }
}
505}