Skip to main content

nested_text/
parser.rs

1use crate::error::{Error, ErrorKind};
2use crate::inline::InlineParser;
3use crate::lexer::{Lexer, LineKind};
4use crate::value::Value;
5
/// Required type of a document's top-level value, as enforced by [`loads`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Top {
    /// The top-level value must be a dictionary; an empty document yields an empty dictionary.
    Dict,
    /// The top-level value must be a list; an empty document yields an empty list.
    List,
    /// The top-level value must be a string; an empty document yields an empty string.
    String,
    /// Any top-level value is accepted; an empty document yields no value at all.
    Any,
}
14
15/// Parse a NestedText string into a Value.
16///
17/// Returns `None` for empty documents (only comments/blank lines).
18pub fn loads(input: &str, top: Top) -> Result<Option<Value>, Error> {
19    let mut lexer = Lexer::new(input)?;
20    let mut parser = Parser {
21        lexer: &mut lexer,
22        indent_stack: vec![],
23        all_indent_levels: vec![0],
24    };
25    let value = parser.read_value(0)?;
26
27    // Check for extra content after top-level value
28    if value.is_some() {
29        if let Some(line) = parser.lexer.peek() {
30            // Check if this is a partial dedent (depth > 0, established indentation exists,
31            // but this depth was never used as an indentation level)
32            if line.depth > 0
33                && parser.all_indent_levels.len() > 1
34                && !parser.all_indent_levels.contains(&line.depth)
35            {
36                return Err(Error::new(
37                    ErrorKind::InvalidIndentLevel,
38                    "invalid indentation, partial dedent.",
39                )
40                .with_lineno(line.lineno)
41                .with_colno(0)
42                .with_line(line.text.clone()));
43            }
44            return Err(Error::new(ErrorKind::UnexpectedLineType, "extra content.")
45                .with_lineno(line.lineno)
46                .with_colno(0)
47                .with_line(line.text.clone()));
48        }
49    }
50
51    // Handle empty documents based on Top constraint
52    let value = match (value, top) {
53        (None, Top::Any) => None,
54        (None, Top::Dict) => Some(Value::Dict(vec![])),
55        (None, Top::List) => Some(Value::List(vec![])),
56        (None, Top::String) => Some(Value::String(String::new())),
57        (Some(v), Top::Any) => Some(v),
58        (Some(v @ Value::Dict(_)), Top::Dict) => Some(v),
59        (Some(v @ Value::List(_)), Top::List) => Some(v),
60        (Some(v @ Value::String(_)), Top::String) => Some(v),
61        (Some(_), Top::Dict) => {
62            return Err(Error::new(
63                ErrorKind::UnexpectedLineType,
64                "expected dictionary top-level",
65            ));
66        }
67        (Some(_), Top::List) => {
68            return Err(Error::new(
69                ErrorKind::UnexpectedLineType,
70                "expected list top-level",
71            ));
72        }
73        (Some(_), Top::String) => {
74            return Err(Error::new(
75                ErrorKind::UnexpectedLineType,
76                "expected string top-level",
77            ));
78        }
79    };
80
81    Ok(value)
82}
83
84/// Parse a NestedText document from a reader.
85pub fn load<R: std::io::Read>(reader: R, top: Top) -> Result<Option<Value>, Error> {
86    let mut buf = String::new();
87    let mut reader = reader;
88    reader.read_to_string(&mut buf)?;
89    loads(&buf, top)
90}
91
92struct Parser<'a> {
93    lexer: &'a mut Lexer,
94    /// Stack of indentation levels we've entered, for detecting partial dedents.
95    indent_stack: Vec<usize>,
96    /// All indentation levels ever established during parsing.
97    all_indent_levels: Vec<usize>,
98}
99
100impl<'a> Parser<'a> {
101    /// Read a value at the given indentation depth.
102    fn read_value(&mut self, depth: usize) -> Result<Option<Value>, Error> {
103        let line = match self.lexer.peek() {
104            Some(l) => l,
105            None => return Ok(None),
106        };
107
108        if line.depth < depth {
109            return Ok(None);
110        }
111        if line.depth > depth {
112            if depth == 0 && self.indent_stack.is_empty() {
113                // Top-level content with indentation
114                return Err(Error::new(
115                    ErrorKind::InvalidIndentLevel,
116                    "top-level content must start in column 1.",
117                )
118                .with_lineno(line.lineno)
119                .with_colno(0)
120                .with_line(line.text.clone()));
121            }
122            // Check for partial dedent — returning to an indentation level
123            // that was never established
124            if !self.indent_stack.is_empty() && !self.indent_stack.contains(&line.depth) {
125                return Err(Error::new(
126                    ErrorKind::InvalidIndentLevel,
127                    "invalid indentation, partial dedent.",
128                )
129                .with_lineno(line.lineno)
130                .with_colno(0)
131                .with_line(line.text.clone()));
132            }
133            return Err(Error::new(
134                ErrorKind::InvalidIndentLevel,
135                "invalid indentation.",
136            )
137            .with_lineno(line.lineno)
138            .with_colno(line.depth)
139            .with_line(line.text.clone()));
140        }
141
142        match line.kind {
143            LineKind::DictItem | LineKind::KeyItem => self.read_dict(depth).map(Some),
144            LineKind::ListItem => self.read_list(depth).map(Some),
145            LineKind::StringItem => self.read_string(depth).map(Some),
146            LineKind::InlineList | LineKind::InlineDict => {
147                let line = self.lexer.next_line().unwrap();
148                let input = line.value.as_ref().unwrap();
149                let lineno = line.lineno;
150                let colno_offset = line.depth;
151                let line_text = &line.text;
152                InlineParser::parse(input, lineno, colno_offset, line_text).map(Some)
153            }
154            LineKind::Unrecognized => {
155                let line = self.lexer.peek().unwrap();
156                Err(Error::new(ErrorKind::UnrecognizedLine, "unrecognized line.")
157                    .with_lineno(line.lineno)
158                    .with_colno(line.depth)
159                    .with_line(line.text.clone()))
160            }
161        }
162    }
163
164    /// Read a dictionary at the given depth.
165    fn read_dict(&mut self, depth: usize) -> Result<Value, Error> {
166        let mut pairs: Vec<(String, Value)> = Vec::new();
167        let mut seen_keys: Vec<String> = Vec::new();
168
169        while let Some(line) = self.lexer.peek() {
170            if line.depth != depth {
171                break;
172            }
173
174            match line.kind {
175                LineKind::DictItem => {
176                    let line = self.lexer.next_line().unwrap();
177                    let key = line.key.clone().unwrap();
178                    let raw_value = line.value.clone().unwrap();
179                    let lineno = line.lineno;
180                    let line_text = line.text.clone();
181
182                    // Check for duplicate keys
183                    if seen_keys.contains(&key) {
184                        return Err(Error::new(
185                            ErrorKind::DuplicateKey,
186                            format!("duplicate key: {}.", key),
187                        )
188                        .with_lineno(lineno)
189                        .with_colno(0)
190                        .with_line(line_text));
191                    }
192                    seen_keys.push(key.clone());
193
194                    let value = if !raw_value.is_empty() {
195                        // Value on the same line — no indented content allowed
196                        self.check_no_indented_content(depth, lineno)?;
197                        Value::String(raw_value)
198                    } else {
199                        // Value on indented lines below
200                        self.read_indented_value(depth)?
201                    };
202
203                    pairs.push((key, value));
204                }
205                LineKind::KeyItem => {
206                    // Save first key item line info for error reporting
207                    let first_key_lineno = self.lexer.peek().unwrap().lineno;
208                    let first_key_text = self.lexer.peek().unwrap().text.clone();
209                    let key = self.read_key(depth)?;
210
211                    // Check for duplicate keys
212                    if seen_keys.contains(&key) {
213                        return Err(Error::new(
214                            ErrorKind::DuplicateKey,
215                            format!("duplicate key: {}.", key),
216                        ));
217                    }
218                    seen_keys.push(key.clone());
219
220                    // After a multiline key, an indented value MUST follow
221                    let next = self.lexer.peek();
222                    match next {
223                        Some(l) if l.depth > depth => {
224                            let child_depth = l.depth;
225                            self.indent_stack.push(child_depth);
226                            self.all_indent_levels.push(child_depth);
227                            let value = self
228                                .read_value(child_depth)?
229                                .unwrap_or(Value::String(String::new()));
230                            self.indent_stack.pop();
231                            pairs.push((key, value));
232                        }
233                        Some(_l) => {
234                            return Err(Error::new(
235                                ErrorKind::InvalidIndentLevel,
236                                "multiline key requires a value.",
237                            )
238                            .with_lineno(first_key_lineno)
239                            .with_colno(depth)
240                            .with_line(first_key_text.clone()));
241                        }
242                        None => {
243                            return Err(Error::new(
244                                ErrorKind::InvalidIndentLevel,
245                                "indented value must follow multiline key.",
246                            )
247                            .with_lineno(first_key_lineno)
248                            .with_line(first_key_text));
249                        }
250                    }
251                }
252                _ => {
253                    // Wrong line type at this depth in a dict context
254                    let line = self.lexer.peek().unwrap();
255                    return Err(Error::new(
256                        ErrorKind::UnexpectedLineType,
257                        "expected dictionary item.",
258                    )
259                    .with_lineno(line.lineno)
260                    .with_colno(line.depth)
261                    .with_line(line.text.clone()));
262                }
263            }
264        }
265
266        Ok(Value::Dict(pairs))
267    }
268
269    /// Read a list at the given depth.
270    fn read_list(&mut self, depth: usize) -> Result<Value, Error> {
271        let mut items = Vec::new();
272
273        while let Some(line) = self.lexer.peek() {
274            if line.depth != depth {
275                break;
276            }
277
278            if line.kind == LineKind::ListItem {
279                let line = self.lexer.next_line().unwrap();
280                let raw_value = line.value.clone().unwrap();
281                let lineno = line.lineno;
282
283                let value = if !raw_value.is_empty() {
284                    // Value on same line — no indented content allowed
285                    self.check_no_indented_content(depth, lineno)?;
286                    Value::String(raw_value)
287                } else {
288                    self.read_indented_value(depth)?
289                };
290
291                items.push(value);
292            } else {
293                // Wrong line type at this depth in a list context
294                let line = self.lexer.peek().unwrap();
295                return Err(Error::new(
296                    ErrorKind::UnexpectedLineType,
297                    "expected list item.",
298                )
299                .with_lineno(line.lineno)
300                .with_colno(line.depth)
301                .with_line(line.text.clone()));
302            }
303        }
304
305        Ok(Value::List(items))
306    }
307
308    /// Read a multiline string at the given depth.
309    fn read_string(&mut self, depth: usize) -> Result<Value, Error> {
310        let mut parts = Vec::new();
311
312        while self.lexer.next_is(depth, LineKind::StringItem) {
313            let line = self.lexer.next_line().unwrap();
314            parts.push(line.value.clone().unwrap());
315        }
316
317        // Check for invalid indentation after string
318        // (e.g., a string item at deeper indentation mixed in)
319        if let Some(next) = self.lexer.peek() {
320            if next.depth > depth && next.kind == LineKind::StringItem {
321                return Err(Error::new(
322                    ErrorKind::InvalidIndentLevel,
323                    "invalid indentation.",
324                )
325                .with_lineno(next.lineno)
326                .with_colno(depth)
327                .with_line(next.text.clone()));
328            }
329        }
330
331        Ok(Value::String(parts.join("\n")))
332    }
333
334    /// Read a multiline key at the given depth.
335    fn read_key(&mut self, depth: usize) -> Result<String, Error> {
336        let mut parts = Vec::new();
337
338        while self.lexer.next_is(depth, LineKind::KeyItem) {
339            let line = self.lexer.next_line().unwrap();
340            parts.push(line.value.clone().unwrap());
341        }
342
343        Ok(parts.join("\n"))
344    }
345
346    /// Read an indented value below a list/dict item that had an empty value on its line.
347    fn read_indented_value(&mut self, parent_depth: usize) -> Result<Value, Error> {
348        match self.lexer.peek() {
349            Some(line) if line.depth > parent_depth => {
350                let child_depth = line.depth;
351                self.indent_stack.push(child_depth);
352                            self.all_indent_levels.push(child_depth);
353                let result = self
354                    .read_value(child_depth)?
355                    .ok_or_else(|| Error::new(ErrorKind::UnexpectedLineType, "expected value"));
356                self.indent_stack.pop();
357                result
358            }
359            _ => Ok(Value::String(String::new())),
360        }
361    }
362
363    /// Check that no indented content follows a line that already has a value.
364    /// If a dict/list item has text after the tag on the same line, then indented
365    /// content below is an error (the value is already set).
366    fn check_no_indented_content(
367        &self,
368        parent_depth: usize,
369        _parent_lineno: usize,
370    ) -> Result<(), Error> {
371        if let Some(next) = self.lexer.peek() {
372            if next.depth > parent_depth {
373                return Err(Error::new(
374                    ErrorKind::InvalidIndentLevel,
375                    "invalid indentation.",
376                )
377                .with_lineno(next.lineno)
378                .with_colno(parent_depth)
379                .with_line(next.text.clone()));
380            }
381        }
382        Ok(())
383    }
384}
385
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for a `Value::String` holding `text`.
    fn s(text: &str) -> Value {
        Value::String(text.to_string())
    }

    /// Shorthand for a `Value::Dict` built from borrowed keys, keeping the given order.
    fn dict(entries: Vec<(&str, Value)>) -> Value {
        Value::Dict(
            entries
                .into_iter()
                .map(|(key, value)| (key.to_string(), value))
                .collect(),
        )
    }

    /// Parse `input` with no top-level constraint; panics on errors and on empty documents.
    fn parse(input: &str) -> Value {
        loads(input, Top::Any).unwrap().unwrap()
    }

    #[test]
    fn test_empty_document() {
        for &input in &["", "# just a comment\n", "  \n\n  \n"] {
            assert_eq!(loads(input, Top::Any).unwrap(), None);
        }
    }

    #[test]
    fn test_simple_dict() {
        let expected = dict(vec![("name", s("John")), ("age", s("30"))]);
        assert_eq!(parse("name: John\nage: 30"), expected);
    }

    #[test]
    fn test_simple_list() {
        let expected = Value::List(vec![s("apple"), s("banana"), s("cherry")]);
        assert_eq!(parse("- apple\n- banana\n- cherry"), expected);
    }

    #[test]
    fn test_multiline_string() {
        assert_eq!(
            parse("> line one\n> line two\n> line three"),
            s("line one\nline two\nline three")
        );
    }

    #[test]
    fn test_nested_dict_with_list() {
        let expected = dict(vec![
            ("fruits", Value::List(vec![s("apple"), s("banana")])),
            ("veggies", Value::List(vec![s("carrot")])),
        ]);
        assert_eq!(
            parse("fruits:\n  - apple\n  - banana\nveggies:\n  - carrot"),
            expected
        );
    }

    #[test]
    fn test_nested_list_with_dict() {
        let expected = Value::List(vec![
            dict(vec![("name", s("John")), ("age", s("30"))]),
            dict(vec![("name", s("Jane")), ("age", s("25"))]),
        ]);
        assert_eq!(
            parse("-\n  name: John\n  age: 30\n-\n  name: Jane\n  age: 25"),
            expected
        );
    }

    #[test]
    fn test_empty_list_item() {
        assert_eq!(parse("- \n- hello"), Value::List(vec![s(""), s("hello")]));
    }

    #[test]
    fn test_empty_dict_value() {
        assert_eq!(parse("key:"), dict(vec![("key", s(""))]));
    }

    #[test]
    fn test_inline_list_in_dict() {
        // An inline list after a key is kept as plain text, not parsed as a list.
        assert_eq!(
            parse("items: [a, b, c]"),
            dict(vec![("items", s("[a, b, c]"))])
        );
    }

    #[test]
    fn test_inline_list_standalone() {
        assert_eq!(
            parse("[a, b, c]"),
            Value::List(vec![s("a"), s("b"), s("c")])
        );
    }

    #[test]
    fn test_inline_dict_standalone() {
        assert_eq!(parse("{k: v}"), dict(vec![("k", s("v"))]));
    }

    #[test]
    fn test_top_constraint_dict() {
        assert!(loads("- item", Top::Dict).is_err());
    }

    #[test]
    fn test_top_constraint_list() {
        assert!(loads("key: value", Top::List).is_err());
    }

    #[test]
    fn test_multiline_key() {
        assert_eq!(
            parse(": key part 1\n: key part 2\n  > value"),
            dict(vec![("key part 1\nkey part 2", s("value"))])
        );
    }

    #[test]
    fn test_deeply_nested() {
        let innermost = dict(vec![("c", s("deep"))]);
        let middle = dict(vec![("b", innermost)]);
        assert_eq!(parse("a:\n  b:\n    c: deep"), dict(vec![("a", middle)]));
    }

    #[test]
    fn test_duplicate_key_error() {
        assert!(loads("key: value 1\nkey: value 2", Top::Any).is_err());
    }

    #[test]
    fn test_extra_content_after_inline() {
        assert!(loads("[]\nfoo: bar", Top::Any).is_err());
    }

    #[test]
    fn test_value_on_line_then_indent_error() {
        // "key 1:  " carries a trailing space as its value, so nested content is invalid.
        assert!(loads("key 1:  \n        key 2: value 2", Top::Any).is_err());
    }

    #[test]
    fn test_list_value_on_line_then_indent_error() {
        assert!(loads("-  \n   > value", Top::Any).is_err());
    }
}