// nested-text 0.1.0
//
// A fully spec-compliant NestedText v3.8 parser and serializer.
// Documentation
use crate::error::{Error, ErrorKind};
use crate::value::Value;

/// Recursive descent parser for inline NestedText structures: [...] and {...}.
///
/// Holds a cursor into the inline text plus the location information that is
/// attached to every error it reports.
pub struct InlineParser<'a> {
    /// The inline text being parsed.
    input: &'a str,
    /// Current byte offset into `input`.
    pos: usize,
    /// Line number attached to reported errors.
    lineno: usize,
    /// Added to `pos` to form the column attached to reported errors.
    colno_offset: usize,
    /// The full original line text (for error reporting).
    line_text: String,
}

impl<'a> InlineParser<'a> {
    /// Parses a complete inline value (`[...]` or `{...}`) from `input`.
    ///
    /// `lineno`, `colno_offset` and `line_text` are only used to annotate
    /// errors: `lineno` and a copy of `line_text` are attached as-is, and
    /// `colno_offset` is added to the parser's position within `input` to
    /// form the reported column.
    ///
    /// # Errors
    ///
    /// Propagates any error raised while parsing the value, and returns
    /// `ErrorKind::TrailingContent` when anything other than ASCII whitespace
    /// follows the closing delimiter.
    pub fn parse(input: &'a str, lineno: usize, colno_offset: usize, line_text: &str) -> Result<Value, Error> {
        let mut parser = InlineParser {
            input,
            pos: 0,
            lineno,
            colno_offset,
            line_text: line_text.to_string(),
        };
        let value = parser.parse_value()?;
        parser.skip_any_whitespace();
        if parser.pos < parser.input.len() {
            let trimmed = parser.input[parser.pos..].trim();
            // Count characters rather than bytes so that a single non-ASCII
            // character still gets the singular wording.
            let noun = if trimmed.chars().count() == 1 {
                "character"
            } else {
                "characters"
            };
            let msg = format!(
                "extra {} after closing delimiter: \u{2018}{}\u{2019}.",
                noun, trimmed
            );
            return Err(parser.error(ErrorKind::TrailingContent, msg));
        }
        Ok(value)
    }

    /// Dispatches on the next character: `[` starts a list, `{` starts a dict.
    ///
    /// Anything else, including end of input, is reported as
    /// `ErrorKind::InvalidInlineCharacter`.
    fn parse_value(&mut self) -> Result<Value, Error> {
        let opener = self.peek();
        if opener == Some('[') {
            return self.parse_list();
        }
        if opener == Some('{') {
            return self.parse_dict();
        }
        Err(self.error(
            ErrorKind::InvalidInlineCharacter,
            "expected \u{2018}[\u{2019} or \u{2018}{\u{2019}.",
        ))
    }

    fn parse_list(&mut self) -> Result<Value, Error> {
        self.expect('[')?;

        // Empty list: []
        if self.peek() == Some(']') {
            self.advance();
            return Ok(Value::List(vec![]));
        }

        let mut items = Vec::new();

        loop {
            self.skip_whitespace();

            // Parse item value
            let item = match self.peek() {
                Some('[') => self.parse_list()?,
                Some('{') => self.parse_dict()?,
                Some('}') => {
                    return Err(self.error(
                        ErrorKind::InvalidInlineCharacter,
                        "expected value.",
                    ));
                }
                _ => {
                    let s = self.parse_inline_string(&['[', ']', '{', '}', ','])?;
                    Value::String(s)
                }
            };
            items.push(item);

            self.skip_whitespace();

            match self.peek() {
                Some(',') => {
                    self.advance();
                }
                Some(']') => {
                    self.advance();
                    return Ok(Value::List(items));
                }
                Some(c) => {
                    return Err(self.error(
                        ErrorKind::InvalidInlineCharacter,
                        format!("expected \u{2018},\u{2019} or \u{2018}]\u{2019}, found \u{2018}{}\u{2019}.", c),
                    ));
                }
                None => {
                    return Err(self.error(
                        ErrorKind::UnterminatedInlineList,
                        "line ended without closing delimiter.",
                    ));
                }
            }
        }
    }

    /// Parses an inline dict `{key: value, ...}` starting at the current position.
    ///
    /// Keys are whitespace-trimmed plain strings. Values are nested lists,
    /// nested dicts, or whitespace-trimmed strings; a value may be empty
    /// (`{a:}` or `{a:,b:}`), but a comma must not be directly followed by `}`.
    /// Pairs are returned in source order.
    ///
    /// # Errors
    ///
    /// * `ErrorKind::InvalidInlineCharacter` if a key is not followed by `:`,
    ///   a pair is followed by something other than `,` or `}`, or a comma is
    ///   immediately followed by `}`.
    /// * `ErrorKind::DuplicateKey` if a key appears more than once.
    /// * `ErrorKind::UnterminatedInlineDict` if the input ends before `}`.
    fn parse_dict(&mut self) -> Result<Value, Error> {
        self.expect('{')?;

        // Empty dict: {}
        if self.peek() == Some('}') {
            self.advance();
            return Ok(Value::Dict(vec![]));
        }

        let mut pairs: Vec<(String, Value)> = Vec::new();

        loop {
            self.skip_whitespace();

            // Parse key
            let key = self.parse_inline_string(&['[', ']', '{', '}', ',', ':'])?;

            // Expect ':'
            match self.peek() {
                Some(':') => {
                    self.advance();
                }
                Some(c) => {
                    return Err(self.error(
                        ErrorKind::InvalidInlineCharacter,
                        format!("expected \u{2018}:\u{2019}, found \u{2018}{}\u{2019}.", c),
                    ));
                }
                None => {
                    return Err(self.error(
                        ErrorKind::UnterminatedInlineDict,
                        "line ended without closing delimiter.",
                    ));
                }
            }

            self.skip_whitespace();

            let value = match self.peek() {
                Some('[') => self.parse_list()?,
                Some('{') => self.parse_dict()?,
                // `,` or `}` right after the colon means the value is the
                // empty string; the delimiter itself is handled below.
                Some('}') | Some(',') => Value::String(String::new()),
                _ => Value::String(self.parse_inline_string(&['[', ']', '{', '}', ','])?),
            };

            // Duplicate detection scans the pairs collected so far, so no
            // separate key list (and no key clone) is needed. The check stays
            // after value parsing so the reported position is unchanged.
            if pairs.iter().any(|(existing, _)| existing == &key) {
                return Err(self.error(
                    ErrorKind::DuplicateKey,
                    format!("duplicate key: {}.", key),
                ));
            }
            pairs.push((key, value));

            self.skip_whitespace();

            match self.peek() {
                Some(',') => {
                    self.advance();
                    // If immediately followed by '}' (no whitespace), it's "expected value."
                    if self.peek() == Some('}') {
                        return Err(self.error(
                            ErrorKind::InvalidInlineCharacter,
                            "expected value.",
                        ));
                    }
                    // Whitespace before the next key is skipped at the top of the loop.
                }
                Some('}') => {
                    self.advance();
                    return Ok(Value::Dict(pairs));
                }
                Some(c) => {
                    return Err(self.error(
                        ErrorKind::InvalidInlineCharacter,
                        format!("expected \u{2018},\u{2019} or \u{2018}}}\u{2019}, found \u{2018}{}\u{2019}.", c),
                    ));
                }
                None => {
                    return Err(self.error(
                        ErrorKind::UnterminatedInlineDict,
                        "line ended without closing delimiter.",
                    ));
                }
            }
        }
    }

    /// Reads a plain string up to (but not including) the first character in
    /// `forbidden`, or to the end of the input if none occurs.
    ///
    /// The cursor is left on the terminating character. The returned text has
    /// leading and trailing whitespace trimmed and may be empty. This never
    /// returns `Err`.
    fn parse_inline_string(&mut self, forbidden: &[char]) -> Result<String, Error> {
        let rest = &self.input[self.pos..];
        // A `&[char]` pattern matches the first occurrence of any listed char.
        let len = rest.find(forbidden).unwrap_or(rest.len());
        self.pos += len;
        Ok(rest[..len].trim().to_string())
    }

    /// Returns the character at the cursor without consuming it, or `None`
    /// at end of input.
    fn peek(&self) -> Option<char> {
        let remaining = &self.input[self.pos..];
        remaining.chars().next()
    }

    /// Consumes and returns the character at the cursor, moving the cursor
    /// forward by that character's UTF-8 length. Returns `None` (and leaves
    /// the cursor untouched) at end of input.
    fn advance(&mut self) -> Option<char> {
        let next = self.peek();
        if let Some(ch) = next {
            self.pos += ch.len_utf8();
        }
        next
    }

    /// Consumes the next character if it equals `expected`.
    ///
    /// On a mismatch nothing is consumed, so the reported error position is
    /// the offending character itself (matching the other error sites, which
    /// peek before reporting).
    ///
    /// # Errors
    ///
    /// Returns `ErrorKind::InvalidInlineCharacter` if the next character
    /// differs from `expected` or the input has ended.
    fn expect(&mut self, expected: char) -> Result<(), Error> {
        match self.peek() {
            Some(c) if c == expected => {
                self.advance();
                Ok(())
            }
            Some(c) => Err(self.error(
                ErrorKind::InvalidInlineCharacter,
                format!("expected \u{2018}{}\u{2019}, found \u{2018}{}\u{2019}.", expected, c),
            )),
            None => Err(self.error(
                ErrorKind::InvalidInlineCharacter,
                format!("expected \u{2018}{}\u{2019}, found end of input.", expected),
            )),
        }
    }

    /// Advances the cursor past any run of spaces and tabs.
    fn skip_whitespace(&mut self) {
        while matches!(self.peek(), Some(' ') | Some('\t')) {
            self.advance();
        }
    }

    /// Advances the cursor past any run of ASCII whitespace characters
    /// (as defined by `char::is_ascii_whitespace`).
    fn skip_any_whitespace(&mut self) {
        while self.peek().map_or(false, |c| c.is_ascii_whitespace()) {
            self.advance();
        }
    }

    /// Builds an `Error` of the given kind and message, annotated with the
    /// parser's line number, the current column, and a copy of the line text.
    ///
    /// The column is `colno_offset + pos`.
    // NOTE(review): `pos` is a byte offset, so for lines containing non-ASCII
    // text the column counts bytes rather than characters — confirm that this
    // is what consumers of the error expect.
    fn error(&self, kind: ErrorKind, message: impl Into<String>) -> Error {
        let column = self.colno_offset + self.pos;
        let source_line = self.line_text.clone();
        Error::new(kind, message)
            .with_lineno(self.lineno)
            .with_colno(column)
            .with_line(source_line)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `text` as an inline value, using `text` itself as the
    /// error-context line and zero for the line number and column offset.
    fn parse_inline(text: &str) -> Result<Value, Error> {
        InlineParser::parse(text, 0, 0, text)
    }

    /// Shorthand for a `Value::String` holding `text`.
    fn string(text: &str) -> Value {
        Value::String(text.to_string())
    }

    #[test]
    fn test_empty_list() {
        assert_eq!(parse_inline("[]").unwrap(), Value::List(Vec::new()));
    }

    #[test]
    fn test_list_with_empty_string() {
        // Whitespace between the brackets counts as one (empty) string item.
        assert_eq!(parse_inline("[ ]").unwrap(), Value::List(vec![string("")]));
    }

    #[test]
    fn test_list_two_empty_strings() {
        let expected = Value::List(vec![string(""), string("")]);
        assert_eq!(parse_inline("[,]").unwrap(), expected);
    }

    #[test]
    fn test_simple_list() {
        let expected = Value::List(vec![string("a"), string("b"), string("c")]);
        assert_eq!(parse_inline("[a, b, c]").unwrap(), expected);
    }

    #[test]
    fn test_empty_dict() {
        assert_eq!(parse_inline("{}").unwrap(), Value::Dict(Vec::new()));
    }

    #[test]
    fn test_simple_dict() {
        let expected = Value::Dict(vec![
            ("k".to_string(), string("v")),
            ("k2".to_string(), string("v2")),
        ]);
        assert_eq!(parse_inline("{k: v, k2: v2}").unwrap(), expected);
    }

    #[test]
    fn test_nested_inline() {
        let inner = Value::List(vec![string("b"), string("c")]);
        let expected = Value::List(vec![string("a"), inner, string("d")]);
        assert_eq!(parse_inline("[a, [b, c], d]").unwrap(), expected);
    }

    #[test]
    fn test_unterminated_list() {
        assert!(parse_inline("[a, b").is_err());
    }

    #[test]
    fn test_trailing_content() {
        assert!(parse_inline("[] extra").is_err());
    }

    #[test]
    fn test_whitespace_trimming() {
        let expected = Value::List(vec![string("hello"), string("world")]);
        assert_eq!(parse_inline("[  hello  ,  world  ]").unwrap(), expected);
    }

    #[test]
    fn test_inline_dict_duplicate_key() {
        assert!(parse_inline("{a:1, a:2}").is_err());
    }
}