use crate::error::{Error, ErrorKind};
use crate::inline::InlineParser;
use crate::lexer::{Lexer, LineKind};
use crate::value::Value;
/// Constraint on the type of the top-level value accepted by [`loads`] and [`load`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Top {
    /// The document must be a dictionary; a document without content yields an
    /// empty dictionary.
    Dict,
    /// The document must be a list; a document without content yields an empty list.
    List,
    /// The document must be a string; a document without content yields an empty
    /// string.
    String,
    /// Any value type is accepted; a document without content yields `None`.
    Any,
}
pub fn loads(input: &str, top: Top) -> Result<Option<Value>, Error> {
let mut lexer = Lexer::new(input)?;
let mut parser = Parser {
lexer: &mut lexer,
indent_stack: vec![],
all_indent_levels: vec![0],
};
let value = parser.read_value(0)?;
if value.is_some() {
if let Some(line) = parser.lexer.peek() {
if line.depth > 0
&& parser.all_indent_levels.len() > 1
&& !parser.all_indent_levels.contains(&line.depth)
{
return Err(Error::new(
ErrorKind::InvalidIndentLevel,
"invalid indentation, partial dedent.",
)
.with_lineno(line.lineno)
.with_colno(0)
.with_line(line.text.clone()));
}
return Err(Error::new(ErrorKind::UnexpectedLineType, "extra content.")
.with_lineno(line.lineno)
.with_colno(0)
.with_line(line.text.clone()));
}
}
let value = match (value, top) {
(None, Top::Any) => None,
(None, Top::Dict) => Some(Value::Dict(vec![])),
(None, Top::List) => Some(Value::List(vec![])),
(None, Top::String) => Some(Value::String(String::new())),
(Some(v), Top::Any) => Some(v),
(Some(v @ Value::Dict(_)), Top::Dict) => Some(v),
(Some(v @ Value::List(_)), Top::List) => Some(v),
(Some(v @ Value::String(_)), Top::String) => Some(v),
(Some(_), Top::Dict) => {
return Err(Error::new(
ErrorKind::UnexpectedLineType,
"expected dictionary top-level",
));
}
(Some(_), Top::List) => {
return Err(Error::new(
ErrorKind::UnexpectedLineType,
"expected list top-level",
));
}
(Some(_), Top::String) => {
return Err(Error::new(
ErrorKind::UnexpectedLineType,
"expected string top-level",
));
}
};
Ok(value)
}
pub fn load<R: std::io::Read>(reader: R, top: Top) -> Result<Option<Value>, Error> {
let mut buf = String::new();
let mut reader = reader;
reader.read_to_string(&mut buf)?;
loads(&buf, top)
}
/// Parser state shared by the `read_*` methods.
struct Parser<'a> {
    /// Source of lines; the parser peeks at the next line and consumes lines from it.
    lexer: &'a mut Lexer,
    /// Depths of the nested values currently being read: pushed before descending
    /// into an indented value and popped once it has been read.
    indent_stack: Vec<usize>,
    /// Indentation depths recorded while parsing, seeded with `0` by `loads`, which
    /// consults it to classify lines left over after the top-level value.
    all_indent_levels: Vec<usize>,
}
impl<'a> Parser<'a> {
fn read_value(&mut self, depth: usize) -> Result<Option<Value>, Error> {
let line = match self.lexer.peek() {
Some(l) => l,
None => return Ok(None),
};
if line.depth < depth {
return Ok(None);
}
if line.depth > depth {
if depth == 0 && self.indent_stack.is_empty() {
return Err(Error::new(
ErrorKind::InvalidIndentLevel,
"top-level content must start in column 1.",
)
.with_lineno(line.lineno)
.with_colno(0)
.with_line(line.text.clone()));
}
if !self.indent_stack.is_empty() && !self.indent_stack.contains(&line.depth) {
return Err(Error::new(
ErrorKind::InvalidIndentLevel,
"invalid indentation, partial dedent.",
)
.with_lineno(line.lineno)
.with_colno(0)
.with_line(line.text.clone()));
}
return Err(Error::new(
ErrorKind::InvalidIndentLevel,
"invalid indentation.",
)
.with_lineno(line.lineno)
.with_colno(line.depth)
.with_line(line.text.clone()));
}
match line.kind {
LineKind::DictItem | LineKind::KeyItem => self.read_dict(depth).map(Some),
LineKind::ListItem => self.read_list(depth).map(Some),
LineKind::StringItem => self.read_string(depth).map(Some),
LineKind::InlineList | LineKind::InlineDict => {
let line = self.lexer.next_line().unwrap();
let input = line.value.as_ref().unwrap();
let lineno = line.lineno;
let colno_offset = line.depth;
let line_text = &line.text;
InlineParser::parse(input, lineno, colno_offset, line_text).map(Some)
}
LineKind::Unrecognized => {
let line = self.lexer.peek().unwrap();
Err(Error::new(ErrorKind::UnrecognizedLine, "unrecognized line.")
.with_lineno(line.lineno)
.with_colno(line.depth)
.with_line(line.text.clone()))
}
}
}
fn read_dict(&mut self, depth: usize) -> Result<Value, Error> {
let mut pairs: Vec<(String, Value)> = Vec::new();
let mut seen_keys: Vec<String> = Vec::new();
while let Some(line) = self.lexer.peek() {
if line.depth != depth {
break;
}
match line.kind {
LineKind::DictItem => {
let line = self.lexer.next_line().unwrap();
let key = line.key.clone().unwrap();
let raw_value = line.value.clone().unwrap();
let lineno = line.lineno;
let line_text = line.text.clone();
if seen_keys.contains(&key) {
return Err(Error::new(
ErrorKind::DuplicateKey,
format!("duplicate key: {}.", key),
)
.with_lineno(lineno)
.with_colno(0)
.with_line(line_text));
}
seen_keys.push(key.clone());
let value = if !raw_value.is_empty() {
self.check_no_indented_content(depth, lineno)?;
Value::String(raw_value)
} else {
self.read_indented_value(depth)?
};
pairs.push((key, value));
}
LineKind::KeyItem => {
let first_key_lineno = self.lexer.peek().unwrap().lineno;
let first_key_text = self.lexer.peek().unwrap().text.clone();
let key = self.read_key(depth)?;
if seen_keys.contains(&key) {
return Err(Error::new(
ErrorKind::DuplicateKey,
format!("duplicate key: {}.", key),
));
}
seen_keys.push(key.clone());
let next = self.lexer.peek();
match next {
Some(l) if l.depth > depth => {
let child_depth = l.depth;
self.indent_stack.push(child_depth);
self.all_indent_levels.push(child_depth);
let value = self
.read_value(child_depth)?
.unwrap_or(Value::String(String::new()));
self.indent_stack.pop();
pairs.push((key, value));
}
Some(_l) => {
return Err(Error::new(
ErrorKind::InvalidIndentLevel,
"multiline key requires a value.",
)
.with_lineno(first_key_lineno)
.with_colno(depth)
.with_line(first_key_text.clone()));
}
None => {
return Err(Error::new(
ErrorKind::InvalidIndentLevel,
"indented value must follow multiline key.",
)
.with_lineno(first_key_lineno)
.with_line(first_key_text));
}
}
}
_ => {
let line = self.lexer.peek().unwrap();
return Err(Error::new(
ErrorKind::UnexpectedLineType,
"expected dictionary item.",
)
.with_lineno(line.lineno)
.with_colno(line.depth)
.with_line(line.text.clone()));
}
}
}
Ok(Value::Dict(pairs))
}
fn read_list(&mut self, depth: usize) -> Result<Value, Error> {
let mut items = Vec::new();
while let Some(line) = self.lexer.peek() {
if line.depth != depth {
break;
}
if line.kind == LineKind::ListItem {
let line = self.lexer.next_line().unwrap();
let raw_value = line.value.clone().unwrap();
let lineno = line.lineno;
let value = if !raw_value.is_empty() {
self.check_no_indented_content(depth, lineno)?;
Value::String(raw_value)
} else {
self.read_indented_value(depth)?
};
items.push(value);
} else {
let line = self.lexer.peek().unwrap();
return Err(Error::new(
ErrorKind::UnexpectedLineType,
"expected list item.",
)
.with_lineno(line.lineno)
.with_colno(line.depth)
.with_line(line.text.clone()));
}
}
Ok(Value::List(items))
}
fn read_string(&mut self, depth: usize) -> Result<Value, Error> {
let mut parts = Vec::new();
while self.lexer.next_is(depth, LineKind::StringItem) {
let line = self.lexer.next_line().unwrap();
parts.push(line.value.clone().unwrap());
}
if let Some(next) = self.lexer.peek() {
if next.depth > depth && next.kind == LineKind::StringItem {
return Err(Error::new(
ErrorKind::InvalidIndentLevel,
"invalid indentation.",
)
.with_lineno(next.lineno)
.with_colno(depth)
.with_line(next.text.clone()));
}
}
Ok(Value::String(parts.join("\n")))
}
fn read_key(&mut self, depth: usize) -> Result<String, Error> {
let mut parts = Vec::new();
while self.lexer.next_is(depth, LineKind::KeyItem) {
let line = self.lexer.next_line().unwrap();
parts.push(line.value.clone().unwrap());
}
Ok(parts.join("\n"))
}
fn read_indented_value(&mut self, parent_depth: usize) -> Result<Value, Error> {
match self.lexer.peek() {
Some(line) if line.depth > parent_depth => {
let child_depth = line.depth;
self.indent_stack.push(child_depth);
self.all_indent_levels.push(child_depth);
let result = self
.read_value(child_depth)?
.ok_or_else(|| Error::new(ErrorKind::UnexpectedLineType, "expected value"));
self.indent_stack.pop();
result
}
_ => Ok(Value::String(String::new())),
}
}
fn check_no_indented_content(
&self,
parent_depth: usize,
_parent_lineno: usize,
) -> Result<(), Error> {
if let Some(next) = self.lexer.peek() {
if next.depth > parent_depth {
return Err(Error::new(
ErrorKind::InvalidIndentLevel,
"invalid indentation.",
)
.with_lineno(next.lineno)
.with_colno(parent_depth)
.with_line(next.text.clone()));
}
}
Ok(())
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `Value::String` from a string slice.
    fn s(text: &str) -> Value {
        Value::String(text.to_string())
    }

    /// Parses `input` without a top-level constraint and unwraps the value.
    fn parse(input: &str) -> Value {
        loads(input, Top::Any).unwrap().unwrap()
    }

    #[test]
    fn test_empty_document() {
        assert_eq!(loads("", Top::Any).unwrap(), None);
        assert_eq!(loads("# just a comment\n", Top::Any).unwrap(), None);
        assert_eq!(loads(" \n\n \n", Top::Any).unwrap(), None);
    }

    #[test]
    fn test_simple_dict() {
        assert_eq!(
            parse("name: John\nage: 30"),
            Value::Dict(vec![
                ("name".to_string(), s("John")),
                ("age".to_string(), s("30")),
            ])
        );
    }

    #[test]
    fn test_simple_list() {
        assert_eq!(
            parse("- apple\n- banana\n- cherry"),
            Value::List(vec![s("apple"), s("banana"), s("cherry")])
        );
    }

    #[test]
    fn test_multiline_string() {
        assert_eq!(
            parse("> line one\n> line two\n> line three"),
            s("line one\nline two\nline three")
        );
    }

    #[test]
    fn test_nested_dict_with_list() {
        let input = "fruits:\n - apple\n - banana\nveggies:\n - carrot";
        assert_eq!(
            parse(input),
            Value::Dict(vec![
                (
                    "fruits".to_string(),
                    Value::List(vec![s("apple"), s("banana")])
                ),
                ("veggies".to_string(), Value::List(vec![s("carrot")])),
            ])
        );
    }

    #[test]
    fn test_nested_list_with_dict() {
        let input = "-\n name: John\n age: 30\n-\n name: Jane\n age: 25";
        assert_eq!(
            parse(input),
            Value::List(vec![
                Value::Dict(vec![
                    ("name".to_string(), s("John")),
                    ("age".to_string(), s("30")),
                ]),
                Value::Dict(vec![
                    ("name".to_string(), s("Jane")),
                    ("age".to_string(), s("25")),
                ]),
            ])
        );
    }

    #[test]
    fn test_empty_list_item() {
        assert_eq!(parse("- \n- hello"), Value::List(vec![s(""), s("hello")]));
    }

    #[test]
    fn test_empty_dict_value() {
        assert_eq!(parse("key:"), Value::Dict(vec![("key".to_string(), s(""))]));
    }

    #[test]
    fn test_inline_list_in_dict() {
        // Inline syntax inside a dictionary value stays a plain string.
        assert_eq!(
            parse("items: [a, b, c]"),
            Value::Dict(vec![("items".to_string(), s("[a, b, c]"))])
        );
    }

    #[test]
    fn test_inline_list_standalone() {
        assert_eq!(
            parse("[a, b, c]"),
            Value::List(vec![s("a"), s("b"), s("c")])
        );
    }

    #[test]
    fn test_inline_dict_standalone() {
        assert_eq!(
            parse("{k: v}"),
            Value::Dict(vec![("k".to_string(), s("v"))])
        );
    }

    #[test]
    fn test_top_constraint_dict() {
        assert!(loads("- item", Top::Dict).is_err());
    }

    #[test]
    fn test_top_constraint_list() {
        assert!(loads("key: value", Top::List).is_err());
    }

    #[test]
    fn test_multiline_key() {
        let input = ": key part 1\n: key part 2\n > value";
        assert_eq!(
            parse(input),
            Value::Dict(vec![("key part 1\nkey part 2".to_string(), s("value"))])
        );
    }

    #[test]
    fn test_deeply_nested() {
        // Each level must be indented further than its parent; with `b` and `c` at
        // the same indentation they would be siblings rather than nested.
        let input = "a:\n b:\n  c: deep";
        assert_eq!(
            parse(input),
            Value::Dict(vec![(
                "a".to_string(),
                Value::Dict(vec![(
                    "b".to_string(),
                    Value::Dict(vec![("c".to_string(), s("deep"))]),
                )]),
            )])
        );
    }

    #[test]
    fn test_duplicate_key_error() {
        assert!(loads("key: value 1\nkey: value 2", Top::Any).is_err());
    }

    #[test]
    fn test_extra_content_after_inline() {
        assert!(loads("[]\nfoo: bar", Top::Any).is_err());
    }

    #[test]
    fn test_value_on_line_then_indent_error() {
        // The first item carries a real on-line value, so the indented line after
        // it must be rejected regardless of how trailing whitespace is lexed.
        assert!(loads("key 1: value 1\n key 2: value 2", Top::Any).is_err());
    }

    #[test]
    fn test_list_value_on_line_then_indent_error() {
        // A bare "- " is an empty item (see `test_empty_list_item`) and may take an
        // indented value, so the item needs actual text to trigger the error.
        assert!(loads("- item\n > value", Top::Any).is_err());
    }
}