use super::{Mapping, Value};
use std::fmt;
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct YamlError {
pub line: usize,
pub message: String,
}
impl fmt::Display for YamlError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if self.line > 0 {
write!(f, "YAML error at line {}: {}", self.line, self.message)
} else {
write!(f, "YAML error: {}", self.message)
}
}
}
impl std::error::Error for YamlError {}
pub fn parse(text: &str) -> Result<Value, YamlError> {
let lines: Vec<String> = text.lines().map(|l| l.to_string()).collect();
let mut p = Parser { lines, pos: 0 };
p.skip_blank_and_comments();
if p.pos >= p.lines.len() {
return Ok(Value::Null);
}
let base = p.current_indent()?;
let value = p.parse_node(base)?;
p.skip_blank_and_comments();
if p.pos < p.lines.len() {
return Err(p.err("unexpected trailing content"));
}
Ok(value)
}
struct Parser {
lines: Vec<String>,
pos: usize,
}
impl Parser {
fn err(&self, msg: impl Into<String>) -> YamlError {
YamlError {
line: self.pos + 1,
message: msg.into(),
}
}
fn is_blank_or_comment(line: &str) -> bool {
let t = line.trim_start();
t.is_empty() || t.starts_with('#')
}
fn skip_blank_and_comments(&mut self) {
while self.pos < self.lines.len() && Self::is_blank_or_comment(&self.lines[self.pos]) {
self.pos += 1;
}
}
fn current_indent(&self) -> Result<usize, YamlError> {
indent_of(&self.lines[self.pos]).ok_or_else(|| self.err("tab character in indentation"))
}
fn parse_node(&mut self, indent: usize) -> Result<Value, YamlError> {
let line = &self.lines[self.pos];
let content = &line[indent.min(line.len())..];
let trimmed = content.trim_start();
if trimmed == "-" || trimmed.starts_with("- ") {
self.parse_sequence(indent)
} else if split_key_value(trimmed).is_some() {
self.parse_mapping(indent)
} else {
let v = parse_inline_value(trimmed, self.pos)?;
self.pos += 1;
Ok(v)
}
}
fn parse_mapping(&mut self, indent: usize) -> Result<Value, YamlError> {
let mut map = Mapping::new();
loop {
self.skip_blank_and_comments();
if self.pos >= self.lines.len() {
break;
}
let ind = self.current_indent()?;
if ind < indent {
break;
}
if ind > indent {
return Err(self.err("unexpected indentation in mapping"));
}
let line = self.lines[self.pos].clone();
let content = line[indent..].to_string();
let trimmed = content.trim_start();
if trimmed == "-" || trimmed.starts_with("- ") {
break; }
let (key_str, rest) = split_key_value(trimmed)
.ok_or_else(|| self.err("expected 'key: value' mapping entry"))?;
let key = parse_scalar(&key_str, self.pos)?;
let entry_line = self.pos;
self.pos += 1;
let value = match rest {
Some(r) if r.starts_with('|') || r.starts_with('>') => {
self.parse_block_scalar(indent, &r)?
}
Some(r) => parse_inline_value(&r, entry_line)?,
None => {
self.parse_nested(indent)?
}
};
map.push_raw(key, value);
}
Ok(Value::Mapping(map))
}
fn parse_sequence(&mut self, indent: usize) -> Result<Value, YamlError> {
let mut seq = Vec::new();
loop {
self.skip_blank_and_comments();
if self.pos >= self.lines.len() {
break;
}
let ind = self.current_indent()?;
if ind < indent {
break;
}
if ind > indent {
return Err(self.err("unexpected indentation in sequence"));
}
let line = self.lines[self.pos].clone();
let content = &line[indent..];
if !(content == "-" || content.starts_with("- ")) {
break;
}
let dash_rest = &content[1..]; let item_offset = indent + 1 + (dash_rest.len() - dash_rest.trim_start().len());
let item_text = content[1..].trim_start().to_string();
let entry_line = self.pos;
if item_text.is_empty() {
self.pos += 1;
let v = self.parse_nested(indent)?;
seq.push(v);
} else if item_text.starts_with('|') || item_text.starts_with('>') {
self.pos += 1;
let v = self.parse_block_scalar(indent, &item_text)?;
seq.push(v);
} else if split_key_value(&item_text).is_some() {
let mut rewritten = " ".repeat(item_offset);
rewritten.push_str(&item_text);
self.lines[entry_line] = rewritten;
let v = self.parse_mapping(item_offset)?;
seq.push(v);
} else {
let v = parse_inline_value(&item_text, entry_line)?;
self.pos += 1;
seq.push(v);
}
}
Ok(Value::Sequence(seq))
}
fn parse_nested(&mut self, parent_indent: usize) -> Result<Value, YamlError> {
self.skip_blank_and_comments();
if self.pos >= self.lines.len() {
return Ok(Value::Null);
}
let ind = self.current_indent()?;
if ind > parent_indent {
self.parse_node(ind)
} else if ind == parent_indent && self.line_is_sequence_item(ind) {
self.parse_sequence(ind)
} else {
Ok(Value::Null)
}
}
fn line_is_sequence_item(&self, indent: usize) -> bool {
let line = &self.lines[self.pos];
let content = &line[indent.min(line.len())..];
content == "-" || content.starts_with("- ")
}
fn parse_block_scalar(&mut self, parent_indent: usize, header: &str) -> Result<Value, YamlError> {
let style = header.as_bytes()[0]; let chomp = header[1..].chars().find(|c| *c == '+' || *c == '-');
let mut body: Vec<String> = Vec::new();
let mut block_indent: Option<usize> = None;
while self.pos < self.lines.len() {
let line = &self.lines[self.pos];
if line.trim().is_empty() {
body.push(String::new());
self.pos += 1;
continue;
}
let ind = indent_of(line).ok_or_else(|| self.err("tab in block scalar indentation"))?;
if ind <= parent_indent {
break;
}
if block_indent.is_none() {
block_indent = Some(ind);
}
let bi = block_indent.unwrap();
let stripped = if line.len() >= bi { line[bi..].to_string() } else { String::new() };
body.push(stripped);
self.pos += 1;
}
let mut trailing_blanks = 0;
while body.last().map(|l| l.is_empty()).unwrap_or(false) {
body.pop();
trailing_blanks += 1;
}
let mut text = if style == b'|' {
body.join("\n")
} else {
fold_lines(&body)
};
match chomp {
Some('-') => {} Some('+') => {
text.push('\n');
for _ in 0..trailing_blanks {
text.push('\n');
}
}
_ => {
if !text.is_empty() || trailing_blanks > 0 {
text.push('\n');
}
}
}
Ok(Value::String(text))
}
}
fn fold_lines(lines: &[String]) -> String {
let mut out = String::new();
let mut prev_nonempty = false;
for line in lines {
if line.is_empty() {
out.push('\n');
prev_nonempty = false;
} else {
if prev_nonempty {
out.push(' ');
}
out.push_str(line);
prev_nonempty = true;
}
}
out
}
fn indent_of(line: &str) -> Option<usize> {
let mut n = 0;
for c in line.chars() {
match c {
' ' => n += 1,
'\t' => return None,
_ => break,
}
}
Some(n)
}
fn split_key_value(s: &str) -> Option<(String, Option<String>)> {
let chars: Vec<char> = s.chars().collect();
let mut i = 0;
let mut quote: Option<char> = None;
let mut depth: i32 = 0;
while i < chars.len() {
let c = chars[i];
if let Some(q) = quote {
if q == '"' && c == '\\' {
i += 2;
continue;
}
if c == q {
if q == '\'' && chars.get(i + 1) == Some(&'\'') {
i += 2;
continue;
}
quote = None;
}
i += 1;
continue;
}
match c {
'\'' | '"' => quote = Some(c),
'[' | '{' => depth += 1,
']' | '}' => {
if depth > 0 {
depth -= 1;
}
}
'#' if depth == 0 && i > 0 && (chars[i - 1] == ' ' || chars[i - 1] == '\t') => {
break; }
':' if depth == 0 => {
let next = chars.get(i + 1).copied();
if next.is_none() || next == Some(' ') || next == Some('\t') {
let key: String = chars[..i].iter().collect();
let rest: String = chars[i + 1..].iter().collect();
let rest = rest.trim();
let rest_opt = if rest.is_empty() || rest.starts_with('#') {
None
} else {
Some(rest.to_string())
};
return Some((key.trim().to_string(), rest_opt));
}
}
_ => {}
}
i += 1;
}
None
}
fn parse_inline_value(s: &str, line: usize) -> Result<Value, YamlError> {
let t = s.trim();
if t.starts_with('[') || t.starts_with('{') {
let mut fp = FlowParser {
chars: t.chars().collect(),
pos: 0,
line,
};
let v = fp.parse_value()?;
fp.skip_ws();
if fp.pos < fp.chars.len() && fp.chars[fp.pos] != '#' {
return Err(YamlError {
line: line + 1,
message: "unexpected content after flow collection".into(),
});
}
Ok(v)
} else {
parse_scalar(t, line)
}
}
fn parse_scalar(token: &str, line: usize) -> Result<Value, YamlError> {
let t = token.trim();
if t.is_empty() {
return Ok(Value::Null);
}
if t.starts_with('"') {
return parse_double_quoted(t, line).map(Value::String);
}
if t.starts_with('\'') {
return parse_single_quoted(t, line).map(Value::String);
}
let plain = strip_trailing_comment(t);
Ok(interpret_plain(plain))
}
fn strip_trailing_comment(s: &str) -> &str {
let bytes = s.as_bytes();
let mut i = 0;
while i < bytes.len() {
if bytes[i] == b'#' && i > 0 && (bytes[i - 1] == b' ' || bytes[i - 1] == b'\t') {
return s[..i].trim_end();
}
i += 1;
}
s.trim_end()
}
fn interpret_plain(s: &str) -> Value {
match s {
"" | "~" | "null" | "Null" | "NULL" => return Value::Null,
"true" | "True" | "TRUE" => return Value::Bool(true),
"false" | "False" | "FALSE" => return Value::Bool(false),
".inf" | ".Inf" | ".INF" | "+.inf" => return Value::Float(f64::INFINITY),
"-.inf" | "-.Inf" | "-.INF" => return Value::Float(f64::NEG_INFINITY),
".nan" | ".NaN" | ".NAN" => return Value::Float(f64::NAN),
_ => {}
}
if is_canonical_int(s) {
if let Ok(i) = s.parse::<i64>() {
return Value::Int(i);
}
}
if is_canonical_float(s) {
if let Ok(f) = s.parse::<f64>() {
return Value::Float(f);
}
}
Value::String(s.to_string())
}
fn is_canonical_int(s: &str) -> bool {
let digits = s.strip_prefix(['+', '-']).unwrap_or(s);
if digits.is_empty() || !digits.bytes().all(|b| b.is_ascii_digit()) {
return false;
}
digits == "0" || !digits.starts_with('0')
}
fn is_canonical_float(s: &str) -> bool {
if !s.contains('.') || !s.bytes().any(|b| b.is_ascii_digit()) {
return false;
}
s.parse::<f64>().is_ok()
}
fn parse_double_quoted(s: &str, line: usize) -> Result<String, YamlError> {
let chars: Vec<char> = s.chars().collect();
debug_assert_eq!(chars[0], '"');
let mut out = String::new();
let mut i = 1;
while i < chars.len() {
let c = chars[i];
if c == '"' {
return Ok(out);
}
if c == '\\' {
i += 1;
let e = *chars.get(i).ok_or_else(|| YamlError {
line: line + 1,
message: "dangling escape in double-quoted string".into(),
})?;
match e {
'n' => out.push('\n'),
't' => out.push('\t'),
'r' => out.push('\r'),
'"' => out.push('"'),
'\\' => out.push('\\'),
'/' => out.push('/'),
'0' => out.push('\0'),
'b' => out.push('\u{0008}'),
'f' => out.push('\u{000C}'),
'u' => {
let hex: String = chars[i + 1..(i + 5).min(chars.len())].iter().collect();
if hex.len() == 4 {
if let Ok(cp) = u32::from_str_radix(&hex, 16) {
if let Some(ch) = char::from_u32(cp) {
out.push(ch);
}
}
i += 4;
}
}
other => out.push(other),
}
i += 1;
continue;
}
out.push(c);
i += 1;
}
Err(YamlError {
line: line + 1,
message: "unterminated double-quoted string".into(),
})
}
fn parse_single_quoted(s: &str, line: usize) -> Result<String, YamlError> {
let chars: Vec<char> = s.chars().collect();
debug_assert_eq!(chars[0], '\'');
let mut out = String::new();
let mut i = 1;
while i < chars.len() {
let c = chars[i];
if c == '\'' {
if chars.get(i + 1) == Some(&'\'') {
out.push('\'');
i += 2;
continue;
}
return Ok(out);
}
out.push(c);
i += 1;
}
Err(YamlError {
line: line + 1,
message: "unterminated single-quoted string".into(),
})
}
struct FlowParser {
chars: Vec<char>,
pos: usize,
line: usize,
}
impl FlowParser {
fn skip_ws(&mut self) {
while self.pos < self.chars.len() && self.chars[self.pos].is_whitespace() {
self.pos += 1;
}
}
fn err(&self, msg: impl Into<String>) -> YamlError {
YamlError {
line: self.line + 1,
message: msg.into(),
}
}
fn parse_value(&mut self) -> Result<Value, YamlError> {
self.skip_ws();
match self.chars.get(self.pos) {
Some('[') => self.parse_seq(),
Some('{') => self.parse_map(),
Some(_) => self.parse_flow_scalar(),
None => Ok(Value::Null),
}
}
fn parse_seq(&mut self) -> Result<Value, YamlError> {
self.pos += 1; let mut seq = Vec::new();
loop {
self.skip_ws();
match self.chars.get(self.pos) {
Some(']') => {
self.pos += 1;
break;
}
None => return Err(self.err("unterminated flow sequence")),
_ => {}
}
seq.push(self.parse_value()?);
self.skip_ws();
match self.chars.get(self.pos) {
Some(',') => self.pos += 1,
Some(']') => {
self.pos += 1;
break;
}
_ => return Err(self.err("expected ',' or ']' in flow sequence")),
}
}
Ok(Value::Sequence(seq))
}
fn parse_map(&mut self) -> Result<Value, YamlError> {
self.pos += 1; let mut map = Mapping::new();
loop {
self.skip_ws();
match self.chars.get(self.pos) {
Some('}') => {
self.pos += 1;
break;
}
None => return Err(self.err("unterminated flow mapping")),
_ => {}
}
let key = self.parse_flow_scalar()?;
self.skip_ws();
if self.chars.get(self.pos) != Some(&':') {
return Err(self.err("expected ':' in flow mapping"));
}
self.pos += 1;
let value = self.parse_value()?;
map.push_raw(key, value);
self.skip_ws();
match self.chars.get(self.pos) {
Some(',') => self.pos += 1,
Some('}') => {
self.pos += 1;
break;
}
_ => return Err(self.err("expected ',' or '}' in flow mapping")),
}
}
Ok(Value::Mapping(map))
}
fn parse_flow_scalar(&mut self) -> Result<Value, YamlError> {
self.skip_ws();
let c = *self.chars.get(self.pos).ok_or_else(|| self.err("expected scalar"))?;
if c == '"' || c == '\'' {
let start = self.pos;
self.pos += 1;
while self.pos < self.chars.len() {
let cur = self.chars[self.pos];
if c == '"' && cur == '\\' {
self.pos += 2;
continue;
}
if cur == c {
if c == '\'' && self.chars.get(self.pos + 1) == Some(&'\'') {
self.pos += 2;
continue;
}
self.pos += 1;
break;
}
self.pos += 1;
}
let raw: String = self.chars[start..self.pos].iter().collect();
let s = if c == '"' {
parse_double_quoted(&raw, self.line)?
} else {
parse_single_quoted(&raw, self.line)?
};
return Ok(Value::String(s));
}
let start = self.pos;
while self.pos < self.chars.len() {
match self.chars[self.pos] {
',' | ':' | ']' | '}' => break,
_ => self.pos += 1,
}
}
let raw: String = self.chars[start..self.pos].iter().collect();
Ok(interpret_plain(raw.trim()))
}
}