mod block_scalar;
mod cursor;
mod escape;
mod flow;
mod block_map;
mod scalar;
mod block_seq;
use std::{borrow::Cow, collections::HashMap};
use crate::{
Result, Value,
patterns::resolve_scalar,
value::{apply_tag, resolve_merge_keys},
};
pub struct Parser<'a> {
pub(super) src: &'a str,
pub(super) pos: usize,
pub(super) line: usize,
pub(super) col: usize,
pub(super) anchors: HashMap<&'a str, crate::Value<'a>>,
}
impl<'a> Parser<'a> {
pub fn new(src: &'a str) -> Self {
let src = src.strip_prefix('\u{FEFF}').unwrap_or(src);
Self {
src,
pos: 0,
line: 1,
col: 1,
anchors: HashMap::new(),
}
}
pub fn parse_all(&mut self) -> Result<Vec<Value<'a>>> {
let mut docs = Vec::new();
loop {
self.skip_blank_and_comment_lines();
self.skip_directives();
if self.at_eof() {
break;
}
if self.at_doc_marker(b"---") {
self.consume_doc_marker(3);
}
self.anchors.clear();
let doc = resolve_merge_keys(self.parse_node(0)?);
docs.push(doc);
self.skip_blank_and_comment_lines();
if self.at_doc_marker(b"...") {
self.consume_doc_marker(3);
}
}
Ok(docs)
}
pub fn parse(&mut self) -> Result<Value<'a>> {
let mut docs = self.parse_all()?;
match docs.len() {
0 => Ok(Value::Null),
1 => Ok(docs.pop().unwrap()),
n => Err(self.err(format!(
"expected single document, got {n}"
))),
}
}
fn skip_directives(&mut self) {
while self.peek() == Some(b'%') {
self.skip_to_next_line();
self.skip_blank_and_comment_lines();
}
}
fn consume_doc_marker(&mut self, len: usize) {
for _ in 0..len {
self.advance();
}
self.skip_spaces();
self.consume_one_line_break();
}
pub fn parse_node(&mut self, min_indent: usize) -> Result<Value<'a>> {
if self.at_doc_marker(b"---") || self.at_doc_marker(b"...") {
return Ok(Value::Null);
}
let indent = if self.at_line_end() {
self.skip_blank_and_comment_lines();
if self.at_eof() {
return Ok(Value::Null);
}
if self.at_doc_marker(b"---") || self.at_doc_marker(b"...") {
return Ok(Value::Null);
}
let indent = self.current_indent()?;
if indent < min_indent {
return Ok(Value::Null);
}
for _ in 0..indent {
self.advance();
}
indent
} else {
self.col.saturating_sub(1)
};
let mut tag: Option<Cow<'_, str>> = None;
let mut anchor: Option<&'a str> = None;
loop {
let mut consumed = false;
if tag.is_none()
&& let Some(t) = self.try_consume_tag()?
{
tag = Some(t);
consumed = true;
}
if anchor.is_none()
&& let Some(a) = self.try_consume_anchor()?
{
anchor = Some(a);
consumed = true;
}
if !consumed {
break;
}
}
let value = if (tag.is_some() || anchor.is_some()) && self.at_line_end() {
self.parse_node(min_indent)?
} else {
self.dispatch(indent)?
};
let value = match tag {
Some(t) => apply_tag(t, value),
None => value,
};
if let Some(name) = anchor {
self.anchors.insert(name, value.clone());
}
Ok(value)
}
fn dispatch(&mut self, indent: usize) -> Result<Value<'a>> {
match self.peek() {
None => Ok(Value::Null),
Some(b'[') => self.parse_flow_seq(),
Some(b'{') => self.parse_flow_map(),
Some(b'|') => self.parse_block_scalar(),
Some(b'>') => self.parse_block_scalar(),
Some(b'*') => self.parse_alias(),
Some(b'-') if self.is_seq_dash() => self.parse_block_seq(indent),
_ => self.parse_scalar_or_map(indent),
}
}
fn try_consume_anchor(&mut self) -> Result<Option<&'a str>> {
if self.peek() != Some(b'&') {
return Ok(None);
}
self.advance();
let start = self.pos;
while let Some(b) = self.peek() {
if matches!(
b,
b' ' | b'\t' | b'\n' | b'\r' | b',' | b'[' | b']' | b'{' | b'}'
) {
break;
}
self.advance();
}
if self.pos == start {
return Err(self.err("empty anchor name"));
}
let name = &self.src[start..self.pos];
self.skip_spaces();
Ok(Some(name))
}
fn parse_alias(&mut self) -> Result<Value<'a>> {
self.advance();
let start = self.pos;
while let Some(b) = self.peek() {
if matches!(
b,
b' ' | b'\t' | b'\n' | b'\r' | b',' | b'[' | b']' | b'{' | b'}'
) {
break;
}
self.advance();
}
if self.pos == start {
return Err(self.err("empty alias name"));
}
let name = &self.src[start..self.pos];
match self.anchors.get(name) {
Some(v) => Ok(v.clone()),
None => Err(self.err(format!("unknown anchor: '{name}'"))),
}
}
fn try_consume_tag(&mut self) -> Result<Option<Cow<'a, str>>> {
if self.peek() != Some(b'!') {
return Ok(None);
}
let start = self.pos;
self.advance();
if self.peek() == Some(b'<') {
self.advance();
while let Some(b) = self.peek() {
self.advance();
if b == b'>' {
break;
}
}
} else {
while let Some(b) = self.peek() {
if matches!(
b,
b' ' | b'\t' | b'\n' | b'\r' | b',' | b'[' | b']' | b'{' | b'}'
) {
break;
}
self.advance();
}
}
let tag = Cow::Borrowed(&self.src[start..self.pos]);
self.skip_spaces();
Ok(Some(tag))
}
fn is_seq_dash(&self) -> bool {
matches!(
self.peek_at(self.pos + 1),
None | Some(b' ' | b'\t' | b'\n' | b'\r')
)
}
fn parse_scalar_token(&mut self) -> Result<Value<'a>> {
use Value::*;
match self.peek() {
Some(b'"') => Ok(String(
self.parse_double_quoted(self.col.saturating_sub(1))?,
)),
Some(b'\'') => Ok(String(
self.parse_single_quoted(self.col.saturating_sub(1))?,
)),
_ => {
let s = self.parse_plain_scalar(false);
Ok(resolve_scalar(s))
}
}
}
}
/// Returns `true` when `b` is a line feed or a carriage return.
#[inline(always)]
fn line_end(b: u8) -> bool {
    b == b'\n' || b == b'\r'
}
/// Returns `true` when `b` is a space or a horizontal tab.
#[inline(always)]
fn whitespace(b: u8) -> bool {
    b == b' ' || b == b'\t'
}
/// Returns `true` when `b` is an ASCII space.
#[inline(always)]
fn space(b: u8) -> bool {
    b == b' '
}
/// Returns `true` when `b` is a horizontal tab.
#[inline(always)]
fn tab(b: u8) -> bool {
    b == b'\t'
}
/// Returns the length of `bytes` once trailing spaces and tabs are dropped.
///
/// The result is an end index: `&bytes[..result]` is the input without its
/// trailing blanks. Input made up entirely of blanks (or empty input)
/// yields 0.
fn trim_trailing_whitespace_end(bytes: &[u8]) -> usize {
    bytes
        .iter()
        .rposition(|&b| b != b' ' && b != b'\t')
        .map_or(0, |last_kept| last_kept + 1)
}
#[cfg(test)]
mod tests {
    //! Tests for tags, anchors/aliases, multi-document streams, merge keys
    //! and byte-order-mark handling.
    //!
    //! Multi-line YAML fixtures are written with a `"\` continuation so that
    //! their first line starts at column 0; the indentation of the lines
    //! inside each literal is part of the YAML and is significant.

    use super::*;

    /// Parses `src` as a single document, panicking on error.
    fn parse(src: &str) -> Value<'_> {
        Parser::new(src).parse().unwrap()
    }

    // ----- tags ---------------------------------------------------------

    #[test]
    fn standard_str_tag_drops_wrapper() {
        let v = parse("!!str 42\n");
        assert!(matches!(v, Value::String(s) if s == "42"));
    }

    #[test]
    fn standard_int_tag_coerces_quoted() {
        let v = parse("!!int \"5\"\n");
        assert!(matches!(v, Value::UInt(5)));
    }

    #[test]
    fn standard_int_tag_accepts_hex() {
        let v = parse("!!int 0xff\n");
        assert!(matches!(v, Value::UInt(255)));
    }

    #[test]
    fn standard_float_tag_promotes_int() {
        let v = parse("!!float 7\n");
        assert!(matches!(v, Value::Float(f) if f == 7.0));
    }

    #[test]
    fn standard_null_tag_drops_inner() {
        let v = parse("!!null whatever\n");
        assert!(matches!(v, Value::Null));
    }

    #[test]
    fn standard_bool_tag_rejects_yaml1_1_spelling() {
        let v = parse("!!bool YES\n");
        assert!(matches!(v, Value::String(s) if s == "YES"));
    }

    #[test]
    fn custom_tag_wraps() {
        let v = parse("!myapp/Thing foo\n");
        match v {
            Value::Tagged(tag, inner) => {
                assert_eq!(tag, "!myapp/Thing");
                assert!(matches!(*inner, Value::String(s) if s == "foo"));
            }
            other => panic!("expected Tagged, got {other:?}"),
        }
    }

    #[test]
    fn verbatim_tag_wraps() {
        let v = parse("!<tag:example.com,2026:x> 5\n");
        match v {
            Value::Tagged(tag, inner) => {
                assert_eq!(tag, "!<tag:example.com,2026:x>");
                assert!(matches!(*inner, Value::UInt(5)));
            }
            other => panic!("expected Tagged, got {other:?}"),
        }
    }

    #[test]
    fn local_tag_short_form() {
        let v = parse("!local foo\n");
        match v {
            Value::Tagged(tag, inner) => {
                assert_eq!(tag, "!local");
                assert!(matches!(*inner, Value::String(s) if s == "foo"));
            }
            other => panic!("expected Tagged, got {other:?}"),
        }
    }

    #[test]
    fn custom_tag_on_block_seq() {
        let v = parse("!mytag\n- a\n- b\n");
        match v {
            Value::Tagged(tag, inner) => {
                assert_eq!(tag, "!mytag");
                match *inner {
                    Value::Seq(items) => {
                        assert_eq!(items.len(), 2);
                        assert!(matches!(&items[0], Value::String(s) if s == "a"));
                        assert!(matches!(&items[1], Value::String(s) if s == "b"));
                    }
                    other => panic!("expected Seq, got {other:?}"),
                }
            }
            other => panic!("expected Tagged, got {other:?}"),
        }
    }

    #[test]
    fn custom_tag_on_block_map() {
        let v = parse("!mytag\nk: v\n");
        match v {
            Value::Tagged(tag, inner) => {
                assert_eq!(tag, "!mytag");
                match *inner {
                    Value::Map(pairs) => {
                        assert_eq!(pairs.len(), 1);
                        assert!(matches!(&pairs[0].0, Value::String(s) if s == "k"));
                        assert!(matches!(&pairs[0].1, Value::String(s) if s == "v"));
                    }
                    other => panic!("expected Map, got {other:?}"),
                }
            }
            other => panic!("expected Tagged, got {other:?}"),
        }
    }

    #[test]
    fn no_tag_parses_plain() {
        let v = parse("42\n");
        assert!(matches!(v, Value::UInt(42)));
    }

    // ----- anchors and aliases -----------------------------------------

    /// Unwraps a `Value::Map` into its key/value pairs, panicking otherwise.
    fn map_pairs(v: Value<'_>) -> Vec<(Value<'_>, Value<'_>)> {
        match v {
            Value::Map(p) => p,
            other => panic!("expected Map, got {other:?}"),
        }
    }

    #[test]
    fn anchor_then_alias_scalar() {
        let pairs = map_pairs(parse("a: &id 42\nb: *id\n"));
        assert_eq!(pairs.len(), 2);
        assert!(matches!(&pairs[0].1, Value::UInt(42)));
        assert!(matches!(&pairs[1].1, Value::UInt(42)));
    }

    #[test]
    fn alias_on_block_seq() {
        let src = "list: &l\n - a\n - b\ncopy: *l\n";
        let pairs = map_pairs(parse(src));
        assert_eq!(pairs.len(), 2);
        let seq_eq = |v: &Value<'_>| match v {
            Value::Seq(items) => {
                items.len() == 2
                    && matches!(&items[0], Value::String(s) if s == "a")
                    && matches!(&items[1], Value::String(s) if s == "b")
            }
            _ => false,
        };
        assert!(seq_eq(&pairs[0].1), "list");
        assert!(seq_eq(&pairs[1].1), "copy");
    }

    #[test]
    fn alias_on_block_map() {
        let src = "base: &b\n name: foo\n port: 80\noverride: *b\n";
        let pairs = map_pairs(parse(src));
        assert_eq!(pairs.len(), 2);
        assert_eq!(&pairs[0].1, &pairs[1].1);
    }

    #[test]
    fn unknown_alias_errors() {
        let result = Parser::new("a: *missing\n").parse_node(0);
        assert!(result.is_err());
    }

    #[test]
    fn empty_anchor_errors() {
        let result = Parser::new("a: & 42\n").parse_node(0);
        assert!(result.is_err());
    }

    #[test]
    fn anchor_then_tag_order() {
        let pairs = map_pairs(parse("a: &id !!str 42\nb: *id\n"));
        assert!(matches!(&pairs[0].1, Value::String(s) if s == "42"));
        assert!(matches!(&pairs[1].1, Value::String(s) if s == "42"));
    }

    #[test]
    fn tag_then_anchor_order() {
        let pairs = map_pairs(parse("a: !!str &id 42\nb: *id\n"));
        assert!(matches!(&pairs[0].1, Value::String(s) if s == "42"));
        assert!(matches!(&pairs[1].1, Value::String(s) if s == "42"));
    }

    #[test]
    fn anchor_custom_tag_preserved() {
        let pairs = map_pairs(parse("a: &id !myapp/T foo\nb: *id\n"));
        for (_, v) in &pairs {
            match v {
                Value::Tagged(tag, inner) => {
                    assert_eq!(tag, "!myapp/T");
                    assert!(matches!(inner.as_ref(), Value::String(s) if s == "foo"));
                }
                other => panic!("expected Tagged, got {other:?}"),
            }
        }
    }

    #[test]
    fn reanchor_latest_wins() {
        let src = "first: &x 1\nsecond: &x 2\nthird: *x\n";
        let pairs = map_pairs(parse(src));
        assert!(matches!(&pairs[2].1, Value::UInt(2)));
    }

    #[test]
    fn alias_value_is_independent_clone() {
        let pairs = map_pairs(parse("a: &id foo\nb: *id\n"));
        assert_eq!(&pairs[0].1, &pairs[1].1);
    }

    #[test]
    fn anchor_on_own_line() {
        let src = "a: &id\n nested: 1\nb: *id\n";
        let pairs = map_pairs(parse(src));
        assert_eq!(pairs.len(), 2);
        assert_eq!(&pairs[0].1, &pairs[1].1);
        assert!(matches!(&pairs[0].1, Value::Map(_)));
    }

    // ----- document streams --------------------------------------------

    /// Parses every document in `src`, panicking on error.
    fn parse_all(src: &str) -> Vec<Value<'_>> {
        Parser::new(src).parse_all().unwrap()
    }

    /// Parses `src` as a single document; same behaviour as [`parse`].
    fn parse_one(src: &str) -> Value<'_> {
        parse(src)
    }

    #[test]
    fn stream_single_implicit_doc() {
        let docs = parse_all("a: 1\nb: 2\n");
        assert_eq!(docs.len(), 1);
        assert!(matches!(&docs[0], Value::Map(p) if p.len() == 2));
    }

    #[test]
    fn stream_single_explicit_doc() {
        let docs = parse_all("---\na: 1\n");
        assert_eq!(docs.len(), 1);
        assert!(matches!(&docs[0], Value::Map(p) if p.len() == 1));
    }

    #[test]
    fn stream_two_explicit_docs() {
        let docs = parse_all("---\na: 1\n---\nb: 2\n");
        assert_eq!(docs.len(), 2);
        assert!(matches!(
            &docs[0],
            Value::Map(p) if matches!(&p[0].0, Value::String(s) if s == "a")
        ));
        assert!(matches!(
            &docs[1],
            Value::Map(p) if matches!(&p[0].0, Value::String(s) if s == "b")
        ));
    }

    #[test]
    fn stream_kubectl_style() {
        let src = "\
---
apiVersion: v1
kind: Pod
---
apiVersion: v1
kind: Service
";
        let docs = parse_all(src);
        assert_eq!(docs.len(), 2);
        for d in &docs {
            assert!(matches!(d, Value::Map(p) if p.len() == 2));
        }
    }

    #[test]
    fn stream_with_end_markers() {
        let docs = parse_all("---\na: 1\n...\n---\nb: 2\n...\n");
        assert_eq!(docs.len(), 2);
    }

    #[test]
    fn stream_empty() {
        assert!(parse_all("").is_empty());
    }

    #[test]
    fn stream_only_markers() {
        let docs = parse_all("---\n---\nb: 2\n");
        assert_eq!(docs.len(), 2);
        assert!(matches!(&docs[0], Value::Null));
        assert!(matches!(&docs[1], Value::Map(_)));
    }

    #[test]
    fn stream_directives_skipped() {
        let docs = parse_all("%YAML 1.2\n---\na: 1\n");
        assert_eq!(docs.len(), 1);
    }

    #[test]
    fn stream_multiple_directives() {
        let docs = parse_all("%YAML 1.2\n%TAG !foo! tag:example.com,2026:\n---\na: 1\n");
        assert_eq!(docs.len(), 1);
    }

    #[test]
    fn parse_single_succeeds_on_one() {
        let v = parse_one("a: 1\n");
        assert!(matches!(v, Value::Map(p) if p.len() == 1));
    }

    #[test]
    fn parse_single_errors_on_multi() {
        let result = Parser::new("---\na: 1\n---\nb: 2\n").parse();
        assert!(result.is_err());
    }

    #[test]
    fn parse_single_on_empty() {
        assert!(matches!(parse_one(""), Value::Null));
    }

    #[test]
    fn anchors_reset_between_docs() {
        let result = Parser::new("---\nbase: &x 1\n---\nuse: *x\n").parse_all();
        assert!(result.is_err());
    }

    #[test]
    fn triple_dash_in_scalar_is_not_marker() {
        let docs = parse_all("key: '---'\n");
        assert_eq!(docs.len(), 1);
        let pairs = match &docs[0] {
            Value::Map(p) => p,
            _ => panic!(),
        };
        assert!(matches!(&pairs[0].1, Value::String(s) if s == "---"));
    }

    #[test]
    fn seq_dash_not_confused_with_marker() {
        let docs = parse_all("- item\n- item2\n");
        assert_eq!(docs.len(), 1);
        assert!(matches!(&docs[0], Value::Seq(items) if items.len() == 2));
    }

    // ----- merge keys ---------------------------------------------------

    /// Collects the keys of a map; string keys as-is, others via `Debug`.
    fn keys_of(v: &Value<'_>) -> Vec<String> {
        match v {
            Value::Map(pairs) => pairs
                .iter()
                .map(|(k, _)| match k {
                    Value::String(s) => s.to_string(),
                    other => format!("{other:?}"),
                })
                .collect(),
            _ => panic!("expected Map"),
        }
    }

    /// Looks up the value stored under the string key `key`, panicking when
    /// `v` is not a map or the key is absent.
    fn get<'a, 'b>(v: &'a Value<'b>, key: &str) -> &'a Value<'b> {
        match v {
            Value::Map(pairs) => pairs
                .iter()
                .find_map(|(k, val)| match k {
                    Value::String(s) if s == key => Some(val),
                    _ => None,
                })
                .unwrap_or_else(|| panic!("missing key {key}")),
            _ => panic!("expected Map"),
        }
    }

    #[test]
    fn merge_simple_splice() {
        let src = "\
defaults: &d
  port: 80
  host: localhost
service:
  <<: *d
  port: 443
";
        let pairs = map_pairs(parse(src));
        let service = &pairs[1].1;
        assert!(matches!(get(service, "port"), Value::UInt(443)));
        assert!(matches!(get(service, "host"), Value::String(s) if s == "localhost"));
        assert!(!keys_of(service).contains(&"<<".to_string()));
    }

    #[test]
    fn merge_parent_wins_over_source() {
        let src = "\
d: &d
  shared: from_default
target:
  <<: *d
  shared: overridden
";
        let pairs = map_pairs(parse(src));
        let target = &pairs[1].1;
        assert!(matches!(get(target, "shared"), Value::String(s) if s == "overridden"));
    }

    #[test]
    fn merge_seq_of_aliases_left_wins() {
        let src = "\
a: &a
  k: from_a
b: &b
  k: from_b
  extra: from_b
target:
  <<: [*a, *b]
";
        let pairs = map_pairs(parse(src));
        let target = &pairs[2].1;
        assert!(matches!(get(target, "k"), Value::String(s) if s == "from_a"));
        assert!(matches!(get(target, "extra"), Value::String(s) if s == "from_b"));
    }

    #[test]
    fn merge_no_merge_key_untouched() {
        let pairs = map_pairs(parse("a: 1\nb: 2\n"));
        assert_eq!(pairs.len(), 2);
        assert!(!pairs
            .iter()
            .any(|(k, _)| matches!(k, Value::String(s) if s == "<<")));
    }

    #[test]
    fn merge_non_map_value_dropped() {
        let pairs = map_pairs(parse("k: v\n'<<': 42\n"));
        let keys: Vec<String> = pairs
            .iter()
            .filter_map(|(k, _)| match k {
                Value::String(s) => Some(s.to_string()),
                _ => None,
            })
            .collect();
        assert!(!keys.contains(&"<<".to_string()));
        assert!(keys.contains(&"k".to_string()));
    }

    #[test]
    fn merge_inside_seq() {
        let src = "\
d: &d
  shared: yes
items:
  - <<: *d
    own: a
  - <<: *d
    own: b
";
        let pairs = map_pairs(parse(src));
        let items = match &pairs[1].1 {
            Value::Seq(s) => s,
            _ => panic!(),
        };
        for (i, item) in items.iter().enumerate() {
            assert!(matches!(get(item, "shared"), Value::String(s) if s == "yes"));
            let own = match get(item, "own") {
                Value::String(s) => s.as_ref(),
                _ => panic!(),
            };
            assert_eq!(own, if i == 0 { "a" } else { "b" });
        }
    }

    #[test]
    fn merge_nested_resolves_bottom_up() {
        let src = "\
other: &other
  z: from_other
inner: &inner
  k: v
  <<: *other
outer:
  <<: *inner
";
        let pairs = map_pairs(parse(src));
        let outer = &pairs[2].1;
        assert!(matches!(get(outer, "k"), Value::String(s) if s == "v"));
        assert!(matches!(get(outer, "z"), Value::String(s) if s == "from_other"));
        assert!(!keys_of(outer).contains(&"<<".to_string()));
    }

    #[test]
    fn merge_inside_tagged_value() {
        let src = "\
d: &d
  x: 1
wrapped: !mytag
  <<: *d
  y: 2
";
        let pairs = map_pairs(parse(src));
        let wrapped = &pairs[1].1;
        match wrapped {
            Value::Tagged(tag, inner) => {
                assert_eq!(tag, "!mytag");
                assert!(matches!(get(inner, "x"), Value::UInt(1)));
                assert!(matches!(get(inner, "y"), Value::UInt(2)));
            }
            other => panic!("expected Tagged, got {other:?}"),
        }
    }

    // ----- byte-order mark ---------------------------------------------

    #[test]
    fn bom_stripped_simple() {
        let pairs = map_pairs(parse("\u{FEFF}a: 1\n"));
        assert!(matches!(&pairs[0].0, Value::String(s) if s == "a"));
        assert!(matches!(&pairs[0].1, Value::UInt(1)));
    }

    #[test]
    fn bom_empty_stream() {
        let v = Parser::new("\u{FEFF}").parse().unwrap();
        assert!(matches!(v, Value::Null));
    }

    #[test]
    fn bom_with_doc_marker() {
        let docs = parse_all("\u{FEFF}---\nkind: Pod\n");
        assert_eq!(docs.len(), 1);
    }

    #[test]
    fn bom_in_middle_preserved() {
        let pairs = map_pairs(parse("key: \"a\u{FEFF}b\"\n"));
        let v = match &pairs[0].1 {
            Value::String(s) => s.as_ref(),
            _ => panic!(),
        };
        assert_eq!(v, "a\u{FEFF}b");
    }

    // ----- merge keys: realistic fixture -------------------------------

    #[test]
    fn merge_compose_style() {
        let src = "\
x-defaults: &defaults
  restart: always
  logging:
    driver: json-file
services:
  web:
    <<: *defaults
    image: nginx
  api:
    <<: *defaults
    image: api:latest
    restart: on-failure
";
        let pairs = map_pairs(parse(src));
        let services = &pairs[1].1;
        let web = get(services, "web");
        let api = get(services, "api");
        assert!(matches!(get(web, "image"), Value::String(s) if s == "nginx"));
        assert!(matches!(get(web, "restart"), Value::String(s) if s == "always"));
        assert!(matches!(get(api, "restart"), Value::String(s) if s == "on-failure"));
    }
}