use crate::error::{CompoundKind, Error, ErrorKind, Span};
use crate::value::{ObjectMap, Value};
use super::bracket::Bracket;
use super::classify::{classify_value_start, validate_typed_float, validate_typed_integer};
use super::collecting::{Collecting, MultilineMode};
use super::frame::Frame;
use super::insert::insert_value;
use super::value_start::ValueStart;
/// Incremental, line-driven parser state: lines are fed through `handle_line`
/// and the finished tree is extracted with `finish`.
pub(super) struct Parser<'a> {
/// Open container frames, innermost last; the first entry is the root frame.
stack: Vec<Frame<'a>>,
/// Multi-line string currently being collected, if one is open.
collecting: Option<Collecting<'a>>,
/// One offset per entry in `stack` (0 for the root), pushed and popped together
/// with the frames; used as the span start of "unclosed compound" errors.
opener_offsets: Vec<u32>,
/// Offset recorded when a multi-line string is opened; reset to `None` when it closes.
multiline_opener: Option<u32>,
/// Becomes `true` once the first non-blank, non-comment line has been seen and the
/// root-frame decision has been made.
root_initialized: bool,
}
impl<'a> Parser<'a> {
/// Builds an idle parser: no root frame yet, no multi-line string in progress.
///
/// The root frame is created lazily by `handle_line` when the first
/// significant line arrives.
pub(super) fn new() -> Self {
    // Pre-size both stacks so shallow documents never reallocate them.
    const INITIAL_DEPTH: usize = 8;
    let stack = Vec::with_capacity(INITIAL_DEPTH);
    let opener_offsets = Vec::with_capacity(INITIAL_DEPTH);
    Self {
        stack,
        collecting: None,
        opener_offsets,
        multiline_opener: None,
        root_initialized: false,
    }
}
pub(super) fn finish(mut self, eof_offset: u32) -> Result<Value, Error> {
if let Some(c) = &self.collecting {
let kind = match c.mode {
MultilineMode::Stripped => CompoundKind::MultilineStripped,
MultilineMode::Verbatim => CompoundKind::MultilineVerbatim,
};
let start = self.multiline_opener.unwrap_or(eof_offset);
return Err(Error::Structured(ErrorKind::UnclosedCompound {
kind,
span: Span::new(start, eof_offset),
}));
}
if self.stack.len() > 1 {
let kind = match self.stack.last().unwrap() {
Frame::Object { .. } => CompoundKind::Object,
Frame::Array { .. } => CompoundKind::Array,
};
let start = *self.opener_offsets.last().unwrap();
return Err(Error::Structured(ErrorKind::UnclosedCompound {
kind,
span: Span::new(start, eof_offset),
}));
}
if self.stack.is_empty() {
return Ok(Value::Object(crate::value::ObjectMap::default()));
}
Ok(self.stack.pop().unwrap().into_value())
}
/// Feeds one input line into the parser.
///
/// * `raw` - the line as it appears in the input (leading/trailing whitespace intact).
/// * `line_num` - line number used in error reports.
/// * `line_start` - offset of the first byte of `raw`, used to build spans.
///
/// While a multi-line string is open, every line is appended verbatim until the
/// terminator is seen. Otherwise blank lines and lines starting with `#` are
/// skipped, the first significant line decides the root frame kind, lone `}` /
/// `]` close the current frame, and anything else is dispatched as an array
/// item or an object pair depending on the innermost frame.
pub(super) fn handle_line(
    &mut self,
    raw: &'a str,
    line_num: usize,
    line_start: u32,
) -> Result<(), Error> {
    let trimmed = raw.trim();

    // Multi-line collection mode swallows everything except its terminator.
    if let Some(active) = self.collecting.as_mut() {
        if !active.is_terminator(trimmed) {
            active.lines.push(raw);
            return Ok(());
        }
        let text = self.collecting.take().unwrap().finish();
        self.multiline_opener = None;
        return self.attach_scalar_value(Value::String(text.into()), line_num, line_start);
    }

    let is_blank_or_comment = trimmed.is_empty() || trimmed.starts_with('#');
    if is_blank_or_comment {
        return Ok(());
    }
    let trimmed_span = trimmed_span_in(raw, trimmed, line_start);

    if !self.root_initialized {
        self.root_initialized = true;
        // A stray closer as the very first line gets no root frame; `close_frame`
        // below then reports it as unbalanced.
        let is_closer = trimmed == "}" || trimmed == "]";
        if !is_closer {
            let root = match classify_root_kind(trimmed) {
                RootKind::Object => Frame::new_object(),
                RootKind::Array => Frame::new_array(),
            };
            self.stack.push(root);
            self.opener_offsets.push(0);
        }
    }

    match trimmed {
        "}" => self.close_frame(Bracket::Object, line_num, trimmed_span),
        "]" => self.close_frame(Bracket::Array, line_num, trimmed_span),
        _ => {
            let inside_array = matches!(self.stack.last(), Some(Frame::Array { .. }));
            if inside_array {
                self.handle_array_item(trimmed, line_num, trimmed_span)
            } else {
                self.handle_object_pair(raw, trimmed, line_num, line_start, trimmed_span)
            }
        }
    }
}
fn attach_scalar_value(
&mut self,
value: Value,
line_num: usize,
line_start: u32,
) -> Result<(), Error> {
match self.stack.last_mut().unwrap() {
Frame::Object {
pairs,
pending_key,
pending_key_span,
} => {
let key = pending_key.take().ok_or_else(|| {
Error::Structured(ErrorKind::Other {
line: Some(line_num as u32),
message: format!(
"Line {}: internal error \u{2014} multi-line string closed without pending key",
line_num
),
span: Span::new(line_start, line_start),
})
})?;
let key_span = pending_key_span
.take()
.unwrap_or_else(|| Span::new(line_start, line_start));
insert_value(pairs, key, value, line_num, key_span)
}
Frame::Array { items } => {
items.push(value);
Ok(())
}
}
}
/// Parses one `key<separator>value` line inside an object frame.
///
/// * `raw` - the untrimmed line (accepted but not consulted).
/// * `line` - the trimmed line; the key is everything before its first `:`.
/// * `line_start` - offset of the untrimmed line.
/// * `trimmed_span` - span of `line` within the input.
///
/// Inline values (raw strings, typed integers/floats, scalars, null, booleans,
/// empty containers) are inserted immediately. Container and multi-line
/// openers instead record the key as pending and push a frame or enter
/// collection mode; the value is attached when the construct is closed.
///
/// # Errors
///
/// `MissingSeparator` when the line has no `:`, `EmptyKey` when nothing precedes
/// it, plus any error from `require_sep_end`, the typed validators,
/// `classify_value_start`, `set_pending_key` or `insert_value`.
fn handle_object_pair(
    &mut self,
    raw: &'a str,
    line: &'a str,
    line_num: usize,
    line_start: u32,
    trimmed_span: Span,
) -> Result<(), Error> {
    // Kept only for signature compatibility; all offsets come from `trimmed_span`.
    let _ = raw;

    // Byte distance from the start of the raw line to the trimmed text.
    let lead = (trimmed_span.start - line_start) as usize;

    let Some(colon) = line.find(':') else {
        return Err(Error::Structured(ErrorKind::MissingSeparator {
            line: line_num as u32,
            span: trimmed_span,
        }));
    };

    let key = line[..colon].trim_end();
    let key_start = trimmed_span.start;
    let key_end = key_start + key.len() as u32;
    if key.is_empty() {
        return Err(Error::Structured(ErrorKind::EmptyKey {
            line: line_num as u32,
            span: Span::new(key_start, key_start + 1),
        }));
    }
    let key_span = Span::new(key_start, key_end);

    // Text following the first colon, and its absolute offset.
    let value_text = &line[colon + 1..];
    let value_off = line_start + (lead + (colon + 1)) as u32;
    let marker_col = colon as u32;

    // Inline values fall out of this match; openers return early.
    let value = match classify_separator(value_text) {
        Separator::Raw(text) => {
            require_sep_end(
                text,
                line_num,
                line_start,
                marker_col,
                value_off + 1,
                trimmed_span,
            )?;
            Value::String(text.trim().into())
        }
        Separator::TypedInteger(body) => {
            let body_span = body_span_for(body, value_text, value_off, trimmed_span);
            Value::Integer(validate_typed_integer(body, line_num, body_span)?)
        }
        Separator::TypedFloat(body) => {
            let body_span = body_span_for(body, value_text, value_off, trimmed_span);
            Value::Float(validate_typed_float(body, line_num, body_span)?)
        }
        Separator::Plain(text) => {
            require_sep_end(
                text,
                line_num,
                line_start,
                marker_col,
                value_off,
                trimmed_span,
            )?;
            match classify_value_start(text, line_num, trimmed_span)? {
                ValueStart::Scalar(s) => Value::String(s),
                ValueStart::Null => Value::Null,
                ValueStart::Bool(b) => Value::Bool(b),
                ValueStart::EmptyObject => Value::Object(ObjectMap::default()),
                ValueStart::EmptyArray => Value::Array(Vec::new()),
                ValueStart::OpenObject => {
                    self.set_pending_key(key, line_num, line_start, key_span)?;
                    self.stack.push(Frame::new_object());
                    // Remember the last byte of the line as the opener position.
                    self.opener_offsets.push(trimmed_span.end - 1);
                    return Ok(());
                }
                ValueStart::OpenArray => {
                    self.set_pending_key(key, line_num, line_start, key_span)?;
                    self.stack.push(Frame::new_array());
                    self.opener_offsets.push(trimmed_span.end - 1);
                    return Ok(());
                }
                ValueStart::OpenMultilineStripped => {
                    self.set_pending_key(key, line_num, line_start, key_span)?;
                    self.collecting = Some(Collecting::new(MultilineMode::Stripped));
                    self.multiline_opener = Some(trimmed_span.end - 1);
                    return Ok(());
                }
                ValueStart::OpenMultilineVerbatim => {
                    self.set_pending_key(key, line_num, line_start, key_span)?;
                    self.collecting = Some(Collecting::new(MultilineMode::Verbatim));
                    // NOTE(review): the `- 2` suggests the verbatim opener is two bytes wide — confirm.
                    self.multiline_opener = Some(trimmed_span.end - 2);
                    return Ok(());
                }
            }
        }
    };

    self.insert_object_pair(key, value, line_num, key_span)
}
fn handle_array_item(
&mut self,
line: &str,
line_num: usize,
trimmed_span: Span,
) -> Result<(), Error> {
let line_start = trimmed_span.start;
if let Some(rest) = line.strip_prefix("::") {
require_sep_end(rest, line_num, line_start, 1, line_start + 2, trimmed_span)?;
let value = Value::String(rest.trim_start().into());
return self.push_array_item(value);
}
if let Some(rest) = line.strip_prefix(":i") {
require_sep_end(rest, line_num, line_start, 0, line_start + 2, trimmed_span)?;
let body_span = Span::new(line_start + 2, trimmed_span.end);
let s = validate_typed_integer(rest, line_num, body_span)?;
return self.push_array_item(Value::Integer(s));
}
if let Some(rest) = line.strip_prefix(":f") {
require_sep_end(rest, line_num, line_start, 0, line_start + 2, trimmed_span)?;
let body_span = Span::new(line_start + 2, trimmed_span.end);
let s = validate_typed_float(rest, line_num, body_span)?;
return self.push_array_item(Value::Float(s));
}
match classify_value_start(line, line_num, trimmed_span)? {
ValueStart::Scalar(s) => self.push_array_item(Value::String(s)),
ValueStart::Null => self.push_array_item(Value::Null),
ValueStart::Bool(b) => self.push_array_item(Value::Bool(b)),
ValueStart::EmptyObject => self.push_array_item(Value::Object(ObjectMap::default())),
ValueStart::EmptyArray => self.push_array_item(Value::Array(Vec::new())),
ValueStart::OpenObject => {
self.stack.push(Frame::new_object());
self.opener_offsets.push(trimmed_span.end - 1);
Ok(())
}
ValueStart::OpenArray => {
self.stack.push(Frame::new_array());
self.opener_offsets.push(trimmed_span.end - 1);
Ok(())
}
ValueStart::OpenMultilineStripped => {
self.collecting = Some(Collecting::new(MultilineMode::Stripped));
self.multiline_opener = Some(trimmed_span.end - 1);
Ok(())
}
ValueStart::OpenMultilineVerbatim => {
self.collecting = Some(Collecting::new(MultilineMode::Verbatim));
self.multiline_opener = Some(trimmed_span.end - 2);
Ok(())
}
}
}
/// Inserts `key` / `value` into the innermost frame, which must be an object.
///
/// # Errors
///
/// Returns whatever `insert_value` reports.
///
/// # Panics
///
/// Panics if the stack is empty or the innermost frame is an array.
fn insert_object_pair(
    &mut self,
    key: &str,
    value: Value,
    line_num: usize,
    key_span: Span,
) -> Result<(), Error> {
    let Frame::Object { pairs, .. } = self.stack.last_mut().unwrap() else {
        unreachable!("dispatched as object")
    };
    insert_value(pairs, key, value, line_num, key_span)
}
/// Appends `value` to the innermost frame, which must be an array.
///
/// # Panics
///
/// Panics if the stack is empty or the innermost frame is an object.
fn push_array_item(&mut self, value: Value) -> Result<(), Error> {
    if let Frame::Array { items } = self.stack.last_mut().unwrap() {
        items.push(value);
        Ok(())
    } else {
        unreachable!("dispatched as array")
    }
}
/// Records `key` (and its span) on the innermost object frame so that a value
/// completed on a later line can be attached to it.
///
/// # Errors
///
/// Returns `ErrorKind::Other` if the frame already has a pending key.
///
/// # Panics
///
/// Panics if the stack is empty or the innermost frame is not an object.
fn set_pending_key(
    &mut self,
    key: &'a str,
    line_num: usize,
    line_start: u32,
    key_span: Span,
) -> Result<(), Error> {
    let Frame::Object {
        pending_key,
        pending_key_span,
        ..
    } = self.stack.last_mut().unwrap()
    else {
        unreachable!()
    };

    // Refuse to overwrite a pending key whose value has not been attached yet.
    if pending_key.is_some() {
        return Err(Error::Structured(ErrorKind::Other {
            line: Some(line_num as u32),
            message: format!(
                "Line {}: internal error \u{2014} pending key already set",
                line_num
            ),
            span: Span::new(line_start, line_start),
        }));
    }

    *pending_key = Some(key);
    *pending_key_span = Some(key_span);
    Ok(())
}
fn close_frame(
&mut self,
expected: Bracket,
line_num: usize,
trimmed_span: Span,
) -> Result<(), Error> {
if self.stack.len() <= 1 {
return Err(Error::Structured(ErrorKind::UnbalancedBracket {
line: line_num as u32,
span: trimmed_span,
expected: bracket_to_compound(expected),
found: expected.close(),
}));
}
let frame = self.stack.pop().unwrap();
let _ = self.opener_offsets.pop();
let frame_kind = match frame {
Frame::Object { .. } => Bracket::Object,
Frame::Array { .. } => Bracket::Array,
};
let matches_expected = matches!(
(frame_kind, expected),
(Bracket::Object, Bracket::Object) | (Bracket::Array, Bracket::Array)
);
if !matches_expected {
return Err(Error::Structured(ErrorKind::UnbalancedBracket {
line: line_num as u32,
span: trimmed_span,
expected: bracket_to_compound(frame_kind),
found: expected.close(),
}));
}
let value = frame.into_value();
self.attach_child_value(value, line_num, trimmed_span)
}
fn attach_child_value(
&mut self,
value: Value,
line_num: usize,
trimmed_span: Span,
) -> Result<(), Error> {
match self.stack.last_mut().unwrap() {
Frame::Object {
pairs,
pending_key,
pending_key_span,
} => {
let key = pending_key.take().ok_or_else(|| {
Error::Structured(ErrorKind::Other {
line: Some(line_num as u32),
message: format!(
"Line {}: internal error \u{2014} closed compound without pending key",
line_num
),
span: trimmed_span,
})
})?;
let key_span = pending_key_span.take().unwrap_or(trimmed_span);
insert_value(pairs, key, value, line_num, key_span)
}
Frame::Array { items } => {
items.push(value);
Ok(())
}
}
}
}
/// Maps a bracket kind to the compound kind used in error reports.
fn bracket_to_compound(b: Bracket) -> CompoundKind {
    let kind = match b {
        Bracket::Array => CompoundKind::Array,
        Bracket::Object => CompoundKind::Object,
    };
    kind
}
/// Computes the span of `trimmed` given that `raw` begins at `line_start`.
///
/// The offset is derived from the two slices' addresses, so `trimmed` is
/// expected to be a sub-slice of `raw` (checked with a `debug_assert!`). An
/// empty `trimmed` yields an empty span at `line_start`.
fn trimmed_span_in(raw: &str, trimmed: &str, line_start: u32) -> Span {
    if trimmed.is_empty() {
        return Span::new(line_start, line_start);
    }

    let (raw_addr, trimmed_addr) = (raw.as_ptr() as usize, trimmed.as_ptr() as usize);
    debug_assert!(trimmed_addr >= raw_addr && trimmed_addr - raw_addr <= raw.len());

    // Leading whitespace width = distance between the two slice starts.
    let begin = line_start + (trimmed_addr - raw_addr) as u32;
    let end = begin + trimmed.len() as u32;
    Span::new(begin, end)
}
/// Computes the span of `body`, given that `after_colon` starts at offset
/// `after_colon_off`.
///
/// * An empty `body` yields an empty span just past the end of `after_colon`.
/// * Otherwise the position is derived from the slices' addresses; if `body`
///   does not start within `after_colon`, `fallback` is returned.
fn body_span_for(body: &str, after_colon: &str, after_colon_off: u32, fallback: Span) -> Span {
    if body.is_empty() {
        let end_of_line = after_colon_off + after_colon.len() as u32;
        return Span::new(end_of_line, end_of_line);
    }

    let base = after_colon.as_ptr() as usize;
    let rel = (body.as_ptr() as usize)
        .checked_sub(base)
        .filter(|&delta| delta <= after_colon.len());

    match rel {
        Some(delta) => {
            let off = delta as u32;
            Span::new(
                after_colon_off + off,
                after_colon_off + off + body.len() as u32,
            )
        }
        None => fallback,
    }
}
/// Result of classifying the text that follows a key's first `:`
/// (produced by `classify_separator`).
enum Separator<'a> {
/// The text began with a second `:`; holds everything after it.
Raw(&'a str),
/// The text began with `i` followed by end-of-text or whitespace; holds everything after the `i`.
TypedInteger(&'a str),
/// The text began with `f` followed by end-of-text or whitespace; holds everything after the `f`.
TypedFloat(&'a str),
/// None of the markers matched; holds the whole text after the colon.
Plain(&'a str),
}
/// Checks that a separator marker is followed by end-of-line or whitespace.
///
/// * `rest` - the text immediately after the marker.
/// * `line_start` - accepted but unused.
/// * `column` - column reported in the error.
/// * `body_off` - start offset of the error span; it ends at `trimmed_span.end`.
///
/// # Errors
///
/// Returns `ErrorKind::MissingSeparatorSpace` (marker `':'`) when `rest`
/// begins with a non-whitespace character.
fn require_sep_end(
    rest: &str,
    line_num: usize,
    line_start: u32,
    column: u32,
    body_off: u32,
    trimmed_span: Span,
) -> Result<(), Error> {
    let _ = line_start;

    let properly_separated = match rest.chars().next() {
        None => true,
        Some(first) => first.is_whitespace(),
    };
    if properly_separated {
        return Ok(());
    }

    Err(Error::Structured(ErrorKind::MissingSeparatorSpace {
        line: line_num as u32,
        column,
        marker: ':',
        span: Span::new(body_off, trimmed_span.end),
    }))
}
/// Kind of the implicit root container, decided from the first significant line.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(super) enum RootKind {
    /// The first line looks like a `key<separator>value` pair.
    Object,
    /// The first line does not look like a pair, so the root is a list of items.
    Array,
}

/// Decides whether the first significant (already trimmed) line starts an
/// object or an array root.
///
/// The line counts as an object pair when it has a non-empty key before its
/// first `:` and the colon is followed by one of:
///
/// * nothing at all (`key:`),
/// * a space or tab (`key: value`),
/// * a second colon (`key::raw`),
/// * `i` or `f` that is itself followed by end-of-line, a space or a tab
///   (`key:i 5`, `key:f`).
///
/// Everything else is treated as an array item.
pub(super) fn classify_root_kind(trimmed: &str) -> RootKind {
    let Some((key_part, after)) = trimmed.split_once(':') else {
        return RootKind::Array;
    };
    if key_part.trim_end().is_empty() {
        return RootKind::Array;
    }

    // Only the first two bytes after the colon matter. Comparing bytes is safe
    // because every byte tested is ASCII and can never be part of a multi-byte
    // UTF-8 sequence.
    // NOTE(review): only space/tab are accepted here, whereas `classify_separator`
    // accepts any Unicode whitespace — confirm which is intended.
    let mut following = after.bytes();
    match (following.next(), following.next()) {
        (None, _) => RootKind::Object,
        (Some(b' ' | b'\t' | b':'), _) => RootKind::Object,
        (Some(b'i' | b'f'), None | Some(b' ' | b'\t')) => RootKind::Object,
        _ => RootKind::Array,
    }
}
/// Classifies the text that follows a key's first `:`.
///
/// * a leading `:` selects `Separator::Raw` with the remainder;
/// * a leading `i` / `f` that is followed by end-of-text or whitespace selects
///   `Separator::TypedInteger` / `Separator::TypedFloat` with the remainder;
/// * anything else is `Separator::Plain` carrying the whole input.
fn classify_separator(after_colon: &str) -> Separator<'_> {
    // A type marker only counts when it stands alone as a token.
    let marker_ends = |rest: &str| rest.is_empty() || rest.starts_with(char::is_whitespace);

    let mut chars = after_colon.chars();
    let first = chars.next();
    // What remains after the first character (empty if there was none).
    let rest = chars.as_str();

    match first {
        Some(':') => Separator::Raw(rest),
        Some('i') if marker_ends(rest) => Separator::TypedInteger(rest),
        Some('f') if marker_ends(rest) => Separator::TypedFloat(rest),
        _ => Separator::Plain(after_colon),
    }
}