use crate::error::{Error, Result};
use crate::parser::{Event, ParseConfig, Parser, ScalarStyle};
use crate::path::{parse_query_path, QuerySegment};
use crate::prelude::*;
use core::hash::{Hash, Hasher};
use indexmap::IndexMap;
use rustc_hash::{FxBuildHasher, FxHashMap};
use serde::Serialize;
/// Reason code explaining why a scalar was transformed.
///
/// Every variant maps to a fixed, human-readable sentence through
/// [`TransformReason::as_str`]; the `Display` implementation prints that same
/// sentence.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum TransformReason {
    /// The scalar contained escape sequences that required decoding.
    EscapeSequence,
    /// The scalar spans multiple lines and required line folding.
    LineFold,
    /// Tag resolution materialised a fresh representation.
    TagResolution,
    /// A double-quoted scalar with escapes produced an owned buffer.
    QuotedScalar,
    /// The scalar arrived via alias replay (`*anchor`).
    AliasExpansion,
}

impl TransformReason {
    /// Returns the static explanatory sentence associated with this reason.
    #[must_use]
    pub fn as_str(self) -> &'static str {
        match self {
            TransformReason::EscapeSequence => {
                "scalar contained escape sequences that required decoding"
            }
            TransformReason::LineFold => "scalar spans multiple lines and required line folding",
            TransformReason::TagResolution => "tag resolution materialised a fresh representation",
            TransformReason::QuotedScalar => {
                "double-quoted scalar with escapes produced an owned buffer"
            }
            TransformReason::AliasExpansion => "scalar arrived via alias replay (`*anchor`)",
        }
    }
}

impl core::fmt::Display for TransformReason {
    /// Writes the sentence returned by [`TransformReason::as_str`] verbatim,
    /// ignoring any width or padding flags on the formatter.
    fn fmt(&self, out: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let text = self.as_str();
        out.write_str(text)
    }
}
/// A parsed value tree whose strings and mapping keys are `Cow<'a, str>`.
///
/// The `'a` lifetime lets string data borrow from another buffer instead of
/// being copied; a `Cow::Owned` is still possible for any individual string.
/// Use [`BorrowedValue::into_owned`] to convert the tree into `crate::Value`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum BorrowedValue<'a> {
/// The null value.
Null,
/// A boolean.
Bool(bool),
/// A number, stored as the crate's `Number` type.
Number(crate::value::Number),
/// A string, borrowed or owned.
String(Cow<'a, str>),
/// An ordered list of values.
Sequence(Vec<BorrowedValue<'a>>),
/// A string-keyed map that remembers insertion order (`IndexMap`).
Mapping(IndexMap<Cow<'a, str>, BorrowedValue<'a>, FxBuildHasher>),
}
impl<'a> BorrowedValue<'a> {
    /// Returns `true` when this value is the `Null` variant.
    #[must_use]
    pub fn is_null(&self) -> bool {
        matches!(*self, Self::Null)
    }

    /// Returns the string slice when this value is a `String`, otherwise `None`.
    #[must_use]
    pub fn as_str(&self) -> Option<&str> {
        if let Self::String(text) = self {
            Some(&**text)
        } else {
            None
        }
    }

    /// Returns the number as an `i64` when this value is a `Number` and the
    /// number's own `as_i64` conversion succeeds; otherwise `None`.
    #[must_use]
    pub fn as_i64(&self) -> Option<i64> {
        if let Self::Number(number) = self {
            number.as_i64()
        } else {
            None
        }
    }

    /// Returns the boolean when this value is a `Bool`, otherwise `None`.
    #[must_use]
    pub fn as_bool(&self) -> Option<bool> {
        if let Self::Bool(flag) = *self {
            Some(flag)
        } else {
            None
        }
    }

    /// Returns the elements as a slice when this value is a `Sequence`,
    /// otherwise `None`.
    #[must_use]
    pub fn as_sequence(&self) -> Option<&[BorrowedValue<'a>]> {
        if let Self::Sequence(items) = self {
            Some(items.as_slice())
        } else {
            None
        }
    }

    /// Returns the underlying map when this value is a `Mapping`, otherwise
    /// `None`.
    #[must_use]
    pub fn as_mapping(&self) -> Option<&IndexMap<Cow<'a, str>, BorrowedValue<'a>, FxBuildHasher>> {
        if let Self::Mapping(entries) = self {
            Some(entries)
        } else {
            None
        }
    }

    /// Collects every value reachable from `self` that matches `path`.
    ///
    /// The path is parsed with `parse_query_path`; matches are returned in the
    /// order the traversal discovers them. An unmatched path yields an empty
    /// vector.
    #[must_use]
    pub fn query(&self, path: &str) -> Vec<&BorrowedValue<'a>> {
        let parsed = parse_query_path(path);
        let mut matches = Vec::new();
        borrowed_query_recursive(self, &parsed, 0, &mut matches);
        matches
    }

    /// Resolves `path` to a single value, or `None` when nothing matches.
    ///
    /// Key and index segments are followed directly. As soon as a wildcard or
    /// recursive-descent segment is reached, the whole path is re-evaluated
    /// with [`BorrowedValue::query`] from `self` and its first match (if any)
    /// is returned.
    #[must_use]
    pub fn get_path(&self, path: &str) -> Option<&BorrowedValue<'a>> {
        let parsed = parse_query_path(path);
        let mut cursor = self;
        for segment in &parsed {
            cursor = match (segment, cursor) {
                (QuerySegment::Key(name), Self::Mapping(entries)) => entries.get(name.as_str())?,
                (QuerySegment::Index(position), Self::Sequence(items)) => items.get(*position)?,
                // A key on a non-mapping or an index on a non-sequence cannot match.
                (QuerySegment::Key(_) | QuerySegment::Index(_), _) => return None,
                (QuerySegment::Wildcard | QuerySegment::RecursiveDescent, _) => {
                    return self.query(path).into_iter().next();
                }
            };
        }
        Some(cursor)
    }

    /// Converts this tree into the crate's owned `Value`, turning every `Cow`
    /// into an owned `String` and recursing through sequences and mappings.
    #[must_use]
    pub fn into_owned(self) -> crate::Value {
        match self {
            Self::Null => crate::Value::Null,
            Self::Bool(flag) => crate::Value::Bool(flag),
            Self::Number(number) => crate::Value::Number(number),
            Self::String(text) => crate::Value::String(text.into_owned()),
            Self::Sequence(items) => {
                crate::Value::Sequence(items.into_iter().map(Self::into_owned).collect())
            }
            Self::Mapping(entries) => {
                let mut owned = crate::Mapping::with_capacity(entries.len());
                entries.into_iter().for_each(|(key, child)| {
                    let _ = owned.insert(key.into_owned(), child.into_owned());
                });
                crate::Value::Mapping(owned)
            }
        }
    }
}
impl Hash for BorrowedValue<'_> {
    /// Feeds the variant discriminant followed by the payload into `state`.
    ///
    /// Mappings are hashed in key-sorted order rather than insertion order.
    /// The derived `PartialEq` delegates to `IndexMap` equality, which treats
    /// two maps with the same entries as equal regardless of insertion order,
    /// so the hash must not depend on that order either (`a == b` has to imply
    /// `hash(a) == hash(b)`).
    fn hash<H: Hasher>(&self, state: &mut H) {
        core::mem::discriminant(self).hash(state);
        match self {
            Self::Null => {}
            Self::Bool(b) => b.hash(state),
            Self::Number(n) => n.hash(state),
            Self::String(s) => s.hash(state),
            Self::Sequence(seq) => seq.hash(state),
            Self::Mapping(map) => {
                // Keys are unique, so sorting by key gives one canonical order.
                let mut entries: Vec<_> = map.iter().collect();
                entries.sort_unstable_by(|left, right| left.0.cmp(right.0));
                state.write_usize(entries.len());
                for (key, value) in entries {
                    key.hash(state);
                    value.hash(state);
                }
            }
        }
    }
}
impl Serialize for BorrowedValue<'_> {
fn serialize<S>(&self, serializer: S) -> core::result::Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
match self {
Self::Null => serializer.serialize_none(),
Self::Bool(b) => serializer.serialize_bool(*b),
Self::Number(n) => match n {
crate::value::Number::Integer(i) => serializer.serialize_i64(*i),
crate::value::Number::Float(f) => serializer.serialize_f64(*f),
},
Self::String(s) => serializer.serialize_str(s),
Self::Sequence(seq) => seq.serialize(serializer),
Self::Mapping(map) => {
use serde::ser::SerializeMap;
let mut m = serializer.serialize_map(Some(map.len()))?;
for (k, v) in map {
m.serialize_entry(k.as_ref(), v)?;
}
m.end()
}
}
}
}
impl PartialOrd for BorrowedValue<'_> {
    /// Always returns `Some`: the ordering is total and is delegated to the
    /// `Ord` implementation.
    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
        let ordering = Ord::cmp(self, other);
        Some(ordering)
    }
}
impl Ord for BorrowedValue<'_> {
    /// Total order over values.
    ///
    /// Values of different variants are ordered by variant rank:
    /// `Null < Bool < Number < String < Sequence < Mapping`. Values of the same
    /// variant are ordered by their payload.
    ///
    /// Mappings are ordered by length first and then by their entries in
    /// key-sorted order. Comparing only the lengths would make two different
    /// mappings of the same size compare `Equal`, which contradicts the
    /// derived `Eq`. Sorting by key (rather than using insertion order) keeps
    /// the result in line with `IndexMap` equality, which ignores insertion
    /// order.
    fn cmp(&self, other: &Self) -> core::cmp::Ordering {
        use core::cmp::Ordering;
        let rank = |v: &Self| -> u8 {
            match v {
                Self::Null => 0,
                Self::Bool(_) => 1,
                Self::Number(_) => 2,
                Self::String(_) => 3,
                Self::Sequence(_) => 4,
                Self::Mapping(_) => 5,
            }
        };
        rank(self).cmp(&rank(other)).then_with(|| match (self, other) {
            (Self::Null, Self::Null) => Ordering::Equal,
            (Self::Bool(a), Self::Bool(b)) => a.cmp(b),
            (Self::Number(a), Self::Number(b)) => a.cmp(b),
            (Self::String(a), Self::String(b)) => a.cmp(b),
            (Self::Sequence(a), Self::Sequence(b)) => a.cmp(b),
            (Self::Mapping(a), Self::Mapping(b)) => a.len().cmp(&b.len()).then_with(|| {
                // Keys are unique within a map, so sorting by key yields a
                // canonical entry list that can be compared lexicographically.
                let mut left: Vec<_> = a.iter().collect();
                let mut right: Vec<_> = b.iter().collect();
                left.sort_unstable_by(|x, y| x.0.cmp(y.0));
                right.sort_unstable_by(|x, y| x.0.cmp(y.0));
                left.cmp(&right)
            }),
            // Unreachable: equal ranks imply equal variants.
            _ => Ordering::Equal,
        })
    }
}
/// Walks `value` according to `segments[depth..]`, appending every match to
/// `results`.
///
/// * Past the last segment, `value` itself is a match.
/// * `Key` / `Index` follow a single child of a mapping / sequence.
/// * `Wildcard` fans out over every child of a sequence or mapping.
/// * `RecursiveDescent` tries the remaining segments at `value` and then
///   repeats itself on every child. When it is the final segment it matches
///   nothing.
fn borrowed_query_recursive<'a, 'b>(
    value: &'b BorrowedValue<'a>,
    segments: &[QuerySegment],
    depth: usize,
    results: &mut Vec<&'b BorrowedValue<'a>>,
) {
    let segment = match segments.get(depth) {
        Some(segment) => segment,
        None => {
            results.push(value);
            return;
        }
    };
    let next = depth + 1;

    match segment {
        QuerySegment::Key(name) => {
            let hit = match value {
                BorrowedValue::Mapping(entries) => entries.get(name.as_str()),
                _ => None,
            };
            if let Some(child) = hit {
                borrowed_query_recursive(child, segments, next, results);
            }
        }
        QuerySegment::Index(position) => {
            let hit = match value {
                BorrowedValue::Sequence(items) => items.get(*position),
                _ => None,
            };
            if let Some(child) = hit {
                borrowed_query_recursive(child, segments, next, results);
            }
        }
        QuerySegment::Wildcard => match value {
            BorrowedValue::Sequence(items) => items
                .iter()
                .for_each(|child| borrowed_query_recursive(child, segments, next, results)),
            BorrowedValue::Mapping(entries) => entries
                .values()
                .for_each(|child| borrowed_query_recursive(child, segments, next, results)),
            _ => {}
        },
        QuerySegment::RecursiveDescent => {
            // A trailing recursive-descent segment has nothing left to match.
            if next >= segments.len() {
                return;
            }
            // First try the rest of the path right here ...
            borrowed_query_recursive(value, segments, next, results);
            // ... then keep descending with the same segment still active.
            match value {
                BorrowedValue::Sequence(items) => items
                    .iter()
                    .for_each(|child| borrowed_query_recursive(child, segments, depth, results)),
                BorrowedValue::Mapping(entries) => entries
                    .values()
                    .for_each(|child| borrowed_query_recursive(child, segments, depth, results)),
                _ => {}
            }
        }
    }
}
/// Parses `input` into a [`BorrowedValue`] using the default
/// `crate::ParserConfig`.
///
/// # Errors
///
/// Returns whatever error [`from_str_borrowed_with_config`] reports for the
/// same input under the default configuration.
pub fn from_str_borrowed(input: &str) -> Result<BorrowedValue<'_>> {
    let defaults = crate::ParserConfig::default();
    from_str_borrowed_with_config(input, &defaults)
}
pub fn from_str_borrowed_with_config<'a>(
input: &'a str,
user_config: &crate::ParserConfig,
) -> Result<BorrowedValue<'a>> {
let config = ParseConfig::from(user_config);
if input.len() > config.max_document_length {
return Err(Error::Parse(format!(
"document exceeds maximum length of {} bytes",
config.max_document_length
)));
}
let mut parser = Parser::new(input);
let mut builder = BorrowedBuilder::new(&config);
loop {
let event = parser
.next_event()
.map_err(|e| Error::parse_at(&*e.message, input, e.index))?;
match builder.process(event, input)? {
BuilderState::Continue => {}
BuilderState::Done => break,
}
}
Ok(builder.into_value())
}
/// Outcome of feeding one parser event to `BorrowedBuilder::process`.
enum BuilderState {
/// More events are expected; keep pulling from the parser.
Continue,
/// The end of the stream was seen; the driving loop should stop.
Done,
}
/// One open container on the builder's stack.
///
/// The trailing `Option<String>` in every variant is the anchor name taken
/// from the container's start event; it is recorded once the container closes.
enum Frame<'a> {
/// A sequence under construction: the items collected so far and the anchor.
Sequence(Vec<BorrowedValue<'a>>, Option<String>),
/// A mapping that is waiting for its next key: the entries so far and the anchor.
MappingKey(
IndexMap<Cow<'a, str>, BorrowedValue<'a>, FxBuildHasher>,
Option<String>,
),
/// A mapping that has read a key and is waiting for the matching value:
/// the entries so far, the pending key, and the anchor.
MappingValue(
IndexMap<Cow<'a, str>, BorrowedValue<'a>, FxBuildHasher>,
Cow<'a, str>,
Option<String>,
),
}
/// Event-driven builder that assembles a `BorrowedValue` from parser events.
struct BorrowedBuilder<'a> {
/// Containers that are currently open, innermost last.
stack: Vec<Frame<'a>>,
/// The most recently completed top-level value, if any.
result: Option<BorrowedValue<'a>>,
/// Maximum allowed container nesting, copied from the parse configuration.
max_depth: usize,
/// Current container nesting level.
depth: usize,
/// Set on `DocumentStart` and cleared on `DocumentEnd`.
/// NOTE(review): written but not read anywhere in this section.
in_document: bool,
/// Anchored values by anchor name; cleared at the end of each document.
anchors: FxHashMap<String, BorrowedValue<'a>>,
/// Number of alias events processed so far.
alias_expansions: usize,
/// Maximum number of alias events allowed, copied from the parse configuration.
max_alias_expansions: usize,
}
impl<'a> BorrowedBuilder<'a> {
fn new(config: &ParseConfig) -> Self {
Self {
stack: Vec::new(),
result: None,
max_depth: config.max_depth,
depth: 0,
in_document: false,
anchors: FxHashMap::default(),
alias_expansions: 0,
max_alias_expansions: config.max_alias_expansions,
}
}
fn into_value(self) -> BorrowedValue<'a> {
self.result.unwrap_or(BorrowedValue::Null)
}
fn resolve_scalar(&self, value: Cow<'a, str>, style: ScalarStyle) -> BorrowedValue<'a> {
if style != ScalarStyle::Plain {
return BorrowedValue::String(value);
}
match &*value {
"" | "~" | "null" | "Null" | "NULL" => BorrowedValue::Null,
"true" | "True" | "TRUE" => BorrowedValue::Bool(true),
"false" | "False" | "FALSE" => BorrowedValue::Bool(false),
".inf" | ".Inf" | ".INF" | "+.inf" | "+.Inf" | "+.INF" => {
BorrowedValue::Number(crate::value::Number::Float(f64::INFINITY))
}
"-.inf" | "-.Inf" | "-.INF" => {
BorrowedValue::Number(crate::value::Number::Float(f64::NEG_INFINITY))
}
".nan" | ".NaN" | ".NAN" => {
BorrowedValue::Number(crate::value::Number::Float(f64::NAN))
}
s => {
let bytes = s.as_bytes();
if !bytes.is_empty() {
let first = bytes[0];
if first.is_ascii_digit() || first == b'+' || first == b'-' || first == b'.' {
if let Ok(n) = s.parse::<i64>() {
return BorrowedValue::Number(crate::value::Number::Integer(n));
}
if let Ok(f) = s.parse::<f64>() {
return BorrowedValue::Number(crate::value::Number::Float(f));
}
}
}
BorrowedValue::String(value)
}
}
}
fn push_value(&mut self, value: BorrowedValue<'a>) {
match self.stack.last_mut() {
Some(Frame::Sequence(seq, _)) => seq.push(value),
Some(Frame::MappingValue(map, key, _)) => {
let k = core::mem::replace(key, Cow::Borrowed(""));
let _ = map.insert(k, value);
let (map, anchor) = match self.stack.pop() {
Some(Frame::MappingValue(m, _, a)) => (m, a),
_ => crate::error::invariant_violated(
"stack frame must be MappingValue immediately after value emit",
),
};
self.stack.push(Frame::MappingKey(map, anchor));
}
Some(Frame::MappingKey(_, _)) => {
}
None => {
self.result = Some(value);
}
}
}
fn record_anchor(&mut self, anchor: Option<String>, value: &BorrowedValue<'a>) {
if let Some(name) = anchor {
let _ = self.anchors.insert(name, value.clone());
}
}
fn process(&mut self, event: Event<'a>, _input: &str) -> Result<BuilderState> {
match event {
Event::StreamStart => Ok(BuilderState::Continue),
Event::StreamEnd => Ok(BuilderState::Done),
Event::DocumentStart => {
self.in_document = true;
Ok(BuilderState::Continue)
}
Event::DocumentEnd => {
self.in_document = false;
self.anchors.clear();
Ok(BuilderState::Continue)
}
Event::Scalar {
value,
style,
anchor,
..
} => {
if let Some(Frame::MappingKey(_, _)) = self.stack.last_mut() {
let key = value;
let (map, frame_anchor) = match self.stack.pop() {
Some(Frame::MappingKey(m, a)) => (m, a),
_ => crate::error::invariant_violated(
"stack frame must be MappingKey when consuming a mapping key",
),
};
self.stack.push(Frame::MappingValue(map, key, frame_anchor));
return Ok(BuilderState::Continue);
}
let resolved = self.resolve_scalar(value, style);
self.record_anchor(anchor, &resolved);
self.push_value(resolved);
Ok(BuilderState::Continue)
}
Event::SequenceStart { anchor, .. } => {
self.depth += 1;
if self.depth > self.max_depth {
return Err(Error::RecursionLimitExceeded { depth: self.depth });
}
self.stack
.push(Frame::Sequence(Vec::with_capacity(4), anchor));
Ok(BuilderState::Continue)
}
Event::SequenceEnd { .. } => {
self.depth = self.depth.saturating_sub(1);
let (seq, anchor) = match self.stack.pop() {
Some(Frame::Sequence(s, a)) => (s, a),
_ => return Err(Error::Invalid("unexpected sequence end".to_string())),
};
let value = BorrowedValue::Sequence(seq);
self.record_anchor(anchor, &value);
self.push_value(value);
Ok(BuilderState::Continue)
}
Event::MappingStart { anchor, .. } => {
self.depth += 1;
if self.depth > self.max_depth {
return Err(Error::RecursionLimitExceeded { depth: self.depth });
}
self.stack.push(Frame::MappingKey(
IndexMap::with_capacity_and_hasher(4, FxBuildHasher),
anchor,
));
Ok(BuilderState::Continue)
}
Event::MappingEnd { .. } => {
self.depth = self.depth.saturating_sub(1);
let (map, anchor) = match self.stack.pop() {
Some(Frame::MappingKey(m, a)) => (m, a),
Some(Frame::MappingValue(m, _, a)) => (m, a),
_ => return Err(Error::Invalid("unexpected mapping end".to_string())),
};
let value = BorrowedValue::Mapping(map);
self.record_anchor(anchor, &value);
self.push_value(value);
Ok(BuilderState::Continue)
}
Event::Alias { anchor, .. } => {
self.alias_expansions += 1;
if self.alias_expansions > self.max_alias_expansions {
return Err(Error::Parse(format!(
"alias expansions exceeded limit of {}",
self.max_alias_expansions
)));
}
let referent = self
.anchors
.get(&anchor)
.cloned()
.ok_or_else(|| Error::Parse(format!("unknown anchor: '{anchor}'")))?;
if let Some(Frame::MappingKey(_, _)) = self.stack.last_mut() {
let key = match referent {
BorrowedValue::String(s) => s,
BorrowedValue::Bool(b) => Cow::Owned(b.to_string()),
BorrowedValue::Number(n) => Cow::Owned(n.to_string()),
BorrowedValue::Null => Cow::Borrowed("null"),
BorrowedValue::Sequence(_) | BorrowedValue::Mapping(_) => {
return Err(Error::Invalid(
"alias resolved to a non-scalar cannot be used as a mapping key"
.to_string(),
));
}
};
let (map, frame_anchor) = match self.stack.pop() {
Some(Frame::MappingKey(m, a)) => (m, a),
_ => crate::error::invariant_violated(
"stack frame must be MappingKey when consuming an alias key",
),
};
self.stack.push(Frame::MappingValue(map, key, frame_anchor));
return Ok(BuilderState::Continue);
}
self.push_value(referent);
Ok(BuilderState::Continue)
}
}
}
}