use crate::prelude::*;
use rustc_hash::{FxHashMap, FxHashSet};
use serde::de::{self, DeserializeSeed, IntoDeserializer, MapAccess, SeqAccess, Visitor};
use serde::Deserialize;
use smallvec::SmallVec;
use crate::error::{closest_name, Error, Result};
use crate::parser::{Event, ParseConfig, Parser, ScalarStyle};
use core::fmt;
/// Message carried by the `Error::Custom` that `fallback()` builds; `is_fallback_error`
/// compares against it to recognise a "streaming path cannot handle this" signal.
const FALLBACK_SENTINEL: &str = "$__noyalib_streaming_fallback";
/// Inline capacity of the `SmallVec` event buffers when the `wasm-opt` feature is enabled.
#[cfg(feature = "wasm-opt")]
const SMALL_VEC_SIZE: usize = 4;
/// Inline capacity of the `SmallVec` event buffers when the `wasm-opt` feature is disabled.
#[cfg(not(feature = "wasm-opt"))]
const SMALL_VEC_SIZE: usize = 16;
/// Owned copy of a parser event, kept so an anchored node (or the remainder of a mapping)
/// can be replayed later. Spans, anchors and tags are not retained.
#[derive(Debug, Clone)]
enum BufferedEvent {
/// A scalar with its text and presentation style.
Scalar { value: String, style: ScalarStyle },
/// Start of a sequence.
SeqStart,
/// End of a sequence.
SeqEnd,
/// Start of a mapping.
MapStart,
/// End of a mapping.
MapEnd,
/// An alias reference, stored by anchor name (not expanded at buffering time).
Alias { anchor: String },
}
/// Result of resolving a scalar's text to a typed value (see `resolve_plain_ext`).
#[derive(Debug, Clone)]
pub(crate) enum Scalar<'a> {
/// A null value.
Null,
/// A boolean value.
Bool(bool),
/// A signed 64-bit integer.
Int(i64),
/// A 64-bit float (including infinities and NaN).
Float(f64),
/// Text that did not resolve to any other type, or a non-plain scalar.
Str(Cow<'a, str>),
}
/// Serde deserializer that consumes parser events directly from the input text,
/// recording anchored nodes so aliases can be replayed.
pub struct StreamingDeserializer<'a> {
// Event source for the input.
parser: Parser<'a>,
// The original text; used to build located errors.
input: &'a str,
// Limits and scalar-resolution options.
config: ParseConfig,
// Optional registry consulted by `tag_in_registry` for non-core tags.
tag_registry: Option<Arc<crate::TagRegistry>>,
// Current container nesting, checked against `config.max_depth`.
depth: usize,
// One-event lookahead slot filled by the `peek_*` methods.
current: Option<Event<'a>>,
// Set while a mapping key is being deserialized; makes `deserialize_str`
// hand back the scalar text without type resolution.
raw_str_mode: bool,
// Completed recordings, keyed by anchor name.
anchor_events: FxHashMap<String, SmallVec<[BufferedEvent; SMALL_VEC_SIZE]>>,
// Start offset of each anchor's defining node, used for "unknown anchor" suggestions.
anchor_def_spans: FxHashMap<String, usize>,
// Buffers awaiting replay; each is stored reversed so `pop()` yields the next event.
replay_stack: Vec<SmallVec<[BufferedEvent; SMALL_VEC_SIZE]>>,
// In-progress recording: (anchor name, container nesting, events so far).
recording: Option<(String, usize, SmallVec<[BufferedEvent; SMALL_VEC_SIZE]>)>,
// Number of aliases resolved so far, checked against `config.max_alias_expansions`.
alias_count: usize,
// Estimated bytes produced by alias expansion, checked against `config.max_document_length`.
alias_bytes: usize,
}
// Summary-style `Debug`: prints sizes and flags rather than the parser state or buffers.
impl fmt::Debug for StreamingDeserializer<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let input_len = self.input.len();
        let anchor_events_len = self.anchor_events.len();
        let replay_stack_len = self.replay_stack.len();
        let is_recording = self.recording.is_some();
        f.debug_struct("StreamingDeserializer")
            .field("input_len", &input_len)
            .field("config", &self.config)
            .field("depth", &self.depth)
            .field("raw_str_mode", &self.raw_str_mode)
            .field("anchor_events_len", &anchor_events_len)
            .field("replay_stack_len", &replay_stack_len)
            .field("is_recording", &is_recording)
            .finish()
    }
}
impl<'a> StreamingDeserializer<'a> {
/// Creates a deserializer over `input` using `ParseConfig::default()`.
pub fn new(input: &'a str) -> Self {
    let defaults = ParseConfig::default();
    Self::with_config(input, defaults)
}
pub fn with_config<C>(input: &'a str, config: C) -> Self
where
C: Into<ParseConfig>,
{
StreamingDeserializer {
parser: Parser::new(input),
input,
config: config.into(),
tag_registry: None,
depth: 0,
current: None,
raw_str_mode: false,
anchor_events: FxHashMap::default(),
anchor_def_spans: FxHashMap::default(),
replay_stack: Vec::new(),
recording: None,
alias_count: 0,
alias_bytes: 0,
}
}
#[must_use]
pub fn with_tag_registry(mut self, registry: Arc<crate::TagRegistry>) -> Self {
self.tag_registry = Some(registry);
self
}
fn peek_parser_event(&mut self) -> Result<&Event<'a>> {
if self.current.is_none() {
let event = self
.parser
.next_event()
.map_err(|e| Error::parse_at(&*e.message, self.input, e.index))?;
self.current = Some(event);
}
Ok(self.current.as_ref().unwrap())
}
fn next_parser_event(&mut self) -> Result<Event<'a>> {
let mut ev = if let Some(ev) = self.current.take() {
ev
} else {
self.parser
.next_event()
.map_err(|e| Error::parse_at(&*e.message, self.input, e.index))?
};
self.handle_anchor(&mut ev);
self.maybe_record(&ev);
Ok(ev)
}
fn peek_event(&mut self) -> Result<&Event<'a>> {
if self.current.is_none() {
let mut ev_opt = None;
while let Some(buf) = self.replay_stack.last_mut() {
if let Some(be) = buf.pop() {
ev_opt = Some(self.buffered_to_event(be));
break;
}
let _ = self.replay_stack.pop();
}
let mut ev = if let Some(ev) = ev_opt {
ev
} else {
let mut ev = self
.parser
.next_event()
.map_err(|e| Error::parse_at(&*e.message, self.input, e.index))?;
if let Event::Alias {
ref anchor,
ref span,
} = ev
{
let start = span.start;
ev = self.resolve_alias(anchor, start)?;
}
ev
};
self.handle_anchor(&mut ev);
self.maybe_record(&ev);
self.current = Some(ev);
}
Ok(self.current.as_ref().unwrap())
}
fn next_event(&mut self) -> Result<Event<'a>> {
if let Some(ev) = self.current.take() {
return Ok(ev);
}
let mut ev_opt = None;
while let Some(buf) = self.replay_stack.last_mut() {
if let Some(be) = buf.pop() {
ev_opt = Some(self.buffered_to_event(be));
break;
}
let _ = self.replay_stack.pop();
}
if let Some(mut ev) = ev_opt {
self.handle_anchor(&mut ev);
self.maybe_record(&ev);
return Ok(ev);
}
let mut ev = self
.parser
.next_event()
.map_err(|e| Error::parse_at(&*e.message, self.input, e.index))?;
if let Event::Alias {
ref anchor,
ref span,
} = ev
{
let start = span.start;
ev = self.resolve_alias(anchor, start)?;
}
self.handle_anchor(&mut ev);
self.maybe_record(&ev);
Ok(ev)
}
/// Rebuilds a parser `Event` from a buffered one.
///
/// The result carries no anchor or tag and uses the default span from
/// `crate::parser::scanner_span_default()`.
fn buffered_to_event(&self, be: BufferedEvent) -> Event<'a> {
    let span = crate::parser::scanner_span_default();
    match be {
        BufferedEvent::Alias { anchor } => Event::Alias { anchor, span },
        BufferedEvent::MapEnd => Event::MappingEnd { span },
        BufferedEvent::MapStart => Event::MappingStart {
            anchor: None,
            tag: None,
            span,
        },
        BufferedEvent::SeqEnd => Event::SequenceEnd { span },
        BufferedEvent::SeqStart => Event::SequenceStart {
            anchor: None,
            tag: None,
            span,
        },
        BufferedEvent::Scalar { value, style } => Event::Scalar {
            value: Cow::Owned(value),
            style,
            anchor: None,
            tag: None,
            span,
        },
    }
}
/// Starts recording when `ev` defines an anchor.
///
/// Does nothing while another recording is in progress (the anchor is left on
/// the event in that case). Otherwise the anchor is removed from the event, its
/// definition offset is remembered, and a fresh recording is opened for it.
fn handle_anchor(&mut self, ev: &mut Event<'_>) {
    if self.recording.is_some() {
        return;
    }
    // Only scalars and container starts can carry an anchor.
    let (slot, def_start) = match ev {
        Event::Scalar { anchor, span, .. }
        | Event::SequenceStart { anchor, span, .. }
        | Event::MappingStart { anchor, span, .. } => (anchor, span.start),
        _ => return,
    };
    let Some(name) = slot.take() else {
        return;
    };
    let _ = self.anchor_def_spans.insert(name.clone(), def_start);
    self.recording = Some((name, 0, SmallVec::new()));
}
fn maybe_record(&mut self, ev: &Event<'_>) {
if let Some((_, ref mut depth, ref mut buf)) = self.recording {
match ev {
Event::Scalar { value, style, .. } => {
buf.push(BufferedEvent::Scalar {
value: value.to_string(),
style: *style,
});
if *depth == 0 {
let (name, _, events) = self.recording.take().unwrap();
let _ = self.anchor_events.insert(name, events);
}
}
Event::SequenceStart { .. } => {
buf.push(BufferedEvent::SeqStart);
*depth += 1;
}
Event::SequenceEnd { .. } => {
buf.push(BufferedEvent::SeqEnd);
*depth -= 1;
if *depth == 0 {
let (name, _, events) = self.recording.take().unwrap();
let _ = self.anchor_events.insert(name, events);
}
}
Event::MappingStart { .. } => {
buf.push(BufferedEvent::MapStart);
*depth += 1;
}
Event::MappingEnd { .. } => {
buf.push(BufferedEvent::MapEnd);
*depth -= 1;
if *depth == 0 {
let (name, _, events) = self.recording.take().unwrap();
let _ = self.anchor_events.insert(name, events);
}
}
Event::Alias { anchor, .. } => {
buf.push(BufferedEvent::Alias {
anchor: anchor.clone(),
});
if *depth == 0 {
let (name, _, events) = self.recording.take().unwrap();
let _ = self.anchor_events.insert(name, events);
}
}
_ => {}
}
}
}
/// Expands the alias `name` into the first event of its recorded target and
/// queues the remaining events for replay.
///
/// # Errors
/// - `Error::RepetitionLimitExceeded` when the alias count exceeds
///   `config.max_alias_expansions`, or the running expansion-size estimate
///   exceeds `config.max_document_length`.
/// - An unknown-anchor error (located at `alias_start`) when no recording
///   exists for `name` or the recording is empty.
fn resolve_alias(&mut self, name: &str, alias_start: usize) -> Result<Event<'a>> {
    self.alias_count += 1;
    if self.alias_count > self.config.max_alias_expansions {
        return Err(Error::RepetitionLimitExceeded);
    }
    let Some(recorded) = self.anchor_events.get(name) else {
        return Err(self.build_unknown_anchor(name, alias_start));
    };
    // Rough size estimate: scalar text plus a fixed overhead per event.
    let cost: usize = recorded
        .iter()
        .map(|item| match item {
            BufferedEvent::Scalar { value, .. } => value.len() + 8,
            _ => 4,
        })
        .sum();
    self.alias_bytes = self.alias_bytes.saturating_add(cost);
    if self.alias_bytes > self.config.max_document_length {
        return Err(Error::RepetitionLimitExceeded);
    }
    let mut pending = recorded.clone();
    if pending.is_empty() {
        return Err(self.build_unknown_anchor(name, alias_start));
    }
    // Replay buffers are stored reversed so that `pop()` yields events in order.
    pending.reverse();
    let first = pending.pop().unwrap();
    if !pending.is_empty() {
        self.replay_stack.push(pending);
    }
    Ok(self.buffered_to_event(first))
}
/// Implements a merge key: splices the entries of the anchored mappings named in
/// `sources` in front of the remainder of the current mapping.
///
/// The rest of the current mapping is buffered first so its keys are known.
/// Each source is filtered against the keys seen so far (local keys, then
/// earlier sources) before its own keys are added to that set. Replay order is:
/// sources in the given order, then the buffered local remainder.
///
/// # Errors
/// Unknown-anchor error for a source without a recording; the fallback error
/// when a helper (`extract_mapping_body`, `filter_merge_entries`,
/// `collect_keys`) returns `None`.
fn inject_multi_merge_mapping_contents(&mut self, sources: &[(String, usize)]) -> Result<()> {
    let local_rest = self.buffer_rest_of_mapping()?;
    let mut taken_keys = extract_local_keys(&local_rest);
    let mut per_source: SmallVec<[SmallVec<[BufferedEvent; SMALL_VEC_SIZE]>; 2]> =
        SmallVec::new();
    for (anchor_name, alias_start) in sources {
        let recorded = self
            .anchor_events
            .get(anchor_name)
            .cloned()
            .ok_or_else(|| self.build_unknown_anchor(anchor_name, *alias_start))?;
        let body = extract_mapping_body(&recorded).ok_or_else(|| self.fallback())?;
        let kept = filter_merge_entries(body, &taken_keys).ok_or_else(|| self.fallback())?;
        collect_keys(body, &mut taken_keys).ok_or_else(|| self.fallback())?;
        per_source.push(kept);
    }
    // The local remainder goes on the stack first, so it is replayed last.
    if !local_rest.is_empty() {
        let mut reversed_rest = local_rest;
        reversed_rest.reverse();
        self.replay_stack.push(reversed_rest);
    }
    // Push sources back-to-front so the first source ends up on top.
    for mut events in per_source.into_iter().rev() {
        if events.is_empty() {
            continue;
        }
        events.reverse();
        self.replay_stack.push(events);
    }
    Ok(())
}
/// Reads raw parser events up to and including the `MappingEnd` that closes the
/// current mapping, returning them as owned buffered events.
///
/// Nested containers are tracked with a nesting counter; event kinds without a
/// buffered counterpart are dropped.
fn buffer_rest_of_mapping(&mut self) -> Result<SmallVec<[BufferedEvent; SMALL_VEC_SIZE]>> {
    let mut collected = SmallVec::new();
    let mut nesting: usize = 0;
    loop {
        match self.next_parser_event()? {
            Event::Scalar { value, style, .. } => collected.push(BufferedEvent::Scalar {
                value: value.into_owned(),
                style,
            }),
            Event::Alias { anchor, .. } => collected.push(BufferedEvent::Alias { anchor }),
            Event::SequenceStart { .. } => {
                collected.push(BufferedEvent::SeqStart);
                nesting += 1;
            }
            Event::MappingStart { .. } => {
                collected.push(BufferedEvent::MapStart);
                nesting += 1;
            }
            Event::SequenceEnd { .. } => {
                collected.push(BufferedEvent::SeqEnd);
                nesting = nesting.saturating_sub(1);
            }
            Event::MappingEnd { .. } => {
                collected.push(BufferedEvent::MapEnd);
                // At nesting 0 this end closes the mapping we started in.
                if nesting == 0 {
                    return Ok(collected);
                }
                nesting -= 1;
            }
            _ => {}
        }
    }
}
fn build_unknown_anchor(&self, name: &str, alias_start: usize) -> Error {
let loc = crate::error::Location::from_index(self.input, alias_start);
let suggestion = closest_name(name, self.anchor_def_spans.keys().map(|s| s.as_str()))
.and_then(|s| {
self.anchor_def_spans.get(s).map(|&idx| {
(
s.to_string(),
crate::error::Location::from_index(self.input, idx),
)
})
});
Error::UnknownAnchorAt {
name: name.to_owned(),
location: loc,
suggestion,
}
}
fn fallback(&self) -> Error {
Error::Custom(FALLBACK_SENTINEL.to_owned())
}
/// Consumes the next logical event and discards it.
fn skip_event(&mut self) -> Result<()> {
    self.next_event().map(drop)
}
/// Discards leading `StreamStart` / `DocumentStart` events so the next event is
/// the first one of another kind.
fn skip_to_content(&mut self) -> Result<()> {
    while matches!(
        self.peek_event()?,
        Event::StreamStart | Event::DocumentStart
    ) {
        self.skip_event()?;
    }
    Ok(())
}
/// Consumes one complete value: a lone scalar/alias, or a container together
/// with everything nested in it.
///
/// Event kinds other than scalars, aliases and container starts/ends are
/// consumed and ignored. A container end seen with no open container also
/// terminates the skip.
fn skip_value(&mut self) -> Result<()> {
    let mut open: i64 = 0;
    loop {
        match self.next_event()? {
            Event::Scalar { .. } | Event::Alias { .. } => {
                if open == 0 {
                    return Ok(());
                }
            }
            Event::SequenceStart { .. } | Event::MappingStart { .. } => open += 1,
            Event::SequenceEnd { .. } | Event::MappingEnd { .. } => {
                open -= 1;
                if open <= 0 {
                    return Ok(());
                }
            }
            _ => {}
        }
    }
}
/// Removes and returns the tag of the next event, if it has one.
///
/// Returns `None` when peeking fails (the error is not reported here), when the
/// event kind cannot carry a tag, or when no tag is present.
fn take_tag_from_current(&mut self) -> Option<(String, String)> {
    if self.peek_event().is_err() {
        return None;
    }
    match self.current.as_mut()? {
        Event::Scalar { tag, .. }
        | Event::SequenceStart { tag, .. }
        | Event::MappingStart { tag, .. } => tag.take(),
        _ => None,
    }
}
/// Reports whether `tag` (as two concatenated parts) is present in the attached
/// tag registry.
///
/// The `!!` tags `int`, `float`, `str`, `bool`, `null`, `seq` and `map` always
/// yield `false`, as does the absence of a registry.
fn tag_in_registry(&self, tag: &(String, String)) -> bool {
    let is_core = tag.0 == "!!"
        && matches!(
            tag.1.as_str(),
            "int" | "float" | "str" | "bool" | "null" | "seq" | "map"
        );
    if is_core {
        return false;
    }
    match self.tag_registry.as_ref() {
        Some(registry) => registry.contains(&format!("{}{}", tag.0, tag.1)),
        None => false,
    }
}
/// Puts tag `t` back on the peeked event, if that event kind can carry a tag.
fn restore_tag_to_current(&mut self, t: (String, String)) {
    match self.current.as_mut() {
        Some(
            Event::Scalar { tag, .. }
            | Event::SequenceStart { tag, .. }
            | Event::MappingStart { tag, .. },
        ) => *tag = Some(t),
        _ => {}
    }
}
/// Resolves scalar text to a typed value.
///
/// Only plain scalars go through `resolve_plain_ext` (with the configured
/// schema options); every other style is returned as a borrowed string.
fn resolve_scalar<'s>(&self, value: &'s str, style: ScalarStyle) -> Scalar<'s> {
    if style != ScalarStyle::Plain {
        return Scalar::Str(Cow::Borrowed(value));
    }
    let cfg = &self.config;
    resolve_plain_ext(
        value,
        cfg.strict_booleans,
        cfg.legacy_booleans,
        cfg.no_schema,
        cfg.legacy_octal_numbers,
        cfg.legacy_sexagesimal,
    )
}
}
impl<'de> de::Deserializer<'de> for &mut StreamingDeserializer<'de> {
type Error = Error;
/// Deserializes whatever value comes next, driven by the event kind.
///
/// A tag that is not in the registry is restored and the fallback error is
/// returned. Scalars are resolved and dispatched to the matching visitor method
/// (borrowed text is passed on as borrowed); sequences and mappings are visited
/// through `StreamingSeqAccess` / `StreamingMapAccess` with the nesting depth
/// checked against `config.max_depth`. Any other event yields the fallback error.
fn deserialize_any<V>(self, visitor: V) -> Result<V::Value>
where
    V: Visitor<'de>,
{
    self.skip_to_content()?;
    if let Some(tag) = self.take_tag_from_current() {
        if !self.tag_in_registry(&tag) {
            self.restore_tag_to_current(tag);
            return Err(self.fallback());
        }
    }
    match self.next_event()? {
        Event::Scalar {
            value: Cow::Borrowed(text),
            style,
            ..
        } => match self.resolve_scalar(text, style) {
            Scalar::Str(Cow::Borrowed(b)) => visitor.visit_borrowed_str(b),
            Scalar::Str(Cow::Owned(o)) => visitor.visit_string(o),
            Scalar::Int(n) => visitor.visit_i64(n),
            Scalar::Float(f) => visitor.visit_f64(f),
            Scalar::Bool(b) => visitor.visit_bool(b),
            Scalar::Null => visitor.visit_none(),
        },
        Event::Scalar {
            value: Cow::Owned(text),
            style,
            ..
        } => match self.resolve_scalar(&text, style) {
            // The owned text itself is handed over; the resolved view is discarded.
            Scalar::Str(_) => visitor.visit_string(text),
            Scalar::Int(n) => visitor.visit_i64(n),
            Scalar::Float(f) => visitor.visit_f64(f),
            Scalar::Bool(b) => visitor.visit_bool(b),
            Scalar::Null => visitor.visit_none(),
        },
        Event::SequenceStart { .. } => {
            self.depth += 1;
            if self.depth > self.config.max_depth {
                return Err(Error::RecursionLimitExceeded { depth: self.depth });
            }
            let seq_value = visitor.visit_seq(StreamingSeqAccess {
                de: self,
                finished: false,
                count: 0,
            })?;
            self.depth = self.depth.saturating_sub(1);
            Ok(seq_value)
        }
        Event::MappingStart { .. } => {
            self.depth += 1;
            if self.depth > self.config.max_depth {
                return Err(Error::RecursionLimitExceeded { depth: self.depth });
            }
            let map_value = visitor.visit_map(StreamingMapAccess {
                de: self,
                finished: false,
                has_emitted_key: false,
                key_count: 0,
                seen_keys: FxHashSet::default(),
            })?;
            self.depth = self.depth.saturating_sub(1);
            Ok(map_value)
        }
        _ => Err(self.fallback()),
    }
}
fn deserialize_bool<V>(self, visitor: V) -> Result<V::Value>
where
V: Visitor<'de>,
{
self.skip_to_content()?;
if let Event::Scalar { value, style, .. } = self.next_event()? {
if let Scalar::Bool(b) = self.resolve_scalar(&value, style) {
return visitor.visit_bool(b);
}
}
Err(Error::TypeMismatch {
expected: "bool",
found: "other".into(),
})
}
/// Deserializes a signed 64-bit integer.
///
/// Accepts a scalar that resolves to an integer, or to a finite float with no
/// fractional part that is exactly representable as an `i64`. Anything else is
/// a `TypeMismatch`.
fn deserialize_i64<V>(self, visitor: V) -> Result<V::Value>
where
    V: Visitor<'de>,
{
    self.skip_to_content()?;
    if let Event::Scalar { value, style, .. } = self.next_event()? {
        match self.resolve_scalar(&value, style) {
            Scalar::Int(n) => return visitor.visit_i64(n),
            // `i64::MAX as f64` rounds up to 2^63, which is out of range for
            // `i64`; the upper bound must be strict or `f as i64` would
            // silently saturate to `i64::MAX`. `i64::MIN as f64` (-2^63) is exact.
            Scalar::Float(f)
                if f.is_finite()
                    && f.fract() == 0.0
                    && f >= i64::MIN as f64
                    && f < i64::MAX as f64 =>
            {
                return visitor.visit_i64(f as i64);
            }
            _ => {}
        }
    }
    Err(Error::TypeMismatch {
        expected: "integer",
        found: "other".into(),
    })
}
/// Deserializes an unsigned 64-bit integer.
///
/// Accepts a scalar that resolves to a non-negative integer, or to a finite,
/// non-negative float with no fractional part that is exactly representable as
/// a `u64`. Anything else is a `TypeMismatch`.
fn deserialize_u64<V>(self, visitor: V) -> Result<V::Value>
where
    V: Visitor<'de>,
{
    self.skip_to_content()?;
    if let Event::Scalar { value, style, .. } = self.next_event()? {
        match self.resolve_scalar(&value, style) {
            Scalar::Int(n) if n >= 0 => return visitor.visit_u64(n as u64),
            // `u64::MAX as f64` rounds up to 2^64, which is out of range for
            // `u64`; the upper bound must be strict or `f as u64` would
            // silently saturate to `u64::MAX`.
            Scalar::Float(f)
                if f.is_finite() && f.fract() == 0.0 && f >= 0.0 && f < u64::MAX as f64 =>
            {
                return visitor.visit_u64(f as u64);
            }
            _ => {}
        }
    }
    Err(Error::TypeMismatch {
        expected: "unsigned integer",
        found: "other".into(),
    })
}
/// Deserializes a float; integers are widened to `f64`. Anything else is a
/// `TypeMismatch`.
fn deserialize_f64<V>(self, visitor: V) -> Result<V::Value>
where
    V: Visitor<'de>,
{
    self.skip_to_content()?;
    if let Event::Scalar { value, style, .. } = self.next_event()? {
        let number = match self.resolve_scalar(&value, style) {
            Scalar::Float(f) => Some(f),
            Scalar::Int(n) => Some(n as f64),
            _ => None,
        };
        if let Some(number) = number {
            return visitor.visit_f64(number);
        }
    }
    Err(Error::TypeMismatch {
        expected: "float",
        found: "other".into(),
    })
}
fn deserialize_str<V>(self, visitor: V) -> Result<V::Value>
where
V: Visitor<'de>,
{
self.skip_to_content()?;
if let Some(t) = self.take_tag_from_current() {
if !self.tag_in_registry(&t) {
self.restore_tag_to_current(t);
return Err(self.fallback());
}
}
match self.peek_event()? {
Event::Scalar { .. } => {
if let Event::Scalar { value, style, .. } = self.next_event()? {
if self.raw_str_mode {
return match value {
Cow::Borrowed(s) => visitor.visit_borrowed_str(s),
Cow::Owned(s) => visitor.visit_string(s),
};
}
if style != ScalarStyle::Plain {
return match value {
Cow::Borrowed(s) => visitor.visit_borrowed_str(s),
Cow::Owned(s) => visitor.visit_string(s),
};
}
match value {
Cow::Borrowed(s) => match self.resolve_scalar(s, style) {
Scalar::Str(Cow::Borrowed(b)) => visitor.visit_borrowed_str(b),
Scalar::Str(Cow::Owned(o)) => visitor.visit_string(o),
_ => Err(Error::TypeMismatch {
expected: "string",
found: "non-string scalar".into(),
}),
},
Cow::Owned(s) => match self.resolve_scalar(&s, style) {
Scalar::Str(_) => visitor.visit_string(s),
_ => Err(Error::TypeMismatch {
expected: "string",
found: "non-string scalar".into(),
}),
},
}
} else {
Err(Error::TypeMismatch {
expected: "string",
found: "other".into(),
})
}
}
_ => Err(self.fallback()),
}
}
fn deserialize_string<V>(self, visitor: V) -> Result<V::Value>
where
V: Visitor<'de>,
{
self.deserialize_str(visitor)
}
/// Deserializes an `Option`: a plain scalar spelled ``, `~`, `null`, `Null` or
/// `NULL` is consumed and becomes `None`; everything else is `Some` and is
/// deserialized from the same position.
fn deserialize_option<V>(self, visitor: V) -> Result<V::Value>
where
    V: Visitor<'de>,
{
    self.skip_to_content()?;
    let is_null = match self.peek_event()? {
        Event::Scalar {
            value,
            style: ScalarStyle::Plain,
            ..
        } => matches!(&**value, "" | "~" | "null" | "Null" | "NULL"),
        _ => false,
    };
    if is_null {
        self.skip_event()?;
        return visitor.visit_none();
    }
    visitor.visit_some(self)
}
/// Deserializes `()`: the next event must be a scalar resolving to null,
/// otherwise this is a `TypeMismatch`.
fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value>
where
    V: Visitor<'de>,
{
    self.skip_to_content()?;
    let is_null = match self.next_event()? {
        Event::Scalar { value, style, .. } => {
            matches!(self.resolve_scalar(&value, style), Scalar::Null)
        }
        _ => false,
    };
    if is_null {
        visitor.visit_unit()
    } else {
        Err(Error::TypeMismatch {
            expected: "null",
            found: "other".into(),
        })
    }
}
/// Deserializes a newtype struct.
///
/// The spanned wrapper type is not supported here and yields the fallback
/// error. A leading tag is consumed: the listed `!!` tags and registry tags are
/// dropped and the inner value is visited directly; any other tag is exposed to
/// the visitor as a single-entry map via `StreamingTagMapAccess`.
fn deserialize_newtype_struct<V>(self, name: &'static str, visitor: V) -> Result<V::Value>
where
    V: Visitor<'de>,
{
    if name == crate::spanned::SPANNED_TYPE_NAME {
        return Err(self.fallback());
    }
    self.skip_to_content()?;
    let Some(tag) = self.take_tag_from_current() else {
        return visitor.visit_newtype_struct(self);
    };
    let is_core = tag.0 == "!!"
        && matches!(
            tag.1.as_str(),
            "int" | "float" | "str" | "bool" | "null" | "seq" | "map"
        );
    if is_core || self.tag_in_registry(&tag) {
        return visitor.visit_newtype_struct(self);
    }
    visitor.visit_map(StreamingTagMapAccess {
        de: self,
        tag,
        done: false,
    })
}
fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value>
where
V: Visitor<'de>,
{
self.skip_to_content()?;
if let Some(t) = self.take_tag_from_current() {
if !self.tag_in_registry(&t) {
self.restore_tag_to_current(t);
return Err(self.fallback());
}
}
if let Event::SequenceStart { .. } = self.next_event()? {
self.depth += 1;
if self.depth > self.config.max_depth {
return Err(Error::RecursionLimitExceeded { depth: self.depth });
}
let res = visitor.visit_seq(StreamingSeqAccess {
de: self,
finished: false,
count: 0,
})?;
self.depth = self.depth.saturating_sub(1);
Ok(res)
} else {
Err(Error::TypeMismatch {
expected: "sequence",
found: "other".into(),
})
}
}
fn deserialize_map<V>(self, visitor: V) -> Result<V::Value>
where
V: Visitor<'de>,
{
self.skip_to_content()?;
if let Some(t) = self.take_tag_from_current() {
if !self.tag_in_registry(&t) {
self.restore_tag_to_current(t);
return Err(self.fallback());
}
}
if let Event::MappingStart { .. } = self.next_event()? {
self.depth += 1;
if self.depth > self.config.max_depth {
return Err(Error::RecursionLimitExceeded { depth: self.depth });
}
let res = visitor.visit_map(StreamingMapAccess {
de: self,
finished: false,
has_emitted_key: false,
key_count: 0,
seen_keys: FxHashSet::default(),
})?;
self.depth = self.depth.saturating_sub(1);
Ok(res)
} else {
Err(Error::TypeMismatch {
expected: "mapping",
found: "other".into(),
})
}
}
/// Deserializes an enum.
///
/// Three shapes are recognised:
/// - a tag other than the listed `!!` tags: the tag names the variant and the
///   tagged node is its payload (`StreamingTagEnumAccess`);
/// - a scalar: a unit variant named by the scalar text;
/// - a mapping whose first key is a scalar: that key names the variant and the
///   following value is its payload (`StreamingEnumAccess`).
///
/// Anything else is a `TypeMismatch`.
fn deserialize_enum<V>(
    self,
    _name: &'static str,
    _variants: &'static [&'static str],
    visitor: V,
) -> Result<V::Value>
where
    V: Visitor<'de>,
{
    self.skip_to_content()?;
    if let Some(tag) = self.take_tag_from_current() {
        let is_core = tag.0 == "!!"
            && matches!(
                tag.1.as_str(),
                "int" | "float" | "str" | "bool" | "null" | "seq" | "map"
            );
        if !is_core {
            return visitor.visit_enum(StreamingTagEnumAccess { de: self, tag });
        }
    }
    match self.next_event()? {
        Event::Scalar { value, .. } => {
            visitor.visit_enum(value.into_owned().into_deserializer())
        }
        Event::MappingStart { .. } => match self.next_event()? {
            Event::Scalar { value, .. } => visitor.visit_enum(StreamingEnumAccess {
                de: self,
                variant: value.into_owned(),
            }),
            _ => Err(Error::TypeMismatch {
                expected: "variant name",
                found: "non-scalar".into(),
            }),
        },
        _ => Err(Error::TypeMismatch {
            expected: "enum",
            found: "other".into(),
        }),
    }
}
fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value>
where
V: Visitor<'de>,
{
self.skip_to_content()?;
if let Event::Scalar { value, .. } = self.next_event()? {
return match value {
Cow::Borrowed(s) => visitor.visit_borrowed_str(s),
Cow::Owned(s) => visitor.visit_string(s),
};
}
Err(Error::TypeMismatch {
expected: "identifier",
found: "non-scalar".into(),
})
}
/// Skips one complete value and reports unit to the visitor.
fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value>
where
    V: Visitor<'de>,
{
    self.skip_value().and_then(|()| visitor.visit_unit())
}
fn deserialize_unit_struct<V>(self, _name: &'static str, visitor: V) -> Result<V::Value>
where
V: Visitor<'de>,
{
self.skip_to_content()?;
self.deserialize_unit(visitor)
}
fn deserialize_struct<V>(
self,
name: &'static str,
_fields: &'static [&'static str],
visitor: V,
) -> Result<V::Value>
where
V: Visitor<'de>,
{
if name == crate::spanned::SPANNED_TYPE_NAME {
return Err(self.fallback());
}
self.deserialize_map(visitor)
}
fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value>
where
V: Visitor<'de>,
{
self.skip_to_content()?;
let is_binary = match self.current.as_ref() {
Some(Event::Scalar { tag: Some(t), .. }) => {
let full = format!("{}{}", t.0, t.1);
crate::de::is_binary_tag(&full)
}
_ => false,
};
if is_binary {
let _ = self.take_tag_from_current();
if let Event::Scalar { value, .. } = self.next_event()? {
return match crate::base64::decode(&value) {
Ok(bytes) => visitor.visit_byte_buf(bytes),
Err(why) => Err(Error::Deserialize(format!("!!binary: {why}"))),
};
}
return Err(Error::TypeMismatch {
expected: "string-shaped !!binary content",
found: "non-scalar".into(),
});
}
if let Event::Scalar { value, style, .. } = self.next_event()? {
return match self.resolve_scalar(&value, style) {
Scalar::Str(s) => visitor.visit_bytes(s.as_bytes()),
Scalar::Null => Err(Error::TypeMismatch {
expected: "bytes",
found: "null".into(),
}),
Scalar::Bool(_) => Err(Error::TypeMismatch {
expected: "bytes",
found: "bool".into(),
}),
Scalar::Int(_) => Err(Error::TypeMismatch {
expected: "bytes",
found: "integer".into(),
}),
Scalar::Float(_) => Err(Error::TypeMismatch {
expected: "bytes",
found: "float".into(),
}),
};
}
Err(Error::TypeMismatch {
expected: "bytes",
found: "non-scalar".into(),
})
}
fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value>
where
V: Visitor<'de>,
{
self.deserialize_bytes(visitor)
}
// The remaining primitive and tuple hints have no dedicated handling and are
// routed through `deserialize_any`.
serde::forward_to_deserialize_any! {
i8 i16 i32 u8 u16 u32 f32 char
tuple tuple_struct
}
}
/// `SeqAccess` over the elements of a sequence whose start event has already been consumed.
struct StreamingSeqAccess<'a, 'de> {
// The deserializer the elements are read from.
de: &'a mut StreamingDeserializer<'de>,
// Set once the closing `SequenceEnd` has been consumed; checked by `Drop`.
finished: bool,
// Number of elements handed out so far, checked against `max_sequence_length`.
count: usize,
}
impl<'de> SeqAccess<'de> for StreamingSeqAccess<'_, 'de> {
    type Error = Error;
    /// Yields the next element, or `None` after consuming the closing
    /// `SequenceEnd`. Fails with a parse error once more than
    /// `config.max_sequence_length` elements have been requested.
    fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>>
    where
        T: DeserializeSeed<'de>,
    {
        let at_end = matches!(self.de.peek_event()?, Event::SequenceEnd { .. });
        if at_end {
            self.de.skip_event()?;
            self.finished = true;
            return Ok(None);
        }
        self.count += 1;
        let limit = self.de.config.max_sequence_length;
        if self.count > limit {
            return Err(Error::Parse(format!(
                "sequence exceeds maximum length of {}",
                limit
            )));
        }
        let element = seed.deserialize(&mut *self.de)?;
        Ok(Some(element))
    }
}
// If the visitor stopped early, drain the rest of the sequence (best effort) so
// the deserializer is positioned after the closing `SequenceEnd`.
impl Drop for StreamingSeqAccess<'_, '_> {
    fn drop(&mut self) {
        if self.finished {
            return;
        }
        loop {
            let at_end = match self.de.peek_event() {
                Ok(ev) => matches!(ev, Event::SequenceEnd { .. }),
                Err(_) => return,
            };
            if at_end {
                let _ = self.de.skip_event();
                return;
            }
            if self.de.skip_value().is_err() {
                return;
            }
        }
    }
}
/// `MapAccess` over the entries of a mapping whose start event has already been consumed.
struct StreamingMapAccess<'a, 'de> {
// The deserializer the entries are read from.
de: &'a mut StreamingDeserializer<'de>,
// Set once the closing `MappingEnd` has been consumed; checked by `Drop`.
finished: bool,
// True after the first key has been handed to the visitor; a merge key seen
// after that point triggers the fallback error.
has_emitted_key: bool,
// Number of keys handed out so far, checked against `max_mapping_keys`.
key_count: usize,
// Scalar key texts seen so far, used for duplicate-key policies other than `Last`.
seen_keys: FxHashSet<String>,
}
impl<'de> MapAccess<'de> for StreamingMapAccess<'_, 'de> {
type Error = Error;
/// Yields the next key, or `None` after consuming the closing `MappingEnd`.
///
/// Handles, in order: end of mapping, a plain `<<` merge key (expanded in
/// place, then the loop restarts), the duplicate-key policy for scalar keys,
/// and the `max_mapping_keys` limit. Keys are deserialized in raw-string mode.
fn next_key_seed<K>(&mut self, seed: K) -> Result<Option<K::Value>>
where
K: DeserializeSeed<'de>,
{
loop {
let ev = self.de.peek_event()?;
if matches!(ev, Event::MappingEnd { .. }) {
self.de.skip_event()?;
self.finished = true;
return Ok(None);
}
if let Event::Scalar {
value,
style: ScalarStyle::Plain,
..
} = ev
{
if value == "<<" {
// A merge key is only expanded when it comes before any other key.
if self.has_emitted_key {
return Err(self.de.fallback());
}
// Consume the `<<` key itself.
self.de.skip_event()?;
// The merge value is inspected as a raw parser event so that an
// alias is seen as an alias rather than being expanded.
match self.de.peek_parser_event()? {
Event::Alias { anchor, span } => {
let name = anchor.clone();
let start = span.start;
// Drop the peeked alias; its target is injected instead.
self.de.current = None;
self.de
.inject_multi_merge_mapping_contents(&[(name, start)])?;
}
Event::SequenceStart { .. } => {
self.de.skip_event()?;
// Collect every alias in the sequence; anything else is unsupported here.
let mut sources = Vec::new();
loop {
match self.de.peek_parser_event()? {
Event::SequenceEnd { .. } => {
self.de.skip_event()?;
break;
}
Event::Alias { anchor, span } => {
sources.push((anchor.clone(), span.start));
self.de.skip_event()?;
}
_ => return Err(self.de.fallback()),
}
}
self.de.inject_multi_merge_mapping_contents(&sources)?;
}
_ => return Err(self.de.fallback()),
}
// Re-enter the loop to read the first injected (or remaining) key.
continue;
}
}
// Only scalar keys take part in duplicate detection.
let key_str_opt = if let Event::Scalar { value: key_val, .. } = ev {
Some(key_val.to_string())
} else {
None
};
let policy = self.de.config.duplicate_key_policy;
if let Some(key_str) = key_str_opt {
if policy != crate::parser::InternalDuplicateKeyPolicy::Last {
if self.seen_keys.contains(&key_str) {
match policy {
crate::parser::InternalDuplicateKeyPolicy::Error => {
return Err(Error::DuplicateKey(key_str));
}
crate::parser::InternalDuplicateKeyPolicy::First => {
// Keep the first occurrence: skip this key and its value.
self.de.skip_value()?;
self.de.skip_value()?;
continue;
}
_ => {}
}
} else {
let _ = self.seen_keys.insert(key_str);
}
}
}
self.key_count += 1;
if self.key_count > self.de.config.max_mapping_keys {
return Err(Error::Parse(format!(
"mapping exceeds maximum of {} keys",
self.de.config.max_mapping_keys
)));
}
// Raw-string mode makes `deserialize_str` return the key text without type resolution.
self.de.raw_str_mode = true;
let res = seed.deserialize(&mut *self.de).map(Some);
self.de.raw_str_mode = false;
if res.is_ok() {
self.has_emitted_key = true;
}
return res;
}
}
/// Deserializes the value that follows the key most recently returned.
fn next_value_seed<V>(&mut self, seed: V) -> Result<V::Value>
where
V: DeserializeSeed<'de>,
{
seed.deserialize(&mut *self.de)
}
}
// If the visitor stopped early, drain the rest of the mapping (best effort) so
// the deserializer is positioned after the closing `MappingEnd`.
impl Drop for StreamingMapAccess<'_, '_> {
    fn drop(&mut self) {
        if self.finished {
            return;
        }
        loop {
            let at_end = match self.de.peek_event() {
                Ok(ev) => matches!(ev, Event::MappingEnd { .. }),
                Err(_) => return,
            };
            if at_end {
                let _ = self.de.skip_event();
                return;
            }
            // Skip one entry: first the key, then the value.
            if self.de.skip_value().is_err() || self.de.skip_value().is_err() {
                return;
            }
        }
    }
}
/// `EnumAccess` for the mapping form of an enum, created after the variant-name key has been read.
struct StreamingEnumAccess<'a, 'de> {
// The deserializer positioned at the variant's payload.
de: &'a mut StreamingDeserializer<'de>,
// The variant name taken from the mapping key.
variant: String,
}
impl<'a, 'de> de::EnumAccess<'de> for StreamingEnumAccess<'a, 'de> {
type Error = Error;
type Variant = StreamingVariantAccess<'a, 'de>;
fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant)>
where
V: DeserializeSeed<'de>,
{
let de = de::value::StringDeserializer::<Error>::new(self.variant);
let variant = seed.deserialize(de)?;
Ok((variant, StreamingVariantAccess { de: self.de }))
}
}
/// `VariantAccess` for the mapping form of an enum; every method also consumes
/// the `MappingEnd` that closes the single-entry mapping.
struct StreamingVariantAccess<'a, 'de> {
// The deserializer positioned at the variant's payload.
de: &'a mut StreamingDeserializer<'de>,
}
impl<'de> de::VariantAccess<'de> for StreamingVariantAccess<'_, 'de> {
type Error = Error;
fn unit_variant(self) -> Result<()> {
let ev = self.de.next_event()?;
if !matches!(ev, Event::MappingEnd { .. }) {
self.de.current = Some(ev);
self.de.skip_value()?;
if !matches!(self.de.next_event()?, Event::MappingEnd { .. }) {
return Err(Error::Invalid("expected mapping end".into()));
}
}
Ok(())
}
fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value>
where
T: DeserializeSeed<'de>,
{
let res = seed.deserialize(&mut *self.de)?;
if !matches!(self.de.next_event()?, Event::MappingEnd { .. }) {
return Err(Error::Invalid("expected mapping end".into()));
}
Ok(res)
}
fn tuple_variant<V>(self, _len: usize, visitor: V) -> Result<V::Value>
where
V: Visitor<'de>,
{
let res = de::Deserializer::deserialize_seq(&mut *self.de, visitor)?;
if !matches!(self.de.next_event()?, Event::MappingEnd { .. }) {
return Err(Error::Invalid("expected mapping end".into()));
}
Ok(res)
}
fn struct_variant<V>(self, _fields: &'static [&'static str], visitor: V) -> Result<V::Value>
where
V: Visitor<'de>,
{
let res = de::Deserializer::deserialize_map(&mut *self.de, visitor)?;
if !matches!(self.de.next_event()?, Event::MappingEnd { .. }) {
return Err(Error::Invalid("expected mapping end".into()));
}
Ok(res)
}
}
/// `MapAccess` that presents a tagged node as a single-entry map: the key is the
/// tag text and the value is the node itself.
#[allow(dead_code)]
struct StreamingTagMapAccess<'a, 'de> {
// The deserializer positioned at the tagged node (tag already removed).
de: &'a mut StreamingDeserializer<'de>,
// The two parts of the tag; they are concatenated to form the key.
tag: (String, String),
// Set once the value has been requested; no further key is yielded after that.
done: bool,
}
impl<'de> MapAccess<'de> for StreamingTagMapAccess<'_, 'de> {
type Error = Error;
fn next_key_seed<K>(&mut self, seed: K) -> Result<Option<K::Value>>
where
K: DeserializeSeed<'de>,
{
if self.done {
Ok(None)
} else {
let full = if self.tag.0 == "!" {
format!("!{}", self.tag.1)
} else {
format!("{}{}", self.tag.0, self.tag.1)
};
let de = de::value::StringDeserializer::<Error>::new(full);
seed.deserialize(de).map(Some)
}
}
fn next_value_seed<V>(&mut self, seed: V) -> Result<V::Value>
where
V: DeserializeSeed<'de>,
{
self.done = true;
seed.deserialize(&mut *self.de)
}
}
/// `EnumAccess` for the tagged form of an enum: the tag text names the variant.
#[allow(dead_code)]
struct StreamingTagEnumAccess<'a, 'de> {
// The deserializer positioned at the tagged node (tag already removed).
de: &'a mut StreamingDeserializer<'de>,
// The two parts of the tag; they are concatenated to form the variant name.
tag: (String, String),
}
impl<'a, 'de> de::EnumAccess<'de> for StreamingTagEnumAccess<'a, 'de> {
type Error = Error;
type Variant = StreamingTagVariantAccess<'a, 'de>;
fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant)>
where
V: DeserializeSeed<'de>,
{
let full = if self.tag.0 == "!" {
format!("!{}", self.tag.1)
} else {
format!("{}{}", self.tag.0, self.tag.1)
};
let de = de::value::StringDeserializer::<Error>::new(full);
let variant = seed.deserialize(de)?;
Ok((variant, StreamingTagVariantAccess { de: self.de }))
}
}
/// `VariantAccess` for the tagged form of an enum; the payload is the tagged node itself.
#[allow(dead_code)]
struct StreamingTagVariantAccess<'a, 'de> {
// The deserializer positioned at the tagged node.
de: &'a mut StreamingDeserializer<'de>,
}
impl<'de> de::VariantAccess<'de> for StreamingTagVariantAccess<'_, 'de> {
    type Error = Error;
    /// A unit variant ignores the tagged node by skipping one value.
    fn unit_variant(self) -> Result<()> {
        let Self { de } = self;
        de.skip_value()
    }
    /// Deserializes the tagged node with `seed`.
    fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value>
    where
        T: DeserializeSeed<'de>,
    {
        let Self { de } = self;
        seed.deserialize(de)
    }
    /// Deserializes the tagged node as a sequence.
    fn tuple_variant<V>(self, _len: usize, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>,
    {
        let Self { de } = self;
        de::Deserializer::deserialize_seq(de, visitor)
    }
    /// Deserializes the tagged node as a mapping.
    fn struct_variant<V>(self, _fields: &'static [&'static str], visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>,
    {
        let Self { de } = self;
        de::Deserializer::deserialize_map(de, visitor)
    }
}
/// Attempts to deserialize `s` through the streaming path.
///
/// Returns `None` when the streaming path signalled (via the fallback sentinel
/// error) that it cannot handle the input, `Some(Err(_))` for a real error —
/// including an input longer than `max_document_length` — and `Some(Ok(_))` on
/// success. After a successful value, trailing events are drained until
/// something other than `DocumentEnd` / `StreamStart` is seen; a parser error
/// during that drain is reported.
pub(crate) fn from_str_streaming<T>(s: &str, config: &crate::de::ParserConfig) -> Option<Result<T>>
where
    T: for<'de> Deserialize<'de>,
{
    let parse_config = ParseConfig::from(config);
    let limit = parse_config.max_document_length;
    if s.len() > limit {
        return Some(Err(Error::Parse(format!(
            "document exceeds maximum length of {} bytes",
            limit
        ))));
    }
    let mut de = StreamingDeserializer::with_config(s, parse_config);
    if let Some(registry) = config.tag_registry.as_ref() {
        de = de.with_tag_registry(Arc::clone(registry));
    }
    let value = match T::deserialize(&mut de) {
        Ok(value) => value,
        Err(err) => {
            return if is_fallback_error(&err) {
                None
            } else {
                Some(Err(err))
            };
        }
    };
    loop {
        match de.next_event() {
            Ok(Event::DocumentEnd | Event::StreamStart) => {}
            Ok(_) => break,
            Err(err) => return Some(Err(err)),
        }
    }
    Some(Ok(value))
}
fn is_fallback_error(e: &Error) -> bool {
match e {
Error::Custom(msg) => msg == FALLBACK_SENTINEL,
_ => false,
}
}
/// Resolves the text of a plain scalar to a typed value.
///
/// - `no_schema`: return everything as a string.
/// - `strict`: when set, disables the `True`/`TRUE`/`False`/`FALSE` spellings
///   and the `on`/`off` family.
/// - `legacy`: enables the `yes`/`no`/`y`/`n` family and (unless `strict`) the
///   `on`/`off` family.
/// - `legacy_octal`: forwarded to `parse_integer`.
/// - `legacy_sexagesimal`: enables the colon-separated base-60 integer and
///   float forms.
///
/// Text that matches nothing else is tried as an integer, then (optionally)
/// sexagesimal, then `f64`, and finally returned as a borrowed string.
pub(crate) fn resolve_plain_ext(
    s: &str,
    strict: bool,
    legacy: bool,
    no_schema: bool,
    legacy_octal: bool,
    legacy_sexagesimal: bool,
) -> Scalar<'_> {
    if no_schema {
        return Scalar::Str(Cow::Borrowed(s));
    }
    match s {
        "" | "~" | "null" | "Null" | "NULL" => return Scalar::Null,
        "true" => return Scalar::Bool(true),
        "false" => return Scalar::Bool(false),
        "True" | "TRUE" if !strict => return Scalar::Bool(true),
        "False" | "FALSE" if !strict => return Scalar::Bool(false),
        ".inf" | ".Inf" | ".INF" | "+.inf" | "+.Inf" | "+.INF" => {
            return Scalar::Float(f64::INFINITY)
        }
        "-.inf" | "-.Inf" | "-.INF" => return Scalar::Float(f64::NEG_INFINITY),
        ".nan" | ".NaN" | ".NAN" => return Scalar::Float(f64::NAN),
        "yes" | "Yes" | "YES" | "y" | "Y" if legacy => return Scalar::Bool(true),
        "no" | "No" | "NO" | "n" | "N" if legacy => return Scalar::Bool(false),
        "on" | "On" | "ON" if !strict && legacy => return Scalar::Bool(true),
        "off" | "Off" | "OFF" if !strict && legacy => return Scalar::Bool(false),
        _ => {}
    }
    // Numeric forms, most specific first.
    if let Some(n) = parse_integer(s, legacy_octal) {
        return Scalar::Int(n);
    }
    if legacy_sexagesimal {
        if let Some(n) = parse_sexagesimal_int(s) {
            return Scalar::Int(n);
        }
        if let Some(f) = parse_sexagesimal_float(s) {
            return Scalar::Float(f);
        }
    }
    match s.parse::<f64>() {
        Ok(f) => Scalar::Float(f),
        Err(_) => Scalar::Str(Cow::Borrowed(s)),
    }
}
/// Parses a base-60 integer such as `1:30` (= 90) or `-190:20:30`.
///
/// An optional leading `+` / `-` is followed by two or more `:`-separated
/// components. Each component must be a non-empty run of ASCII digits, and
/// every component after the first must be below 60. Returns `None` when the
/// text has no `:`, a component is malformed, or the value overflows `i64`.
fn parse_sexagesimal_int(s: &str) -> Option<i64> {
    let (negative, body) = match s.strip_prefix('-') {
        Some(tail) => (true, tail),
        None => (false, s.strip_prefix('+').unwrap_or(s)),
    };
    // Without a separator this is an ordinary number, not a base-60 one.
    if !body.contains(':') {
        return None;
    }
    let magnitude = body
        .split(':')
        .enumerate()
        .try_fold(0i64, |acc, (position, component)| {
            if component.is_empty() || !component.bytes().all(|b| b.is_ascii_digit()) {
                return None;
            }
            let value: i64 = component.parse().ok()?;
            // Only the leading component may be 60 or more.
            if position > 0 && value >= 60 {
                return None;
            }
            acc.checked_mul(60)?.checked_add(value)
        })?;
    if negative {
        magnitude.checked_neg()
    } else {
        Some(magnitude)
    }
}
/// Parses a base-60 float such as `1:30.5` (= 90.5).
///
/// An optional leading `+` / `-` is followed by two or more `:`-separated
/// components. Every component except the last must be a non-empty run of
/// ASCII digits. The last component is either such a run or
/// `digits '.' digits*` (for example `30.5` or `30.`). Every component after
/// the first must be below 60.
///
/// The last component is validated explicitly before it is handed to the
/// `f64` parser. That parser also accepts signs and exponents, so without the
/// check inputs like `1:-2.5` or `1:2.5e1` would be taken as numbers.
///
/// Returns `None` when the text has no `:` or any component is malformed.
fn parse_sexagesimal_float(s: &str) -> Option<f64> {
    let (sign, rest) = match s.as_bytes().first() {
        Some(b'-') => (-1.0f64, &s[1..]),
        Some(b'+') => (1.0f64, &s[1..]),
        _ => (1.0f64, s),
    };
    // `rsplit_once` fails exactly when there is no ':' separator.
    let (leading, last) = rest.rsplit_once(':')?;

    let is_digits = |text: &str| !text.is_empty() && text.bytes().all(|b| b.is_ascii_digit());

    let mut total: f64 = 0.0;
    for (idx, part) in leading.split(':').enumerate() {
        if !is_digits(part) {
            return None;
        }
        let n: f64 = part.parse().ok()?;
        // Only the leading component may be 60 or more.
        if idx > 0 && n >= 60.0 {
            return None;
        }
        total = total * 60.0 + n;
    }

    // The final component may carry a fraction, but nothing else: no sign,
    // no exponent, and at least one digit before the decimal point.
    let last_is_valid = match last.split_once('.') {
        Some((whole, fraction)) => {
            is_digits(whole) && fraction.bytes().all(|b| b.is_ascii_digit())
        }
        None => is_digits(last),
    };
    if !last_is_valid {
        return None;
    }
    let n: f64 = last.parse().ok()?;
    // The last component is never the first one, so the < 60 rule applies.
    if n >= 60.0 {
        return None;
    }
    Some(sign * (total * 60.0 + n))
}
/// Parses a plain scalar as an `i64`, returning `None` when it is not an
/// integer literal or does not fit.
///
/// Recognised forms, tried in this order:
///
/// * `0x…` / `0X…`: hexadecimal.
/// * `0o…` / `0O…`: octal.
/// * With `legacy_octal`: a leading `0` followed only by digits `0`-`7`
///   is read as octal (e.g. `0755`).
/// * An optional `+` / `-` followed by ASCII digits: decimal. The whole
///   byte string, sign included, is handed to
///   `crate::simd::parse_decimal_i64`.
///
/// A sign directly after a radix prefix (`0x-ff`, `0o+7`) is rejected here.
/// `i64::from_str_radix` would accept it, which would turn such text into a
/// number instead of leaving it a string.
fn parse_integer(s: &str, legacy_octal: bool) -> Option<i64> {
    let b = s.as_bytes();
    if b.is_empty() {
        return None;
    }
    if b.len() > 2 && b[0] == b'0' {
        let radix = match bytes_to_char(b[1]) {
            'x' | 'X' => Some(16),
            'o' | 'O' => Some(8),
            _ => None,
        };
        if let Some(radix) = radix {
            // `from_str_radix` tolerates a leading sign; a prefixed literal
            // must consist of digits only.
            if matches!(b[2], b'+' | b'-') {
                return None;
            }
            return i64::from_str_radix(&s[2..], radix).ok();
        }
    }
    if legacy_octal
        && b.len() >= 2
        && b[0] == b'0'
        && b[1..].iter().all(|&c| c.is_ascii_digit() && c <= b'7')
    {
        return i64::from_str_radix(&s[1..], 8).ok();
    }
    let start = if b[0] == b'+' || b[0] == b'-' { 1 } else { 0 };
    // A lone sign is not a number.
    if start >= b.len() {
        return None;
    }
    if b[start..].iter().all(|&c| c.is_ascii_digit()) {
        crate::simd::parse_decimal_i64(b)
    } else {
        None
    }
}
/// Widens a single byte to the `char` with the same code point
/// (U+0000 through U+00FF).
fn bytes_to_char(b: u8) -> char {
    char::from(b)
}
/// Returns the events strictly between the outer `MapStart` / `MapEnd` pair.
///
/// Yields `None` unless `buf` begins with `MapStart` and ends with `MapEnd`
/// (which requires at least two events).
fn extract_mapping_body(buf: &[BufferedEvent]) -> Option<&[BufferedEvent]> {
    match buf {
        [BufferedEvent::MapStart, body @ .., BufferedEvent::MapEnd] => Some(body),
        _ => None,
    }
}
/// Adds every key of a buffered mapping body to `seen`.
///
/// `body` is read as alternating key / value entries. Each key must be a
/// scalar; its value is stepped over with `skip_buffered_value`. Returns
/// `None` as soon as a key is not a scalar or a key has no value after it.
/// Keys inserted before that point remain in `seen`.
fn collect_keys(body: &[BufferedEvent], seen: &mut FxHashSet<String>) -> Option<()> {
    let mut remaining = body;
    while let Some((key_event, after_key)) = remaining.split_first() {
        match key_event {
            BufferedEvent::Scalar { value, .. } => {
                let _ = seen.insert(value.clone());
            }
            BufferedEvent::SeqStart
            | BufferedEvent::SeqEnd
            | BufferedEvent::MapStart
            | BufferedEvent::MapEnd
            | BufferedEvent::Alias { .. } => return None,
        }
        // A zero length means the key is the final event, i.e. has no value.
        let value_len = skip_buffered_value(after_key);
        if value_len == 0 {
            return None;
        }
        remaining = &after_key[value_len..];
    }
    Some(())
}
/// Returns how many events the value at the front of `slice` occupies.
///
/// * Empty slice: `0`.
/// * Scalar, alias, or a stray end marker: `1`.
/// * `SeqStart` / `MapStart`: the number of events up to and including the
///   matching end marker, or the whole slice if no match is found. Nesting is
///   tracked only for the same kind of collection as the one being skipped;
///   markers of the other kind are stepped over without affecting the depth.
fn skip_buffered_value(slice: &[BufferedEvent]) -> usize {
    let skipping_sequence = match slice.first() {
        None => return 0,
        Some(BufferedEvent::SeqStart) => true,
        Some(BufferedEvent::MapStart) => false,
        Some(
            BufferedEvent::Scalar { .. }
            | BufferedEvent::Alias { .. }
            | BufferedEvent::SeqEnd
            | BufferedEvent::MapEnd,
        ) => return 1,
    };

    let mut depth = 1usize;
    let mut consumed = 1usize;
    for event in &slice[1..] {
        if depth == 0 {
            break;
        }
        match (event, skipping_sequence) {
            (BufferedEvent::SeqStart, true) | (BufferedEvent::MapStart, false) => depth += 1,
            (BufferedEvent::SeqEnd, true) | (BufferedEvent::MapEnd, false) => depth -= 1,
            _ => {}
        }
        consumed += 1;
    }
    consumed
}
/// Collects the scalar keys found at nesting depth zero of `buf`.
///
/// Events at depth zero are treated as alternating key / value slots: a
/// scalar or alias flips the slot, and a scalar sitting in a key slot is
/// recorded. Collection start markers increase the depth; end markers
/// decrease it (never below zero), and closing a collection back to depth
/// zero makes the next slot a key slot.
fn extract_local_keys(buf: &[BufferedEvent]) -> FxHashSet<String> {
    let mut depth: usize = 0;
    let mut expecting_key = true;
    let mut found = FxHashSet::default();
    for event in buf {
        match event {
            BufferedEvent::MapStart | BufferedEvent::SeqStart => depth += 1,
            BufferedEvent::MapEnd | BufferedEvent::SeqEnd => {
                // Leaving a depth-one collection returns to the top level,
                // where the next event starts a fresh entry.
                if depth == 1 {
                    expecting_key = true;
                }
                depth = depth.saturating_sub(1);
            }
            // Anything inside a nested collection is ignored.
            BufferedEvent::Scalar { .. } | BufferedEvent::Alias { .. } if depth != 0 => {}
            BufferedEvent::Scalar { value, .. } => {
                if expecting_key {
                    let _ = found.insert(value.clone());
                }
                expecting_key = !expecting_key;
            }
            BufferedEvent::Alias { .. } => expecting_key = !expecting_key,
        }
    }
    found
}
/// Copies the key / value entries of `inner` whose key is not in `local`.
///
/// `inner` is read as alternating scalar keys and values, with each value's
/// extent determined by `skip_buffered_value`. Entries whose key appears in
/// `local` are dropped; all others are cloned into the result in their
/// original order. Returns `None` if a key is not a scalar or a key is not
/// followed by a value.
fn filter_merge_entries(
    inner: &[BufferedEvent],
    local: &FxHashSet<String>,
) -> Option<SmallVec<[BufferedEvent; SMALL_VEC_SIZE]>> {
    let mut kept: SmallVec<[BufferedEvent; SMALL_VEC_SIZE]> =
        SmallVec::with_capacity(inner.len());
    let mut cursor = 0;
    while let Some(key_event) = inner.get(cursor) {
        let name = match key_event {
            BufferedEvent::Scalar { value, .. } => value,
            BufferedEvent::SeqStart
            | BufferedEvent::SeqEnd
            | BufferedEvent::MapStart
            | BufferedEvent::MapEnd
            | BufferedEvent::Alias { .. } => return None,
        };
        let value_start = cursor + 1;
        // An empty tail (key without a value) yields a length of zero.
        let value_len = skip_buffered_value(&inner[value_start..]);
        if value_len == 0 {
            return None;
        }
        let entry_end = value_start + value_len;
        if !local.contains(name) {
            kept.extend(inner[cursor..entry_end].iter().cloned());
        }
        cursor = entry_end;
    }
    Some(kept)
}