use std::io::Read;
use std::marker::PhantomData;
#[doc(hidden)]
pub use paste::item as paste_item;
use serde::de::DeserializeOwned;
use tuple_utils::Prepend;
use crate::de::{DeserializeError, Deserializer};
use crate::parser::{EventCode, Parser};
/// Collapses the tuple produced by a path into the value handed to the user.
///
/// A one-element tuple `(A,)` is unwrapped to the bare `A`; tuples with two to
/// sixteen elements are passed through unchanged.
pub trait DeTuple {
    /// Type produced by [`DeTuple::detuple`].
    type Output;

    /// Consumes `self` and returns the collapsed value.
    fn detuple(self) -> Self::Output;
}

impl<Only> DeTuple for (Only,) {
    type Output = Only;

    fn detuple(self) -> Only {
        let (only,) = self;
        only
    }
}

/// Implements [`DeTuple`] as the identity conversion for every tuple arity
/// from the length of the supplied identifier list down to two.
macro_rules! detuple_impls {
    // Internal rule: emit a single identity impl for the listed type parameters.
    (@emit $($ty:ident),+) => {
        impl<$($ty),+> DeTuple for ($($ty),+) {
            type Output = Self;

            fn detuple(self) -> Self::Output {
                self
            }
        }
    };
    // Two parameters left: smallest arity handled by this macro, recursion stops.
    ($first:ident, $second:ident) => {
        detuple_impls!(@emit $first, $second);
    };
    // Three or more: emit for the full list, then recurse without the head.
    ($first:ident, $second:ident, $($rest:ident),+) => {
        detuple_impls!(@emit $first, $second, $($rest),+);
        detuple_impls!($second, $($rest),+);
    };
}

detuple_impls!(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P);
/// `?`-like helper for functions returning `Option<Result<_, E>>`.
///
/// Evaluates to the `Ok` payload of the given `Result`. On `Err` it returns
/// `Some(Err(..))` from the enclosing function, converting the error with
/// `From::from`.
macro_rules! try_some {
    ($e:expr) => {
        match $e {
            Err(failure) => return Some(Err(core::convert::From::from(failure))),
            Ok(value) => value,
        }
    };
}
/// Decides whether an element's tag name is accepted by a path segment.
pub trait TagMatcher {
    /// Returns `true` when `tag_name` is accepted by this matcher.
    fn matches(&self, tag_name: &str) -> bool;
}

/// Matcher that accepts only tag names equal to the stored string.
#[derive(Debug, PartialEq)]
pub struct ExactTagMatch {
    /// The tag name that must be matched exactly.
    needle: &'static str,
}

impl TagMatcher for ExactTagMatch {
    #[inline(always)]
    fn matches(&self, tag_name: &str) -> bool {
        tag_name == self.needle
    }
}

/// Matcher that accepts every tag name (the `*` wildcard of a path).
#[derive(Debug, Default, PartialEq)]
pub struct AnyTagMatch {}

impl TagMatcher for AnyTagMatch {
    #[inline(always)]
    fn matches(&self, _: &str) -> bool {
        true
    }
}
/// Path segment that descends into matching elements without producing a
/// value of its own; its output is whatever the `next` segment yields.
#[derive(Debug)]
pub struct ElementEnter<M, N> {
    /// Decides which start tags are entered.
    tag_matcher: M,
    /// The segment consulted while inside an entered element.
    next: N,
    /// `true` while a matched element has been entered and `next` may still yield.
    entered: bool,
}

impl<M, N> Default for ElementEnter<M, N>
where
    M: Default,
    N: Default,
{
    /// Builds the segment from the default matcher and default next segment.
    fn default() -> Self {
        Self::new(M::default(), N::default())
    }
}

impl<M, N> ElementEnter<M, N> {
    /// Creates a not-yet-entered segment from a matcher and the following segment.
    pub fn new(tag_matcher: M, next: N) -> Self {
        ElementEnter {
            entered: false,
            tag_matcher,
            next,
        }
    }
}
impl<N> ElementEnter<ExactTagMatch, N> {
    /// Creates a segment that enters only elements whose name equals `tag`.
    pub fn tag(tag: &'static str, next: N) -> Self {
        let tag_matcher = ExactTagMatch { needle: tag };
        Self::new(tag_matcher, next)
    }
}

impl<N> ElementEnter<AnyTagMatch, N> {
    /// Creates a segment that enters elements regardless of their name.
    pub fn any(next: N) -> Self {
        let tag_matcher = AnyTagMatch {};
        Self::new(tag_matcher, next)
    }
}
/// Path segment that deserializes a value `T` from a matching element and then
/// descends into it; each result of the `next` segment is paired with a copy
/// of that value.
#[derive(Debug)]
pub struct ElementEnterDeserialize<T, M, N> {
    /// Decides which start tags are deserialized and entered.
    tag_matcher: M,
    /// The segment consulted while inside an entered element.
    next: N,
    /// Value deserialized from the currently entered element, `None` when not inside one.
    entered: Option<T>,
}

impl<T, M, N> Default for ElementEnterDeserialize<T, M, N>
where
    M: Default,
    N: Default,
{
    /// Builds the segment from the default matcher and default next segment.
    fn default() -> Self {
        Self::new(M::default(), N::default())
    }
}

impl<T, M, N> ElementEnterDeserialize<T, M, N> {
    /// Creates a not-yet-entered segment from a matcher and the following segment.
    pub fn new(tag_matcher: M, next: N) -> Self {
        ElementEnterDeserialize {
            entered: None,
            tag_matcher,
            next,
        }
    }
}
impl<T, N> ElementEnterDeserialize<T, ExactTagMatch, N> {
    /// Creates a segment that handles only elements whose name equals `tag`.
    pub fn tag(tag: &'static str, next: N) -> Self {
        let tag_matcher = ExactTagMatch { needle: tag };
        Self::new(tag_matcher, next)
    }
}

impl<T, N> ElementEnterDeserialize<T, AnyTagMatch, N> {
    /// Creates a segment that handles elements regardless of their name.
    pub fn any(next: N) -> Self {
        let tag_matcher = AnyTagMatch {};
        Self::new(tag_matcher, next)
    }
}
/// Final path segment: deserializes every matching element into a `T`.
#[derive(Debug, PartialEq)]
pub struct ElementDeserialize<T: DeserializeOwned, M> {
    /// Decides which start tags are deserialized.
    tag_matcher: M,
    /// Ties the segment to its output type; no `T` is stored.
    _phantom: PhantomData<T>,
}

impl<T, M> Default for ElementDeserialize<T, M>
where
    T: DeserializeOwned,
    M: Default,
{
    /// Builds the segment from the default matcher.
    fn default() -> Self {
        Self::new(M::default())
    }
}

impl<T, M> ElementDeserialize<T, M>
where
    T: DeserializeOwned,
{
    /// Creates the segment from the given matcher.
    pub fn new(tag_matcher: M) -> Self {
        ElementDeserialize {
            _phantom: PhantomData,
            tag_matcher,
        }
    }
}
impl<T> ElementDeserialize<T, ExactTagMatch>
where
    T: DeserializeOwned,
{
    /// Creates a segment that deserializes only elements whose name equals `tag`.
    pub fn tag(tag: &'static str) -> Self {
        let tag_matcher = ExactTagMatch { needle: tag };
        Self::new(tag_matcher)
    }
}

impl<T> ElementDeserialize<T, AnyTagMatch>
where
    T: DeserializeOwned,
{
    /// Creates a segment that deserializes elements regardless of their name.
    pub fn any() -> Self {
        let tag_matcher = AnyTagMatch {};
        Self::new(tag_matcher)
    }
}
mod private {
use serde::de::DeserializeOwned;
pub trait Sealed {}
impl<N, M> Sealed for super::ElementEnter<N, M> {}
impl<T: DeserializeOwned, N, M> Sealed for super::ElementEnterDeserialize<T, N, M> {}
impl<T: DeserializeOwned, M> Sealed for super::ElementDeserialize<T, M> {}
}
/// A chain of path segments that can be driven over a [`Parser`].
///
/// The trait is sealed through `private::Sealed`; only the segment types of
/// this module implement it.
#[doc(hidden)]
pub trait XmlPath: private::Sealed {
    /// Tuple of values produced by this segment and all segments after it.
    type Output: DeTuple;

    /// Advances `parser` until this segment chain yields its next result.
    ///
    /// Returns `Some(Ok(..))` with the next output, `Some(Err(..))` when
    /// parsing or deserialization failed, and `None` when the segment has
    /// nothing more to yield at the current position.
    fn go<R>(&mut self, parser: &mut Parser<R>) -> Option<Result<Self::Output, DeserializeError>>
    where
        R: Read;
}
/// Entering segment: forwards the output of `next` for every matching element.
impl<M: TagMatcher, N: XmlPath> XmlPath for ElementEnter<M, N> {
    type Output = N::Output;

    fn go<R: Read>(&mut self, parser: &mut Parser<R>) -> Option<Result<Self::Output, DeserializeError>> {
        loop {
            if self.entered {
                // Inside a matched element: let the following segment produce first.
                let produced = self.next.go(parser);
                if produced.is_some() {
                    return produced;
                }
                // The following segment is exhausted, so this element is done.
                self.entered = false;
            }

            let mut event = try_some!(parser.next());
            match event.code() {
                // Attributes and text at this level are of no interest to an entering segment.
                EventCode::AttributeName | EventCode::AttributeValue | EventCode::Text => {}
                // The enclosing element (or the input) ended: nothing more to yield here.
                EventCode::EndTagImmediate | EventCode::EndTag | EventCode::Eof => return None,
                EventCode::StartTag => {
                    let tag_name = try_some!(event.get_str());
                    let accepted = self.tag_matcher.matches(tag_name.as_ref());
                    if !accepted {
                        // Not our element; presumably `finish_tag(1)` consumes it up to its
                        // end tag. NOTE(review): confirm against `Parser::finish_tag`.
                        try_some!(parser.finish_tag(1));
                    } else {
                        self.entered = true;
                    }
                }
            }
        }
    }
}
/// Entering segment that also yields a value: every output of `next` is
/// prefixed with a clone of the value deserialized from the entered element.
impl<T: DeserializeOwned + Clone, M: TagMatcher, N: XmlPath> XmlPath for ElementEnterDeserialize<T, M, N>
where N::Output: Prepend<T>, <N::Output as Prepend<T>>::Output: DeTuple
{
    type Output = <N::Output as Prepend<T>>::Output;

    fn go<R: Read>(&mut self, parser: &mut Parser<R>) -> Option<Result<Self::Output, DeserializeError>> {
        loop {
            if let Some(parent) = &self.entered {
                // Inside a matched element: let the following segment produce first.
                if let Some(child) = self.next.go(parser) {
                    // The parent value is cloned only for successful results.
                    return Some(child.map(|tuple| tuple.prepend(T::clone(parent))));
                }
            }
            // Either nothing was entered or the following segment is exhausted.
            self.entered = None;

            let mut event = try_some!(parser.next());
            match event.code() {
                // Attributes and text at this level are skipped.
                EventCode::AttributeName | EventCode::AttributeValue | EventCode::Text => {}
                // The enclosing element (or the input) ended: nothing more to yield here.
                EventCode::EndTagImmediate | EventCode::EndTag | EventCode::Eof => return None,
                EventCode::StartTag => {
                    let tag_name = try_some!(event.get_str());
                    if !self.tag_matcher.matches(&tag_name) {
                        // Not our element; presumably `finish_tag(1)` consumes it up to its
                        // end tag. NOTE(review): confirm against `Parser::finish_tag`.
                        try_some!(parser.finish_tag(1));
                    } else {
                        let opening_tag = tag_name.into();
                        // NOTE(review): the `true` flag presumably restricts deserialization to
                        // the opening tag so nested elements stay available to `next` — confirm.
                        let mut des = Deserializer::new_inside_tag(parser, opening_tag, true);
                        match T::deserialize(&mut des) {
                            Ok(parent) => self.entered = Some(parent),
                            // Deliberately ignored: the element is simply not entered.
                            Err(DeserializeError::UnexpectedEndTag) => {}
                            Err(failure) => return Some(Err(failure)),
                        }
                    }
                }
            }
        }
    }
}
/// Leaf segment: yields one `(T,)` for every matching element at the current level.
///
/// Two behaviours are aligned with the entering segments (`ElementEnter`,
/// `ElementEnterDeserialize`):
///
/// * A start tag that does not match is skipped with `parser.finish_tag(1)`.
///   Without that, the skipped element's own end tag would be mistaken for the
///   end of the enclosing element and all parent segments would lose track of
///   the nesting level.
/// * End of input yields `None`, so an iterator whose path consists of a
///   single leaf segment terminates instead of reporting an error forever.
impl<T: DeserializeOwned, M: TagMatcher> XmlPath for ElementDeserialize<T, M> {
    type Output = (T,);

    fn go<R: Read>(&mut self, parser: &mut Parser<R>) -> Option<Result<Self::Output, DeserializeError>> {
        loop {
            let mut event = try_some!(parser.next());
            match event.code() {
                EventCode::StartTag => {
                    let tag_name = try_some!(event.get_str());
                    if self.tag_matcher.matches(tag_name.as_ref()) {
                        let opening_tag = tag_name.into();
                        // NOTE(review): the `false` flag presumably lets the deserializer consume
                        // the whole element including nested content — confirm against
                        // `Deserializer::new_inside_tag`.
                        let mut des = Deserializer::new_inside_tag(parser, opening_tag, false);
                        return Some(Ok((try_some!(T::deserialize(&mut des)),)));
                    } else {
                        // Skip the unrelated element so its end tag is not read as ours.
                        try_some!(parser.finish_tag(1));
                    }
                }
                // The enclosing element (or the input) ended: nothing more to yield here.
                EventCode::EndTagImmediate | EventCode::EndTag | EventCode::Eof => {
                    return None;
                }
                // Attributes and text of the enclosing element are skipped.
                EventCode::AttributeName | EventCode::AttributeValue | EventCode::Text => {}
            }
        }
    }
}
/// Iterator adaptor that drives a path (`N`) over a [`Parser`] reading from `R`.
pub struct TreeDeserializer<R, N>
where
    R: Read,
{
    /// The event source the path is driven over.
    parser: Parser<R>,
    /// The chain of path segments.
    path: N,
}

/// The item type (without the `Result` wrapper) a [`TreeDeserializer`] yields
/// for the path `N`: the path's output tuple after [`DeTuple::detuple`].
pub type TreeDeserializerOutput<N> = <<N as XmlPath>::Output as DeTuple>::Output;

impl<R: Read, N: XmlPath> TreeDeserializer<R, N> {
    /// Creates a deserializer from an explicit path value and an existing parser.
    pub fn from_path(path: N, parser: Parser<R>) -> Self {
        TreeDeserializer { path, parser }
    }

    /// Creates a deserializer from an explicit path value, wrapping `reader`
    /// in a new parser.
    pub fn from_path_and_reader(path: N, reader: R) -> Self {
        let parser = Parser::new(reader);
        Self::from_path(path, parser)
    }
}

impl<R: Read, N: XmlPath + Default> TreeDeserializer<R, N> {
    /// Creates a deserializer over an existing parser using the default value
    /// of the path type.
    pub fn new(parser: Parser<R>) -> Self {
        Self::from_path(N::default(), parser)
    }

    /// Creates a deserializer using the default value of the path type,
    /// wrapping `reader` in a new parser.
    pub fn from_reader(reader: R) -> Self {
        let parser = Parser::new(reader);
        Self::new(parser)
    }
}
/// Yields one item per result of the path, with the output tuple collapsed by
/// [`DeTuple::detuple`]; errors are passed through unchanged.
impl<R: Read, N: XmlPath> Iterator for TreeDeserializer<R, N> where N::Output: DeTuple {
    type Item = Result<<N::Output as DeTuple>::Output, DeserializeError>;

    fn next(&mut self) -> Option<Self::Item> {
        let step = self.path.go(&mut self.parser);
        step.map(|outcome| outcome.map(|tuple| tuple.detuple()))
    }
}
/// Builds a path value from a comma-separated list of segments.
///
/// Each segment is either a string literal (exact tag name) or `*` (any tag),
/// optionally followed by `=> Type` to deserialize that element into `Type`.
/// The last segment must carry `=> Type`; otherwise expansion ends in a
/// `compile_error!`. The expansion refers to the segment types through
/// `$crate::tree`.
#[macro_export]
macro_rules! xml_path {
// Last segment, exact tag name: leaf that deserializes into `$t`.
($tag_name:literal => $t:ty) => {
$crate::tree::ElementDeserialize::<$t, _>::tag($tag_name)
};
// Last segment, wildcard: leaf that deserializes any element into `$t`.
(* => $t:ty) => {
$crate::tree::ElementDeserialize::<$t, _>::any()
};
// Last segment without `=> Type`: funnel into the error rule below.
($tag_name:literal) => { $crate::xml_path!(*) };
(*) => { compile_error!("Paths must end with `\"tag_name\" => Type` expression.") };
// Non-final segment with a type, exact tag name: deserialize and descend.
($tag_name:literal => $t:ty, $($r_tag_name:tt $(=> $r_t:ty)?),+) => {
$crate::tree::ElementEnterDeserialize::<$t, _, _>::tag($tag_name,
$crate::xml_path!($($r_tag_name $(=> $r_t)?),+)
)
};
// Non-final segment with a type, wildcard: deserialize and descend.
(* => $t:ty, $($r_tag_name:tt $(=> $r_t:ty)?),+) => {
$crate::tree::ElementEnterDeserialize::<$t, _, _>::any(
$crate::xml_path!($($r_tag_name $(=> $r_t)?),+)
)
};
// Non-final segment without a type, exact tag name: only descend.
($tag_name:literal, $($r_tag_name:tt $(=> $r_t:ty)?),+) => {
$crate::tree::ElementEnter::tag($tag_name,
$crate::xml_path!($($r_tag_name $(=> $r_t)?),+)
)
};
// Non-final segment without a type, wildcard: only descend.
(*, $($r_tag_name:tt $(=> $r_t:ty)?),+) => {
$crate::tree::ElementEnter::any(
$crate::xml_path!($($r_tag_name $(=> $r_t)?),+)
)
};
}
/// Declares a type alias `$type_name` for a path, so the path can be named
/// and built through `Default`.
///
/// Invocation form: `xml_path_type!(Name: segment, segment, ...)`, where a
/// segment is a string literal or `*`, optionally followed by `=> Type`.
/// The last segment must carry `=> Type`.
///
/// For every string-literal segment a zero-sized matcher struct implementing
/// `TagMatcher` is generated inside a hidden module named
/// `<Name>__rapid_xml_generated_matchers`. The struct is named
/// `<Type>Matcher<suffix>` (or `Matcher<suffix>` when the segment has no
/// type); the suffix starts as `A` and gains one more `A` per segment.
/// Wildcard segments use `AnyTagMatch` and generate no struct.
///
/// A path that ends without `=> Type` now reports the dedicated
/// `compile_error!` message for both literal and wildcard final segments.
#[macro_export]
macro_rules! xml_path_type {
    // Entry point: generate the matcher module, then the type alias.
    ($type_name:ident : $($r_tag_name:tt $(=> $r_t:ty)?),+) => {
        $crate::tree::paste_item! {
            #[allow(non_snake_case)]
            #[doc(hidden)]
            mod [<$type_name __rapid_xml_generated_matchers>] {
                $crate::xml_path_type!(@structs A $($r_tag_name $(=> $r_t)?),+);
            }
        }
        type $type_name = $crate::xml_path_type!(@types $type_name A $($r_tag_name $(=> $r_t)?),+);
    };

    // @structs, single literal segment: emit its matcher struct.
    (@structs $suffix:ident $tag_name:literal $(=> $t:ty)?) => {
        $crate::tree::paste_item! {
            #[derive(Debug, Default, PartialEq)]
            pub struct [<$($t)? Matcher $suffix>] {}
            impl $crate::tree::TagMatcher for [<$($t)? Matcher $suffix>] {
                #[inline(always)]
                fn matches(&self, tag_name: &str) -> bool {
                    tag_name == $tag_name
                }
            }
        }
    };
    // @structs, single wildcard segment: nothing to generate.
    (@structs $suffix:ident * $(=> $t:ty)?) => {
    };
    // @structs, literal segment followed by more: emit this one, recurse with a longer suffix.
    (@structs $suffix:ident $tag_name:literal $(=> $t:ty)?, $($r_tag_name:tt $(=> $r_t:ty)?),+) => {
        $crate::tree::paste_item! {
            $crate::xml_path_type!(@structs $suffix $tag_name $(=> $t)?);
            $crate::xml_path_type!(@structs [<$suffix A>] $($r_tag_name $(=> $r_t)?),+);
        }
    };
    // @structs, wildcard segment followed by more: only recurse with a longer suffix.
    (@structs $suffix:ident * $(=> $t:ty)?, $($r_tag_name:tt $(=> $r_t:ty)?),+) => {
        $crate::tree::paste_item! {
            $crate::xml_path_type!(@structs [<$suffix A>] $($r_tag_name $(=> $r_t)?),+);
        }
    };

    // @types, final literal segment with a type: leaf using the generated matcher.
    (@types $type_name:ident $suffix:ident $tag_name:literal => $t:ty) => {
        $crate::tree::paste_item! {
            $crate::tree::ElementDeserialize<$t, [<$type_name __rapid_xml_generated_matchers>]::[<$t Matcher $suffix>]>
        }
    };
    // @types, final wildcard segment with a type: leaf using `AnyTagMatch`.
    (@types $type_name:ident $suffix:ident * => $t:ty) => {
        $crate::tree::ElementDeserialize::<$t, $crate::tree::AnyTagMatch>
    };
    // @types, final segment without a type: always an error. Both rules emit the message
    // directly; delegating to `xml_path_type!(*)` matched no rule and hid the message.
    (@types $type_name:ident $suffix:ident $tag_name:literal) => { compile_error!("Paths must end with `\"tag_name\" => Type` expression.") };
    (@types $type_name:ident $suffix:ident *) => { compile_error!("Paths must end with `\"tag_name\" => Type` expression.") };
    // @types, non-final literal segment with a type: deserialize and descend.
    (@types $type_name:ident $suffix:ident $tag_name:literal => $t:ty, $($r_tag_name:tt $(=> $r_t:ty)?),+) => {
        $crate::tree::paste_item! {
            $crate::tree::ElementEnterDeserialize<$t, [<$type_name __rapid_xml_generated_matchers>]::[<$t Matcher $suffix>],
                $crate::xml_path_type!(@types $type_name [<$suffix A>] $($r_tag_name $(=> $r_t)?),+)
            >
        }
    };
    // @types, non-final wildcard segment with a type: deserialize and descend.
    (@types $type_name:ident $suffix:ident * => $t:ty, $($r_tag_name:tt $(=> $r_t:ty)?),+) => {
        $crate::tree::paste_item! {
            $crate::tree::ElementEnterDeserialize::<$t, $crate::tree::AnyTagMatch,
                $crate::xml_path_type!(@types $type_name [<$suffix A>] $($r_tag_name $(=> $r_t)?),+)
            >
        }
    };
    // @types, non-final literal segment without a type: only descend.
    (@types $type_name:ident $suffix:ident $tag_name:literal, $($r_tag_name:tt $(=> $r_t:ty)?),+) => {
        $crate::tree::paste_item! {
            $crate::tree::ElementEnter<[<$type_name __rapid_xml_generated_matchers>]::[<Matcher $suffix>],
                $crate::xml_path_type!(@types $type_name [<$suffix A>] $($r_tag_name $(=> $r_t)?),+)
            >
        }
    };
    // @types, non-final wildcard segment without a type: only descend.
    (@types $type_name:ident $suffix:ident *, $($r_tag_name:tt $(=> $r_t:ty)?),+) => {
        $crate::tree::paste_item! {
            $crate::tree::ElementEnter::<$crate::tree::AnyTagMatch,
                $crate::xml_path_type!(@types $type_name [<$suffix A>] $($r_tag_name $(=> $r_t)?),+)
            >
        }
    };
}
// Unit tests driving `TreeDeserializer` over small in-memory XML documents.
#[cfg(test)]
mod tests {
use std::io::Cursor;
use serde_derive::Deserialize;
use super::*;
// Target type for the `root` element; `Clone` is needed when it is used in a
// non-final `=> Type` segment.
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
struct Root {
xyz: u32,
}
// Target type for `bbb` elements; `Clone` is needed when it is used in a
// non-final `=> Type` segment.
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
struct Bbb {
n: u32,
}
// Target type for `ccc` elements; only ever used as the final segment.
#[derive(Debug, Deserialize, PartialEq, Eq)]
struct Ccc {
m: u32,
}
// Well-formed fixture. It contains unknown sibling elements, an empty `aaa`,
// `bbb` elements without nested `ccc`, `ccc` values given both as attributes
// and as child elements, and an `aaa2` element reachable only via a wildcard.
const SAMPLE_XML: &[u8] = br#"
<root xyz="42">
<aaa>
<bbb n="1">
<ccc m="100"/>
<ccc m="200"/>
</bbb>
<xxx>Unknown tag</xxx>
</aaa>
<xxx>Unknown tag</xxx>
<aaa>
<bbb n="99"/>
</aaa>
<aaa/>
<aaa>
<bbb n="99">Matched tag without anything nested</bbb>
<bbb n="2">
<ccc><m>300</m></ccc>
<ccc><m>400</m></ccc>
</bbb>
</aaa>
<aaa2>
<bbb n="3">
<ccc m="500"/>
</bbb>
</aaa2>
</root>
"#;
// Fixture with faulty `ccc` elements (one without `m`, one with an unquoted
// attribute value, one with non-numeric text) interleaved with valid ones.
const SAMPLE_XML_ERRORS: &[u8] = br#"
<root xyz="42">
<aaa>
<bbb n="1">
<ccc m="100"/>
<ccc/>
<ccc m="200"/>
<ccc m=250/>
</bbb>
<xxx>Unknown tag</xxx>
</aaa>
<xxx>Unknown tag</xxx>
<aaa>
<bbb n="99">Matched tag without anything nested</bbb>
<bbb n="2">
<ccc><m>300</m></ccc>
<ccc><m>asdf</m></ccc>
</bbb>
</aaa>
</root>
"#;
// A path of exact tag names yields every `ccc` under root/aaa/bbb in document order.
#[test]
fn basic() {
let path = xml_path!("root", "aaa", "bbb", "ccc" => Ccc);
let mut des = TreeDeserializer::from_path_and_reader(path, Cursor::new(&SAMPLE_XML[..]));
assert_eq!(des.next().unwrap().unwrap(), Ccc { m: 100 });
assert_eq!(des.next().unwrap().unwrap(), Ccc { m: 200 });
assert_eq!(des.next().unwrap().unwrap(), Ccc { m: 300 });
assert_eq!(des.next().unwrap().unwrap(), Ccc { m: 400 });
assert!(des.next().is_none());
}
// The same document repeated 100 times back to back yields the same four items per copy.
#[test]
fn basic_100_times() {
let path = xml_path!("root", "aaa", "bbb", "ccc" => Ccc);
let xml = SAMPLE_XML.repeat(100);
let mut des = TreeDeserializer::from_path_and_reader(path, Cursor::new(&xml));
for _ in 0..100 {
assert_eq!(des.next().unwrap().unwrap(), Ccc { m: 100 });
assert_eq!(des.next().unwrap().unwrap(), Ccc { m: 200 });
assert_eq!(des.next().unwrap().unwrap(), Ccc { m: 300 });
assert_eq!(des.next().unwrap().unwrap(), Ccc { m: 400 });
}
assert!(des.next().is_none());
}
// A `*` in second position also descends into `aaa2`, adding the `m: 500` item.
#[test]
fn wildcard() {
let path = xml_path!("root", *, "bbb", "ccc" => Ccc);
let mut des = TreeDeserializer::from_path_and_reader(path, Cursor::new(&SAMPLE_XML[..]));
assert_eq!(des.next().unwrap().unwrap(), Ccc { m: 100 });
assert_eq!(des.next().unwrap().unwrap(), Ccc { m: 200 });
assert_eq!(des.next().unwrap().unwrap(), Ccc { m: 300 });
assert_eq!(des.next().unwrap().unwrap(), Ccc { m: 400 });
assert_eq!(des.next().unwrap().unwrap(), Ccc { m: 500 });
assert!(des.next().is_none());
}
// A typed non-final segment makes each item a tuple of the enclosing `Bbb` and the `Ccc`.
#[test]
fn multiple_elements() {
let path = xml_path!("root", "aaa", "bbb" => Bbb, "ccc" => Ccc);
let mut des = TreeDeserializer::from_path_and_reader(path, Cursor::new(&SAMPLE_XML[..]));
assert_eq!(des.next().unwrap().unwrap(), (Bbb { n: 1 }, Ccc { m: 100 }));
assert_eq!(des.next().unwrap().unwrap(), (Bbb { n: 1 }, Ccc { m: 200 }));
assert_eq!(des.next().unwrap().unwrap(), (Bbb { n: 2 }, Ccc { m: 300 }));
assert_eq!(des.next().unwrap().unwrap(), (Bbb { n: 2 }, Ccc { m: 400 }));
assert!(des.next().is_none());
}
// Named path type equivalent to the path used in `multiple_elements`.
xml_path_type!(MyPath: "root", "aaa", "bbb" => Bbb, "ccc" => Ccc);
// Same expectations as `multiple_elements`, but the path is built via `Default` from its type.
#[test]
fn multiple_elements_with_type() {
let mut des = TreeDeserializer::<_, MyPath>::from_reader(Cursor::new(&SAMPLE_XML[..]));
assert_eq!(des.next().unwrap().unwrap(), (Bbb { n: 1 }, Ccc { m: 100 }));
assert_eq!(des.next().unwrap().unwrap(), (Bbb { n: 1 }, Ccc { m: 200 }));
assert_eq!(des.next().unwrap().unwrap(), (Bbb { n: 2 }, Ccc { m: 300 }));
assert_eq!(des.next().unwrap().unwrap(), (Bbb { n: 2 }, Ccc { m: 400 }));
assert!(des.next().is_none());
}
// Errors are reported as items and iteration continues afterwards: the expected
// sequence alternates between a valid item and an `Err`.
#[test]
fn with_errors() {
let path = xml_path!("root", "aaa", "bbb" => Bbb, "ccc" => Ccc);
let mut des = TreeDeserializer::from_path_and_reader(path, Cursor::new(&SAMPLE_XML_ERRORS[..]));
assert_eq!(des.next().unwrap().unwrap(), (Bbb { n: 1 }, Ccc { m: 100 }));
assert!(des.next().unwrap().is_err());
assert_eq!(des.next().unwrap().unwrap(), (Bbb { n: 1 }, Ccc { m: 200 }));
assert!(des.next().unwrap().is_err());
assert_eq!(des.next().unwrap().unwrap(), (Bbb { n: 2 }, Ccc { m: 300 }));
assert!(des.next().unwrap().is_err());
assert!(des.next().is_none());
}
// The outermost element can itself be a typed segment; its value accompanies every item.
#[test]
fn parse_root() {
let path = xml_path!("root" => Root, "aaa", "bbb", "ccc" => Ccc);
let mut des = TreeDeserializer::from_path_and_reader(path, Cursor::new(&SAMPLE_XML[..]));
assert_eq!(des.next().unwrap().unwrap(), (Root { xyz: 42, }, Ccc { m: 100 }));
assert_eq!(des.next().unwrap().unwrap(), (Root { xyz: 42, }, Ccc { m: 200 }));
assert_eq!(des.next().unwrap().unwrap(), (Root { xyz: 42, }, Ccc { m: 300 }));
assert_eq!(des.next().unwrap().unwrap(), (Root { xyz: 42, }, Ccc { m: 400 }));
assert!(des.next().is_none());
}
}
/// Benchmark of `TreeDeserializer`, compiled only for tests with the
/// `bencher` feature enabled.
#[cfg(test)]
#[cfg(feature = "bencher")]
mod bench {
    use std::io::Cursor;
    use test::{Bencher, black_box};
    use serde_derive::Deserialize;
    use super::*;

    /// Enum deserialized from the children of `group`, one variant per tag name
    /// appearing in the fixture.
    #[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
    pub enum Thing {
        VariantA {
            some_field: String,
        },
        VariantB {
            some_field: String,
            another_field: Option<u32>,
            abcd_efgh: Option<u32>,
            mumble_rumble: i16,
            short: Option<String>,
            field_with_long_name: Option<u32>,
            xyz_qwerty: Option<u32>,
        },
        VariantC {
            some_field: String,
            field_1: u32,
            field_2: i16,
            field_3: u32,
        },
    }

    /// One `root/group` document holding a single element of each variant.
    const BENCH_XML: &[u8] = br#"<root><group>
<VariantA some_field="TextAbcd"/>
<VariantB some_field="TextAbcd"><another_field>80</another_field><abcd_efgh>4587</abcd_efgh><mumble_rumble>-8</mumble_rumble><short>AnotherText</short><field_with_long_name>79452</field_with_long_name></VariantB>
<VariantC some_field="TextAbcd"><field_1>123</field_1><field_2>-3</field_2><field_3>567</field_3></VariantC>
</group></root>"#;

    /// Deserializes every element of the fixture repeated 10000 times per iteration.
    #[bench]
    fn bench_tree_deserializer(b: &mut Bencher) {
        let document = BENCH_XML.repeat(10000);
        b.iter(move || {
            let path = xml_path!("root", "group", * => Thing);
            let items = TreeDeserializer::from_path_and_reader(path, Cursor::new(&document));
            for item in items {
                black_box(item.unwrap());
            }
        });
    }
}