#[cfg(feature = "encoding")]
use encoding_rs::Encoding;
use std::ops::Range;
use crate::encoding::Decoder;
use crate::errors::{Error, Result};
use crate::events::Event;
use crate::reader::state::ReaderState;
use memchr;
/// Generates the chainable parser-configuration setters.
///
/// Invoke with no argument when `self` owns the `state` field directly, or
/// with the name of a field when the settings live at `self.<field>.state`.
/// Every generated method stores `val` and returns `&mut Self`.
macro_rules! configure_methods {
    ($($holder:ident)?) => {
        /// Stores `val` in the `expand_empty_elements` setting.
        pub fn expand_empty_elements(&mut self, val: bool) -> &mut Self {
            self $(.$holder)? .state.expand_empty_elements = val;
            self
        }

        /// Stores `val` in both `trim_text_start` and `trim_text_end`.
        pub fn trim_text(&mut self, val: bool) -> &mut Self {
            self $(.$holder)? .state.trim_text_start = val;
            // The trailing half is exactly what `trim_text_end` does.
            self.trim_text_end(val)
        }

        /// Stores `val` in `trim_text_end` only; `trim_text_start` is untouched.
        pub fn trim_text_end(&mut self, val: bool) -> &mut Self {
            self $(.$holder)? .state.trim_text_end = val;
            self
        }

        /// Stores `val` in the `trim_markup_names_in_closing_tags` setting.
        pub fn trim_markup_names_in_closing_tags(&mut self, val: bool) -> &mut Self {
            self $(.$holder)? .state.trim_markup_names_in_closing_tags = val;
            self
        }

        /// Stores `val` in the `check_end_names` setting.
        pub fn check_end_names(&mut self, val: bool) -> &mut Self {
            self $(.$holder)? .state.check_end_names = val;
            self
        }

        /// Stores `val` in the `check_comments` setting.
        pub fn check_comments(&mut self, val: bool) -> &mut Self {
            self $(.$holder)? .state.check_comments = val;
            self
        }
    };
}
/// Expands to the body of a "read next event" method: drives the parser state
/// machine until one event or one error is produced and evaluates to that
/// `Result`.
///
/// - `$self`: the reader; its `state` field is read and updated.
/// - `$buf`: a mutable binding holding the buffer; it is re-assigned when
///   `$read_until_open` hands the buffer back.
/// - `$reader`: expression for the underlying source.
/// - `$read_until_open`, `$read_until_close`: names of the methods on `$self`
///   that perform the two parsing phases.
/// - `$await` (optional): when given, `.$await` is appended to every call on
///   the source and to both method calls.
macro_rules! read_event_impl {
(
$self:ident, $buf:ident,
$reader:expr,
$read_until_open:ident,
$read_until_close:ident
$(, $await:ident)?
) => {{
// Keep stepping the state machine until some arm `break`s with a result.
let event = loop {
match $self.state.state {
// First call. With the `encoding` feature an encoding reported by the
// source is recorded as `BomDetected`, but only if the current encoding
// reports `can_be_refined()`. Note that the `?` operators in this arm
// return from the enclosing function directly, bypassing the transition
// to `Exit` performed after the loop.
ParseState::Init => { #[cfg(feature = "encoding")]
if let Some(encoding) = $reader.detect_encoding() $(.$await)? ? {
if $self.state.encoding.can_be_refined() {
$self.state.encoding = crate::reader::EncodingRef::BomDetected(encoding);
}
}
// Without the `encoding` feature the source is only asked to drop a UTF-8 BOM.
#[cfg(not(feature = "encoding"))]
$reader.remove_utf8_bom() $(.$await)? ?;
match $self.$read_until_open($buf) $(.$await)? {
Ok(Ok(ev)) => break Ok(ev),
// No event was produced; take the buffer back and go around the loop again.
Ok(Err(b)) => $buf = b,
Err(err) => break Err(err),
}
},
// Same handling as the tail of the `Init` arm, without the BOM step.
ParseState::ClosedTag => { match $self.$read_until_open($buf) $(.$await)? {
Ok(Ok(ev)) => break Ok(ev),
Ok(Err(b)) => $buf = b,
Err(err) => break Err(err),
}
},
ParseState::OpenedTag => break $self.$read_until_close($buf) $(.$await)?,
ParseState::Empty => break $self.state.close_expanded_empty(),
// Once in `Exit`, every further call yields `Eof`.
ParseState::Exit => break Ok(Event::Eof),
};
};
// Any error and the end of input both park the state machine in `Exit`.
match event {
Err(_) | Ok(Event::Eof) => $self.state.state = ParseState::Exit,
_ => {}
}
event
}};
}
/// Expands to the body of the "read until `<`" phase.
///
/// Evaluates to `Ok(Ok(event))` when a text event (or `Eof`) is produced and
/// to `Err(e)` on failure. When the very next byte is `<`, the expansion
/// `return`s `Ok(Err($buf))` from the enclosing function, handing the unused
/// buffer back to the caller.
///
/// - `$self`, `$buf`, `$reader`, `$await`: same roles as in `read_event_impl!`.
/// - `$read_event`: accepted by the matcher but not referenced in the expansion.
macro_rules! read_until_open {
(
$self:ident, $buf:ident,
$reader:expr,
$read_event:ident
$(, $await:ident)?
) => {{
// The state is switched before anything is read from the source.
$self.state.state = ParseState::OpenedTag;
if $self.state.trim_text_start {
$reader.skip_whitespace(&mut $self.state.offset) $(.$await)? ?;
}
// A `<` right at the current position is consumed and no event is emitted.
if $reader.skip_one(b'<', &mut $self.state.offset) $(.$await)? ? {
return Ok(Err($buf));
}
// Otherwise read up to the next `<` and turn the bytes into an event.
match $reader
.read_bytes_until(b'<', $buf, &mut $self.state.offset)
$(.$await)?
{
Ok(Some(bytes)) => $self.state.emit_text(bytes).map(Ok),
Ok(None) => Ok(Ok(Event::Eof)),
Err(e) => Err(e),
}
}};
}
/// Expands to the body of the "read until `>`" phase and evaluates to
/// `Result<Event>`.
///
/// The first byte after the already-consumed `<` is peeked and selects both
/// the read routine on `$reader` and the `emit_*` method on `$self.state`:
/// `!` -> `read_bang_element` / `emit_bang`, `/` -> `emit_end`,
/// `?` -> `emit_question_mark`, anything else -> `read_element` / `emit_start`.
/// Whenever the source reports `None`, the result is `Ok(Event::Eof)`.
///
/// `$self`, `$buf`, `$reader` and `$await` have the same roles as in
/// `read_event_impl!`.
macro_rules! read_until_close {
(
$self:ident, $buf:ident,
$reader:expr
$(, $await:ident)?
) => {{
// The state is switched before anything is read from the source.
$self.state.state = ParseState::ClosedTag;
match $reader.peek_one() $(.$await)? {
// `<!...`
Ok(Some(b'!')) => match $reader
.read_bang_element($buf, &mut $self.state.offset)
$(.$await)?
{
Ok(None) => Ok(Event::Eof),
Ok(Some((bang_type, bytes))) => $self.state.emit_bang(bang_type, bytes),
Err(e) => Err(e),
},
// `</...`
Ok(Some(b'/')) => match $reader
.read_bytes_until(b'>', $buf, &mut $self.state.offset)
$(.$await)?
{
Ok(None) => Ok(Event::Eof),
Ok(Some(bytes)) => $self.state.emit_end(bytes),
Err(e) => Err(e),
},
// `<?...`
Ok(Some(b'?')) => match $reader
.read_bytes_until(b'>', $buf, &mut $self.state.offset)
$(.$await)?
{
Ok(None) => Ok(Event::Eof),
Ok(Some(bytes)) => $self.state.emit_question_mark(bytes),
Err(e) => Err(e),
},
// Any other byte: read with `read_element` and emit via `emit_start`.
Ok(Some(_)) => match $reader
.read_element($buf, &mut $self.state.offset)
$(.$await)?
{
Ok(None) => Ok(Event::Eof),
Ok(Some(bytes)) => $self.state.emit_start(bytes),
Err(e) => Err(e),
},
// Nothing left to peek at.
Ok(None) => Ok(Event::Eof),
Err(e) => Err(e),
}
}};
}
/// Expands to a loop that reads events until the `End` event matching `$end`
/// is found, and evaluates to the range `start..end` of buffer positions.
///
/// `start` is `buffer_position()` on entry; `end` is `buffer_position()` taken
/// just before the read that returned the matching `End` event. Read errors
/// and `Eof` make the expansion `return Err(..)` from the enclosing function.
///
/// - `$self`: the reader (must provide `buffer_position`, `decoder` and `$read_event`).
/// - `$end`: the name to match; compared with `e.name()` of `Start`/`End` events.
/// - `$buf`: expression passed to every `$read_event` call.
/// - `$read_event`: name of the method used to read one event.
/// - `$clear`: block executed at the top of every iteration.
/// - `$await` (optional): when given, `.$await` is appended to the read call.
macro_rules! read_to_end {
(
$self:expr, $end:expr, $buf:expr,
$read_event:ident,
// Code block that performs clearing of internal buffer after read of each event
$clear:block
$(, $await:ident)?
) => {{
let start = $self.buffer_position();
// Number of currently open nested elements that carry the same name as `$end`.
let mut depth = 0;
loop {
$clear
// Position before the next event is read; becomes the end of the span
// if that event turns out to be the matching `End`.
let end = $self.buffer_position();
match $self.$read_event($buf) $(.$await)? {
Err(e) => return Err(e),
Ok(Event::Start(e)) if e.name() == $end => depth += 1,
Ok(Event::End(e)) if e.name() == $end => {
if depth == 0 {
break start..end;
}
depth -= 1;
}
// Input ended before the matching end tag was seen.
Ok(Event::Eof) => {
let name = $self.decoder().decode($end.as_ref());
return Err(Error::UnexpectedEof(format!("</{:?}>", name)));
}
_ => (),
}
}
}};
}
#[cfg(feature = "async-tokio")]
mod async_tokio;
mod buffered_reader;
mod ns_reader;
mod slice_reader;
mod state;
pub use ns_reader::NsReader;
/// A half-open range of `usize` offsets (`start..end`).
// NOTE(review): presumably these are byte offsets into the input, matching
// what `Reader::buffer_position` reports — confirm against the callers.
pub type Span = Range<usize>;
/// Position of the reader inside its parsing state machine.
///
/// The enum carries no data, so it is `Copy` and comparable like the other
/// small state enums in this module; `Debug` makes it printable in
/// diagnostics.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum ParseState {
    /// Nothing has been read yet. The first read performs the BOM / encoding
    /// step before looking for text.
    Init,
    /// Set at the start of the "read until `<`" phase. The next read runs the
    /// "read until `>`" phase, and `buffer_position` reports `offset - 1`
    /// while in this state.
    OpenedTag,
    /// Set at the start of the "read until `>`" phase. The next read runs the
    /// "read until `<`" phase again.
    ClosedTag,
    /// The next read is answered by `ReaderState::close_expanded_empty`.
    Empty,
    /// Entered after any error or `Eof`; every further read yields `Eof`.
    Exit,
}
/// An `encoding_rs` encoding tagged with how it was selected.
///
/// Only `Implicit` and `BomDetected` report `can_be_refined() == true`; the
/// reader replaces such a value with `BomDetected` when the source reports an
/// encoding on the first read.
#[cfg(feature = "encoding")]
#[derive(Clone, Copy)]
enum EncodingRef {
// NOTE(review): presumably the default used when nothing was specified — confirm.
Implicit(&'static Encoding),
// NOTE(review): presumably chosen by the caller; it is never refined.
Explicit(&'static Encoding),
// Stored by the first read when the source reports an encoding.
BomDetected(&'static Encoding),
// NOTE(review): presumably taken from the XML declaration; it is never refined.
XmlDetected(&'static Encoding),
}
#[cfg(feature = "encoding")]
impl EncodingRef {
    /// Returns the wrapped encoding, whichever way it was selected.
    #[inline]
    fn encoding(&self) -> &'static Encoding {
        // Every variant carries the same payload type, so one arm covers them all.
        match *self {
            Self::Implicit(enc)
            | Self::Explicit(enc)
            | Self::BomDetected(enc)
            | Self::XmlDetected(enc) => enc,
        }
    }

    /// Tells whether a later detection step may still replace this encoding:
    /// `true` for `Implicit` and `BomDetected`, `false` otherwise.
    #[inline]
    fn can_be_refined(&self) -> bool {
        match self {
            Self::Explicit(_) | Self::XmlDetected(_) => false,
            Self::Implicit(_) | Self::BomDetected(_) => true,
        }
    }
}
/// An event reader over a source of type `R`.
///
/// Bundles the source with the parser state (settings, current offset and
/// state-machine position).
#[derive(Clone)]
pub struct Reader<R> {
// The wrapped source; exposed through `get_ref`, `get_mut` and `into_inner`.
reader: R,
// Parser settings and progress; modified by the configuration setters and by reads.
state: ReaderState,
}
impl<R> Reader<R> {
    /// Wraps `reader` in a new `Reader` whose parser state is
    /// `ReaderState::default()`.
    pub fn from_reader(reader: R) -> Self {
        let state = ReaderState::default();
        Self { reader, state }
    }

    // Chainable configuration setters (`trim_text`, `check_end_names`, ...).
    configure_methods!();
}
impl<R> Reader<R> {
    /// Consumes the `Reader` and returns the wrapped source.
    pub fn into_inner(self) -> R {
        let Self { reader, .. } = self;
        reader
    }

    /// Borrows the wrapped source.
    pub fn get_ref(&self) -> &R {
        &self.reader
    }

    /// Mutably borrows the wrapped source.
    pub fn get_mut(&mut self) -> &mut R {
        &mut self.reader
    }

    /// Returns the current offset of the reader.
    ///
    /// While the state machine is in `ParseState::OpenedTag` the stored
    /// offset is reported minus one; in every other state it is returned
    /// unchanged.
    pub fn buffer_position(&self) -> usize {
        let consumed = self.state.offset;
        if matches!(self.state.state, ParseState::OpenedTag) {
            consumed - 1
        } else {
            consumed
        }
    }

    /// Returns the decoder provided by the parser state.
    #[inline]
    pub fn decoder(&self) -> Decoder {
        self.state.decoder()
    }
}
// Synchronous instantiations of the parsing macros for any source `R` that
// implements `XmlSource<'i, B>` with buffer type `B`.
impl<R> Reader<R> {
// Reads the next event; the whole body is generated by `read_event_impl!`.
fn read_event_impl<'i, B>(&mut self, mut buf: B) -> Result<Event<'i>>
where
R: XmlSource<'i, B>,
{
read_event_impl!(self, buf, self.reader, read_until_open, read_until_close)
}
// "Read until `<`" phase. The inner `Err(buf)` hands the buffer back when no
// event was produced.
fn read_until_open<'i, B>(&mut self, buf: B) -> Result<std::result::Result<Event<'i>, B>>
where
R: XmlSource<'i, B>,
{
read_until_open!(self, buf, self.reader, read_event_impl)
}
// "Read until `>`" phase; the body is generated by `read_until_close!`.
fn read_until_close<'i, B>(&mut self, buf: B) -> Result<Event<'i>>
where
R: XmlSource<'i, B>,
{
read_until_close!(self, buf, self.reader)
}
}
/// Low-level reading operations the parsing macros need from a source.
///
/// `'r` is the lifetime of the byte slices handed out and `B` is the buffer
/// type passed by value into the reading methods. Every `position` argument
/// is the reader's running offset (`state.offset` at the call sites here).
// NOTE(review): implementations are presumably expected to advance `position`
// by the number of bytes they consume — confirm against the implementors.
trait XmlSource<'r, B> {
// Called on the first read when the `encoding` feature is disabled.
#[cfg(not(feature = "encoding"))]
fn remove_utf8_bom(&mut self) -> Result<()>;
// Called on the first read when the `encoding` feature is enabled; a returned
// encoding is stored by the reader as BOM-detected.
#[cfg(feature = "encoding")]
fn detect_encoding(&mut self) -> Result<Option<&'static Encoding>>;
// Reads bytes up to `byte`. `None` is treated by the callers as end of input.
fn read_bytes_until(
&mut self,
byte: u8,
buf: B,
position: &mut usize,
) -> Result<Option<&'r [u8]>>;
// Reads a `<!...>` construct and reports which kind it is.
fn read_bang_element(
&mut self,
buf: B,
position: &mut usize,
) -> Result<Option<(BangType, &'r [u8])>>;
// Reads the content of a tag; used when the tag starts with none of `!`, `/`, `?`.
fn read_element(&mut self, buf: B, position: &mut usize) -> Result<Option<&'r [u8]>>;
// Called before reading text when `trim_text_start` is set.
fn skip_whitespace(&mut self, position: &mut usize) -> Result<()>;
// The caller treats `true` as "`byte` was at the current position and has been skipped".
fn skip_one(&mut self, byte: u8, position: &mut usize) -> Result<bool>;
// Looks at the next byte; `None` is treated by the caller as end of input.
fn peek_one(&mut self) -> Result<Option<u8>>;
}
#[derive(Debug, PartialEq)]
enum BangType {
CData,
Comment,
DocType,
}
impl BangType {
#[inline(always)]
fn new(byte: Option<u8>) -> Result<Self> {
Ok(match byte {
Some(b'[') => Self::CData,
Some(b'-') => Self::Comment,
Some(b'D') | Some(b'd') => Self::DocType,
Some(b) => return Err(Error::UnexpectedBang(b)),
None => return Err(Error::UnexpectedEof("Bang".to_string())),
})
}
#[inline(always)]
fn parse<'b>(&self, buf: &[u8], chunk: &'b [u8]) -> Option<(&'b [u8], usize)> {
for i in memchr::memchr_iter(b'>', chunk) {
match self {
Self::Comment if buf.len() + i > 4 => {
if chunk[..i].ends_with(b"--") {
return Some((&chunk[..i], i + 1)); }
if i == 1 && buf.ends_with(b"-") && chunk[0] == b'-' {
return Some((&chunk[..i], i + 1)); }
if i == 0 && buf.ends_with(b"--") {
return Some((&[], i + 1)); }
}
Self::Comment => {}
Self::CData => {
if chunk[..i].ends_with(b"]]") {
return Some((&chunk[..i], i + 1)); }
if i == 1 && buf.ends_with(b"]") && chunk[0] == b']' {
return Some((&chunk[..i], i + 1)); }
if i == 0 && buf.ends_with(b"]]") {
return Some((&[], i + 1)); }
}
Self::DocType => {
let content = &chunk[..i];
let balance = memchr::memchr2_iter(b'<', b'>', content)
.map(|p| if content[p] == b'<' { 1i32 } else { -1 })
.sum::<i32>();
if balance == 0 {
return Some((content, i + 1)); }
}
}
}
None
}
#[inline]
fn to_err(&self) -> Error {
let bang_str = match self {
Self::CData => "CData",
Self::Comment => "Comment",
Self::DocType => "DOCTYPE",
};
Error::UnexpectedEof(bang_str.to_string())
}
}
/// Quoting context while scanning a tag for its closing `>`.
#[derive(Clone, Copy)]
enum ReadElementState {
    /// Outside of any quoted value; a `>` ends the tag here.
    Elem,
    /// Inside a `'...'` value.
    SingleQ,
    /// Inside a `"..."` value.
    DoubleQ,
}

impl ReadElementState {
    /// Scans `chunk` for the `>` that ends the tag, ignoring any `>` inside
    /// quotes.
    ///
    /// The quoting context is kept in `self`, so the scan can continue with
    /// the next chunk when this one returns `None`. On success returns the
    /// bytes before the `>` and the number of bytes of `chunk` used,
    /// including the `>`.
    #[inline(always)]
    fn change<'b>(&mut self, chunk: &'b [u8]) -> Option<(&'b [u8], usize)> {
        for pos in memchr::memchr3_iter(b'>', b'\'', b'"', chunk) {
            let byte = chunk[pos];
            let current = *self;
            match current {
                Self::Elem => match byte {
                    // Only an unquoted `>` terminates the tag.
                    b'>' => return Some((&chunk[..pos], pos + 1)),
                    b'\'' => *self = Self::SingleQ,
                    b'"' => *self = Self::DoubleQ,
                    _ => {}
                },
                // Inside quotes only the matching quote character matters.
                Self::SingleQ => {
                    if byte == b'\'' {
                        *self = Self::Elem;
                    }
                }
                Self::DoubleQ => {
                    if byte == b'"' {
                        *self = Self::Elem;
                    }
                }
            }
        }
        None
    }
}
/// Returns `true` when `b` is a space, tab, line feed or carriage return.
#[inline]
pub(crate) const fn is_whitespace(b: u8) -> bool {
    match b {
        b' ' | b'\t' | b'\n' | b'\r' => true,
        _ => false,
    }
}
#[cfg(test)]
mod test {
macro_rules! check {
(
#[$test:meta]
$read_event:ident,
$read_until_close:ident,
// constructor of the XML source on which internal functions will be called
$source:path,
// constructor of the buffer to which read data will be stored
$buf:expr
$(, $async:ident, $await:ident)?
) => {
// Tests of `read_bytes_until` with `*` as the delimiter. Each test checks the
// bytes returned and the value left in `position` afterwards.
mod read_bytes_until {
use super::*;
use crate::utils::Bytes;
use pretty_assertions::assert_eq;
// Empty input: nothing is returned and the position does not move.
#[$test]
$($async)? fn empty() {
let buf = $buf;
let mut position = 0;
let mut input = b"".as_ref();
assert_eq!(
$source(&mut input)
.read_bytes_until(b'*', buf, &mut position)
$(.$await)?
.unwrap()
.map(Bytes),
None
);
assert_eq!(position, 0);
}
// No delimiter in the input: everything up to the end is returned.
#[$test]
$($async)? fn non_existent() {
let buf = $buf;
let mut position = 0;
let mut input = b"abcdef".as_ref();
assert_eq!(
$source(&mut input)
.read_bytes_until(b'*', buf, &mut position)
$(.$await)?
.unwrap()
.map(Bytes),
Some(Bytes(b"abcdef"))
);
assert_eq!(position, 6);
}
// Delimiter is the first byte: empty content, position 1 (the delimiter is counted).
#[$test]
$($async)? fn at_the_start() {
let buf = $buf;
let mut position = 0;
let mut input = b"*abcdef".as_ref();
assert_eq!(
$source(&mut input)
.read_bytes_until(b'*', buf, &mut position)
$(.$await)?
.unwrap()
.map(Bytes),
Some(Bytes(b""))
);
assert_eq!(position, 1); }
// Delimiter in the middle: 3 content bytes plus the delimiter give position 4.
#[$test]
$($async)? fn inside() {
let buf = $buf;
let mut position = 0;
let mut input = b"abc*def".as_ref();
assert_eq!(
$source(&mut input)
.read_bytes_until(b'*', buf, &mut position)
$(.$await)?
.unwrap()
.map(Bytes),
Some(Bytes(b"abc"))
);
assert_eq!(position, 4); }
// Delimiter is the last byte: 6 content bytes plus the delimiter give position 7.
#[$test]
$($async)? fn in_the_end() {
let buf = $buf;
let mut position = 0;
let mut input = b"abcdef*".as_ref();
assert_eq!(
$source(&mut input)
.read_bytes_until(b'*', buf, &mut position)
$(.$await)?
.unwrap()
.map(Bytes),
Some(Bytes(b"abcdef"))
);
assert_eq!(position, 7); }
}
mod read_bang_element {
use super::*;
// Tests of `read_bang_element` on CDATA sections.
mod cdata {
    use super::*;
    use crate::errors::Error;
    use crate::reader::BangType;
    use crate::utils::Bytes;
    use pretty_assertions::assert_eq;

    // A malformed opening sequence is expected to be reported as unclosed CDATA.
    #[$test]
    #[ignore = "start CDATA sequence fully checked outside of `read_bang_element`"]
    $($async)? fn not_properly_start() {
        let buf = $buf;
        let mut position = 0;
        let mut input = b"![]]>other content".as_ref();

        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
            Err(Error::UnexpectedEof(s)) if s == "CData" => {}
            other => panic!(
                r#"Expected `UnexpectedEof("CData")`, but result is: {:?}"#,
                other
            ),
        }
        assert_eq!(position, 0);
    }

    // Input ends before `]]>`: an error is returned and the position stays at 0.
    #[$test]
    $($async)? fn not_closed() {
        let buf = $buf;
        let mut position = 0;
        let mut input = b"![CDATA[other content".as_ref();

        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
            Err(Error::UnexpectedEof(s)) if s == "CData" => {}
            other => panic!(
                r#"Expected `UnexpectedEof("CData")`, but result is: {:?}"#,
                other
            ),
        }
        assert_eq!(position, 0);
    }

    // Empty CDATA: the returned bytes keep the trailing `]]` but not the `>`.
    #[$test]
    $($async)? fn empty() {
        let buf = $buf;
        let mut position = 0;
        let mut input = b"![CDATA[]]>other content".as_ref();

        assert_eq!(
            $source(&mut input)
                .read_bang_element(buf, &mut position)
                $(.$await)?
                .unwrap()
                .map(|(ty, data)| (ty, Bytes(data))),
            Some((BangType::CData, Bytes(b"![CDATA[]]")))
        );
        assert_eq!(position, 11);
    }

    // `]] ]>` inside the content does not end the section; only the first
    // `]]>` does.
    #[$test]
    $($async)? fn with_content() {
        let buf = $buf;
        let mut position = 0;
        let mut input = b"![CDATA[cdata]] ]>content]]>other content]]>".as_ref();

        assert_eq!(
            $source(&mut input)
                .read_bang_element(buf, &mut position)
                $(.$await)?
                .unwrap()
                .map(|(ty, data)| (ty, Bytes(data))),
            Some((BangType::CData, Bytes(b"![CDATA[cdata]] ]>content]]")))
        );
        assert_eq!(position, 28);
    }
}
// Tests of `read_bang_element` on comments. A comment counts as closed only
// when `-->` follows a complete `!--` opening.
mod comment {
    use super::*;
    use crate::errors::Error;
    use crate::reader::BangType;
    use crate::utils::Bytes;
    use pretty_assertions::assert_eq;

    // A malformed opening sequence is expected to be reported as an unclosed comment.
    #[$test]
    #[ignore = "start comment sequence fully checked outside of `read_bang_element`"]
    $($async)? fn not_properly_start() {
        let buf = $buf;
        let mut position = 0;
        let mut input = b"!- -->other content".as_ref();

        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
            Err(Error::UnexpectedEof(s)) if s == "Comment" => {}
            other => panic!(
                r#"Expected `UnexpectedEof("Comment")`, but result is: {:?}"#,
                other
            ),
        }
        assert_eq!(position, 0);
    }

    // A single `-` before `>` does not close a comment.
    #[$test]
    $($async)? fn not_properly_end() {
        let buf = $buf;
        let mut position = 0;
        let mut input = b"!->other content".as_ref();

        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
            Err(Error::UnexpectedEof(s)) if s == "Comment" => {}
            other => panic!(
                r#"Expected `UnexpectedEof("Comment")`, but result is: {:?}"#,
                other
            ),
        }
        assert_eq!(position, 0);
    }

    // No `>` at all.
    #[$test]
    $($async)? fn not_closed1() {
        let buf = $buf;
        let mut position = 0;
        let mut input = b"!--other content".as_ref();

        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
            Err(Error::UnexpectedEof(s)) if s == "Comment" => {}
            other => panic!(
                r#"Expected `UnexpectedEof("Comment")`, but result is: {:?}"#,
                other
            ),
        }
        assert_eq!(position, 0);
    }

    // `!-->`: the two dashes belong to the opening, so nothing closes it.
    #[$test]
    $($async)? fn not_closed2() {
        let buf = $buf;
        let mut position = 0;
        let mut input = b"!-->other content".as_ref();

        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
            Err(Error::UnexpectedEof(s)) if s == "Comment" => {}
            other => panic!(
                r#"Expected `UnexpectedEof("Comment")`, but result is: {:?}"#,
                other
            ),
        }
        assert_eq!(position, 0);
    }

    // `!--->`: still one dash short of a complete `!--` + `-->`.
    #[$test]
    $($async)? fn not_closed3() {
        let buf = $buf;
        let mut position = 0;
        let mut input = b"!--->other content".as_ref();

        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
            Err(Error::UnexpectedEof(s)) if s == "Comment" => {}
            other => panic!(
                r#"Expected `UnexpectedEof("Comment")`, but result is: {:?}"#,
                other
            ),
        }
        assert_eq!(position, 0);
    }

    // Shortest valid comment: the returned bytes keep the trailing `--`.
    #[$test]
    $($async)? fn empty() {
        let buf = $buf;
        let mut position = 0;
        let mut input = b"!---->other content".as_ref();

        assert_eq!(
            $source(&mut input)
                .read_bang_element(buf, &mut position)
                $(.$await)?
                .unwrap()
                .map(|(ty, data)| (ty, Bytes(data))),
            Some((BangType::Comment, Bytes(b"!----")))
        );
        assert_eq!(position, 6);
    }

    // The early `>` after `!---` is part of the content, not the terminator.
    #[$test]
    $($async)? fn with_content() {
        let buf = $buf;
        let mut position = 0;
        let mut input = b"!--->comment<--->other content".as_ref();

        assert_eq!(
            $source(&mut input)
                .read_bang_element(buf, &mut position)
                $(.$await)?
                .unwrap()
                .map(|(ty, data)| (ty, Bytes(data))),
            Some((BangType::Comment, Bytes(b"!--->comment<---")))
        );
        assert_eq!(position, 17);
    }
}
mod doctype {
use super::*;
// Tests of `read_bang_element` on an upper-case `!DOCTYPE`.
mod uppercase {
    use super::*;
    use crate::errors::Error;
    use crate::reader::BangType;
    use crate::utils::Bytes;
    use pretty_assertions::assert_eq;

    // Only `!D` followed by text and no `>`: reported as unclosed DOCTYPE.
    #[$test]
    $($async)? fn not_properly_start() {
        let buf = $buf;
        let mut position = 0;
        let mut input = b"!D other content".as_ref();

        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
            Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {}
            other => panic!(
                r#"Expected `UnexpectedEof("DOCTYPE")`, but result is: {:?}"#,
                other
            ),
        }
        assert_eq!(position, 0);
    }

    // Keyword directly followed by text and no `>`.
    #[$test]
    $($async)? fn without_space() {
        let buf = $buf;
        let mut position = 0;
        let mut input = b"!DOCTYPEother content".as_ref();

        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
            Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {}
            other => panic!(
                r#"Expected `UnexpectedEof("DOCTYPE")`, but result is: {:?}"#,
                other
            ),
        }
        assert_eq!(position, 0);
    }

    // Keyword immediately closed by `>`: 8 bytes returned, position 9.
    #[$test]
    $($async)? fn empty() {
        let buf = $buf;
        let mut position = 0;
        let mut input = b"!DOCTYPE>other content".as_ref();

        assert_eq!(
            $source(&mut input)
                .read_bang_element(buf, &mut position)
                $(.$await)?
                .unwrap()
                .map(|(ty, data)| (ty, Bytes(data))),
            Some((BangType::DocType, Bytes(b"!DOCTYPE")))
        );
        assert_eq!(position, 9);
    }

    // Input ends without any `>`.
    #[$test]
    $($async)? fn not_closed() {
        let buf = $buf;
        let mut position = 0;
        let mut input = b"!DOCTYPE other content".as_ref();

        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
            Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {}
            other => panic!(
                r#"Expected `UnexpectedEof("DOCTYPE")`, but result is: {:?}"#,
                other
            ),
        }
        assert_eq!(position, 0);
    }
}
// Tests of `read_bang_element` on a lower-case `!doctype`. The error label
// stays the upper-case "DOCTYPE".
mod lowercase {
    use super::*;
    use crate::errors::Error;
    use crate::reader::BangType;
    use crate::utils::Bytes;
    use pretty_assertions::assert_eq;

    // Only `!d` followed by text and no `>`: reported as unclosed DOCTYPE.
    #[$test]
    $($async)? fn not_properly_start() {
        let buf = $buf;
        let mut position = 0;
        let mut input = b"!d other content".as_ref();

        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
            Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {}
            other => panic!(
                r#"Expected `UnexpectedEof("DOCTYPE")`, but result is: {:?}"#,
                other
            ),
        }
        assert_eq!(position, 0);
    }

    // Keyword directly followed by text and no `>`.
    #[$test]
    $($async)? fn without_space() {
        let buf = $buf;
        let mut position = 0;
        let mut input = b"!doctypeother content".as_ref();

        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
            Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {}
            other => panic!(
                r#"Expected `UnexpectedEof("DOCTYPE")`, but result is: {:?}"#,
                other
            ),
        }
        assert_eq!(position, 0);
    }

    // Keyword immediately closed by `>`: 8 bytes returned, position 9.
    #[$test]
    $($async)? fn empty() {
        let buf = $buf;
        let mut position = 0;
        let mut input = b"!doctype>other content".as_ref();

        assert_eq!(
            $source(&mut input)
                .read_bang_element(buf, &mut position)
                $(.$await)?
                .unwrap()
                .map(|(ty, data)| (ty, Bytes(data))),
            Some((BangType::DocType, Bytes(b"!doctype")))
        );
        assert_eq!(position, 9);
    }

    // Input ends without any `>`.
    #[$test]
    $($async)? fn not_closed() {
        let buf = $buf;
        let mut position = 0;
        let mut input = b"!doctype other content".as_ref();

        match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
            Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {}
            other => panic!(
                r#"Expected `UnexpectedEof("DOCTYPE")`, but result is: {:?}"#,
                other
            ),
        }
        assert_eq!(position, 0);
    }
}
}
}
mod read_element {
use super::*;
use crate::utils::Bytes;
use pretty_assertions::assert_eq;
// Empty input: `read_element` returns `None` and leaves the position at 0.
#[$test]
$($async)? fn empty() {
let buf = $buf;
let mut position = 0;
let mut input = b"".as_ref();
assert_eq!(
$source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes),
None
);
assert_eq!(position, 0);
}
// Tests of `read_element` on opening tags. In every case the input ends with
// the closing `>`, so the final position equals the input length.
mod open {
    use super::*;
    use crate::utils::Bytes;
    use pretty_assertions::assert_eq;

    // `>` alone: empty content, one byte consumed.
    #[$test]
    $($async)? fn empty_tag() {
        let buf = $buf;
        let mut position = 0;
        let mut input = b">".as_ref();

        assert_eq!(
            $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes),
            Some(Bytes(b""))
        );
        assert_eq!(position, 1);
    }

    #[$test]
    $($async)? fn normal() {
        let buf = $buf;
        let mut position = 0;
        let mut input = b"tag>".as_ref();

        assert_eq!(
            $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes),
            Some(Bytes(b"tag"))
        );
        assert_eq!(position, 4);
    }

    #[$test]
    $($async)? fn empty_ns_empty_tag() {
        let buf = $buf;
        let mut position = 0;
        let mut input = b":>".as_ref();

        assert_eq!(
            $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes),
            Some(Bytes(b":"))
        );
        assert_eq!(position, 2);
    }

    #[$test]
    $($async)? fn empty_ns() {
        let buf = $buf;
        let mut position = 0;
        let mut input = b":tag>".as_ref();

        assert_eq!(
            $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes),
            Some(Bytes(b":tag"))
        );
        assert_eq!(position, 5);
    }

    // A `>` inside quoted attribute values must not end the tag.
    #[$test]
    $($async)? fn with_attributes() {
        let buf = $buf;
        let mut position = 0;
        let xml: &[u8] = br#"tag attr-1=">" attr2 = '>' 3attr>"#;
        let mut input = xml;

        assert_eq!(
            $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes),
            Some(Bytes(br#"tag attr-1=">" attr2 = '>' 3attr"#))
        );
        // The whole input, including the final `>`, has been consumed. The
        // expected value is taken from the input so it cannot drift from the
        // literal above.
        assert_eq!(position, xml.len());
    }
}
// Tests of `read_element` on self-closed tags. The `/` stays in the returned
// bytes, and since every input ends with `>` the final position equals the
// input length.
mod self_closed {
    use super::*;
    use crate::utils::Bytes;
    use pretty_assertions::assert_eq;

    #[$test]
    $($async)? fn empty_tag() {
        let buf = $buf;
        let mut position = 0;
        let mut input = b"/>".as_ref();

        assert_eq!(
            $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes),
            Some(Bytes(b"/"))
        );
        assert_eq!(position, 2);
    }

    #[$test]
    $($async)? fn normal() {
        let buf = $buf;
        let mut position = 0;
        let mut input = b"tag/>".as_ref();

        assert_eq!(
            $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes),
            Some(Bytes(b"tag/"))
        );
        assert_eq!(position, 5);
    }

    #[$test]
    $($async)? fn empty_ns_empty_tag() {
        let buf = $buf;
        let mut position = 0;
        let mut input = b":/>".as_ref();

        assert_eq!(
            $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes),
            Some(Bytes(b":/"))
        );
        assert_eq!(position, 3);
    }

    #[$test]
    $($async)? fn empty_ns() {
        let buf = $buf;
        let mut position = 0;
        let mut input = b":tag/>".as_ref();

        assert_eq!(
            $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes),
            Some(Bytes(b":tag/"))
        );
        assert_eq!(position, 6);
    }

    // A `/>` inside quoted attribute values must not end the tag.
    #[$test]
    $($async)? fn with_attributes() {
        let buf = $buf;
        let mut position = 0;
        let xml: &[u8] = br#"tag attr-1="/>" attr2 = '/>' 3attr/>"#;
        let mut input = xml;

        assert_eq!(
            $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes),
            Some(Bytes(br#"tag attr-1="/>" attr2 = '/>' 3attr/"#))
        );
        // The whole input, including the final `>`, has been consumed. The
        // expected value is taken from the input so it cannot drift from the
        // literal above.
        assert_eq!(position, xml.len());
    }
}
}
// Regression tests: truncated `<!` constructs passed to the "read until
// close" method must produce `UnexpectedEof` carrying the construct's label.
mod issue_344 {
    use crate::errors::Error;
    use crate::reader::Reader;

    #[$test]
    $($async)? fn cdata() {
        let mut reader = Reader::from_str("![]]>");

        match reader.$read_until_close($buf) $(.$await)? {
            Err(Error::UnexpectedEof(s)) if s == "CData" => {}
            other => panic!(
                r#"Expected `UnexpectedEof("CData")`, but result is: {:?}"#,
                other
            ),
        }
    }

    #[$test]
    $($async)? fn comment() {
        let mut reader = Reader::from_str("!- -->");

        match reader.$read_until_close($buf) $(.$await)? {
            Err(Error::UnexpectedEof(s)) if s == "Comment" => {}
            other => panic!(
                r#"Expected `UnexpectedEof("Comment")`, but result is: {:?}"#,
                other
            ),
        }
    }

    #[$test]
    $($async)? fn doctype_uppercase() {
        let mut reader = Reader::from_str("!D>");

        match reader.$read_until_close($buf) $(.$await)? {
            Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {}
            other => panic!(
                r#"Expected `UnexpectedEof("DOCTYPE")`, but result is: {:?}"#,
                other
            ),
        }
    }

    #[$test]
    $($async)? fn doctype_lowercase() {
        let mut reader = Reader::from_str("!d>");

        match reader.$read_until_close($buf) $(.$await)? {
            Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {}
            other => panic!(
                r#"Expected `UnexpectedEof("DOCTYPE")`, but result is: {:?}"#,
                other
            ),
        }
    }
}
// One test per event kind: each checks the first event (or events) returned
// by `$read_event` for a minimal document.
mod $read_event {
use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event};
use crate::reader::Reader;
use pretty_assertions::assert_eq;
// Of two consecutive BOMs only the first is dropped; the second is reported as text.
#[$test]
$($async)? fn bom_from_reader() {
let mut reader = Reader::from_reader("\u{feff}\u{feff}".as_bytes());
assert_eq!(
reader.$read_event($buf) $(.$await)? .unwrap(),
Event::Text(BytesText::from_escaped("\u{feff}"))
);
assert_eq!(
reader.$read_event($buf) $(.$await)? .unwrap(),
Event::Eof
);
}
// Same expectation when the reader is created with `from_str`.
#[$test]
$($async)? fn bom_from_str() {
let mut reader = Reader::from_str("\u{feff}\u{feff}");
assert_eq!(
reader.$read_event($buf) $(.$await)? .unwrap(),
Event::Text(BytesText::from_escaped("\u{feff}"))
);
assert_eq!(
reader.$read_event($buf) $(.$await)? .unwrap(),
Event::Eof
);
}
// `<?xml ...?>` is reported as a declaration; the content keeps the trailing space.
#[$test]
$($async)? fn declaration() {
let mut reader = Reader::from_str("<?xml ?>");
assert_eq!(
reader.$read_event($buf) $(.$await)? .unwrap(),
Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", 3)))
);
}
// Only the part after the `DOCTYPE` keyword is reported.
#[$test]
$($async)? fn doctype() {
let mut reader = Reader::from_str("<!DOCTYPE x>");
assert_eq!(
reader.$read_event($buf) $(.$await)? .unwrap(),
Event::DocType(BytesText::from_escaped("x"))
);
}
// `<?xml-stylesheet?>` is reported as a processing instruction, not a declaration.
#[$test]
$($async)? fn processing_instruction() {
let mut reader = Reader::from_str("<?xml-stylesheet?>");
assert_eq!(
reader.$read_event($buf) $(.$await)? .unwrap(),
Event::PI(BytesText::from_escaped("xml-stylesheet"))
);
}
#[$test]
$($async)? fn start() {
let mut reader = Reader::from_str("<tag>");
assert_eq!(
reader.$read_event($buf) $(.$await)? .unwrap(),
Event::Start(BytesStart::new("tag"))
);
}
// End-name checking is switched off because the input has no matching start tag.
#[$test]
$($async)? fn end() {
let mut reader = Reader::from_str("</tag>");
reader.check_end_names(false);
assert_eq!(
reader.$read_event($buf) $(.$await)? .unwrap(),
Event::End(BytesEnd::new("tag"))
);
}
#[$test]
$($async)? fn empty() {
let mut reader = Reader::from_str("<tag/>");
assert_eq!(
reader.$read_event($buf) $(.$await)? .unwrap(),
Event::Empty(BytesStart::new("tag"))
);
}
#[$test]
$($async)? fn text() {
let mut reader = Reader::from_str("text");
assert_eq!(
reader.$read_event($buf) $(.$await)? .unwrap(),
Event::Text(BytesText::from_escaped("text"))
);
}
#[$test]
$($async)? fn cdata() {
let mut reader = Reader::from_str("<![CDATA[]]>");
assert_eq!(
reader.$read_event($buf) $(.$await)? .unwrap(),
Event::CData(BytesCData::new(""))
);
}
#[$test]
$($async)? fn comment() {
let mut reader = Reader::from_str("<!---->");
assert_eq!(
reader.$read_event($buf) $(.$await)? .unwrap(),
Event::Comment(BytesText::from_escaped(""))
);
}
// Empty input yields `Eof` straight away.
#[$test]
$($async)? fn eof() {
let mut reader = Reader::from_str("");
assert_eq!(
reader.$read_event($buf) $(.$await)? .unwrap(),
Event::Eof
);
}
}
};
}
// Generates tests that read through a `$BufReader` whose capacity is chosen
// relative to the position of the closing sequence in the input.
//
// - `#[$test]`: attribute placed on every generated test function.
// - `$read_event`: name of the reader method under test.
// - `$BufReader`: buffered reader type providing `with_capacity(size, inner)`.
// - `$async`, `$await` (optional): tokens spliced before `fn` and after each read call.
//
// NOTE(review): the capacities are presumably picked so that the closing
// sequence straddles two buffer refills — confirm against the buffered reader.
macro_rules! small_buffers {
(
#[$test:meta]
$read_event:ident: $BufReader:ty
$(, $async:ident, $await:ident)?
) => {
mod small_buffers {
use crate::events::{BytesCData, BytesDecl, BytesStart, BytesText, Event};
use crate::reader::Reader;
use pretty_assertions::assert_eq;
// Capacity covers the input up to and including the `?` of `?>`.
#[$test]
$($async)? fn decl() {
let xml = "<?xml ?>";
let size = xml.match_indices("?>").next().unwrap().0 + 1;
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
let mut reader = Reader::from_reader(br);
let mut buf = Vec::new();
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", 3)))
);
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Eof
);
}
// Capacity covers the input up to and including the `?` of `?>`.
#[$test]
$($async)? fn pi() {
let xml = "<?pi?>";
let size = xml.match_indices("?>").next().unwrap().0 + 1;
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
let mut reader = Reader::from_reader(br);
let mut buf = Vec::new();
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::PI(BytesText::new("pi"))
);
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Eof
);
}
// Capacity covers the input up to and including the `/` of `/>`.
#[$test]
$($async)? fn empty() {
let xml = "<empty/>";
let size = xml.match_indices("/>").next().unwrap().0 + 1;
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
let mut reader = Reader::from_reader(br);
let mut buf = Vec::new();
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Empty(BytesStart::new("empty"))
);
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Eof
);
}
// Capacity covers the input up to and including the first `]` of `]]>`.
#[$test]
$($async)? fn cdata1() {
let xml = "<![CDATA[cdata]]>";
let size = xml.match_indices("]]>").next().unwrap().0 + 1;
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
let mut reader = Reader::from_reader(br);
let mut buf = Vec::new();
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::CData(BytesCData::new("cdata"))
);
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Eof
);
}
// Capacity covers the input up to and including both `]` of `]]>`.
#[$test]
$($async)? fn cdata2() {
let xml = "<![CDATA[cdata]]>";
let size = xml.match_indices("]]>").next().unwrap().0 + 2;
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
let mut reader = Reader::from_reader(br);
let mut buf = Vec::new();
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::CData(BytesCData::new("cdata"))
);
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Eof
);
}
// Capacity covers the input up to and including the first `-` of `-->`.
#[$test]
$($async)? fn comment1() {
let xml = "<!--comment-->";
let size = xml.match_indices("-->").next().unwrap().0 + 1;
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
let mut reader = Reader::from_reader(br);
let mut buf = Vec::new();
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Comment(BytesText::new("comment"))
);
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Eof
);
}
// Capacity covers the input up to and including both `-` of `-->`.
#[$test]
$($async)? fn comment2() {
let xml = "<!--comment-->";
let size = xml.match_indices("-->").next().unwrap().0 + 2;
let br = <$BufReader>::with_capacity(size, xml.as_bytes());
let mut reader = Reader::from_reader(br);
let mut buf = Vec::new();
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Comment(BytesText::new("comment"))
);
assert_eq!(
reader.$read_event(&mut buf) $(.$await)? .unwrap(),
Event::Eof
);
}
}
};
}
pub(super) use check;
pub(super) use small_buffers;
}