#[cfg(feature = "encoding")]
use encoding_rs::Encoding;
use std::io;
use std::ops::Range;
use crate::encoding::Decoder;
use crate::errors::{Error, IllFormedError, SyntaxError};
use crate::events::{BytesRef, Event};
use crate::parser::{DtdParser, ElementParser, Parser, PiParser};
use crate::reader::state::ReaderState;
/// Switches that tune how the reader parses its input and what it reports.
///
/// Obtain an instance through `Config::default()` (or from a reader) and then
/// flip the public flags; the type is `#[non_exhaustive]`, so code outside of
/// this crate cannot build it with a struct literal.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
#[cfg_attr(feature = "serde-types", derive(serde::Deserialize, serde::Serialize))]
#[non_exhaustive]
pub struct Config {
    /// Whether a `&` that never reaches a closing `;` is tolerated.
    ///
    /// When `true` the reader hands such input back as a text event; when
    /// `false` it reports `IllFormedError::UnclosedReference`.
    ///
    /// Default: `false`.
    pub allow_dangling_amp: bool,
    /// Default: `false`.
    ///
    /// NOTE(review): not read in this module; judging by the name it relaxes
    /// the handling of end tags without a matching start tag -- confirm
    /// against the reader state implementation.
    pub allow_unmatched_ends: bool,
    /// Default: `false`. Also written by [`Config::enable_all_checks`].
    ///
    /// NOTE(review): not read in this module; presumably enables validation
    /// of comment content -- confirm.
    pub check_comments: bool,
    /// Default: `true`. Also written by [`Config::enable_all_checks`].
    ///
    /// NOTE(review): not read in this module; presumably makes the reader
    /// verify that end tag names match their start tags -- confirm.
    pub check_end_names: bool,
    /// Default: `false`.
    ///
    /// NOTE(review): not read in this module; presumably reports `<a/>` as a
    /// start event followed by an end event -- confirm.
    pub expand_empty_elements: bool,
    /// Default: `true`.
    ///
    /// NOTE(review): not read in this module; presumably strips whitespace
    /// after the name in closing tags -- confirm.
    pub trim_markup_names_in_closing_tags: bool,
    /// When `true`, the reader skips whitespace before it starts to read a
    /// run of text.
    ///
    /// Default: `false`. Also written by [`Config::trim_text`].
    pub trim_text_start: bool,
    /// Default: `false`. Also written by [`Config::trim_text`].
    ///
    /// NOTE(review): not read in this module; presumably trims whitespace at
    /// the end of text events -- confirm.
    pub trim_text_end: bool,
}

impl Config {
    /// Sets both `trim_text_start` and `trim_text_end` to `trim`.
    #[inline]
    pub fn trim_text(&mut self, trim: bool) {
        let Self {
            trim_text_start,
            trim_text_end,
            ..
        } = self;
        *trim_text_start = trim;
        *trim_text_end = trim;
    }

    /// Sets both `check_comments` and `check_end_names` to `enable`.
    #[inline]
    pub fn enable_all_checks(&mut self, enable: bool) {
        let Self {
            check_comments,
            check_end_names,
            ..
        } = self;
        *check_comments = enable;
        *check_end_names = enable;
    }
}

impl Default for Config {
    /// Returns the configuration in which only `check_end_names` and
    /// `trim_markup_names_in_closing_tags` are switched on.
    fn default() -> Self {
        Self {
            // Enabled out of the box.
            check_end_names: true,
            trim_markup_names_in_closing_tags: true,
            // Everything else is opt-in.
            allow_dangling_amp: false,
            allow_unmatched_ends: false,
            check_comments: false,
            expand_empty_elements: false,
            trim_text_start: false,
            trim_text_end: false,
        }
    }
}
// Body of the reader's `read_event_impl` method, written as a macro so that
// the same code can be expanded with or without a trailing `.await`.
//
// Arguments:
// - `$self`: the reader whose `state` is driven by this macro
// - `$buf`: buffer binding passed on to the `$reader` methods; it is
//   reassigned when `read_text` hands the buffer back
// - `$reader`: expression yielding the source to read from
// - `$read_until_close`: name of the `$self` method that parses one markup item
// - `$await`: optional `await` token appended to every reader call
//
// Evaluates to a `Result` with the next event or an error. The `?` operators
// used in the `Init` and `InsideText` arms return early from the enclosing
// function.
macro_rules! read_event_impl {
(
$self:ident, $buf:ident,
$reader:expr,
$read_until_close:ident
$(, $await:ident)?
) => {{
// Run the state machine until one of the arms produces a result; arms that
// only switch the state use `continue`.
let event = loop {
break match $self.state.state {
// First call: deal with the encoding / BOM, then start to read text.
// With the `encoding` feature a detected encoding replaces the current one
// only when the current one `can_be_refined()`.
ParseState::Init => { #[cfg(feature = "encoding")]
if let Some(encoding) = $reader.detect_encoding() $(.$await)? ? {
if $self.state.encoding.can_be_refined() {
$self.state.encoding = crate::reader::EncodingRef::BomDetected(encoding);
}
}
#[cfg(not(feature = "encoding"))]
$reader.remove_utf8_bom() $(.$await)? ?;
$self.state.state = ParseState::InsideText;
continue;
},
// Remember where the reference starts: errors are reported at that offset.
ParseState::InsideRef => { let start = $self.state.offset;
match $reader.read_ref($buf, &mut $self.state.offset) $(.$await)? {
// Complete reference: the first and the last byte are cut off before
// wrapping (the `read_ref` tests in this file show them to be `&` and `;`).
ReadRefResult::Ref(bytes) => {
$self.state.state = ParseState::InsideText;
Ok(Event::GeneralRef(BytesRef::wrap(&bytes[1..bytes.len() - 1], $self.decoder())))
}
// Input ended inside the reference: text if dangling `&` is allowed,
// otherwise an error. Either way the machine is done.
ReadRefResult::UpToEof(bytes) if $self.state.config.allow_dangling_amp => {
$self.state.state = ParseState::Done;
Ok(Event::Text($self.state.emit_text(bytes)))
}
ReadRefResult::UpToEof(_) => {
$self.state.state = ParseState::Done;
$self.state.last_error_offset = start;
Err(Error::IllFormed(IllFormedError::UnclosedReference))
}
// Another reference starts before this one was closed: the state is left
// untouched, so the next call reads a reference again.
ReadRefResult::UpToRef(bytes) if $self.state.config.allow_dangling_amp => {
Ok(Event::Text($self.state.emit_text(bytes)))
}
ReadRefResult::UpToRef(_) => {
$self.state.last_error_offset = start;
Err(Error::IllFormed(IllFormedError::UnclosedReference))
}
// Markup starts before the reference was closed: markup is parsed next,
// even when an error is reported.
ReadRefResult::UpToMarkup(bytes) if $self.state.config.allow_dangling_amp => {
$self.state.state = ParseState::InsideMarkup;
Ok(Event::Text($self.state.emit_text(bytes)))
}
ReadRefResult::UpToMarkup(_) => {
$self.state.state = ParseState::InsideMarkup;
$self.state.last_error_offset = start;
Err(Error::IllFormed(IllFormedError::UnclosedReference))
}
ReadRefResult::Err(e) => Err(Error::Io(e.into())),
}
}
// Optionally skip leading whitespace, then read text up to the next
// markup, reference or end of input.
ParseState::InsideText => { if $self.state.config.trim_text_start {
$reader.skip_whitespace(&mut $self.state.offset) $(.$await)? ?;
}
match $reader.read_text($buf, &mut $self.state.offset) $(.$await)? {
// No text was read: take the buffer back and go on with the next state
// without producing an event.
ReadTextResult::Markup(buf) => {
$self.state.state = ParseState::InsideMarkup;
$buf = buf;
continue;
}
ReadTextResult::Ref(buf) => {
$self.state.state = ParseState::InsideRef;
$buf = buf;
continue;
}
ReadTextResult::UpToMarkup(bytes) => {
$self.state.state = ParseState::InsideMarkup;
Ok(Event::Text($self.state.emit_text(bytes)))
}
ReadTextResult::UpToRef(bytes) => {
$self.state.state = ParseState::InsideRef;
Ok(Event::Text($self.state.emit_text(bytes)))
}
// Trailing text: an empty text event is replaced by `Eof`.
ReadTextResult::UpToEof(bytes) => {
$self.state.state = ParseState::Done;
let event = $self.state.emit_text(bytes);
if event.is_empty() {
Ok(Event::Eof)
} else {
Ok(Event::Text(event))
}
}
ReadTextResult::Err(e) => Err(Error::Io(e.into())),
}
},
ParseState::InsideMarkup => $self.$read_until_close($buf) $(.$await)?,
ParseState::InsideEmpty => Ok(Event::End($self.state.close_expanded_empty())),
ParseState::Done => Ok(Event::Eof),
};
};
// Ill-formed errors keep the state chosen above, so a later call can go on;
// any other error and `Eof` switch the machine to `Done`.
match event {
Err(Error::IllFormed(_)) => {}
Err(_) | Ok(Event::Eof) => $self.state.state = ParseState::Done,
_ => {}
}
event
}};
}
// Body of the reader's `read_until_close` method: parses one markup item and
// turns it into an event.
//
// Arguments:
// - `$self`: the reader whose `state` is updated
// - `$buf`: buffer handed to the `$reader` method that reads the item
// - `$reader`: expression yielding the source to read from
// - `$await`: optional `await` token appended to every reader call
//
// Evaluates to a `Result` with the event or an error.
macro_rules! read_until_close {
(
$self:ident, $buf:ident,
$reader:expr
$(, $await:ident)?
) => {{
// Whatever happens below, the next call starts with reading text.
$self.state.state = ParseState::InsideText;
// Offset before anything is consumed; stored as error position when the
// item cannot be read.
let start = $self.state.offset;
// Choose the parser by the byte that `peek_one` reports (presumably the
// byte that follows `<` -- confirm against the `XmlSource` implementations).
match $reader.peek_one() $(.$await)? {
// `!`: item handled by `read_bang_element` (`BangType` covers CDATA,
// comment and DOCTYPE)
Ok(Some(b'!')) => match $reader
.read_bang_element($buf, &mut $self.state.offset)
$(.$await)?
{
Ok((bang_type, bytes)) => $self.state.emit_bang(bang_type, bytes),
Err(e) => {
$self.state.last_error_offset = start;
Err(e)
}
},
// `/`: read with the element parser and emitted through `emit_end`
Ok(Some(b'/')) => match $reader
.read_with(ElementParser::Outside, $buf, &mut $self.state.offset)
$(.$await)?
{
Ok(bytes) => $self.state.emit_end(bytes),
Err(e) => {
$self.state.last_error_offset = start;
Err(e)
}
},
// `?`: read with `PiParser` and emitted through `emit_question_mark`
Ok(Some(b'?')) => match $reader
.read_with(PiParser(false), $buf, &mut $self.state.offset)
$(.$await)?
{
Ok(bytes) => $self.state.emit_question_mark(bytes),
Err(e) => {
$self.state.last_error_offset = start;
Err(e)
}
},
// Any other byte: read with the element parser and emitted through
// `emit_start`
Ok(Some(_)) => match $reader
.read_with(ElementParser::Outside, $buf, &mut $self.state.offset)
$(.$await)?
{
Ok(bytes) => Ok($self.state.emit_start(bytes)),
Err(e) => {
$self.state.last_error_offset = start;
Err(e)
}
},
// Nothing to peek at: the tag was never closed.
Ok(None) => {
$self.state.last_error_offset = start;
Err(Error::Syntax(SyntaxError::UnclosedTag))
}
// An I/O error while peeking leaves `last_error_offset` untouched.
Err(e) => Err(Error::Io(e.into())),
}
}};
}
// Skips events until the `End` event that closes `$end`, counting nested
// `Start` events with the same name on the way.
//
// Evaluates to the range that starts at the buffer position on entry and
// stops at the buffer position taken right before the closing `End` event was
// read. Returns `Err` from the enclosing function when reading fails or when
// `Eof` arrives first. The caller's `trim_text_start` setting is switched off
// while skipping and restored on every exit.
macro_rules! read_to_end {
    (
        // $self: &mut Reader
        $self:expr, $end:expr, $buf:expr,
        $read_event:ident,
        // Code block that clears the internal buffer; runs before each event is read
        $clear:block
        $(, $await:ident)?
    ) => {{
        // Remember the caller's setting and disable start trimming for the
        // duration of the skip.
        let config = $self.config_mut();
        let saved_trim = config.trim_text_start;
        config.trim_text_start = false;

        let from = $self.buffer_position();
        let mut nesting = 0;
        // The loop only decides the outcome; the setting is restored in one
        // place below.
        let outcome = loop {
            $clear
            let before_event = $self.buffer_position();
            match $self.$read_event($buf) $(.$await)? {
                Err(e) => break Err(e),
                Ok(Event::Start(e)) if e.name() == $end => nesting += 1,
                Ok(Event::End(e)) if e.name() == $end => {
                    if nesting == 0 {
                        break Ok(from..before_event);
                    }
                    nesting -= 1;
                }
                Ok(Event::Eof) => break Err(Error::missed_end($end, $self.decoder())),
                _ => (),
            }
        };
        $self.config_mut().trim_text_start = saved_trim;
        match outcome {
            Ok(span) => span,
            Err(e) => return Err(e),
        }
    }};
}
// Submodules of the reader. `state` provides the `ReaderState` imported at the
// top of this file; the asynchronous part is compiled only with the
// `async-tokio` feature.
#[cfg(feature = "async-tokio")]
mod async_tokio;
mod buffered_reader;
mod ns_reader;
mod slice_reader;
mod state;
pub use ns_reader::NsReader;
/// A half-open range of `u64` offsets.
///
/// The `read_to_end!` macro of this module produces such a range from two
/// `buffer_position()` readings.
pub type Span = Range<u64>;
/// State of the reader's state machine; `read_event_impl!` matches on it to
/// decide what to read next.
#[derive(Clone, Debug)]
enum ParseState {
/// Nothing was read yet: encoding detection (or BOM removal) runs and the
/// machine moves on to `InsideText`.
Init,
/// The next read goes through `read_ref`.
InsideRef,
/// The next read goes through the reader's `read_until_close` method.
InsideMarkup,
/// The next read goes through `read_text`.
InsideText,
/// The next event is the `End` event returned by `close_expanded_empty()`.
InsideEmpty,
/// Every further read yields `Event::Eof`.
Done,
}
/// The encoding in use together with the way it was chosen.
///
/// Every variant carries the encoding itself; the variant decides whether a
/// later detection step may still replace it (see `can_be_refined`).
#[cfg(feature = "encoding")]
#[derive(Clone, Copy, Debug)]
enum EncodingRef {
    /// May still be replaced.
    ///
    /// NOTE(review): presumably the encoding assumed before anything was
    /// detected -- confirm.
    Implicit(&'static Encoding),
    /// Final; never replaced.
    ///
    /// NOTE(review): presumably set by the user of the reader -- confirm.
    Explicit(&'static Encoding),
    /// Stored by `read_event_impl!` when `detect_encoding` reports an
    /// encoding; may still be replaced.
    BomDetected(&'static Encoding),
    /// Final; never replaced.
    ///
    /// NOTE(review): presumably taken from the XML declaration -- confirm.
    XmlDetected(&'static Encoding),
}

#[cfg(feature = "encoding")]
impl EncodingRef {
    /// Returns the carried encoding, whatever the variant.
    #[inline]
    const fn encoding(&self) -> &'static Encoding {
        match *self {
            Self::Implicit(found)
            | Self::Explicit(found)
            | Self::BomDetected(found)
            | Self::XmlDetected(found) => found,
        }
    }

    /// Returns `true` when a newly detected encoding may replace this one.
    #[inline]
    const fn can_be_refined(&self) -> bool {
        // Kept exhaustive on purpose: a new variant has to pick a side.
        match self {
            Self::Explicit(_) | Self::XmlDetected(_) => false,
            Self::Implicit(_) | Self::BomDetected(_) => true,
        }
    }
}
/// Raw byte access to a reader's underlying source.
///
/// Handed out by `Reader::stream`. It borrows the source and the offset
/// counter mutably, and every byte taken through `io::Read` or `io::BufRead`
/// is added to that counter.
#[derive(Debug)]
#[must_use = "streams do nothing unless read or polled"]
pub struct BinaryStream<'r, R> {
    /// The borrowed source.
    inner: &'r mut R,
    /// The borrowed counter that is advanced by the number of bytes consumed.
    offset: &'r mut u64,
}

impl<'r, R> BinaryStream<'r, R> {
    /// Returns the current value of the borrowed offset counter.
    #[inline]
    pub const fn offset(&self) -> u64 {
        *self.offset
    }

    /// Returns a shared reference to the underlying source.
    #[inline]
    pub const fn get_ref(&self) -> &R {
        &*self.inner
    }

    /// Returns a mutable reference to the underlying source.
    ///
    /// Bytes consumed through this reference directly are not added to the
    /// offset counter.
    #[inline]
    pub fn get_mut(&mut self) -> &mut R {
        &mut *self.inner
    }
}

impl<'r, R> io::Read for BinaryStream<'r, R>
where
    R: io::Read,
{
    /// Reads from the source and advances the offset by the number of bytes
    /// read. On error the offset stays as it is.
    #[inline]
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        self.inner.read(buf).map(|count| {
            *self.offset += count as u64;
            count
        })
    }
}

impl<'r, R> io::BufRead for BinaryStream<'r, R>
where
    R: io::BufRead,
{
    /// Forwards to the source; peeking does not move the offset.
    #[inline]
    fn fill_buf(&mut self) -> io::Result<&[u8]> {
        self.inner.fill_buf()
    }

    /// Consumes `amt` bytes from the source and advances the offset by `amt`.
    #[inline]
    fn consume(&mut self, amt: usize) {
        let advanced = amt as u64;
        self.inner.consume(amt);
        *self.offset += advanced;
    }
}
/// A reader over the source `R`: the source itself plus the parser state that
/// is updated while events are read from it.
#[derive(Debug, Clone)]
pub struct Reader<R> {
/// The source the events are read from.
reader: R,
/// Parser state; this module reads and writes its `state`, `offset`,
/// `last_error_offset`, `config` and (with the `encoding` feature) `encoding`.
state: ReaderState,
}
impl<R> Reader<R> {
    /// Creates a reader over `reader` with a default parser state.
    pub fn from_reader(reader: R) -> Self {
        let state = ReaderState::default();
        Self { reader, state }
    }

    /// Returns the configuration of this reader.
    pub const fn config(&self) -> &Config {
        let Self { state, .. } = self;
        &state.config
    }

    /// Returns the configuration of this reader for modification.
    pub fn config_mut(&mut self) -> &mut Config {
        let Self { state, .. } = self;
        &mut state.config
    }
}
impl<R> Reader<R> {
    /// Consumes the reader and returns the underlying source; the parser
    /// state is dropped.
    pub fn into_inner(self) -> R {
        let Self { reader, .. } = self;
        reader
    }

    /// Returns a shared reference to the underlying source.
    pub const fn get_ref(&self) -> &R {
        let Self { reader, .. } = self;
        reader
    }

    /// Returns a mutable reference to the underlying source.
    pub fn get_mut(&mut self) -> &mut R {
        let Self { reader, .. } = self;
        reader
    }

    /// Returns the offset stored in the parser state, i.e. the counter that
    /// the reading methods advance while they consume input.
    pub const fn buffer_position(&self) -> u64 {
        self.state.offset
    }

    /// Returns the offset that was recorded for the most recent error.
    pub const fn error_position(&self) -> u64 {
        self.state.last_error_offset
    }

    /// Returns the decoder provided by the parser state.
    #[inline]
    pub const fn decoder(&self) -> Decoder {
        self.state.decoder()
    }

    /// Borrows the source and the offset counter as a [`BinaryStream`], so
    /// that bytes read through the stream are counted in `buffer_position()`.
    #[inline]
    pub fn stream(&mut self) -> BinaryStream<'_, R> {
        let Self { reader, state } = self;
        BinaryStream {
            inner: reader,
            offset: &mut state.offset,
        }
    }
}
impl<R> Reader<R> {
/// Reads the next event from the source.
///
/// The body is the expansion of `read_event_impl!` without the optional
/// `await` argument; `buf` is whatever buffer type the `XmlSource`
/// implementation of `R` expects.
fn read_event_impl<'i, B>(&mut self, mut buf: B) -> Result<Event<'i>, Error>
where
R: XmlSource<'i, B>,
{
read_event_impl!(self, buf, self.reader, read_until_close)
}
/// Parses one markup item and returns the event for it.
///
/// The body is the expansion of `read_until_close!` without the optional
/// `await` argument; `read_event_impl!` calls this method in the
/// `InsideMarkup` state.
fn read_until_close<'i, B>(&mut self, buf: B) -> Result<Event<'i>, Error>
where
R: XmlSource<'i, B>,
{
read_until_close!(self, buf, self.reader)
}
}
/// Outcome of `XmlSource::read_text`.
///
/// `'r` is the lifetime of the returned bytes and `B` the buffer type that
/// was passed to `read_text`.
#[derive(Debug)]
enum ReadTextResult<'r, B> {
/// No text was read because markup comes next; the unused buffer is handed
/// back so that it can be passed to the next read.
Markup(B),
/// No text was read because a reference comes next; the unused buffer is
/// handed back so that it can be passed to the next read.
Ref(B),
/// Text that is followed by markup.
UpToMarkup(&'r [u8]),
/// Text that is followed by a reference.
UpToRef(&'r [u8]),
/// Text that runs up to the end of the input (may be empty).
UpToEof(&'r [u8]),
/// Reading failed with an I/O error.
Err(io::Error),
}
/// Outcome of `XmlSource::read_ref`.
///
/// `'r` is the lifetime of the returned bytes. In every data variant the bytes
/// start with the `&` the read began at (see the `read_ref` tests below).
#[derive(Debug)]
enum ReadRefResult<'r> {
/// A closed reference; the bytes include the leading `&` and the closing `;`.
Ref(&'r [u8]),
/// The input ended before the reference was closed.
UpToEof(&'r [u8]),
/// Another `&` was met before the reference was closed; that `&` is not part
/// of the bytes.
UpToRef(&'r [u8]),
/// A `<` was met before the reference was closed; the `<` is not part of the
/// bytes.
UpToMarkup(&'r [u8]),
/// Reading failed with an I/O error.
Err(io::Error),
}
/// Source of bytes that the reader's state machine pulls its input from.
///
/// `'r` is the lifetime of the byte slices handed back and `B` the type of the
/// buffer passed to the reading methods. Methods that take `position` receive
/// the reader's offset counter and are expected to advance it by what they
/// consume (the tests below check the resulting values).
trait XmlSource<'r, B> {
/// Called once before the first read when the `encoding` feature is off.
///
/// NOTE(review): judging by the name and the BOM tests below, it strips a
/// leading UTF-8 byte order mark -- confirm against the implementations.
#[cfg(not(feature = "encoding"))]
fn remove_utf8_bom(&mut self) -> io::Result<()>;
/// Called once before the first read when the `encoding` feature is on.
///
/// Returns the detected encoding, or `None` when nothing was detected.
#[cfg(feature = "encoding")]
fn detect_encoding(&mut self) -> io::Result<Option<&'static Encoding>>;
/// Reads text up to the next `<`, `&` or the end of the input; see
/// `ReadTextResult` for the possible outcomes.
fn read_text(&mut self, buf: B, position: &mut u64) -> ReadTextResult<'r, B>;
/// Reads a reference that starts at `&`; see `ReadRefResult` for the
/// possible outcomes.
fn read_ref(&mut self, buf: B, position: &mut u64) -> ReadRefResult<'r>;
/// Reads one item with the help of `parser` and returns its bytes.
///
/// NOTE(review): presumably `parser` decides where the item ends -- confirm
/// against the `Parser` trait.
fn read_with<P>(&mut self, parser: P, buf: B, position: &mut u64) -> Result<&'r [u8], Error>
where
P: Parser;
/// Reads an item that starts with `<!` and returns its kind together with
/// its bytes.
fn read_bang_element(
&mut self,
buf: B,
position: &mut u64,
) -> Result<(BangType, &'r [u8]), Error>;
/// Called before text is read when `Config::trim_text_start` is set.
///
/// NOTE(review): presumably consumes leading whitespace and advances
/// `position` accordingly -- confirm.
fn skip_whitespace(&mut self, position: &mut u64) -> io::Result<()>;
/// Returns a byte without a `position` to advance, or `None` when no byte is
/// available; `read_until_close!` picks the markup parser by it.
fn peek_one(&mut self) -> io::Result<Option<u8>>;
}
/// Kind of an item that starts with `<!`, as chosen by `BangType::new`.
#[derive(Debug, PartialEq)]
enum BangType {
/// Chosen when the deciding byte is `[`; the end is searched as `]]>`.
CData,
/// Chosen when the deciding byte is `-`; the end is searched as `-->`.
Comment,
/// Chosen when the deciding byte is `D` or `d`; the end is searched by the
/// wrapped `DtdParser`.
DocType(DtdParser),
}
impl BangType {
    /// Chooses the kind of a `<!` item by a single byte.
    ///
    /// Returns `SyntaxError::InvalidBangMarkup` when the byte is missing or
    /// is none of `[`, `-`, `D`, `d`.
    #[inline(always)]
    const fn new(byte: Option<u8>) -> Result<Self, SyntaxError> {
        match byte {
            Some(b'[') => Ok(Self::CData),
            Some(b'-') => Ok(Self::Comment),
            Some(b'D') | Some(b'd') => Ok(Self::DocType(DtdParser::BeforeInternalSubset(0))),
            _ => Err(SyntaxError::InvalidBangMarkup),
        }
    }

    /// Searches `chunk` for the `>` that ends the item.
    ///
    /// # Parameters
    /// - `buf`: bytes of the item that were seen before `chunk`
    /// - `chunk`: the bytes to search in
    ///
    /// Returns the index of the closing `>` inside `chunk`, or `None` when the
    /// item does not end in this chunk.
    #[inline(always)]
    fn feed<'b>(&mut self, buf: &[u8], chunk: &'b [u8]) -> Option<usize> {
        match self {
            Self::Comment => memchr::memchr_iter(b'>', chunk).find(|&at| {
                // A `>` closes the comment only when more than five bytes of
                // the item precede it and the two bytes right before it are
                // `--`, which may sit in `chunk`, in `buf`, or one in each.
                buf.len() + at > 5
                    && (chunk[..at].ends_with(b"--")
                        || (at == 1 && buf.ends_with(b"-") && chunk[0] == b'-')
                        || (at == 0 && buf.ends_with(b"--")))
            }),
            Self::CData => memchr::memchr_iter(b'>', chunk).find(|&at| {
                // Same search for `]]`, without the minimal length check.
                chunk[..at].ends_with(b"]]")
                    || (at == 1 && buf.ends_with(b"]") && chunk[0] == b']')
                    || (at == 0 && buf.ends_with(b"]]"))
            }),
            Self::DocType(parser) => parser.feed(buf, chunk),
        }
    }

    /// Returns the syntax error that reports an item of this kind as unclosed.
    #[inline]
    const fn to_err(&self) -> SyntaxError {
        match self {
            Self::DocType(_) => SyntaxError::UnclosedDoctype,
            Self::Comment => SyntaxError::UnclosedComment,
            Self::CData => SyntaxError::UnclosedCData,
        }
    }
}
#[cfg(test)]
mod test {
macro_rules! check {
(
#[$test:meta]
$read_event:ident,
$read_until_close:ident,
// constructor of the XML source on which internal functions will be called
$source:path,
$skip:literal,
// constructor of the buffer to which read data will stored
$buf:expr
$(, $async:ident, $await:ident)?
) => {
// Tests of `read_bang_element`.
//
// Every test builds the source from a literal with its first `$skip` bytes cut
// off, starts with `position == 0` and checks both the outcome and the final
// position. Only `//` comments are used here because this code is the body of
// a macro.
mod read_bang_element {
use super::*;
use crate::errors::{Error, SyntaxError};
use crate::reader::{BangType, DtdParser};
use crate::utils::Bytes;
// CDATA sections: `<![CDATA[ ... ]]>`
mod cdata {
use super::*;
use pretty_assertions::assert_eq;
#[$test]
#[ignore = "start CDATA sequence fully checked outside of `read_bang_element`"]
$($async)? fn not_properly_start() {
let buf = $buf;
let mut position = 0;
let mut input = &b"<![]]>other content"[$skip..];
match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedCData),
x => panic!(
"Expected `Err(Syntax(_))`, but got `{:?}`",
x
),
}
assert_eq!(position, 1);
}
// Without `]]>` the whole input is consumed before the error is reported.
#[$test]
$($async)? fn not_closed() {
let buf = $buf;
let mut position = 0;
let mut input = &b"<![CDATA[other content"[$skip..];
match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedCData),
x => panic!(
"Expected `Err(Syntax(_))`, but got `{:?}`",
x
),
}
assert_eq!(position, 22);
}
#[$test]
$($async)? fn empty() {
let buf = $buf;
let mut position = 0;
let mut input = &b"<![CDATA[]]>other content"[$skip..];
let (ty, bytes) = $source(&mut input)
.read_bang_element(buf, &mut position)
$(.$await)?
.unwrap();
assert_eq!(
(ty, Bytes(bytes)),
(BangType::CData, Bytes(b"<![CDATA[]]>"))
);
assert_eq!(position, 12);
}
// `]] ]>` inside the content does not end the section; the first `]]>` does.
#[$test]
$($async)? fn with_content() {
let buf = $buf;
let mut position = 0;
let mut input = &b"<![CDATA[cdata]] ]>content]]>other content]]>"[$skip..];
let (ty, bytes) = $source(&mut input)
.read_bang_element(buf, &mut position)
$(.$await)?
.unwrap();
assert_eq!(
(ty, Bytes(bytes)),
(BangType::CData, Bytes(b"<![CDATA[cdata]] ]>content]]>"))
);
assert_eq!(position, 29);
}
}
// Comments: `<!-- ... -->`
mod comment {
use super::*;
use pretty_assertions::assert_eq;
#[$test]
#[ignore = "start comment sequence fully checked outside of `read_bang_element`"]
$($async)? fn not_properly_start() {
let buf = $buf;
let mut position = 0;
let mut input = &b"<!- -->other content"[$skip..];
match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
x => panic!(
"Expected `Err(Syntax(_))`, but got `{:?}`",
x
),
}
assert_eq!(position, 1);
}
// The four tests below use a `>` that comes too early to close a comment;
// the whole input is consumed and the comment is reported as unclosed.
#[$test]
$($async)? fn not_properly_end() {
let buf = $buf;
let mut position = 0;
let mut input = &b"<!->other content"[$skip..];
match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
x => panic!(
"Expected `Err(Syntax(_))`, but got `{:?}`",
x
),
}
assert_eq!(position, 17);
}
#[$test]
$($async)? fn not_closed1() {
let buf = $buf;
let mut position = 0;
let mut input = &b"<!--other content"[$skip..];
match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
x => panic!(
"Expected `Err(Syntax(_))`, but got `{:?}`",
x
),
}
assert_eq!(position, 17);
}
#[$test]
$($async)? fn not_closed2() {
let buf = $buf;
let mut position = 0;
let mut input = &b"<!-->other content"[$skip..];
match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
x => panic!(
"Expected `Err(Syntax(_))`, but got `{:?}`",
x
),
}
assert_eq!(position, 18);
}
#[$test]
$($async)? fn not_closed3() {
let buf = $buf;
let mut position = 0;
let mut input = &b"<!--->other content"[$skip..];
match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment),
x => panic!(
"Expected `Err(Syntax(_))`, but got `{:?}`",
x
),
}
assert_eq!(position, 19);
}
// `<!---->` is the shortest comment that is accepted.
#[$test]
$($async)? fn empty() {
let buf = $buf;
let mut position = 0;
let mut input = &b"<!---->other content"[$skip..];
let (ty, bytes) = $source(&mut input)
.read_bang_element(buf, &mut position)
$(.$await)?
.unwrap();
assert_eq!(
(ty, Bytes(bytes)),
(BangType::Comment, Bytes(b"<!---->"))
);
assert_eq!(position, 7);
}
// The early `>` right after `<!---` is skipped; the comment ends at `--->`.
#[$test]
$($async)? fn with_content() {
let buf = $buf;
let mut position = 0;
let mut input = &b"<!--->comment<--->other content"[$skip..];
let (ty, bytes) = $source(&mut input)
.read_bang_element(buf, &mut position)
$(.$await)?
.unwrap();
assert_eq!(
(ty, Bytes(bytes)),
(BangType::Comment, Bytes(b"<!--->comment<--->"))
);
assert_eq!(position, 18);
}
}
// DOCTYPE declarations; the same cases are checked for `<!D...` and `<!d...`.
mod doctype {
use super::*;
mod uppercase {
use super::*;
use pretty_assertions::assert_eq;
#[$test]
$($async)? fn not_properly_start() {
let buf = $buf;
let mut position = 0;
let mut input = &b"<!D other content"[$skip..];
match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
x => panic!(
"Expected `Err(Syntax(_))`, but got `{:?}`",
x
),
}
assert_eq!(position, 17);
}
#[$test]
$($async)? fn without_space() {
let buf = $buf;
let mut position = 0;
let mut input = &b"<!DOCTYPEother content"[$skip..];
match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
x => panic!(
"Expected `Err(Syntax(_))`, but got `{:?}`",
x
),
}
assert_eq!(position, 22);
}
#[$test]
$($async)? fn empty() {
let buf = $buf;
let mut position = 0;
let mut input = &b"<!DOCTYPE>other content"[$skip..];
let (ty, bytes) = $source(&mut input)
.read_bang_element(buf, &mut position)
$(.$await)?
.unwrap();
assert_eq!(
(ty, Bytes(bytes)),
(BangType::DocType(DtdParser::Finished), Bytes(b"<!DOCTYPE>"))
);
assert_eq!(position, 10);
}
#[$test]
$($async)? fn not_closed() {
let buf = $buf;
let mut position = 0;
let mut input = &b"<!DOCTYPE other content"[$skip..];
match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
x => panic!(
"Expected `Err(Syntax(_))`, but got `{:?}`",
x
),
}
assert_eq!(position, 23);
}
}
mod lowercase {
use super::*;
use pretty_assertions::assert_eq;
#[$test]
$($async)? fn not_properly_start() {
let buf = $buf;
let mut position = 0;
let mut input = &b"<!d other content"[$skip..];
match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
x => panic!(
"Expected `Err(Syntax(_))`, but got `{:?}`",
x
),
}
assert_eq!(position, 17);
}
#[$test]
$($async)? fn without_space() {
let buf = $buf;
let mut position = 0;
let mut input = &b"<!doctypeother content"[$skip..];
match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
x => panic!(
"Expected `Err(Syntax(_))`, but got `{:?}`",
x
),
}
assert_eq!(position, 22);
}
#[$test]
$($async)? fn empty() {
let buf = $buf;
let mut position = 0;
let mut input = &b"<!doctype>other content"[$skip..];
let (ty, bytes) = $source(&mut input)
.read_bang_element(buf, &mut position)
$(.$await)?
.unwrap();
assert_eq!(
(ty, Bytes(bytes)),
(BangType::DocType(DtdParser::Finished), Bytes(b"<!doctype>"))
);
assert_eq!(position, 10);
}
#[$test]
$($async)? fn not_closed() {
let buf = $buf;
let mut position = 0;
let mut input = &b"<!doctype other content"[$skip..];
match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? {
Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedDoctype),
x => panic!(
"Expected `Err(Syntax(_))`, but got `{:?}`",
x
),
}
assert_eq!(position, 23);
}
}
}
}
// Tests of `read_text`.
//
// Every test starts with `position == 1`, so the expected final position is
// 1 plus the number of bytes consumed. Only `//` comments are used here
// because this code is the body of a macro.
mod read_text {
use super::*;
use crate::reader::ReadTextResult;
use crate::utils::Bytes;
use pretty_assertions::assert_eq;
// Empty input: empty text up to the end of input, nothing consumed.
#[$test]
$($async)? fn empty() {
let buf = $buf;
let mut position = 1;
let mut input = b"".as_ref();
match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
ReadTextResult::UpToEof(bytes) => assert_eq!(Bytes(bytes), Bytes(b"")),
x => panic!("Expected `UpToEof(_)`, but got `{:?}`", x),
}
assert_eq!(position, 1);
}
// Input starts with `<`: the buffer is handed back and `<` is not consumed.
#[$test]
$($async)? fn markup() {
let buf = $buf;
let mut position = 1;
let mut input = b"<".as_ref();
match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
ReadTextResult::Markup(b) => assert_eq!(b, $buf),
x => panic!("Expected `Markup(_)`, but got `{:?}`", x),
}
assert_eq!(position, 1);
}
// Input starts with `&`: the buffer is handed back and `&` is not consumed.
#[$test]
$($async)? fn ref_() {
let buf = $buf;
let mut position = 1;
let mut input = b"&".as_ref();
match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
ReadTextResult::Ref(b) => assert_eq!(b, $buf),
x => panic!("Expected `Ref(_)`, but got `{:?}`", x),
}
assert_eq!(position, 1);
}
// Text before `<`: only the text is consumed.
#[$test]
$($async)? fn up_to_markup() {
let buf = $buf;
let mut position = 1;
let mut input = b"a<".as_ref();
match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
ReadTextResult::UpToMarkup(bytes) => assert_eq!(Bytes(bytes), Bytes(b"a")),
x => panic!("Expected `UpToMarkup(_)`, but got `{:?}`", x),
}
assert_eq!(position, 2);
}
// Text before `&`: only the text is consumed.
#[$test]
$($async)? fn up_to_ref() {
let buf = $buf;
let mut position = 1;
let mut input = b"a&".as_ref();
match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
ReadTextResult::UpToRef(bytes) => assert_eq!(Bytes(bytes), Bytes(b"a")),
x => panic!("Expected `UpToRef(_)`, but got `{:?}`", x),
}
assert_eq!(position, 2);
}
// Text that runs to the end of the input.
#[$test]
$($async)? fn up_to_eof() {
let buf = $buf;
let mut position = 1;
let mut input = b"a".as_ref();
match $source(&mut input).read_text(buf, &mut position) $(.$await)? {
ReadTextResult::UpToEof(bytes) => assert_eq!(Bytes(bytes), Bytes(b"a")),
x => panic!("Expected `UpToEof(_)`, but got `{:?}`", x),
}
assert_eq!(position, 2);
}
}
// Tests of `read_ref`.
//
// Every test starts with `position == 1` and an input that begins with `&`,
// so the expected final position is 1 plus the number of bytes consumed.
// Only `//` comments are used here because this code is the body of a macro.
mod read_ref {
    use super::*;
    use crate::reader::ReadRefResult;
    use crate::utils::Bytes;
    use pretty_assertions::assert_eq;

    // A lone `&` at the end of the input is consumed and returned.
    #[$test]
    $($async)? fn up_to_eof() {
        let buf = $buf;
        let mut position = 1;
        let mut input = b"&".as_ref();
        match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
            ReadRefResult::UpToEof(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
            x => panic!("Expected `UpToEof(_)`, but got `{:?}`", x),
        }
        assert_eq!(position, 2);
    }

    // The second `&` stops the read and is left in the input.
    #[$test]
    $($async)? fn up_to_ref() {
        let buf = $buf;
        let mut position = 1;
        let mut input = b"&&".as_ref();
        match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
            ReadRefResult::UpToRef(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
            x => panic!("Expected `UpToRef(_)`, but got `{:?}`", x),
        }
        assert_eq!(position, 2);
    }

    // `<` stops the read and is left in the input.
    #[$test]
    $($async)? fn up_to_markup() {
        let buf = $buf;
        let mut position = 1;
        let mut input = b"&<".as_ref();
        match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
            ReadRefResult::UpToMarkup(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&")),
            x => panic!("Expected `UpToMarkup(_)`, but got `{:?}`", x),
        }
        assert_eq!(position, 2);
    }

    // A reference without a name: both delimiters are consumed and returned.
    #[$test]
    $($async)? fn empty_ref() {
        let buf = $buf;
        let mut position = 1;
        let mut input = b"&;".as_ref();
        match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
            ReadRefResult::Ref(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&;")),
            x => panic!("Expected `Ref(_)`, but got `{:?}`", x),
        }
        assert_eq!(position, 3);
    }

    // A named reference. The input has to be the four bytes `&lt;`: the read
    // must start at `&`, and the asserted position is 1 + 4.
    #[$test]
    $($async)? fn normal() {
        let buf = $buf;
        let mut position = 1;
        let mut input = b"&lt;".as_ref();
        match $source(&mut input).read_ref(buf, &mut position) $(.$await)? {
            ReadRefResult::Ref(bytes) => assert_eq!(Bytes(bytes), Bytes(b"&lt;")),
            x => panic!("Expected `Ref(_)`, but got `{:?}`", x),
        }
        assert_eq!(position, 5);
    }
}
// Tests of `read_with` driven by `ElementParser`.
//
// Every test builds the source from a literal with its first `$skip` bytes cut
// off, starts with `position == 0` and expects the position to end up at the
// length of the tag. Only `//` comments are used here because this code is the
// body of a macro.
mod read_element {
    use super::*;
    use crate::errors::{Error, SyntaxError};
    use crate::parser::ElementParser;
    use crate::utils::Bytes;
    use pretty_assertions::assert_eq;

    // A `<` without anything after it is an unclosed tag.
    #[$test]
    $($async)? fn empty() {
        let buf = $buf;
        let mut position = 0;
        let mut input = &b"<"[$skip..];
        match $source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? {
            Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedTag),
            x => panic!(
                "Expected `Err(Syntax(_))`, but got `{:?}`",
                x
            ),
        }
        assert_eq!(position, 1);
    }

    // Start tags: `<...>`
    mod open {
        use super::*;
        use pretty_assertions::assert_eq;

        #[$test]
        $($async)? fn empty_tag() {
            let buf = $buf;
            let mut position = 0;
            let mut input = &b"<>"[$skip..];
            assert_eq!(
                Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
                Bytes(b"<>")
            );
            assert_eq!(position, 2);
        }

        #[$test]
        $($async)? fn normal() {
            let buf = $buf;
            let mut position = 0;
            let mut input = &b"<tag>"[$skip..];
            assert_eq!(
                Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
                Bytes(b"<tag>")
            );
            assert_eq!(position, 5);
        }

        #[$test]
        $($async)? fn empty_ns_empty_tag() {
            let buf = $buf;
            let mut position = 0;
            let mut input = &b"<:>"[$skip..];
            assert_eq!(
                Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
                Bytes(b"<:>")
            );
            assert_eq!(position, 3);
        }

        #[$test]
        $($async)? fn empty_ns() {
            let buf = $buf;
            let mut position = 0;
            let mut input = &b"<:tag>"[$skip..];
            assert_eq!(
                Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
                Bytes(b"<:tag>")
            );
            assert_eq!(position, 6);
        }

        // A `>` inside a quoted attribute value must not end the tag. The
        // literal is 39 bytes long: every separator is two spaces wide.
        #[$test]
        $($async)? fn with_attributes() {
            let buf = $buf;
            let mut position = 0;
            let mut input = &br#"<tag  attr-1=">"  attr2  =  '>'  3attr>"#[$skip..];
            assert_eq!(
                Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
                Bytes(br#"<tag  attr-1=">"  attr2  =  '>'  3attr>"#)
            );
            assert_eq!(position, 39);
        }
    }

    // Self-closed tags: `<.../>`
    mod self_closed {
        use super::*;
        use pretty_assertions::assert_eq;

        #[$test]
        $($async)? fn empty_tag() {
            let buf = $buf;
            let mut position = 0;
            let mut input = &b"</>"[$skip..];
            assert_eq!(
                Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
                Bytes(b"</>")
            );
            assert_eq!(position, 3);
        }

        #[$test]
        $($async)? fn normal() {
            let buf = $buf;
            let mut position = 0;
            let mut input = &b"<tag/>"[$skip..];
            assert_eq!(
                Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
                Bytes(b"<tag/>")
            );
            assert_eq!(position, 6);
        }

        #[$test]
        $($async)? fn empty_ns_empty_tag() {
            let buf = $buf;
            let mut position = 0;
            let mut input = &b"<:/>"[$skip..];
            assert_eq!(
                Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
                Bytes(b"<:/>")
            );
            assert_eq!(position, 4);
        }

        #[$test]
        $($async)? fn empty_ns() {
            let buf = $buf;
            let mut position = 0;
            let mut input = &b"<:tag/>"[$skip..];
            assert_eq!(
                Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
                Bytes(b"<:tag/>")
            );
            assert_eq!(position, 7);
        }

        // A `/>` inside a quoted attribute value must not end the tag. The
        // literal is 42 bytes long: every separator is two spaces wide.
        #[$test]
        $($async)? fn with_attributes() {
            let buf = $buf;
            let mut position = 0;
            let mut input = &br#"<tag  attr-1="/>"  attr2  =  '/>'  3attr/>"#[$skip..];
            assert_eq!(
                Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
                Bytes(br#"<tag  attr-1="/>"  attr2  =  '/>'  3attr/>"#)
            );
            assert_eq!(position, 42);
        }
    }

    // End tags: `</...>`
    mod close {
        use super::*;
        use pretty_assertions::assert_eq;

        #[$test]
        $($async)? fn empty_tag() {
            let buf = $buf;
            let mut position = 0;
            let mut input = &b"</ >"[$skip..];
            assert_eq!(
                Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
                Bytes(b"</ >")
            );
            assert_eq!(position, 4);
        }

        #[$test]
        $($async)? fn normal() {
            let buf = $buf;
            let mut position = 0;
            let mut input = &b"</tag>"[$skip..];
            assert_eq!(
                Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
                Bytes(b"</tag>")
            );
            assert_eq!(position, 6);
        }

        #[$test]
        $($async)? fn empty_ns_empty_tag() {
            let buf = $buf;
            let mut position = 0;
            let mut input = &b"</:>"[$skip..];
            assert_eq!(
                Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
                Bytes(b"</:>")
            );
            assert_eq!(position, 4);
        }

        #[$test]
        $($async)? fn empty_ns() {
            let buf = $buf;
            let mut position = 0;
            let mut input = &b"</:tag>"[$skip..];
            assert_eq!(
                Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
                Bytes(b"</:tag>")
            );
            assert_eq!(position, 7);
        }

        // Attributes in an end tag are read as part of the tag. The literal
        // is 40 bytes long: every separator is two spaces wide.
        #[$test]
        $($async)? fn with_attributes() {
            let buf = $buf;
            let mut position = 0;
            let mut input = &br#"</tag  attr-1=">"  attr2  =  '>'  3attr>"#[$skip..];
            assert_eq!(
                Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()),
                Bytes(br#"</tag  attr-1=">"  attr2  =  '>'  3attr>"#)
            );
            assert_eq!(position, 40);
        }
    }
}
// End-to-end tests of the reader method named by `$read_event`: each test
// feeds a small document to a `Reader` and checks the events that come out.
// Only `//` comments are used here because this code is the body of a macro.
mod $read_event {
use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesPI, BytesStart, BytesText, Event};
use crate::reader::Reader;
use pretty_assertions::assert_eq;
// Of two byte order marks only the first one is dropped; the second one is
// reported as text.
#[$test]
$($async)? fn bom_from_reader() {
let mut reader = Reader::from_reader("\u{feff}\u{feff}".as_bytes());
assert_eq!(
reader.$read_event($buf) $(.$await)? .unwrap(),
Event::Text(BytesText::from_escaped("\u{feff}"))
);
assert_eq!(
reader.$read_event($buf) $(.$await)? .unwrap(),
Event::Eof
);
}
// Same as above for a reader created from a string.
#[$test]
$($async)? fn bom_from_str() {
let mut reader = Reader::from_str("\u{feff}\u{feff}");
assert_eq!(
reader.$read_event($buf) $(.$await)? .unwrap(),
Event::Text(BytesText::from_escaped("\u{feff}"))
);
assert_eq!(
reader.$read_event($buf) $(.$await)? .unwrap(),
Event::Eof
);
}
#[$test]
$($async)? fn declaration() {
let mut reader = Reader::from_str("<?xml ?>");
assert_eq!(
reader.$read_event($buf) $(.$await)? .unwrap(),
Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", 3)))
);
}
#[$test]
$($async)? fn doctype() {
let mut reader = Reader::from_str("<!DOCTYPE x>");
assert_eq!(
reader.$read_event($buf) $(.$await)? .unwrap(),
Event::DocType(BytesText::from_escaped("x"))
);
}
// `? >` and the quotes inside the instruction do not end it; only `?>` does.
#[$test]
$($async)? fn processing_instruction() {
let mut reader = Reader::from_str("<?xml-stylesheet '? >\" ?>");
assert_eq!(
reader.$read_event($buf) $(.$await)? .unwrap(),
Event::PI(BytesPI::new("xml-stylesheet '? >\" "))
);
}
#[$test]
$($async)? fn start_and_end() {
let mut reader = Reader::from_str("<tag></tag>");
assert_eq!(
reader.$read_event($buf) $(.$await)? .unwrap(),
Event::Start(BytesStart::new("tag"))
);
assert_eq!(
reader.$read_event($buf) $(.$await)? .unwrap(),
Event::End(BytesEnd::new("tag"))
);
}
#[$test]
$($async)? fn empty() {
let mut reader = Reader::from_str("<tag/>");
assert_eq!(
reader.$read_event($buf) $(.$await)? .unwrap(),
Event::Empty(BytesStart::new("tag"))
);
}
#[$test]
$($async)? fn text() {
let mut reader = Reader::from_str("text");
assert_eq!(
reader.$read_event($buf) $(.$await)? .unwrap(),
Event::Text(BytesText::from_escaped("text"))
);
}
#[$test]
$($async)? fn cdata() {
let mut reader = Reader::from_str("<![CDATA[]]>");
assert_eq!(
reader.$read_event($buf) $(.$await)? .unwrap(),
Event::CData(BytesCData::new(""))
);
}
#[$test]
$($async)? fn comment() {
let mut reader = Reader::from_str("<!---->");
assert_eq!(
reader.$read_event($buf) $(.$await)? .unwrap(),
Event::Comment(BytesText::from_escaped(""))
);
}
// An empty document yields `Eof` right away.
#[$test]
$($async)? fn eof() {
let mut reader = Reader::from_str("");
assert_eq!(
reader.$read_event($buf) $(.$await)? .unwrap(),
Event::Eof
);
}
}
};
}
pub(super) use check;
}