use std::borrow::Cow;
use std::io;
#[cfg(feature = "encoding")]
use crate::reader::EncodingRef;
#[cfg(feature = "encoding")]
use encoding_rs::{Encoding, UTF_8};
use crate::errors::{Error, Result};
use crate::events::Event;
use crate::name::QName;
use crate::parser::Parser;
use crate::reader::{BangType, ReadRefResult, ReadTextResult, Reader, Span, XmlSource};
use crate::utils::is_whitespace;
/// Methods that are available only when the reader works directly on a
/// borrowed byte slice, so no intermediate buffer has to be supplied.
impl<'a> Reader<&'a [u8]> {
    /// Creates a reader over the bytes of the string `s`.
    ///
    /// When the `encoding` feature is enabled, the encoding stored in the reader
    /// state is additionally set to an explicit UTF-8.
    // `FromStr` cannot be implemented instead: the returned reader borrows from
    // `s`, which the signature of `FromStr::from_str` is unable to express.
    #[allow(clippy::should_implement_trait)]
    pub fn from_str(s: &'a str) -> Self {
        // `mut` is only exercised when the `encoding` feature is compiled in
        #[allow(unused_mut)]
        let mut reader = Self::from_reader(s.as_bytes());

        #[cfg(feature = "encoding")]
        {
            reader.state.encoding = EncodingRef::Explicit(UTF_8);
        }

        reader
    }

    /// Reads the next event, passing the unit value as the (unused) buffer to
    /// `read_event_impl`.
    ///
    /// The returned event carries the lifetime `'a` of the input slice.
    #[inline]
    pub fn read_event(&mut self) -> Result<Event<'a>> {
        let no_buffer = ();
        self.read_event_impl(no_buffer)
    }

    /// Runs the `read_to_end!` macro for the tag name `end` and returns the
    /// [`Span`] it produces.
    ///
    /// NOTE(review): the exact meaning of the span bounds is defined by the
    /// macro, which is not visible here — presumably the content between the
    /// current position and the matching end tag; confirm against the macro.
    pub fn read_to_end(&mut self, end: QName) -> Result<Span> {
        let span: Span = read_to_end!(self, end, (), read_event_impl, {});
        Ok(span)
    }

    /// Reads up to the end tag named `end` and returns the decoded text that
    /// was passed over.
    ///
    /// The text is taken from the input as it was *before* reading started:
    /// the first `span.end - span.start` bytes of that slice are decoded with
    /// the reader's decoder.
    ///
    /// # Errors
    ///
    /// Returns an error if [`Self::read_to_end`] fails or if decoding fails.
    pub fn read_text(&mut self, end: QName) -> Result<Cow<'a, str>> {
        // Reading advances `self.reader`, so remember where the input started
        let before: &'a [u8] = self.reader;
        let span = self.read_to_end(end)?;

        let text_len = (span.end - span.start) as usize;
        let raw = &before[..text_len];

        let text = self.decoder().decode(raw)?;
        Ok(text)
    }
}
/// [`XmlSource`] implementation for a borrowed byte slice.
///
/// The buffer type is `()`: nothing is copied. Every method hands out
/// sub-slices of the input, moves `*self` past the bytes it consumed and adds
/// the number of consumed bytes to `position`.
impl<'a> XmlSource<'a, ()> for &'a [u8] {
    /// Drops a leading UTF-8 byte order mark from the input, if present.
    #[cfg(not(feature = "encoding"))]
    #[inline]
    fn remove_utf8_bom(&mut self) -> io::Result<()> {
        let input: &'a [u8] = *self;
        if let Some(after_bom) = input.strip_prefix(crate::encoding::UTF8_BOM) {
            *self = after_bom;
        }
        Ok(())
    }

    /// Asks `crate::encoding::detect_encoding` for the encoding of the input.
    ///
    /// On success the reported number of BOM bytes is skipped and the encoding
    /// is returned; otherwise the input is left untouched and `None` is returned.
    #[cfg(feature = "encoding")]
    #[inline]
    fn detect_encoding(&mut self) -> io::Result<Option<&'static Encoding>> {
        match crate::encoding::detect_encoding(self) {
            Some((encoding, bom_len)) => {
                *self = &self[bom_len..];
                Ok(Some(encoding))
            }
            None => Ok(None),
        }
    }

    /// Consumes text up to (not including) the first `<` or `&`.
    ///
    /// * Input starts with `<`: nothing is consumed, `Markup` is returned.
    /// * Input starts with `&`: nothing is consumed, `Ref` is returned.
    /// * `<` / `&` found later: the bytes before it are consumed and returned
    ///   as `UpToMarkup` / `UpToRef` respectively.
    /// * Neither found: the whole input is consumed and returned as `UpToEof`.
    #[inline]
    fn read_text(&mut self, _buf: (), position: &mut u64) -> ReadTextResult<'a, ()> {
        let input: &'a [u8] = *self;

        let idx = match memchr::memchr2(b'<', b'&', input) {
            Some(idx) => idx,
            None => {
                *self = &[];
                *position += input.len() as u64;
                return ReadTextResult::UpToEof(input);
            }
        };

        // Only `<` and `&` are searched for, so "not `<`" means `&`
        let at_markup = input[idx] == b'<';

        if idx == 0 {
            return if at_markup {
                ReadTextResult::Markup(())
            } else {
                ReadTextResult::Ref(())
            };
        }

        let (text, rest) = input.split_at(idx);
        *self = rest;
        *position += idx as u64;

        if at_markup {
            ReadTextResult::UpToMarkup(text)
        } else {
            ReadTextResult::UpToRef(text)
        }
    }

    /// Consumes a reference that starts at the `&` the input is positioned on.
    ///
    /// The search for the terminating byte starts after that first `&`:
    ///
    /// * `;` found: everything up to and including `;` is consumed (`Ref`).
    /// * `&` found: everything before that second `&` is consumed (`UpToRef`).
    /// * `<` found: everything before `<` is consumed (`UpToMarkup`).
    /// * Nothing found: the whole input is consumed (`UpToEof`).
    ///
    /// # Panics
    ///
    /// Debug builds assert that the input starts with `&`. Slicing off the
    /// first byte panics on empty input.
    #[inline]
    fn read_ref(&mut self, _buf: (), position: &mut u64) -> ReadRefResult<'a> {
        debug_assert!(
            self.starts_with(b"&"),
            "`read_ref` must be called at `&`:\n{:?}",
            crate::utils::Bytes(self)
        );

        let input: &'a [u8] = *self;

        // The search skips the leading `&`; `+ 1` translates the hit back to an
        // index into the whole input.
        let stop = memchr::memchr3(b';', b'&', b'<', &input[1..]).map(|offset| offset + 1);

        match stop {
            Some(stop) => {
                let terminator = input[stop];
                // `;` is part of the reference and is consumed with it, while
                // `&` and `<` stay in the input for the next read.
                let consumed = if terminator == b';' { stop + 1 } else { stop };

                let (bytes, rest) = input.split_at(consumed);
                *self = rest;
                *position += consumed as u64;

                match terminator {
                    b';' => ReadRefResult::Ref(bytes),
                    b'&' => ReadRefResult::UpToRef(bytes),
                    _ => ReadRefResult::UpToMarkup(bytes),
                }
            }
            None => {
                *self = &[];
                *position += input.len() as u64;
                ReadRefResult::UpToEof(input)
            }
        }
    }

    /// Feeds the whole input to `parser` and consumes what it recognised.
    ///
    /// If the parser reports an index `i`, the first `i + 1` bytes are consumed
    /// and returned. Otherwise `position` is advanced by the length of the
    /// input (the input itself is not changed) and a syntax error built from
    /// `parser.eof_error` is returned.
    #[inline]
    fn read_with<P>(&mut self, mut parser: P, _buf: (), position: &mut u64) -> Result<&'a [u8]>
    where
        P: Parser,
    {
        let input: &'a [u8] = *self;

        match parser.feed(self) {
            Some(last) => {
                // The byte at the reported index is consumed as well
                let consumed = last + 1;
                *position += consumed as u64;

                let (bytes, rest) = input.split_at(consumed);
                *self = rest;
                Ok(bytes)
            }
            None => {
                *position += input.len() as u64;
                Err(Error::Syntax(parser.eof_error(self)))
            }
        }
    }

    /// Consumes a construct that starts with `<!`.
    ///
    /// The kind of the construct is derived from the third byte of the input
    /// by `BangType::new`. If the resulting parser reports an index `i`, the
    /// first `i + 1` bytes are consumed and returned together with the kind.
    /// Otherwise `position` is advanced by the length of the input (the input
    /// itself is not changed) and the syntax error from `BangType::to_err` is
    /// returned.
    ///
    /// # Panics
    ///
    /// Debug builds assert that the input starts with `<!`.
    #[inline]
    fn read_bang_element(&mut self, _buf: (), position: &mut u64) -> Result<(BangType, &'a [u8])> {
        debug_assert!(
            self.starts_with(b"<!"),
            "`read_bang_element` must be called at `<!`:\n{:?}",
            crate::utils::Bytes(self)
        );

        let input: &'a [u8] = *self;
        let mut bang_type = BangType::new(input.get(2).copied())?;

        match bang_type.feed(&[], self) {
            Some(last) => {
                // The byte at the reported index is consumed as well
                let consumed = last + 1;
                *position += consumed as u64;

                let (bytes, rest) = input.split_at(consumed);
                *self = rest;
                Ok((bang_type, bytes))
            }
            None => {
                *position += input.len() as u64;
                Err(Error::Syntax(bang_type.to_err()))
            }
        }
    }

    /// Skips all leading bytes for which `is_whitespace` returns `true`.
    #[inline]
    fn skip_whitespace(&mut self, position: &mut u64) -> io::Result<()> {
        let input: &'a [u8] = *self;
        let skipped = input
            .iter()
            .take_while(|&&byte| is_whitespace(byte))
            .count();

        *position += skipped as u64;
        *self = &input[skipped..];
        Ok(())
    }

    /// Returns the byte that follows the leading `<` without consuming
    /// anything, or `None` if the input has fewer than two bytes.
    ///
    /// # Panics
    ///
    /// Debug builds assert that the input starts with `<`.
    #[inline]
    fn peek_one(&mut self) -> io::Result<Option<u8>> {
        debug_assert!(
            self.starts_with(b"<"),
            "markup must start from '<':\n{:?}",
            crate::utils::Bytes(self)
        );

        let after_lt = self.get(1).copied();
        Ok(after_lt)
    }
}
// Tests for the slice-backed reader, compiled only for test builds.
#[cfg(test)]
mod test {
use crate::reader::test::check;
// NOTE(review): `XmlSource` is not referenced directly in this module; it is
// presumably needed by the expansion of `check!` so that the trait methods are
// in scope — confirm against the macro definition.
use crate::reader::XmlSource;
// Returns its argument unchanged. It is handed to `check!` below.
// NOTE(review): presumably this is the hook the macro uses to wrap test input;
// a slice needs no wrapping — confirm against the macro definition.
fn identity<T>(input: T) -> T {
input
}
// Instantiates the shared test suite defined by `crate::reader::test::check`.
// NOTE(review): the arguments are positional and their meaning is defined by
// the macro, which is not visible here. They look like: an attribute for the
// generated tests, the names of the methods to exercise, the input wrapper,
// and the values `0` and `()` (the unit value is what the slice reader passes
// as its buffer elsewhere in this file) — confirm before reordering anything.
check!(
#[test]
read_event_impl,
read_until_close,
identity,
0,
()
);
}