use std::borrow::Cow;
#[cfg(feature = "encoding")]
use crate::reader::EncodingRef;
#[cfg(feature = "encoding")]
use encoding_rs::{Encoding, UTF_8};
use crate::errors::{Error, Result};
use crate::events::Event;
use crate::name::QName;
use crate::reader::{is_whitespace, BangType, ReadElementState, Reader, Span, XmlSource};
use memchr;
/// Reading API that is only available when the reader borrows its whole input
/// as a byte slice.
///
/// Events and text returned from these methods borrow from the input (`'a`),
/// so `()` is passed wherever the generic implementation expects a buffer.
impl<'a> Reader<&'a [u8]> {
    /// Creates a reader over the bytes of `s`.
    ///
    /// With the `encoding` feature enabled, the reader's encoding is set to
    /// `EncodingRef::Explicit(UTF_8)` right after construction. Without the
    /// feature this is exactly `Self::from_reader(s.as_bytes())`.
    // The name mirrors `FromStr::from_str`, but this constructor is infallible
    // and borrows from `s`, so the trait cannot be implemented instead.
    #[allow(clippy::should_implement_trait)]
    pub fn from_str(s: &'a str) -> Self {
        let bytes = s.as_bytes();

        #[cfg(feature = "encoding")]
        {
            let mut this = Self::from_reader(bytes);
            this.state.encoding = EncodingRef::Explicit(UTF_8);
            this
        }

        #[cfg(not(feature = "encoding"))]
        Self::from_reader(bytes)
    }

    /// Reads the next event.
    ///
    /// Delegates to `read_event_impl`, passing `()` in place of a buffer; the
    /// returned event borrows from the input slice.
    #[inline]
    pub fn read_event(&mut self) -> Result<Event<'a>> {
        self.read_event_impl(())
    }

    /// Runs the shared `read_to_end!` macro for the tag named `end` and
    /// returns the [`Span`] it yields.
    ///
    /// The macro is driven with `read_event_impl`, `()` as the buffer and an
    /// empty block as its last argument. Errors raised inside the macro are
    /// propagated to the caller.
    pub fn read_to_end(&mut self, end: QName) -> Result<Span> {
        let span = read_to_end!(self, end, (), read_event_impl, {});
        Ok(span)
    }

    /// Reads up to the end tag named `end` (via [`Self::read_to_end`]) and
    /// returns the decoded text that was covered by that read.
    ///
    /// # Errors
    ///
    /// Propagates any error from [`Self::read_to_end`] or from the decoder.
    pub fn read_text(&mut self, end: QName) -> Result<Cow<'a, str>> {
        // Remember where the unread input starts *before* reading: the slice
        // stored in `self.reader` is replaced as input is consumed.
        let unread = self.reader;
        let text_len = self.read_to_end(end)?.len();

        // The span length is the number of bytes to take from the start of
        // the slice captured above.
        self.decoder().decode(&unread[..text_len])
    }
}
impl<'a> XmlSource<'a, ()> for &'a [u8] {
#[cfg(not(feature = "encoding"))]
fn remove_utf8_bom(&mut self) -> Result<()> {
if self.starts_with(crate::encoding::UTF8_BOM) {
*self = &self[crate::encoding::UTF8_BOM.len()..];
}
Ok(())
}
#[cfg(feature = "encoding")]
fn detect_encoding(&mut self) -> Result<Option<&'static Encoding>> {
if let Some((enc, bom_len)) = crate::encoding::detect_encoding(self) {
*self = &self[bom_len..];
return Ok(Some(enc));
}
Ok(None)
}
fn read_bytes_until(
&mut self,
byte: u8,
_buf: (),
position: &mut usize,
) -> Result<Option<&'a [u8]>> {
debug_assert!(byte.is_ascii());
if self.is_empty() {
return Ok(None);
}
Ok(Some(if let Some(i) = memchr::memchr(byte, self) {
*position += i + 1;
let bytes = &self[..i];
*self = &self[i + 1..];
bytes
} else {
*position += self.len();
let bytes = &self[..];
*self = &[];
bytes
}))
}
fn read_bang_element(
&mut self,
_buf: (),
position: &mut usize,
) -> Result<Option<(BangType, &'a [u8])>> {
debug_assert_eq!(self[0], b'!');
let bang_type = BangType::new(self[1..].first().copied())?;
if let Some((bytes, i)) = bang_type.parse(&[], self) {
*position += i;
*self = &self[i..];
return Ok(Some((bang_type, bytes)));
}
Err(bang_type.to_err())
}
fn read_element(&mut self, _buf: (), position: &mut usize) -> Result<Option<&'a [u8]>> {
if self.is_empty() {
return Ok(None);
}
let mut state = ReadElementState::Elem;
if let Some((bytes, i)) = state.change(self) {
*position += i;
*self = &self[i..];
return Ok(Some(bytes));
}
Err(Error::UnexpectedEof("Element".to_string()))
}
fn skip_whitespace(&mut self, position: &mut usize) -> Result<()> {
let whitespaces = self
.iter()
.position(|b| !is_whitespace(*b))
.unwrap_or(self.len());
*position += whitespaces;
*self = &self[whitespaces..];
Ok(())
}
fn skip_one(&mut self, byte: u8, position: &mut usize) -> Result<bool> {
debug_assert!(byte.is_ascii());
if self.first() == Some(&byte) {
*self = &self[1..];
*position += 1;
Ok(true)
} else {
Ok(false)
}
}
fn peek_one(&mut self) -> Result<Option<u8>> {
Ok(self.first().copied())
}
}
/// Unit tests for the `&[u8]` source (the implementation that takes `()` as
/// its buffer).
#[cfg(test)]
mod test {
use crate::reader::test::check;
use crate::reader::XmlSource;
/// Returns its argument unchanged.
///
/// It is handed to `check!` below. NOTE(review): presumably the macro calls it
/// to turn the test input into the source under test — confirm against the
/// macro definition in `crate::reader::test`.
fn identity<T>(input: T) -> T {
input
}
// Invokes the `check!` macro imported from `crate::reader::test` for this
// source. The arguments are: the attribute for generated tests, the
// `read_event_impl` and `read_until_close` identifiers, the `identity`
// function above, and `()` as the buffer value.
// NOTE(review): the set of generated tests lives in the macro definition,
// which is not visible from this file.
check!(
#[test]
read_event_impl,
read_until_close,
identity,
()
);
/// Tests that only make sense when the `encoding` feature is enabled.
#[cfg(feature = "encoding")]
mod encoding {
use crate::events::Event;
use crate::reader::Reader;
use encoding_rs::UTF_8;
use pretty_assertions::assert_eq;
/// A reader built with `Reader::from_str` must report UTF-8 both before
/// and after reading an XML declaration that names another encoding.
#[test]
fn str_always_has_utf8() {
let mut reader = Reader::from_str("<?xml encoding='UTF-16'?>");
// Before anything is read the decoder is already UTF-8.
assert_eq!(reader.decoder().encoding(), UTF_8);
// Consume the declaration, which names UTF-16 ...
reader.read_event().unwrap();
// ... and verify that the reported encoding did not change.
assert_eq!(reader.decoder().encoding(), UTF_8);
// Nothing else is left in the input.
assert_eq!(reader.read_event().unwrap(), Event::Eof);
}
}
}