pub mod attributes;
#[cfg(feature = "encoding")]
use encoding_rs::Encoding;
use std::borrow::Cow;
use std::fmt::{self, Debug, Formatter};
use std::ops::Deref;
use std::str::from_utf8;
use crate::encoding::Decoder;
use crate::errors::{Error, Result};
use crate::escape::{escape, partial_escape, unescape_with};
use crate::name::{LocalName, QName};
use crate::reader::is_whitespace;
use crate::utils::write_cow_string;
#[cfg(feature = "serialize")]
use crate::utils::CowRef;
use attributes::{Attribute, Attributes};
use std::mem::replace;
/// Content of an opening tag: the tag name followed by the raw attribute bytes.
///
/// Both parts live in the single `buf` buffer; `name_len` marks where the
/// name ends and the attribute text begins.
#[derive(Clone, Eq, PartialEq)]
pub struct BytesStart<'a> {
/// Tag content: bytes `[..name_len]` are the name, the remainder is the attribute text.
pub(crate) buf: Cow<'a, [u8]>,
/// Length in bytes of the name prefix stored at the front of `buf`.
pub(crate) name_len: usize,
}
impl<'a> BytesStart<'a> {
#[inline]
pub(crate) fn wrap(content: &'a [u8], name_len: usize) -> Self {
BytesStart {
buf: Cow::Borrowed(content),
name_len,
}
}
#[inline]
pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
let buf = str_cow_to_bytes(name);
BytesStart {
name_len: buf.len(),
buf,
}
}
#[inline]
pub fn from_content<C: Into<Cow<'a, str>>>(content: C, name_len: usize) -> Self {
BytesStart {
buf: str_cow_to_bytes(content),
name_len,
}
}
pub fn into_owned(self) -> BytesStart<'static> {
BytesStart {
buf: Cow::Owned(self.buf.into_owned()),
name_len: self.name_len,
}
}
pub fn to_owned(&self) -> BytesStart<'static> {
BytesStart {
buf: Cow::Owned(self.buf.clone().into_owned()),
name_len: self.name_len,
}
}
pub fn borrow(&self) -> BytesStart {
BytesStart {
buf: Cow::Borrowed(&self.buf),
name_len: self.name_len,
}
}
pub fn to_end(&self) -> BytesEnd {
BytesEnd::wrap(self.name().into_inner().into())
}
#[inline]
pub fn name(&self) -> QName {
QName(&self.buf[..self.name_len])
}
#[inline]
pub fn local_name(&self) -> LocalName {
self.name().into()
}
pub fn set_name(&mut self, name: &[u8]) -> &mut BytesStart<'a> {
let bytes = self.buf.to_mut();
bytes.splice(..self.name_len, name.iter().cloned());
self.name_len = name.len();
self
}
#[cfg(feature = "serialize")]
pub(crate) fn raw_name<'e>(&'e self) -> CowRef<'a, 'e, [u8]> {
match self.buf {
Cow::Borrowed(b) => CowRef::Input(&b[..self.name_len]),
Cow::Owned(ref o) => CowRef::Slice(&o[..self.name_len]),
}
}
}
impl<'a> BytesStart<'a> {
    /// Builder-style variant of [`extend_attributes`](Self::extend_attributes):
    /// appends every attribute and returns the tag by value.
    pub fn with_attributes<'b, I>(mut self, attributes: I) -> Self
    where
        I: IntoIterator,
        I::Item: Into<Attribute<'b>>,
    {
        for attribute in attributes {
            self.push_attribute(attribute);
        }
        self
    }

    /// Appends every attribute yielded by `attributes`, in order, and
    /// returns `self` for chaining.
    pub fn extend_attributes<'b, I>(&mut self, attributes: I) -> &mut BytesStart<'a>
    where
        I: IntoIterator,
        I::Item: Into<Attribute<'b>>,
    {
        attributes
            .into_iter()
            .for_each(|attribute| self.push_attribute(attribute));
        self
    }

    /// Appends a single attribute to the buffer as ` key="value"`.
    ///
    /// The key and value bytes of the converted `Attribute` are written
    /// verbatim. Converts the buffer to an owned one if it was borrowed.
    pub fn push_attribute<'b, A>(&mut self, attr: A)
    where
        A: Into<Attribute<'b>>,
    {
        let attribute = attr.into();
        let pieces: [&[u8]; 5] = [
            b" ",
            attribute.key.as_ref(),
            b"=\"",
            attribute.value.as_ref(),
            b"\"",
        ];
        let target = self.buf.to_mut();
        for piece in pieces.iter() {
            target.extend_from_slice(piece);
        }
    }

    /// Drops everything after the name, leaving a tag without attributes.
    pub fn clear_attributes(&mut self) -> &mut BytesStart<'a> {
        let name_len = self.name_len;
        self.buf.to_mut().truncate(name_len);
        self
    }

    /// Returns an attribute iterator over the bytes that follow the name.
    ///
    /// Passes `false` as the last argument of `Attributes::wrap`
    /// (presumably selecting strict XML parsing; confirm in `attributes`).
    pub fn attributes(&self) -> Attributes {
        let buffer = &self.buf;
        Attributes::wrap(buffer, self.name_len, false)
    }

    /// Same as [`attributes`](Self::attributes) but passes `true` as the last
    /// argument of `Attributes::wrap` (presumably selecting HTML-style
    /// parsing; confirm in `attributes`).
    pub fn html_attributes(&self) -> Attributes {
        let buffer = &self.buf;
        Attributes::wrap(buffer, self.name_len, true)
    }

    /// Returns the unparsed bytes that follow the name.
    #[inline]
    pub fn attributes_raw(&self) -> &[u8] {
        let start = self.name_len;
        &self.buf[start..]
    }

    /// Looks up the first attribute whose key equals `attr_name`.
    ///
    /// Iterates with `with_checks(false)`. Returns `Ok(None)` when no key
    /// matches, and the first attribute error met before a match otherwise.
    pub fn try_get_attribute<N: AsRef<[u8]> + Sized>(
        &'a self,
        attr_name: N,
    ) -> Result<Option<Attribute<'a>>> {
        for candidate in self.attributes().with_checks(false) {
            let attribute = candidate?;
            if attribute.key.as_ref() == attr_name.as_ref() {
                return Ok(Some(attribute));
            }
        }
        Ok(None)
    }
}
impl<'a> Debug for BytesStart<'a> {
    /// Formats as `BytesStart { buf: <buffer>, name_len: <n> }`, where the
    /// buffer is rendered by `write_cow_string`.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        f.write_str("BytesStart { buf: ")?;
        write_cow_string(f, &self.buf)?;
        write!(f, ", name_len: {} }}", self.name_len)
    }
}
impl<'a> Deref for BytesStart<'a> {
    type Target = [u8];

    /// Exposes the whole buffer (name and attribute text) as a byte slice.
    fn deref(&self) -> &[u8] {
        let bytes: &[u8] = &self.buf;
        bytes
    }
}
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesStart<'a> {
    /// Generates a tag whose name is a non-empty, purely alphanumeric string,
    /// followed by an arbitrary list of `(&str, &str)` attributes.
    ///
    /// Any other name is rejected with `IncorrectFormat`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let name = <&str>::arbitrary(u)?;
        let name_is_valid = !name.is_empty() && name.chars().all(char::is_alphanumeric);
        if !name_is_valid {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        let mut start = Self::new(name);
        let attributes = Vec::<(&str, &str)>::arbitrary(u)?;
        start.extend_attributes(attributes);
        Ok(start)
    }

    /// Reuses the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
/// XML declaration content, stored as a `BytesStart` so that its
/// pseudo-attributes (`version`, `encoding`, `standalone`) can be read
/// through the attribute iterator.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct BytesDecl<'a> {
/// Declaration content; `BytesDecl::new` builds it as `xml version="…" …`.
content: BytesStart<'a>,
}
impl<'a> BytesDecl<'a> {
    /// Builds declaration content of the form
    /// `xml version="…" encoding="…" standalone="…"`.
    ///
    /// `encoding` and `standalone` are written only when they are `Some`.
    /// All values are copied verbatim into a freshly allocated buffer, so the
    /// result does not borrow from the arguments.
    pub fn new(
        version: &str,
        encoding: Option<&str>,
        standalone: Option<&str>,
    ) -> BytesDecl<'static> {
        // ` encoding=""` contributes 12 bytes around the value.
        let encoding_attr_len = encoding.map_or(0, |value| 12 + value.len());
        // ` standalone=""` contributes 14 bytes around the value.
        let standalone_attr_len = standalone.map_or(0, |value| 14 + value.len());
        // `xml version=""` contributes 14 bytes around the version; the version
        // itself must be counted too, otherwise the buffer is always too
        // small and has to grow while it is being filled.
        let mut buf = String::with_capacity(
            14 + version.len() + encoding_attr_len + standalone_attr_len,
        );

        buf.push_str("xml version=\"");
        buf.push_str(version);

        // Each optional attribute first closes the quote of the previous value.
        if let Some(encoding_val) = encoding {
            buf.push_str("\" encoding=\"");
            buf.push_str(encoding_val);
        }
        if let Some(standalone_val) = standalone {
            buf.push_str("\" standalone=\"");
            buf.push_str(standalone_val);
        }
        buf.push('"');

        BytesDecl {
            // The name part is `xml`, hence a name length of 3.
            content: BytesStart::from_content(buf, 3),
        }
    }

    /// Wraps an existing `BytesStart` as declaration content.
    pub fn from_start(start: BytesStart<'a>) -> Self {
        Self { content: start }
    }

    /// Returns the value of the `version` attribute.
    ///
    /// Only the first attribute is inspected.
    ///
    /// # Errors
    ///
    /// - `Error::XmlDeclWithoutVersion(Some(key))` when the first attribute
    ///   has a different key;
    /// - `Error::XmlDeclWithoutVersion(None)` when there are no attributes;
    /// - the converted attribute error when the first attribute is malformed;
    /// - a UTF-8 error when the unexpected key is not valid UTF-8.
    pub fn version(&self) -> Result<Cow<[u8]>> {
        match self.content.attributes().with_checks(false).next() {
            Some(Ok(a)) if a.key.as_ref() == b"version" => Ok(a.value),
            Some(Ok(a)) => {
                let found = from_utf8(a.key.as_ref())?.to_string();
                Err(Error::XmlDeclWithoutVersion(Some(found)))
            }
            Some(Err(e)) => Err(e.into()),
            None => Err(Error::XmlDeclWithoutVersion(None)),
        }
    }

    /// Returns the value of the `encoding` attribute.
    ///
    /// `None` means the attribute is absent; `Some(Err(_))` means the
    /// attribute lookup failed.
    pub fn encoding(&self) -> Option<Result<Cow<[u8]>>> {
        self.content
            .try_get_attribute("encoding")
            .map(|a| a.map(|a| a.value))
            .transpose()
    }

    /// Returns the value of the `standalone` attribute.
    ///
    /// `None` means the attribute is absent; `Some(Err(_))` means the
    /// attribute lookup failed.
    pub fn standalone(&self) -> Option<Result<Cow<[u8]>>> {
        self.content
            .try_get_attribute("standalone")
            .map(|a| a.map(|a| a.value))
            .transpose()
    }

    /// Resolves the `encoding` attribute to an `Encoding` via
    /// `Encoding::for_label`.
    ///
    /// Returns `None` when the attribute is missing, cannot be read, or its
    /// label is not recognised.
    #[cfg(feature = "encoding")]
    pub fn encoder(&self) -> Option<&'static Encoding> {
        self.encoding()
            .and_then(|e| e.ok())
            .and_then(|e| Encoding::for_label(&e))
    }

    /// Consumes the declaration and returns a version that owns its buffer.
    pub fn into_owned(self) -> BytesDecl<'static> {
        BytesDecl {
            content: self.content.into_owned(),
        }
    }

    /// Returns a declaration that borrows this declaration's buffer.
    #[inline]
    pub fn borrow(&self) -> BytesDecl {
        BytesDecl {
            content: self.content.borrow(),
        }
    }
}
impl<'a> Deref for BytesDecl<'a> {
    type Target = [u8];

    /// Exposes the bytes of the wrapped `BytesStart`.
    fn deref(&self) -> &[u8] {
        let bytes: &[u8] = &self.content;
        bytes
    }
}
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesDecl<'a> {
    /// Generates a declaration from an arbitrary version string and
    /// arbitrary optional encoding and standalone strings, drawn in that order.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let version = <&str>::arbitrary(u)?;
        let encoding = Option::<&str>::arbitrary(u)?;
        let standalone = Option::<&str>::arbitrary(u)?;
        Ok(Self::new(version, encoding, standalone))
    }

    /// Reuses the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
/// Content of a closing tag: just the tag name.
#[derive(Clone, Eq, PartialEq)]
pub struct BytesEnd<'a> {
/// Raw bytes of the tag name.
name: Cow<'a, [u8]>,
}
impl<'a> BytesEnd<'a> {
#[inline]
pub(crate) fn wrap(name: Cow<'a, [u8]>) -> Self {
BytesEnd { name }
}
#[inline]
pub fn new<C: Into<Cow<'a, str>>>(name: C) -> Self {
Self::wrap(str_cow_to_bytes(name))
}
pub fn into_owned(self) -> BytesEnd<'static> {
BytesEnd {
name: Cow::Owned(self.name.into_owned()),
}
}
#[inline]
pub fn borrow(&self) -> BytesEnd {
BytesEnd {
name: Cow::Borrowed(&self.name),
}
}
#[inline]
pub fn name(&self) -> QName {
QName(&self.name)
}
#[inline]
pub fn local_name(&self) -> LocalName {
self.name().into()
}
}
impl<'a> Debug for BytesEnd<'a> {
    /// Formats as `BytesEnd { name: <name> }`, where the name is rendered by
    /// `write_cow_string`.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        f.write_str("BytesEnd { name: ")?;
        write_cow_string(f, &self.name)?;
        f.write_str(" }")
    }
}
impl<'a> Deref for BytesEnd<'a> {
    type Target = [u8];

    /// Exposes the name bytes.
    fn deref(&self) -> &[u8] {
        let bytes: &[u8] = &self.name;
        bytes
    }
}
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesEnd<'a> {
    /// Generates a closing tag named after an arbitrary string.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let name = <&str>::arbitrary(u)?;
        Ok(Self::new(name))
    }

    /// Reuses the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
/// Text content kept as raw bytes together with the `Decoder` that turns
/// those bytes into a string.
#[derive(Clone, Eq, PartialEq)]
pub struct BytesText<'a> {
/// Raw content bytes; `BytesText::new` stores them in escaped form.
content: Cow<'a, [u8]>,
/// Decoder applied to `content` before unescaping.
decoder: Decoder,
}
impl<'a> BytesText<'a> {
    /// Wraps raw content bytes together with the decoder that understands them.
    #[inline]
    pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
        let content = content.into();
        Self { content, decoder }
    }

    /// Creates text from a string that is stored as-is (no escaping is
    /// applied), paired with a UTF-8 decoder.
    #[inline]
    pub fn from_escaped<C: Into<Cow<'a, str>>>(content: C) -> Self {
        let bytes = str_cow_to_bytes(content);
        Self::wrap(bytes, Decoder::utf8())
    }

    /// Creates text from a plain string, passing it through `escape` first.
    #[inline]
    pub fn new(content: &'a str) -> Self {
        let escaped = escape(content);
        Self::from_escaped(escaped)
    }

    /// Consumes the text and returns a version that owns its content.
    #[inline]
    pub fn into_owned(self) -> BytesText<'static> {
        let Self { content, decoder } = self;
        BytesText {
            content: Cow::Owned(content.into_owned()),
            decoder,
        }
    }

    /// Consumes the text and returns the raw content bytes.
    #[inline]
    pub fn into_inner(self) -> Cow<'a, [u8]> {
        self.content
    }

    /// Returns text that borrows this text's content and shares its decoder.
    #[inline]
    pub fn borrow(&self) -> BytesText {
        let content: &[u8] = &self.content;
        BytesText {
            content: Cow::Borrowed(content),
            decoder: self.decoder,
        }
    }

    /// Decodes and unescapes the content using a resolver that knows no
    /// custom entities (it returns `None` for every name).
    pub fn unescape(&self) -> Result<Cow<'a, str>> {
        self.unescape_with(|_| None)
    }

    /// Decodes the content with the stored decoder, then unescapes it,
    /// passing `resolve_entity` to the unescaping routine.
    ///
    /// When unescaping changes nothing, the decoded text itself is returned.
    ///
    /// # Errors
    ///
    /// Returns the decoding or the unescaping error, whichever occurs first.
    pub fn unescape_with<'entity>(
        &self,
        resolve_entity: impl FnMut(&str) -> Option<&'entity str>,
    ) -> Result<Cow<'a, str>> {
        let decoded = match &self.content {
            Cow::Borrowed(bytes) => self.decoder.decode(bytes)?,
            // Decoding an owned buffer would borrow from `self`, so the
            // decoded text is copied to detach it from that borrow.
            Cow::Owned(bytes) => self.decoder.decode(bytes)?.into_owned().into(),
        };

        Ok(match unescape_with(&decoded, resolve_entity)? {
            Cow::Owned(replaced) => Cow::Owned(replaced),
            // Nothing was replaced: hand back the decoded text as it is.
            Cow::Borrowed(_) => decoded,
        })
    }

    /// Removes leading XML whitespace in place.
    ///
    /// Returns `true` when the content is empty afterwards.
    pub fn inplace_trim_start(&mut self) -> bool {
        let current = replace(&mut self.content, Cow::Borrowed(b""));
        self.content = trim_cow(current, trim_xml_start);
        self.content.is_empty()
    }

    /// Removes trailing XML whitespace in place.
    ///
    /// Returns `true` when the content is empty afterwards.
    pub fn inplace_trim_end(&mut self) -> bool {
        let current = replace(&mut self.content, Cow::Borrowed(b""));
        self.content = trim_cow(current, trim_xml_end);
        self.content.is_empty()
    }
}
impl<'a> Debug for BytesText<'a> {
    /// Formats as `BytesText { content: <content> }`; the decoder is not
    /// printed.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        f.write_str("BytesText { content: ")?;
        write_cow_string(f, &self.content)?;
        f.write_str(" }")
    }
}
impl<'a> Deref for BytesText<'a> {
    type Target = [u8];

    /// Exposes the raw content bytes.
    fn deref(&self) -> &[u8] {
        let bytes: &[u8] = &self.content;
        bytes
    }
}
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesText<'a> {
    /// Generates text from an arbitrary string made only of alphanumeric
    /// characters (the empty string is accepted).
    ///
    /// Any other string is rejected with `IncorrectFormat`.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let text = <&str>::arbitrary(u)?;
        if text.chars().any(|c| !c.is_alphanumeric()) {
            return Err(arbitrary::Error::IncorrectFormat);
        }
        Ok(Self::new(text))
    }

    /// Reuses the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
/// CDATA content kept as raw bytes together with the `Decoder` that turns
/// those bytes into a string.
#[derive(Clone, Eq, PartialEq)]
pub struct BytesCData<'a> {
/// Raw content bytes.
content: Cow<'a, [u8]>,
/// Decoder applied to `content` by `decode`.
decoder: Decoder,
}
impl<'a> BytesCData<'a> {
    /// Wraps raw content bytes together with the decoder that understands them.
    #[inline]
    pub(crate) fn wrap<C: Into<Cow<'a, [u8]>>>(content: C, decoder: Decoder) -> Self {
        Self {
            content: content.into(),
            decoder,
        }
    }

    /// Creates CDATA content from a string, stored verbatim and paired with a
    /// UTF-8 decoder.
    #[inline]
    pub fn new<C: Into<Cow<'a, str>>>(content: C) -> Self {
        Self::wrap(str_cow_to_bytes(content), Decoder::utf8())
    }

    /// Consumes the value and returns a version that owns its content.
    #[inline]
    pub fn into_owned(self) -> BytesCData<'static> {
        BytesCData {
            content: self.content.into_owned().into(),
            decoder: self.decoder,
        }
    }

    /// Consumes the value and returns the raw content bytes.
    #[inline]
    pub fn into_inner(self) -> Cow<'a, [u8]> {
        self.content
    }

    /// Returns a value that borrows this content and shares its decoder.
    #[inline]
    pub fn borrow(&self) -> BytesCData {
        BytesCData {
            content: Cow::Borrowed(&self.content),
            decoder: self.decoder,
        }
    }

    /// Converts the content into escaped text by decoding it and passing the
    /// result through `escape`.
    ///
    /// The returned `BytesText` always carries a UTF-8 decoder.
    ///
    /// # Errors
    ///
    /// Returns the decoding error when the content cannot be decoded.
    pub fn escape(self) -> Result<BytesText<'a>> {
        let decoded = self.decode()?;
        Ok(BytesText::wrap(
            match escape(&decoded) {
                // Nothing had to be escaped. Reuse the decoded text rather
                // than `self.content`: the result is tagged as UTF-8, so it
                // must hold the decoded bytes even if this event's decoder
                // is not a UTF-8 one.
                Cow::Borrowed(_) => str_cow_to_bytes(decoded),
                Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
            },
            Decoder::utf8(),
        ))
    }

    /// Same as [`escape`](Self::escape) but uses `partial_escape` as the
    /// escaping routine.
    ///
    /// The returned `BytesText` always carries a UTF-8 decoder.
    ///
    /// # Errors
    ///
    /// Returns the decoding error when the content cannot be decoded.
    pub fn partial_escape(self) -> Result<BytesText<'a>> {
        let decoded = self.decode()?;
        Ok(BytesText::wrap(
            match partial_escape(&decoded) {
                // See `escape`: the decoded form is what matches the UTF-8
                // decoder attached to the result.
                Cow::Borrowed(_) => str_cow_to_bytes(decoded),
                Cow::Owned(escaped) => Cow::Owned(escaped.into_bytes()),
            },
            Decoder::utf8(),
        ))
    }

    /// Decodes the content with the stored decoder.
    ///
    /// For owned content the decoded text is copied so that the result does
    /// not borrow from `self`.
    pub(crate) fn decode(&self) -> Result<Cow<'a, str>> {
        Ok(match &self.content {
            Cow::Borrowed(bytes) => self.decoder.decode(bytes)?,
            Cow::Owned(bytes) => self.decoder.decode(bytes)?.into_owned().into(),
        })
    }
}
impl<'a> Debug for BytesCData<'a> {
    /// Formats as `BytesCData { content: <content> }`; the decoder is not
    /// printed.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        f.write_str("BytesCData { content: ")?;
        write_cow_string(f, &self.content)?;
        f.write_str(" }")
    }
}
impl<'a> Deref for BytesCData<'a> {
    type Target = [u8];

    /// Exposes the raw content bytes.
    fn deref(&self) -> &[u8] {
        let bytes: &[u8] = &self.content;
        bytes
    }
}
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for BytesCData<'a> {
    /// Generates CDATA content from an arbitrary string.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let content = <&str>::arbitrary(u)?;
        Ok(Self::new(content))
    }

    /// Reuses the size hint of `&str`.
    fn size_hint(depth: usize) -> (usize, Option<usize>) {
        <&str as arbitrary::Arbitrary>::size_hint(depth)
    }
}
/// One XML event; each variant carries the payload type for that kind of markup.
///
/// Every variant dereferences to the raw bytes of its payload, except `Eof`,
/// which dereferences to an empty slice.
#[derive(Clone, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
pub enum Event<'a> {
/// Opening tag.
Start(BytesStart<'a>),
/// Closing tag.
End(BytesEnd<'a>),
/// Empty-element tag; shares the payload type of `Start`.
Empty(BytesStart<'a>),
/// Text content.
Text(BytesText<'a>),
/// CDATA content.
CData(BytesCData<'a>),
/// Comment content.
Comment(BytesText<'a>),
/// XML declaration.
Decl(BytesDecl<'a>),
/// Processing instruction content (presumably; inferred from the variant name).
PI(BytesText<'a>),
/// Document type declaration content.
DocType(BytesText<'a>),
/// End of input; carries no data.
Eof,
}
impl<'a> Event<'a> {
pub fn into_owned(self) -> Event<'static> {
match self {
Event::Start(e) => Event::Start(e.into_owned()),
Event::End(e) => Event::End(e.into_owned()),
Event::Empty(e) => Event::Empty(e.into_owned()),
Event::Text(e) => Event::Text(e.into_owned()),
Event::Comment(e) => Event::Comment(e.into_owned()),
Event::CData(e) => Event::CData(e.into_owned()),
Event::Decl(e) => Event::Decl(e.into_owned()),
Event::PI(e) => Event::PI(e.into_owned()),
Event::DocType(e) => Event::DocType(e.into_owned()),
Event::Eof => Event::Eof,
}
}
#[inline]
pub fn borrow(&self) -> Event {
match self {
Event::Start(e) => Event::Start(e.borrow()),
Event::End(e) => Event::End(e.borrow()),
Event::Empty(e) => Event::Empty(e.borrow()),
Event::Text(e) => Event::Text(e.borrow()),
Event::Comment(e) => Event::Comment(e.borrow()),
Event::CData(e) => Event::CData(e.borrow()),
Event::Decl(e) => Event::Decl(e.borrow()),
Event::PI(e) => Event::PI(e.borrow()),
Event::DocType(e) => Event::DocType(e.borrow()),
Event::Eof => Event::Eof,
}
}
}
impl<'a> Deref for Event<'a> {
    type Target = [u8];

    /// Exposes the raw bytes of the payload; `Eof` yields an empty slice.
    fn deref(&self) -> &[u8] {
        match self {
            Event::Start(start) | Event::Empty(start) => start,
            Event::End(end) => end,
            Event::Text(text) => text,
            Event::CData(cdata) => cdata,
            Event::Comment(text) => text,
            Event::Decl(decl) => decl,
            Event::PI(text) => text,
            Event::DocType(text) => text,
            Event::Eof => &[],
        }
    }
}
/// Identity `AsRef` implementation: an `Event` can be viewed as a reference
/// to itself.
impl<'a> AsRef<Event<'a>> for Event<'a> {
/// Returns `self` unchanged.
fn as_ref(&self) -> &Event<'a> {
self
}
}
/// Reinterprets a string `Cow` as a byte `Cow` without copying.
///
/// A borrowed string stays borrowed (viewed as its UTF-8 bytes); an owned
/// string hands its buffer over to the resulting `Vec<u8>`.
#[inline]
fn str_cow_to_bytes<'a, C>(content: C) -> Cow<'a, [u8]>
where
    C: Into<Cow<'a, str>>,
{
    let text: Cow<'a, str> = content.into();
    match text {
        Cow::Owned(string) => Cow::Owned(string.into_bytes()),
        Cow::Borrowed(slice) => Cow::Borrowed(slice.as_bytes()),
    }
}
/// Returns `bytes` without its leading run of bytes for which
/// `is_whitespace` is true.
///
/// Written with slice patterns so that it can remain a `const fn`.
const fn trim_xml_start(mut bytes: &[u8]) -> &[u8] {
    loop {
        match bytes {
            [head, tail @ ..] => {
                if !is_whitespace(*head) {
                    return bytes;
                }
                bytes = tail;
            }
            [] => return bytes,
        }
    }
}
/// Returns `bytes` without its trailing run of bytes for which
/// `is_whitespace` is true.
///
/// Written with slice patterns so that it can remain a `const fn`.
const fn trim_xml_end(mut bytes: &[u8]) -> &[u8] {
    loop {
        match bytes {
            [init @ .., tail] => {
                if !is_whitespace(*tail) {
                    return bytes;
                }
                bytes = init;
            }
            [] => return bytes,
        }
    }
}
/// Applies `trim` to the bytes held by `value`, keeping the ownership kind.
///
/// A borrowed value is narrowed to the trimmed sub-slice. An owned value is
/// returned untouched when the trimmed length equals the original length,
/// and is replaced by a fresh copy of the trimmed bytes otherwise.
fn trim_cow<'a, F>(value: Cow<'a, [u8]>, trim: F) -> Cow<'a, [u8]>
where
    F: FnOnce(&[u8]) -> &[u8],
{
    let owned = match value {
        Cow::Borrowed(slice) => return Cow::Borrowed(trim(slice)),
        Cow::Owned(vec) => vec,
    };

    let kept = trim(&owned);
    if kept.len() == owned.len() {
        // Nothing was removed: keep the existing allocation.
        Cow::Owned(owned)
    } else {
        Cow::Owned(kept.to_vec())
    }
}
// Unit tests for the buffer layout of `BytesStart` (name followed by attribute text).
#[cfg(test)]
mod test {
use super::*;
use pretty_assertions::assert_eq;
// A freshly created tag consists of the name only.
#[test]
fn bytestart_create() {
let b = BytesStart::new("test");
assert_eq!(b.len(), 4);
assert_eq!(b.name(), QName(b"test"));
}
// Renaming replaces only the name prefix and keeps the attribute text.
#[test]
fn bytestart_set_name() {
let mut b = BytesStart::new("test");
assert_eq!(b.len(), 4);
assert_eq!(b.name(), QName(b"test"));
assert_eq!(b.attributes_raw(), b"");
b.push_attribute(("x", "a"));
// "test" (4 bytes) + ` x="a"` (6 bytes)
assert_eq!(b.len(), 10);
assert_eq!(b.attributes_raw(), b" x=\"a\"");
b.set_name(b"g");
// "g" (1 byte) + ` x="a"` (6 bytes)
assert_eq!(b.len(), 7);
assert_eq!(b.name(), QName(b"g"));
}
// Clearing attributes truncates the buffer back to the name.
#[test]
fn bytestart_clear_attributes() {
let mut b = BytesStart::new("test");
b.push_attribute(("x", "y\"z"));
b.push_attribute(("x", "y\"z"));
b.clear_attributes();
assert!(b.attributes().next().is_none());
assert_eq!(b.len(), 4);
assert_eq!(b.name(), QName(b"test"));
}
}