use std::borrow::Cow;
use std::fmt::{self, Write};
use crate::util::make_owned;
use crate::{entities, is_blank, is_sgml_whitespace};
pub use Data::*;
/// A piece of text tagged with how its `&` characters are to be treated.
///
/// Both variants wrap a `Cow<str>`, so the text may be borrowed or owned.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Data<'a> {
/// Text taken verbatim: `verbatim()` returns `true`, `escape()` escapes `&`
/// in addition to `<` and `>`, and the `expand_*` methods return it unchanged.
CData(Cow<'a, str>),
/// Text that has not been through expansion yet: `verbatim()` returns
/// `false`, `escape()` leaves `&` untouched, and a successful `expand_*`
/// call turns it into `CData`.
RcData(Cow<'a, str>),
}
impl<'a> Data<'a> {
    /// Reports whether this value is the `CData` variant.
    ///
    /// [`escape`](Self::escape) uses this flag to decide whether `&` must be
    /// escaped.
    pub fn verbatim(&self) -> bool {
        match self {
            Self::RcData(_) => false,
            Self::CData(_) => true,
        }
    }

    /// Borrows the contained text, regardless of variant.
    pub fn as_str(&self) -> &str {
        match self {
            Self::CData(text) | Self::RcData(text) => text,
        }
    }

    /// Unwraps the value into its underlying `Cow`, discarding the variant.
    #[cfg_attr(feature = "deserialize", allow(unused))]
    pub(crate) fn into_cow(self) -> Cow<'a, str> {
        match self {
            Self::CData(text) | Self::RcData(text) => text,
        }
    }

    /// Returns the result of the crate-level `is_blank` check on the text.
    pub fn is_blank(&self) -> bool {
        let text = self.as_str();
        is_blank(text)
    }

    /// Strips leading and trailing characters matched by `is_sgml_whitespace`,
    /// keeping the variant.
    ///
    /// Borrowed text stays borrowed. Owned text is reused as-is when there is
    /// nothing to strip, and copied into a new `String` otherwise.
    pub fn trim(self) -> Data<'a> {
        fn strip(text: Cow<str>) -> Cow<str> {
            match text {
                Cow::Borrowed(slice) => Cow::Borrowed(slice.trim_matches(is_sgml_whitespace)),
                Cow::Owned(owned) => {
                    let kept = owned.trim_matches(is_sgml_whitespace);
                    if kept.len() != owned.len() {
                        Cow::Owned(kept.to_owned())
                    } else {
                        // Nothing was removed: hand the original allocation back.
                        Cow::Owned(owned)
                    }
                }
            }
        }

        match self {
            Self::RcData(text) => Self::RcData(strip(text)),
            Self::CData(text) => Self::CData(strip(text)),
        }
    }

    /// Converts the text with `make_owned` so the result no longer borrows
    /// from the input, keeping the variant.
    pub fn into_owned(self) -> Data<'static> {
        match self {
            Data::RcData(text) => Data::RcData(make_owned(text)),
            Data::CData(text) => Data::CData(make_owned(text)),
        }
    }

    /// Returns an iterator that yields the text with `<` and `>` replaced by
    /// `&#60;` and `&#62;`.
    ///
    /// `&` is replaced by `&#38;` only when [`verbatim`](Self::verbatim) is
    /// `true`.
    pub fn escape(&self) -> EscapeData {
        let escape_ampersand = self.verbatim();
        let chars = self.as_str().chars();
        EscapeData {
            escape_ampersand,
            chars,
            escape_buffer: None,
        }
    }

    /// Runs `entities::expand_character_references` over `RcData` text and
    /// returns the outcome as `CData`; `CData` input is returned untouched.
    ///
    /// When expansion leaves the text unchanged, the original `Cow` is reused.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by `entities::expand_character_references`.
    pub fn expand_character_references(self) -> entities::Result<Self> {
        let raw = match self {
            CData(_) => return Ok(self),
            RcData(raw) => raw,
        };
        let expanded = entities::expand_character_references(&raw)?;
        if expanded == *raw {
            return Ok(CData(raw));
        }
        Ok(CData(expanded.into_owned().into()))
    }

    /// Runs `entities::expand_entities` over `RcData` text and returns the
    /// outcome as `CData`; `CData` input is returned untouched.
    ///
    /// `f` is forwarded unchanged to `entities::expand_entities` (presumably it
    /// maps an entity name to its replacement text; confirm against that
    /// function). When expansion leaves the text unchanged, the original `Cow`
    /// is reused.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by `entities::expand_entities`.
    pub fn expand_entities<F, T>(self, f: F) -> entities::Result<Self>
    where
        F: FnMut(&str) -> Option<T>,
        T: AsRef<str>,
    {
        let raw = match self {
            CData(_) => return Ok(self),
            RcData(raw) => raw,
        };
        let expanded = entities::expand_entities(&raw, f)?;
        if expanded == *raw {
            return Ok(CData(raw));
        }
        Ok(CData(expanded.into_owned().into()))
    }
}
impl Default for Data<'_> {
fn default() -> Self {
Data::CData(Cow::Borrowed(""))
}
}
/// Character iterator that replaces markup-significant characters with
/// numeric character references.
///
/// `<` is emitted as `&#60;`, `>` as `&#62;`, and, only when
/// `escape_ampersand` is set, `&` as `&#38;`. Every other character passes
/// through unchanged.
#[derive(Clone, Debug)]
pub struct EscapeData<'a> {
    /// Whether `&` is rewritten as `&#38;`.
    escape_ampersand: bool,
    /// Input characters not yet consumed.
    chars: std::str::Chars<'a>,
    /// Remainder of the reference currently being emitted (everything after
    /// its leading `&`), if any.
    escape_buffer: Option<std::slice::Iter<'static, u8>>,
}

impl<'a> Iterator for EscapeData<'a> {
    type Item = char;

    fn next(&mut self) -> Option<Self::Item> {
        // Finish emitting a pending reference before reading more input.
        if let Some(pending) = self.escape_buffer.as_mut() {
            if let Some(&byte) = pending.next() {
                return Some(char::from(byte));
            }
            self.escape_buffer = None;
        }

        let c = self.chars.next()?;
        let tail: &'static [u8] = match c {
            '<' => b"#60;",
            '>' => b"#62;",
            '&' if self.escape_ampersand => b"#38;",
            _ => return Some(c),
        };
        // The reference starts with `&`; the rest is queued for later calls.
        self.escape_buffer = Some(tail.iter());
        Some('&')
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        let pending = self.escape_buffer.as_ref().map_or(0, |tail| tail.len());
        let (lower, upper) = self.chars.size_hint();
        // Worst case: every remaining input char expands to a 5-char reference.
        let upper = upper.and_then(|n| n.checked_mul(5)?.checked_add(pending));
        (lower + pending, upper)
    }
}

/// Writes the escaped text character by character, leaving `self` unconsumed.
impl<'a> fmt::Display for EscapeData<'a> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        for c in self.clone() {
            f.write_char(c)?;
        }
        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a borrowed `CData` value from `s`.
    fn cdata(s: &str) -> Data {
        CData(s.into())
    }

    /// Builds a borrowed `RcData` value from `s`.
    fn rcdata(s: &str) -> Data {
        RcData(s.into())
    }

    /// Text without `<`, `>` or `&` must come out unchanged for both variants.
    #[test]
    fn test_escape_data_noop() {
        assert_eq!(cdata("hello!").escape().to_string(), "hello!");
        assert_eq!(rcdata("hello!").escape().to_string(), "hello!");
    }

    /// `<` and `>` are always escaped; `&` is escaped only for `CData`.
    #[test]
    fn test_escape_data_sequences() {
        // CData is verbatim, so its `&` characters are escaped too.
        assert_eq!(
            cdata("hello && <world>").escape().to_string(),
            "hello &#38;&#38; &#60;world&#62;"
        );
        // RcData keeps `&` as-is; only the angle brackets are rewritten.
        assert_eq!(
            rcdata("hello && <world>").escape().to_string(),
            "hello && &#60;world&#62;"
        );
    }

    /// Steps through the iterator and checks `size_hint` after every item.
    ///
    /// The lower bound comes from `Chars::size_hint` (remaining bytes divided
    /// by four, rounded up) plus the pending escape tail; the upper bound is
    /// five times the remaining bytes plus the pending escape tail.
    #[test]
    fn test_escape_data_iter() {
        let data = rcdata("wo<rld");
        let mut escape = data.escape();
        assert_eq!(escape.size_hint(), (2, Some(30)));
        assert_eq!(escape.next(), Some('w'));
        assert_eq!(escape.size_hint(), (2, Some(25)));
        assert_eq!(escape.next(), Some('o'));
        assert_eq!(escape.size_hint(), (1, Some(20)));
        assert_eq!(escape.next(), Some('&'));
        // From here on the first term is the number of queued escape bytes.
        assert_eq!(escape.size_hint(), (4 + 1, Some(4 + 15)));
        assert_eq!(escape.next(), Some('#'));
        assert_eq!(escape.size_hint(), (3 + 1, Some(3 + 15)));
        assert_eq!(escape.next(), Some('6'));
        assert_eq!(escape.size_hint(), (2 + 1, Some(2 + 15)));
        assert_eq!(escape.next(), Some('0'));
        assert_eq!(escape.size_hint(), (1 + 1, Some(1 + 15)));
        assert_eq!(escape.next(), Some(';'));
        assert_eq!(escape.size_hint(), (0 + 1, Some(0 + 15)));
        assert_eq!(escape.next(), Some('r'));
        assert_eq!(escape.size_hint(), (1, Some(10)));
        assert_eq!(escape.next(), Some('l'));
        assert_eq!(escape.size_hint(), (1, Some(5)));
        assert_eq!(escape.next(), Some('d'));
        assert_eq!(escape.size_hint(), (0, Some(0)));
        assert_eq!(escape.next(), None);
        assert_eq!(escape.size_hint(), (0, Some(0)));
    }
}