use crate::shared::syntax::*;
use std::convert::TryFrom;
use std::fmt::{Display, Formatter, Result as FmtResult};
use std::str::FromStr;
/// The two whitespace-handling modes that this file can render as an
/// attribute (via `Display`) and parse back from the attribute's value (via
/// `FromStr`).
///
/// The attribute name and the textual spelling of each mode come from
/// constants in `crate::shared::syntax` (presumably the `xml:space`
/// attribute; confirm against that module).
///
/// The type is a fieldless enum, so it is cheap to copy and has a total
/// equality relation; `Copy` and `Eq` are derived to reflect that.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum SpaceHandling {
    /// The mode returned by `SpaceHandling::default()`.
    Default,
    /// The non-default mode.
    Preserve,
}
/// Looks up the replacement text for an entity reference.
///
/// Within this file the only caller is `normalize_attribute_value`, which
/// passes the complete reference text as matched in the attribute value,
/// i.e. including the leading `&` or `%` and the trailing `;`.
pub(crate) trait EntityResolver {
/// Returns the replacement text for `entity`, or `None` when the entity is
/// not known to this resolver.
///
/// Note that `normalize_attribute_value` panics when it receives `None`.
fn resolve(&self, entity: &str) -> Option<String>;
}
/// Normalizes the raw text of an attribute value, following the XML
/// attribute-value normalization algorithm.
///
/// Processing order:
/// 1. Line ends are normalized with `normalize_end_of_lines`.
/// 2. Entity references (`&name;` / `%name;`) are replaced by the
///    recursively expanded text returned by `resolver`; character references
///    (`&#N;`, `&#xH;`) are replaced by the character they denote; literal
///    tab, line-feed and carriage-return characters become a single space.
/// 3. Unless `is_cdata` is true, leading and trailing spaces are removed and
///    every run of spaces is collapsed into one space.
///
/// Step 3 is applied exactly once, to the fully expanded value. Replacement
/// text of an entity is therefore never trimmed on its own, so an entity that
/// expands to `" x "` in the middle of a value keeps its separating spaces.
///
/// # Panics
///
/// * If `resolver` returns `None` for an entity reference.
/// * If a character reference does not denote a valid Unicode scalar value
///   (the panic originates in `char_from_entity`).
pub(crate) fn normalize_attribute_value(
    value: &str,
    resolver: &dyn EntityResolver,
    is_cdata: bool,
) -> String {
    // Compiled once per top-level call and shared by every nested expansion.
    // The decimal character reference uses `[0-9]` rather than `\d`: `\d` is
    // Unicode-aware and would accept digits that the integer parser rejects.
    let find = regex::Regex::new(
        r"(?P<entity_ref>[&%][\pL_][\pL\.\d_\-]*;)|(?P<char>&#[0-9]+;)|(?P<char_hex>&#x[0-9a-fA-F]+;)|(?P<ws>[\u{09}\u{0A}\u{0D}])",
    )
    .expect("attribute reference pattern is a valid regular expression");

    let expanded = expand_attribute_references(value, resolver, &find);

    if is_cdata {
        expanded
    } else {
        // Splitting on single spaces and dropping the empty pieces both trims
        // the ends and collapses interior runs of spaces.
        expanded
            .split(' ')
            .filter(|piece| !piece.is_empty())
            .collect::<Vec<_>>()
            .join(" ")
    }
}

/// Expands entity references, character references and literal whitespace in
/// `text` without trimming or collapsing spaces in the result.
fn expand_attribute_references(
    text: &str,
    resolver: &dyn EntityResolver,
    find: &regex::Regex,
) -> String {
    let text = normalize_end_of_lines(text);
    if text.is_empty() {
        return text;
    }

    let mut expanded = String::with_capacity(text.len());
    let mut last_end = 0;
    for capture in find.captures_iter(&text) {
        let (a_match, replacement) = if let Some(a_match) = capture.name("entity_ref") {
            let replacement = match resolver.resolve(a_match.as_str()) {
                None => panic!("unknown entity reference {}", a_match.as_str()),
                // Replacement text may itself contain references.
                Some(replacement) => expand_attribute_references(&replacement, resolver, find),
            };
            (a_match, replacement)
        } else if let Some(a_match) = capture
            .name("char")
            .or_else(|| capture.name("char_hex"))
        {
            // The referenced character is inserted verbatim; it is not
            // rescanned, so `&#38;` cannot start a new reference.
            (a_match, char_from_entity(a_match.as_str()))
        } else if let Some(a_match) = capture.name("ws") {
            (a_match, "\u{20}".to_string())
        } else {
            unreachable!("every alternative of the pattern is a named group");
        };
        // Copy the untouched text between the previous match and this one.
        expanded.push_str(&text[last_end..a_match.start()]);
        expanded.push_str(&replacement);
        last_end = a_match.end();
    }
    // Tail after the last match, measured against the normalized text.
    expanded.push_str(&text[last_end..]);
    expanded
}
/// Replaces every line-end sequence in `value` with a single line feed
/// (U+000A).
///
/// The recognized sequences are:
/// * carriage return (U+000D) followed by line feed (U+000A),
/// * carriage return followed by next-line (U+0085),
/// * a lone carriage return,
/// * a lone next-line (U+0085),
/// * a line separator (U+2028).
///
/// The scan is a single pass over the characters, so no regular expression
/// has to be compiled on each call.
pub(crate) fn normalize_end_of_lines(value: &str) -> String {
    let mut normalized = String::with_capacity(value.len());
    let mut chars = value.chars().peekable();
    while let Some(c) = chars.next() {
        match c {
            '\u{0D}' => {
                // A carriage return absorbs one directly following LF or NEL
                // so that the pair yields a single line feed.
                if matches!(chars.peek(), Some(&'\u{0A}') | Some(&'\u{85}')) {
                    chars.next();
                }
                normalized.push('\u{0A}');
            }
            '\u{85}' | '\u{2028}' => normalized.push('\u{0A}'),
            other => normalized.push(other),
        }
    }
    normalized
}
pub(crate) fn escape(input: &str) -> String {
let mut result = String::with_capacity(input.len());
for c in input.chars() {
match c {
XML_ESC_AMP_CHAR => result.push_str(&to_entity(XML_ESC_AMP_CHAR)),
XML_ESC_APOS_CHAR => result.push_str(&to_entity(XML_ESC_APOS_CHAR)),
XML_ESC_GT_CHAR => result.push_str(&to_entity(XML_ESC_GT_CHAR)),
XML_ESC_LT_CHAR => result.push_str(&to_entity(XML_ESC_LT_CHAR)),
XML_ESC_QUOT_CHAR => result.push_str(&to_entity(XML_ESC_QUOT_CHAR)),
o => result.push(o),
}
}
result
}
/// Formats `c` as a decimal numeric character reference: the
/// `XML_NUMBERED_ENTITYREF_START` prefix, the code point in decimal, and the
/// `XML_ENTITYREF_END` suffix.
///
/// The code point is widened to `u32` so that characters above U+FFFF keep
/// their full value instead of being truncated to 16 bits.
pub(crate) fn to_entity(c: char) -> String {
    format!(
        "{}{}{}",
        XML_NUMBERED_ENTITYREF_START,
        u32::from(c),
        XML_ENTITYREF_END
    )
}
/// Formats `c` as a hexadecimal numeric character reference: the
/// `XML_HEX_NUMBERED_ENTITYREF_START` prefix, the code point in upper-case
/// hexadecimal, and the `XML_ENTITYREF_END` suffix.
///
/// The code point is widened to `u32` so that characters above U+FFFF keep
/// their full value instead of being truncated to 16 bits.
#[allow(dead_code)]
pub(crate) fn to_entity_hex(c: char) -> String {
    format!(
        "{}{:X}{}",
        XML_HEX_NUMBERED_ENTITYREF_START,
        u32::from(c),
        XML_ENTITYREF_END
    )
}
/// Decodes a numeric character reference (`&#N;` in decimal or `&#xH;` in
/// hexadecimal) into a one-character string.
///
/// # Panics
///
/// Panics if `entity` does not start with `&#` and end with `;`, if the
/// digits cannot be parsed as a `u32` in the selected radix, or if the
/// resulting number is not a valid Unicode scalar value.
fn char_from_entity(entity: &str) -> String {
    assert!(entity.starts_with("&#"));
    assert!(entity.ends_with(';'));
    // Index of the closing `;`; the digits end just before it.
    let digits_end = entity.len() - 1;
    // An `x` right after `&#` selects hexadecimal notation.
    let (radix, digits) = match &entity[2..3] {
        "x" => (16, &entity[3..digits_end]),
        _ => (10, &entity[2..digits_end]),
    };
    let code_point = u32::from_str_radix(digits, radix).unwrap();
    char::try_from(code_point).unwrap().to_string()
}
/// Returns `true` if `c` matches the XML 1.0 `Char` production:
/// `#x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]`.
///
/// The last range runs to U+10FFFF, the final Unicode code point, so all
/// supplementary-plane characters are accepted.
#[allow(dead_code)]
pub(crate) fn is_xml_10_char(c: char) -> bool {
    matches!(
        c,
        '\u{0009}'
            | '\u{000A}'
            | '\u{000D}'
            | '\u{0020}'..='\u{D7FF}'
            | '\u{E000}'..='\u{FFFD}'
            | '\u{10000}'..='\u{10FFFF}'
    )
}
#[allow(dead_code)]
pub(crate) fn is_xml_10_restricted_char(c: char) -> bool {
c == XML_ESC_AMP_CHAR
|| c == XML_ESC_APOS_CHAR
|| c == XML_ESC_GT_CHAR
|| c == XML_ESC_LT_CHAR
|| c == XML_ESC_QUOT_CHAR
}
/// Returns `true` if `c` matches the XML 1.1 `Char` production:
/// `[#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]`.
///
/// The last range runs to U+10FFFF, the final Unicode code point, so all
/// supplementary-plane characters are accepted.
#[allow(dead_code)]
pub(crate) fn is_xml_11_char(c: char) -> bool {
    matches!(
        c,
        '\u{0001}'..='\u{D7FF}' | '\u{E000}'..='\u{FFFD}' | '\u{10000}'..='\u{10FFFF}'
    )
}
/// Returns `true` if `c` falls in one of the control-character ranges
/// U+0001-U+0008, U+000B-U+000C, U+000E-U+001F, U+007F-U+0084 or
/// U+0086-U+009F.
///
/// Tab, line feed, carriage return and U+0085 are deliberately outside these
/// ranges.
#[allow(dead_code)]
pub(crate) fn is_xml_11_restricted_char(c: char) -> bool {
    matches!(
        c,
        '\u{01}'..='\u{08}'
            | '\u{0B}'..='\u{0C}'
            | '\u{0E}'..='\u{1F}'
            | '\u{7F}'..='\u{84}'
            | '\u{86}'..='\u{9F}'
    )
}
/// Returns `true` if `c` is tab (U+0009), line feed (U+000A), carriage
/// return (U+000D) or space (U+0020).
#[allow(dead_code)]
pub(crate) fn is_xml_space(c: char) -> bool {
    matches!(c, '\u{09}' | '\u{0A}' | '\u{0D}' | '\u{20}')
}
/// Returns `true` if `c` may begin a name: `:`, `_`, an ASCII letter, or a
/// character in one of the non-ASCII ranges listed below.
#[allow(dead_code)]
pub(crate) fn is_xml_name_start_char(c: char) -> bool {
    matches!(
        c,
        ':' | '_'
            | 'A'..='Z'
            | 'a'..='z'
            | '\u{C0}'..='\u{D6}'
            | '\u{D8}'..='\u{F6}'
            | '\u{0F8}'..='\u{2FF}'
            | '\u{370}'..='\u{37D}'
            | '\u{037F}'..='\u{1FFF}'
            | '\u{200C}'..='\u{200D}'
            | '\u{2070}'..='\u{218F}'
            | '\u{2C00}'..='\u{2FEF}'
            | '\u{3001}'..='\u{D7FF}'
            | '\u{F900}'..='\u{FDCF}'
            | '\u{FDF0}'..='\u{FFFD}'
            | '\u{10000}'..='\u{EFFFF}'
    )
}
/// Returns `true` if `c` may appear anywhere in a name: every character
/// accepted by `is_xml_name_start_char`, plus `-`, `.`, ASCII digits, U+00B7,
/// U+0300-U+036F and U+203F-U+2040.
pub(crate) fn is_xml_name_char(c: char) -> bool {
    match c {
        // Characters that are allowed in a name but not as its first character.
        '-' | '.' | '0'..='9' | '\u{B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}' => {
            true
        }
        _ => is_xml_name_start_char(c),
    }
}
pub(crate) fn is_xml_name(s: &str) -> bool {
!s.is_empty() && s.starts_with(is_xml_name_start_char) && s[1..].chars().all(is_xml_name_char)
}
/// Returns `true` if `s` is non-empty and every piece obtained by splitting
/// it on single space characters satisfies `is_xml_name`.
///
/// Consecutive spaces produce an empty piece and therefore make the whole
/// string invalid.
#[allow(dead_code)]
pub(crate) fn is_xml_names(s: &str) -> bool {
    if s.is_empty() {
        return false;
    }
    s.split(' ').all(is_xml_name)
}
/// Returns `true` if `s` is non-empty and consists solely of characters that
/// satisfy `is_xml_name_char`.
#[allow(dead_code)]
pub(crate) fn is_xml_nmtoken(s: &str) -> bool {
    match s {
        "" => false,
        _ => s.chars().all(is_xml_name_char),
    }
}
/// Returns `true` if `s` is non-empty and every piece obtained by splitting
/// it on single space characters satisfies `is_xml_nmtoken`.
///
/// Consecutive spaces produce an empty piece and therefore make the whole
/// string invalid.
#[allow(dead_code)]
pub(crate) fn is_xml_nmtokens(s: &str) -> bool {
    if s.is_empty() {
        false
    } else {
        s.split(' ').all(is_xml_nmtoken)
    }
}
impl Default for SpaceHandling {
fn default() -> Self {
SpaceHandling::Default
}
}
impl Display for SpaceHandling {
    /// Writes the mode as a complete attribute: the `XML_NS_ATTRIBUTE`
    /// prefix, the `XML_NS_SEPARATOR`, the `XML_NS_ATTR_SPACE` name, and the
    /// mode's value in double quotes.
    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
        let mode = match self {
            Self::Default => XML_NS_ATTR_SPACE_DEFAULT,
            Self::Preserve => XML_NS_ATTR_SPACE_PRESERVE,
        };
        write!(
            f,
            "{}{}{}=\"{}\"",
            XML_NS_ATTRIBUTE, XML_NS_SEPARATOR, XML_NS_ATTR_SPACE, mode
        )
    }
}
impl FromStr for SpaceHandling {
    /// Parsing carries no diagnostic detail; any unrecognized input is `()`.
    type Err = ();

    /// Parses the attribute *value* (not the whole attribute): the text must
    /// equal `XML_NS_ATTR_SPACE_DEFAULT` or `XML_NS_ATTR_SPACE_PRESERVE`
    /// exactly.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            _ if s == XML_NS_ATTR_SPACE_DEFAULT => Ok(Self::Default),
            _ if s == XML_NS_ATTR_SPACE_PRESERVE => Ok(Self::Preserve),
            _ => Err(()),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::borrow::Borrow;
    use std::collections::HashMap;

    #[test]
    fn test_space_handling_default() {
        let sh = SpaceHandling::default();
        assert_eq!(sh, SpaceHandling::Default);
    }

    #[test]
    fn test_space_handling_display() {
        assert_eq!(
            format!("{}", SpaceHandling::Default),
            format!(
                "{}{}{}=\"{}\"",
                XML_NS_ATTRIBUTE, XML_NS_SEPARATOR, XML_NS_ATTR_SPACE, XML_NS_ATTR_SPACE_DEFAULT
            )
        );
        assert_eq!(
            format!("{}", SpaceHandling::Preserve),
            format!(
                "{}{}{}=\"{}\"",
                XML_NS_ATTRIBUTE, XML_NS_SEPARATOR, XML_NS_ATTR_SPACE, XML_NS_ATTR_SPACE_PRESERVE
            )
        );
    }

    #[test]
    fn test_space_handling_from_str() {
        assert_eq!(
            SpaceHandling::from_str(XML_NS_ATTR_SPACE_DEFAULT).unwrap(),
            SpaceHandling::Default
        );
        assert_eq!(
            SpaceHandling::from_str(XML_NS_ATTR_SPACE_PRESERVE).unwrap(),
            SpaceHandling::Preserve
        );
        assert!(SpaceHandling::from_str("").is_err());
        assert!(SpaceHandling::from_str("other").is_err());
    }

    #[test]
    fn test_end_of_line_handling() {
        // Covers lone CR, CR LF, lone LF, CR NEL, lone NEL and LINE SEPARATOR.
        let input = "one\u{0D}two\u{0D}\u{0A}\u{0A}three\u{0A}\u{0D}\u{85}four\u{85}five\u{2028}";
        let output = normalize_end_of_lines(input);
        assert_eq!(
            output,
            "one\u{0A}two\u{0A}\u{0A}three\u{0A}\u{0A}four\u{0A}five\u{0A}"
        )
    }

    /// Resolver that knows no entities at all.
    struct NoneEntityResolver {}

    impl EntityResolver for NoneEntityResolver {
        fn resolve(&self, name: &str) -> Option<String> {
            let result: Option<String> = None;
            println!("EntityResolver::resolve({:?}) -> {:?}", name, result);
            result
        }
    }

    pub(crate) fn none_entity_resolver() -> Box<dyn EntityResolver> {
        let resolver = NoneEntityResolver {};
        Box::new(resolver)
    }

    #[test]
    fn test_normalize_avalue_trim() {
        let resolver = none_entity_resolver();
        let resolver = resolver.borrow();
        // CDATA values keep their surrounding spaces; other types lose them.
        assert_eq!(
            normalize_attribute_value(" abc ", resolver, true),
            " abc "
        );
        assert_eq!(normalize_attribute_value(" abc ", resolver, false), "abc");
    }

    /// Resolver backed by a fixed table of entity references.
    struct TestResolver {
        entity_map: HashMap<String, String>,
    }

    impl EntityResolver for TestResolver {
        fn resolve(&self, entity: &str) -> Option<String> {
            self.entity_map.get(entity).cloned()
        }
    }

    impl TestResolver {
        pub(crate) fn new() -> Self {
            // Keys are complete references (with `&` and `;`) because that is
            // what `normalize_attribute_value` hands to the resolver. The last
            // entry expands to text that itself contains references, so it
            // exercises the recursive expansion.
            let entity_map = [
                ("&pound;", "£"),
                ("&yen;", "¥"),
                ("&euro;", "€"),
                ("&currency;", "$, &pound;, &euro;, and &yen;"),
            ]
            .iter()
            .map(|&(reference, replacement)| (reference.to_string(), replacement.to_string()))
            .collect();
            Self { entity_map }
        }
    }

    fn test_resolver() -> Box<dyn EntityResolver> {
        let resolver = TestResolver::new();
        Box::new(resolver)
    }

    #[test]
    fn test_normalize_avalue_entity_resolver() {
        let resolver = test_resolver();
        let resolver = resolver.borrow();
        assert_eq!(
            normalize_attribute_value("10$ in &pound;s please", resolver, true),
            "10$ in £s please"
        );
        assert_eq!(
            normalize_attribute_value("&yen; to &euro;", resolver, false),
            "¥ to €"
        );
        assert_eq!(
            normalize_attribute_value("&currency;", resolver, false),
            "$, £, €, and ¥"
        );
    }
}