use super::prelude::*;
use entities::ENTITIES;
use once_cell::sync::Lazy;
use std::borrow::Cow;
use std::char;
use std::collections::HashMap;
/// Lookup table from normalized entity name to the characters it stands for.
///
/// Built on first access from every entry in `ENTITIES`. Each entity name is
/// passed through `strip_entity` before being used as a key, so lookups must
/// use the same normalized form. If several entries normalize to the same key,
/// the one appearing last in `ENTITIES` wins.
static ENTITY_MAPPING: Lazy<HashMap<&'static str, &'static str>> = Lazy::new(|| {
    ENTITIES
        .iter()
        .map(|item| (strip_entity(item.entity), item.characters))
        .collect::<HashMap<_, _>>()
});
/// Block rule for the character / HTML entity block.
///
/// The rule is named `block-char` and is accepted under the block names
/// `char` and `character`. The star, score, and newline options are all
/// disabled, and parsing is delegated to `parse_fn`.
pub const BLOCK_CHAR: BlockRule = BlockRule {
name: "block-char",
accepts_names: &["char", "character"],
accepts_star: false,
accepts_score: false,
accepts_newlines: false,
parse_fn,
};
/// Parses a `char` / `character` block into a single text element.
///
/// The block's head value is resolved through `parse_entity`, and the
/// resulting string is emitted as an `Element::Text`.
///
/// # Panics
///
/// Panics if `flag_star` or `flag_score` is set, since `BLOCK_CHAR` accepts
/// neither. `assert_block_name` is also invoked with `name`; presumably it
/// panics on a name this rule does not accept (confirm against its definition).
///
/// # Errors
///
/// Propagates any error returned by `Parser::get_head_value`, which includes
/// the errors produced by `parse_entity`.
fn parse_fn<'r, 't>(
    parser: &mut Parser<'r, 't>,
    name: &'t str,
    flag_star: bool,
    flag_score: bool,
    in_head: bool,
) -> ParseResult<'r, 't, Elements<'t>> {
    debug!("Parsing character / HTML entity block (in-head {in_head})");

    // The rule declares neither flag, so seeing one here is a bug upstream.
    assert!(!flag_star, "Char doesn't allow star flag");
    assert!(!flag_score, "Char doesn't allow score flag");
    assert_block_name(&BLOCK_CHAR, name);

    let characters = parser.get_head_value(&BLOCK_CHAR, in_head, parse_entity)?;

    ok!(Element::Text(characters))
}
/// Resolves the block's argument to the character(s) it denotes.
///
/// The argument is normalized with `strip_entity` and then looked up with
/// `find_entity`.
///
/// # Errors
///
/// * `ParseErrorKind::BlockMissingArguments` if no argument was supplied.
/// * `ParseErrorKind::BlockMalformedArguments` if the argument does not match
///   any known entity or numeric reference.
fn parse_entity<'t>(
    parser: &Parser<'_, 't>,
    argument: Option<&'t str>,
) -> Result<Cow<'t, str>, ParseError> {
    let raw = argument
        .ok_or_else(|| parser.make_err(ParseErrorKind::BlockMissingArguments))?;

    find_entity(strip_entity(raw))
        .ok_or_else(|| parser.make_err(ParseErrorKind::BlockMalformedArguments))
}
/// Looks up the string for an already-normalized entity name.
///
/// `entity` is expected without the surrounding `&` and `;`. Resolution order:
///
/// 1. A named entity present in `ENTITY_MAPPING`.
/// 2. A hexadecimal reference of the form `#x<digits>`.
/// 3. A decimal reference of the form `#<digits>`.
///
/// Returns `None` when none of these yields a character.
fn find_entity(entity: &str) -> Option<Cow<str>> {
    // Named entities take priority over numeric references.
    if let Some(characters) = ENTITY_MAPPING.get(entity) {
        return Some(cow!(characters));
    }

    // Try hexadecimal first; if that prefix is absent or the digits are
    // invalid, fall back to interpreting everything after '#' as decimal.
    entity
        .strip_prefix("#x")
        .and_then(|hex| get_char(hex, 16))
        .or_else(|| {
            entity
                .strip_prefix('#')
                .and_then(|decimal| get_char(decimal, 10))
        })
}
/// Converts the digits of a numeric character reference into a string.
///
/// `value` holds only the digits (no `#` or `#x` prefix) and `radix` is the
/// base they are written in. The result is always an owned one-character
/// string.
///
/// Returns `None` if `value` is not a plain run of digits in `radix` that fits
/// in a `u32`, or if the resulting number is not a valid Unicode scalar value
/// (for example a surrogate or anything above `U+10FFFF`).
fn get_char(value: &str, radix: u32) -> Option<Cow<'_, str>> {
    // `u32::from_str_radix` tolerates a leading '+', but a sign is not part
    // of a numeric character reference, so reject it up front.
    if value.starts_with('+') {
        return None;
    }

    let codepoint = u32::from_str_radix(value, radix).ok()?;
    let ch = char::from_u32(codepoint)?;

    Some(Cow::Owned(ch.to_string()))
}
/// Normalizes an entity reference down to its bare name.
///
/// Surrounding whitespace is trimmed first, then at most one leading `&` and
/// at most one trailing `;` are removed. Either delimiter may be absent, and
/// nothing inside the remaining text is altered.
fn strip_entity(s: &str) -> &str {
    let trimmed = s.trim();
    let without_ampersand = trimmed.strip_prefix('&').unwrap_or(trimmed);

    without_ampersand
        .strip_suffix(';')
        .unwrap_or(without_ampersand)
}
#[test]
fn test_get_entity() {
    /// Resolves `input` with `find_entity` and compares against `expected`.
    ///
    /// `#[track_caller]` keeps failure locations pointing at the individual
    /// case rather than at this helper.
    #[track_caller]
    fn check(input: &str, expected: Option<Cow<str>>) {
        assert_eq!(
            find_entity(input),
            expected,
            "Actual entity string doesn't match expected",
        );
    }

    // Named entities
    check("", None);
    check("amp", Some(cow!("&")));
    check("lt", Some(cow!("<")));
    check("gt", Some(cow!(">")));
    check("copy", Some(cow!("©")));
    check("xxxzzz", None);

    // Decimal references
    check("#32", Some(cow!(" ")));
    check("#255", Some(cow!("\u{ff}")));
    check("#128175", Some(cow!("💯")));
    check("#2097151", None);

    // Hexadecimal references
    check("#x20", Some(cow!(" ")));
    check("#xff", Some(cow!("\u{ff}")));
    check("#x1f4af", Some(cow!("💯")));
    check("#x1fffff", None);
}
#[test]
fn test_get_char() {
    /// Parses `value` in the given `radix` with `get_char` and compares the
    /// outcome against `expected`.
    ///
    /// `#[track_caller]` keeps failure locations pointing at the individual
    /// case rather than at this helper.
    #[track_caller]
    fn check(value: &str, radix: u32, expected: Option<Cow<str>>) {
        assert_eq!(
            get_char(value, radix),
            expected,
            "Actual character value doesn't match expected",
        );
    }

    // Decimal
    check("32", 10, Some(Cow::Owned(str!(' '))));
    check("255", 10, Some(Cow::Owned(str!('\u{ff}'))));
    check("128175", 10, Some(Cow::Owned(str!('💯'))));
    check("2097151", 10, None);

    // Hexadecimal
    check("20", 16, Some(Cow::Owned(str!(' '))));
    check("ff", 16, Some(Cow::Owned(str!('\u{ff}'))));
    check("1f4af", 16, Some(Cow::Owned(str!('💯'))));
    check("1fffff", 16, None);
}
#[test]
fn test_strip_entity() {
    // Asserts that `strip_entity` turns `$input` into `$expected`.
    macro_rules! check {
        ($input:expr, $expected:expr $(,)?) => {{
            let actual = strip_entity($input);
            let expected = $expected;

            assert_eq!(
                actual, expected,
                "Actual stripped entity value didn't match expected",
            );
        }};
    }

    // NOTE: the inputs below must be the literal, still-encoded entity text
    // (e.g. ampersand + "amp" + semicolon). If they are ever decoded to the
    // characters they denote, the expectations no longer hold.

    // Bare names pass through unchanged
    check!("", "");
    check!("abc", "abc");
    check!("legumes1", "legumes1");

    // Both delimiters present
    check!("&amp;", "amp");
    check!("&#100;", "#100");
    check!("&xdeadbeef;", "xdeadbeef");

    // Only one delimiter present
    check!("&amp", "amp");
    check!("amp;", "amp");
    check!("&#100", "#100");
    check!("#100;", "#100");

    // Leading whitespace is trimmed before delimiters are removed
    check!(" ", "");
    check!(" abc", "abc");
    check!(" legumes1", "legumes1");
    check!(" &amp;", "amp");
    check!(" &#100;", "#100");
    check!(" &xdeadbeef;", "xdeadbeef");
}