use super::prelude::*;
use entities::ENTITIES;
use std::borrow::Cow;
use std::char;
use std::collections::HashMap;
use std::sync::LazyLock;
/// Lookup table from bare entity names to the characters they stand for.
///
/// Keys are the names from the `entities` table after passing through
/// `strip_entity`, so lookups must use the same stripped form.
/// When two table rows strip to the same key, the later row wins.
static ENTITY_MAPPING: LazyLock<HashMap<&'static str, &'static str>> =
    LazyLock::new(|| {
        ENTITIES
            .iter()
            .map(|entry| (strip_entity(entry.entity), entry.characters))
            .collect()
    });
/// Block rule for emitting a character from an HTML entity name or a
/// numeric character reference.
///
/// The rule is registered under the names `char` and `character`, and
/// declares that it accepts neither the star flag, the score flag, nor
/// newlines.
pub const BLOCK_CHAR: BlockRule = BlockRule {
name: "block-char",
accepts_names: &["char", "character"],
accepts_star: false,
accepts_score: false,
accepts_newlines: false,
parse_fn,
};
/// Parse function for the `block-char` rule.
///
/// Resolves the block's head value to the character(s) it names and emits
/// the result as a single text element.
///
/// # Panics
/// Panics if `flag_star` or `flag_score` is set, since `BLOCK_CHAR`
/// declares that it accepts neither.
fn parse_fn<'r, 't>(
    parser: &mut Parser<'r, 't>,
    name: &'t str,
    flag_star: bool,
    flag_score: bool,
    in_head: bool,
) -> ParseResult<'r, 't, Elements<'t>> {
    debug!("Parsing character / HTML entity block (in-head {in_head})");

    // Sanity checks: the caller should never hand this rule a flag or a
    // block name it did not declare.
    assert!(!flag_star, "Char doesn't allow star flag");
    assert!(!flag_score, "Char doesn't allow score flag");
    assert_block_name(&BLOCK_CHAR, name);

    // `parse_entity` converts the raw head argument into the decoded text.
    let decoded = parser.get_head_value(&BLOCK_CHAR, in_head, parse_entity)?;
    ok!(Element::Text(decoded))
}
/// Converts the block's head argument into the text it denotes.
///
/// # Errors
/// - `BlockMissingArguments` if no argument was supplied.
/// - `BlockMalformedArguments` if the argument is neither a known entity
///   name nor a numeric reference that `find_entity` can resolve.
fn parse_entity<'t>(
    parser: &Parser<'_, 't>,
    argument: Option<&'t str>,
) -> Result<Cow<'t, str>, ParseError> {
    let Some(raw) = argument else {
        return Err(parser.make_err(ParseErrorKind::BlockMissingArguments));
    };

    // Normalize away whitespace and the optional `&` / `;` delimiters
    // before looking the entity up.
    find_entity(strip_entity(raw))
        .ok_or_else(|| parser.make_err(ParseErrorKind::BlockMalformedArguments))
}
fn find_entity(entity: &str) -> Option<Cow<'_, str>> {
if let Some(result) = ENTITY_MAPPING.get(entity) {
return Some(cow!(result));
}
if let Some(value) = entity.strip_prefix("#x")
&& let Some(result) = get_char(value, 16)
{
return Some(result);
}
if let Some(value) = entity.strip_prefix('#')
&& let Some(result) = get_char(value, 10)
{
return Some(result);
}
None
}
/// Converts the digits of a numeric character reference into a string.
///
/// `value` is the digit portion only (no `#` or `#x` prefix), interpreted
/// in the given `radix`.
///
/// Returns `None` if `value` is empty, contains anything other than digits
/// of that radix (including a sign), overflows a `u32`, or does not name a
/// valid Unicode scalar value (for example a surrogate or a value above
/// `U+10FFFF`).
fn get_char(value: &str, radix: u32) -> Option<Cow<'_, str>> {
    // `from_str_radix` tolerates a leading '+', but a signed number is not
    // a valid character reference, so reject it up front.
    if value.starts_with('+') {
        return None;
    }

    let codepoint = u32::from_str_radix(value, radix).ok()?;
    let ch = char::from_u32(codepoint)?;
    Some(Cow::Owned(ch.to_string()))
}
/// Reduces an entity to its bare name.
///
/// Surrounding whitespace is trimmed first, then at most one leading `&`
/// and at most one trailing `;` are removed. Nothing is trimmed again after
/// the delimiters are removed.
fn strip_entity(s: &str) -> &str {
    let trimmed = s.trim();
    let without_ampersand = trimmed.strip_prefix('&').unwrap_or(trimmed);

    without_ampersand
        .strip_suffix(';')
        .unwrap_or(without_ampersand)
}
/// Exercises `find_entity` with named, decimal, and hexadecimal references.
#[test]
fn test_get_entity() {
    // Inputs are already-stripped entities (no `&` / `;`).
    fn check(input: &str, expected: Option<Cow<'_, str>>) {
        assert_eq!(
            find_entity(input),
            expected,
            "Actual entity string doesn't match expected",
        );
    }

    // Named entities
    check("", None);
    check("amp", Some(cow!("&")));
    check("lt", Some(cow!("<")));
    check("gt", Some(cow!(">")));
    check("copy", Some(cow!("©")));
    check("xxxzzz", None);

    // Decimal references
    check("#32", Some(cow!(" ")));
    check("#255", Some(cow!("\u{ff}")));
    check("#128175", Some(cow!("💯")));
    check("#2097151", None);

    // Hexadecimal references
    check("#x20", Some(cow!(" ")));
    check("#xff", Some(cow!("\u{ff}")));
    check("#x1f4af", Some(cow!("💯")));
    check("#x1fffff", None);
}
/// Exercises `get_char` with decimal and hexadecimal digit strings.
#[test]
fn test_get_char() {
    fn check(value: &str, radix: u32, expected: Option<Cow<'_, str>>) {
        assert_eq!(
            get_char(value, radix),
            expected,
            "Actual character value doesn't match expected",
        );
    }

    // Decimal
    check("32", 10, Some(Cow::Owned(str!(' '))));
    check("255", 10, Some(Cow::Owned(str!('\u{ff}'))));
    check("128175", 10, Some(Cow::Owned(str!('💯'))));
    check("2097151", 10, None);

    // Hexadecimal
    check("20", 16, Some(Cow::Owned(str!(' '))));
    check("ff", 16, Some(Cow::Owned(str!('\u{ff}'))));
    check("1f4af", 16, Some(Cow::Owned(str!('💯'))));
    check("1fffff", 16, None);
}
/// Exercises `strip_entity` with and without the `&` / `;` delimiters and
/// with leading whitespace.
///
/// The inputs must be written as literal entity text (`&amp;`, `&#100;`),
/// not as the characters they decode to; otherwise the cases no longer
/// test delimiter stripping at all.
#[test]
fn test_strip_entity() {
    macro_rules! test {
        ($input:expr, $expected:expr $(,)?) => {{
            let actual = strip_entity($input);
            let expected = $expected;

            assert_eq!(
                actual, expected,
                "Actual stripped entity value didn't match expected",
            );
        }};
    }

    // No delimiters at all
    test!("", "");
    test!("abc", "abc");
    test!("legumes1", "legumes1");

    // Both delimiters present
    test!("&amp;", "amp");
    test!("&#100;", "#100");
    test!("&xdeadbeef;", "xdeadbeef");

    // Only one delimiter present
    test!("&amp", "amp");
    test!("amp;", "amp");
    test!("&#100", "#100");
    test!("#100;", "#100");

    // Leading whitespace
    test!(" ", "");
    test!(" abc", "abc");
    test!(" legumes1", "legumes1");
    test!(" &amp;", "amp");
    test!(" &#100;", "#100");
    test!(" &xdeadbeef;", "xdeadbeef");
}