use crate::util::constant::{
CHARACTER_REFERENCES, CHARACTER_REFERENCES_HTML_4, CHARACTER_REFERENCE_DECIMAL_SIZE_MAX,
CHARACTER_REFERENCE_HEXADECIMAL_SIZE_MAX, CHARACTER_REFERENCE_NAMED_SIZE_MAX,
};
use alloc::string::String;
use core::str;
/// Look up a named character reference.
///
/// `value` is the bare name, compared for exact equality against the first
/// field of each table entry. The table searched is `CHARACTER_REFERENCES`
/// when `html5` is `true`, and `CHARACTER_REFERENCES_HTML_4` otherwise.
///
/// Returns the second field of the first matching entry as an owned
/// `String`, or `None` when no entry matches.
pub fn decode_named(value: &str, html5: bool) -> Option<String> {
    let mut candidates = if html5 {
        CHARACTER_REFERENCES.iter()
    } else {
        CHARACTER_REFERENCES_HTML_4.iter()
    };

    // Linear scan; the first entry whose name equals `value` wins.
    let entry = candidates.find(|entry| entry.0 == value)?;
    Some(entry.1.into())
}
/// Decode the digits of a numeric character reference.
///
/// `value` holds only the digits and `radix` is the base they are written in
/// (`decode` passes `10` or `16`).
///
/// Returns the character for that code point as a `String`. The result is
/// U+FFFD REPLACEMENT CHARACTER instead when the number is not a valid
/// Unicode scalar value (surrogates, anything above U+10FFFF), or when it is
/// one of the disallowed controls: U+0000–U+0008, U+000B, U+000E–U+001F, or
/// U+007F–U+009F.
///
/// # Panics
///
/// Panics when `value` cannot be parsed as a `u32` in `radix` (empty,
/// contains a non-digit, or overflows).
pub fn decode_numeric(value: &str, radix: u32) -> String {
    let code_point = u32::from_str_radix(value, radix).unwrap();

    char::from_u32(code_point)
        // Tab, line feed, form feed, and carriage return are deliberately not
        // in this list and pass through unchanged.
        .filter(|ch| {
            !matches!(
                *ch,
                '\0'..='\u{08}' | '\u{0B}' | '\u{0E}'..='\u{1F}' | '\u{7F}'..='\u{9F}'
            )
        })
        .unwrap_or(char::REPLACEMENT_CHARACTER)
        .into()
}
/// Decode the value of a character reference of the kind given by `marker`.
///
/// * `b'&'` — named: forwards to `decode_named(value, html5)`, which may
///   return `None` for an unknown name.
/// * `b'#'` — decimal: always `Some`, via `decode_numeric(value, 10)`.
/// * `b'x'` — hexadecimal: always `Some`, via `decode_numeric(value, 16)`.
///
/// `html5` only affects named references.
///
/// # Panics
///
/// Panics when `marker` is none of the three bytes above, and (through
/// `decode_numeric`) when a numeric `value` cannot be parsed.
pub fn decode(value: &str, marker: u8, html5: bool) -> Option<String> {
    if marker == b'&' {
        return decode_named(value, html5);
    }

    let radix = match marker {
        b'x' => 16,
        b'#' => 10,
        _ => unreachable!("Unexpected marker `{}`", marker),
    };

    Some(decode_numeric(value, radix))
}
/// Maximum number of value bytes allowed for a character reference of the
/// kind given by `marker`.
///
/// * `b'#'` → `CHARACTER_REFERENCE_DECIMAL_SIZE_MAX`
/// * `b'x'` → `CHARACTER_REFERENCE_HEXADECIMAL_SIZE_MAX`
/// * `b'&'` → `CHARACTER_REFERENCE_NAMED_SIZE_MAX`
///
/// # Panics
///
/// Panics when `marker` is any other byte.
pub fn value_max(marker: u8) -> usize {
    if marker == b'#' {
        CHARACTER_REFERENCE_DECIMAL_SIZE_MAX
    } else if marker == b'x' {
        CHARACTER_REFERENCE_HEXADECIMAL_SIZE_MAX
    } else if marker == b'&' {
        CHARACTER_REFERENCE_NAMED_SIZE_MAX
    } else {
        unreachable!("Unexpected marker `{}`", marker)
    }
}
/// Predicate that decides whether a byte may appear in the value of a
/// character reference of the kind given by `marker`.
///
/// * `b'#'` → ASCII decimal digits (`u8::is_ascii_digit`)
/// * `b'x'` → ASCII hexadecimal digits (`u8::is_ascii_hexdigit`)
/// * `b'&'` → ASCII letters and digits (`u8::is_ascii_alphanumeric`)
///
/// # Panics
///
/// Panics when `marker` is any other byte.
pub fn value_test(marker: u8) -> fn(&u8) -> bool {
    if marker == b'#' {
        u8::is_ascii_digit
    } else if marker == b'x' {
        u8::is_ascii_hexdigit
    } else if marker == b'&' {
        u8::is_ascii_alphanumeric
    } else {
        unreachable!("Unexpected marker `{}`", marker)
    }
}
/// Decode every character reference in `value`, copying all other text
/// through unchanged.
///
/// Three forms are recognised, each introduced by `&` and terminated by `;`:
///
/// * `&#x…;` / `&#X…;` — hexadecimal
/// * `&#…;` — decimal
/// * `&…;` — named (looked up with `html5` set to `false`)
///
/// A candidate is decoded only when its value has at least one byte accepted
/// by `value_test`, is no longer than `value_max` allows, is immediately
/// followed by `;`, and `decode` returns `Some`. Anything else (unknown name,
/// missing `;`, empty value, reference cut off by the end of the input) is
/// left in the output as literal text.
pub fn parse(value: &str) -> String {
    let bytes = value.as_bytes();
    let len = bytes.len();
    let mut result = String::with_capacity(len);
    // Start of the literal text that has not been copied to `result` yet.
    let mut start = 0;
    let mut index = 0;

    while index < len {
        if bytes[index] == b'&' {
            let (marker, value_start) = if index + 1 < len && bytes[index + 1] == b'#' {
                if index + 2 < len && matches!(bytes[index + 2], b'x' | b'X') {
                    (b'x', index + 3)
                } else {
                    (b'#', index + 2)
                }
            } else {
                (b'&', index + 1)
            };

            // `value_start` is at most `len` here, so the slice below is
            // always in range (possibly empty).
            let test = value_test(marker);
            let value_len = bytes[value_start..]
                .iter()
                .take(value_max(marker))
                .take_while(|byte| test(byte))
                .count();
            let value_end = value_start + value_len;

            // Non-empty and terminated. `value_end` equals `len` when the
            // candidate runs up to the end of the input, so the terminator
            // must be fetched with a checked lookup rather than by indexing.
            if value_len > 0 && bytes.get(value_end) == Some(&b';') {
                // Every byte in the value passed an ASCII-only predicate, so
                // the slice is valid UTF-8.
                let reference = str::from_utf8(&bytes[value_start..value_end])
                    .expect("character reference value is ASCII");

                if let Some(decoded) = decode(reference, marker, false) {
                    result.push_str(&value[start..index]);
                    result.push_str(&decoded);
                    // Resume right after the `;`.
                    start = value_end + 1;
                    index = start;
                    continue;
                }
            }
        }

        index += 1;
    }

    result.push_str(&value[start..]);
    result
}