/// Decodes the entity references in `text` that `decode_one` recognises.
///
/// Every `&` starts a candidate reference. The closing `;` is looked for within the
/// first 14 bytes of the candidate (the `&` included). When a `;` is found and the
/// name between `&` and `;` is accepted by `decode_one`, the whole reference is
/// replaced by the decoded character. In every other case (no `;` inside the window,
/// or a name `decode_one` rejects) the `&` is copied through verbatim and scanning
/// resumes right after it, so malformed input is preserved rather than dropped.
///
/// Input that contains no `&` at all is returned as an unmodified copy.
///
/// This function never panics, including when multi-byte UTF-8 characters follow
/// an `&`.
pub fn decode_entities(text: &str) -> String {
    // Size in bytes, measured from the `&`, of the window searched for `;`.
    const SEARCH_WINDOW: usize = 14;

    if !text.contains('&') {
        return text.to_string();
    }

    let mut out = String::with_capacity(text.len());
    let mut rest = text;
    while let Some(amp_pos) = rest.find('&') {
        out.push_str(&rest[..amp_pos]);
        rest = &rest[amp_pos..];

        // Scan the raw bytes instead of slicing the `str`: the end of the window may
        // fall inside a multi-byte character, where a `str` slice would panic.
        // `;` is ASCII, so any offset found here is a valid char boundary for the
        // `str` slices taken below.
        let window = &rest.as_bytes()[..rest.len().min(SEARCH_WINDOW)];
        let decoded = window
            .iter()
            .position(|&byte| byte == b';')
            .and_then(|semi| decode_one(&rest[1..semi]).map(|ch| (ch, semi)));

        match decoded {
            Some((ch, semi)) => {
                out.push(ch);
                rest = &rest[semi + 1..];
            }
            None => {
                // Not a reference we understand: keep the `&` and move on.
                out.push('&');
                rest = &rest[1..];
            }
        }
    }
    out.push_str(rest);
    out
}
/// Resolves the name of a single entity reference (the text between `&` and `;`)
/// to the character it denotes.
///
/// Accepted forms:
/// * `#` followed by decimal digits, e.g. `#169`
/// * `#x` or `#X` followed by hexadecimal digits, e.g. `#xA9`
/// * the names `amp`, `lt`, `gt`, `quot`, `apos` and `nbsp`
///
/// Returns `None` for any other name, and for numeric references that are empty,
/// contain anything other than digits of the relevant radix, do not fit in a `u32`,
/// or do not name a Unicode scalar value (surrogates, values above `U+10FFFF`).
fn decode_one(entity: &str) -> Option<char> {
    if let Some(numeric) = entity.strip_prefix('#') {
        let (digits, radix) = match numeric
            .strip_prefix('x')
            .or_else(|| numeric.strip_prefix('X'))
        {
            Some(hex) => (hex, 16),
            None => (numeric, 10),
        };
        // The integer parsers tolerate a leading `+`, which is not part of the
        // reference syntax, so insist on digits only before handing the text over.
        if digits.is_empty() || !digits.chars().all(|c| c.is_digit(radix)) {
            return None;
        }
        return u32::from_str_radix(digits, radix)
            .ok()
            .and_then(char::from_u32);
    }
    match entity {
        "amp" => Some('&'),
        "lt" => Some('<'),
        "gt" => Some('>'),
        "quot" => Some('"'),
        "apos" => Some('\''),
        "nbsp" => Some('\u{00A0}'),
        _ => None,
    }
}
// Unit tests for `decode_entities`. Inputs are written with the entity references
// spelled out; they must stay escaped, otherwise the assertions turn into
// tautologies that exercise nothing.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn named_entities() {
        assert_eq!(decode_entities("&amp;"), "&");
        assert_eq!(decode_entities("&lt;"), "<");
        assert_eq!(decode_entities("&gt;"), ">");
        assert_eq!(decode_entities("&quot;"), "\"");
        assert_eq!(decode_entities("&apos;"), "'");
        assert_eq!(decode_entities("&nbsp;"), "\u{00A0}");
    }

    #[test]
    fn numeric_decimal() {
        assert_eq!(decode_entities("&#169;"), "\u{00A9}");
        assert_eq!(decode_entities("&#8217;"), "\u{2019}");
        assert_eq!(decode_entities("&#65;"), "A");
    }

    #[test]
    fn numeric_hex() {
        assert_eq!(decode_entities("&#x2019;"), "\u{2019}");
        assert_eq!(decode_entities("&#x41;"), "A");
        // The hex marker is accepted in upper case as well.
        assert_eq!(decode_entities("&#X41;"), "A");
    }

    #[test]
    fn mixed() {
        assert_eq!(
            decode_entities("fish &amp; chips &#8212; good"),
            "fish & chips \u{2014} good"
        );
    }

    #[test]
    fn already_clean() {
        assert_eq!(decode_entities("no entities here"), "no entities here");
    }

    #[test]
    fn malformed_unclosed() {
        assert_eq!(decode_entities("AT&T"), "AT&T");
    }

    #[test]
    fn malformed_unknown_named() {
        assert_eq!(decode_entities("&foo;"), "&foo;");
    }

    #[test]
    fn empty_input() {
        assert_eq!(decode_entities(""), "");
    }

    #[test]
    fn consecutive_entities() {
        assert_eq!(decode_entities("&amp;&lt;&gt;"), "&<>");
    }

    #[test]
    fn entity_at_end() {
        assert_eq!(decode_entities("end&amp;"), "end&");
        // A trailing bare ampersand is kept as-is.
        assert_eq!(decode_entities("end&"), "end&");
    }

    #[test]
    fn bare_ampersand_mid_text() {
        assert_eq!(decode_entities("a & b"), "a & b");
    }

    #[test]
    fn invalid_numeric() {
        // Above the Unicode range and a surrogate: neither is a valid `char`, so the
        // reference is left untouched.
        assert_eq!(decode_entities("&#x110000;"), "&#x110000;");
        assert_eq!(decode_entities("&#xD800;"), "&#xD800;");
    }
}