use std::borrow::Cow;
use htmlentity::{
entity::{
decode, decode_chars, decode_chars_to, decode_to, encode, encode_char, encode_chars_with,
encode_with, CharacterSet, EncodeType, Entity, EntityType, ICodedDataTrait,
},
types::{AnyhowResult, ByteList},
};
/// Runs `decode` over the bytes of `content` and converts the result to a `String`.
///
/// If that conversion reports an error, the error is dropped and an empty
/// string is returned instead.
fn decode_to_string(content: &str) -> String {
    decode(content.as_bytes()).to_string().unwrap_or_default()
}
/// Exercises `Entity::decode_chars` with two rejected inputs and one accepted input.
#[test]
fn test_entity_decode() -> AnyhowResult<()> {
    // An empty slice is rejected.
    let empty_name = Entity::decode_chars(&[]);
    assert!(empty_name.is_err());

    // `lt` followed by an extra character is rejected as well.
    let trailing_garbage = Entity::decode_chars(&['l', 't', '好']);
    assert!(trailing_garbage.is_err());

    // `lt` on its own yields `<`.
    let less_than = Entity::decode_chars(&['l', 't']);
    assert!(less_than.is_ok());
    assert_eq!(less_than?, '<');

    Ok(())
}
/// Encodes sample markup with several encode types / character sets and checks
/// both the encoded text and the decode round trip.
///
/// NOTE(review): the expected literals in this test had been stored in their
/// *decoded* form (one of them was not even a valid string literal), so they
/// could never equal the encoder output. They are restored here from the
/// constraints the test itself imposes (`encode(content) == encoded_content`
/// and `decode(encoded_content) == content`) — confirm against the upstream
/// test file.
#[test]
fn test_escape() -> AnyhowResult<()> {
    // --- Default settings: every representation must decode back to the input.
    let content = "
\t
\n
<br>this is a multiple line text.
<div class='product'>
<span><span>¥</span>100</span>
<h4>this is a title<main></h4>
</div>
";
    let result = encode(content.as_bytes(), &Default::default(), &Default::default());

    // String form.
    let encoded_string = result.to_string();
    assert!(encoded_string.is_ok());
    assert_eq!(decode_to_string(&encoded_string?), content);

    // Char form, decoded into a fresh collection.
    let encoded_chars = result.to_chars();
    assert!(encoded_chars.is_ok());
    let cur_chars = encoded_chars?;
    assert_eq!(decode_chars(&cur_chars).iter().collect::<String>(), content);

    // An empty entity (`&;`) is left untouched, also when a valid one follows.
    assert_eq!(decode_chars(&['&', ';']).iter().collect::<String>(), "&;");
    assert_eq!(
        decode_chars(&['&', ';', '&', 'l', 't', ';'])
            .iter()
            .collect::<String>(),
        "&;<"
    );

    // Char form, decoded into a caller-supplied buffer.
    let mut decode_result: Vec<char> = Vec::new();
    decode_chars_to(&cur_chars, &mut decode_result);
    assert_eq!(decode_result.iter().collect::<String>(), content);

    // Byte form, decoded into a caller-supplied buffer.
    let mut data = vec![];
    decode_to(&result.to_bytes(), &mut data);
    let now_content = std::str::from_utf8(&data);
    assert!(now_content.is_ok());
    assert_eq!(now_content?, content);

    // --- Fixed character sets with `NamedOrHex`.
    // The input contains the literal text `&nbsp;`, so its `&` must itself be
    // escaped. Per the restored expectations, `SpecialChars` also escapes both
    // quote characters while `Html` leaves them alone.
    let set_cases = [
        (
            "<div>&nbsp;'\"</div>",
            "&lt;div&gt;&amp;nbsp;&apos;&quot;&lt;/div&gt;",
            CharacterSet::SpecialChars,
        ),
        (
            "<div>&nbsp;'\"</div>",
            "&lt;div&gt;&amp;nbsp;'\"&lt;/div&gt;",
            CharacterSet::Html,
        ),
    ];
    for (content, encoded_content, character_set) in set_cases {
        let encoded_data = encode(content.as_bytes(), &EncodeType::NamedOrHex, &character_set);
        let encoded_string = encoded_data.to_string();
        assert!(encoded_string.is_ok());
        assert_eq!(encoded_string?, encoded_content);
        assert_eq!(decode_to_string(encoded_content), content);
    }

    // --- `CharacterSet::All` with `NamedOrHex`.
    // NOTE(review): the exact expected text for this case could not be
    // recovered (it depends on which entity name the crate picks and on
    // whether plain letters are hex-encoded), so only properties that hold
    // either way are asserted: none of the special/non-ASCII characters
    // survive unencoded, and decoding restores the input. Replace with the
    // exact literal once confirmed.
    let content = "<div>℗ℑ";
    let encoded_data = encode(
        content.as_bytes(),
        &EncodeType::NamedOrHex,
        &CharacterSet::All,
    );
    let encoded_string = encoded_data.to_string();
    assert!(encoded_string.is_ok());
    let encoded_text = encoded_string?;
    assert!(!encoded_text.contains(|ch: char| "<>℗ℑ".contains(ch)));
    assert_eq!(decode_to_string(&encoded_text), content);

    // --- Custom byte filter: everything except `<` is offered for encoding
    // with `Named`, so the tab and `>` are expected as named entities while
    // `<` and the letters stay as they are.
    let content = "\t<div>";
    let encoded_data = encode_with(content.as_bytes(), &EncodeType::Named, |ch, _| {
        if *ch == '<' {
            return (false, None);
        }
        (true, None)
    });
    let encoded_string = encoded_data.to_string();
    assert!(encoded_string.is_ok());
    assert_eq!(encoded_string?, "&Tab;<div&gt;");

    // --- Custom char filter: the chosen set plus the single quote are
    // encoded with `Named`.
    let chars = String::from("<div class='header'></div>")
        .chars()
        .collect::<Vec<char>>();
    let character_set = CharacterSet::HtmlAndNonASCII;
    let encoded_chars = encode_chars_with(&chars, |ch| {
        if character_set.contains(ch) || *ch == '\'' {
            return Some(&EncodeType::Named);
        }
        None
    });
    assert_eq!(
        encoded_chars.iter().collect::<String>(),
        "&lt;div class=&apos;header&apos;&gt;&lt;/div&gt;"
    );

    Ok(())
}
/// Checks that two malformed entities come back from `decode_to_string` unchanged.
#[test]
fn test_wrong_entity() {
    for malformed in ["&#;", "&;"] {
        assert_eq!(decode_to_string(malformed), malformed);
    }
}
/// Decodes inputs that are not valid entities and checks that the decoder
/// reports the failure and leaves the bytes untouched.
#[test]
fn test_decode_named() {
    let input = "&#q123;";
    let input_bytes = input.as_bytes();
    let mut decoded = decode(input_bytes);

    // The decode is flagged as failed, with at least one recorded error and
    // no entity counted.
    assert!(!decoded.is_ok());
    assert!(!decoded.get_errors().is_empty());
    assert_eq!(decoded.entity_count(), 0);

    // The exposed bytes compare equal to a borrow of the input bytes.
    assert_eq!(decoded.bytes(), Cow::Borrowed(input_bytes));

    // After calling `to_owned`, consuming the data still yields the input bytes.
    decoded.to_owned();
    assert_eq!(decoded.into_bytes(), input.as_bytes());

    // The string helper returns each of these inputs unchanged.
    for unchanged in [input, "&123;", "&q123;"] {
        assert_eq!(decode_to_string(unchanged), unchanged);
    }
}
/// Feeds a list of inputs through `decode_to_string` and compares the result
/// with the expected text.
///
/// NOTE(review): in the first group the input literal is already identical to
/// the expected text, so no hexadecimal reference is actually decoded there;
/// the literals may have lost their `&#x…;` form somewhere — confirm against
/// the upstream test file.
#[test]
fn test_decode_hex() {
    // Inputs paired with an explicit expected value.
    let decoded_cases = [("→", "→"), ("→", "→"), ("→", "→")];
    for (input, expected) in decoded_cases {
        assert_eq!(decode_to_string(input), expected);
    }

    // Inputs that must come back unchanged.
    let unchanged_cases = ["�", "�", "ਏh;", "&#a00;"];
    for input in unchanged_cases {
        assert_eq!(decode_to_string(input), input);
    }
}
/// Feeds a list of inputs through `decode_to_string` and compares the result
/// with the expected text.
///
/// NOTE(review): in the first group the input literal is already identical to
/// the expected text, so no decimal reference is actually decoded there; the
/// literals may have lost their `&#…;` form somewhere — confirm against the
/// upstream test file.
#[test]
fn test_decode_decimal() {
    // Inputs paired with an explicit expected value.
    let decoded_cases = [("→", "→"), ("→", "→")];
    for (input, expected) in decoded_cases {
        assert_eq!(decode_to_string(input), expected);
    }

    // Inputs that must come back unchanged.
    let unchanged_cases = ["�", "�"];
    for input in unchanged_cases {
        assert_eq!(decode_to_string(input), input);
    }
}
/// Checks `encode_with` when the filter closure overrides what the character
/// set would do on its own.
///
/// NOTE(review): the expected literals had been stored in their *decoded*
/// form (identical to the unencoded input), so the assertions could not hold.
/// They are restored here from what each closure asks the encoder to emit —
/// confirm against the upstream test file.
#[test]
fn test_exclude_named() -> AnyhowResult<()> {
    let entity_set = CharacterSet::SpecialCharsAndNonASCII;

    // Case 1: `<` is explicitly excluded; everything else follows the set
    // with named entities.
    let html = "<div class='header'>℗</div>";
    let encode_type = EncodeType::Named;
    let html_encoded = encode_with(html.as_bytes(), &encode_type, |ch, _| {
        if *ch == '<' {
            return (false, None);
        }
        entity_set.filter(ch, &encode_type)
    });
    let encoded_string = html_encoded.to_string();
    assert!(encoded_string.is_ok());
    assert_eq!(
        encoded_string?,
        "<div class=&apos;header&apos;&gt;&copysr;</div&gt;"
    );

    // Case 2: the single quote is forced to the decimal entity `39`; the
    // remaining characters follow the set with `NamedOrDecimal`.
    let html = "<div class='header'></div>";
    let html_encoded = encode_with(
        html.as_bytes(),
        &EncodeType::NamedOrDecimal,
        |ch, encode_type| {
            if *ch == '\'' {
                return (
                    true,
                    Some((EntityType::Decimal, Cow::Owned(b"39".to_vec()))),
                );
            }
            entity_set.filter(ch, encode_type)
        },
    );
    let encoded_string = html_encoded.to_string();
    assert!(encoded_string.is_ok());
    assert_eq!(
        encoded_string?,
        "&lt;div class=&#39;header&#39;&gt;&lt;/div&gt;"
    );

    // Case 3: the set only decides *whether* to encode; the entity itself is
    // always produced by `encode_char` with `Decimal`.
    let html = "<div class='header'></div>";
    let html_encoded = encode_with(
        html.as_bytes(),
        &EncodeType::NamedOrDecimal,
        |ch, encode_type| {
            let (need_encode, _) = entity_set.filter(ch, encode_type);
            if need_encode {
                if let Some(char_entity) = encode_char(ch, &EncodeType::Decimal) {
                    return (
                        true,
                        Some((EntityType::Decimal, Cow::from(char_entity.data()))),
                    );
                }
            }
            (false, None)
        },
    );
    let encoded_string = html_encoded.to_string();
    assert!(encoded_string.is_ok());
    assert_eq!(
        encoded_string?,
        "&#60;div class=&#39;header&#39;&#62;&#60;/div&#62;"
    );
    // The byte view must agree with the string view.
    assert_eq!(
        html_encoded.bytes(),
        Cow::Owned::<ByteList>(b"&#60;div class=&#39;header&#39;&#62;&#60;/div&#62;".to_vec())
    );

    Ok(())
}
/// Checks that truncated or malformed entities pass through `decode`
/// unchanged, and that a well-formed entity is still decoded.
#[test]
fn test_unexpected() -> AnyhowResult<()> {
    // Each of these is expected to come back exactly as written.
    //
    // NOTE(review): the tenth entry used to be a non-ASCII character inside a
    // byte-string literal, which does not compile; `&#x0g;` (a hex reference
    // with an invalid trailing digit) is the presumed original — confirm
    // against the upstream test file.
    let malformed_inputs = [
        "&", "&;", "&a0;", "&0a;", "&#", "&#;", "&#a;", "&#x;", "&#xg;", "&#x0g;", "abc&",
    ];
    for malformed in malformed_inputs {
        assert_eq!(decode(malformed.as_bytes()).to_string()?, malformed);
    }

    // A valid named entity is decoded. (The input used to be a bare `<`,
    // which made this check a no-op.)
    assert_eq!(decode(b"&lt;").to_string()?, "<");

    Ok(())
}