use std::fmt;
use crate::engine::{encode_loop, is_invalid_for_xml, is_unicode_noncharacter};
/// Encodes `input` with [`write_html`] and returns the result as a new `String`.
///
/// The buffer is pre-sized to `input.len()`; it grows on demand when
/// replacements are longer than the characters they replace.
///
/// # Panics
///
/// Panics if [`write_html`] reports an error, which the code treats as
/// impossible when the sink is a `String`.
pub fn for_html(input: &str) -> String {
    let mut encoded = String::with_capacity(input.len());
    let status = write_html(&mut encoded, input);
    status.expect("writing to string cannot fail");
    encoded
}
/// Streams the HTML encoding of `input` into `out`.
///
/// Delegates to `encode_loop`, supplying [`needs_html_encoding`] as the
/// per-character predicate and [`write_html_encoded`] as the replacement
/// writer.
///
/// # Errors
///
/// Returns whatever `encode_loop` returns (presumably errors raised by `out`).
pub fn write_html<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
    let should_encode = needs_html_encoding;
    let emit_replacement = write_html_encoded::<W>;
    encode_loop(out, input, should_encode, emit_replacement)
}
/// Returns `true` when `c` must be replaced in the general HTML context:
/// `&`, `<`, `>`, `"`, `'`, or any character `is_invalid_for_xml` flags.
fn needs_html_encoding(c: char) -> bool {
    match c {
        '&' | '<' | '>' | '"' | '\'' => true,
        other => is_invalid_for_xml(other),
    }
}
/// Writes the replacement for one character flagged by [`needs_html_encoding`].
///
/// * `&` → `&amp;`, `<` → `&lt;`, `>` → `&gt;`
/// * `"` → `&#34;`, `'` → `&#39;` (numeric forms, valid in every HTML version)
/// * any other character → a single space
///
/// `_next` is part of the callback shape handed to `encode_loop` and is not
/// read here.
///
/// # Errors
///
/// Propagates any error returned by `out`.
fn write_html_encoded<W: fmt::Write>(out: &mut W, c: char, _next: Option<char>) -> fmt::Result {
    match c {
        '&' => out.write_str("&amp;"),
        '<' => out.write_str("&lt;"),
        '>' => out.write_str("&gt;"),
        '"' => out.write_str("&#34;"),
        '\'' => out.write_str("&#39;"),
        // Presumably only reached for characters `is_invalid_for_xml` flagged;
        // they are blanked out rather than emitted.
        _ => out.write_char(' '),
    }
}
/// Encodes `input` with [`write_html_content`] and returns the result as a
/// new `String`.
///
/// The buffer starts at `input.len()` bytes and grows if the encoded form is
/// longer.
///
/// # Panics
///
/// Panics if [`write_html_content`] reports an error, which the code treats
/// as impossible when the sink is a `String`.
pub fn for_html_content(input: &str) -> String {
    let mut encoded = String::with_capacity(input.len());
    let status = write_html_content(&mut encoded, input);
    status.expect("writing to string cannot fail");
    encoded
}
/// Streams the HTML text-content encoding of `input` into `out`.
///
/// Delegates to `encode_loop` with [`needs_html_content_encoding`] as the
/// predicate and [`write_html_content_encoded`] as the replacement writer.
///
/// # Errors
///
/// Returns whatever `encode_loop` returns (presumably errors raised by `out`).
pub fn write_html_content<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
    let should_encode = needs_html_content_encoding;
    let emit_replacement = write_html_content_encoded::<W>;
    encode_loop(out, input, should_encode, emit_replacement)
}
/// Returns `true` when `c` must be replaced in HTML text content: `&`, `<`,
/// `>`, or any character `is_invalid_for_xml` flags. Quotes are left alone.
fn needs_html_content_encoding(c: char) -> bool {
    match c {
        '&' | '<' | '>' => true,
        other => is_invalid_for_xml(other),
    }
}
/// Writes the replacement for one character flagged by
/// [`needs_html_content_encoding`].
///
/// * `&` → `&amp;`, `<` → `&lt;`, `>` → `&gt;`
/// * any other character → a single space
///
/// `_next` is part of the callback shape handed to `encode_loop` and is not
/// read here.
///
/// # Errors
///
/// Propagates any error returned by `out`.
fn write_html_content_encoded<W: fmt::Write>(
    out: &mut W,
    c: char,
    _next: Option<char>,
) -> fmt::Result {
    match c {
        '&' => out.write_str("&amp;"),
        '<' => out.write_str("&lt;"),
        '>' => out.write_str("&gt;"),
        // Presumably only reached for characters `is_invalid_for_xml` flagged.
        _ => out.write_char(' '),
    }
}
/// Encodes `input` with [`write_html_attribute`] and returns the result as a
/// new `String`.
///
/// The buffer starts at `input.len()` bytes and grows if the encoded form is
/// longer.
///
/// # Panics
///
/// Panics if [`write_html_attribute`] reports an error, which the code treats
/// as impossible when the sink is a `String`.
pub fn for_html_attribute(input: &str) -> String {
    let mut encoded = String::with_capacity(input.len());
    let status = write_html_attribute(&mut encoded, input);
    status.expect("writing to string cannot fail");
    encoded
}
/// Streams the quoted-attribute encoding of `input` into `out`.
///
/// Delegates to `encode_loop` with [`needs_html_attribute_encoding`] as the
/// predicate and [`write_html_attribute_encoded`] as the replacement writer.
///
/// # Errors
///
/// Returns whatever `encode_loop` returns (presumably errors raised by `out`).
pub fn write_html_attribute<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
    let should_encode = needs_html_attribute_encoding;
    let emit_replacement = write_html_attribute_encoded::<W>;
    encode_loop(out, input, should_encode, emit_replacement)
}
/// Returns `true` when `c` must be replaced inside a quoted attribute value:
/// `&`, `<`, `"`, `'`, or any character `is_invalid_for_xml` flags. `>` is
/// deliberately absent from the set.
fn needs_html_attribute_encoding(c: char) -> bool {
    match c {
        '&' | '<' | '"' | '\'' => true,
        other => is_invalid_for_xml(other),
    }
}
/// Writes the replacement for one character flagged by
/// [`needs_html_attribute_encoding`].
///
/// * `&` → `&amp;`, `<` → `&lt;`
/// * `"` → `&#34;`, `'` → `&#39;` (numeric forms, valid in every HTML version)
/// * any other character → a single space
///
/// `_next` is part of the callback shape handed to `encode_loop` and is not
/// read here.
///
/// # Errors
///
/// Propagates any error returned by `out`.
fn write_html_attribute_encoded<W: fmt::Write>(
    out: &mut W,
    c: char,
    _next: Option<char>,
) -> fmt::Result {
    match c {
        '&' => out.write_str("&amp;"),
        '<' => out.write_str("&lt;"),
        '"' => out.write_str("&#34;"),
        '\'' => out.write_str("&#39;"),
        // Presumably only reached for characters `is_invalid_for_xml` flagged.
        _ => out.write_char(' '),
    }
}
/// Encodes `input` with [`write_html_unquoted_attribute`] and returns the
/// result as a new `String`.
///
/// The buffer starts at `input.len()` bytes and grows if the encoded form is
/// longer.
///
/// # Panics
///
/// Panics if [`write_html_unquoted_attribute`] reports an error, which the
/// code treats as impossible when the sink is a `String`.
pub fn for_html_unquoted_attribute(input: &str) -> String {
    let mut encoded = String::with_capacity(input.len());
    let status = write_html_unquoted_attribute(&mut encoded, input);
    status.expect("writing to string cannot fail");
    encoded
}
/// Streams the unquoted-attribute encoding of `input` into `out`.
///
/// Delegates to `encode_loop` with
/// [`needs_html_unquoted_attribute_encoding`] as the predicate and
/// [`write_html_unquoted_attribute_encoded`] as the replacement writer.
///
/// # Errors
///
/// Returns whatever `encode_loop` returns (presumably errors raised by `out`).
pub fn write_html_unquoted_attribute<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
    let should_encode = needs_html_unquoted_attribute_encoding;
    let emit_replacement = write_html_unquoted_attribute_encoded::<W>;
    encode_loop(out, input, should_encode, emit_replacement)
}
/// Returns `true` when `c` must be replaced inside an unquoted attribute
/// value.
///
/// The flagged set is: code points up to U+001F (which covers tab, LF, FF and
/// CR), U+007F through U+009F, U+2028 and U+2029, the ASCII characters
/// space `&` `<` `>` `"` `'` `/` `=` and the grave accent, and anything
/// `is_unicode_noncharacter` reports.
fn needs_html_unquoted_attribute_encoding(c: char) -> bool {
    let code = u32::from(c);
    match code {
        // Control ranges: U+0000..=U+001F, then U+007F and U+0080..=U+009F.
        0x00..=0x1F | 0x7F..=0x9F => true,
        // Unicode line and paragraph separators.
        0x2028 | 0x2029 => true,
        _ => {
            matches!(c, ' ' | '&' | '<' | '>' | '"' | '\'' | '/' | '=' | '`')
                || is_unicode_noncharacter(code)
        }
    }
}
/// Writes the replacement for one character flagged by
/// [`needs_html_unquoted_attribute_encoding`].
///
/// `&`, `<` and `>` use their named entities; whitespace, quotes, `/`, `=`,
/// the grave accent, NEL, U+2028 and U+2029 use decimal numeric character
/// references. Every other character is replaced by `-`.
///
/// `_next` is part of the callback shape handed to `encode_loop` and is not
/// read here.
///
/// # Errors
///
/// Propagates any error returned by `out`.
fn write_html_unquoted_attribute_encoded<W: fmt::Write>(
    out: &mut W,
    c: char,
    _next: Option<char>,
) -> fmt::Result {
    match c {
        // Whitespace would terminate an unquoted attribute value.
        '\t' => out.write_str("&#9;"),
        '\n' => out.write_str("&#10;"),
        '\x0C' => out.write_str("&#12;"),
        '\r' => out.write_str("&#13;"),
        ' ' => out.write_str("&#32;"),
        '&' => out.write_str("&amp;"),
        '<' => out.write_str("&lt;"),
        '>' => out.write_str("&gt;"),
        '"' => out.write_str("&#34;"),
        '\'' => out.write_str("&#39;"),
        '/' => out.write_str("&#47;"),
        '=' => out.write_str("&#61;"),
        '`' => out.write_str("&#96;"),
        // NOTE(review): HTML5 parsers remap the numeric reference 133 to
        // U+2026, so NEL does not round-trip; this appears to mirror the
        // OWASP Java Encoder — confirm that is the intended behavior.
        '\u{0085}' => out.write_str("&#133;"),
        '\u{2028}' => out.write_str("&#8232;"),
        '\u{2029}' => out.write_str("&#8233;"),
        // Remaining flagged characters (other controls, noncharacters) have
        // no safe representation and are replaced outright.
        _ => out.write_char('-'),
    }
}
#[cfg(test)]
mod tests {
    // Unit tests for the four HTML encoding contexts. Expected strings spell
    // out the escaped output (entities / numeric references) explicitly.
    use super::*;

    // --- for_html / write_html ---

    #[test]
    fn html_no_encoding_needed() {
        assert_eq!(for_html("hello world"), "hello world");
        assert_eq!(for_html(""), "");
        assert_eq!(for_html("abc123"), "abc123");
    }

    #[test]
    fn html_encodes_ampersand() {
        assert_eq!(for_html("a&b"), "a&amp;b");
    }

    #[test]
    fn html_encodes_angle_brackets() {
        assert_eq!(for_html("<div>"), "&lt;div&gt;");
    }

    #[test]
    fn html_encodes_quotes() {
        assert_eq!(for_html(r#"a"b'c"#), "a&#34;b&#39;c");
    }

    #[test]
    fn html_replaces_controls_with_space() {
        assert_eq!(for_html("a\x01b"), "a b");
        assert_eq!(for_html("a\x7Fb"), "a b");
    }

    #[test]
    fn html_preserves_tab_lf_cr() {
        assert_eq!(for_html("a\tb\nc\rd"), "a\tb\nc\rd");
    }

    #[test]
    fn html_writer_variant() {
        let mut out = String::new();
        write_html(&mut out, "<b>").unwrap();
        assert_eq!(out, "&lt;b&gt;");
    }

    // --- for_html_content ---

    #[test]
    fn html_content_does_not_encode_quotes() {
        assert_eq!(for_html_content(r#"a"b'c"#), r#"a"b'c"#);
    }

    #[test]
    fn html_content_encodes_angle_brackets_and_amp() {
        assert_eq!(for_html_content("a<b&c>d"), "a&lt;b&amp;c&gt;d");
    }

    // --- for_html_attribute ---

    #[test]
    fn html_attribute_does_not_encode_gt() {
        assert_eq!(for_html_attribute("a>b"), "a>b");
    }

    #[test]
    fn html_attribute_encodes_quotes_and_amp_and_lt() {
        assert_eq!(
            for_html_attribute(r#"a"b'c&d<e"#),
            "a&#34;b&#39;c&amp;d&lt;e"
        );
    }

    // --- for_html_unquoted_attribute ---

    #[test]
    fn unquoted_attr_encodes_whitespace() {
        assert_eq!(
            for_html_unquoted_attribute("a b\tc\nd"),
            "a&#32;b&#9;c&#10;d"
        );
    }

    #[test]
    fn unquoted_attr_encodes_grave_accent() {
        assert_eq!(for_html_unquoted_attribute("a`b"), "a&#96;b");
    }

    #[test]
    fn unquoted_attr_encodes_equals_and_slash() {
        assert_eq!(for_html_unquoted_attribute("a=b/c"), "a&#61;b&#47;c");
    }

    #[test]
    fn unquoted_attr_replaces_controls_with_dash() {
        assert_eq!(for_html_unquoted_attribute("a\x01b"), "a-b");
        assert_eq!(for_html_unquoted_attribute("a\x7Fb"), "a-b");
    }

    #[test]
    fn unquoted_attr_encodes_nel() {
        assert_eq!(for_html_unquoted_attribute("a\u{0085}b"), "a&#133;b");
    }

    #[test]
    fn unquoted_attr_encodes_line_separators() {
        assert_eq!(
            for_html_unquoted_attribute("a\u{2028}b\u{2029}c"),
            "a&#8232;b&#8233;c"
        );
    }

    #[test]
    fn unquoted_attr_passes_through_safe_chars() {
        let safe = "ABCxyz019!#$%()*+,-.[]\\^_}";
        assert_eq!(for_html_unquoted_attribute(safe), safe);
    }

    #[test]
    fn unquoted_attr_passes_through_non_ascii() {
        assert_eq!(for_html_unquoted_attribute("café"), "café");
        assert_eq!(for_html_unquoted_attribute("日本語"), "日本語");
    }
}