use std::fmt;
use crate::engine::{encode_loop, is_invalid_for_xml, is_unicode_noncharacter};
/// Encodes `input` for XML and returns the result as a new `String`.
///
/// This is an alias: the work is delegated unchanged to
/// `crate::html::for_html`, so the output is exactly what that encoder
/// produces for the same input.
pub fn for_xml(input: &str) -> String {
crate::html::for_html(input)
}
/// Streaming counterpart of `for_xml`: writes the encoded form of `input`
/// to `out`.
///
/// Delegates unchanged to `crate::html::write_html`; whatever that function
/// returns (including any error reported by `out`) is returned as-is.
pub fn write_xml<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
crate::html::write_html(out, input)
}
/// Encodes `input` for XML content and returns the result as a new `String`.
///
/// This is an alias: the work is delegated unchanged to
/// `crate::html::for_html_content`.
pub fn for_xml_content(input: &str) -> String {
crate::html::for_html_content(input)
}
/// Streaming counterpart of `for_xml_content`: writes the encoded form of
/// `input` to `out`.
///
/// Delegates unchanged to `crate::html::write_html_content` and returns its
/// result as-is.
pub fn write_xml_content<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
crate::html::write_html_content(out, input)
}
/// Encodes `input` for an XML attribute value and returns the result as a
/// new `String`.
///
/// This is an alias: the work is delegated unchanged to
/// `crate::html::for_html_attribute`.
pub fn for_xml_attribute(input: &str) -> String {
crate::html::for_html_attribute(input)
}
/// Streaming counterpart of `for_xml_attribute`: writes the encoded form of
/// `input` to `out`.
///
/// Delegates unchanged to `crate::html::write_html_attribute` and returns
/// its result as-is.
pub fn write_xml_attribute<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
crate::html::write_html_attribute(out, input)
}
/// Encodes `input` for use as the body of an XML comment and returns it as a
/// new `String`.
///
/// Convenience wrapper around `write_xml_comment` that collects the output
/// into a buffer pre-sized to the input length.
///
/// # Panics
///
/// Panics only if `write_xml_comment` reports an error while writing into
/// the `String` buffer.
pub fn for_xml_comment(input: &str) -> String {
    let mut encoded = String::with_capacity(input.len());
    write_xml_comment(&mut encoded, input).expect("writing to string cannot fail");
    encoded
}
/// Writes `input` to `out` in a form that can be placed inside an XML
/// comment.
///
/// Rewriting rules, applied character by character:
///
/// * the second hyphen of every `--` pair becomes `~`, so the output never
///   contains two consecutive hyphens;
/// * a hyphen that is the very last character becomes `~`, so the output
///   never ends with `-`;
/// * any character for which `is_invalid_for_xml` returns `true` becomes a
///   single space;
/// * everything else is copied through unchanged.
///
/// # Errors
///
/// Returns the first error reported by `out`.
pub fn write_xml_comment<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
    // True when the character just emitted was a literal '-'.
    let mut prev_hyphen = false;
    let mut rest = input.chars().peekable();

    while let Some(ch) = rest.next() {
        // Decide what to emit and whether a literal hyphen is now pending.
        let (emit, hyphen_pending) = match ch {
            // Second hyphen of a pair: break up the "--" sequence.
            '-' if prev_hyphen => ('~', false),
            // Lone hyphen at the very end of the input.
            '-' if rest.peek().is_none() => ('~', false),
            '-' => ('-', true),
            other if is_invalid_for_xml(other) => (' ', false),
            other => (other, false),
        };
        out.write_char(emit)?;
        prev_hyphen = hyphen_pending;
    }

    Ok(())
}
/// Encodes `input` for use inside a CDATA section and returns it as a new
/// `String`.
///
/// Convenience wrapper around `write_cdata` that collects the output into a
/// buffer pre-sized to the input length.
///
/// # Panics
///
/// Panics only if `write_cdata` reports an error while writing into the
/// `String` buffer.
pub fn for_cdata(input: &str) -> String {
    let mut encoded = String::with_capacity(input.len());
    write_cdata(&mut encoded, input).expect("writing to string cannot fail");
    encoded
}
/// Writes `input` to `out` in a form that can be placed inside a
/// `<![CDATA[` ... `]]>` section.
///
/// * Every occurrence of the closing delimiter `]]>` is split across two
///   CDATA sections by emitting `]]]]><![CDATA[>` in its place, so the
///   delimiter never appears literally in the output.
/// * Any character for which `is_invalid_for_xml` returns `true` is replaced
///   by a single space.
/// * Everything else, including `]` runs not followed by `>`, is copied
///   through unchanged.
///
/// # Errors
///
/// Returns the first error reported by `out`.
pub fn write_cdata<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
    // Number of ']' characters seen but not yet written. `usize` cannot
    // overflow here because the run is bounded by the input length.
    let mut pending: usize = 0;

    for c in input.chars() {
        match c {
            // Hold brackets back until we know whether a '>' follows.
            ']' => pending += 1,
            '>' if pending >= 2 => {
                // Brackets before the final "]]" are ordinary content.
                write_pending_brackets(out, pending - 2)?;
                // "]]" + close section + reopen section + ">".
                out.write_str("]]]]><![CDATA[>")?;
                pending = 0;
            }
            _ => {
                write_pending_brackets(out, pending)?;
                pending = 0;
                out.write_char(if is_invalid_for_xml(c) { ' ' } else { c })?;
            }
        }
    }

    // Brackets at the very end of the input are harmless on their own.
    write_pending_brackets(out, pending)
}

/// Writes `count` literal `]` characters to `out`.
fn write_pending_brackets<W: fmt::Write>(out: &mut W, count: usize) -> fmt::Result {
    for _ in 0..count {
        out.write_char(']')?;
    }
    Ok(())
}
/// Encodes `input` for XML 1.1 and returns it as a new `String`.
///
/// Convenience wrapper around `write_xml11` that collects the output into a
/// buffer pre-sized to the input length.
///
/// # Panics
///
/// Panics only if `write_xml11` reports an error while writing into the
/// `String` buffer.
pub fn for_xml11(input: &str) -> String {
    let mut encoded = String::with_capacity(input.len());
    write_xml11(&mut encoded, input).expect("writing to string cannot fail");
    encoded
}
/// Writes the XML 1.1 encoding of `input` to `out`.
///
/// Runs `encode_loop` with `needs_xml11_encoding` as the predicate and
/// `write_xml11_encoded` as the replacement writer, and returns its result
/// as-is.
// NOTE(review): presumably `encode_loop` copies characters the predicate
// rejects and calls the writer for those it accepts — confirm in `engine`.
pub fn write_xml11<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
encode_loop(out, input, needs_xml11_encoding, write_xml11_encoded)
}
/// Encodes `input` for XML 1.1 element content and returns it as a new
/// `String`.
///
/// Convenience wrapper around `write_xml11_content` that collects the output
/// into a buffer pre-sized to the input length.
///
/// # Panics
///
/// Panics only if `write_xml11_content` reports an error while writing into
/// the `String` buffer.
pub fn for_xml11_content(input: &str) -> String {
    let mut encoded = String::with_capacity(input.len());
    write_xml11_content(&mut encoded, input).expect("writing to string cannot fail");
    encoded
}
/// Writes the XML 1.1 element-content encoding of `input` to `out`.
///
/// Runs `encode_loop` with `needs_xml11_content_encoding` as the predicate
/// and `write_xml11_content_encoded` as the replacement writer, and returns
/// its result as-is.
// NOTE(review): presumably `encode_loop` copies characters the predicate
// rejects and calls the writer for those it accepts — confirm in `engine`.
pub fn write_xml11_content<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
encode_loop(
out,
input,
needs_xml11_content_encoding,
write_xml11_content_encoded,
)
}
/// Encodes `input` for an XML 1.1 attribute value and returns it as a new
/// `String`.
///
/// Convenience wrapper around `write_xml11_attribute` that collects the
/// output into a buffer pre-sized to the input length.
///
/// # Panics
///
/// Panics only if `write_xml11_attribute` reports an error while writing
/// into the `String` buffer.
pub fn for_xml11_attribute(input: &str) -> String {
    let mut encoded = String::with_capacity(input.len());
    write_xml11_attribute(&mut encoded, input).expect("writing to string cannot fail");
    encoded
}
/// Writes the XML 1.1 attribute-value encoding of `input` to `out`.
///
/// Runs `encode_loop` with `needs_xml11_attribute_encoding` as the predicate
/// and `write_xml11_attribute_encoded` as the replacement writer, and
/// returns its result as-is.
// NOTE(review): presumably `encode_loop` copies characters the predicate
// rejects and calls the writer for those it accepts — confirm in `engine`.
pub fn write_xml11_attribute<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
encode_loop(
out,
input,
needs_xml11_attribute_encoding,
write_xml11_attribute_encoded,
)
}
/// Returns `true` when `c` must not appear literally in XML 1.1 output.
///
/// That covers NUL, the control ranges U+0001–U+0008, U+000B, U+000C,
/// U+000E–U+001F, U+007F–U+0084 and U+0086–U+009F, and any code point for
/// which `is_unicode_noncharacter` returns `true`. Tab, LF, CR and NEL
/// (U+0085) are deliberately outside these ranges.
fn is_xml11_restricted_or_invalid(c: char) -> bool {
    match u32::from(c) {
        0x00..=0x08 | 0x0B | 0x0C | 0x0E..=0x1F | 0x7F..=0x84 | 0x86..=0x9F => true,
        code_point => is_unicode_noncharacter(code_point),
    }
}
/// Predicate for `write_xml11`: `true` for the five markup-significant
/// characters (`&`, `<`, `>`, `"`, `'`) and for anything
/// `is_xml11_restricted_or_invalid` rejects.
fn needs_xml11_encoding(c: char) -> bool {
    match c {
        '&' | '<' | '>' | '"' | '\'' => true,
        other => is_xml11_restricted_or_invalid(other),
    }
}
fn write_xml11_encoded<W: fmt::Write>(out: &mut W, c: char, _next: Option<char>) -> fmt::Result {
match c {
'&' => out.write_str("&"),
'<' => out.write_str("<"),
'>' => out.write_str(">"),
'"' => out.write_str("""),
'\'' => out.write_str("'"),
'\0' => out.write_char(' '),
c if is_unicode_noncharacter(c as u32) => out.write_char(' '),
c => write!(out, "&#x{:x};", c as u32),
}
}
/// Predicate for `write_xml11_content`: `true` for `&`, `<`, `>` and for
/// anything `is_xml11_restricted_or_invalid` rejects. Quotes are not
/// flagged.
fn needs_xml11_content_encoding(c: char) -> bool {
    match c {
        '&' | '<' | '>' => true,
        other => is_xml11_restricted_or_invalid(other),
    }
}
/// Writes the XML 1.1 element-content replacement for a character flagged
/// by `needs_xml11_content_encoding`.
///
/// * `&`, `<`, `>` become `&amp;`, `&lt;`, `&gt;`.
/// * NUL and any code point for which `is_unicode_noncharacter` returns
///   `true` become a single space.
/// * Every other character is written as a lowercase hexadecimal character
///   reference (`&#x..;`).
///
/// `_next` is unused; it is presumably part of the callback signature
/// expected by `encode_loop`.
///
/// # Errors
///
/// Returns the first error reported by `out`.
fn write_xml11_content_encoded<W: fmt::Write>(
    out: &mut W,
    c: char,
    _next: Option<char>,
) -> fmt::Result {
    match c {
        '&' => out.write_str("&amp;"),
        '<' => out.write_str("&lt;"),
        '>' => out.write_str("&gt;"),
        // These two cases are replaced with a space rather than a reference.
        '\0' => out.write_char(' '),
        c if is_unicode_noncharacter(c as u32) => out.write_char(' '),
        c => write!(out, "&#x{:x};", c as u32),
    }
}
/// Predicate for `write_xml11_attribute`: `true` for `&`, `<`, `"`, `'` and
/// for anything `is_xml11_restricted_or_invalid` rejects. `>` is not
/// flagged.
fn needs_xml11_attribute_encoding(c: char) -> bool {
    match c {
        '&' | '<' | '"' | '\'' => true,
        other => is_xml11_restricted_or_invalid(other),
    }
}
fn write_xml11_attribute_encoded<W: fmt::Write>(
out: &mut W,
c: char,
_next: Option<char>,
) -> fmt::Result {
match c {
'&' => out.write_str("&"),
'<' => out.write_str("<"),
'"' => out.write_str("""),
'\'' => out.write_str("'"),
'\0' => out.write_char(' '),
c if is_unicode_noncharacter(c as u32) => out.write_char(' '),
c => write!(out, "&#x{:x};", c as u32),
}
}
#[cfg(test)]
mod tests {
    // Unit tests for the XML aliases, the comment and CDATA encoders, and
    // the XML 1.1 encoders defined in the parent module.
    use super::*;

    // --- XML 1.0 aliases -------------------------------------------------

    #[test]
    fn xml_aliases_match_html() {
        let input = r#"<b attr="val">&</b>"#;
        assert_eq!(for_xml(input), crate::html::for_html(input));
        assert_eq!(for_xml_content(input), crate::html::for_html_content(input));
        assert_eq!(
            for_xml_attribute(input),
            crate::html::for_html_attribute(input)
        );
    }

    // --- XML comments ----------------------------------------------------

    #[test]
    fn comment_passthrough() {
        assert_eq!(for_xml_comment("safe text"), "safe text");
        assert_eq!(for_xml_comment(""), "");
    }

    #[test]
    fn comment_double_hyphen() {
        assert_eq!(for_xml_comment("a--b"), "a-~b");
        assert_eq!(for_xml_comment("--"), "-~");
        assert_eq!(for_xml_comment("---"), "-~~");
        assert_eq!(for_xml_comment("----"), "-~-~");
        assert_eq!(for_xml_comment("a--b--c"), "a-~b-~c");
    }

    #[test]
    fn comment_trailing_hyphen() {
        assert_eq!(for_xml_comment("trailing-"), "trailing~");
        assert_eq!(for_xml_comment("-"), "~");
    }

    #[test]
    fn comment_replaces_invalid_xml() {
        assert_eq!(for_xml_comment("a\x01b"), "a b");
        assert_eq!(for_xml_comment("a\x7Fb"), "a b");
    }

    #[test]
    fn comment_preserves_non_ascii() {
        assert_eq!(for_xml_comment("café"), "café");
    }

    #[test]
    fn comment_writer_variant() {
        let mut out = String::new();
        write_xml_comment(&mut out, "a--b").unwrap();
        assert_eq!(out, "a-~b");
    }

    // --- CDATA sections --------------------------------------------------

    #[test]
    fn cdata_passthrough() {
        assert_eq!(for_cdata("safe text"), "safe text");
        assert_eq!(for_cdata(""), "");
    }

    #[test]
    fn cdata_splits_closing_delimiter() {
        assert_eq!(for_cdata("a]]>b"), "a]]]]><![CDATA[>b");
    }

    #[test]
    fn cdata_double_split() {
        assert_eq!(for_cdata("a]]>b]]>c"), "a]]]]><![CDATA[>b]]]]><![CDATA[>c");
    }

    #[test]
    fn cdata_brackets_without_gt() {
        assert_eq!(for_cdata("]]"), "]]");
        assert_eq!(for_cdata("]"), "]");
        assert_eq!(for_cdata("]]a"), "]]a");
    }

    #[test]
    fn cdata_extra_brackets() {
        assert_eq!(for_cdata("]]]>"), "]]]]]><![CDATA[>");
    }

    #[test]
    fn cdata_replaces_invalid_xml() {
        assert_eq!(for_cdata("a\x01b"), "a b");
    }

    #[test]
    fn cdata_single_bracket_gt() {
        assert_eq!(for_cdata("]>"), "]>");
    }

    #[test]
    fn cdata_writer_variant() {
        let mut out = String::new();
        write_cdata(&mut out, "a]]>b").unwrap();
        assert_eq!(out, "a]]]]><![CDATA[>b");
    }

    // --- XML 1.1 ---------------------------------------------------------

    #[test]
    fn xml11_encodes_entities() {
        assert_eq!(for_xml11("<&>\"'"), "&lt;&amp;&gt;&#34;&#39;");
    }

    #[test]
    fn xml11_controls_as_references() {
        assert_eq!(for_xml11("a\x01b"), "a&#x1;b");
        assert_eq!(for_xml11("a\x08b"), "a&#x8;b");
        assert_eq!(for_xml11("a\x0Bb"), "a&#xb;b");
        assert_eq!(for_xml11("a\x1Fb"), "a&#x1f;b");
    }

    #[test]
    fn xml11_nel_passes_through() {
        assert_eq!(for_xml11("a\u{0085}b"), "a\u{0085}b");
    }

    #[test]
    fn xml11_del_and_c1_as_references() {
        assert_eq!(for_xml11("a\x7Fb"), "a&#x7f;b");
        assert_eq!(for_xml11("a\u{0080}b"), "a&#x80;b");
        assert_eq!(for_xml11("a\u{009F}b"), "a&#x9f;b");
    }

    #[test]
    fn xml11_nul_replaced_with_space() {
        assert_eq!(for_xml11("a\x00b"), "a b");
    }

    #[test]
    fn xml11_nonchars_replaced_with_space() {
        assert_eq!(for_xml11("a\u{FDD0}b"), "a b");
    }

    #[test]
    fn xml11_preserves_tab_lf_cr() {
        assert_eq!(for_xml11("a\tb\nc\rd"), "a\tb\nc\rd");
    }

    #[test]
    fn xml11_content_no_quotes() {
        assert_eq!(for_xml11_content(r#"a"b'c"#), r#"a"b'c"#);
        assert_eq!(for_xml11_content("a\x01b"), "a&#x1;b");
    }

    #[test]
    fn xml11_content_encodes_markup() {
        assert_eq!(for_xml11_content("<&>"), "&lt;&amp;&gt;");
    }

    #[test]
    fn xml11_attribute_no_gt() {
        assert_eq!(for_xml11_attribute("a>b"), "a>b");
        assert_eq!(for_xml11_attribute("a\x01b"), "a&#x1;b");
    }

    #[test]
    fn xml11_attribute_encodes_markup() {
        assert_eq!(for_xml11_attribute("<&"), "&lt;&amp;");
    }
}