use contextual_encoder::*;
/// Tests for the HTML encoders: `for_html`, `for_html_content`,
/// `for_html_attribute` and `for_html_unquoted_attribute`.
///
/// Every expected value spells its character references out literally
/// (`&lt;`, `&gt;`, `&amp;`, `&#34;`, ...), so each assertion pins the encoded
/// form instead of comparing the input with itself.
///
/// NOTE(review): the apostrophe is assumed to be emitted as `&#39;`, by
/// analogy with the decimal `&#34;` used for the double quote — confirm
/// against the encoder.
mod html {
    use super::*;

    /// A `<script>` payload has its angle brackets and quotes encoded.
    #[test]
    fn script_tag_injection() {
        assert_eq!(
            for_html("<script>alert('xss')</script>"),
            "&lt;script&gt;alert(&#39;xss&#39;)&lt;/script&gt;"
        );
    }

    /// An `<img onerror=...>` payload has its brackets and double quotes encoded.
    #[test]
    fn img_tag_injection() {
        assert_eq!(
            for_html(r#"<img src=x onerror="alert(1)">"#),
            "&lt;img src=x onerror=&#34;alert(1)&#34;&gt;"
        );
    }

    /// Every ampersand is encoded, wherever it sits in the input.
    #[test]
    fn ampersand_in_various_positions() {
        let cases = [
            ("&", "&amp;"),
            ("&&", "&amp;&amp;"),
            ("a&b&c", "a&amp;b&amp;c"),
            ("&amp;", "&amp;amp;"),
        ];
        for (input, expected) in cases {
            assert_eq!(for_html(input), expected, "input {input:?}");
        }
    }

    /// Input that already looks encoded is encoded again, not passed through.
    #[test]
    fn already_encoded_input() {
        assert_eq!(for_html("&lt;"), "&amp;lt;");
        assert_eq!(for_html("&#34;"), "&amp;#34;");
    }

    /// Safe text survives while the markup characters around it are encoded.
    #[test]
    fn mixed_safe_and_unsafe() {
        assert_eq!(
            for_html("Hello, <world> & \"friends\"!"),
            "Hello, &lt;world&gt; &amp; &#34;friends&#34;!"
        );
    }

    /// CJK text is returned unchanged.
    #[test]
    fn cjk_characters() {
        assert_eq!(for_html("日本語テスト"), "日本語テスト");
    }

    /// Emoji are returned unchanged.
    #[test]
    fn emoji() {
        assert_eq!(for_html("hello 😀 world"), "hello 😀 world");
    }

    /// Non-ASCII letters are kept while the surrounding brackets are encoded.
    #[test]
    fn mixed_unicode_and_html() {
        assert_eq!(for_html("<café>"), "&lt;café&gt;");
    }

    /// Code points above U+FFFF are returned unchanged.
    #[test]
    fn supplementary_plane_characters() {
        assert_eq!(for_html("\u{10000}"), "\u{10000}");
        assert_eq!(for_html("😀"), "😀");
    }

    /// NUL is replaced with a space.
    #[test]
    fn null_byte() {
        assert_eq!(for_html("\x00"), " ");
    }

    /// Tab, LF and CR pass through; every other C0 control becomes a space.
    #[test]
    fn all_c0_controls() {
        for passthrough in ["\x09", "\x0A", "\x0D"] {
            assert_eq!(for_html(passthrough), passthrough);
        }
        let replaced = (0x00u8..=0x1F).filter(|&cp| !matches!(cp, 0x09 | 0x0A | 0x0D));
        for cp in replaced {
            let s = String::from(char::from(cp));
            assert_eq!(for_html(&s), " ", "C0 control 0x{:02x}", cp);
        }
    }

    /// DEL is replaced with a space.
    #[test]
    fn del_replaced() {
        assert_eq!(for_html("\x7F"), " ");
    }

    /// Each C1 control (U+0080..=U+009F) is replaced with a space.
    #[test]
    fn c1_controls_replaced() {
        for cp in 0x80u32..=0x9F {
            let c = char::from_u32(cp).unwrap();
            let s = String::from(c);
            assert_eq!(for_html(&s), " ", "C1 control U+{:04X}", cp);
        }
    }

    /// Unicode noncharacters are replaced with a space.
    #[test]
    fn noncharacters_replaced() {
        let noncharacters = [
            "\u{FDD0}",
            "\u{FDEF}",
            "\u{FFFE}",
            "\u{FFFF}",
            "\u{1FFFE}",
            "\u{10FFFF}",
        ];
        for input in noncharacters {
            assert_eq!(for_html(input), " ", "input {input:?}");
        }
    }

    /// The empty string encodes to the empty string.
    #[test]
    fn empty_string() {
        assert_eq!(for_html(""), "");
    }

    /// A single safe character is returned unchanged.
    #[test]
    fn single_safe_char() {
        assert_eq!(for_html("a"), "a");
    }

    /// A single unsafe character is replaced by its entity.
    #[test]
    fn single_unsafe_char() {
        assert_eq!(for_html("<"), "&lt;");
    }

    /// A long run of safe characters is returned unchanged.
    #[test]
    fn long_safe_string() {
        let s = "a".repeat(10000);
        assert_eq!(for_html(&s), s);
    }

    /// A string made only of unsafe characters is encoded character by character.
    #[test]
    fn all_unsafe_string() {
        assert_eq!(for_html("<>&\"'"), "&lt;&gt;&amp;&#34;&#39;");
    }

    /// `>` is encoded by the content and combined encoders but not by the
    /// attribute encoder.
    #[test]
    fn content_vs_attribute_gt_handling() {
        assert_eq!(for_html_content("a>b"), "a&gt;b");
        assert_eq!(for_html_attribute("a>b"), "a>b");
        assert_eq!(for_html("a>b"), "a&gt;b");
    }

    /// Quotes are left alone by the content encoder and encoded by the
    /// attribute encoder.
    #[test]
    fn content_vs_attribute_quote_handling() {
        assert_eq!(for_html_content(r#"a"b"#), r#"a"b"#);
        assert_eq!(for_html_content("a'b"), "a'b");
        assert_eq!(for_html_attribute(r#"a"b"#), "a&#34;b");
        assert_eq!(for_html_attribute("a'b"), "a&#39;b");
    }

    /// The unquoted-attribute encoder encodes whitespace and every character
    /// that could terminate or restructure an unquoted value.
    ///
    /// NOTE(review): the decimal `&#NN;` spellings for whitespace, `/`, `=`
    /// and the backtick are assumed to match the decimal style used for
    /// quotes — confirm against the encoder.
    #[test]
    fn unquoted_attr_comprehensive() {
        assert_eq!(
            for_html_unquoted_attribute("\t\n\x0C\r "),
            "&#9;&#10;&#12;&#13;&#32;"
        );
        assert_eq!(
            for_html_unquoted_attribute("&<>\"'/=`"),
            "&amp;&lt;&gt;&#34;&#39;&#47;&#61;&#96;"
        );
    }
}
/// Tests for the JavaScript string encoders: `for_javascript`,
/// `for_javascript_attribute`, `for_javascript_block` and
/// `for_javascript_source`.
mod javascript {
    use super::*;

    /// A quote-led payload has its quote hex-escaped and its slashes escaped.
    #[test]
    fn xss_in_string_literal() {
        let encoded = for_javascript("';alert(1);//");
        assert_eq!(encoded, r"\x27;alert(1);\/\/");
    }

    /// The slash of each closing `</script>` tag is escaped.
    #[test]
    fn script_block_breakout() {
        let payload = "</script><script>alert(1)</script>";
        assert_eq!(
            for_javascript(payload),
            r"<\/script><script>alert(1)<\/script>"
        );
    }

    /// Backspace, tab, LF, form feed and CR use their named escapes.
    #[test]
    fn all_named_escapes() {
        let cases = [
            ("\x08", r"\b"),
            ("\t", r"\t"),
            ("\n", r"\n"),
            ("\x0C", r"\f"),
            ("\r", r"\r"),
        ];
        for (input, expected) in cases {
            assert_eq!(for_javascript(input), expected, "input {input:?}");
        }
    }

    /// C0 controls without a named escape use lowercase `\xHH`.
    #[test]
    fn hex_escapes_for_c0_controls() {
        let cases = [
            ("\x00", r"\x00"),
            ("\x01", r"\x01"),
            ("\x07", r"\x07"),
            ("\x0B", r"\x0b"),
            ("\x0E", r"\x0e"),
            ("\x1F", r"\x1f"),
        ];
        for (input, expected) in cases {
            assert_eq!(for_javascript(input), expected, "input {input:?}");
        }
    }

    /// U+2028 and U+2029 are written as `\uXXXX` escapes.
    #[test]
    fn unicode_line_terminators() {
        let cases = [
            ("\u{2028}", r"\u2028"),
            ("\u{2029}", r"\u2029"),
            ("a\u{2028}b\u{2029}c", r"a\u2028b\u2029c"),
        ];
        for (input, expected) in cases {
            assert_eq!(for_javascript(input), expected, "input {input:?}");
        }
    }

    /// Each backslash in the input is doubled.
    #[test]
    fn backslash_escaping() {
        let cases = [(r"\", r"\\"), (r"\\", r"\\\\"), (r"\n", r"\\n")];
        for (input, expected) in cases {
            assert_eq!(for_javascript(input), expected, "input {input:?}");
        }
    }

    /// Non-ASCII text is returned unchanged.
    #[test]
    fn preserves_non_ascii() {
        for text in ["café", "日本語", "😀"] {
            assert_eq!(for_javascript(text), text);
        }
    }

    /// Backticks are returned unchanged by the string-literal encoder.
    #[test]
    fn backtick_not_encoded() {
        let text = "`template`";
        assert_eq!(for_javascript(text), text);
    }

    /// The universal encoder escapes `/`; the attribute encoder does not.
    #[test]
    fn universal_vs_attribute_slash() {
        let input = "a/b";
        assert_eq!(for_javascript(input), r"a\/b");
        assert_eq!(for_javascript_attribute(input), "a/b");
    }

    /// The universal encoder hex-escapes quotes; the block encoder
    /// backslash-escapes them.
    #[test]
    fn universal_vs_block_quotes() {
        let double_quoted = r#"a"b"#;
        assert_eq!(for_javascript(double_quoted), r"a\x22b");
        assert_eq!(for_javascript_block(double_quoted), r#"a\"b"#);

        let single_quoted = "a'b";
        assert_eq!(for_javascript(single_quoted), r"a\x27b");
        assert_eq!(for_javascript_block(single_quoted), r"a\'b");
    }

    /// The source encoder leaves `/` and `&` alone but still escapes line
    /// terminators.
    #[test]
    fn source_minimal_encoding() {
        let cases = [
            ("a/b&c", "a/b&c"),
            ("\n", r"\n"),
            ("\u{2028}", r"\u2028"),
        ];
        for (input, expected) in cases {
            assert_eq!(for_javascript_source(input), expected, "input {input:?}");
        }
    }

    /// The block encoder hex-escapes `&`; the source encoder does not.
    #[test]
    fn block_vs_source_ampersand() {
        let input = "a&b";
        assert_eq!(for_javascript_block(input), r"a\x26b");
        assert_eq!(for_javascript_source(input), "a&b");
    }

    /// The empty string encodes to the empty string.
    #[test]
    fn empty_string() {
        assert_eq!(for_javascript(""), "");
    }

    /// Input that already contains escape sequences gets its backslashes doubled.
    #[test]
    fn already_escaped_input() {
        let cases = [(r"\n", r"\\n"), (r"\x22", r"\\x22")];
        for (input, expected) in cases {
            assert_eq!(for_javascript(input), expected, "input {input:?}");
        }
    }
}
mod js_template {
use super::*;
#[test]
fn interpolation_breakout() {
assert_eq!(for_js_template("${alert(1)}"), r"\${alert(1)}");
}
#[test]
fn multiple_interpolations() {
assert_eq!(for_js_template("${a} and ${b}"), r"\${a} and \${b}");
}
#[test]
fn dollar_without_brace_passes_through() {
assert_eq!(for_js_template("$100"), "$100");
assert_eq!(for_js_template("a $ b"), "a $ b");
assert_eq!(for_js_template("a$"), "a$");
}
#[test]
fn nested_interpolation() {
assert_eq!(for_js_template("${${x}}"), r"\${\${x}}");
}
#[test]
fn backtick_escaped() {
assert_eq!(for_js_template("`"), r"\`");
assert_eq!(for_js_template("hello `world`"), r"hello \`world\`");
}
#[test]
fn tagged_template_breakout() {
assert_eq!(for_js_template("`+evil`"), r"\`+evil\`");
}
#[test]
fn script_block_breakout() {
assert_eq!(
for_js_template("</script><script>alert(1)</script>"),
r"<\/script><script>alert(1)<\/script>"
);
}
#[test]
fn backslash_escaped() {
assert_eq!(for_js_template(r"\"), r"\\");
assert_eq!(for_js_template(r"\\"), r"\\\\");
assert_eq!(for_js_template("\\`"), "\\\\\\`");
}
#[test]
fn all_named_escapes() {
assert_eq!(for_js_template("\x08"), r"\b");
assert_eq!(for_js_template("\t"), r"\t");
assert_eq!(for_js_template("\n"), r"\n");
assert_eq!(for_js_template("\x0C"), r"\f");
assert_eq!(for_js_template("\r"), r"\r");
}
#[test]
fn hex_escapes_for_c0_controls() {
assert_eq!(for_js_template("\x00"), r"\x00");
assert_eq!(for_js_template("\x01"), r"\x01");
assert_eq!(for_js_template("\x07"), r"\x07");
assert_eq!(for_js_template("\x0B"), r"\x0b");
assert_eq!(for_js_template("\x0E"), r"\x0e");
assert_eq!(for_js_template("\x1F"), r"\x1f");
}
#[test]
fn unicode_line_terminators() {
assert_eq!(for_js_template("\u{2028}"), r"\u2028");
assert_eq!(for_js_template("\u{2029}"), r"\u2029");
assert_eq!(for_js_template("a\u{2028}b\u{2029}c"), r"a\u2028b\u2029c");
}
#[test]
fn quotes_pass_through() {
assert_eq!(for_js_template(r#"a"b"#), r#"a"b"#);
assert_eq!(for_js_template("a'b"), "a'b");
assert_eq!(for_js_template(r#"say "it's" fine"#), r#"say "it's" fine"#);
}
#[test]
fn preserves_non_ascii() {
assert_eq!(for_js_template("café"), "café");
assert_eq!(for_js_template("日本語"), "日本語");
}
#[test]
fn emoji() {
assert_eq!(for_js_template("😀"), "😀");
assert_eq!(for_js_template("hello 😀 world"), "hello 😀 world");
}
#[test]
fn supplementary_plane_characters() {
assert_eq!(for_js_template("\u{10000}"), "\u{10000}");
assert_eq!(for_js_template("\u{1F600}"), "\u{1F600}");
}
#[test]
fn empty_string() {
assert_eq!(for_js_template(""), "");
}
#[test]
fn single_safe_char() {
assert_eq!(for_js_template("a"), "a");
}
#[test]
fn long_safe_string() {
let s = "a".repeat(10000);
assert_eq!(for_js_template(&s), s);
}
#[test]
fn already_escaped_input() {
assert_eq!(for_js_template(r"\n"), r"\\n");
assert_eq!(for_js_template(r"\`"), r"\\\`");
assert_eq!(for_js_template(r"\${x}"), r"\\\${x}");
}
#[test]
fn template_vs_string_quotes() {
assert_eq!(for_javascript(r#"a"b"#), r"a\x22b");
assert_eq!(for_js_template(r#"a"b"#), r#"a"b"#);
assert_eq!(for_javascript("a'b"), r"a\x27b");
assert_eq!(for_js_template("a'b"), "a'b");
}
#[test]
fn template_vs_string_backtick() {
assert_eq!(for_javascript("`"), "`");
assert_eq!(for_js_template("`"), r"\`");
}
#[test]
fn template_vs_string_interpolation() {
assert_eq!(for_javascript("${x}"), "${x}");
assert_eq!(for_js_template("${x}"), r"\${x}");
}
#[test]
fn mixed_xss_payload() {
assert_eq!(
for_js_template("`Hello ${name}`, welcome\\n"),
r"\`Hello \${name}\`, welcome\\n"
);
}
#[test]
fn writer_matches_string() {
let input = "`Hello ${name}` \\ </script> \t\n café 😀";
let string_result = for_js_template(input);
let mut writer_result = String::new();
write_js_template(&mut writer_result, input).unwrap();
assert_eq!(string_result, writer_result);
}
#[test]
fn writer_matches_string_all_categories() {
let input = "abc`${x}\\\t\n\r\x0C\x08\x00\x1F/\u{2028}\u{2029}café😀";
let string_result = for_js_template(input);
let mut writer_result = String::new();
write_js_template(&mut writer_result, input).unwrap();
assert_eq!(string_result, writer_result);
}
}
/// Tests for the CSS encoders `for_css_string` and `for_css_url`.
///
/// Escapes are written as a backslash followed by the shortest lowercase hex
/// form of the code point. Several tests pin when a single space is emitted
/// after an escape to separate it from the character that follows.
mod css {
    use super::*;

    /// Plain text and the empty string are returned unchanged.
    #[test]
    fn basic_encoding() {
        assert_eq!(for_css_string("hello"), "hello");
        assert_eq!(for_css_string(""), "");
    }

    /// Escapes use the shortest hex form without padding.
    #[test]
    fn hex_escape_format() {
        let cases = [
            ("\x00", r"\0"),
            ("\x01", r"\1"),
            ("\"", r"\22"),
            ("'", r"\27"),
        ];
        for (input, expected) in cases {
            assert_eq!(for_css_string(input), expected, "input {input:?}");
        }
    }

    /// A separating space follows an escape only when the next character is a
    /// hex digit.
    #[test]
    fn trailing_space_before_hex_digit() {
        // Followed by a hex digit: separator required.
        assert_eq!(for_css_string("\"a"), r"\22 a");
        assert_eq!(for_css_string("\"0"), r"\22 0");
        assert_eq!(for_css_string("\"f"), r"\22 f");
        assert_eq!(for_css_string("\"F"), r"\22 F");
        // Followed by a non-hex character: no separator.
        assert_eq!(for_css_string("\"g"), r"\22g");
        assert_eq!(for_css_string("\"z"), r"\22z");
        assert_eq!(for_css_string("\"!"), r"\22!");
    }

    /// A separating space also follows an escape when the next input character
    /// is whitespace.
    #[test]
    fn trailing_space_before_whitespace() {
        // Two spaces are expected: the separator plus the literal space from the
        // input. With only one, a CSS parser would consume it as the escape
        // terminator and the literal space would be lost.
        // NOTE(review): assumes a literal space is otherwise passed through
        // unencoded — confirm against the encoder.
        assert_eq!(for_css_string("\" "), r"\22  ");
        assert_eq!(for_css_string("\"\t"), r"\22 \9");
        assert_eq!(for_css_string("\"\n"), r"\22 \a");
    }

    /// No separator is emitted when the escape ends the output.
    #[test]
    fn no_trailing_space_at_end() {
        assert_eq!(for_css_string("\""), r"\22");
        assert_eq!(for_css_string("'"), r"\27");
    }

    /// Adjacent escapes are not separated by a space.
    #[test]
    fn consecutive_encoded_chars() {
        assert_eq!(for_css_string("\"'"), r"\22\27");
        assert_eq!(for_css_string("\\\""), r"\5c\22");
    }

    /// Parentheses in an `expression(...)` payload are escaped.
    #[test]
    fn css_string_xss_payload() {
        assert_eq!(
            for_css_string("expression(alert(1))"),
            r"expression\28 alert\28 1\29\29"
        );
    }

    /// Unicode noncharacters are replaced with an underscore.
    #[test]
    fn noncharacters() {
        for input in ["\u{FDD0}", "\u{FFFE}", "\u{FFFF}"] {
            assert_eq!(for_css_string(input), "_", "input {input:?}");
        }
    }

    /// Non-ASCII text is returned unchanged.
    #[test]
    fn preserves_non_ascii() {
        assert_eq!(for_css_string("café"), "café");
        assert_eq!(for_css_string("日本語"), "日本語");
    }

    /// Sample C1 controls are hex-escaped.
    #[test]
    fn c1_controls_encoded() {
        let cases = [
            ("\u{0080}", r"\80"),
            ("\u{009F}", r"\9f"),
            ("\u{0085}", r"\85"),
        ];
        for (input, expected) in cases {
            assert_eq!(for_css_string(input), expected, "input {input:?}");
        }
    }

    /// Every C1 control (U+0080..=U+009F) produces output starting with a backslash.
    #[test]
    fn c1_controls_full_range() {
        for cp in 0x80u32..=0x9F {
            let c = char::from_u32(cp).unwrap();
            let input = String::from(c);
            let encoded = for_css_string(&input);
            assert!(
                encoded.starts_with('\\'),
                "U+{cp:04X} should be CSS-encoded, got {encoded:?}"
            );
        }
    }

    /// The separator rule applies to C1 escapes exactly as it does to ASCII ones.
    #[test]
    fn c1_controls_trailing_space_rules() {
        let cases = [
            ("\u{0085}a", r"\85 a"),
            ("\u{0085}F", r"\85 F"),
            ("\u{0085}z", r"\85z"),
            ("\u{0085}!", r"\85!"),
        ];
        for (input, expected) in cases {
            assert_eq!(for_css_string(input), expected, "input {input:?}");
        }
    }

    /// The URL encoder hex-escapes C1 controls as well.
    #[test]
    fn c1_controls_in_url_context() {
        let cases = [
            ("\u{0080}", r"\80"),
            ("\u{0085}", r"\85"),
            ("\u{009F}", r"\9f"),
        ];
        for (input, expected) in cases {
            assert_eq!(for_css_url(input), expected, "input {input:?}");
        }
    }

    /// Characters from U+00A0 upward are returned unchanged.
    #[test]
    fn non_ascii_above_c1_preserved() {
        assert_eq!(for_css_string("\u{00A0}"), "\u{00A0}");
        assert_eq!(for_css_string("café"), "café");
    }

    /// Parentheses pass through the URL encoder but are escaped by the string
    /// encoder.
    #[test]
    fn url_does_not_encode_parens() {
        assert_eq!(for_css_url("a(b)c"), "a(b)c");
        assert_eq!(for_css_string("a(b)c"), r"a\28 b\29 c");
    }

    /// Quotes, backslash and `<` are still escaped by the URL encoder.
    #[test]
    fn url_encodes_everything_else() {
        let cases = [
            ("\"", r"\22"),
            ("'", r"\27"),
            ("\\", r"\5c"),
            ("<", r"\3c"),
        ];
        for (input, expected) in cases {
            assert_eq!(for_css_url(input), expected, "input {input:?}");
        }
    }
}
mod uri {
use super::*;
#[test]
fn unreserved_chars_pass_through() {
let unreserved = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~";
assert_eq!(for_uri_component(unreserved), unreserved);
}
#[test]
fn reserved_chars_encoded() {
assert_eq!(for_uri_component(":"), "%3A");
assert_eq!(for_uri_component("/"), "%2F");
assert_eq!(for_uri_component("?"), "%3F");
assert_eq!(for_uri_component("#"), "%23");
assert_eq!(for_uri_component("["), "%5B");
assert_eq!(for_uri_component("]"), "%5D");
assert_eq!(for_uri_component("@"), "%40");
assert_eq!(for_uri_component("!"), "%21");
assert_eq!(for_uri_component("$"), "%24");
assert_eq!(for_uri_component("&"), "%26");
assert_eq!(for_uri_component("'"), "%27");
assert_eq!(for_uri_component("("), "%28");
assert_eq!(for_uri_component(")"), "%29");
assert_eq!(for_uri_component("*"), "%2A");
assert_eq!(for_uri_component("+"), "%2B");
assert_eq!(for_uri_component(","), "%2C");
assert_eq!(for_uri_component(";"), "%3B");
assert_eq!(for_uri_component("="), "%3D");
}
#[test]
fn space_encoded() {
assert_eq!(for_uri_component(" "), "%20");
}
#[test]
fn html_significant_chars_encoded() {
assert_eq!(for_uri_component("<"), "%3C");
assert_eq!(for_uri_component(">"), "%3E");
assert_eq!(for_uri_component("\""), "%22");
}
#[test]
fn two_byte_utf8() {
assert_eq!(for_uri_component("\u{00A0}"), "%C2%A0");
assert_eq!(for_uri_component("é"), "%C3%A9");
assert_eq!(for_uri_component("\u{07FF}"), "%DF%BF");
}
#[test]
fn three_byte_utf8() {
assert_eq!(for_uri_component("\u{0800}"), "%E0%A0%80");
assert_eq!(for_uri_component("世"), "%E4%B8%96");
assert_eq!(for_uri_component("\u{FFFD}"), "%EF%BF%BD");
}
#[test]
fn four_byte_utf8() {
assert_eq!(for_uri_component("\u{10000}"), "%F0%90%80%80");
assert_eq!(for_uri_component("😀"), "%F0%9F%98%80");
}
#[test]
fn control_chars() {
assert_eq!(for_uri_component("\x00"), "%00");
assert_eq!(for_uri_component("\x01"), "%01");
assert_eq!(for_uri_component("\x1F"), "%1F");
assert_eq!(for_uri_component("\x7F"), "%7F");
}
#[test]
fn query_parameter_encoding() {
assert_eq!(
for_uri_component("search term with spaces"),
"search%20term%20with%20spaces"
);
}
#[test]
fn full_query_value() {
assert_eq!(
for_uri_component("key=value&other=more"),
"key%3Dvalue%26other%3Dmore"
);
}
#[test]
fn unicode_path_segment() {
assert_eq!(
for_uri_component("ファイル"),
"%E3%83%95%E3%82%A1%E3%82%A4%E3%83%AB"
);
}
#[test]
fn empty_string() {
assert_eq!(for_uri_component(""), "");
}
#[test]
fn single_unreserved() {
assert_eq!(for_uri_component("a"), "a");
}
#[test]
fn all_percent_encoded() {
assert_eq!(for_uri_component(" "), "%20%20%20");
}
}
/// Tests for the XML encoders: the `for_xml*` aliases, `for_xml_comment`,
/// `for_cdata`, the `for_xml11*` family and their `write_*` counterparts.
mod xml {
    use super::*;

    /// Asserts that `encoded` is `a`, then one numeric character reference to
    /// `code_point`, then `b`.
    ///
    /// Hexadecimal (`&#x1f;`) and decimal (`&#31;`) spellings are both
    /// accepted, so the check pins which character is referenced rather than
    /// how the reference is written.
    ///
    /// NOTE(review): once the encoder's exact spelling is confirmed, consider
    /// replacing these checks with literal expected strings.
    fn assert_wraps_char_ref(encoded: &str, code_point: u32) {
        let digits = encoded
            .strip_prefix("a&#")
            .and_then(|rest| rest.strip_suffix(";b"))
            .unwrap_or_else(|| panic!("expected `a&#...;b`, got {encoded:?}"));
        let parsed = match digits.strip_prefix('x').or_else(|| digits.strip_prefix('X')) {
            Some(hex) => u32::from_str_radix(hex, 16),
            None => digits.parse::<u32>(),
        };
        assert_eq!(
            parsed.ok(),
            Some(code_point),
            "unexpected character reference in {encoded:?}"
        );
    }

    /// Each `for_xml*` function returns the same text as its `for_html*` twin.
    #[test]
    fn aliases_identical_to_html() {
        let input = r#"<root attr="val">& 'x' </root>"#;
        assert_eq!(for_xml(input), for_html(input));
        assert_eq!(for_xml_content(input), for_html_content(input));
        assert_eq!(for_xml_attribute(input), for_html_attribute(input));
    }

    /// The XML writer functions produce the same text as the string functions.
    #[test]
    fn xml_writer_matches_string() {
        let input = r#"<test attr="val">&'</test>"#;

        let mut written = String::new();
        write_xml(&mut written, input).unwrap();
        assert_eq!(for_xml(input), written);

        let mut written = String::new();
        write_xml_content(&mut written, input).unwrap();
        assert_eq!(for_xml_content(input), written);

        let mut written = String::new();
        write_xml_attribute(&mut written, input).unwrap();
        assert_eq!(for_xml_attribute(input), written);
    }

    /// Comment text without hyphens, and the empty string, are returned unchanged.
    #[test]
    fn comment_safe_passthrough() {
        assert_eq!(for_xml_comment("safe comment text"), "safe comment text");
        assert_eq!(for_xml_comment(""), "");
    }

    /// The second hyphen of each `--` pair is replaced with `~`.
    #[test]
    fn comment_neutralizes_double_hyphen() {
        let cases = [("a--b", "a-~b"), ("a---b", "a-~-b"), ("----", "-~-~")];
        for (input, expected) in cases {
            assert_eq!(for_xml_comment(input), expected, "input {input:?}");
        }
    }

    /// A hyphen at the very end of the comment is replaced with `~`.
    #[test]
    fn comment_trailing_hyphen() {
        let cases = [("abc-", "abc~"), ("-", "~"), ("--", "-~")];
        for (input, expected) in cases {
            assert_eq!(for_xml_comment(input), expected, "input {input:?}");
        }
    }

    /// Controls and noncharacters inside a comment are replaced with a space.
    #[test]
    fn comment_invalid_xml_chars_replaced() {
        let inputs = ["a\x00b", "a\x01b", "a\x7Fb", "a\u{0080}b", "a\u{FDD0}b"];
        for input in inputs {
            assert_eq!(for_xml_comment(input), "a b", "input {input:?}");
        }
    }

    /// Non-ASCII text and tab/LF/CR are returned unchanged.
    #[test]
    fn comment_preserves_allowed_chars() {
        assert_eq!(for_xml_comment("café 日本語"), "café 日本語");
        assert_eq!(for_xml_comment("a\tb\nc\rd"), "a\tb\nc\rd");
    }

    /// A NUL between hyphens becomes a space and the trailing hyphen becomes `~`.
    #[test]
    fn comment_combined_edge_cases() {
        assert_eq!(for_xml_comment("-\x00-"), "- ~");
    }

    /// The comment writer produces the same text as the string function.
    #[test]
    fn comment_writer_matches_string() {
        let input = "test--comment-";
        let mut written = String::new();
        write_xml_comment(&mut written, input).unwrap();
        assert_eq!(for_xml_comment(input), written);
    }

    /// Text without `]]>` is returned unchanged, markup included.
    #[test]
    fn cdata_safe_passthrough() {
        assert_eq!(for_cdata("safe text"), "safe text");
        assert_eq!(for_cdata(""), "");
        assert_eq!(for_cdata("<b>bold</b>"), "<b>bold</b>");
    }

    /// `]]>` is split by closing the section and reopening it before the `>`.
    #[test]
    fn cdata_splits_closing_delimiter() {
        assert_eq!(for_cdata("a]]>b"), "a]]]]><![CDATA[>b");
    }

    /// Every occurrence of `]]>` is split.
    #[test]
    fn cdata_multiple_splits() {
        assert_eq!(for_cdata("x]]>y]]>z"), "x]]]]><![CDATA[>y]]]]><![CDATA[>z");
    }

    /// Brackets that do not form `]]>` are returned unchanged.
    #[test]
    fn cdata_brackets_without_gt() {
        for text in ["]]", "]]]", "]>"] {
            assert_eq!(for_cdata(text), text);
        }
    }

    /// Extra `]` in front of `]]>` are kept and the delimiter is still split.
    #[test]
    fn cdata_extra_brackets_before_gt() {
        assert_eq!(for_cdata("]]]>"), "]]]]]><![CDATA[>");
        assert_eq!(for_cdata("]]]]>"), "]]]]]]><![CDATA[>");
    }

    /// A delimiter at the start of the input is split.
    #[test]
    fn cdata_at_start() {
        assert_eq!(for_cdata("]]>rest"), "]]]]><![CDATA[>rest");
    }

    /// A delimiter at the end of the input is split.
    #[test]
    fn cdata_at_end() {
        assert_eq!(for_cdata("start]]>"), "start]]]]><![CDATA[>");
    }

    /// Controls and noncharacters inside CDATA are replaced with a space.
    #[test]
    fn cdata_invalid_xml_replaced() {
        for input in ["a\x00b", "a\x01b", "a\u{FDD0}b"] {
            assert_eq!(for_cdata(input), "a b", "input {input:?}");
        }
    }

    /// The CDATA writer produces the same text as the string function.
    #[test]
    fn cdata_writer_matches_string() {
        let input = "x]]>y\x00z]]";
        let mut written = String::new();
        write_cdata(&mut written, input).unwrap();
        assert_eq!(for_cdata(input), written);
    }

    /// The five markup characters are replaced by references.
    ///
    /// NOTE(review): the quote spellings `&#34;` / `&#39;` are assumed to
    /// match the HTML encoder's — confirm (`&quot;` / `&apos;` would also be
    /// valid XML).
    #[test]
    fn xml11_entities() {
        assert_eq!(for_xml11("<&>\"'"), "&lt;&amp;&gt;&#34;&#39;");
    }

    /// C0 controls other than tab/LF/CR become numeric character references.
    #[test]
    fn xml11_controls_as_char_references() {
        for cp in [0x01u32, 0x08, 0x0B, 0x0C, 0x1F] {
            let c = char::from_u32(cp).unwrap();
            assert_wraps_char_ref(&for_xml11(&format!("a{c}b")), cp);
        }
    }

    /// Tab, LF and CR are returned unchanged.
    #[test]
    fn xml11_preserves_tab_lf_cr() {
        assert_eq!(for_xml11("a\tb\nc\rd"), "a\tb\nc\rd");
    }

    /// NEL (U+0085) is returned unchanged.
    #[test]
    fn xml11_nel_passes_through() {
        assert_eq!(for_xml11("a\u{0085}b"), "a\u{0085}b");
    }

    /// DEL and the sampled C1 controls become numeric character references.
    #[test]
    fn xml11_del_and_c1_as_references() {
        for cp in [0x7Fu32, 0x80, 0x84, 0x86, 0x9F] {
            let c = char::from_u32(cp).unwrap();
            assert_wraps_char_ref(&for_xml11(&format!("a{c}b")), cp);
        }
    }

    /// NUL is replaced with a space instead of a reference.
    #[test]
    fn xml11_nul_replaced_with_space() {
        assert_eq!(for_xml11("a\x00b"), "a b");
    }

    /// Unicode noncharacters are replaced with a space.
    #[test]
    fn xml11_nonchars_replaced_with_space() {
        assert_eq!(for_xml11("a\u{FDD0}b"), "a b");
        assert_eq!(for_xml11("a\u{FFFE}b"), "a b");
    }

    /// The content encoder leaves quotes alone but still references controls.
    #[test]
    fn xml11_content_no_quotes() {
        assert_eq!(for_xml11_content(r#"a"b'c"#), r#"a"b'c"#);
        assert_wraps_char_ref(&for_xml11_content("a\x01b"), 0x01);
    }

    /// The attribute encoder leaves `>` alone, encodes the double quote and
    /// still references controls.
    #[test]
    fn xml11_attribute_no_gt() {
        assert_eq!(for_xml11_attribute("a>b"), "a>b");
        // NOTE(review): `&#34;` spelling assumed, as in `xml11_entities`.
        assert_eq!(for_xml11_attribute(r#"a"b"#), "a&#34;b");
        assert_wraps_char_ref(&for_xml11_attribute("a\x01b"), 0x01);
    }

    /// The XML 1.1 writer functions produce the same text as the string functions.
    #[test]
    fn xml11_writer_matches_string() {
        let input = "test\x01\x7F<>&\u{0085}";

        let mut written = String::new();
        write_xml11(&mut written, input).unwrap();
        assert_eq!(for_xml11(input), written);

        let mut written = String::new();
        write_xml11_content(&mut written, input).unwrap();
        assert_eq!(for_xml11_content(input), written);

        let mut written = String::new();
        write_xml11_attribute(&mut written, input).unwrap();
        assert_eq!(for_xml11_attribute(input), written);
    }
}
mod java {
use super::*;
#[test]
fn passthrough() {
assert_eq!(for_java("hello world"), "hello world");
assert_eq!(for_java(""), "");
assert_eq!(for_java("café 日本語"), "café 日本語");
}
#[test]
fn named_escapes() {
assert_eq!(for_java("\x08"), "\\b");
assert_eq!(for_java("\t"), "\\t");
assert_eq!(for_java("\n"), "\\n");
assert_eq!(for_java("\x0C"), "\\f");
assert_eq!(for_java("\r"), "\\r");
}
#[test]
fn quotes_and_backslash() {
assert_eq!(for_java(r#"say "hi""#), r#"say \"hi\""#);
assert_eq!(for_java("it's"), r"it\'s");
assert_eq!(for_java(r"back\slash"), r"back\\slash");
}
#[test]
fn octal_escapes_shortest() {
assert_eq!(for_java("\x00a"), "\\0a");
assert_eq!(for_java("\x01a"), "\\1a");
assert_eq!(for_java("\x07a"), "\\7a");
}
#[test]
fn octal_escapes_three_digit_disambiguation() {
assert_eq!(for_java("\x000"), "\\0000");
assert_eq!(for_java("\x007"), "\\0007");
assert_eq!(for_java("\x015"), "\\0015");
}
#[test]
fn octal_at_end_shortest() {
assert_eq!(for_java("\x00"), "\\0");
assert_eq!(for_java("\x07"), "\\7");
assert_eq!(for_java("\x7F"), "\\177");
}
#[test]
fn del_octal() {
assert_eq!(for_java("a\x7Fb"), "a\\177b");
}
#[test]
fn line_separators() {
assert_eq!(for_java("\u{2028}"), "\\u2028");
assert_eq!(for_java("\u{2029}"), "\\u2029");
}
#[test]
fn supplementary_plane() {
assert_eq!(for_java("\u{1F600}"), "\\ud83d\\ude00");
assert_eq!(for_java("\u{10000}"), "\\ud800\\udc00");
}
#[test]
fn noncharacters() {
assert_eq!(for_java("\u{FDD0}"), " ");
assert_eq!(for_java("\u{FFFE}"), " ");
}
#[test]
fn mixed_xss_payload() {
assert_eq!(
for_java("<script>alert(\"xss\")</script>"),
"<script>alert(\\\"xss\\\")</script>"
);
}
#[test]
fn all_c0_controls() {
assert_eq!(for_java("\x08"), "\\b");
assert_eq!(for_java("\x09"), "\\t");
assert_eq!(for_java("\x0A"), "\\n");
assert_eq!(for_java("\x0C"), "\\f");
assert_eq!(for_java("\x0D"), "\\r");
let octal_controls: &[(u8, &str)] = &[
(0x00, "\\0"),
(0x01, "\\1"),
(0x02, "\\2"),
(0x03, "\\3"),
(0x04, "\\4"),
(0x05, "\\5"),
(0x06, "\\6"),
(0x07, "\\7"),
(0x0B, "\\13"),
(0x0E, "\\16"),
(0x0F, "\\17"),
(0x10, "\\20"),
(0x11, "\\21"),
(0x12, "\\22"),
(0x13, "\\23"),
(0x14, "\\24"),
(0x15, "\\25"),
(0x16, "\\26"),
(0x17, "\\27"),
(0x18, "\\30"),
(0x19, "\\31"),
(0x1A, "\\32"),
(0x1B, "\\33"),
(0x1C, "\\34"),
(0x1D, "\\35"),
(0x1E, "\\36"),
(0x1F, "\\37"),
];
for &(byte, expected) in octal_controls {
let s = String::from(char::from(byte));
assert_eq!(for_java(&s), expected, "C0 control 0x{:02x}", byte);
}
assert_eq!(for_java("\x7F"), "\\177");
}
#[test]
fn surrogate_pair_boundaries() {
assert_eq!(for_java("\u{10000}"), "\\ud800\\udc00");
assert_eq!(for_java("\u{10001}"), "\\ud800\\udc01");
assert_eq!(for_java("\u{103FF}"), "\\ud800\\udfff");
assert_eq!(for_java("\u{10400}"), "\\ud801\\udc00");
assert_eq!(for_java("\u{1F600}"), "\\ud83d\\ude00");
assert_eq!(for_java("\u{20000}"), "\\ud840\\udc00");
assert_eq!(for_java("\u{10FFFD}"), "\\udbff\\udffd");
}
#[test]
fn surrogate_pairs_in_mixed_input() {
assert_eq!(for_java("a\u{1F600}b"), "a\\ud83d\\ude00b");
assert_eq!(
for_java("\u{1F600}\u{1F601}"),
"\\ud83d\\ude00\\ud83d\\ude01"
);
assert_eq!(for_java("\n\u{1F600}"), "\\n\\ud83d\\ude00");
}
#[test]
fn octal_before_non_octal_digits() {
assert_eq!(for_java("\x008"), "\\08");
assert_eq!(for_java("\x009"), "\\09");
assert_eq!(for_java("\x018"), "\\18");
assert_eq!(for_java("\x019"), "\\19");
}
#[test]
fn octal_disambiguation_all_octal_followers() {
for d in '0'..='7' {
let input = format!("\x00{d}");
let expected = format!("\\000{d}");
assert_eq!(for_java(&input), expected, "NUL before '{d}'");
}
}
#[test]
fn octal_disambiguation_multi_digit_values() {
assert_eq!(for_java("\x0B0"), "\\0130");
assert_eq!(for_java("\x0B7"), "\\0137");
assert_eq!(for_java("\x0B8"), "\\138");
assert_eq!(for_java("\x0Ba"), "\\13a");
assert_eq!(for_java("\x1F0"), "\\0370");
assert_eq!(for_java("\x1F9"), "\\379");
assert_eq!(for_java("\x7F0"), "\\1770");
assert_eq!(for_java("\x7Fa"), "\\177a");
}
#[test]
fn writer_matches_string() {
let input = "test\x00\"\\\u{1F600}\u{2028}";
let mut w = String::new();
write_java(&mut w, input).unwrap();
assert_eq!(for_java(input), w);
}
#[test]
fn writer_matches_string_all_categories() {
let input = "abc\x08\t\n\x0C\r\"\'\\\x00\x0B\x7F\u{2028}\u{2029}\u{1F600}\u{FDD0}\x000";
let string_result = for_java(input);
let mut writer_result = String::new();
write_java(&mut writer_result, input).unwrap();
assert_eq!(string_result, writer_result);
}
}
/// Tests for the Go literal encoders `for_go_string`, `for_go_char` and
/// `for_go_byte_string`, plus their `write_*` counterparts.
mod go_literals {
    use super::*;

    /// Inputs and expected outputs shared by all three Go encoders for the
    /// controls that have a named escape.
    const NAMED_ESCAPES: [(&str, &str); 7] = [
        ("\x07", "\\a"),
        ("\x08", "\\b"),
        ("\t", "\\t"),
        ("\n", "\\n"),
        ("\x0B", "\\v"),
        ("\x0C", "\\f"),
        ("\r", "\\r"),
    ];

    /// Plain text, the empty string and non-ASCII text are returned unchanged.
    #[test]
    fn string_passthrough() {
        assert_eq!(for_go_string("hello world"), "hello world");
        assert_eq!(for_go_string(""), "");
        let unicode = "caf\u{00e9} \u{65E5}\u{672C}\u{8A9E} \u{1F600}";
        assert_eq!(
            for_go_string(unicode),
            "caf\u{00e9} \u{65E5}\u{672C}\u{8A9E} \u{1F600}"
        );
    }

    /// The string encoder escapes `"` and leaves `'` alone.
    #[test]
    fn string_escapes_double_quote_not_single() {
        assert_eq!(for_go_string(r#"a"b"#), r#"a\"b"#);
        assert_eq!(for_go_string("a'b"), "a'b");
    }

    /// Each control with a named escape uses it.
    #[test]
    fn string_all_named_escapes() {
        for (input, expected) in NAMED_ESCAPES {
            assert_eq!(for_go_string(input), expected, "input {input:?}");
        }
    }

    /// Remaining controls and DEL use lowercase `\xHH`.
    #[test]
    fn string_hex_for_controls() {
        let cases = [
            ("\x00", "\\x00"),
            ("\x01", "\\x01"),
            ("\x06", "\\x06"),
            ("\x0E", "\\x0e"),
            ("\x1F", "\\x1f"),
            ("\x7F", "\\x7f"),
        ];
        for (input, expected) in cases {
            assert_eq!(for_go_string(input), expected, "input {input:?}");
        }
    }

    /// A backslash is doubled.
    #[test]
    fn string_backslash() {
        assert_eq!(for_go_string(r"a\b"), r"a\\b");
    }

    /// Unicode noncharacters are replaced with a space.
    #[test]
    fn string_nonchars_replaced() {
        for input in ["\u{FDD0}", "\u{FFFE}"] {
            assert_eq!(for_go_string(input), " ", "input {input:?}");
        }
    }

    /// Code points above U+FFFF are returned unchanged.
    #[test]
    fn string_supplementary_plane_passes_through() {
        for text in ["\u{1F600}", "\u{10000}"] {
            assert_eq!(for_go_string(text), text);
        }
    }

    /// The string writer produces the same text as the string function.
    #[test]
    fn string_writer_matches() {
        let input = "test\x00\"\\\ncaf\u{00e9}\u{1F600}";
        let mut written = String::new();
        write_go_string(&mut written, input).unwrap();
        assert_eq!(for_go_string(input), written);
    }

    /// Plain and non-ASCII text are returned unchanged by the char encoder.
    #[test]
    fn char_passthrough() {
        for text in ["hello world", "caf\u{00e9}"] {
            assert_eq!(for_go_char(text), text);
        }
    }

    /// The char encoder escapes `'` and leaves `"` alone.
    #[test]
    fn char_escapes_single_quote_not_double() {
        assert_eq!(for_go_char("a'b"), r"a\'b");
        assert_eq!(for_go_char(r#"a"b"#), r#"a"b"#);
    }

    /// Each control with a named escape uses it.
    #[test]
    fn char_all_named_escapes() {
        for (input, expected) in NAMED_ESCAPES {
            assert_eq!(for_go_char(input), expected, "input {input:?}");
        }
    }

    /// Remaining controls and DEL use lowercase `\xHH`.
    #[test]
    fn char_hex_for_controls() {
        assert_eq!(for_go_char("\x01"), "\\x01");
        assert_eq!(for_go_char("\x7F"), "\\x7f");
    }

    /// Unicode noncharacters are replaced with a space.
    #[test]
    fn char_nonchars_replaced() {
        assert_eq!(for_go_char("\u{FDD0}"), " ");
    }

    /// The char writer produces the same text as the string function.
    #[test]
    fn char_writer_matches() {
        let input = "test\x00'\\\ncaf\u{00e9}";
        let mut written = String::new();
        write_go_char(&mut written, input).unwrap();
        assert_eq!(for_go_char(input), written);
    }

    /// ASCII text and the empty string are returned unchanged.
    #[test]
    fn byte_string_ascii_passthrough() {
        for text in ["hello world", ""] {
            assert_eq!(for_go_byte_string(text), text);
        }
    }

    /// The byte-string encoder escapes `"` and leaves `'` alone.
    #[test]
    fn byte_string_escapes_double_quote_not_single() {
        assert_eq!(for_go_byte_string(r#"a"b"#), r#"a\"b"#);
        assert_eq!(for_go_byte_string("a'b"), "a'b");
    }

    /// Each control with a named escape uses it.
    #[test]
    fn byte_string_all_named_escapes() {
        for (input, expected) in NAMED_ESCAPES {
            assert_eq!(for_go_byte_string(input), expected, "input {input:?}");
        }
    }

    /// Remaining controls and DEL use lowercase `\xHH`.
    #[test]
    fn byte_string_hex_for_controls() {
        let cases = [("\x00", "\\x00"), ("\x01", "\\x01"), ("\x7F", "\\x7f")];
        for (input, expected) in cases {
            assert_eq!(for_go_byte_string(input), expected, "input {input:?}");
        }
    }

    /// Non-ASCII characters are written as one `\xHH` escape per UTF-8 byte.
    #[test]
    fn byte_string_non_ascii_as_utf8_bytes() {
        let cases = [
            ("caf\u{00e9}", r"caf\xc3\xa9"),
            ("\u{65E5}", r"\xe6\x97\xa5"),
            ("\u{1F600}", r"\xf0\x9f\x98\x80"),
        ];
        for (input, expected) in cases {
            assert_eq!(for_go_byte_string(input), expected, "input {input:?}");
        }
    }

    /// Noncharacters are byte-escaped too, not replaced with a space.
    #[test]
    fn byte_string_nonchars_as_bytes() {
        assert_eq!(for_go_byte_string("\u{FDD0}"), r"\xef\xb7\x90");
    }

    /// The same non-ASCII input passes through one encoder and is
    /// byte-escaped by the other.
    #[test]
    fn byte_string_vs_string_non_ascii() {
        let e_acute = "\u{00e9}";
        assert_eq!(for_go_string(e_acute), "\u{00e9}");
        assert_eq!(for_go_byte_string(e_acute), r"\xc3\xa9");
    }

    /// The byte-string writer produces the same text as the string function.
    #[test]
    fn byte_string_writer_matches() {
        let input = "test\x00\"\\caf\u{00e9}\u{1F600}";
        let mut written = String::new();
        write_go_byte_string(&mut written, input).unwrap();
        assert_eq!(for_go_byte_string(input), written);
    }

    /// The Go encoder keeps a supplementary-plane character where the Java
    /// encoder writes a surrogate pair.
    #[test]
    fn go_vs_java_supplementary_plane() {
        let grinning_face = "\u{1F600}";
        assert_eq!(for_go_string(grinning_face), "\u{1F600}");
        assert_eq!(for_java(grinning_face), "\\ud83d\\ude00");
    }

    /// The Go encoder has named escapes for BEL and VT; the Java encoder
    /// writes octal for both.
    #[test]
    fn go_has_alert_and_vtab() {
        assert_eq!(for_go_string("\x07"), "\\a");
        assert_eq!(for_go_string("\x0B"), "\\v");
        assert_eq!(for_java("\x07a"), "\\7a");
        assert_eq!(for_java("\x0Ba"), "\\13a");
    }
}
mod rust_literals {
use super::*;
#[test]
fn string_passthrough() {
assert_eq!(for_rust_string("hello world"), "hello world");
assert_eq!(for_rust_string(""), "");
assert_eq!(for_rust_string("café 日本語 😀"), "café 日本語 😀");
}
#[test]
fn string_escapes_double_quote_not_single() {
assert_eq!(for_rust_string(r#"a"b"#), r#"a\"b"#);
assert_eq!(for_rust_string("a'b"), "a'b");
}
#[test]
fn string_named_escapes() {
assert_eq!(for_rust_string("\0"), "\\0");
assert_eq!(for_rust_string("\t"), "\\t");
assert_eq!(for_rust_string("\n"), "\\n");
assert_eq!(for_rust_string("\r"), "\\r");
}
#[test]
fn string_hex_for_controls() {
assert_eq!(for_rust_string("\x01"), "\\x01");
assert_eq!(for_rust_string("\x08"), "\\x08");
assert_eq!(for_rust_string("\x0B"), "\\x0b");
assert_eq!(for_rust_string("\x0C"), "\\x0c");
assert_eq!(for_rust_string("\x1F"), "\\x1f");
assert_eq!(for_rust_string("\x7F"), "\\x7f");
}
#[test]
fn string_backslash() {
assert_eq!(for_rust_string(r"a\b"), r"a\\b");
}
#[test]
fn string_nonchars_replaced() {
assert_eq!(for_rust_string("\u{FDD0}"), " ");
assert_eq!(for_rust_string("\u{FFFE}"), " ");
}
#[test]
fn string_supplementary_plane_passes_through() {
assert_eq!(for_rust_string("😀"), "😀");
assert_eq!(for_rust_string("\u{10000}"), "\u{10000}");
}
#[test]
fn string_writer_matches() {
let input = "test\0\"\\\ncafé\u{1F600}";
let mut w = String::new();
write_rust_string(&mut w, input).unwrap();
assert_eq!(for_rust_string(input), w);
}
#[test]
fn char_passthrough() {
assert_eq!(for_rust_char("hello world"), "hello world");
assert_eq!(for_rust_char("café"), "café");
}
#[test]
fn char_escapes_single_quote_not_double() {
assert_eq!(for_rust_char("a'b"), r"a\'b");
assert_eq!(for_rust_char(r#"a"b"#), r#"a"b"#);
}
#[test]
fn char_named_escapes() {
assert_eq!(for_rust_char("\0"), "\\0");
assert_eq!(for_rust_char("\t"), "\\t");
assert_eq!(for_rust_char("\n"), "\\n");
assert_eq!(for_rust_char("\r"), "\\r");
}
#[test]
fn char_hex_for_controls() {
assert_eq!(for_rust_char("\x01"), "\\x01");
assert_eq!(for_rust_char("\x7F"), "\\x7f");
}
#[test]
fn char_nonchars_replaced() {
assert_eq!(for_rust_char("\u{FDD0}"), " ");
}
#[test]
fn char_writer_matches() {
let input = "test\0'\\\ncafé";
let mut w = String::new();
write_rust_char(&mut w, input).unwrap();
assert_eq!(for_rust_char(input), w);
}
#[test]
fn byte_string_ascii_passthrough() {
assert_eq!(for_rust_byte_string("hello world"), "hello world");
assert_eq!(for_rust_byte_string(""), "");
}
#[test]
fn byte_string_escapes_double_quote_not_single() {
assert_eq!(for_rust_byte_string(r#"a"b"#), r#"a\"b"#);
assert_eq!(for_rust_byte_string("a'b"), "a'b");
}
#[test]
fn byte_string_named_escapes() {
assert_eq!(for_rust_byte_string("\0"), "\\0");
assert_eq!(for_rust_byte_string("\t"), "\\t");
assert_eq!(for_rust_byte_string("\n"), "\\n");
assert_eq!(for_rust_byte_string("\r"), "\\r");
}
/// Expects U+0001 and DEL to be written as lowercase `\xNN` escapes.
#[test]
fn byte_string_hex_for_controls() {
    for &(input, expected) in &[("\x01", "\\x01"), ("\x7F", "\\x7f")] {
        assert_eq!(for_rust_byte_string(input), expected, "input: {:?}", input);
    }
}
/// Expects each non-ASCII character to be spelled out as `\xNN` escapes of
/// its UTF-8 bytes (two, three and four bytes in these samples).
#[test]
fn byte_string_non_ascii_as_utf8_bytes() {
    for &(input, expected) in &[
        ("café", r"caf\xc3\xa9"),
        ("日", r"\xe6\x97\xa5"),
        ("😀", r"\xf0\x9f\x98\x80"),
    ] {
        assert_eq!(for_rust_byte_string(input), expected, "input: {:?}", input);
    }
}
/// Expects the non-character U+FDD0 to be emitted as its UTF-8 bytes rather
/// than replaced.
#[test]
fn byte_string_nonchars_as_bytes() {
    let encoded = for_rust_byte_string("\u{FDD0}");
    assert_eq!(encoded, r"\xef\xb7\x90");
}
/// Contrasts the two encoders on the same character: the string encoder keeps
/// `é` as-is, the byte-string encoder writes its UTF-8 bytes as hex escapes.
#[test]
fn byte_string_vs_string_non_ascii() {
    let accented = "é";
    assert_eq!(for_rust_string(accented), "é");
    assert_eq!(for_rust_byte_string(accented), r"\xc3\xa9");
}
/// Checks that `write_rust_byte_string` produces the same text as
/// `for_rust_byte_string`.
#[test]
fn byte_string_writer_matches() {
    let sample = "test\0\"\\café😀";
    let mut streamed = String::new();
    write_rust_byte_string(&mut streamed, sample).unwrap();
    let direct = for_rust_byte_string(sample);
    assert_eq!(direct, streamed);
}
}
/// Expectations for the Python literal encoders (`for_python_string`,
/// `for_python_bytes`, `for_python_raw_string`) and their `write_*` counterparts.
mod python_literals {
    use super::*;

    // Asserts `$encoder(input) == expected` for each `input => expected` pair, in order.
    macro_rules! assert_encodes {
        ($encoder:ident: $($input:expr => $expected:expr),+ $(,)?) => {
            $( assert_eq!($encoder($input), $expected, "input: {:?}", $input); )+
        };
    }

    // Asserts that streaming `$input` through `$writer` into a `String` yields
    // the same text as the one-shot `$encoder`.
    macro_rules! assert_writer_agrees {
        ($writer:ident, $encoder:ident, $input:expr) => {{
            let sample = $input;
            let mut sink = String::new();
            $writer(&mut sink, sample).unwrap();
            assert_eq!($encoder(sample), sink);
        }};
    }

    /// ASCII, empty and non-ASCII text are expected back unchanged.
    #[test]
    fn string_passthrough() {
        for &text in &[
            "hello world",
            "",
            "caf\u{00e9} \u{65E5}\u{672C}\u{8A9E} \u{1F600}",
        ] {
            assert_eq!(for_python_string(text), text, "input: {:?}", text);
        }
    }

    /// Both quote characters are expected to gain a backslash.
    #[test]
    fn string_escapes_both_quotes() {
        assert_encodes!(for_python_string:
            r#"a"b"# => r#"a\"b"#,
            "a'b" => r"a\'b",
            r#"say "it's""# => r#"say \"it\'s\""#,
        );
    }

    /// Controls with a short Python escape are expected to use it.
    #[test]
    fn string_all_named_escapes() {
        assert_encodes!(for_python_string:
            "\x07" => "\\a",
            "\x08" => "\\b",
            "\t" => "\\t",
            "\n" => "\\n",
            "\x0B" => "\\v",
            "\x0C" => "\\f",
            "\r" => "\\r",
        );
    }

    /// Remaining controls and DEL are expected as lowercase `\xNN` escapes.
    #[test]
    fn string_hex_for_controls() {
        assert_encodes!(for_python_string:
            "\x00" => "\\x00",
            "\x01" => "\\x01",
            "\x06" => "\\x06",
            "\x0E" => "\\x0e",
            "\x1F" => "\\x1f",
            "\x7F" => "\\x7f",
        );
    }

    /// A backslash is expected to be doubled.
    #[test]
    fn string_backslash() {
        assert_encodes!(for_python_string: r"a\b" => r"a\\b");
    }

    /// Non-characters are expected to encode to a single space.
    #[test]
    fn string_nonchars_replaced() {
        for &nonchar in &["\u{FDD0}", "\u{FFFE}"] {
            assert_eq!(for_python_string(nonchar), " ", "input: {:?}", nonchar);
        }
    }

    /// Code points above U+FFFF are expected back unchanged.
    #[test]
    fn string_supplementary_plane_passes_through() {
        for &astral in &["\u{1F600}", "\u{10000}"] {
            assert_eq!(for_python_string(astral), astral, "input: {:?}", astral);
        }
    }

    /// Only the double quotes of a script payload are expected to be escaped.
    #[test]
    fn string_xss_payload() {
        assert_encodes!(for_python_string:
            "<script>alert(\"xss\")</script>" => "<script>alert(\\\"xss\\\")</script>",
        );
    }

    /// The writer variant must agree with `for_python_string`.
    #[test]
    fn string_writer_matches() {
        assert_writer_agrees!(
            write_python_string,
            for_python_string,
            "test\x00\"'\\\ncaf\u{00e9}\u{1F600}"
        );
    }

    /// ASCII and empty input are expected back unchanged.
    #[test]
    fn bytes_ascii_passthrough() {
        for &text in &["hello world", ""] {
            assert_eq!(for_python_bytes(text), text, "input: {:?}", text);
        }
    }

    /// Both quote characters are expected to gain a backslash.
    #[test]
    fn bytes_escapes_both_quotes() {
        assert_encodes!(for_python_bytes:
            r#"a"b"# => r#"a\"b"#,
            "a'b" => r"a\'b",
        );
    }

    /// Controls with a short Python escape are expected to use it.
    #[test]
    fn bytes_all_named_escapes() {
        assert_encodes!(for_python_bytes:
            "\x07" => "\\a",
            "\x08" => "\\b",
            "\t" => "\\t",
            "\n" => "\\n",
            "\x0B" => "\\v",
            "\x0C" => "\\f",
            "\r" => "\\r",
        );
    }

    /// NUL, U+0001 and DEL are expected as lowercase `\xNN` escapes.
    #[test]
    fn bytes_hex_for_controls() {
        assert_encodes!(for_python_bytes:
            "\x00" => "\\x00",
            "\x01" => "\\x01",
            "\x7F" => "\\x7f",
        );
    }

    /// Non-ASCII characters are expected as `\xNN` escapes of their UTF-8 bytes.
    #[test]
    fn bytes_non_ascii_as_utf8_bytes() {
        assert_encodes!(for_python_bytes:
            "caf\u{00e9}" => r"caf\xc3\xa9",
            "\u{65E5}" => r"\xe6\x97\xa5",
            "\u{1F600}" => r"\xf0\x9f\x98\x80",
        );
    }

    /// A non-character is expected as its UTF-8 bytes rather than replaced.
    #[test]
    fn bytes_nonchars_as_bytes() {
        assert_encodes!(for_python_bytes: "\u{FDD0}" => r"\xef\xb7\x90");
    }

    /// Contrasts string and bytes encoding of the same accented character.
    #[test]
    fn bytes_vs_string_non_ascii() {
        let accented = "\u{00e9}";
        assert_eq!(for_python_string(accented), "\u{00e9}");
        assert_eq!(for_python_bytes(accented), r"\xc3\xa9");
    }

    /// The writer variant must agree with `for_python_bytes`.
    #[test]
    fn bytes_writer_matches() {
        assert_writer_agrees!(
            write_python_bytes,
            for_python_bytes,
            "test\x00\"'\\caf\u{00e9}\u{1F600}"
        );
    }

    /// ASCII and empty input are expected back unchanged.
    #[test]
    fn raw_passthrough() {
        for &text in &["hello world", ""] {
            assert_eq!(for_python_raw_string(text), text, "input: {:?}", text);
        }
    }

    /// Either quote character is expected to become a space.
    #[test]
    fn raw_quotes_replaced_with_space() {
        assert_encodes!(for_python_raw_string:
            r#"a"b"# => "a b",
            "a'b" => "a b",
            r#"it "won't" work"# => "it  won t  work",
        );
    }

    /// NUL, tab, newline and DEL are expected to become a space.
    #[test]
    fn raw_controls_replaced_with_space() {
        for &input in &["a\x00b", "a\tb", "a\nb", "a\x7Fb"] {
            assert_eq!(for_python_raw_string(input), "a b", "input: {:?}", input);
        }
    }

    /// Backslashes that are not at the end are expected back unchanged.
    #[test]
    fn raw_backslashes_in_middle_pass_through() {
        for &text in &[r"a\b\c", r"C:\Users\test"] {
            assert_eq!(for_python_raw_string(text), text, "input: {:?}", text);
        }
    }

    /// An even run of trailing backslashes is expected back unchanged.
    #[test]
    fn raw_trailing_even_backslashes_ok() {
        assert_encodes!(for_python_raw_string: r"ab\\" => r"ab\\");
    }

    /// With an odd run of trailing backslashes, the last one is expected to
    /// become a space.
    #[test]
    fn raw_trailing_odd_backslash_replaced() {
        assert_encodes!(for_python_raw_string:
            r"ab\" => "ab ",
            r"ab\\\" => "ab\\\\ ",
        );
    }

    /// A lone backslash is expected to become a space.
    #[test]
    fn raw_just_backslash() {
        assert_encodes!(for_python_raw_string: r"\" => " ");
    }

    /// Non-characters are expected to encode to a single space.
    #[test]
    fn raw_nonchars_replaced() {
        for &nonchar in &["\u{FDD0}", "\u{FFFE}"] {
            assert_eq!(for_python_raw_string(nonchar), " ", "input: {:?}", nonchar);
        }
    }

    /// Non-ASCII text is expected back unchanged.
    #[test]
    fn raw_non_ascii_passes_through() {
        for &text in &["café", "日本語", "😀"] {
            assert_eq!(for_python_raw_string(text), text, "input: {:?}", text);
        }
    }

    /// A typical regex pattern is expected back unchanged.
    #[test]
    fn raw_regex_pattern() {
        assert_encodes!(for_python_raw_string: r"\d+\.\d+" => r"\d+\.\d+");
    }

    /// The writer variant must agree with `for_python_raw_string`.
    #[test]
    fn raw_writer_matches() {
        assert_writer_agrees!(
            write_python_raw_string,
            for_python_raw_string,
            "test\x00path\\to\\file"
        );
    }

    /// Python and Java encoders are both expected to backslash-escape `'`.
    #[test]
    fn python_vs_java_quote_handling() {
        let quoted = "a'b";
        assert_eq!(for_python_string(quoted), r"a\'b");
        assert_eq!(for_java(quoted), r"a\'b");
    }

    /// Python and Go keep an emoji as-is; Java is expected to emit a
    /// surrogate pair of `\u` escapes.
    #[test]
    fn python_vs_go_supplementary_plane() {
        let emoji = "\u{1F600}";
        assert_eq!(for_python_string(emoji), "\u{1F600}");
        assert_eq!(for_go_string(emoji), "\u{1F600}");
        assert_eq!(for_java(emoji), "\\ud83d\\ude00");
    }

    /// Python and Go are both expected to use `\a` and `\v`.
    #[test]
    fn python_has_alert_and_vtab() {
        assert_encodes!(for_python_string: "\x07" => "\\a", "\x0B" => "\\v");
        assert_encodes!(for_go_string: "\x07" => "\\a", "\x0B" => "\\v");
    }

    /// Runs one input through all three Python encoders to show how each
    /// treats a non-ASCII character and a newline.
    #[test]
    fn python_string_vs_bytes_vs_raw() {
        let sample = "café\n";
        assert_eq!(for_python_string(sample), "café\\n");
        assert_eq!(for_python_bytes(sample), "caf\\xc3\\xa9\\n");
        assert_eq!(for_python_raw_string(sample), "café ");
    }
}
/// Expectations for the Ruby double-quoted string encoder `for_ruby_string`
/// and its `write_ruby_string` counterpart.
mod ruby_literals {
    use super::*;

    // Asserts `$encoder(input) == expected` for each `input => expected` pair, in order.
    macro_rules! assert_encodes {
        ($encoder:ident: $($input:expr => $expected:expr),+ $(,)?) => {
            $( assert_eq!($encoder($input), $expected, "input: {:?}", $input); )+
        };
    }

    /// ASCII, empty and non-ASCII text are expected back unchanged.
    #[test]
    fn string_passthrough() {
        for &text in &[
            "hello world",
            "",
            "caf\u{00e9} \u{65E5}\u{672C}\u{8A9E} \u{1F600}",
        ] {
            assert_eq!(for_ruby_string(text), text, "input: {:?}", text);
        }
    }

    /// `"` is expected to gain a backslash while `'` is left untouched.
    #[test]
    fn string_escapes_double_quote_not_single() {
        assert_encodes!(for_ruby_string:
            r#"a"b"# => r#"a\"b"#,
            "a'b" => "a'b",
        );
    }

    /// Controls with a short Ruby escape, including `\e` for ESC, are
    /// expected to use it.
    #[test]
    fn string_all_named_escapes() {
        assert_encodes!(for_ruby_string:
            "\x07" => "\\a",
            "\x08" => "\\b",
            "\t" => "\\t",
            "\n" => "\\n",
            "\x0B" => "\\v",
            "\x0C" => "\\f",
            "\r" => "\\r",
            "\x1B" => "\\e",
        );
    }

    /// Remaining controls and DEL are expected as lowercase `\xNN` escapes.
    #[test]
    fn string_hex_for_controls() {
        assert_encodes!(for_ruby_string:
            "\x00" => "\\x00",
            "\x01" => "\\x01",
            "\x06" => "\\x06",
            "\x0E" => "\\x0e",
            "\x1F" => "\\x1f",
            "\x7F" => "\\x7f",
        );
    }

    /// A backslash is expected to be doubled.
    #[test]
    fn string_backslash() {
        assert_encodes!(for_ruby_string: r"a\b" => r"a\\b");
    }

    /// Every `#` is expected to gain a backslash, whatever follows it.
    #[test]
    fn string_hash_interpolation_prevention() {
        assert_encodes!(for_ruby_string:
            "hello #{name}" => r"hello \#{name}",
            "#$PATH" => r"\#$PATH",
            "#@name" => r"\#@name",
            "color #ff0000" => r"color \#ff0000",
        );
    }

    /// Non-characters are expected to encode to a single space.
    #[test]
    fn string_nonchars_replaced() {
        for &nonchar in &["\u{FDD0}", "\u{FFFE}"] {
            assert_eq!(for_ruby_string(nonchar), " ", "input: {:?}", nonchar);
        }
    }

    /// Code points above U+FFFF are expected back unchanged.
    #[test]
    fn string_supplementary_plane_passes_through() {
        for &astral in &["\u{1F600}", "\u{10000}"] {
            assert_eq!(for_ruby_string(astral), astral, "input: {:?}", astral);
        }
    }

    /// Only the double quotes of a script payload are expected to be escaped.
    #[test]
    fn string_xss_payload() {
        assert_encodes!(for_ruby_string:
            "<script>alert(\"xss\")</script>" => "<script>alert(\\\"xss\\\")</script>",
        );
    }

    /// The writer variant must agree with `for_ruby_string`.
    #[test]
    fn string_writer_matches() {
        let sample = "test\x00\"\\\n#{}caf\u{00e9}\u{1F600}\x1B";
        let mut streamed = String::new();
        write_ruby_string(&mut streamed, sample).unwrap();
        let direct = for_ruby_string(sample);
        assert_eq!(direct, streamed);
    }

    /// Ruby leaves `'` alone where Python escapes it; both escape `"`.
    #[test]
    fn ruby_vs_python_quote_handling() {
        let single = "a'b";
        assert_eq!(for_ruby_string(single), "a'b");
        assert_eq!(for_python_string(single), r"a\'b");

        let double = r#"a"b"#;
        assert_eq!(for_ruby_string(double), r#"a\"b"#);
        assert_eq!(for_python_string(double), r#"a\"b"#);
    }

    /// ESC is expected as `\e` for Ruby but as a hex escape for Go.
    #[test]
    fn ruby_vs_go_esc_handling() {
        let esc = "\x1B";
        assert_eq!(for_ruby_string(esc), "\\e");
        assert_eq!(for_go_string(esc), "\\x1b");
    }

    /// Of the four encoders compared here, only Ruby is expected to escape `#`.
    #[test]
    fn ruby_hash_escape_unique() {
        let hash = "#";
        assert_eq!(for_ruby_string(hash), "\\#");
        assert_eq!(for_python_string(hash), "#");
        assert_eq!(for_go_string(hash), "#");
        assert_eq!(for_rust_string(hash), "#");
    }
}
/// Expectations for the JSON string encoder `for_json`, its `write_json`
/// counterpart, and how it differs from `for_javascript_source`.
mod json {
    use super::*;

    // Asserts `$encoder(input) == expected` for each `input => expected` pair, in order.
    macro_rules! assert_encodes {
        ($encoder:ident: $($input:expr => $expected:expr),+ $(,)?) => {
            $( assert_eq!($encoder($input), $expected, "input: {:?}", $input); )+
        };
    }

    /// ASCII, empty and non-ASCII text are expected back unchanged.
    #[test]
    fn passthrough() {
        for &text in &["hello world", "", "café 日本語 😀"] {
            assert_eq!(for_json(text), text, "input: {:?}", text);
        }
    }

    /// `"` is expected to gain a backslash.
    #[test]
    fn double_quotes_escaped() {
        assert_encodes!(for_json: r#"say "hi""# => r#"say \"hi\""#);
    }

    /// `'` is expected back unchanged.
    #[test]
    fn single_quotes_not_escaped() {
        for &text in &["it's", "'quoted'"] {
            assert_eq!(for_json(text), text, "input: {:?}", text);
        }
    }

    /// A backslash is expected to be doubled.
    #[test]
    fn backslash() {
        assert_encodes!(for_json: r"back\slash" => r"back\\slash");
    }

    /// Controls with a short JSON escape are expected to use it.
    #[test]
    fn named_escapes() {
        assert_encodes!(for_json:
            "\x08" => "\\b",
            "\t" => "\\t",
            "\n" => "\\n",
            "\x0C" => "\\f",
            "\r" => "\\r",
        );
    }

    /// Other C0 controls are expected as lowercase `\uNNNN`, never `\xNN`.
    #[test]
    fn c0_controls_use_unicode_not_hex() {
        assert_encodes!(for_json:
            "\x00" => "\\u0000",
            "\x01" => "\\u0001",
            "\x07" => "\\u0007",
            "\x0B" => "\\u000b",
            "\x0E" => "\\u000e",
            "\x1F" => "\\u001f",
        );
    }

    /// U+2028 and U+2029 are expected as `\u` escapes.
    #[test]
    fn line_separators_mandatory() {
        assert_encodes!(for_json:
            "\u{2028}" => "\\u2028",
            "\u{2029}" => "\\u2029",
            "a\u{2028}b\u{2029}c" => "a\\u2028b\\u2029c",
        );
    }

    /// Every `/` is expected to gain a backslash.
    #[test]
    fn forward_slash_escaped() {
        assert_encodes!(for_json:
            "/" => "\\/",
            "a/b" => "a\\/b",
            "</script>" => "<\\/script>",
            "https://example.com" => "https:\\/\\/example.com",
        );
    }

    /// A closing script tag in the input must not survive literally.
    #[test]
    fn script_tag_breakout_prevented() {
        assert_encodes!(for_json:
            "</script><script>alert(1)//" => "<\\/script><script>alert(1)\\/\\/",
        );
    }

    /// `&` is expected back unchanged.
    #[test]
    fn ampersand_not_escaped() {
        assert_encodes!(for_json: "a&b" => "a&b");
    }

    /// Code points above U+FFFF are expected back unchanged.
    #[test]
    fn supplementary_plane_passes_through() {
        for &astral in &["😀", "\u{10000}"] {
            assert_eq!(for_json(astral), astral, "input: {:?}", astral);
        }
    }

    /// JSON keeps `'`; the JavaScript source encoder escapes it.
    #[test]
    fn json_vs_js_source_single_quotes() {
        let apostrophe = "it's";
        assert_eq!(for_json(apostrophe), "it's");
        assert_eq!(for_javascript_source(apostrophe), r"it\'s");
    }

    /// JSON writes U+0001 as `\u0001`; the JavaScript source encoder as `\x01`.
    #[test]
    fn json_vs_js_source_control_format() {
        let control = "\x01";
        assert_eq!(for_json(control), "\\u0001");
        assert_eq!(for_javascript_source(control), "\\x01");
    }

    /// Both encoders are expected to agree on these inputs.
    #[test]
    fn json_vs_js_source_common_escapes() {
        for &shared in &["\n", "\t", "\\", "\u{2028}"] {
            assert_eq!(
                for_json(shared),
                for_javascript_source(shared),
                "input: {:?}",
                shared
            );
        }
    }

    /// JSON escapes `/`; the JavaScript source encoder leaves it alone.
    #[test]
    fn json_vs_js_source_slash() {
        let slash = "/";
        assert_eq!(for_json(slash), "\\/");
        assert_eq!(for_javascript_source(slash), "/");
    }

    /// Input that already looks escaped is encoded again, not recognised.
    #[test]
    fn already_escaped_input() {
        assert_encodes!(for_json:
            r"\n" => r"\\n",
            r#"\""# => r#"\\\""#,
        );
    }

    /// The writer variant must agree with `for_json`.
    #[test]
    fn writer_matches_string() {
        let sample = "test\x00\"\\\n\u{2028}'café";
        let mut streamed = String::new();
        write_json(&mut streamed, sample).unwrap();
        let direct = for_json(sample);
        assert_eq!(direct, streamed);
    }
}
/// Expectations for the two SQL string-literal encoders: `for_sql`
/// (quote doubling) and `for_sql_backslash` (backslash escapes), plus their
/// `write_*` counterparts and a side-by-side comparison of the two.
mod sql {
    use super::*;

    // Asserts `$encoder(input) == expected` for each `input => expected` pair, in order.
    macro_rules! assert_encodes {
        ($encoder:ident: $($input:expr => $expected:expr),+ $(,)?) => {
            $( assert_eq!($encoder($input), $expected, "input: {:?}", $input); )+
        };
    }

    // Asserts that streaming `$input` through `$writer` into a `String` yields
    // the same text as the one-shot `$encoder`.
    macro_rules! assert_writer_agrees {
        ($writer:ident, $encoder:ident, $input:expr) => {{
            let sample = $input;
            let mut sink = String::new();
            $writer(&mut sink, sample).unwrap();
            assert_eq!($encoder(sample), sink);
        }};
    }

    /// Every `'` is expected to be doubled.
    #[test]
    fn sql_single_quote_doubled() {
        assert_encodes!(for_sql:
            "it's" => "it''s",
            "'quoted'" => "''quoted''",
            "a''b" => "a''''b",
            "'''" => "''''''",
        );
    }

    /// The classic drop-table payload only has its quote doubled.
    #[test]
    fn sql_injection_classic() {
        assert_encodes!(for_sql:
            "'; DROP TABLE users; --" => "''; DROP TABLE users; --",
        );
    }

    /// Every quote of a tautology payload is doubled.
    #[test]
    fn sql_injection_stacked() {
        assert_encodes!(for_sql: "' OR '1'='1" => "'' OR ''1''=''1");
    }

    /// NUL characters are expected to be dropped from the output.
    #[test]
    fn sql_nul_removed() {
        assert_encodes!(for_sql:
            "before\x00after" => "beforeafter",
            "\x00" => "",
            "\x00\x00\x00" => "",
            "a\x00b\x00c" => "abc",
        );
    }

    /// Backslashes are expected back unchanged; a following `'` is still doubled.
    #[test]
    fn sql_backslash_passes_through() {
        assert_encodes!(for_sql:
            r"\" => r"\",
            r"\\" => r"\\",
            r"\'" => r"\''",
        );
    }

    /// `"` is expected back unchanged.
    #[test]
    fn sql_double_quote_passes_through() {
        for &text in &[r#"a"b"#, r#""""#] {
            assert_eq!(for_sql(text), text, "input: {:?}", text);
        }
    }

    /// Control characters other than NUL are expected back unchanged.
    #[test]
    fn sql_control_chars_pass_through() {
        for &control in &["\t", "\n", "\r", "\x08", "\x1A", "\x01", "\x7F"] {
            assert_eq!(for_sql(control), control, "input: {:?}", control);
        }
    }

    /// Non-characters are expected to encode to a single space.
    #[test]
    fn sql_nonchars_replaced() {
        for &nonchar in &[
            "\u{FDD0}",
            "\u{FDEF}",
            "\u{FFFE}",
            "\u{FFFF}",
            "\u{1FFFE}",
            "\u{10FFFF}",
        ] {
            assert_eq!(for_sql(nonchar), " ", "input: {:?}", nonchar);
        }
    }

    /// Non-ASCII text is expected back unchanged.
    #[test]
    fn sql_unicode_passthrough() {
        for &text in &["café", "日本語", "😀", "\u{10000}"] {
            assert_eq!(for_sql(text), text, "input: {:?}", text);
        }
    }

    /// Quote doubling is expected to work next to non-ASCII text.
    #[test]
    fn sql_mixed_unicode_and_quotes() {
        assert_encodes!(for_sql:
            "café's best" => "café''s best",
            "日本語'テスト" => "日本語''テスト",
        );
    }

    /// The empty string is expected back unchanged.
    #[test]
    fn sql_empty_string() {
        assert_encodes!(for_sql: "" => "");
    }

    /// A single harmless character is expected back unchanged.
    #[test]
    fn sql_single_safe_char() {
        assert_encodes!(for_sql: "a" => "a");
    }

    /// A lone quote is expected to be doubled.
    #[test]
    fn sql_single_quote_only() {
        assert_encodes!(for_sql: "'" => "''");
    }

    /// A 10 000-character harmless string is expected back unchanged.
    #[test]
    fn sql_long_safe_string() {
        let long_input = "a".repeat(10000);
        assert_eq!(for_sql(&long_input), long_input);
    }

    /// The writer variant must agree with `for_sql`.
    #[test]
    fn sql_writer_matches_string() {
        assert_writer_agrees!(write_sql, for_sql, "test\x00'escape'' café\u{FDD0}");
    }

    /// Every `'` is expected to gain a backslash.
    #[test]
    fn backslash_single_quote_escaped() {
        assert_encodes!(for_sql_backslash:
            "it's" => r"it\'s",
            "'quoted'" => r"\'quoted\'",
            "'''" => r"\'\'\'",
        );
    }

    /// Every backslash is expected to be doubled.
    #[test]
    fn backslash_escapes_backslash() {
        assert_encodes!(for_sql_backslash:
            r"\" => r"\\",
            r"\\" => r"\\\\",
            r"a\b" => r"a\\b",
        );
    }

    /// NUL, backspace, tab, newline, carriage return and U+001A are expected
    /// to use their short escapes.
    #[test]
    fn backslash_all_named_escapes() {
        assert_encodes!(for_sql_backslash:
            "\x00" => r"\0",
            "\x08" => r"\b",
            "\t" => r"\t",
            "\n" => r"\n",
            "\r" => r"\r",
            "\x1A" => r"\Z",
        );
    }

    /// The classic drop-table payload only has its quote escaped.
    #[test]
    fn backslash_injection_classic() {
        assert_encodes!(for_sql_backslash:
            "'; DROP TABLE users; --" => r"\'; DROP TABLE users; --",
        );
    }

    /// A backslash placed before a quote is escaped too, so it cannot cancel
    /// the quote's escape.
    #[test]
    fn backslash_injection_via_backslash() {
        assert_encodes!(for_sql_backslash: "\\'" => r"\\\'");
    }

    /// Every quote of a tautology payload is escaped.
    #[test]
    fn backslash_injection_stacked() {
        assert_encodes!(for_sql_backslash: "' OR '1'='1" => r"\' OR \'1\'=\'1");
    }

    /// NUL is expected as `\0` rather than dropped.
    #[test]
    fn backslash_nul_encoded_not_removed() {
        assert_encodes!(for_sql_backslash:
            "a\x00b" => r"a\0b",
            "\x00" => r"\0",
        );
    }

    /// `"` is expected back unchanged.
    #[test]
    fn backslash_double_quote_passes_through() {
        assert_encodes!(for_sql_backslash: r#"a"b"# => r#"a"b"#);
    }

    /// Controls without a short escape are expected back unchanged.
    #[test]
    fn backslash_other_controls_pass_through() {
        for &control in &["\x01", "\x07", "\x0B", "\x0C", "\x1F", "\x7F"] {
            assert_eq!(for_sql_backslash(control), control, "input: {:?}", control);
        }
    }

    /// Non-characters are expected to encode to a single space.
    #[test]
    fn backslash_nonchars_replaced() {
        for &nonchar in &[
            "\u{FDD0}",
            "\u{FDEF}",
            "\u{FFFE}",
            "\u{FFFF}",
            "\u{1FFFE}",
            "\u{10FFFF}",
        ] {
            assert_eq!(for_sql_backslash(nonchar), " ", "input: {:?}", nonchar);
        }
    }

    /// Non-ASCII text is expected back unchanged.
    #[test]
    fn backslash_unicode_passthrough() {
        for &text in &["café", "日本語", "😀"] {
            assert_eq!(for_sql_backslash(text), text, "input: {:?}", text);
        }
    }

    /// The empty string is expected back unchanged.
    #[test]
    fn backslash_empty_string() {
        assert_encodes!(for_sql_backslash: "" => "");
    }

    /// A 10 000-character harmless string is expected back unchanged.
    #[test]
    fn backslash_long_safe_string() {
        let long_input = "a".repeat(10000);
        assert_eq!(for_sql_backslash(&long_input), long_input);
    }

    /// The writer variant must agree with `for_sql_backslash`.
    #[test]
    fn backslash_writer_matches_string() {
        assert_writer_agrees!(
            write_sql_backslash,
            for_sql_backslash,
            "test\x00\x08\t\n\r\x1A'\\café\u{FDD0}"
        );
    }

    /// The two dialects escape `'` differently.
    #[test]
    fn dialect_quote_escaping_differs() {
        let quote = "'";
        assert_eq!(for_sql(quote), "''");
        assert_eq!(for_sql_backslash(quote), r"\'");
    }

    /// Only the backslash dialect escapes a backslash.
    #[test]
    fn dialect_backslash_handling_differs() {
        let backslash = r"\";
        assert_eq!(for_sql(backslash), r"\");
        assert_eq!(for_sql_backslash(backslash), r"\\");
    }

    /// `for_sql` drops NUL; `for_sql_backslash` writes `\0`.
    #[test]
    fn dialect_nul_handling_differs() {
        let nul = "\x00";
        assert_eq!(for_sql(nul), "");
        assert_eq!(for_sql_backslash(nul), r"\0");
    }

    /// Tab, newline and carriage return pass through `for_sql` but are
    /// escaped by `for_sql_backslash`.
    #[test]
    fn dialect_control_handling_differs() {
        for &(control, escaped) in &[("\t", r"\t"), ("\n", r"\n"), ("\r", r"\r")] {
            assert_eq!(for_sql(control), control, "input: {:?}", control);
            assert_eq!(for_sql_backslash(control), escaped, "input: {:?}", control);
        }
    }

    /// Both dialects are expected to replace a non-character with a space.
    #[test]
    fn dialect_nonchars_same() {
        let nonchar = "\u{FDD0}";
        assert_eq!(for_sql(nonchar), " ");
        assert_eq!(for_sql_backslash(nonchar), " ");
    }

    /// Both dialects are expected to leave harmless text unchanged.
    #[test]
    fn dialect_safe_input_same() {
        let harmless = "SELECT name FROM users WHERE id = 42";
        assert_eq!(for_sql(harmless), harmless);
        assert_eq!(for_sql_backslash(harmless), harmless);
    }
}
/// Cross-checks the encoders against each other: the same hostile input must
/// encode differently per context, every `write_*` variant must agree with its
/// `for_*` function, and harmless text must survive (URI component excepted).
mod cross_context {
    use super::*;

    // Asserts that streaming `$input` through `$writer` into a `String` yields
    // the same text as the one-shot `$encoder`.
    macro_rules! assert_writer_agrees {
        ($writer:ident, $encoder:ident, $input:expr) => {{
            let sample = $input;
            let mut sink = String::new();
            $writer(&mut sink, sample).unwrap();
            assert_eq!($encoder(sample), sink);
        }};
    }

    /// Encodes one attack string for six contexts and checks that the HTML,
    /// JavaScript, CSS and URI results differ pairwise along that chain and
    /// that each result carries its context's own escape sequence.
    #[test]
    fn same_input_different_contexts() {
        let input = r#"<img src="x" onerror="alert('xss')">"#;
        let html = for_html(input);
        let js = for_javascript(input);
        let css = for_css_string(input);
        let uri = for_uri_component(input);
        let sql = for_sql(input);
        let sql_bs = for_sql_backslash(input);

        assert_ne!(html, js);
        assert_ne!(js, css);
        assert_ne!(css, uri);

        // The HTML result must carry the entity for `<`. Looking for a raw `<`
        // would demand the very character the encoder is there to remove.
        assert!(html.contains("&lt;"));
        assert!(js.contains("\\x22"));
        assert!(css.contains("\\3c"));
        assert!(uri.contains("%3C"));
        assert!(sql.contains("''"));
        assert!(sql_bs.contains("\\'"));
    }

    /// Every writer variant must agree with its string-returning counterpart
    /// on an input mixing markup, both quote kinds, `&` and non-ASCII text.
    #[test]
    fn writer_matches_string() {
        let input = r#"test <b>"bold"</b> & 'italic' café 日本語"#;
        assert_writer_agrees!(write_html, for_html, input);
        assert_writer_agrees!(write_javascript, for_javascript, input);
        assert_writer_agrees!(write_css_string, for_css_string, input);
        assert_writer_agrees!(write_uri_component, for_uri_component, input);
        assert_writer_agrees!(write_sql, for_sql, input);
        assert_writer_agrees!(write_sql_backslash, for_sql_backslash, input);
    }

    /// Harmless text is expected back unchanged everywhere except the URI
    /// component encoder, whose output for this input must differ.
    #[test]
    fn safe_string_unchanged_in_all_contexts() {
        let safe = "hello world 123";
        assert_eq!(for_html(safe), safe);
        assert_eq!(for_javascript(safe), safe);
        assert_eq!(for_css_string(safe), safe);
        assert_eq!(for_sql(safe), safe);
        assert_eq!(for_sql_backslash(safe), safe);
        assert_ne!(for_uri_component(safe), safe);
    }
}