use parking_lot::RwLock;
use regex::Regex;
use serde_json::Value;
use std::collections::HashSet;
use std::fmt::Write as _;
use std::sync::LazyLock;
use url::form_urlencoded::{Serializer, parse};
/// Process-wide registry of strings that have been marked safe.
///
/// `mark_safe` inserts into it and `is_marked_safe` checks membership by exact
/// string value. Nothing in this file removes entries.
static SAFE_STRINGS: LazyLock<RwLock<HashSet<String>>> =
LazyLock::new(|| RwLock::new(HashSet::new()));
/// Matches a `>` followed by one or more whitespace characters and a `<`;
/// used by `strip_spaces_between_tags`.
static SPACES_BETWEEN_TAGS_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r">\s+<").expect("valid strip-spaces regex"));
/// Punctuation bytes that `is_url_safe_byte` accepts in addition to ASCII
/// alphanumerics.
const URL_SAFE: &[u8] = b"!$&'()*+,;=:/?#[]@-._~";
/// Upper bound on the number of stripping passes `strip_tags` performs.
const TAG_SCAN_LIMIT: usize = 50;
/// Records `value` in the global safe-string registry and returns it as an
/// owned `String`.
///
/// Safety is tracked by string *value*: any later string that compares equal
/// to a registered one is treated as safe by `is_marked_safe`.
fn mark_safe(value: impl Into<String>) -> String {
    let owned: String = value.into();
    {
        // Keep the write guard scoped so it is released before returning.
        let mut registry = SAFE_STRINGS.write();
        registry.insert(owned.clone());
    }
    owned
}
/// Returns `true` when a string equal to `value` is present in the global
/// safe-string registry.
fn is_marked_safe(value: &str) -> bool {
    let registry = SAFE_STRINGS.read();
    registry.contains(value)
}
/// Converts every `\r\n` pair and every lone `\r` into a single `\n`.
fn normalize_newlines(value: &str) -> String {
    let mut normalized = String::with_capacity(value.len());
    let mut chars = value.chars().peekable();
    while let Some(ch) = chars.next() {
        if ch == '\r' {
            // A CR absorbs the LF that immediately follows it, if any.
            chars.next_if_eq(&'\n');
            normalized.push('\n');
        } else {
            normalized.push(ch);
        }
    }
    normalized
}
/// Returns `value` with the five HTML-significant characters replaced by
/// character references: `&` → `&amp;`, `<` → `&lt;`, `>` → `&gt;`,
/// `"` → `&quot;` and `'` → `&#x27;`.
///
/// The result is *not* registered as safe; see `escape` for that.
fn escape_impl(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&#x27;"),
            _ => escaped.push(ch),
        }
    }
    escaped
}
/// Decodes `%XX` escapes in `value` and interprets the resulting bytes as
/// UTF-8, substituting U+FFFD for invalid sequences.
///
/// A `%` that is not followed by two hexadecimal digits is copied through
/// unchanged.
fn percent_decode_lossy(value: &str) -> String {
    fn hex_value(byte: u8) -> Option<u8> {
        match byte {
            b'0'..=b'9' => Some(byte - b'0'),
            b'a'..=b'f' => Some(byte - b'a' + 10),
            b'A'..=b'F' => Some(byte - b'A' + 10),
            _ => None,
        }
    }

    let mut decoded = Vec::with_capacity(value.len());
    let mut rest = value.as_bytes();
    while let Some((&byte, tail)) = rest.split_first() {
        if byte == b'%' {
            if let [hi, lo, after @ ..] = tail {
                if let (Some(hi), Some(lo)) = (hex_value(*hi), hex_value(*lo)) {
                    decoded.push((hi << 4) | lo);
                    rest = after;
                    continue;
                }
            }
        }
        decoded.push(byte);
        rest = tail;
    }
    String::from_utf8_lossy(&decoded).into_owned()
}
/// Returns `true` for bytes that `quote_url_piece` leaves unencoded: ASCII
/// letters and digits plus the punctuation listed in `URL_SAFE`.
fn is_url_safe_byte(byte: u8) -> bool {
    match byte {
        b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' => true,
        other => URL_SAFE.iter().any(|&allowed| allowed == other),
    }
}
/// Percent-encodes the UTF-8 bytes of `value`.
///
/// Bytes for which `safe` returns `true` are copied through; every other byte
/// becomes `%XX` with uppercase hexadecimal digits. Non-ASCII bytes are always
/// encoded, whatever `safe` says: emitting one as a `char` would reinterpret
/// it as a Latin-1 code point and corrupt the text.
fn percent_encode_with_safe(value: &str, safe: fn(u8) -> bool) -> String {
    let mut encoded = String::with_capacity(value.len());
    for &byte in value.as_bytes() {
        if byte.is_ascii() && safe(byte) {
            encoded.push(char::from(byte));
        } else {
            // Append in place instead of allocating a temporary String per byte.
            write!(encoded, "%{byte:02X}").expect("writing to a String cannot fail");
        }
    }
    encoded
}
/// Percent-encodes one side of an e-mail address for use in a `mailto:` link,
/// keeping only ASCII alphanumerics and `-`, `.`, `_`, `~` unencoded.
fn percent_encode_mail_component(value: &str) -> String {
    fn is_unreserved(byte: u8) -> bool {
        matches!(byte, b'-' | b'.' | b'_' | b'~') || byte.is_ascii_alphanumeric()
    }

    percent_encode_with_safe(value, is_unreserved)
}
/// Decodes the small set of character references that `escape_impl` can
/// produce (plus a few common variants) back into literal characters.
///
/// Recognized: `&amp;`, `&lt;`, `&gt;` (the latter two also without the
/// trailing `;`), `&quot;`, `&#x27;` and `&#39;`. Anything else is copied
/// through unchanged. Decoding is single-pass, so `&amp;lt;` becomes `&lt;`
/// rather than `<`.
fn html_unescape_minimal(value: &str) -> String {
    // Order matters: the `;`-terminated spelling must be tried before its
    // unterminated prefix so that the semicolon is consumed too.
    const ENTITIES: [(&str, char); 8] = [
        ("&amp;", '&'),
        ("&lt;", '<'),
        ("&lt", '<'),
        ("&gt;", '>'),
        ("&gt", '>'),
        ("&quot;", '"'),
        ("&#x27;", '\''),
        ("&#39;", '\''),
    ];

    let mut result = String::with_capacity(value.len());
    let mut rest = value;
    'scan: while let Some(ch) = rest.chars().next() {
        if ch == '&' {
            for (entity, decoded) in ENTITIES {
                // `strip_prefix` ties the consumed length to the matched
                // entity, so the two can never drift apart.
                if let Some(tail) = rest.strip_prefix(entity) {
                    result.push(decoded);
                    rest = tail;
                    continue 'scan;
                }
            }
        }
        result.push(ch);
        rest = &rest[ch.len_utf8()..];
    }
    result
}
/// Shortens `value` for display so that it is at most `limit` characters,
/// replacing the cut-off tail with a single `…`.
///
/// With `limit == None`, or when `value` already fits, the text is returned
/// unchanged. Lengths are counted in `char`s, not bytes.
fn trim_url_text(value: &str, limit: Option<usize>) -> String {
    let Some(limit) = limit else {
        return value.to_owned();
    };
    // The text fits exactly when there is no character at index `limit`.
    if value.chars().nth(limit).is_none() {
        return value.to_owned();
    }
    // Reserve one character of the budget for the ellipsis.
    let trimmed: String = value.chars().take(limit.saturating_sub(1)).collect();
    format!("{trimmed}…")
}
/// Counts how many times `needle` occurs in `value`.
fn count_char(value: &str, needle: char) -> usize {
    value.matches(needle).count()
}
/// Splits `word` into `(lead, middle, trail)` where `lead` holds leading
/// opening brackets, `trail` holds trailing punctuation, and `middle` is the
/// candidate link text in between.
///
/// * Every leading `(` or `[` goes to `lead`.
/// * Trailing `.`, `,`, `:`, `;`, `!` always go to `trail`.
/// * A trailing `)` or `]` goes to `trail` only while `middle` contains more
///   closing than opening brackets of that kind, so balanced pairs such as
///   `wiki/Rust_(language)` stay inside the link.
///
/// Runs in time linear in `word.len()`: bracket counts are computed once and
/// updated as characters are trimmed, rather than recounted on every step.
fn trim_punctuation(word: &str) -> (String, String, String) {
    // '(' and '[' are single-byte ASCII, so the byte count is a valid char boundary.
    let lead_len = word
        .bytes()
        .take_while(|&byte| matches!(byte, b'(' | b'['))
        .count();
    let (lead, rest) = word.split_at(lead_len);

    // Only closing brackets are ever trimmed below, so the opening counts are fixed.
    let open_parens = count_char(rest, '(');
    let open_brackets = count_char(rest, '[');
    let mut close_parens = count_char(rest, ')');
    let mut close_brackets = count_char(rest, ']');

    let mut middle_len = rest.len();
    for last in rest.chars().rev() {
        let should_trim = match last {
            '.' | ',' | ':' | ';' | '!' => true,
            ')' if open_parens < close_parens => {
                close_parens -= 1;
                true
            }
            ']' if open_brackets < close_brackets => {
                close_brackets -= 1;
                true
            }
            _ => false,
        };
        if !should_trim {
            break;
        }
        middle_len -= last.len_utf8();
    }

    let (middle, trail) = rest.split_at(middle_len);
    (lead.to_owned(), middle.to_owned(), trail.to_owned())
}
/// Returns `true` when `value` starts with `http://` or `https://`
/// (case-insensitively) and the first character after `://` is alphanumeric,
/// `[` or `%`.
fn looks_like_simple_url(value: &str) -> bool {
    let has_http_scheme = ["http://", "https://"].iter().any(|prefix| {
        value
            .get(..prefix.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(prefix))
    });
    if !has_http_scheme {
        return false;
    }
    match value.split_once("://") {
        Some((_, rest)) => rest
            .chars()
            .next()
            .is_some_and(|ch| ch.is_alphanumeric() || matches!(ch, '[' | '%')),
        None => false,
    }
}
/// Heuristically decides whether `value` is a scheme-less domain such as
/// `example.com` or `example.com/path`.
///
/// The host part (everything before the first `/`, `?` or `#`) must contain a
/// dot, must not start or end with one, must end in a label of at least two
/// bytes, and may consist only of alphanumerics, `.`, `-`, `_`, `%`, `~` and
/// non-ASCII characters. Values containing `@`, a space, `<` or `>` are
/// rejected outright.
fn looks_like_bare_domain(value: &str) -> bool {
    if value.contains(['@', ' ', '<', '>']) {
        return false;
    }
    let host = value.split(['/', '?', '#']).next().unwrap_or(value);
    // No dot at all (which also covers the empty host) means no domain.
    let Some((_, last_label)) = host.rsplit_once('.') else {
        return false;
    };
    if host.starts_with('.') || host.ends_with('.') || last_label.len() < 2 {
        return false;
    }
    host.chars().all(|ch| {
        !ch.is_ascii() || ch.is_alphanumeric() || matches!(ch, '.' | '-' | '_' | '%' | '~')
    })
}
/// Heuristically decides whether `value` is an e-mail address.
///
/// Requires exactly one `@`, a non-empty local part, and a domain that
/// contains a dot, neither starts nor ends with one, and consists only of
/// alphanumerics, `.`, `-` and non-ASCII characters. Any `:` or whitespace
/// anywhere in `value` disqualifies it.
fn looks_like_email(value: &str) -> bool {
    if value.chars().any(|ch| ch == ':' || ch.is_whitespace()) {
        return false;
    }
    let Some((local, domain)) = value.split_once('@') else {
        return false;
    };
    if local.is_empty() || domain.is_empty() || domain.contains('@') {
        return false;
    }
    let well_dotted = domain.contains('.') && !domain.starts_with('.') && !domain.ends_with('.');
    well_dotted
        && domain
            .chars()
            .all(|ch| !ch.is_ascii() || ch.is_alphanumeric() || matches!(ch, '.' | '-'))
}
/// Splits `url` at the first `://` into `(scheme, rest)`.
///
/// Returns `None` when there is no `://` or when the text before it is not a
/// syntactically valid scheme: it must be non-empty, start with an ASCII
/// letter, and continue with ASCII letters, digits, `+`, `-` or `.`.
fn split_with_scheme(url: &str) -> Option<(&str, &str)> {
    let (scheme, rest) = url.split_once("://")?;
    let mut chars = scheme.chars();
    // `?` rejects the empty scheme, which an `all()` check alone would accept.
    let first = chars.next()?;
    let valid = first.is_ascii_alphabetic()
        && chars.all(|ch| ch.is_ascii_alphanumeric() || matches!(ch, '+' | '-' | '.'));
    valid.then_some((scheme, rest))
}
/// Normalizes the percent-encoding of one URL component: existing `%XX`
/// escapes are decoded first, then every byte rejected by `is_url_safe_byte`
/// is encoded, so already-encoded input is not double-encoded.
fn quote_url_piece(value: &str) -> String {
    let decoded = percent_decode_lossy(value);
    percent_encode_with_safe(&decoded, is_url_safe_byte)
}
/// Converts a single whitespace-free token into an anchor when it looks like
/// a URL, bare domain or e-mail address.
///
/// * `trim_url_limit` – optional maximum length (in characters) of the link
///   text; the `href` is never shortened.
/// * `nofollow` – adds `rel="nofollow"` to non-`mailto:` links.
/// * `autoescape` – HTML-escapes the link text and the surrounding
///   punctuation (and the whole token when it is not linkified).
///
/// Tokens that are not recognized are returned as-is (escaped when
/// `autoescape` is set). The `href` attribute value is always escaped.
fn urlize_word(
    word: &str,
    trim_url_limit: Option<usize>,
    nofollow: bool,
    autoescape: bool,
) -> String {
    let maybe_escape = |text: &str| -> String {
        if autoescape {
            escape(text)
        } else {
            text.to_owned()
        }
    };

    // Cheap pre-filter: every linkable token contains one of these.
    if !word.contains(['.', '@', ':']) {
        return maybe_escape(word);
    }

    let (lead, middle, trail) = trim_punctuation(word);
    let href = if looks_like_simple_url(&middle) {
        smart_urlquote(&html_unescape_minimal(&middle))
    } else if looks_like_bare_domain(&middle) {
        smart_urlquote(&format!("https://{}", html_unescape_minimal(&middle)))
    } else if looks_like_email(&middle) {
        match middle.rsplit_once('@') {
            Some((local, domain)) => format!(
                "mailto:{}@{}",
                percent_encode_mail_component(local),
                percent_encode_mail_component(domain)
            ),
            None => return maybe_escape(word),
        }
    } else {
        return maybe_escape(word);
    };

    let rel = if !nofollow || href.starts_with("mailto:") {
        ""
    } else {
        " rel=\"nofollow\""
    };
    let display = maybe_escape(&trim_url_text(&middle, trim_url_limit));
    let lead = maybe_escape(&lead);
    let trail = maybe_escape(&trail);
    mark_safe(format!(
        "{lead}<a href=\"{}\"{rel}>{display}</a>{trail}",
        escape(&href)
    ))
}
fn strip_tags_once(value: &str) -> String {
let mut output = String::with_capacity(value.len());
let mut index = 0;
while index < value.len() {
let tail = &value[index..];
if tail.starts_with("<!--")
&& let Some(end) = tail.find("-->")
{
index += end + 3;
continue;
}
if let Some(consumed) = consume_tag(tail) {
index += consumed;
continue;
}
if let Some(ch) = tail.chars().next() {
output.push(ch);
index += ch.len_utf8();
} else {
break;
}
}
output
}
/// If `tail` begins with something that looks like an HTML tag, returns the
/// number of bytes the tag occupies (including `<` and `>`).
///
/// The character after `<` must be an ASCII letter, `/`, `!` or `?`. The
/// closing `>` is the first one found outside single or double quotes, and
/// the text between the angle brackets must satisfy `is_valid_tag_body`.
fn consume_tag(tail: &str) -> Option<usize> {
    let after_open = tail.strip_prefix('<')?;
    let opener = after_open.chars().next()?;
    if !opener.is_ascii_alphabetic() && !matches!(opener, '/' | '!' | '?') {
        return None;
    }

    let mut open_quote: Option<char> = None;
    for (offset, ch) in after_open.char_indices() {
        if let Some(quote) = open_quote {
            // Inside a quoted attribute value only the matching quote matters.
            if ch == quote {
                open_quote = None;
            }
            continue;
        }
        match ch {
            '\"' | '\'' => open_quote = Some(ch),
            '>' => {
                let body = &after_open[..offset];
                // `offset` is relative to the text after '<': add one byte for
                // the '<' and one for the '>'.
                return is_valid_tag_body(body).then_some(offset + 2);
            }
            _ => {}
        }
    }
    None
}
/// Checks whether `body` (the text between `<` and `>`) is plausible tag
/// content.
///
/// After leading whitespace is skipped:
/// * a body starting with `!` or `?` (declarations, processing instructions)
///   is always accepted;
/// * otherwise an optional leading `/` is dropped, the next character must be
///   an ASCII letter, and the remainder may contain only ASCII alphanumerics
///   and a fixed set of attribute punctuation.
fn is_valid_tag_body(body: &str) -> bool {
    // Non-alphanumeric characters permitted after the first letter.
    const PUNCTUATION: &str = ":-_ \t\n\r='\"/!?[]();&#.,";

    let trimmed = body.trim_start();
    let Some(marker) = trimmed.chars().next() else {
        return false;
    };
    if matches!(marker, '!' | '?') {
        return true;
    }

    let name_and_attrs = trimmed.strip_prefix('/').unwrap_or(trimmed);
    let mut chars = name_and_attrs.chars();
    match chars.next() {
        Some(first) if first.is_ascii_alphabetic() => {
            chars.all(|ch| ch.is_ascii_alphanumeric() || PUNCTUATION.contains(ch))
        }
        _ => false,
    }
}
/// HTML-escapes `s` unconditionally and registers the result as safe, so a
/// later `conditional_escape` of that result leaves it untouched.
pub fn escape(s: &str) -> String {
    let escaped = escape_impl(s);
    mark_safe(escaped)
}
/// HTML-escapes `s` unless an equal string has already been marked safe, in
/// which case it is returned unchanged.
pub fn conditional_escape(s: &str) -> String {
    if !is_marked_safe(s) {
        return escape(s);
    }
    s.to_owned()
}
/// Removes HTML tags and comments from `value`.
///
/// Stripping one tag can expose another (for example a tag whose attribute
/// value itself contained markup), so `strip_tags_once` is re-applied until
/// the text stops changing or `TAG_SCAN_LIMIT` passes have run.
pub fn strip_tags(value: &str) -> String {
    let mut current = value.to_owned();
    for _ in 0..TAG_SCAN_LIMIT {
        let stripped = strip_tags_once(&current);
        if stripped == current {
            break;
        }
        current = stripped;
    }
    current
}
/// Builds an HTML fragment from `format_string`, substituting each `{}` with
/// the next entry of `args` after passing it through `conditional_escape`.
///
/// `{{` and `}}` produce literal braces. When there are more `{}`
/// placeholders than arguments, the extra placeholders are emitted verbatim;
/// surplus arguments are ignored. The result is marked safe.
pub fn format_html(format_string: &str, args: &[&str]) -> String {
    let capacity = format_string.len() + args.iter().map(|arg| arg.len()).sum::<usize>();
    let mut rendered = String::with_capacity(capacity);
    let mut remaining_args = args.iter();
    let mut rest = format_string;

    while let Some(ch) = rest.chars().next() {
        if let Some(after) = rest.strip_prefix("{{") {
            rendered.push('{');
            rest = after;
        } else if let Some(after) = rest.strip_prefix("}}") {
            rendered.push('}');
            rest = after;
        } else if let Some(after) = rest.strip_prefix("{}") {
            match remaining_args.next() {
                Some(arg) => rendered.push_str(&conditional_escape(arg)),
                None => rendered.push_str("{}"),
            }
            rest = after;
        } else {
            rendered.push(ch);
            rest = &rest[ch.len_utf8()..];
        }
    }
    mark_safe(rendered)
}
/// Escapes `value` for embedding inside a JavaScript string literal by
/// replacing quotes, backslashes, HTML-significant characters, the U+2028 /
/// U+2029 separators and ASCII control characters (U+0000–U+001F) with
/// `\uXXXX` sequences. The result is marked safe.
#[must_use]
pub fn escapejs(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        let fixed = match ch {
            '\\' => Some("\\u005C"),
            '\'' => Some("\\u0027"),
            '"' => Some("\\u0022"),
            '>' => Some("\\u003E"),
            '<' => Some("\\u003C"),
            '&' => Some("\\u0026"),
            '=' => Some("\\u003D"),
            '-' => Some("\\u002D"),
            ';' => Some("\\u003B"),
            '`' => Some("\\u0060"),
            '\u{2028}' => Some("\\u2028"),
            '\u{2029}' => Some("\\u2029"),
            _ => None,
        };
        if let Some(replacement) = fixed {
            escaped.push_str(replacement);
        } else if ch <= '\u{001F}' {
            // Remaining control characters are rendered numerically.
            write!(&mut escaped, "\\u{:04X}", ch as u32)
                .expect("writing to string cannot fail");
        } else {
            escaped.push(ch);
        }
    }
    mark_safe(escaped)
}
/// Joins `fragments` with `sep` and marks the result safe.
///
/// Only the separator goes through `conditional_escape`; the fragments are
/// inserted exactly as given, so callers must pass fragments that are already
/// safe HTML.
#[must_use]
pub fn format_html_join(sep: &str, fragments: &[&str]) -> String {
    let separator = conditional_escape(sep);
    let joined = fragments.join(separator.as_str());
    mark_safe(joined)
}
/// Removes whitespace that sits directly between a `>` and the following `<`.
/// Whitespace next to text content is left alone.
#[must_use]
pub fn strip_spaces_between_tags(value: &str) -> String {
    let collapsed = SPACES_BETWEEN_TAGS_RE.replace_all(value, "><");
    collapsed.into_owned()
}
/// Converts plain-text line breaks into HTML paragraphs.
///
/// Newlines are normalized first. Every run of two or more consecutive
/// newlines separates paragraphs, each wrapped in `<p>…</p>`; a single
/// newline inside a paragraph becomes `<br>`. Empty paragraphs are dropped.
/// The input is not escaped; the result is marked safe.
pub fn linebreaks(value: &str) -> String {
    let normalized = normalize_newlines(value);
    let mut paragraphs = Vec::new();
    let mut rest = normalized.as_str();
    while !rest.is_empty() {
        let (paragraph, tail) = match rest.find("\n\n") {
            Some(split_at) => rest.split_at(split_at),
            None => (rest, ""),
        };
        if !paragraph.is_empty() {
            paragraphs.push(format!("<p>{}</p>", paragraph.replace('\n', "<br>")));
        }
        // Swallow the whole newline run so that three or more newlines do not
        // leak a stray "\n" (rendered as a leading <br>) into the next paragraph.
        rest = tail.trim_start_matches('\n');
    }
    mark_safe(paragraphs.join("\n\n"))
}
/// Replaces every line break in `value` (after newline normalization) with
/// `<br>`. The input is not escaped; the result is marked safe.
pub fn linebreaksbr(value: &str) -> String {
    let normalized = normalize_newlines(value);
    let with_breaks = normalized.split('\n').collect::<Vec<_>>().join("<br>");
    mark_safe(with_breaks)
}
/// Turns URLs, bare domains and e-mail addresses found in `text` into
/// anchors.
///
/// The text is split on whitespace; each token is handed to `urlize_word`
/// with the given options, and every whitespace character is preserved
/// exactly. The assembled output is marked safe.
pub fn urlize(
    text: &str,
    trim_url_limit: Option<usize>,
    nofollow: bool,
    autoescape: bool,
) -> String {
    let mut linked = String::with_capacity(text.len());
    // Each piece is a (possibly empty) token followed by at most one
    // whitespace character.
    for piece in text.split_inclusive(char::is_whitespace) {
        let (word, separator) = match piece.chars().next_back() {
            Some(last) if last.is_whitespace() => {
                (&piece[..piece.len() - last.len_utf8()], Some(last))
            }
            _ => (piece, None),
        };
        if !word.is_empty() {
            linked.push_str(&urlize_word(word, trim_url_limit, nofollow, autoescape));
        }
        if let Some(separator) = separator {
            linked.push(separator);
        }
    }
    mark_safe(linked)
}
/// Serializes `value` as JSON inside a `<script type="application/json">`
/// element, optionally with an `id` attribute.
///
/// `&`, `<` and `>` in the JSON text are written as `\uXXXX` escapes so the
/// payload cannot close the script element early. `element_id` is
/// HTML-escaped. If serialization fails, `null` is emitted. The result is
/// marked safe.
pub fn json_script(value: &Value, element_id: Option<&str>) -> String {
    let json = serde_json::to_string(value).unwrap_or_else(|_| "null".to_owned());

    let mut escaped = String::with_capacity(json.len());
    for ch in json.chars() {
        match ch {
            '&' => escaped.push_str("\\u0026"),
            '<' => escaped.push_str("\\u003C"),
            '>' => escaped.push_str("\\u003E"),
            other => escaped.push(other),
        }
    }

    let script = if let Some(element_id) = element_id {
        format!(
            "<script id=\"{}\" type=\"application/json\">{escaped}</script>",
            escape_impl(element_id)
        )
    } else {
        format!("<script type=\"application/json\">{escaped}</script>")
    };
    mark_safe(script)
}
/// Percent-encodes `url` so it is safe to use as a link target, without
/// double-encoding escapes that are already present.
///
/// A URL without a valid `scheme://` prefix is quoted as one opaque piece.
/// Otherwise the authority, path and fragment are each normalized with
/// `quote_url_piece`, and the query string is re-serialized as
/// `application/x-www-form-urlencoded` pairs. An empty query or fragment is
/// kept when its delimiter was present in the input.
pub fn smart_urlquote(url: &str) -> String {
    let Some((scheme, rest)) = split_with_scheme(url) else {
        return quote_url_piece(url);
    };
    let netloc_end = rest.find(['/', '?', '#']).unwrap_or(rest.len());
    let (netloc, remainder) = rest.split_at(netloc_end);

    // The fragment starts at the first '#'. Everything after it, including
    // any '?', belongs to the fragment and must not be taken for a query.
    let (before_fragment, fragment) = match remainder.split_once('#') {
        Some((head, fragment)) => (head, Some(fragment)),
        None => (remainder, None),
    };
    let (path, query) = match before_fragment.split_once('?') {
        Some((path, query)) => (path, Some(query)),
        None => (before_fragment, None),
    };

    let mut result = format!(
        "{scheme}://{}{}",
        quote_url_piece(netloc),
        quote_url_piece(path)
    );
    if let Some(query) = query {
        result.push('?');
        let mut serializer = Serializer::new(String::new());
        serializer.extend_pairs(parse(query.as_bytes()).into_owned());
        result.push_str(&serializer.finish());
    }
    if let Some(fragment) = fragment {
        result.push('#');
        result.push_str(&quote_url_piece(fragment));
    }
    result
}
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
mod test_utils_html {
use super::*;
#[test]
fn test_escape() {
let cases = [
("&", "&"),
("<", "<"),
(">", ">"),
("\"", """),
("'", "'"),
("<&", "<&"),
];
for (input, expected) in cases {
assert_eq!(escape(input), expected);
}
}
#[test]
fn test_escape_repeated_values() {
assert_eq!(escape("&&"), "&&");
assert_eq!(escape("<<"), "<<");
assert_eq!(escape("\"\""), """");
assert_eq!(escape("''"), "''");
assert_eq!(escape("<><&"), "<><&");
}
#[test]
fn test_conditional_escape() {
let plain = "<h1>interop</h1>";
assert_eq!(conditional_escape(plain), "<h1>interop</h1>");
let safe = mark_safe(plain);
assert_eq!(conditional_escape(&safe), "<h1>interop</h1>");
}
#[test]
fn test_format_html() {
let safe_bold = mark_safe("<b>safe</b>");
let safe_italic = mark_safe("<i>safe again</i>");
assert_eq!(
format_html(
"{} {} {} {}",
&[
"< Dangerous >",
&safe_bold,
"< dangerous again",
&safe_italic
]
),
"< Dangerous > <b>safe</b> < dangerous again <i>safe again</i>",
);
assert_eq!(
format_html("<i>{}</i>", &["Adam & Eve"]),
"<i>Adam & Eve</i>"
);
}
#[test]
fn test_linebreaks() {
assert_eq!(
linebreaks("para1\n\npara2\r\rpara3"),
"<p>para1</p>\n\n<p>para2</p>\n\n<p>para3</p>",
);
assert_eq!(
linebreaks("para1\nsub1\rsub2\n\npara2"),
"<p>para1<br>sub1<br>sub2</p>\n\n<p>para2</p>",
);
assert_eq!(
linebreaks("para1\r\n\r\npara2\rsub1\r\rpara4"),
"<p>para1</p>\n\n<p>para2<br>sub1</p>\n\n<p>para4</p>",
);
assert_eq!(
linebreaks("para1\tmore\n\npara2"),
"<p>para1\tmore</p>\n\n<p>para2</p>",
);
}
#[test]
fn test_linebreaksbr() {
assert_eq!(linebreaksbr("a\r\nb\rc\nd"), "a<br>b<br>c<br>d");
}
#[test]
fn test_strip_tags() {
let cases = [
(
"<p>See: 'é is an apostrophe followed by e acute</p>",
"See: 'é is an apostrophe followed by e acute",
),
(
"<p>See: 'é is an apostrophe followed by e acute</p>",
"See: 'é is an apostrophe followed by e acute",
),
("<adf>a", "a"),
("</adf>a", "a"),
("<asdf><asdf>e", "e"),
("hi, <f x", "hi, <f x"),
("234<235, right?", "234<235, right?"),
("</fe", "</fe"),
("<x>b<y>", "b"),
("a<p onclick=\"alert('<test>')\">b</p>c", "abc"),
("a<p a >b</p>c", "abc"),
("d<a:b c:d>e</p>f", "def"),
(
"<strong>foo</strong><a href=\"http://example.com\">bar</a>",
"foobar",
),
("&gotcha&#;<>", "&gotcha&#;<>"),
("<script>alert()</script>&h", "alert()&h"),
];
for (input, expected) in cases {
assert_eq!(strip_tags(input), expected, "input: {input}");
}
}
#[test]
fn test_json_script() {
assert_eq!(
json_script(&json!("&<>"), Some("test_id")),
"<script id=\"test_id\" type=\"application/json\">\"\\u0026\\u003C\\u003E\"</script>",
);
assert_eq!(
json_script(&json!({"a": "<script>test&ing</script>"}), Some("test_id")),
"<script id=\"test_id\" type=\"application/json\">{\"a\":\"\\u003Cscript\\u003Etest\\u0026ing\\u003C/script\\u003E\"}</script>",
);
}
#[test]
fn test_json_script_without_id() {
assert_eq!(
json_script(&json!({"key": "value"}), None),
"<script type=\"application/json\">{\"key\":\"value\"}</script>",
);
}
#[test]
fn test_smart_urlquote() {
let cases = [
("http://öäü.com/", "http://%C3%B6%C3%A4%C3%BC.com/"),
(
"http://öäü.com/öäü/",
"http://%C3%B6%C3%A4%C3%BC.com/%C3%B6%C3%A4%C3%BC/",
),
(
"http://example.com/path/öäü/",
"http://example.com/path/%C3%B6%C3%A4%C3%BC/",
),
(
"http://example.com/%C3%B6/ä/",
"http://example.com/%C3%B6/%C3%A4/",
),
(
"http://example.com/?x=1&y=2+3&z=",
"http://example.com/?x=1&y=2+3&z=",
),
(
"http://example.com/?x=<>\"'",
"http://example.com/?x=%3C%3E%22%27",
),
("http://[fd00::1]/", "http://[fd00::1]/"),
];
for (input, expected) in cases {
assert_eq!(smart_urlquote(input), expected, "input: {input}");
}
}
#[test]
fn test_urlize() {
assert_eq!(
urlize("Search for google.com/?q=! and see.", None, false, false),
"Search for <a href=\"https://google.com/?q=\">google.com/?q=</a>! and see.",
);
assert_eq!(
urlize(
"Search for google.com/?q=1<! and see.",
None,
false,
false
),
"Search for <a href=\"https://google.com/?q=1%3C\">google.com/?q=1<</a>! and see.",
);
assert_eq!(
urlize("Visit example.com", None, false, false),
"Visit <a href=\"https://example.com\">example.com</a>",
);
assert_eq!(
urlize("http://www.foo.bar/", None, false, false),
"<a href=\"http://www.foo.bar/\">http://www.foo.bar/</a>",
);
assert_eq!(
urlize("host.djangoproject.com", None, false, false),
"<a href=\"https://host.djangoproject.com\">host.djangoproject.com</a>",
);
}
#[test]
fn test_urlize_unicode_domain() {
assert_eq!(
urlize("Look on www.نامه‌ای.com.", None, false, false),
"Look on <a href=\"https://www.%D9%86%D8%A7%D9%85%D9%87%E2%80%8C%D8%A7%DB%8C.com\">www.نامه‌ای.com</a>.",
);
}
#[test]
fn test_urlize_email() {
assert_eq!(
urlize("foo@example.com", None, false, false),
"<a href=\"mailto:foo@example.com\">foo@example.com</a>",
);
assert_eq!(
urlize("yes+this=is&a%valid!email@example.com", None, false, false),
"<a href=\"mailto:yes%2Bthis%3Dis%26a%25valid%21email@example.com\">yes+this=is&a%valid!email@example.com</a>",
);
assert_eq!(
urlize("foo@faß.example.com", None, false, false),
"<a href=\"mailto:foo@fa%C3%9F.example.com\">foo@faß.example.com</a>",
);
assert_eq!(
urlize("idna-2008@Þ‰Þ¨Þ€Þ§ÞƒÞª.example.mv", None, false, false),
"<a href=\"mailto:idna-2008@%DE%89%DE%A8%DE%80%DE%A7%DE%83%DE%AA.example.mv\">idna-2008@Þ‰Þ¨Þ€Þ§ÞƒÞª.example.mv</a>",
);
}
#[test]
fn test_urlize_trim_url_limit() {
assert_eq!(
urlize(
"Visit https://example.com/very/long/path",
Some(12),
false,
false
),
"Visit <a href=\"https://example.com/very/long/path\">https://exa…</a>",
);
}
#[test]
fn test_urlize_nofollow() {
assert_eq!(
urlize("Visit example.com", None, true, false),
"Visit <a href=\"https://example.com\" rel=\"nofollow\">example.com</a>",
);
}
#[test]
fn test_urlize_autoescape() {
assert_eq!(
urlize("Use <google.com>", None, false, true),
"Use <google.com>",
);
}
#[test]
fn test_urlize_unchanged_inputs() {
let cases = [
"foo@",
"@foo.com",
"foo@.example.com",
"foo@localhost",
"foo@localhost.",
"test@example?;+!.com",
"email me@example.com,then I'll respond",
"[a link](https://www.djangoproject.com/)",
];
for input in cases {
assert_eq!(urlize(input, None, false, false), input, "input: {input}");
}
}
const HTML_SPECIAL_CHARS: &[(char, &str)] = &[
('&', "&"),
('<', "<"),
('>', ">"),
('\"', """),
('\'', "'"),
];
#[test]
fn test_escape_patterns() {
let patterns = ["{}", "asdf{}fdsa", "{}1", "1{}b"];
for (raw, escaped) in HTML_SPECIAL_CHARS {
for pattern in patterns {
let input = pattern.replace("{}", &raw.to_string());
let expected = pattern.replace("{}", escaped);
assert_eq!(escape(&input), expected);
}
}
}
#[test]
fn test_format_html_literal_braces() {
assert_eq!(
format_html("<span>{{{}}}</span>", &["<x>"]),
"<span>{<x>}</span>"
);
}
#[test]
fn test_escapejs_escapes_javascript_unsafe_characters() {
assert_eq!(
escapejs("double \"quotes\" \\ and 'single quotes'"),
"double \\u0022quotes\\u0022 \\u005C and \\u0027single quotes\\u0027"
);
assert_eq!(
escapejs("<script>alert(`x` & y);</script>"),
"\\u003Cscript\\u003Ealert(\\u0060x\\u0060 \\u0026 y)\\u003B\\u003C/script\\u003E"
);
}
#[test]
fn test_escapejs_escapes_control_and_separator_characters() {
assert_eq!(
escapejs("\u{0000}\u{001F}\n\r\t\u{2028}\u{2029}"),
"\\u0000\\u001F\\u000A\\u000D\\u0009\\u2028\\u2029"
);
assert_eq!(
escapejs("and lots of whitespace: \r\n\t\u{000B}\u{000C}\u{0008}"),
"and lots of whitespace: \\u000D\\u000A\\u0009\\u000B\\u000C\\u0008"
);
assert_eq!(
escapejs("paragraph separator:\u{2029}and line separator:\u{2028}"),
"paragraph separator:\\u2029and line separator:\\u2028"
);
assert_eq!(escapejs("`"), "\\u0060");
}
#[test]
fn test_format_html_join_escapes_separator_and_marks_safe() {
let first = format_html("<span>{}</span>", &["<x>"]);
let second = format_html("<span>{}</span>", &[&mark_safe("<b>safe</b>")]);
let joined = format_html_join(" & ", &[&first, &second]);
assert_eq!(
joined,
"<span><x></span> & <span><b>safe</b></span>"
);
assert_eq!(conditional_escape(&joined), joined);
}
#[test]
fn test_format_html_join_empty_input_returns_safe_empty_string() {
let joined = format_html_join("<ignored>", &[]);
assert_eq!(joined, "");
assert_eq!(conditional_escape(&joined), joined);
}
#[test]
fn test_strip_spaces_between_tags() {
assert_eq!(
strip_spaces_between_tags("<p>foo</p> \n\t <p>bar</p>"),
"<p>foo</p><p>bar</p>"
);
assert_eq!(
strip_spaces_between_tags(" <p>foo</p> <p>bar</p> "),
" <p>foo</p><p>bar</p> "
);
assert_eq!(
strip_spaces_between_tags("<p>foo</p> text <p>bar</p>"),
"<p>foo</p> text <p>bar</p>"
);
assert_eq!(strip_spaces_between_tags(" <adf>"), " <adf>");
assert_eq!(strip_spaces_between_tags("<adf> "), "<adf> ");
assert_eq!(strip_spaces_between_tags(" </adf> "), " </adf> ");
assert_eq!(strip_spaces_between_tags(" <f> x</f>"), " <f> x</f>");
assert_eq!(strip_spaces_between_tags("<d> </d>"), "<d></d>");
assert_eq!(
strip_spaces_between_tags("<p>hello </p>\n<p> world</p>"),
"<p>hello </p><p> world</p>"
);
assert_eq!(
strip_spaces_between_tags("\n<p>\t</p>\n<p> </p>\n"),
"\n<p></p><p></p>\n"
);
}
#[test]
fn test_linebreaks_empty_segments_are_ignored() {
assert_eq!(linebreaks("\n\npara\n\n"), "<p>para</p>");
}
#[test]
fn test_strip_tags_preserves_plain_angle_brackets() {
assert_eq!(strip_tags("a4<a5 right?"), "a4<a5 right?");
assert_eq!(strip_tags("b7>b2!"), "b7>b2!");
}
#[test]
fn test_strip_tags_comments() {
assert_eq!(strip_tags("Hello<!-- hidden -->world"), "Helloworld");
}
#[test]
fn test_strip_tags_files() {
let fixtures = [
(
include_str!("../../../django/tests/utils_tests/files/strip_tags1.html"),
&["<center>", "<morbi>"] as &[&str],
),
(
include_str!("../../../django/tests/utils_tests/files/strip_tags2.txt"),
&["<!DOCTYPE html>", "<html>", "<script "] as &[&str],
),
];
for (fixture, removed_fragments) in fixtures {
let stripped = strip_tags(fixture);
assert!(
stripped.contains("Test string that has not been stripped."),
"fixture lost sentinel text"
);
assert!(
stripped.len() < fixture.len(),
"fixture was not meaningfully reduced"
);
for fragment in removed_fragments {
assert!(
!stripped.contains(fragment),
"fixture retained {fragment:?} after stripping"
);
}
}
}
#[test]
fn test_json_script_escapes_script_boundary() {
assert_eq!(
json_script(
&json!({"html": "</script><script>alert(1)</script>"}),
Some("x")
),
"<script id=\"x\" type=\"application/json\">{\"html\":\"\\u003C/script\\u003E\\u003Cscript\\u003Ealert(1)\\u003C/script\\u003E\"}</script>",
);
}
#[test]
fn test_smart_urlquote_existing_encoding_and_nested_url() {
assert_eq!(
smart_urlquote("http://example.com/?q=http://example.com/?x=1%26q=django"),
"http://example.com/?q=http%3A%2F%2Fexample.com%2F%3Fx%3D1%26q%3Ddjango",
);
}
#[test]
fn test_smart_urlquote_quotes_unsafe_host_text() {
assert_eq!(
smart_urlquote("http://.www.f oo.bar/"),
"http://.www.f%20oo.bar/",
);
assert_eq!(
smart_urlquote("http://example.com\">"),
"http://example.com%22%3E",
);
}
#[test]
fn test_urlize_preserves_mailto_without_nofollow() {
assert_eq!(
urlize("foo@example.com", None, true, false),
"<a href=\"mailto:foo@example.com\">foo@example.com</a>",
);
}
#[test]
fn test_urlize_handles_parenthesized_links() {
assert_eq!(
urlize("(example.com)", None, false, false),
"(<a href=\"https://example.com\">example.com</a>)",
);
}
}
}