// rjango 0.1.1 - Docs.rs

use parking_lot::RwLock;
use regex::Regex;
use serde_json::Value;
use std::collections::HashSet;
use std::fmt::Write as _;
use std::sync::LazyLock;
use url::form_urlencoded::{Serializer, parse};

/// Process-wide registry of strings considered HTML-safe. `mark_safe` inserts
/// into it and `is_marked_safe` looks values up by content.
///
/// NOTE(review): nothing visible in this file ever removes entries, so the set
/// appears to grow for the lifetime of the process, and safety is keyed on
/// string *content* rather than identity — confirm both are intended.
static SAFE_STRINGS: LazyLock<RwLock<HashSet<String>>> =
    LazyLock::new(|| RwLock::new(HashSet::new()));
/// Matches a `>` followed by one or more whitespace characters and a `<`;
/// compiled once on first use by `strip_spaces_between_tags`.
static SPACES_BETWEEN_TAGS_RE: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r">\s+<").expect("valid strip-spaces regex"));

/// Non-alphanumeric ASCII bytes that `is_url_safe_byte` leaves unencoded.
const URL_SAFE: &[u8] = b"!$&'()*+,;=:/?#[]@-._~";
/// Upper bound on the number of stripping passes `strip_tags` performs.
const TAG_SCAN_LIMIT: usize = 50;

/// Register `value` in the module-wide safe-string set and return it as an
/// owned `String`.
fn mark_safe(value: impl Into<String>) -> String {
    let owned: String = value.into();
    {
        // Keep the write guard scoped to the insertion only.
        let mut registry = SAFE_STRINGS.write();
        registry.insert(owned.clone());
    }
    owned
}

/// Report whether a string with exactly this content was previously passed
/// through `mark_safe`.
fn is_marked_safe(value: &str) -> bool {
    let registry = SAFE_STRINGS.read();
    registry.contains(value)
}

/// Rewrite every `\r\n` pair and every lone `\r` as a single `\n`.
fn normalize_newlines(value: &str) -> String {
    let mut normalized = String::with_capacity(value.len());
    let mut pending = value.chars().peekable();
    while let Some(ch) = pending.next() {
        if ch != '\r' {
            normalized.push(ch);
            continue;
        }
        // A carriage return swallows an immediately following line feed so
        // that CRLF collapses to one newline instead of two.
        if pending.peek() == Some(&'\n') {
            pending.next();
        }
        normalized.push('\n');
    }
    normalized
}

/// Replace the five HTML-significant characters (`&`, `<`, `>`, `"`, `'`)
/// with their entity spellings; every other character is copied unchanged.
fn escape_impl(value: &str) -> String {
    value
        .chars()
        .fold(String::with_capacity(value.len()), |mut out, ch| {
            let entity = match ch {
                '&' => "&amp;",
                '<' => "&lt;",
                '>' => "&gt;",
                '"' => "&quot;",
                '\'' => "&#x27;",
                other => {
                    out.push(other);
                    return out;
                }
            };
            out.push_str(entity);
            out
        })
}

/// Decode `%XX` escapes in `value`, leaving malformed or truncated escapes as
/// literal text. The decoded bytes are interpreted as UTF-8, with invalid
/// sequences replaced by U+FFFD.
fn percent_decode_lossy(value: &str) -> String {
    /// Numeric value of an ASCII hexadecimal digit, if `byte` is one.
    fn hex_value(byte: u8) -> Option<u8> {
        match byte {
            b'0'..=b'9' => Some(byte - b'0'),
            b'a'..=b'f' => Some(byte - b'a' + 10),
            b'A'..=b'F' => Some(byte - b'A' + 10),
            _ => None,
        }
    }

    let source = value.as_bytes();
    let mut decoded = Vec::with_capacity(source.len());
    let mut cursor = 0;
    while let Some(&current) = source.get(cursor) {
        if current == b'%' {
            // Only a '%' followed by two hex digits is an escape.
            if let Some(&[hi, lo]) = source.get(cursor + 1..cursor + 3) {
                if let (Some(hi), Some(lo)) = (hex_value(hi), hex_value(lo)) {
                    decoded.push((hi << 4) | lo);
                    cursor += 3;
                    continue;
                }
            }
        }
        decoded.push(current);
        cursor += 1;
    }
    String::from_utf8_lossy(&decoded).into_owned()
}

/// True for ASCII letters and digits, and for the punctuation bytes listed in
/// `URL_SAFE`.
fn is_url_safe_byte(byte: u8) -> bool {
    if byte.is_ascii_alphanumeric() {
        return true;
    }
    URL_SAFE.iter().any(|&allowed| allowed == byte)
}

/// Percent-encode the UTF-8 bytes of `value`.
///
/// Bytes for which `safe` returns `true` are emitted verbatim; every other
/// byte becomes `%` followed by two uppercase hexadecimal digits.
fn percent_encode_with_safe(value: &str, safe: fn(u8) -> bool) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";

    let mut out = String::with_capacity(value.len());
    for &octet in value.as_bytes() {
        if safe(octet) {
            out.push(char::from(octet));
            continue;
        }
        out.push('%');
        out.push(char::from(HEX_DIGITS[usize::from(octet >> 4)]));
        out.push(char::from(HEX_DIGITS[usize::from(octet & 0x0F)]));
    }
    out
}

/// Percent-encode one side of an e-mail address, keeping only ASCII letters,
/// digits and `-`, `.`, `_`, `~` unencoded.
fn percent_encode_mail_component(value: &str) -> String {
    /// Bytes that may appear verbatim in the encoded component.
    fn is_unreserved(byte: u8) -> bool {
        matches!(byte, b'-' | b'.' | b'_' | b'~') || byte.is_ascii_alphanumeric()
    }

    percent_encode_with_safe(value, is_unreserved)
}

/// Decode the small set of HTML entities this module understands:
/// `&amp;`, `&lt;`/`&lt`, `&gt;`/`&gt`, `&quot;`, `&#x27;` and `&#39;`.
/// Anything else is copied through unchanged.
fn html_unescape_minimal(value: &str) -> String {
    // Order matters: the semicolon-terminated spellings must be tried before
    // their bare prefixes so that "&lt;" consumes the semicolon.
    const ENTITIES: [(&str, char); 8] = [
        ("&amp;", '&'),
        ("&lt;", '<'),
        ("&lt", '<'),
        ("&gt;", '>'),
        ("&gt", '>'),
        ("&quot;", '"'),
        ("&#x27;", '\''),
        ("&#39;", '\''),
    ];

    let mut out = String::with_capacity(value.len());
    let mut rest = value;
    while !rest.is_empty() {
        let known = ENTITIES
            .iter()
            .find_map(|&(entity, plain)| rest.strip_prefix(entity).map(|after| (plain, after)));
        match known {
            Some((plain, after)) => {
                out.push(plain);
                rest = after;
            }
            None => {
                // Not an entity we know: copy one character and move on.
                let mut chars = rest.chars();
                if let Some(ch) = chars.next() {
                    out.push(ch);
                }
                rest = chars.as_str();
            }
        }
    }
    out
}

/// Shorten `value` for display.
///
/// With no limit, or when `value` has at most `limit` characters, the text is
/// returned unchanged. Otherwise the first `limit - 1` characters (saturating
/// at zero) are kept and an ellipsis character is appended.
fn trim_url_text(value: &str, limit: Option<usize>) -> String {
    let Some(max_chars) = limit else {
        return value.to_owned();
    };
    if value.chars().count() <= max_chars {
        return value.to_owned();
    }
    let mut shortened: String = value.chars().take(max_chars.saturating_sub(1)).collect();
    shortened.push('…');
    shortened
}

/// Number of occurrences of `needle` in `value`.
fn count_char(value: &str, needle: char) -> usize {
    value.matches(needle).count()
}

/// Split `word` into `(lead, middle, trail)` so that wrapping punctuation can
/// stay outside a generated link.
///
/// * `lead` is the run of `(` / `[` characters at the start of `word`.
/// * `trail` is the longest suffix of the remainder made of `.`, `,`, `:`,
///   `;`, `!`, plus any `)` / `]` that is unbalanced — i.e. the remaining
///   text holds fewer matching openers than closers at the moment that
///   character is examined.
/// * `middle` is whatever is left between the two.
///
/// Concatenating the three parts always reproduces `word`.
///
/// The bracket counts are computed once and updated as closers are trimmed,
/// and the parts are sliced out of `word` at the end, so the work is linear
/// in the length of `word` even for inputs made mostly of brackets.
fn trim_punctuation(word: &str) -> (String, String, String) {
    let lead_len = word
        .find(|ch: char| !matches!(ch, '(' | '['))
        .unwrap_or(word.len());
    let (lead, rest) = word.split_at(lead_len);

    // Tally brackets in the text that follows the leading openers.
    let (mut open_parens, mut close_parens) = (0usize, 0usize);
    let (mut open_brackets, mut close_brackets) = (0usize, 0usize);
    for ch in rest.chars() {
        match ch {
            '(' => open_parens += 1,
            ')' => close_parens += 1,
            '[' => open_brackets += 1,
            ']' => close_brackets += 1,
            _ => {}
        }
    }
    // Openers are never trimmed from the end, so only closer counts change.
    let _ = (&mut open_parens, &mut open_brackets);

    let mut end = rest.len();
    for last in rest.chars().rev() {
        let should_trim = match last {
            '.' | ',' | ':' | ';' | '!' => true,
            ')' if open_parens < close_parens => {
                close_parens -= 1;
                true
            }
            ']' if open_brackets < close_brackets => {
                close_brackets -= 1;
                true
            }
            _ => false,
        };
        if !should_trim {
            break;
        }
        end -= last.len_utf8();
    }

    let (middle, trail) = rest.split_at(end);
    (lead.to_owned(), middle.to_owned(), trail.to_owned())
}

/// True when `value` starts with `http://` or `https://` (in any ASCII case)
/// and the character right after `://` is alphanumeric, `[` or `%`.
fn looks_like_simple_url(value: &str) -> bool {
    let Some((scheme, after)) = value.split_once("://") else {
        return false;
    };
    let is_web_scheme =
        scheme.eq_ignore_ascii_case("http") || scheme.eq_ignore_ascii_case("https");
    if !is_web_scheme {
        return false;
    }
    match after.chars().next() {
        Some(ch) => ch.is_alphanumeric() || matches!(ch, '[' | '%'),
        None => false,
    }
}

/// Heuristic for scheme-less host names such as `example.com/path`.
///
/// The whole value must be free of `@`, spaces, `<` and `>`. The part before
/// the first `/`, `?` or `#` must contain a dot, must not start or end with
/// one, must end in a label of at least two bytes, and may only consist of
/// alphanumerics, non-ASCII characters and `.`, `-`, `_`, `%`, `~`.
fn looks_like_bare_domain(value: &str) -> bool {
    if value.chars().any(|ch| matches!(ch, '@' | ' ' | '<' | '>')) {
        return false;
    }

    let host = value.split(['/', '?', '#']).next().unwrap_or(value);
    let well_formed = !host.is_empty()
        && host.contains('.')
        && !host.starts_with('.')
        && !host.ends_with('.');
    if !well_formed {
        return false;
    }

    let final_label = host.rsplit('.').next().unwrap_or(host);
    if final_label.len() < 2 {
        return false;
    }

    host.chars().all(|ch| {
        !ch.is_ascii() || ch.is_alphanumeric() || matches!(ch, '.' | '-' | '_' | '%' | '~')
    })
}

/// Heuristic for plain e-mail addresses.
///
/// Requires exactly one `@` with non-empty text on both sides, no `:` and no
/// whitespace anywhere, and a domain that contains a dot, neither starts nor
/// ends with one, and consists only of alphanumerics, non-ASCII characters,
/// `.` and `-`.
fn looks_like_email(value: &str) -> bool {
    if value.contains(':') || value.chars().any(char::is_whitespace) {
        return false;
    }

    let Some((local, domain)) = value.split_once('@') else {
        return false;
    };
    // A second '@' would have produced a third part in a plain split.
    if local.is_empty() || domain.is_empty() || domain.contains('@') {
        return false;
    }

    let dotted = domain.contains('.') && !domain.starts_with('.') && !domain.ends_with('.');
    if !dotted {
        return false;
    }

    domain
        .chars()
        .all(|ch| !ch.is_ascii() || ch.is_alphanumeric() || matches!(ch, '.' | '-'))
}

/// Split `url` at its first `://` into `(scheme, rest)`.
///
/// Returns `None` when there is no `://`, when the scheme's first character
/// is not an ASCII letter, or when a later scheme character is not an ASCII
/// alphanumeric, `+`, `-` or `.`. An empty scheme passes these checks.
fn split_with_scheme(url: &str) -> Option<(&str, &str)> {
    let (scheme, rest) = url.split_once("://")?;

    let mut letters = scheme.chars();
    if let Some(head) = letters.next() {
        if !head.is_ascii_alphabetic() {
            return None;
        }
    }
    let tail_ok = letters.all(|ch| ch.is_ascii_alphanumeric() || matches!(ch, '+' | '-' | '.'));

    if tail_ok {
        Some((scheme, rest))
    } else {
        None
    }
}

/// Percent-decode `value` and re-encode it with `is_url_safe_byte` as the
/// safe set, so existing `%XX` escapes are decoded first rather than being
/// encoded a second time.
fn quote_url_piece(value: &str) -> String {
    let decoded = percent_decode_lossy(value);
    percent_encode_with_safe(decoded.as_str(), is_url_safe_byte)
}

/// Turn a single whitespace-free token into an HTML link when it looks like
/// a URL, a bare domain or an e-mail address.
///
/// * `trim_url_limit` — optional maximum length of the visible link text.
/// * `nofollow` — add `rel="nofollow"` to non-`mailto:` links.
/// * `autoescape` — HTML-escape the visible text, the surrounding punctuation
///   and any token that is not linked. The `href` value is always escaped.
///
/// Tokens that are not linked are returned as-is (or escaped when
/// `autoescape` is set).
fn urlize_word(
    word: &str,
    trim_url_limit: Option<usize>,
    nofollow: bool,
    autoescape: bool,
) -> String {
    // Escapes only when the caller asked for autoescaping.
    let render = |text: &str| -> String {
        if autoescape {
            escape(text)
        } else {
            text.to_owned()
        }
    };

    // Cheap pre-filter: a linkable token needs at least one of these.
    let may_link = word.chars().any(|ch| matches!(ch, '.' | '@' | ':'));
    if !may_link {
        return render(word);
    }

    let (lead, middle, trail) = trim_punctuation(word);

    let href = if looks_like_simple_url(&middle) {
        smart_urlquote(&html_unescape_minimal(&middle))
    } else if looks_like_bare_domain(&middle) {
        let with_scheme = format!("https://{}", html_unescape_minimal(&middle));
        smart_urlquote(&with_scheme)
    } else if looks_like_email(&middle) {
        match middle.rsplit_once('@') {
            Some((local, domain)) => format!(
                "mailto:{}@{}",
                percent_encode_mail_component(local),
                percent_encode_mail_component(domain)
            ),
            None => return render(word),
        }
    } else {
        return render(word);
    };

    let rel = if !nofollow || href.starts_with("mailto:") {
        ""
    } else {
        " rel=\"nofollow\""
    };

    let display = render(&trim_url_text(&middle, trim_url_limit));
    let lead = render(&lead);
    let trail = render(&trail);
    let anchor = format!(
        "{lead}<a href=\"{}\"{rel}>{display}</a>{trail}",
        escape(&href)
    );
    mark_safe(anchor)
}

fn strip_tags_once(value: &str) -> String {
    let mut output = String::with_capacity(value.len());
    let mut index = 0;
    while index < value.len() {
        let tail = &value[index..];
        if tail.starts_with("<!--")
            && let Some(end) = tail.find("-->")
        {
            index += end + 3;
            continue;
        }
        if let Some(consumed) = consume_tag(tail) {
            index += consumed;
            continue;
        }
        if let Some(ch) = tail.chars().next() {
            output.push(ch);
            index += ch.len_utf8();
        } else {
            break;
        }
    }
    output
}

/// If `tail` begins with something shaped like an HTML tag, return the number
/// of bytes it occupies (including the closing `>`).
///
/// The text must start with `<` followed by an ASCII letter, `/`, `!` or `?`.
/// The tag ends at the first `>` that is not inside a single- or
/// double-quoted section, and the text between `<` and that `>` must satisfy
/// `is_valid_tag_body`. Returns `None` otherwise, including when no closing
/// `>` is found.
fn consume_tag(tail: &str) -> Option<usize> {
    let after_open = tail.strip_prefix('<')?;
    let opener = after_open.chars().next()?;
    let plausible = opener.is_ascii_alphabetic() || matches!(opener, '/' | '!' | '?');
    if !plausible {
        return None;
    }

    let mut open_quote: Option<char> = None;
    for (pos, ch) in after_open.char_indices() {
        if let Some(quote) = open_quote {
            // Inside a quoted section only the matching quote is significant.
            if ch == quote {
                open_quote = None;
            }
            continue;
        }
        if ch == '"' || ch == '\'' {
            open_quote = Some(ch);
            continue;
        }
        if ch == '>' {
            // `pos` is relative to the text after '<'; add the '<' and '>'.
            return if is_valid_tag_body(&after_open[..pos]) {
                Some(pos + 2)
            } else {
                None
            };
        }
    }
    None
}

/// Decide whether the text between `<` and `>` is acceptable as a tag.
///
/// After leading whitespace is removed: an empty body is rejected; a body
/// starting with `!` or `?` is accepted outright; an optional leading `/` is
/// skipped; the next character must be an ASCII letter and every following
/// character must be an ASCII alphanumeric or one of a fixed set of
/// punctuation and whitespace characters.
fn is_valid_tag_body(body: &str) -> bool {
    // Punctuation and whitespace permitted after the tag name's first letter.
    const EXTRA_ALLOWED: &str = ":-_ \t\n\r='\"/!?[]();&#.,";

    let trimmed = body.trim_start();
    let name_and_attrs = match trimmed.chars().next() {
        None => return false,
        Some('!') | Some('?') => return true,
        Some('/') => &trimmed[1..],
        Some(_) => trimmed,
    };

    let mut rest = name_and_attrs.chars();
    match rest.next() {
        Some(first) if first.is_ascii_alphabetic() => {}
        _ => return false,
    }
    rest.all(|ch| ch.is_ascii_alphanumeric() || EXTRA_ALLOWED.contains(ch))
}

/// HTML-escape `s` and register the result as safe, so a later
/// `conditional_escape` on it does not escape it a second time.
pub fn escape(s: &str) -> String {
    let escaped = escape_impl(s);
    mark_safe(escaped)
}

/// Return `s` unchanged when a string with the same content has been marked
/// safe by this module; otherwise return its HTML-escaped form.
pub fn conditional_escape(s: &str) -> String {
    if !is_marked_safe(s) {
        return escape(s);
    }
    s.to_owned()
}

/// Remove HTML tags and comments from `value`, leaving plain text and
/// entities untouched.
///
/// Stripping is repeated until a pass changes nothing, because removing one
/// tag can expose another. At most `TAG_SCAN_LIMIT` passes are made.
pub fn strip_tags(value: &str) -> String {
    let mut text = value.to_owned();
    let mut passes = 0;
    while passes < TAG_SCAN_LIMIT {
        let next = strip_tags_once(&text);
        if next == text {
            // Fixed point reached: nothing left to strip.
            return text;
        }
        text = next;
        passes += 1;
    }
    text
}

/// Build an HTML fragment from `format_string`, substituting each `{}` with
/// the next entry of `args` after passing it through `conditional_escape`.
///
/// `{{` and `}}` produce literal braces. A `{}` with no remaining argument is
/// emitted literally. The result is marked safe.
pub fn format_html(format_string: &str, args: &[&str]) -> String {
    let capacity = format_string.len() + args.iter().map(|arg| arg.len()).sum::<usize>();
    let mut rendered = String::with_capacity(capacity);
    let mut pending_args = args.iter();
    let mut rest = format_string;

    while let Some(ch) = rest.chars().next() {
        // Escaped braces are checked first so "{{" never starts a placeholder.
        if let Some(after) = rest.strip_prefix("{{") {
            rendered.push('{');
            rest = after;
        } else if let Some(after) = rest.strip_prefix("}}") {
            rendered.push('}');
            rest = after;
        } else if let Some(after) = rest.strip_prefix("{}") {
            match pending_args.next() {
                Some(arg) => rendered.push_str(&conditional_escape(arg)),
                None => rendered.push_str("{}"),
            }
            rest = after;
        } else {
            rendered.push(ch);
            rest = &rest[ch.len_utf8()..];
        }
    }
    mark_safe(rendered)
}

/// Escape `value` for embedding in a JavaScript string literal.
///
/// Backslash, quotes, backtick, `<`, `>`, `&`, `=`, `-`, `;`, the U+2028 and
/// U+2029 separators and all characters below U+0020 are written as `\uXXXX`
/// escapes. The result is marked safe.
#[must_use]
pub fn escapejs(value: &str) -> String {
    let mut out = String::with_capacity(value.len());
    for ch in value.chars() {
        let fixed = match ch {
            '\\' => Some("\\u005C"),
            '\'' => Some("\\u0027"),
            '"' => Some("\\u0022"),
            '>' => Some("\\u003E"),
            '<' => Some("\\u003C"),
            '&' => Some("\\u0026"),
            '=' => Some("\\u003D"),
            '-' => Some("\\u002D"),
            ';' => Some("\\u003B"),
            '`' => Some("\\u0060"),
            '\u{2028}' => Some("\\u2028"),
            '\u{2029}' => Some("\\u2029"),
            _ => None,
        };
        if let Some(sequence) = fixed {
            out.push_str(sequence);
        } else if ch < '\u{0020}' {
            // Remaining control characters get a computed escape.
            write!(out, "\\u{:04X}", u32::from(ch)).expect("writing to string cannot fail");
        } else {
            out.push(ch);
        }
    }
    mark_safe(out)
}

/// Join `fragments` with `sep`, escaping the separator unless it is already
/// marked safe. The fragments themselves are inserted as given, and the
/// joined string is marked safe.
#[must_use]
pub fn format_html_join(sep: &str, fragments: &[&str]) -> String {
    let separator = conditional_escape(sep);
    let joined = fragments.join(separator.as_str());
    mark_safe(joined)
}

/// Collapse every `>`, whitespace run, `<` sequence to `><`. Whitespace that
/// does not sit directly between a `>` and a `<` is left alone.
#[must_use]
pub fn strip_spaces_between_tags(value: &str) -> String {
    let collapsed = SPACES_BETWEEN_TAGS_RE.replace_all(value, "><");
    collapsed.into_owned()
}

/// Convert blank-line-separated text into `<p>` paragraphs, turning single
/// newlines inside a paragraph into `<br>`.
///
/// Line endings are normalised first. Paragraphs are separated by any run of
/// two or more newlines, so `"a\n\n\nb"` yields two clean paragraphs rather
/// than leaking the extra newline into the second one as a leading `<br>`.
/// Empty paragraphs are dropped, the rest are joined with a blank line, and
/// the result is marked safe.
///
/// The input is not HTML-escaped by this function.
pub fn linebreaks(value: &str) -> String {
    let normalized = normalize_newlines(value);
    let mut paragraphs: Vec<String> = Vec::new();
    let mut rest = normalized.as_str();
    loop {
        let (paragraph, tail) = match rest.find("\n\n") {
            // Swallow the entire newline run, not just the first two.
            Some(pos) => (&rest[..pos], Some(rest[pos..].trim_start_matches('\n'))),
            None => (rest, None),
        };
        if !paragraph.is_empty() {
            paragraphs.push(format!("<p>{}</p>", paragraph.replace('\n', "<br>")));
        }
        match tail {
            Some(remaining) => rest = remaining,
            None => break,
        }
    }
    mark_safe(paragraphs.join("\n\n"))
}

/// Replace every line break (`\r\n`, `\r` or `\n`) with `<br>` and mark the
/// result safe. The input is not HTML-escaped by this function.
pub fn linebreaksbr(value: &str) -> String {
    let normalized = normalize_newlines(value);
    let with_breaks = normalized.replace('\n', "<br>");
    mark_safe(with_breaks)
}

/// Convert URLs, bare domains and e-mail addresses found in `text` into HTML
/// links.
///
/// The text is split on whitespace. Each non-empty token is passed to
/// `urlize_word` with the given `trim_url_limit`, `nofollow` and `autoescape`
/// settings, and the whitespace characters are copied through unchanged. The
/// result is marked safe.
pub fn urlize(
    text: &str,
    trim_url_limit: Option<usize>,
    nofollow: bool,
    autoescape: bool,
) -> String {
    let mut linked = String::with_capacity(text.len());
    // Byte offset at which the token currently being scanned begins.
    let mut token_start = 0;

    for (pos, ch) in text.char_indices() {
        if !ch.is_whitespace() {
            continue;
        }
        if token_start < pos {
            let token = &text[token_start..pos];
            linked.push_str(&urlize_word(token, trim_url_limit, nofollow, autoescape));
        }
        linked.push(ch);
        token_start = pos + ch.len_utf8();
    }

    if token_start < text.len() {
        let token = &text[token_start..];
        linked.push_str(&urlize_word(token, trim_url_limit, nofollow, autoescape));
    }
    mark_safe(linked)
}

/// Serialise `value` into a `<script type="application/json">` element.
///
/// `&`, `<` and `>` in the JSON text are written as `\u0026`, `\u003C` and
/// `\u003E`. When `element_id` is given it is HTML-escaped and emitted as the
/// element's `id`. If serialisation fails the payload is `null`. The result
/// is marked safe.
pub fn json_script(value: &Value, element_id: Option<&str>) -> String {
    let json = match serde_json::to_string(value) {
        Ok(text) => text,
        Err(_) => "null".to_owned(),
    };
    let escaped = json
        .replace('&', "\\u0026")
        .replace('<', "\\u003C")
        .replace('>', "\\u003E");

    let script = if let Some(element_id) = element_id {
        format!(
            "<script id=\"{}\" type=\"application/json\">{escaped}</script>",
            escape_impl(element_id)
        )
    } else {
        format!("<script type=\"application/json\">{escaped}</script>")
    };
    mark_safe(script)
}

/// Percent-encode unsafe URL characters while preserving existing URL
/// structure.
///
/// Input without a valid `scheme://` prefix is quoted as a single piece.
/// Otherwise the URL is split into network location, path, query and
/// fragment:
///
/// * network location, path and fragment are each percent-decoded and
///   re-encoded, so existing escapes are not encoded twice;
/// * a non-empty query is parsed as form-urlencoded pairs and re-serialised;
/// * a `?` or `#` delimiter is emitted exactly when the input had that
///   component, even if the component is empty.
///
/// Query presence is decided from the text before the fragment, so a `?`
/// that only appears inside the fragment (`http://h/p#f?x`) no longer adds a
/// spurious empty query to the output.
pub fn smart_urlquote(url: &str) -> String {
    let Some((scheme, rest)) = split_with_scheme(url) else {
        return quote_url_piece(url);
    };

    let netloc_end = rest.find(['/', '?', '#']).unwrap_or(rest.len());
    let (netloc, remainder) = rest.split_at(netloc_end);

    // The fragment is split off first: everything after the first '#' belongs
    // to it, including any '?' characters.
    let (before_fragment, fragment) = match remainder.split_once('#') {
        Some((before, fragment)) => (before, Some(fragment)),
        None => (remainder, None),
    };
    let (path, query) = match before_fragment.split_once('?') {
        Some((path, query)) => (path, Some(query)),
        None => (before_fragment, None),
    };

    let mut result = format!(
        "{scheme}://{}{}",
        quote_url_piece(netloc),
        quote_url_piece(path)
    );
    if let Some(query) = query {
        result.push('?');
        if !query.is_empty() {
            let pairs = parse(query.as_bytes()).into_owned();
            let mut serializer = Serializer::new(String::new());
            serializer.extend_pairs(pairs);
            result.push_str(&serializer.finish());
        }
    }
    if let Some(fragment) = fragment {
        result.push('#');
        result.push_str(&quote_url_piece(fragment));
    }
    result
}

#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    mod test_utils_html {
        use super::*;

        #[test]
        fn test_escape() {
            let cases = [
                ("&", "&amp;"),
                ("<", "&lt;"),
                (">", "&gt;"),
                ("\"", "&quot;"),
                ("'", "&#x27;"),
                ("<&", "&lt;&amp;"),
            ];
            for (input, expected) in cases {
                assert_eq!(escape(input), expected);
            }
        }

        #[test]
        fn test_escape_repeated_values() {
            assert_eq!(escape("&&"), "&amp;&amp;");
            assert_eq!(escape("<<"), "&lt;&lt;");
            assert_eq!(escape("\"\""), "&quot;&quot;");
            assert_eq!(escape("''"), "&#x27;&#x27;");
            assert_eq!(escape("<><&"), "&lt;&gt;&lt;&amp;");
        }

        #[test]
        fn test_conditional_escape() {
            let plain = "<h1>interop</h1>";
            assert_eq!(conditional_escape(plain), "&lt;h1&gt;interop&lt;/h1&gt;");

            let safe = mark_safe(plain);
            assert_eq!(conditional_escape(&safe), "<h1>interop</h1>");
        }

        #[test]
        fn test_format_html() {
            let safe_bold = mark_safe("<b>safe</b>");
            let safe_italic = mark_safe("<i>safe again</i>");
            assert_eq!(
                format_html(
                    "{} {} {} {}",
                    &[
                        "< Dangerous >",
                        &safe_bold,
                        "< dangerous again",
                        &safe_italic
                    ]
                ),
                "&lt; Dangerous &gt; <b>safe</b> &lt; dangerous again <i>safe again</i>",
            );
            assert_eq!(
                format_html("<i>{}</i>", &["Adam & Eve"]),
                "<i>Adam &amp; Eve</i>"
            );
        }

        #[test]
        fn test_linebreaks() {
            assert_eq!(
                linebreaks("para1\n\npara2\r\rpara3"),
                "<p>para1</p>\n\n<p>para2</p>\n\n<p>para3</p>",
            );
            assert_eq!(
                linebreaks("para1\nsub1\rsub2\n\npara2"),
                "<p>para1<br>sub1<br>sub2</p>\n\n<p>para2</p>",
            );
            assert_eq!(
                linebreaks("para1\r\n\r\npara2\rsub1\r\rpara4"),
                "<p>para1</p>\n\n<p>para2<br>sub1</p>\n\n<p>para4</p>",
            );
            assert_eq!(
                linebreaks("para1\tmore\n\npara2"),
                "<p>para1\tmore</p>\n\n<p>para2</p>",
            );
        }

        #[test]
        fn test_linebreaksbr() {
            assert_eq!(linebreaksbr("a\r\nb\rc\nd"), "a<br>b<br>c<br>d");
        }

        #[test]
        fn test_strip_tags() {
            let cases = [
                (
                    "<p>See: &#39;&eacute; is an apostrophe followed by e acute</p>",
                    "See: &#39;&eacute; is an apostrophe followed by e acute",
                ),
                (
                    "<p>See: &#x27;&eacute; is an apostrophe followed by e acute</p>",
                    "See: &#x27;&eacute; is an apostrophe followed by e acute",
                ),
                ("<adf>a", "a"),
                ("</adf>a", "a"),
                ("<asdf><asdf>e", "e"),
                ("hi, <f x", "hi, <f x"),
                ("234<235, right?", "234<235, right?"),
                ("</fe", "</fe"),
                ("<x>b<y>", "b"),
                ("a<p onclick=\"alert('<test>')\">b</p>c", "abc"),
                ("a<p a >b</p>c", "abc"),
                ("d<a:b c:d>e</p>f", "def"),
                (
                    "<strong>foo</strong><a href=\"http://example.com\">bar</a>",
                    "foobar",
                ),
                ("&gotcha&#;<>", "&gotcha&#;<>"),
                ("<script>alert()</script>&h", "alert()&h"),
            ];
            for (input, expected) in cases {
                assert_eq!(strip_tags(input), expected, "input: {input}");
            }
        }

        #[test]
        fn test_json_script() {
            assert_eq!(
                json_script(&json!("&<>"), Some("test_id")),
                "<script id=\"test_id\" type=\"application/json\">\"\\u0026\\u003C\\u003E\"</script>",
            );
            assert_eq!(
                json_script(&json!({"a": "<script>test&ing</script>"}), Some("test_id")),
                "<script id=\"test_id\" type=\"application/json\">{\"a\":\"\\u003Cscript\\u003Etest\\u0026ing\\u003C/script\\u003E\"}</script>",
            );
        }

        #[test]
        fn test_json_script_without_id() {
            assert_eq!(
                json_script(&json!({"key": "value"}), None),
                "<script type=\"application/json\">{\"key\":\"value\"}</script>",
            );
        }

        #[test]
        fn test_smart_urlquote() {
            let cases = [
                ("http://öäü.com/", "http://%C3%B6%C3%A4%C3%BC.com/"),
                (
                    "http://öäü.com/öäü/",
                    "http://%C3%B6%C3%A4%C3%BC.com/%C3%B6%C3%A4%C3%BC/",
                ),
                (
                    "http://example.com/path/öäü/",
                    "http://example.com/path/%C3%B6%C3%A4%C3%BC/",
                ),
                (
                    "http://example.com/%C3%B6/ä/",
                    "http://example.com/%C3%B6/%C3%A4/",
                ),
                (
                    "http://example.com/?x=1&y=2+3&z=",
                    "http://example.com/?x=1&y=2+3&z=",
                ),
                (
                    "http://example.com/?x=<>\"'",
                    "http://example.com/?x=%3C%3E%22%27",
                ),
                ("http://[fd00::1]/", "http://[fd00::1]/"),
            ];
            for (input, expected) in cases {
                assert_eq!(smart_urlquote(input), expected, "input: {input}");
            }
        }

        #[test]
        fn test_urlize() {
            assert_eq!(
                urlize("Search for google.com/?q=! and see.", None, false, false),
                "Search for <a href=\"https://google.com/?q=\">google.com/?q=</a>! and see.",
            );
            assert_eq!(
                urlize(
                    "Search for google.com/?q=1&lt! and see.",
                    None,
                    false,
                    false
                ),
                "Search for <a href=\"https://google.com/?q=1%3C\">google.com/?q=1&lt</a>! and see.",
            );
            assert_eq!(
                urlize("Visit example.com", None, false, false),
                "Visit <a href=\"https://example.com\">example.com</a>",
            );
            assert_eq!(
                urlize("http://www.foo.bar/", None, false, false),
                "<a href=\"http://www.foo.bar/\">http://www.foo.bar/</a>",
            );
            assert_eq!(
                urlize("host.djangoproject.com", None, false, false),
                "<a href=\"https://host.djangoproject.com\">host.djangoproject.com</a>",
            );
        }

        #[test]
        fn test_urlize_unicode_domain() {
            assert_eq!(
                urlize("Look on www.نامه‌ای.com.", None, false, false),
                "Look on <a href=\"https://www.%D9%86%D8%A7%D9%85%D9%87%E2%80%8C%D8%A7%DB%8C.com\">www.نامه‌ای.com</a>.",
            );
        }

        #[test]
        fn test_urlize_email() {
            assert_eq!(
                urlize("foo@example.com", None, false, false),
                "<a href=\"mailto:foo@example.com\">foo@example.com</a>",
            );
            assert_eq!(
                urlize("yes+this=is&a%valid!email@example.com", None, false, false),
                "<a href=\"mailto:yes%2Bthis%3Dis%26a%25valid%21email@example.com\">yes+this=is&a%valid!email@example.com</a>",
            );
            assert_eq!(
                urlize("foo@faß.example.com", None, false, false),
                "<a href=\"mailto:foo@fa%C3%9F.example.com\">foo@faß.example.com</a>",
            );
            assert_eq!(
                urlize("idna-2008@މިހާރު.example.mv", None, false, false),
                "<a href=\"mailto:idna-2008@%DE%89%DE%A8%DE%80%DE%A7%DE%83%DE%AA.example.mv\">idna-2008@މިހާރު.example.mv</a>",
            );
        }

        #[test]
        fn test_urlize_trim_url_limit() {
            assert_eq!(
                urlize(
                    "Visit https://example.com/very/long/path",
                    Some(12),
                    false,
                    false
                ),
                "Visit <a href=\"https://example.com/very/long/path\">https://exa…</a>",
            );
        }

        #[test]
        fn test_urlize_nofollow() {
            assert_eq!(
                urlize("Visit example.com", None, true, false),
                "Visit <a href=\"https://example.com\" rel=\"nofollow\">example.com</a>",
            );
        }

        #[test]
        fn test_urlize_autoescape() {
            assert_eq!(
                urlize("Use <google.com>", None, false, true),
                "Use &lt;google.com&gt;",
            );
        }

        #[test]
        fn test_urlize_unchanged_inputs() {
            let cases = [
                "foo@",
                "@foo.com",
                "foo@.example.com",
                "foo@localhost",
                "foo@localhost.",
                "test@example?;+!.com",
                "email me@example.com,then I'll respond",
                "[a link](https://www.djangoproject.com/)",
            ];
            for input in cases {
                assert_eq!(urlize(input, None, false, false), input, "input: {input}");
            }
        }

        const HTML_SPECIAL_CHARS: &[(char, &str)] = &[
            ('&', "&amp;"),
            ('<', "&lt;"),
            ('>', "&gt;"),
            ('\"', "&quot;"),
            ('\'', "&#x27;"),
        ];

        #[test]
        fn test_escape_patterns() {
            let patterns = ["{}", "asdf{}fdsa", "{}1", "1{}b"];
            for (raw, escaped) in HTML_SPECIAL_CHARS {
                for pattern in patterns {
                    let input = pattern.replace("{}", &raw.to_string());
                    let expected = pattern.replace("{}", escaped);
                    assert_eq!(escape(&input), expected);
                }
            }
        }

        #[test]
        fn test_format_html_literal_braces() {
            assert_eq!(
                format_html("<span>{{{}}}</span>", &["<x>"]),
                "<span>{&lt;x&gt;}</span>"
            );
        }

        #[test]
        fn test_escapejs_escapes_javascript_unsafe_characters() {
            assert_eq!(
                escapejs("double \"quotes\" \\ and 'single quotes'"),
                "double \\u0022quotes\\u0022 \\u005C and \\u0027single quotes\\u0027"
            );
            assert_eq!(
                escapejs("<script>alert(`x` & y);</script>"),
                "\\u003Cscript\\u003Ealert(\\u0060x\\u0060 \\u0026 y)\\u003B\\u003C/script\\u003E"
            );
        }

        #[test]
        fn test_escapejs_escapes_control_and_separator_characters() {
            assert_eq!(
                escapejs("\u{0000}\u{001F}\n\r\t\u{2028}\u{2029}"),
                "\\u0000\\u001F\\u000A\\u000D\\u0009\\u2028\\u2029"
            );
            assert_eq!(
                escapejs("and lots of whitespace: \r\n\t\u{000B}\u{000C}\u{0008}"),
                "and lots of whitespace: \\u000D\\u000A\\u0009\\u000B\\u000C\\u0008"
            );
            assert_eq!(
                escapejs("paragraph separator:\u{2029}and line separator:\u{2028}"),
                "paragraph separator:\\u2029and line separator:\\u2028"
            );
            assert_eq!(escapejs("`"), "\\u0060");
        }

        #[test]
        fn test_format_html_join_escapes_separator_and_marks_safe() {
            let first = format_html("<span>{}</span>", &["<x>"]);
            let second = format_html("<span>{}</span>", &[&mark_safe("<b>safe</b>")]);
            let joined = format_html_join(" & ", &[&first, &second]);

            assert_eq!(
                joined,
                "<span>&lt;x&gt;</span> &amp; <span><b>safe</b></span>"
            );
            assert_eq!(conditional_escape(&joined), joined);
        }

        #[test]
        fn test_format_html_join_empty_input_returns_safe_empty_string() {
            let joined = format_html_join("<ignored>", &[]);

            assert_eq!(joined, "");
            assert_eq!(conditional_escape(&joined), joined);
        }

        /// Whitespace is removed only when it sits directly between a `>` and
        /// the next `<`; leading or trailing whitespace and whitespace next to
        /// text content are preserved.
        #[test]
        fn test_strip_spaces_between_tags() {
            // (input, expected output), checked in order.
            let cases = [
                ("<p>foo</p>   \n\t <p>bar</p>", "<p>foo</p><p>bar</p>"),
                (" <p>foo</p>   <p>bar</p> ", " <p>foo</p><p>bar</p> "),
                ("<p>foo</p> text <p>bar</p>", "<p>foo</p> text <p>bar</p>"),
                (" <adf>", " <adf>"),
                ("<adf> ", "<adf> "),
                (" </adf> ", " </adf> "),
                (" <f> x</f>", " <f> x</f>"),
                ("<d> </d>", "<d></d>"),
                ("<p>hello </p>\n<p> world</p>", "<p>hello </p><p> world</p>"),
                ("\n<p>\t</p>\n<p> </p>\n", "\n<p></p><p></p>\n"),
            ];

            for (markup, expected) in cases {
                assert_eq!(strip_spaces_between_tags(markup), expected);
            }
        }

        /// Blank runs before and after the only paragraph must not produce
        /// extra markup: a single `<p>` element is expected.
        #[test]
        fn test_linebreaks_empty_segments_are_ignored() {
            let rendered = linebreaks("\n\npara\n\n");
            assert_eq!(rendered, "<p>para</p>");
        }

        /// A lone `<` or `>` inside ordinary text must come back from
        /// `strip_tags` exactly as it went in.
        #[test]
        fn test_strip_tags_preserves_plain_angle_brackets() {
            for text in ["a4<a5 right?", "b7>b2!"] {
                assert_eq!(strip_tags(text), text);
            }
        }

        /// An HTML comment is dropped entirely, joining the text on either side.
        #[test]
        fn test_strip_tags_comments() {
            let stripped = strip_tags("Hello<!-- hidden -->world");
            assert_eq!(stripped, "Helloworld");
        }

        /// Runs `strip_tags` over the two fixture files embedded at compile
        /// time and checks that the sentinel sentence survives, the output is
        /// shorter than the input, and the listed markup fragments are gone.
        #[test]
        fn test_strip_tags_files() {
            // All per-fixture checks live here. The fixtures come from
            // `include_str!`, hence the `'static` lifetime.
            fn check_fixture(fixture: &'static str, removed_fragments: &[&str]) {
                let stripped = strip_tags(fixture);
                assert!(
                    stripped.contains("Test string that has not been stripped."),
                    "fixture lost sentinel text"
                );
                assert!(
                    stripped.len() < fixture.len(),
                    "fixture was not meaningfully reduced"
                );
                removed_fragments.iter().for_each(|fragment| {
                    assert!(
                        !stripped.contains(fragment),
                        "fixture retained {fragment:?} after stripping"
                    );
                });
            }

            check_fixture(
                include_str!("../../../django/tests/utils_tests/files/strip_tags1.html"),
                &["<center>", "<morbi>"],
            );
            check_fixture(
                include_str!("../../../django/tests/utils_tests/files/strip_tags2.txt"),
                &["<!DOCTYPE html>", "<html>", "<script "],
            );
        }

        /// Angle brackets inside the JSON payload must be emitted as
        /// `\u003C` / `\u003E`, so that a `</script>` in the data cannot close
        /// the surrounding `<script id="x" type="application/json">` element.
        #[test]
        fn test_json_script_escapes_script_boundary() {
            let payload = json!({"html": "</script><script>alert(1)</script>"});
            let rendered = json_script(&payload, Some("x"));

            assert_eq!(
                rendered,
                "<script id=\"x\" type=\"application/json\">{\"html\":\"\\u003C/script\\u003E\\u003Cscript\\u003Ealert(1)\\u003C/script\\u003E\"}</script>",
            );
        }

        /// A URL nested in a query value is percent-encoded, while the `%26`
        /// that was already encoded in the input is not encoded a second time.
        #[test]
        fn test_smart_urlquote_existing_encoding_and_nested_url() {
            let quoted =
                smart_urlquote("http://example.com/?q=http://example.com/?x=1%26q=django");
            let expected =
                "http://example.com/?q=http%3A%2F%2Fexample.com%2F%3Fx%3D1%26q%3Ddjango";

            assert_eq!(quoted, expected);
        }

        /// Characters that are unsafe in the host part (a space, `"` and `>`)
        /// are percent-encoded; the rest of the URL is left as written.
        #[test]
        fn test_smart_urlquote_quotes_unsafe_host_text() {
            // (input URL, expected quoted URL), checked in order.
            let cases = [
                ("http://.www.f oo.bar/", "http://.www.f%20oo.bar/"),
                ("http://example.com\">", "http://example.com%22%3E"),
            ];

            for (url, expected) in cases {
                assert_eq!(smart_urlquote(url), expected);
            }
        }

        /// A bare e-mail address becomes a `mailto:` link, and the expected
        /// anchor carries no `rel` attribute.
        #[test]
        fn test_urlize_preserves_mailto_without_nofollow() {
            let linked = urlize("foo@example.com", None, true, false);
            let expected = "<a href=\"mailto:foo@example.com\">foo@example.com</a>";

            assert_eq!(linked, expected);
        }

        /// Parentheses wrapping a domain stay outside the generated anchor,
        /// and the href gains an `https://` scheme.
        #[test]
        fn test_urlize_handles_parenthesized_links() {
            let linked = urlize("(example.com)", None, false, false);
            let expected = "(<a href=\"https://example.com\">example.com</a>)";

            assert_eq!(linked, expected);
        }
    }
}