/// Percent-encodes `s` for use as a single URL component.
///
/// ASCII letters, digits and the four characters `-`, `_`, `.` and `~` are
/// copied through unchanged. Every other byte of the UTF-8 representation is
/// written as `%XX` with upper-case hexadecimal digits, so non-ASCII
/// characters expand to one escape per UTF-8 byte.
#[must_use]
pub fn url_encode(s: &str) -> String {
    use std::fmt::Write as _;

    // At least one output byte per input byte; escapes grow the buffer on demand.
    let mut out = String::with_capacity(s.len());
    for b in s.bytes() {
        if b.is_ascii_alphanumeric() || matches!(b, b'-' | b'_' | b'.' | b'~') {
            out.push(char::from(b));
        } else {
            // Format straight into `out` instead of building a temporary
            // `String` per byte. Writing to a `String` cannot fail, so the
            // `fmt::Result` is deliberately ignored.
            let _ = write!(out, "%{b:02X}");
        }
    }
    out
}
/// Decodes a percent-encoded, form-style string.
///
/// * `%XX`, where both `X` are hexadecimal digits (either case), becomes the
///   byte with that value.
/// * `+` becomes a space.
/// * A `%` that is not followed by exactly two hexadecimal digits is kept as
///   a literal `%`; the characters after it are then decoded on their own.
///
/// The decoded bytes are interpreted as UTF-8. Sequences that are not valid
/// UTF-8 are replaced with U+FFFD rather than dropped.
#[must_use]
pub fn url_decode(s: &str) -> String {
    /// Value of a single ASCII hexadecimal digit, or `None` for any other byte.
    ///
    /// Parsing digit by digit (instead of `u8::from_str_radix`) matters:
    /// `from_str_radix` also accepts a leading `+`, which would turn the
    /// malformed escape `%+A` into the byte 0x0A.
    fn hex_value(digit: u8) -> Option<u8> {
        match digit {
            b'0'..=b'9' => Some(digit - b'0'),
            b'a'..=b'f' => Some(digit - b'a' + 10),
            b'A'..=b'F' => Some(digit - b'A' + 10),
            _ => None,
        }
    }

    let bytes = s.as_bytes();
    let mut out = Vec::with_capacity(bytes.len());
    let mut i = 0;
    while i < bytes.len() {
        if bytes[i] == b'%' {
            // `get` returns `None` near the end of the input, so a truncated
            // escape falls through to the literal path below.
            let escaped = match (bytes.get(i + 1), bytes.get(i + 2)) {
                (Some(&hi), Some(&lo)) => hex_value(hi).zip(hex_value(lo)),
                _ => None,
            };
            if let Some((hi, lo)) = escaped {
                out.push((hi << 4) | lo);
                i += 3;
                continue;
            }
        }
        out.push(if bytes[i] == b'+' { b' ' } else { bytes[i] });
        i += 1;
    }
    String::from_utf8_lossy(&out).into_owned()
}
/// Converts an IRI into a URI by percent-encoding every byte that is not
/// allowed to appear literally.
///
/// ASCII letters and digits, the unreserved marks `-_.~`, the URI reserved
/// characters and `%` itself are copied through, so existing escapes and the
/// URI's structure survive. Every other byte (spaces, control characters,
/// each byte of a non-ASCII character) becomes `%XX` in upper-case hex.
#[must_use]
pub fn iri_to_uri(iri: &str) -> String {
    use std::fmt::Write as _;

    // Punctuation copied verbatim: unreserved marks, reserved characters, `%`.
    const KEPT_PUNCTUATION: &[u8] = b"-_.~/:?#[]@!$&'()*+,;=%";

    iri.bytes()
        .fold(String::with_capacity(iri.len()), |mut uri, b| {
            if b.is_ascii_alphanumeric() || KEPT_PUNCTUATION.contains(&b) {
                uri.push(char::from(b));
            } else {
                // Writing into a `String` cannot fail.
                let _ = write!(uri, "%{b:02X}");
            }
            uri
        })
}
/// Converts a URI into an IRI by decoding percent-escapes where that is safe.
///
/// Consecutive well-formed `%XX` escapes are decoded together and read as
/// UTF-8:
///
/// * Characters for which `is_uri_reserved` returns `true` are written back
///   as upper-case `%XX` escapes, so decoding never changes how the URI
///   splits into components.
/// * Every other validly encoded character is emitted literally.
/// * Bytes that do not form valid UTF-8 keep their original escape text.
///   Valid escapes next to them in the same run are still decoded, so
///   `%C3%A9%FF` becomes `é%FF`.
///
/// A `%` that is not followed by two hexadecimal digits is copied through as
/// a literal `%`.
///
/// Note that `%25` decodes to a literal `%`, because `%` is not in the
/// reserved set; the result is therefore not guaranteed to round-trip.
#[must_use]
pub fn uri_to_iri(uri: &str) -> String {
    use std::fmt::Write as _;

    // Decoding never lengthens the text, so the input length is an upper bound.
    let mut out = String::with_capacity(uri.len());
    // Scratch buffer for the bytes of the current escape run, reused per run.
    let mut run: Vec<u8> = Vec::new();
    let mut rest = uri;

    while let Some(pos) = rest.find('%') {
        // Everything before the next `%` is ordinary text.
        out.push_str(&rest[..pos]);
        rest = &rest[pos..];

        // Collect the maximal run of consecutive well-formed `%XX` escapes.
        // Each collected byte corresponds to exactly three input bytes.
        run.clear();
        while let [b'%', hi, lo, ..] = &rest.as_bytes()[run.len() * 3..] {
            match (char::from(*hi).to_digit(16), char::from(*lo).to_digit(16)) {
                // Both digits are at most 0xF, so the value always fits in a byte.
                (Some(h), Some(l)) => run.push((h * 16 + l) as u8),
                _ => break,
            }
        }

        if run.is_empty() {
            // Not an escape: keep the `%` and carry on right after it.
            out.push('%');
            rest = &rest[1..];
            continue;
        }

        // `escaped` is pure ASCII (`%`, hex, hex, ...), so slicing it at
        // multiples of three is always on a character boundary.
        let (escaped, tail) = rest.split_at(run.len() * 3);
        rest = tail;

        let mut done = 0;
        while done < run.len() {
            let pending = &run[done..];
            let (text, invalid_len) = match std::str::from_utf8(pending) {
                Ok(text) => (text, 0),
                Err(err) => {
                    let valid = &pending[..err.valid_up_to()];
                    // `error_len() == None` means the run ends inside a
                    // truncated sequence; everything left is then invalid.
                    let invalid_len = err
                        .error_len()
                        .unwrap_or(pending.len() - valid.len());
                    // The prefix up to `valid_up_to()` is valid UTF-8 by
                    // contract, so the fallback is never taken.
                    (std::str::from_utf8(valid).unwrap_or_default(), invalid_len)
                }
            };

            for ch in text.chars() {
                if is_uri_reserved(ch) {
                    let mut buf = [0u8; 4];
                    for byte in ch.encode_utf8(&mut buf).bytes() {
                        // Writing into a `String` cannot fail.
                        let _ = write!(out, "%{byte:02X}");
                    }
                } else {
                    out.push(ch);
                }
            }

            // Undecodable bytes keep the exact escape text they arrived with.
            let invalid_start = done + text.len();
            let invalid_end = invalid_start + invalid_len;
            out.push_str(&escaped[invalid_start * 3..invalid_end * 3]);
            done = invalid_end;
        }
    }

    out.push_str(rest);
    out
}
/// Returns `true` when `ch` is one of the URI reserved characters
/// `: / ? # [ ] @ ! $ & ' ( ) * + , ; =`.
fn is_uri_reserved(ch: char) -> bool {
    // The seven general delimiters first, then the eleven sub-delimiters.
    const RESERVED: &str = ":/?#[]@!$&'()*+,;=";
    RESERVED.contains(ch)
}
/// Percent-encodes a string for use as the path part of a URI.
///
/// ASCII letters and digits, `-_.~`, the separators `/`, `:` and `@`, and
/// the punctuation `! $ & ' ( ) * + , ; =` are kept literally. Everything
/// else is written as upper-case `%XX`, one escape per UTF-8 byte. That
/// includes `?`, `#`, `%` and all non-ASCII bytes.
#[must_use]
pub fn escape_uri_path(path: &str) -> String {
    use std::fmt::Write as _;

    let mut escaped = String::with_capacity(path.len());
    for b in path.bytes() {
        match b {
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9'
            | b'-' | b'_' | b'.' | b'~'
            | b'/' | b':' | b'@'
            | b'!' | b'$' | b'&' | b'\'' | b'(' | b')'
            | b'*' | b'+' | b',' | b';' | b'=' => escaped.push(char::from(b)),
            _ => {
                // Writing into a `String` cannot fail.
                let _ = write!(escaped, "%{b:02X}");
            }
        }
    }
    escaped
}
/// Turns a file-system path into text that can be embedded in a URI.
///
/// Backslashes are first converted to forward slashes. After that, ASCII
/// letters and digits plus `-_.~/!*()'` are kept, and every other byte is
/// written as upper-case `%XX`. That includes `:`, `?`, `#`, `&`, `=` and
/// all non-ASCII bytes.
#[must_use]
pub fn filepath_to_uri(path: &str) -> String {
    use std::fmt::Write as _;

    // Punctuation that is copied through unescaped.
    const SAFE_PUNCTUATION: &[u8] = b"-_.~/!*()'";

    let mut uri = String::with_capacity(path.len());
    // A backslash is a single ASCII byte and never part of a multi-byte
    // UTF-8 sequence, so swapping it bytewise matches a string replace.
    let forward_slashed = path.bytes().map(|b| if b == b'\\' { b'/' } else { b });
    for b in forward_slashed {
        if b.is_ascii_alphanumeric() || SAFE_PUNCTUATION.contains(&b) {
            uri.push(char::from(b));
        } else {
            // Writing into a `String` cannot fail.
            let _ = write!(uri, "%{b:02X}");
        }
    }
    uri
}
/// Encodes `bytes` as base64 using the `URL_SAFE_NO_PAD` engine of the
/// `base64` crate, i.e. the URL-safe alphabet with no trailing `=` padding.
#[must_use]
pub fn urlsafe_base64_encode(bytes: &[u8]) -> String {
    use base64::engine::general_purpose::URL_SAFE_NO_PAD;
    use base64::Engine as _;

    URL_SAFE_NO_PAD.encode(bytes)
}
/// Decodes URL-safe base64 text, returning `None` when the input is not
/// valid for the `URL_SAFE_NO_PAD` engine of the `base64` crate.
///
/// Any number of trailing `=` characters is stripped before decoding, so
/// both padded and unpadded input are accepted.
#[must_use]
pub fn urlsafe_base64_decode(s: &str) -> Option<Vec<u8>> {
    use base64::engine::general_purpose::URL_SAFE_NO_PAD;
    use base64::Engine as _;

    let unpadded = s.trim_end_matches('=');
    URL_SAFE_NO_PAD.decode(unpadded).ok()
}
#[cfg(test)]
mod tests {
    //! Unit tests for the percent-encoding, IRI/URI conversion and URL-safe
    //! base64 helpers defined in the parent module.

    use super::*;

    // ---- url_decode ------------------------------------------------------

    #[test]
    fn plain_text_passes_through() {
        let decoded = url_decode("hello");
        assert_eq!(decoded, "hello");
    }

    #[test]
    fn empty_string_yields_empty() {
        let decoded = url_decode("");
        assert_eq!(decoded, "");
    }

    #[test]
    fn percent_20_becomes_space() {
        let decoded = url_decode("hello%20world");
        assert_eq!(decoded, "hello world");
    }

    #[test]
    fn plus_becomes_space() {
        let decoded = url_decode("hello+world");
        assert_eq!(decoded, "hello world");
    }

    #[test]
    fn percent_2b_decodes_to_literal_plus() {
        let decoded = url_decode("a%2Bb");
        assert_eq!(decoded, "a+b");
    }

    #[test]
    fn mixed_plus_and_percent() {
        let decoded = url_decode("hello+world%21");
        assert_eq!(decoded, "hello world!");
    }

    #[test]
    fn mixed_case_hex_accepted() {
        let decoded = url_decode("%2A%2a%2F%2f");
        assert_eq!(decoded, "**//");
    }

    #[test]
    fn unicode_via_utf8_bytes() {
        let decoded = url_decode("caf%C3%A9");
        assert_eq!(decoded, "café");
    }

    #[test]
    fn malformed_percent_kept_as_literal() {
        let decoded = url_decode("a%2Xb");
        assert_eq!(decoded, "a%2Xb");
    }

    #[test]
    fn malformed_non_hex_first_digit() {
        let decoded = url_decode("a%XYb");
        assert_eq!(decoded, "a%XYb");
    }

    #[test]
    fn trailing_percent_kept_as_literal() {
        // A truncated escape decodes to itself.
        for truncated in ["foo%", "foo%2"] {
            assert_eq!(url_decode(truncated), truncated);
        }
    }

    #[test]
    fn invalid_utf8_is_replaced_not_dropped() {
        let got = url_decode("hello%C3");
        assert!(got.starts_with("hello"), "got: {got:?}");
        let kept_or_replaced = got.contains("%C3") || got.contains('\u{FFFD}');
        assert!(kept_or_replaced, "got: {got:?}");
    }

    #[test]
    fn invalid_utf8_in_middle_keeps_well_formed_tail() {
        let got = url_decode("a%C3%28b");
        assert!(got.starts_with('a'), "got: {got:?}");
        assert!(got.ends_with('b'), "got: {got:?}");
        let has_replacement = got.contains('\u{FFFD}');
        assert!(has_replacement, "expected replacement char, got: {got:?}");
    }

    #[test]
    fn no_panic_on_arbitrary_input() {
        ["%", "%%", "%%%", "+%", "%+", "+%2", "%2+"]
            .iter()
            .for_each(|s| {
                let _ = url_decode(s);
            });
    }

    #[test]
    fn dollar_amp_equal_unchanged() {
        let decoded = url_decode("a=b&c=d");
        assert_eq!(decoded, "a=b&c=d");
    }

    // ---- url_encode ------------------------------------------------------

    #[test]
    fn url_encode_unreserved_pass_through() {
        // Unreserved input encodes to itself.
        for unreserved in ["plain", "foo-bar.baz_~", "AaZz09"] {
            assert_eq!(url_encode(unreserved), unreserved);
        }
    }

    #[test]
    fn url_encode_reserved_chars_percent_encoded() {
        let cases = [
            ("hello world", "hello%20world"),
            ("a&b=c", "a%26b%3Dc"),
            ("?#", "%3F%23"),
        ];
        for (raw, expected) in cases {
            assert_eq!(url_encode(raw), expected);
        }
    }

    #[test]
    fn url_encode_decode_round_trip() {
        let inputs = [
            "plain",
            "hello world",
            "a&b=c",
            "café",
            "100%off",
            "x_y-z.0",
            "?#&=+/!",
        ];
        for input in inputs {
            let decoded = url_decode(&url_encode(input));
            assert_eq!(decoded, input, "round-trip failed on `{input}`");
        }
    }

    // ---- urlsafe base64 --------------------------------------------------

    #[test]
    fn urlsafe_b64_encode_matches_django_examples() {
        let cases: [(&[u8], &str); 3] = [(b"foo", "Zm9v"), (b"foobar", "Zm9vYmFy"), (b"", "")];
        for (raw, expected) in cases {
            assert_eq!(urlsafe_base64_encode(raw), expected);
        }
    }

    #[test]
    fn urlsafe_b64_encode_drops_padding() {
        let encoded = urlsafe_base64_encode(b"f");
        assert_eq!(encoded, "Zg");
        assert!(!encoded.contains('='));
    }

    #[test]
    fn urlsafe_b64_encode_uses_url_safe_alphabet() {
        let encoded = urlsafe_base64_encode(&[0xfb, 0xff]);
        assert_eq!(encoded, "-_8");
        for standard_only in ['+', '/'] {
            assert!(!encoded.contains(standard_only));
        }
    }

    #[test]
    fn urlsafe_b64_decode_simple() {
        let decoded = urlsafe_base64_decode("Zm9v");
        assert_eq!(decoded, Some(b"foo".to_vec()));
    }

    #[test]
    fn urlsafe_b64_decode_accepts_padding_for_django_compat() {
        let cases: [(&str, &[u8]); 2] = [("Zm9v====", b"foo"), ("Zg==", b"f")];
        for (padded, expected) in cases {
            assert_eq!(urlsafe_base64_decode(padded), Some(expected.to_vec()));
        }
    }

    #[test]
    fn urlsafe_b64_decode_rejects_standard_b64_chars() {
        let decoded = urlsafe_base64_decode("a+b/c");
        assert!(decoded.is_none());
    }

    #[test]
    fn urlsafe_b64_decode_rejects_garbage() {
        for garbage in ["!@#$%", "hello\n"] {
            assert!(urlsafe_base64_decode(garbage).is_none());
        }
    }

    #[test]
    fn urlsafe_b64_decode_empty_is_empty_vec() {
        let decoded = urlsafe_base64_decode("");
        assert_eq!(decoded, Some(Vec::new()));
    }

    // ---- iri_to_uri ------------------------------------------------------

    #[test]
    fn iri_to_uri_ascii_passes_through() {
        for ascii in ["/path/here", "plain-text_value.1~"] {
            assert_eq!(iri_to_uri(ascii), ascii);
        }
    }

    #[test]
    fn iri_to_uri_encodes_non_ascii_utf8() {
        let uri = iri_to_uri("/café");
        assert_eq!(uri, "/caf%C3%A9");
    }

    #[test]
    fn iri_to_uri_preserves_reserved_syntax_chars() {
        for structured in ["/path?q=hello&page=1#frag", "scheme://user@host:8080/p"] {
            assert_eq!(iri_to_uri(structured), structured);
        }
    }

    #[test]
    fn iri_to_uri_preserves_existing_percent_encoded() {
        let uri = iri_to_uri("/already%20encoded");
        assert_eq!(uri, "/already%20encoded");
    }

    #[test]
    fn iri_to_uri_encodes_space_and_control_chars() {
        let cases = [("a b", "a%20b"), ("a\nb", "a%0Ab")];
        for (iri, expected) in cases {
            assert_eq!(iri_to_uri(iri), expected);
        }
    }

    #[test]
    fn iri_to_uri_handles_full_unicode_range() {
        let out = iri_to_uri("/😀");
        assert_eq!(out, "/%F0%9F%98%80");
    }

    #[test]
    fn iri_to_uri_empty_is_empty() {
        let uri = iri_to_uri("");
        assert_eq!(uri, "");
    }

    // ---- escape_uri_path -------------------------------------------------

    #[test]
    fn escape_uri_path_preserves_slashes() {
        let escaped = escape_uri_path("/a/b/c");
        assert_eq!(escaped, "/a/b/c");
    }

    #[test]
    fn escape_uri_path_encodes_spaces() {
        let escaped = escape_uri_path("/a path");
        assert_eq!(escaped, "/a%20path");
    }

    #[test]
    fn escape_uri_path_encodes_non_ascii() {
        let escaped = escape_uri_path("/café");
        assert_eq!(escaped, "/caf%C3%A9");
    }

    #[test]
    fn escape_uri_path_encodes_query_and_fragment_chars() {
        let cases = [("/with?query", "/with%3Fquery"), ("/with#frag", "/with%23frag")];
        for (path, expected) in cases {
            assert_eq!(escape_uri_path(path), expected);
        }
    }

    #[test]
    fn escape_uri_path_encodes_percent_sign() {
        let escaped = escape_uri_path("/100%");
        assert_eq!(escaped, "/100%25");
    }

    #[test]
    fn escape_uri_path_preserves_sub_delims_and_colon_at() {
        for kept in ["/a:b@c", "/a!b$c&d'e(f)g"] {
            assert_eq!(escape_uri_path(kept), kept);
        }
    }

    #[test]
    fn escape_uri_path_empty() {
        let escaped = escape_uri_path("");
        assert_eq!(escaped, "");
    }

    #[test]
    fn urlsafe_b64_round_trip_for_random_bytes() {
        // Every possible byte value exactly once, in ascending order.
        let input: Vec<u8> = (0u8..=255).collect();
        let encoded = urlsafe_base64_encode(&input);
        let decoded = urlsafe_base64_decode(&encoded).expect("round-trip");
        assert_eq!(decoded, input);
    }

    // ---- uri_to_iri ------------------------------------------------------

    #[test]
    fn uri_to_iri_decodes_non_ascii_utf8() {
        let cases = [("/caf%C3%A9", "/café"), ("/%E4%B8%AD%E6%96%87", "/中文")];
        for (uri, expected) in cases {
            assert_eq!(uri_to_iri(uri), expected);
        }
    }

    #[test]
    fn uri_to_iri_keeps_reserved_chars_encoded() {
        for already_minimal in ["/a%2Fb", "/q%3Fk%3Dv%26"] {
            assert_eq!(uri_to_iri(already_minimal), already_minimal);
        }
    }

    #[test]
    fn uri_to_iri_decodes_non_reserved_ascii() {
        let cases = [("/with%20space", "/with space"), ("/foo%5Fbar", "/foo_bar")];
        for (uri, expected) in cases {
            assert_eq!(uri_to_iri(uri), expected);
        }
    }

    #[test]
    fn uri_to_iri_passes_already_decoded_through() {
        for plain in ["/plain/path", ""] {
            assert_eq!(uri_to_iri(plain), plain);
        }
    }

    #[test]
    fn uri_to_iri_mixed_reserved_and_unicode() {
        let iri = uri_to_iri("/caf%C3%A9/a%2Fb");
        assert_eq!(iri, "/café/a%2Fb");
    }

    #[test]
    fn uri_to_iri_invalid_utf8_stays_encoded() {
        let iri = uri_to_iri("/x%FFy");
        assert_eq!(iri, "/x%FFy");
    }

    #[test]
    fn uri_to_iri_malformed_percent_passes_through() {
        for malformed in ["100%off", "x%"] {
            assert_eq!(uri_to_iri(malformed), malformed);
        }
    }

    #[test]
    fn iri_to_uri_then_uri_to_iri_round_trip_for_unicode() {
        let original = "/café";
        let decoded = uri_to_iri(&iri_to_uri(original));
        assert_eq!(decoded, original);
    }

    // ---- filepath_to_uri -------------------------------------------------

    #[test]
    fn filepath_to_uri_plain_path_passes_through() {
        for plain in ["/static/css/main.css", ""] {
            assert_eq!(filepath_to_uri(plain), plain);
        }
    }

    #[test]
    fn filepath_to_uri_encodes_spaces() {
        let uri = filepath_to_uri("/static/My File.png");
        assert_eq!(uri, "/static/My%20File.png");
    }

    #[test]
    fn filepath_to_uri_encodes_non_ascii() {
        let uri = filepath_to_uri("/café/menu.html");
        assert_eq!(uri, "/caf%C3%A9/menu.html");
    }

    #[test]
    fn filepath_to_uri_normalizes_backslash_to_forward_slash() {
        let cases = [("C:\\static\\app.js", "C%3A/static/app.js"), ("a\\b\\c", "a/b/c")];
        for (path, expected) in cases {
            assert_eq!(filepath_to_uri(path), expected);
        }
    }

    #[test]
    fn filepath_to_uri_keeps_safe_set_chars() {
        for safe in ["/a~b!c(d)e'f*g", "a-b_c.d"] {
            assert_eq!(filepath_to_uri(safe), safe);
        }
    }

    #[test]
    fn filepath_to_uri_encodes_url_syntactic_chars() {
        let cases = [
            ("/x?y#z", "/x%3Fy%23z"),
            ("/[bracket]", "/%5Bbracket%5D"),
            ("a:b", "a%3Ab"),
            ("a&b", "a%26b"),
        ];
        for (path, expected) in cases {
            assert_eq!(filepath_to_uri(path), expected);
        }
    }

    #[test]
    fn filepath_to_uri_distinct_from_escape_uri_path_on_colon() {
        // (input, filepath_to_uri result, escape_uri_path result)
        let cases = [("a:b", "a%3Ab", "a:b"), ("a&b=c", "a%26b%3Dc", "a&b=c")];
        for (input, as_filepath, as_uri_path) in cases {
            assert_eq!(filepath_to_uri(input), as_filepath);
            assert_eq!(escape_uri_path(input), as_uri_path);
        }
    }
}