anodizer_core/url.rs
//! URL encoding helpers.
//!
//! `percent_encode_unreserved` implements the RFC 3986 unreserved-character
//! set used by OAuth 1.0 signatures and generic URL path segments. Prior
//! duplicates lived in `stage-announce/src/twitter.rs` and
//! `cli/src/commands/release/milestones.rs` with byte-equivalent but
//! independently defined character sets.
//!
//! `percent_encode_path_segment` is stricter: it additionally encodes `~`,
//! keeping only ASCII alphanumerics and `- _ .`.
8
9use percent_encoding::{AsciiSet, CONTROLS, NON_ALPHANUMERIC, utf8_percent_encode};
10
11/// RFC 3986 unreserved set: `A-Z a-z 0-9 - _ . ~`. Every other byte is
12/// encoded as `%XX`.
13const UNRESERVED: &AsciiSet = &CONTROLS
14 .add(b' ')
15 .add(b'!')
16 .add(b'"')
17 .add(b'#')
18 .add(b'$')
19 .add(b'%')
20 .add(b'&')
21 .add(b'\'')
22 .add(b'(')
23 .add(b')')
24 .add(b'*')
25 .add(b'+')
26 .add(b',')
27 .add(b'/')
28 .add(b':')
29 .add(b';')
30 .add(b'<')
31 .add(b'=')
32 .add(b'>')
33 .add(b'?')
34 .add(b'@')
35 .add(b'[')
36 .add(b'\\')
37 .add(b']')
38 .add(b'^')
39 .add(b'`')
40 .add(b'{')
41 .add(b'|')
42 .add(b'}');
43
44/// Percent-encode every byte that isn't in the RFC 3986 unreserved set
45/// (`A-Z a-z 0-9 - _ . ~`). Used for OAuth 1.0 signature base strings and
46/// generic URL path/query segments where only unreserved chars pass through.
47pub fn percent_encode_unreserved(s: &str) -> String {
48 utf8_percent_encode(s, UNRESERVED).to_string()
49}
50
51/// Encode set for a single URL path segment: everything that isn't alphanumeric
52/// or one of `- _ .` is percent-encoded. Notably `+`, `#`, `?`, `/`, space, and
53/// all other reserved characters are encoded — safe for tag names, owner/repo
54/// names, file names, and GitLab project-id path segments (where `/` must
55/// become `%2F`).
56const PATH_SEGMENT: &AsciiSet = &NON_ALPHANUMERIC.remove(b'-').remove(b'_').remove(b'.');
57
58/// Percent-encode a single URL path segment.
59///
60/// Keeps only `A-Z a-z 0-9 - _ .`. Used for tags, owner/repo names, package
61/// names, versions, and file names in release backend URLs so that identifiers
62/// like `v1.0.0+build.1` or `group/project` are safely encoded (`+` → `%2B`,
63/// `/` → `%2F`). Unifies previously-duplicated sets in the GitHub/GitLab/Gitea
64/// release backends that produced diverging URLs for the same tag.
65pub fn percent_encode_path_segment(s: &str) -> String {
66 utf8_percent_encode(s, PATH_SEGMENT).to_string()
67}
68
#[cfg(test)]
mod tests {
    //! Unit tests for both encoders. `percent_encode_path_segment` is covered
    //! explicitly because release-backend URLs depend on `+` and `/` being
    //! escaped, and on `~` being treated differently from the unreserved set.

    use super::*;

    #[test]
    fn unreserved_passes_through() {
        assert_eq!(percent_encode_unreserved("hello"), "hello");
        assert_eq!(percent_encode_unreserved("A-Za-z0-9-_.~"), "A-Za-z0-9-_.~");
    }

    #[test]
    fn space_and_specials_encoded() {
        assert_eq!(percent_encode_unreserved("hello world"), "hello%20world");
        assert_eq!(percent_encode_unreserved("a=b&c=d"), "a%3Db%26c%3Dd");
    }

    #[test]
    fn percent_and_plus_encoded() {
        // A literal `%` must itself be escaped so the output is unambiguous.
        assert_eq!(percent_encode_unreserved("100%"), "100%25");
        assert_eq!(percent_encode_unreserved("v1.0.0+build.1"), "v1.0.0%2Bbuild.1");
    }

    #[test]
    fn slashes_encoded() {
        assert_eq!(percent_encode_unreserved("a/b/c"), "a%2Fb%2Fc");
    }

    #[test]
    fn utf8_encoded_per_byte() {
        // é = 0xC3 0xA9 in UTF-8
        assert_eq!(percent_encode_unreserved("café"), "caf%C3%A9");
        assert_eq!(percent_encode_path_segment("café"), "caf%C3%A9");
    }

    #[test]
    fn empty_input_yields_empty_output() {
        assert_eq!(percent_encode_unreserved(""), "");
        assert_eq!(percent_encode_path_segment(""), "");
    }

    #[test]
    fn path_segment_keeps_safe_chars() {
        assert_eq!(percent_encode_path_segment("hello"), "hello");
        assert_eq!(percent_encode_path_segment("A-Za-z0-9-_."), "A-Za-z0-9-_.");
    }

    #[test]
    fn path_segment_encodes_reserved() {
        assert_eq!(percent_encode_path_segment("v1.0.0+build.1"), "v1.0.0%2Bbuild.1");
        assert_eq!(percent_encode_path_segment("group/project"), "group%2Fproject");
        assert_eq!(percent_encode_path_segment("my file.tar.gz"), "my%20file.tar.gz");
        assert_eq!(percent_encode_path_segment("a?b#c"), "a%3Fb%23c");
    }

    #[test]
    fn path_segment_encodes_tilde_unlike_unreserved() {
        // `~` is unreserved per RFC 3986 but is not exempted from PATH_SEGMENT.
        assert_eq!(percent_encode_unreserved("~user"), "~user");
        assert_eq!(percent_encode_path_segment("~user"), "%7Euser");
    }
}
95}