Skip to main content

anodizer_core/
url.rs

//! URL encoding helpers.
//!
//! `percent_encode_unreserved` implements the RFC 3986 unreserved-character
//! set used by OAuth 1.0 signatures and generic URL path segments. Prior
//! duplicates lived in `stage-announce/src/twitter.rs` and
//! `cli/src/commands/release/milestones.rs` with byte-equivalent but
//! independently-defined character sets.
//!
//! `percent_encode_path_segment` is the stricter variant for a single URL path
//! segment: it keeps only `A-Z a-z 0-9 - _ .`, so `~` is encoded as well.
8
9use percent_encoding::{AsciiSet, CONTROLS, NON_ALPHANUMERIC, utf8_percent_encode};
10
11/// RFC 3986 unreserved set: `A-Z a-z 0-9 - _ . ~`. Every other byte is
12/// encoded as `%XX`.
13const UNRESERVED: &AsciiSet = &CONTROLS
14    .add(b' ')
15    .add(b'!')
16    .add(b'"')
17    .add(b'#')
18    .add(b'$')
19    .add(b'%')
20    .add(b'&')
21    .add(b'\'')
22    .add(b'(')
23    .add(b')')
24    .add(b'*')
25    .add(b'+')
26    .add(b',')
27    .add(b'/')
28    .add(b':')
29    .add(b';')
30    .add(b'<')
31    .add(b'=')
32    .add(b'>')
33    .add(b'?')
34    .add(b'@')
35    .add(b'[')
36    .add(b'\\')
37    .add(b']')
38    .add(b'^')
39    .add(b'`')
40    .add(b'{')
41    .add(b'|')
42    .add(b'}');
43
44/// Percent-encode every byte that isn't in the RFC 3986 unreserved set
45/// (`A-Z a-z 0-9 - _ . ~`). Used for OAuth 1.0 signature base strings and
46/// generic URL path/query segments where only unreserved chars pass through.
47pub fn percent_encode_unreserved(s: &str) -> String {
48    utf8_percent_encode(s, UNRESERVED).to_string()
49}
50
51/// Encode set for a single URL path segment: everything that isn't alphanumeric
52/// or one of `- _ .` is percent-encoded. Notably `+`, `#`, `?`, `/`, space, and
53/// all other reserved characters are encoded — safe for tag names, owner/repo
54/// names, file names, and GitLab project-id path segments (where `/` must
55/// become `%2F`).
56const PATH_SEGMENT: &AsciiSet = &NON_ALPHANUMERIC.remove(b'-').remove(b'_').remove(b'.');
57
58/// Percent-encode a single URL path segment.
59///
60/// Keeps only `A-Z a-z 0-9 - _ .`. Used for tags, owner/repo names, package
61/// names, versions, and file names in release backend URLs so that identifiers
62/// like `v1.0.0+build.1` or `group/project` are safely encoded (`+` → `%2B`,
63/// `/` → `%2F`). Unifies previously-duplicated sets in the GitHub/GitLab/Gitea
64/// release backends that produced diverging URLs for the same tag.
65pub fn percent_encode_path_segment(s: &str) -> String {
66    utf8_percent_encode(s, PATH_SEGMENT).to_string()
67}
68
#[cfg(test)]
mod tests {
    use super::*;

    /// Expected encoding of one ASCII byte: the byte itself when it is
    /// alphanumeric or listed in `keep`, otherwise `%XX` in uppercase hex.
    fn expected_ascii(byte: u8, keep: &[u8]) -> String {
        if byte.is_ascii_alphanumeric() || keep.contains(&byte) {
            char::from(byte).to_string()
        } else {
            format!("%{byte:02X}")
        }
    }

    #[test]
    fn unreserved_passes_through() {
        assert_eq!(percent_encode_unreserved("hello"), "hello");
        assert_eq!(percent_encode_unreserved("A-Za-z0-9-_.~"), "A-Za-z0-9-_.~");
    }

    #[test]
    fn space_and_specials_encoded() {
        assert_eq!(percent_encode_unreserved("hello world"), "hello%20world");
        assert_eq!(percent_encode_unreserved("a=b&c=d"), "a%3Db%26c%3Dd");
    }

    #[test]
    fn slashes_encoded() {
        assert_eq!(percent_encode_unreserved("a/b/c"), "a%2Fb%2Fc");
    }

    #[test]
    fn utf8_encoded_per_byte() {
        // é = 0xC3 0xA9 in UTF-8
        assert_eq!(percent_encode_unreserved("café"), "caf%C3%A9");
    }

    #[test]
    fn unreserved_covers_every_ascii_byte() {
        // Guards the hand-maintained UNRESERVED table: sweeping all of ASCII
        // catches a byte that was forgotten or added by mistake.
        for byte in 0u8..=0x7F {
            let input = char::from(byte).to_string();
            assert_eq!(
                percent_encode_unreserved(&input),
                expected_ascii(byte, b"-_.~"),
                "byte 0x{byte:02X}"
            );
        }
    }

    #[test]
    fn empty_input_stays_empty() {
        assert_eq!(percent_encode_unreserved(""), "");
        assert_eq!(percent_encode_path_segment(""), "");
    }

    #[test]
    fn path_segment_safe_chars_pass_through() {
        assert_eq!(percent_encode_path_segment("my_repo-name.rs"), "my_repo-name.rs");
        assert_eq!(percent_encode_path_segment("v1.0.0"), "v1.0.0");
    }

    #[test]
    fn path_segment_encodes_plus_and_slash() {
        assert_eq!(percent_encode_path_segment("v1.0.0+build.1"), "v1.0.0%2Bbuild.1");
        assert_eq!(percent_encode_path_segment("group/project"), "group%2Fproject");
    }

    #[test]
    fn path_segment_encodes_tilde_unlike_unreserved() {
        // `~` is the one byte on which the two encoders disagree.
        assert_eq!(percent_encode_path_segment("a~b"), "a%7Eb");
        assert_eq!(percent_encode_unreserved("a~b"), "a~b");
    }

    #[test]
    fn path_segment_utf8_encoded_per_byte() {
        // é = 0xC3 0xA9 in UTF-8
        assert_eq!(percent_encode_path_segment("café"), "caf%C3%A9");
    }

    #[test]
    fn path_segment_covers_every_ascii_byte() {
        for byte in 0u8..=0x7F {
            let input = char::from(byte).to_string();
            assert_eq!(
                percent_encode_path_segment(&input),
                expected_ascii(byte, b"-_."),
                "byte 0x{byte:02X}"
            );
        }
    }
}