breezyshim/
urlutils.rs

1//! URL manipulation utilities.
2use pyo3::prelude::*;
3
4/// Join segment parameters to a URL.
5///
6/// This function adds the specified parameters to a URL as segment parameters.
7///
8/// # Parameters
9///
10/// * `url` - The URL to add parameters to.
11/// * `parameters` - The parameters to add to the URL.
12///
13/// # Returns
14///
15/// A new URL with the specified parameters added.
16pub fn join_segment_parameters(
17    url: &url::Url,
18    parameters: std::collections::HashMap<String, String>,
19) -> url::Url {
20    pyo3::Python::attach(|py| {
21        let urlutils = py.import("breezy.urlutils").unwrap();
22        urlutils
23            .call_method1("join_segment_parameters", (url.to_string(), parameters))
24            .unwrap()
25            .extract::<String>()
26            .map(|s| url::Url::parse(s.as_str()).unwrap())
27            .unwrap()
28    })
29}
30
31/// Split segment parameters from a URL.
32///
33/// This function extracts segment parameters from a URL.
34///
35/// # Parameters
36///
37/// * `url` - The URL to extract parameters from.
38///
39/// # Returns
40///
41/// A tuple containing the URL without parameters and a map of the parameters.
42pub fn split_segment_parameters(
43    url: &url::Url,
44) -> (url::Url, std::collections::HashMap<String, String>) {
45    pyo3::Python::attach(|py| {
46        let urlutils = py.import("breezy.urlutils").unwrap();
47        urlutils
48            .call_method1("split_segment_parameters", (url.to_string(),))
49            .unwrap()
50            .extract::<(String, std::collections::HashMap<String, String>)>()
51            .map(|(s, m)| (url::Url::parse(s.as_str()).unwrap(), m))
52            .unwrap()
53    })
54}
55
/// Return `true` if `c` never needs percent-encoding.
///
/// The accepted set is ASCII letters, ASCII digits, and the four punctuation
/// characters `-`, `_`, `.` and `~`.
fn char_is_safe(c: char) -> bool {
    c.is_ascii_alphanumeric() || matches!(c, '-' | '_' | '.' | '~')
}

/// Escape a byte slice for use in a URL.
///
/// Every byte that is not considered safe is replaced by `%XX`, where `XX` is
/// the byte value as two upper-case hexadecimal digits. The result is always
/// pure ASCII.
///
/// # Parameters
///
/// * `relpath` - The byte slice to escape.
/// * `safe` - Additional characters to leave unescaped. Defaults to `"/~"`
///   when `None`. Only ASCII characters in `safe` have any effect; non-ASCII
///   characters are ignored, so non-ASCII input bytes are always escaped.
///
/// # Returns
///
/// The escaped string.
pub fn escape(relpath: &[u8], safe: Option<&str>) -> String {
    use std::fmt::Write;

    let safe = safe.unwrap_or("/~").as_bytes();
    // Each input byte yields at least one output byte.
    let mut result = String::with_capacity(relpath.len());
    for &b in relpath {
        // A byte may be copied verbatim only if it is ASCII. `char::from(u8)`
        // interprets the byte as Latin-1, so letting a non-ASCII byte through
        // (because a multi-byte character appeared in `safe`) would emit a
        // different character than the one encoded in `relpath`.
        if char_is_safe(char::from(b)) || (b.is_ascii() && safe.contains(&b)) {
            result.push(char::from(b));
        } else {
            // Formatting into a `String` cannot fail, so the result is ignored.
            let _ = write!(result, "%{:02X}", b);
        }
    }
    result
}
85
86/// Escape a UTF-8 string for use in a URL.
87///
88/// This is a convenience function that converts the string to bytes and calls `escape`.
89///
90/// # Parameters
91///
92/// * `relpath` - The string to escape.
93/// * `safe` - Additional characters to consider safe (not to escape).
94///
95/// # Returns
96///
97/// The escaped string.
98pub fn escape_utf8(relpath: &str, safe: Option<&str>) -> String {
99    escape(relpath.as_bytes(), safe)
100}
101
102/// Unescape a URL-encoded UTF-8 string.
103///
104/// This function decodes percent-encoded characters in a string.
105///
106/// # Parameters
107///
108/// * `url` - The URL-encoded string to decode.
109///
110/// # Returns
111///
112/// The decoded string.
113pub fn unescape_utf8(url: &str) -> String {
114    use percent_encoding::percent_decode_str;
115
116    percent_decode_str(url)
117        .decode_utf8()
118        .map(|s| s.to_string())
119        .unwrap_or_else(|_| url.to_string())
120}
121
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;

    /// Build the `{a: 1, b: 2}` map shared by the segment-parameter tests.
    fn sample_parameters() -> HashMap<String, String> {
        [("a", "1"), ("b", "2")]
            .iter()
            .map(|&(key, value)| (key.to_string(), value.to_string()))
            .collect()
    }

    /// Parse a URL literal that is known to be valid.
    fn parse(raw: &str) -> url::Url {
        url::Url::parse(raw).unwrap()
    }

    #[test]
    fn test_escape() {
        // Plain alphanumerics are never escaped, whatever `safe` is.
        for safe in [None, Some(""), Some("/~")] {
            assert_eq!(escape(b"blah", safe), "blah");
        }

        // `/` is safe by default but escaped once `safe` is emptied.
        assert_eq!(escape(b"la/bla", None), "la/bla");
        assert_eq!(escape(b"la/bla", Some("")), "la%2Fbla");

        assert_eq!(escape_utf8("la/bla", Some("/")), "la/bla");
    }

    #[test]
    fn test_unescape() {
        assert_eq!(unescape_utf8("blah"), "blah");
        assert_eq!(unescape_utf8("la%2Fbla"), "la/bla");
    }

    #[test]
    fn test_split_segment_parameters() {
        let input = parse("http://example.com/blah,a=1,b=2");
        let (result_url, result_parameters) = split_segment_parameters(&input);
        assert_eq!(result_url, parse("http://example.com/blah"));
        assert_eq!(result_parameters, sample_parameters());
    }

    #[test]
    fn test_join_segment_parameters() {
        let input = parse("http://example.com/blah");
        let result = join_segment_parameters(&input, sample_parameters());
        assert_eq!(result, parse("http://example.com/blah,a=1,b=2"));
    }
}