files_sdk/
utils.rs

1//! Utility functions for the Files.com SDK
2//!
3//! This module provides common utility functions used throughout the SDK,
4//! including path encoding, URL construction, and other helpers.
5
6/// Encodes a file path for safe use in URLs
7///
8/// Files.com paths may contain special characters (spaces, brackets, unicode, etc.)
9/// that need to be properly URL-encoded. This function:
10/// - Splits the path by `/` to preserve directory structure
11/// - Percent-encodes each path segment individually (using %20 for spaces, not +)
12/// - Rejoins with `/` separators
13///
14/// # Arguments
15///
16/// * `path` - The file or folder path to encode
17///
18/// # Returns
19///
20/// A URL-safe encoded path string
21///
22/// # Examples
23///
24/// ```
25/// use files_sdk::utils::encode_path;
26///
27/// assert_eq!(encode_path("/my folder/file.txt"), "/my%20folder/file.txt");
28/// assert_eq!(encode_path("/data/file[2024].txt"), "/data/file%5B2024%5D.txt");
29/// assert_eq!(encode_path("/文档/файл.txt"), "/%E6%96%87%E6%A1%A3/%D1%84%D0%B0%D0%B9%D0%BB.txt");
30/// ```
31pub fn encode_path(path: &str) -> String {
32    // Handle empty or root path
33    if path.is_empty() || path == "/" {
34        return path.to_string();
35    }
36
37    // Split by '/', encode each segment, then rejoin
38    let segments: Vec<String> = path
39        .split('/')
40        .map(|segment| {
41            if segment.is_empty() {
42                // Preserve empty segments (leading/trailing slashes)
43                segment.to_string()
44            } else {
45                // Percent-encode the segment
46                // We use a simple manual approach to ensure %20 for spaces (not +)
47                percent_encode(segment)
48            }
49        })
50        .collect();
51
52    segments.join("/")
53}
54
/// Percent-encodes a string for use in a URL path segment
///
/// The input is processed byte by byte (so multi-byte UTF-8 characters become
/// one `%XX` escape per byte). Bytes in the RFC 3986 "unreserved" set
/// (`A-Z a-z 0-9 - _ . ~`) are copied through unchanged; every other byte,
/// including space, `/` and `%`, is written as `%` followed by two uppercase
/// hexadecimal digits. Unlike form encoding, a space becomes `%20`, not `+`.
///
/// # Arguments
///
/// * `s` - The text to encode
///
/// # Returns
///
/// An ASCII-only string containing the encoded text
fn percent_encode(s: &str) -> String {
    // Uppercase hex digits, indexed by nibble value
    const HEX_UPPER: &[u8; 16] = b"0123456789ABCDEF";

    // Every input byte produces at least one output byte, so the input length
    // is a safe lower bound that avoids the first few reallocations
    let mut encoded = String::with_capacity(s.len());

    for byte in s.bytes() {
        match byte {
            // Unreserved characters (RFC 3986)
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => {
                encoded.push(char::from(byte));
            }
            // Everything else gets percent-encoded. The digits are pushed
            // directly instead of going through `format!`, which would
            // allocate a temporary `String` for every escaped byte.
            _ => {
                encoded.push('%');
                encoded.push(char::from(HEX_UPPER[usize::from(byte >> 4)]));
                encoded.push(char::from(HEX_UPPER[usize::from(byte & 0x0F)]));
            }
        }
    }

    encoded
}
79
#[cfg(test)]
mod tests {
    use super::*;
    use proptest::prelude::*;

    /// Asserts that `encode_path(input)` yields `expected`.
    ///
    /// `#[track_caller]` makes a failure point at the calling test rather
    /// than at this helper.
    #[track_caller]
    fn assert_encodes(input: &str, expected: &str) {
        assert_eq!(encode_path(input), expected);
    }

    #[test]
    fn test_encode_simple_path() {
        assert_encodes("/simple/path.txt", "/simple/path.txt");
    }

    #[test]
    fn test_encode_path_with_spaces() {
        assert_encodes("/my folder/my file.txt", "/my%20folder/my%20file.txt");
    }

    #[test]
    fn test_encode_path_with_brackets() {
        assert_encodes("/data/file[2024].txt", "/data/file%5B2024%5D.txt");
    }

    #[test]
    fn test_encode_path_with_unicode() {
        // Chinese: each character is three UTF-8 bytes, hence three escapes
        assert_encodes(
            "/文档/测试.txt",
            "/%E6%96%87%E6%A1%A3/%E6%B5%8B%E8%AF%95.txt",
        );

        // Cyrillic: each character is two UTF-8 bytes, hence two escapes
        assert_encodes(
            "/папка/файл.txt",
            "/%D0%BF%D0%B0%D0%BF%D0%BA%D0%B0/%D1%84%D0%B0%D0%B9%D0%BB.txt",
        );
    }

    #[test]
    fn test_encode_path_with_quotes() {
        assert_encodes("/\"quoted\"/file.txt", "/%22quoted%22/file.txt");
    }

    #[test]
    fn test_encode_path_with_special_chars() {
        assert_encodes("/data/file@#$.txt", "/data/file%40%23%24.txt");
    }

    #[test]
    fn test_encode_empty_path() {
        assert_encodes("", "");
    }

    #[test]
    fn test_encode_root_path() {
        assert_encodes("/", "/");
    }

    #[test]
    fn test_encode_path_preserves_leading_slash() {
        assert_encodes("/folder/file", "/folder/file");
    }

    #[test]
    fn test_encode_path_without_leading_slash() {
        assert_encodes("folder/file", "folder/file");
    }

    #[test]
    fn test_encode_path_with_trailing_slash() {
        assert_encodes("/folder/", "/folder/");
    }

    #[test]
    fn test_encode_complex_path() {
        // Spaces, brackets and unicode all in one path
        assert_encodes(
            "/my folder/data [2024]/文档.txt",
            "/my%20folder/data%20%5B2024%5D/%E6%96%87%E6%A1%A3.txt",
        );
    }

    // Property-based tests

    proptest! {
        /// Property: the encoded form of any path consists of ASCII only
        #[test]
        fn prop_encoded_path_is_ascii(path in "(/[^/\0]{0,100}){0,10}") {
            let encoded = encode_path(&path);
            prop_assert!(encoded.is_ascii(), "Encoded path should be ASCII: {}", encoded);
        }

        /// Property: encoding neither adds nor removes `/` separators
        #[test]
        fn prop_encoding_preserves_slash_count(path in "(/[^/\0]{0,100}){0,10}") {
            let encoded = encode_path(&path);
            let slashes_before = path.matches('/').count();
            let slashes_after = encoded.matches('/').count();
            prop_assert_eq!(slashes_before, slashes_after,
                "Slash count should be preserved. Original: {}, Encoded: {}", path, encoded);
        }

        /// Property: for paths made only of unreserved characters and `/`,
        /// encoding a second time gives the same result as encoding once.
        /// (The generator never produces `%`, so this does not cover
        /// re-encoding of escape sequences.)
        #[test]
        fn prop_encoding_is_idempotent(path in "[a-zA-Z0-9._~/-]{0,200}") {
            let first_pass = encode_path(&path);
            let second_pass = encode_path(&first_pass);
            prop_assert_eq!(first_pass, second_pass,
                "Encoding should be idempotent for already-encoded paths");
        }

        /// Property: a leading slash (empty first segment) survives encoding
        #[test]
        fn prop_preserves_leading_slash(path in "/[a-zA-Z0-9._~]{1,50}(/[a-zA-Z0-9._~]{0,50}){0,5}") {
            let encoded = encode_path(&path);
            prop_assert!(encoded.starts_with('/'), "Leading slash should be preserved");
        }

        /// Property: a trailing slash (empty last segment) survives encoding
        #[test]
        fn prop_preserves_trailing_slash(path in "[a-zA-Z0-9._~]{1,50}(/[a-zA-Z0-9._~]{0,50}){0,5}/") {
            let encoded = encode_path(&path);
            prop_assert!(encoded.ends_with('/'), "Trailing slash should be preserved");
        }

        /// Property: every `%` in the output starts a well-formed escape,
        /// i.e. it is followed by exactly two hexadecimal digits
        #[test]
        fn prop_no_double_encoding(s in "[^/\0]{1,50}") {
            let path = format!("/{}", s);
            let encoded = encode_path(&path);

            // Walk the output; after each '%' consume the two characters that
            // must form its hex pair so they are not inspected again
            let mut rest = encoded.chars();
            while let Some(current) = rest.next() {
                if current != '%' {
                    continue;
                }
                let (high, low) = (rest.next(), rest.next());
                prop_assert!(high.is_some() && low.is_some(),
                    "% should be followed by 2 characters");
                prop_assert!(high.unwrap().is_ascii_hexdigit() && low.unwrap().is_ascii_hexdigit(),
                    "% should be followed by 2 hex digits");
            }
        }

        /// Property: input made solely of unreserved characters
        /// (A-Za-z0-9-_.~) comes back unchanged
        #[test]
        fn prop_unreserved_never_encoded(s in "[A-Za-z0-9._~-]+") {
            let encoded = encode_path(&s);
            prop_assert_eq!(&encoded, &s, "Unreserved characters should not be encoded");
        }

        /// Property: a space is written as `%20` and never as `+`
        #[test]
        fn prop_spaces_encoded_as_percent20(s in "[a-z ]{1,50}") {
            let path = format!("/{}", s);
            let encoded = encode_path(&path);

            let has_space = s.contains(' ');
            if has_space {
                prop_assert!(encoded.contains("%20"), "Spaces should be encoded as %20");
                prop_assert!(!encoded.contains('+'), "Spaces should not be encoded as +");
            }
        }

        /// Property: long paths are encoded without panicking
        #[test]
        fn prop_handles_long_paths(path in "(/[a-zA-Z0-9]{0,500}){0,20}") {
            // Only absence of a panic matters; the result is discarded
            let _ = encode_path(&path);
        }

        /// Property: non-ASCII characters end up percent-encoded
        #[test]
        fn prop_unicode_is_encoded(s in "[\\u{0080}-\\u{FFFF}]{1,20}") {
            let path = format!("/{}", s);
            let encoded = encode_path(&path);

            // Any non-ASCII input must leave at least one escape in the output
            if !s.is_ascii() {
                prop_assert!(encoded.contains('%'),
                    "Non-ASCII unicode should be percent-encoded: {} -> {}", s, encoded);
            }
        }

        /// Property: the root path is returned unchanged
        #[test]
        fn prop_root_path_unchanged(_unit in prop::bool::ANY) {
            prop_assert_eq!(encode_path("/"), "/");
        }

        /// Property: the empty path is returned unchanged
        #[test]
        fn prop_empty_path_unchanged(_unit in prop::bool::ANY) {
            prop_assert_eq!(encode_path(""), "");
        }
    }
}