urlable/
utils.rs

1use crate::{
2    parse::parse_url,
3    query::{parse_query, stringify_query, QueryObject},
4};
5use lazy_static::lazy_static;
6use regex::Regex;
7
8lazy_static! {
9    // Matches strict protocol format like "http://" or "https://"
10    static ref PROTOCOL_STRICT_REGEX: Regex = Regex::new(r"^[\s\w+.-]{2,}:([/\\]{1,2})").unwrap();
11    // Matches relaxed protocol format like "http:" or "https:"
12    static ref PROTOCOL_REGEX: Regex = Regex::new(r"^[\s\w+.-]{2,}:(?:/\\{2})?").unwrap();
13    // Matches protocol-relative URLs starting with "//"
14    static ref PROTOCOL_RELATIVE_REGEX: Regex = Regex::new(r"^([/\\]\s*){2,}[^/\\]").unwrap();
15    // Matches potentially dangerous protocols like javascript: or data:
16    static ref PROTOCOL_SCRIPT_RE: Regex =
17        Regex::new(r"^[\s\0]*(blob|data|javascript|vbscript):$").unwrap();
18    // Matches trailing slashes including those before ? or #
19    static ref TRAILING_SLASH_RE: Regex = Regex::new(r"\/$|\/\?|\/#").unwrap();
20    // Matches leading ./ or /
21    static ref JOIN_LEADING_SLASH_RE: Regex = Regex::new(r"^\.?/").unwrap();
22}
23
/// Reports whether `input_string` is an explicitly relative path, i.e. begins
/// with `./` or `../`.
///
/// Examples:
/// - `is_relative("./images/logo.png")` -> `true`
/// - `is_relative("../styles/main.css")` -> `true`
/// - `is_relative("/absolute/path")` -> `false`
/// - `is_relative("plain")` -> `false`
pub fn is_relative(input_string: &str) -> bool {
    ["./", "../"]
        .iter()
        .any(|&prefix| input_string.starts_with(prefix))
}
32
/// Options controlling how [`has_protocol`] decides whether a string carries a
/// protocol prefix.
///
/// The default value has both flags off. The struct is two booleans, so it is
/// `Copy` and can be passed by value without cloning.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct HasProtocolOptions {
    /// Also accept protocol-relative URLs (those starting with `//`).
    pub accept_relative: bool,
    /// Require the strict form, where the scheme's colon is followed by slashes.
    pub strict: bool,
}
38
39// Checks if a URL has a protocol prefix
40// Example:
41// has_protocol("https://example.com", strict_opts) -> true
42// has_protocol("//example.com", relative_opts) -> true
43// has_protocol("example.com", default_opts) -> false
44pub fn has_protocol(input_string: &str, opts: HasProtocolOptions) -> bool {
45    if opts.strict {
46        return PROTOCOL_STRICT_REGEX.is_match(input_string);
47    }
48    PROTOCOL_REGEX.is_match(input_string)
49        || (opts.accept_relative && PROTOCOL_RELATIVE_REGEX.is_match(input_string))
50}
51
/// Reports whether a URL ends with a trailing slash.
///
/// With `respect_query_fragment == false` the whole input is examined, so a
/// query string or fragment hides the slash. With `true`, only the path (the
/// part before the first `?` or `#`) is examined, so a slash that merely
/// appears inside the query or fragment is not mistaken for a trailing slash.
///
/// Examples:
/// - `has_trailing_slash("/path/", false)` -> `true`
/// - `has_trailing_slash("/path/?query=1", true)` -> `true`
/// - `has_trailing_slash("/path", false)` -> `false`
/// - `has_trailing_slash("/path?next=/", true)` -> `false`
pub fn has_trailing_slash(input: &str, respect_query_fragment: bool) -> bool {
    let path = if respect_query_fragment {
        // `?` and `#` are ASCII, so the found index is always a char boundary.
        input
            .find(|c: char| c == '?' || c == '#')
            .map_or(input, |end| &input[..end])
    } else {
        input
    };
    path.ends_with('/')
}
64
/// Removes one trailing slash from a URL.
///
/// With `respect_query_fragment == false`, a single `/` at the very end of the
/// input is dropped. With `true`, the slash is dropped from the end of the path
/// (the part before the first `?` or `#`); the query string and fragment are
/// kept verbatim, including any slashes they contain.
///
/// Examples:
/// - `without_trailing_slash("/path/", false)` -> `"/path"`
/// - `without_trailing_slash("/path/?query=1", true)` -> `"/path?query=1"`
/// - `without_trailing_slash("/path/#section/", true)` -> `"/path#section/"`
/// - `without_trailing_slash("/", false)` -> `""`
pub fn without_trailing_slash(input: &str, respect_query_fragment: bool) -> String {
    if !respect_query_fragment {
        return input.strip_suffix('/').unwrap_or(input).to_string();
    }

    // Peel off the fragment first, then the query; each piece keeps its own
    // leading `#` / `?` so the parts can be concatenated back unchanged.
    let (before_fragment, fragment) = input.split_at(input.find('#').unwrap_or(input.len()));
    let (path, query) =
        before_fragment.split_at(before_fragment.find('?').unwrap_or(before_fragment.len()));

    format!(
        "{}{}{}",
        path.strip_suffix('/').unwrap_or(path),
        query,
        fragment
    )
}
109
/// Ensures a URL has a trailing slash.
///
/// With `respect_query_fragment == false`, a `/` is appended to the whole input
/// unless it already ends with one. With `true`, the slash is added to the end
/// of the path (the part before the first `?` or `#`), and the query string and
/// fragment are kept verbatim after it. Only the path is inspected, so a slash
/// inside the query or fragment does not count as an existing trailing slash.
/// An input that consists solely of a fragment is returned unchanged.
///
/// Examples:
/// - `with_trailing_slash("/path", false)` -> `"/path/"`
/// - `with_trailing_slash("/path?query=1", true)` -> `"/path/?query=1"`
/// - `with_trailing_slash("/path#section", true)` -> `"/path/#section"`
/// - `with_trailing_slash("/path#section/", true)` -> `"/path/#section/"`
pub fn with_trailing_slash(input: &str, respect_query_fragment: bool) -> String {
    if !respect_query_fragment {
        return if input.ends_with('/') {
            input.to_string()
        } else {
            format!("{}/", input)
        };
    }

    // Each piece keeps its own leading `#` / `?`.
    let (before_fragment, fragment) = input.split_at(input.find('#').unwrap_or(input.len()));
    if before_fragment.is_empty() && !fragment.is_empty() {
        // Fragment-only input: there is no path to add a slash to.
        return fragment.to_string();
    }

    let (path, query) =
        before_fragment.split_at(before_fragment.find('?').unwrap_or(before_fragment.len()));
    if path.ends_with('/') {
        return input.to_string();
    }

    format!("{}/{}{}", path, query, fragment)
}
151
/// Reports whether the input begins with a forward slash.
///
/// Examples:
/// - `has_leading_slash("/path")` -> `true`
/// - `has_leading_slash("path")` -> `false`
/// - `has_leading_slash("")` -> `false`
pub fn has_leading_slash(input: &str) -> bool {
    // `/` is a single ASCII byte, so inspecting the first byte is sufficient.
    input.as_bytes().first() == Some(&b'/')
}
159
/// Returns the input without its first character when that character is `/`;
/// otherwise returns the input unchanged. Only one slash is removed.
///
/// Examples:
/// - `without_leading_slash("/path")` -> `"path"`
/// - `without_leading_slash("path")` -> `"path"`
/// - `without_leading_slash("//path")` -> `"/path"`
pub fn without_leading_slash(input: &str) -> String {
    input.strip_prefix('/').unwrap_or(input).to_string()
}
171
/// Returns the input prefixed with `/` unless it already starts with one.
///
/// Examples:
/// - `with_leading_slash("path")` -> `"/path"`
/// - `with_leading_slash("/path")` -> `"/path"`
/// - `with_leading_slash("")` -> `"/"`
pub fn with_leading_slash(input: &str) -> String {
    let mut prefixed = String::with_capacity(input.len() + 1);
    if !input.starts_with('/') {
        prefixed.push('/');
    }
    prefixed.push_str(input);
    prefixed
}
183
/// Collapses runs of consecutive slashes into a single slash, except for a run
/// that directly follows a colon (such as the `//` in `http://`), which is
/// copied through unchanged regardless of its length.
///
/// Examples:
/// - `clean_double_slashes("http://example.com//path///to////file")` -> `"http://example.com/path/to/file"`
/// - `clean_double_slashes("//path////to/////file")` -> `"/path/to/file"`
pub fn clean_double_slashes(url: &str) -> String {
    // The output is never longer than the input.
    let mut cleaned = String::with_capacity(url.len());
    // Last character written to `cleaned`, if any.
    let mut previous: Option<char> = None;
    // True while inside a slash run that started right after a ':'.
    let mut in_protocol_run = false;

    for ch in url.chars() {
        if ch == '/' {
            match previous {
                // First slash after a colon: start a run that is preserved as-is.
                Some(':') => in_protocol_run = true,
                // Repeated slash in an ordinary run: drop it.
                Some('/') if !in_protocol_run => continue,
                _ => {}
            }
        } else {
            // Any non-slash character ends the current run.
            in_protocol_run = false;
        }
        cleaned.push(ch);
        previous = Some(ch);
    }

    cleaned
}
239
240// Prepends base URL to a path if needed
241// Example:
242// with_base("/path", "/base") -> "/base/path"
243// with_base("http://example.com", "/base") -> "http://example.com"
244pub fn with_base(input: &str, base: &str) -> String {
245    let result = if is_empty_url(base) || has_protocol(input, HasProtocolOptions::default()) {
246        return input.to_string();
247    } else {
248        let base = without_trailing_slash(base, false);
249        if input.starts_with(&base) {
250            input.to_string()
251        } else {
252            join_url(&base, input)
253        }
254    };
255    clean_double_slashes(&result)
256}
257
/// Strips `base` from the front of `input` when `input` is located under it.
///
/// `base` is compared without its trailing slash, and only matches when it ends
/// on a segment boundary in `input` (followed by nothing, `/`, `?` or `#`).
/// When `base` is empty or `/`, or does not match, `input` is returned
/// unchanged. The result always starts with `/`.
///
/// Examples:
/// - `without_base("/base/path", "/base")` -> `"/path"`
/// - `without_base("/base", "/base")` -> `"/"`
/// - `without_base("/other/path", "/base")` -> `"/other/path"`
/// - `without_base("/basement/path", "/base")` -> `"/basement/path"`
pub fn without_base(input: &str, base: &str) -> String {
    // Dropping one trailing slash turns both "" and "/" into the empty base.
    let base = base.strip_suffix('/').unwrap_or(base);
    if base.is_empty() {
        return input.to_string();
    }

    let rest = match input.strip_prefix(base) {
        Some(rest) => rest,
        None => return input.to_string(),
    };

    match rest.chars().next() {
        // `input` was exactly the base.
        None => "/".to_string(),
        Some('/') => rest.to_string(),
        Some('?') | Some('#') => format!("/{}", rest),
        // The prefix ended mid-segment (e.g. "/basement" vs "/base"): not a match.
        Some(_) => input.to_string(),
    }
}
277
278// Adds or merges query parameters to URL
279// Example:
280// with_query("http://example.com", {"page": "1"}) -> "http://example.com?page=1"
281// with_query("http://example.com?sort=desc", {"page": "1"}) -> "http://example.com?sort=desc&page=1"
282pub fn with_query(input: &str, query: &QueryObject) -> String {
283    let mut parsed = parse_url(input);
284    let current: QueryObject = parse_query(&parsed.search);
285
286    // Preserve existing query params first
287    let mut result = QueryObject::new();
288    for (key, value) in current.iter() {
289        result.insert(key.clone(), value.clone());
290    }
291
292    // Then append new query params
293    for (key, value) in query.iter() {
294        result.insert(key.clone(), value.clone());
295    }
296
297    parsed.search = stringify_query(&result);
298    parsed.stringify()
299}
300
/// Reports whether the URL carries no path information: either the empty
/// string or a lone `/`.
///
/// Examples:
/// - `is_empty_url("")` -> `true`
/// - `is_empty_url("/")` -> `true`
/// - `is_empty_url("/path")` -> `false`
pub fn is_empty_url(url: &str) -> bool {
    matches!(url, "" | "/")
}
309
310// Joins base URL with path
311// Example:
312// join_url("/base", "path") -> "/base/path"
313// join_url("", "path") -> "path"
314// join_url("/base", "") -> "/base"
315pub fn join_url(base: &str, input: &str) -> String {
316    let mut url = base.to_string();
317    if !is_empty_url(input) {
318        if !url.is_empty() {
319            let segment = input.trim_start_matches(|c| c == '.' || c == '/');
320            url = format!("{}/{}", with_trailing_slash(&url, false), segment);
321        } else {
322            url = input.to_string();
323        }
324    }
325    url
326}
327
328// Joins multiple URL segments handling relative paths
329// Example:
330// join_relative_url(["/base", "../other", "./path"]) -> "/other/path"
331// join_relative_url(["http:", "example.com", "path"]) -> "http://example.com/path"
332pub fn join_relative_url(inputs: &[&str]) -> String {
333    if inputs.is_empty() {
334        return String::new();
335    }
336
337    let mut segments: Vec<String> = Vec::new();
338    let mut segments_depth = 0;
339
340    for input in inputs.iter().filter(|&&i| !i.is_empty() && i != "/") {
341        for (sindex, s) in input.split('/').enumerate() {
342            if s.is_empty() || s == "." {
343                continue;
344            }
345            if s == ".." {
346                if segments.len() == 1
347                    && has_protocol(segments[0].as_str(), HasProtocolOptions::default())
348                {
349                    continue;
350                }
351                if !segments.is_empty() {
352                    segments.pop();
353                    segments_depth -= 1;
354                } else {
355                    segments_depth -= 1;
356                }
357                continue;
358            }
359            if sindex == 1 && segments.last().map_or(false, |last| last.ends_with(':')) {
360                if let Some(last) = segments.last_mut() {
361                    *last = format!("{}//", last);
362                }
363                segments.push(s.to_string());
364                segments_depth += 1;
365                continue;
366            }
367            segments.push(s.to_string());
368            segments_depth += 1;
369        }
370    }
371
372    let mut url = segments.join("/");
373
374    if segments_depth >= 0 {
375        if inputs.first().map_or(false, |&i| i.starts_with('/')) && !url.starts_with('/') {
376            url = format!("/{}", url);
377        } else if inputs.first().map_or(false, |&i| i.starts_with("./")) && !url.starts_with("./") {
378            url = format!("./{}", url);
379        }
380    } else {
381        url = format!("{}{}", "../".repeat(-segments_depth as usize), url);
382    }
383
384    if inputs.last().map_or(false, |&i| i.ends_with('/')) && !url.ends_with('/') {
385        url.push('/');
386    }
387
388    url
389}
#[cfg(test)]
mod tests {
    use super::*;

    // Builds a QueryObject from (key, JSON value) pairs, inserting in the given order.
    fn query_from(pairs: Vec<(&str, serde_json::Value)>) -> QueryObject {
        let mut query = QueryObject::new();
        for (key, value) in pairs {
            query.insert(key.to_string(), value);
        }
        query
    }

    #[test]
    fn test_is_relative() {
        for input in &["./foo", "../foo", "./", "../"] {
            assert!(is_relative(input), "expected relative: {}", input);
        }
        for input in &["/foo", "foo", "http://example.com", "//foo", "https://foo"] {
            assert!(!is_relative(input), "expected not relative: {}", input);
        }
    }

    #[test]
    fn test_has_protocol() {
        let strict_opts = HasProtocolOptions {
            strict: true,
            ..Default::default()
        };
        let relative_opts = HasProtocolOptions {
            accept_relative: true,
            ..Default::default()
        };

        for url in &[
            "http://example.com",
            "https://example.com",
            "ftp://files.example.com",
        ] {
            assert!(has_protocol(url, strict_opts.clone()), "strict: {}", url);
        }
        assert!(!has_protocol("//example.com", strict_opts));
        assert!(has_protocol("//example.com", relative_opts));
        assert!(!has_protocol("example.com", HasProtocolOptions::default()));

        // Additional schemes under the default (relaxed) options
        for url in &["sftp://example.com", "ws://example.com", "wss://example.com"] {
            assert!(
                has_protocol(url, HasProtocolOptions::default()),
                "default: {}",
                url
            );
        }
    }

    #[test]
    fn test_trailing_slash() {
        // (input, respect_query_fragment, expected)
        let removals: &[(&str, bool, &str)] = &[
            ("/foo/", false, "/foo"),
            ("/foo/?query=1", true, "/foo?query=1"),
            ("/foo/#hash", true, "/foo#hash"),
            ("/foo/bar/?query=1#hash", true, "/foo/bar?query=1#hash"),
            ("", false, ""),
            ("/", false, ""),
            ("foo/", false, "foo"),
        ];
        for &(input, respect, expected) in removals {
            assert_eq!(
                without_trailing_slash(input, respect),
                expected,
                "without_trailing_slash({:?}, {})",
                input,
                respect
            );
        }

        let additions: &[(&str, bool, &str)] = &[
            ("/foo", false, "/foo/"),
            ("/foo?query=1", true, "/foo/?query=1"),
            ("/foo#hash", true, "/foo/#hash"),
            ("/foo/bar?query=1#hash", true, "/foo/bar/?query=1#hash"),
            ("", false, "/"),
            ("/", false, "/"),
            ("foo", false, "foo/"),
        ];
        for &(input, respect, expected) in additions {
            assert_eq!(
                with_trailing_slash(input, respect),
                expected,
                "with_trailing_slash({:?}, {})",
                input,
                respect
            );
        }
    }

    #[test]
    fn test_leading_slash() {
        let removals: &[(&str, &str)] = &[
            ("/foo", "foo"),
            ("/foo/bar", "foo/bar"),
            ("foo", "foo"),
            ("", ""),
            ("/", ""),
            ("//foo", "/foo"),
        ];
        for &(input, expected) in removals {
            assert_eq!(without_leading_slash(input), expected, "input: {:?}", input);
        }

        let additions: &[(&str, &str)] = &[
            ("foo", "/foo"),
            ("foo/bar", "/foo/bar"),
            ("/foo", "/foo"),
            ("", "/"),
            ("/", "/"),
            ("//foo", "//foo"),
        ];
        for &(input, expected) in additions {
            assert_eq!(with_leading_slash(input), expected, "input: {:?}", input);
        }
    }

    #[test]
    fn test_clean_double_slashes() {
        let cases: &[(&str, &str)] = &[
            ("http://example.com//foo//bar", "http://example.com/foo/bar"),
            (
                "https://example.com///foo////bar",
                "https://example.com/foo/bar",
            ),
            ("//foo//bar", "/foo/bar"),
            ("foo//bar", "foo/bar"),
            ("", ""),
            ("/", "/"),
            ("////", "/"),
            (
                "ftp://example.com////foo///bar//",
                "ftp://example.com/foo/bar/",
            ),
        ];
        for &(input, expected) in cases {
            assert_eq!(clean_double_slashes(input), expected, "input: {:?}", input);
        }
    }

    #[test]
    fn test_join_relative_url() {
        let cases: Vec<(Vec<&str>, &str)> = vec![
            (vec!["/a", "../b", "./c"], "/b/c"),
            (vec!["a", "b", "c"], "a/b/c"),
            (vec!["a", "../b", "../c"], "c"),
            (vec!["/", "a", "b", "/"], "/a/b/"),
            (vec!["./", "a", "../b"], "./b"),
            (vec!["a", "b", "..", "c"], "a/c"),
            (vec![], ""),
            (vec!["/"], "/"),
            (vec![".", "."], ""),
            (vec!["..", ".."], "../../"),
            (vec!["a", ".", "b"], "a/b"),
        ];
        for (inputs, expected) in cases {
            assert_eq!(join_relative_url(&inputs), expected, "inputs: {:?}", inputs);
        }
    }

    #[test]
    fn test_with_query() {
        let query = query_from(vec![("foo", serde_json::json!("bar"))]);
        assert_eq!(
            with_query("http://example.com", &query),
            "http://example.com?foo=bar"
        );
        assert_eq!(
            with_query("http://example.com?existing=1", &query),
            "http://example.com?existing=1&foo=bar"
        );

        // Array values expand to repeated keys
        let complex_query = query_from(vec![("array", serde_json::json!(["1", "2"]))]);
        assert_eq!(
            with_query("http://example.com", &complex_query),
            "http://example.com?array=1&array=2"
        );

        // An empty query leaves the URL untouched
        let empty_query = QueryObject::new();
        assert_eq!(
            with_query("http://example.com", &empty_query),
            "http://example.com"
        );

        // Several new keys are appended after the existing ones
        let multiple_query = query_from(vec![
            ("a", serde_json::json!("1")),
            ("b", serde_json::json!("2")),
        ]);
        assert_eq!(
            with_query("http://example.com?c=3", &multiple_query),
            "http://example.com?c=3&a=1&b=2"
        );
    }

    #[test]
    fn test_with_base() {
        // (input, base, expected)
        let cases: &[(&str, &str, &str)] = &[
            ("/path", "", "/path"),
            ("/path", "/", "/path"),
            ("/path", "/base", "/base/path"),
            ("http://example.com", "/base", "http://example.com"),
            ("/base/path", "/base", "/base/path"),
            ("path", "/base/", "/base/path"),
        ];
        for &(input, base, expected) in cases {
            assert_eq!(
                with_base(input, base),
                expected,
                "with_base({:?}, {:?})",
                input,
                base
            );
        }
    }

    #[test]
    fn test_without_base() {
        // (input, base, expected)
        let cases: &[(&str, &str, &str)] = &[
            ("/path", "", "/path"),
            ("/path", "/", "/path"),
            ("/base/path", "/base", "/path"),
            ("/other/path", "/base", "/other/path"),
            ("/base", "/base", "/"),
            ("/base/", "/base", "/"),
        ];
        for &(input, base, expected) in cases {
            assert_eq!(
                without_base(input, base),
                expected,
                "without_base({:?}, {:?})",
                input,
                base
            );
        }
    }
}
593}