url_parse/
utils.rs

1use crate::core::scheme_separator::SchemeSeparator;
2use crate::core::Parser;
3use std::collections::HashMap;
4pub struct Utils;
5
6impl Utils {
7    /// Get substring immediately after scheme.
8    ///
9    /// # Example
10    /// ```rust
11    /// use url_parse::utils::Utils;
12    /// use url_parse::core::Parser;
13    /// let input =
14    ///     "https://user:pass@www.example.co.uk:443/blog/article/search?docid=720&hl=en#dayone";
15    /// let expected =
16    ///     "user:pass@www.example.co.uk:443/blog/article/search?docid=720&hl=en#dayone".to_string();
17    /// let parser = Parser::new(None);
18    /// let result = Utils::substring_after_scheme(&parser, input);
19    /// assert_eq!(result, expected);
20    /// ```
21    pub fn substring_after_scheme<'a>(parser: &Parser, input: &'a str) -> &'a str {
22        let scheme = parser.scheme(input);
23        match scheme {
24            Some((v, separator)) => input
25                .get(v.len() + <SchemeSeparator as Into<usize>>::into(separator)..)
26                .unwrap(),
27            None => input,
28        }
29    }
30
31    /// Get substring immediately after login. Eliminates scheme to ensure no colon present in remainder.
32    ///
33    /// # Example
34    /// ```rust
35    /// use url_parse::utils::Utils;
36    /// use url_parse::core::Parser;
37    /// let input =
38    ///     "https://user:pass@www.example.co.uk:443/blog/article/search?docid=720&hl=en#dayone";
39    /// let expected = "www.example.co.uk:443/blog/article/search?docid=720&hl=en#dayone".to_string();
40    /// let parser = Parser::new(None);
41    /// let result = Utils::substring_after_login(&parser, input);
42    /// assert_eq!(result, expected);
43    /// ```
44    pub fn substring_after_login<'a>(parser: &Parser, input: &'a str) -> &'a str {
45        let input = Utils::substring_after_scheme(parser, input);
46        match input.find('@') {
47            Some(pos) => &input[pos + 1..],
48            None => input,
49        }
50    }
51
52    /// Get substring immediately after port. Eliminates scheme to ensure no colon present in remainder.
53    ///
54    /// # Example
55    /// ```rust
56    /// use url_parse::utils::Utils;
57    /// use url_parse::core::Parser;
58    /// let input =
59    ///     "https://user:pass@www.example.co.uk:443/blog/article/search?docid=720&hl=en#dayone";
60    /// let expected = "www.example.co.uk:443/blog/article/search?docid=720&hl=en#dayone".to_string();
61    /// let parser = Parser::new(None);
62    /// let result = Utils::substring_after_login(&parser, input);
63    /// assert_eq!(result, expected);
64    /// ```
65    pub fn substring_after_port<'a>(parser: &Parser, input: &'a str) -> &'a str {
66        let input = Utils::substring_after_scheme(parser, input);
67        let port = parser.port(input);
68
69        if input.find(':').is_some() {
70            let (pos_port, len_port_string) = match port {
71                Some(v) => (input.find(&v.to_string()).unwrap(), v.to_string().len() + 1),
72                None => (0, 0),
73            };
74
75            let substring_after_port = input.get(pos_port + len_port_string..);
76            return substring_after_port.unwrap_or_default();
77        }
78        input
79    }
80
81    /// Get substring immediately before port.
82    ///
83    /// # Example
84    /// ```rust
85    /// use url_parse::utils::Utils;
86    /// use url_parse::core::Parser;
87    /// let input = "https://www.example.co.uk:443/blog/article/search?docid=720&hl=en#dayone";
88    /// let expected = "https://www.example.co.uk".to_string();
89    /// let parser = Parser::new(None);
90    /// let result = Utils::substring_before_port(&parser, input);
91    /// assert_eq!(result, expected);
92    /// ```
93    pub fn substring_before_port<'a>(parser: &Parser, input: &'a str) -> &'a str {
94        let port = parser.port(input);
95
96        let pos_port = match port {
97            Some(v) => input.find(&v.to_string()).unwrap() - 1,
98            None => input.len(),
99        };
100
101        return input.get(..pos_port).unwrap();
102    }
103
104    /// Get substring starting at path field. Eliminates scheme to ensure no colon present in remainder.
105    ///
106    /// # Example
107    /// ```rust
108    /// use url_parse::utils::Utils;
109    /// use url_parse::core::Parser;
110    /// let input =
111    ///     "https://user:pass@www.example.co.uk:443/blog/article/search?docid=720&hl=en#dayone";
112    /// let expected =
113    ///     "/blog/article/search?docid=720&hl=en#dayone".to_string();
114    /// let parser = Parser::new(None);
115    /// let result = Utils::substring_from_path_begin(&parser, input).unwrap_or("");
116    /// assert_eq!(result, expected);
117    /// ```
118    pub fn substring_from_path_begin<'a>(parser: &Parser, input: &'a str) -> Option<&'a str> {
119        let input = Utils::substring_after_scheme(parser, input);
120        match input.find('/') {
121            Some(pos) => Some(&input[pos..]),
122            None => None,
123        }
124    }
125
126    /// Partially matches a subpath in a path. Useful for i.e. GitHub absolute paths from URL hrefs.
127    /// # Example
128    /// ```rust
129    /// use url_parse::utils::Utils;
130    /// use url_parse::core::Parser;
131    /// let input = "https://github.com/mihaigalos/aim/releases/tag/1.5.4";
132    /// let subpath = "mihaigalos/aim/releases/download/1.5.4/aim-1.5.4-x86_64-unknown-linux-gnu.tar.gz";
133    /// let expected = "https://github.com/mihaigalos/aim/releases/download/1.5.4/aim-1.5.4-x86_64-unknown-linux-gnu.tar.gz";
134    /// let result = Utils::canonicalize(&Parser::new(None), input, subpath);
135    /// assert_eq!(result, expected);
136    pub fn canonicalize<'a>(parser: &Parser, input: &'a str, subpath: &'a str) -> String {
137        let mut result = parser
138            .scheme(input)
139            .map(|s| s.0.to_string() + &<SchemeSeparator as Into<String>>::into(s.1))
140            .unwrap_or_default();
141
142        let subpath = Self::trim_leading_slash(subpath);
143        let (similarity, input_splits) = Utils::compute_similarity(parser, input, subpath);
144        let key_with_max_value = similarity.iter().max_by_key(|entry| entry.1).unwrap().0;
145
146        result += &input_splits[0..*key_with_max_value].join("/");
147        if *key_with_max_value != 0 || input.is_empty() {
148            result += "/";
149        }
150        result += subpath;
151
152        result
153    }
154
155    fn compute_similarity<'a>(
156        parser: &Parser,
157        input: &'a str,
158        subpath: &'a str,
159    ) -> (HashMap<usize, usize>, Vec<&'a str>) {
160        let input = Utils::substring_after_scheme(parser, input);
161        let input_splits = input.split('/').collect::<Vec<&str>>();
162        let subpath_splits = subpath.split('/').collect::<Vec<&str>>();
163
164        let mut similarity: HashMap<usize, usize> = HashMap::new();
165        let mut pos_subpath = 0;
166        let mut pos_match = 0;
167        for (pos_input, input_split) in input_splits.iter().enumerate() {
168            if input_split == &subpath_splits[pos_subpath] {
169                if pos_subpath == 0 {
170                    pos_match = pos_input;
171                }
172                pos_subpath += 1;
173                *similarity.entry(pos_match).or_insert(0) += 1;
174            } else {
175                pos_subpath = 0;
176            }
177        }
178        (similarity, input_splits)
179    }
180
181    fn trim_leading_slash(subpath: &str) -> &str {
182        if subpath.starts_with('/') {
183            return &subpath[1..subpath.len()];
184        }
185        subpath
186    }
187}
188
#[cfg(test)]
mod tests {
    use super::*;

    /// URL with scheme, login, port, path, query and fragment.
    const FULL_URL: &str =
        "https://user:pass@www.example.co.uk:443/blog/article/search?docid=720&hl=en#dayone";
    /// `FULL_URL` without its scheme.
    const URL_WITHOUT_SCHEME: &str =
        "user:pass@www.example.co.uk:443/blog/article/search?docid=720&hl=en#dayone";
    /// URL with scheme and port but no login.
    const URL_WITHOUT_LOGIN: &str =
        "https://www.example.co.uk:443/blog/article/search?docid=720&hl=en#dayone";
    /// Everything behind the port separator of the example URLs.
    const AFTER_PORT: &str = "blog/article/search?docid=720&hl=en#dayone";
    /// Path, query and fragment of the example URLs.
    const FROM_PATH: &str = "/blog/article/search?docid=720&hl=en#dayone";

    /// Release page used as the base of the canonicalize tests.
    const RELEASE_PAGE: &str = "https://github.com/mihaigalos/aim/releases/tag/1.5.4";
    /// Same release page, nested below a path that repeats the leading segments.
    const NESTED_RELEASE_PAGE: &str =
        "https://github.com/mihaigalos/aim/fake/path/mihaigalos/aim/releases/tag/1.5.4";
    /// `NESTED_RELEASE_PAGE` without a scheme.
    const NESTED_RELEASE_PAGE_NO_SCHEME: &str =
        "github.com/mihaigalos/aim/fake/path/mihaigalos/aim/releases/tag/1.5.4";
    /// Relative download path of a release asset.
    const ASSET_SUBPATH: &str =
        "mihaigalos/aim/releases/download/1.5.4/aim-1.5.4-x86_64-unknown-linux-gnu.tar.gz";
    /// Absolute URL of the release asset.
    const ASSET_URL: &str = "https://github.com/mihaigalos/aim/releases/download/1.5.4/aim-1.5.4-x86_64-unknown-linux-gnu.tar.gz";

    /// Builds the parser every test uses.
    fn default_parser() -> Parser {
        Parser::new(None)
    }

    #[test]
    fn test_substring_after_scheme_works_when_typical() {
        let parser = default_parser();
        assert_eq!(
            Utils::substring_after_scheme(&parser, FULL_URL),
            URL_WITHOUT_SCHEME
        );
    }

    #[test]
    fn test_substring_after_scheme_works_when_simple_schema() {
        let parser = default_parser();
        let url = "https:user:pass@www.example.co.uk:443/blog/article/search?docid=720&hl=en#dayone";
        assert_eq!(
            Utils::substring_after_scheme(&parser, url),
            URL_WITHOUT_SCHEME
        );
    }

    #[test]
    fn test_substring_after_port_works_when_typical() {
        let parser = default_parser();
        assert_eq!(
            Utils::substring_after_port(&parser, URL_WITHOUT_LOGIN),
            AFTER_PORT
        );
    }

    #[test]
    fn test_substring_after_port_works_when_no_scheme() {
        let parser = default_parser();
        assert_eq!(
            Utils::substring_after_port(&parser, URL_WITHOUT_SCHEME),
            AFTER_PORT
        );
    }

    #[test]
    fn test_substring_before_port_works_when_typical() {
        let parser = default_parser();
        assert_eq!(
            Utils::substring_before_port(&parser, URL_WITHOUT_LOGIN),
            "https://www.example.co.uk"
        );
    }

    #[test]
    fn test_substring_after_login_works_when_typical() {
        let parser = default_parser();
        assert_eq!(
            Utils::substring_after_login(&parser, FULL_URL),
            "www.example.co.uk:443/blog/article/search?docid=720&hl=en#dayone"
        );
    }

    #[test]
    fn test_substring_from_path_begin_works_when_typical() {
        let parser = default_parser();
        let path = Utils::substring_from_path_begin(&parser, URL_WITHOUT_LOGIN).unwrap();
        assert_eq!(path, FROM_PATH);
    }

    #[test]
    fn test_substring_from_path_begin_works_when_no_port() {
        let parser = default_parser();
        let url = "https://www.example.co.uk/blog/article/search?docid=720&hl=en#dayone";
        let path = Utils::substring_from_path_begin(&parser, url).unwrap();
        assert_eq!(path, FROM_PATH);
    }

    #[test]
    fn test_substring_after_port_works_when_colon_in_url() {
        let parser = default_parser();
        let url = "http://en.wikipedia.org/wiki/Template:Welcome";
        assert_eq!(
            Utils::substring_after_port(&parser, url),
            "en.wikipedia.org/wiki/Template:Welcome"
        );
    }

    #[test]
    fn test_substring_after_port_works_when_nothing_after_port() {
        let parser = default_parser();
        assert_eq!(
            Utils::substring_after_port(&parser, "http://192.168.0.100:8080"),
            ""
        );
    }

    #[test]
    fn test_compute_similarity_hashmap_works_when_typical() {
        let parser = default_parser();
        let (run_lengths, _) = Utils::compute_similarity(&parser, RELEASE_PAGE, ASSET_SUBPATH);

        // The run starts at segment 1 ("mihaigalos") and spans three segments.
        let start_of_run: usize = 1;
        let length_of_run: usize = 3;
        assert_eq!(run_lengths[&start_of_run], length_of_run);
    }

    #[test]
    fn test_compute_similarity_input_splits_works_when_typical() {
        let parser = default_parser();
        let (_, segments) = Utils::compute_similarity(&parser, RELEASE_PAGE, ASSET_SUBPATH);

        let expected_segments: Vec<&str> = vec![
            "github.com",
            "mihaigalos",
            "aim",
            "releases",
            "tag",
            "1.5.4",
        ];
        assert_eq!(segments, expected_segments);
    }

    #[test]
    fn test_compute_similarity_works_when_multiple_partial_matches() {
        let parser = default_parser();
        let (run_lengths, _) =
            Utils::compute_similarity(&parser, NESTED_RELEASE_PAGE, ASSET_SUBPATH);

        // The second, longer run starts at segment 5 and spans three segments.
        let start_of_run: usize = 5;
        let length_of_run: usize = 3;
        assert_eq!(run_lengths[&start_of_run], length_of_run);
    }

    #[test]
    fn test_canonicalize_works_when_typical() {
        let parser = default_parser();
        assert_eq!(
            Utils::canonicalize(&parser, RELEASE_PAGE, ASSET_SUBPATH),
            ASSET_URL
        );
    }

    #[test]
    fn test_canonicalize_works_when_domain_with_path_and_no_scheme() {
        let parser = default_parser();
        let subpath =
            "github.com/mihaigalos/aim/releases/download/1.5.4/aim-1.5.4-x86_64-unknown-linux-gnu.tar.gz";
        assert_eq!(
            Utils::canonicalize(&parser, RELEASE_PAGE, subpath),
            ASSET_URL
        );
    }

    #[test]
    fn test_canonicalize_works_when_multiple_partial_matches() {
        let parser = default_parser();
        assert_eq!(
            Utils::canonicalize(&parser, NESTED_RELEASE_PAGE, ASSET_SUBPATH),
            "https://github.com/mihaigalos/aim/fake/path/mihaigalos/aim/releases/download/1.5.4/aim-1.5.4-x86_64-unknown-linux-gnu.tar.gz"
        );
    }

    #[test]
    fn test_canonicalize_works_when_scheme_with_colon() {
        let parser = default_parser();
        let url = "https:github.com/mihaigalos/aim/fake/path/mihaigalos/aim/releases/tag/1.5.4";
        assert_eq!(
            Utils::canonicalize(&parser, url, ASSET_SUBPATH),
            "https:github.com/mihaigalos/aim/fake/path/mihaigalos/aim/releases/download/1.5.4/aim-1.5.4-x86_64-unknown-linux-gnu.tar.gz"
        );
    }

    #[test]
    fn test_canonicalize_works_when_no_scheme() {
        let parser = default_parser();
        assert_eq!(
            Utils::canonicalize(&parser, NESTED_RELEASE_PAGE_NO_SCHEME, ASSET_SUBPATH),
            "github.com/mihaigalos/aim/fake/path/mihaigalos/aim/releases/download/1.5.4/aim-1.5.4-x86_64-unknown-linux-gnu.tar.gz"
        );
    }

    #[test]
    fn test_canonicalize_works_when_no_scheme_and_path_begins_with_slash() {
        let parser = default_parser();
        let subpath =
            "/mihaigalos/aim/releases/download/1.5.4/aim-1.5.4-x86_64-unknown-linux-gnu.tar.gz";
        assert_eq!(
            Utils::canonicalize(&parser, NESTED_RELEASE_PAGE_NO_SCHEME, subpath),
            "github.com/mihaigalos/aim/fake/path/mihaigalos/aim/releases/download/1.5.4/aim-1.5.4-x86_64-unknown-linux-gnu.tar.gz"
        );
    }

    #[test]
    fn test_canonicalize_works_when_empty() {
        let parser = default_parser();
        assert_eq!(Utils::canonicalize(&parser, "", ""), "/");
    }

    #[test]
    fn test_trim_leading_slash_works_when_typical() {
        let with_slash =
            "/mihaigalos/aim/releases/download/1.5.4/aim-1.5.4-x86_64-unknown-linux-gnu.tar.gz";
        assert_eq!(Utils::trim_leading_slash(with_slash), ASSET_SUBPATH);
    }

    #[test]
    fn test_trim_leading_slash_works_when_no_leading_slash() {
        assert_eq!(Utils::trim_leading_slash(ASSET_SUBPATH), ASSET_SUBPATH);
    }
}