Skip to main content

iri_rs_core/
parse.rs

1//! Position finders — oxiri port. Locate scheme/authority/path/query/fragment boundaries.
2use memchr::{memchr, memchr2, memchr3};
3
/// Byte offsets marking where each component of an IRI/URI string ends.
///
/// Every field is an exclusive end offset into the original string, so consecutive
/// fields delimit consecutive components. There is no separate fragment field: when a
/// fragment is present its `#` sits at `query_end` and the fragment runs to the end of
/// the string.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Positions {
    /// Offset just past the `:` that terminates the scheme; `0` when there is no scheme.
    pub scheme_end: usize,
    /// Offset just past the authority; equal to `scheme_end` when there is no authority.
    pub authority_end: usize,
    /// Offset just past the path.
    pub path_end: usize,
    /// Offset just past the query; equal to `path_end` when there is no query.
    pub query_end: usize,
}

impl Positions {
    /// Boundaries with every offset at zero (no scheme, authority, path or query).
    pub const EMPTY: Positions = Positions {
        scheme_end: 0,
        authority_end: 0,
        path_end: 0,
        query_end: 0,
    };
}
27
28/// Finds positions assuming the input is a full IRI (absolute or relative, scheme allowed).
29#[inline]
30pub fn find_iri_positions(iri: &str) -> Positions {
31    let iri = iri.as_bytes();
32    let scheme_end = memchr(b':', iri).map_or(0, |l| l + 1);
33    find_iri_positions_knowing_scheme_end(iri, scheme_end)
34}
35
36#[inline]
37pub fn find_iri_positions_knowing_scheme_end(iri: &[u8], scheme_end: usize) -> Positions {
38    let path_end = memchr2(b'?', b'#', &iri[scheme_end..]).map_or(iri.len(), |l| scheme_end + l);
39    let query_end = memchr(b'#', &iri[path_end..]).map_or(iri.len(), |l| path_end + l);
40    let authority_end = if scheme_end + 2 <= path_end
41        && iri[scheme_end] == b'/'
42        && iri[scheme_end + 1] == b'/'
43    {
44        memchr(b'/', &iri[scheme_end + 2..path_end]).map_or(path_end, |l| scheme_end + 2 + l)
45    } else {
46        scheme_end
47    };
48    Positions {
49        scheme_end,
50        authority_end,
51        path_end,
52        query_end,
53    }
54}
55
56/// Finds positions for an IRI reference (may lack scheme, start with `/`, `?`, `#`, or be empty).
57pub fn find_iri_ref_positions(iri: &str) -> Positions {
58    let iri = iri.as_bytes();
59    match iri.first().copied() {
60        Some(b'/') => find_iri_positions_knowing_scheme_end(iri, 0),
61        Some(b'?') => {
62            let query_end = memchr(b'#', iri).unwrap_or(iri.len());
63            Positions {
64                scheme_end: 0,
65                authority_end: 0,
66                path_end: 0,
67                query_end,
68            }
69        }
70        Some(b'#') | None => Positions::EMPTY,
71        _ => {
72            let scheme_end = memchr3(b':', b'?', b'/', iri).map_or(0, |index| {
73                if iri[index] == b':' {
74                    if memchr(b'#', &iri[..index]).is_some() {
75                        0
76                    } else {
77                        index + 1
78                    }
79                } else {
80                    0
81                }
82            });
83            find_iri_positions_knowing_scheme_end(iri, scheme_end)
84        }
85    }
86}
87
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a reference that has no scheme, authority or path and whose query ends at `query_end`.
    fn query_only(query_end: usize) -> Positions {
        Positions {
            query_end,
            ..Positions::EMPTY
        }
    }

    /// An empty reference has every boundary at zero.
    #[test]
    fn iri_ref_empty() {
        let found = find_iri_ref_positions("");
        assert_eq!(found, Positions::EMPTY);
    }

    /// A fragment-only reference also has every boundary at zero.
    #[test]
    fn iri_ref_fragment_only() {
        let found = find_iri_ref_positions("#frag");
        assert_eq!(found, Positions::EMPTY);
    }

    /// A query-only reference has an empty path and a query spanning the whole input.
    #[test]
    fn iri_ref_query_only() {
        assert_eq!(find_iri_ref_positions("?q"), query_only(2));
    }

    /// Every component of a full IRI can be sliced out using the returned offsets.
    #[test]
    fn iri_full() {
        let iri = "http://host/p?q#f";
        let Positions {
            scheme_end,
            authority_end,
            path_end,
            query_end,
        } = find_iri_positions(iri);

        assert_eq!(&iri[..scheme_end - 1], "http");
        assert_eq!(&iri[scheme_end + 2..authority_end], "host");
        assert_eq!(&iri[authority_end..path_end], "/p");
        assert_eq!(&iri[path_end + 1..query_end], "q");
        assert_eq!(&iri[query_end + 1..], "f");
    }

    /// An absolute-path reference has no scheme or authority; the path covers the input.
    #[test]
    fn iri_ref_path_only() {
        let found = find_iri_ref_positions("/a/b");
        assert_eq!(found.scheme_end, 0);
        assert_eq!(found.authority_end, 0);
        assert_eq!(found.path_end, 4);
    }

    /// `mailto:` is seven bytes long including the colon.
    #[test]
    fn iri_ref_with_scheme() {
        let found = find_iri_ref_positions("mailto:a@b");
        assert_eq!(found.scheme_end, 7);
    }

    /// A colon inside the fragment must not be mistaken for a scheme terminator.
    #[test]
    fn iri_fragment_before_colon_not_scheme() {
        let found = find_iri_ref_positions("a#b:c");
        assert_eq!(found.scheme_end, 0);
    }
}