uriparser/
lib.rs

1//! These APIs are all pretty rough for now - only use them after verifying their safety
2
3use uriparser_sys::{self, UriPathSegmentA, UriTextRangeA, UriUriA, uriToStringA, uriToStringCharsRequiredA};
4use core::{fmt, ptr, cmp};
5fn bool_to_uri(b: bool) -> uriparser_sys::UriBool {
6    if b {
7        uriparser_sys::URI_TRUE as i32
8    } else {
9        uriparser_sys::URI_FALSE as i32
10    }
11}
12pub fn escape<'o>(text: &[u8], out: &'o mut [u8], replace_space: bool, normalize_line_breaks: bool) -> &'o mut [u8] {
13    assert!(out.len() >= text.len() * if normalize_line_breaks {
14        6
15    } else {
16        3
17    } + 1);
18    let end = unsafe {
19        uriparser_sys::uriEscapeExA(
20            text.as_ptr() as *const _, 
21            text.as_ptr().add(text.len()) as *const _,
22            out.as_mut_ptr() as *mut _, 
23            bool_to_uri(replace_space), 
24            bool_to_uri(normalize_line_breaks)
25        )
26    };
27    let new_len = end as usize - out.as_ptr() as usize;
28    &mut out[..new_len]
29}
/// Line-break conversion requested from [`unescape`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LineBreak {
    /// Selects uriparser's `URI_BR_TO_CR` conversion.
    Cr,
    /// Selects uriparser's `URI_BR_TO_LF` conversion.
    Lf,
    /// Selects uriparser's `URI_BR_TO_CRLF` conversion.
    Crlf,
}
/// Returns the numeric value (0..=15) of the ASCII hex digit `v`, accepting both
/// letter cases, or `None` when `v` is not a hex digit.
fn hex(v: u8) -> Option<u8> {
    // `to_digit(16)` accepts exactly `0-9`, `a-f` and `A-F`; the result is at
    // most 15, so narrowing back to `u8` is lossless.
    char::from(v).to_digit(16).map(|digit| digit as u8)
}
43pub fn unescape(text: &mut [u8], replace_plus: bool, line_breaks: LineBreak) -> &mut [u8] {
44    let last_byte = match text {
45        [.., p, a, b] if *p == b'%' => {
46            if let Some(n) = hex(*a).and_then(|a| hex(*b).map(|b| a * 16 + b)) {
47                *p = 0;
48                n
49            } else {
50                core::mem::replace(b, 0)
51            }
52        }
53        [.., l] => core::mem::replace(l, 0),
54        [] => return text,
55    };
56    let last = unsafe {
57        uriparser_sys::uriUnescapeInPlaceExA(text.as_mut_ptr() as *mut _, bool_to_uri(replace_plus), match line_breaks {
58            LineBreak::Cr => uriparser_sys::UriBreakConversionEnum_URI_BR_TO_CR,
59            LineBreak::Lf => uriparser_sys::UriBreakConversionEnum_URI_BR_TO_LF,
60            LineBreak::Crlf => uriparser_sys::UriBreakConversionEnum_URI_BR_TO_CRLF,
61        })
62    };
63    unsafe {
64        *(last as *const u8 as *mut u8) = last_byte;
65    }
66    let new_len = last as usize - text.as_ptr() as usize;
67    &mut text[..=new_len]
68}
69unsafe fn text_range_as_bytes<'a>(range: &UriTextRangeA) -> Option<&'a [u8]> {
70    if range.first.is_null() {
71        None
72    } else {
73        Some(core::slice::from_raw_parts(
74            range.first as *const _,
75            range.afterLast as usize - range.first as usize,
76        ))
77    }    
78}
/// A parsed URI backed by a raw `UriUriA` from the C library.
///
/// The lifetime `'a` ties the value to the byte buffer it was parsed from.
#[derive(Debug)]
pub struct Uri<'a> {
    // Raw C structure; its members are released in this type's `Drop` impl.
    raw: UriUriA,
    // Carries the borrow of the source bytes without storing a reference.
    marker: core::marker::PhantomData<&'a [u8]>,
}
84
/// Syntax error reported while parsing a URI.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ParseError {
    // Byte offset into the parsed input at which the error was reported.
    errpos: usize,
}

impl core::fmt::Display for ParseError {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        write!(f, "URI syntax error at byte offset {}", self.errpos)
    }
}

impl std::error::Error for ParseError {}
89
90impl ParseError {
91    pub fn pos(&self) -> usize {
92        self.errpos
93    }
94    fn from_code(code: u32, errpos: usize) -> Self {
95        match code {
96            uriparser_sys::URI_ERROR_SYNTAX => Self { errpos },
97            uriparser_sys::URI_ERROR_NULL => panic!("unexpected error"),
98            uriparser_sys::URI_ERROR_MALLOC => panic!("unexpected error"),
99            uriparser_sys::URI_ERROR_OUTPUT_TOO_LARGE => panic!("unexpected error"),
100            uriparser_sys::URI_ERROR_NOT_IMPLEMENTED => panic!("unexpected error"),
101            uriparser_sys::URI_ERROR_RANGE_INVALID => panic!("unexpected error"),
102            uriparser_sys::URI_ERROR_MEMORY_MANAGER_INCOMPLETE => panic!("unexpected error"),
103            uriparser_sys::URI_ERROR_TOSTRING_TOO_LONG => panic!("unexpected error"),
104            uriparser_sys::URI_ERROR_ADDBASE_REL_BASE => panic!("unexpected error"),
105            uriparser_sys::URI_ERROR_REMOVEBASE_REL_BASE => panic!("unexpected error"),
106            uriparser_sys::URI_ERROR_REMOVEBASE_REL_SOURCE => panic!("unexpected error"),
107            uriparser_sys::URI_ERROR_MEMORY_MANAGER_FAULTY => panic!("unexpected error"),
108            err => unimplemented!("unknown error {}", err),
109        }
110    }
111}
112
113impl<'a> Uri<'a> {
114    pub fn parse(uri: &'a [u8]) -> Result<Self, ParseError> {
115        let mut raw = Default::default();
116        let mut errpos = ptr::null();
117        match unsafe { uriparser_sys::uriParseSingleUriExA(&mut raw, uri.as_ptr() as *const _, uri.as_ptr().add(uri.len()) as *const _, &mut errpos) } as u32
118        {
119            uriparser_sys::URI_SUCCESS => Ok(Self {
120                raw,
121                marker: core::marker::PhantomData,
122            }),
123            err => Err(ParseError::from_code(err, errpos as usize - uri.as_ptr() as usize)),
124        }
125    }
126    pub fn resolve(&self, reference: &Uri<'_>, strict: bool) -> Result<Uri<'static>, ()> {
127        let mut out = Default::default();
128        match unsafe {
129            uriparser_sys::uriAddBaseUriExA(&mut out, &reference.raw, &self.raw, if strict {
130                uriparser_sys::UriResolutionOptionsEnum_URI_RESOLVE_STRICTLY
131            } else {
132                uriparser_sys::UriResolutionOptionsEnum_URI_RESOLVE_IDENTICAL_SCHEME_COMPAT
133            })
134        } as u32 {
135            uriparser_sys::URI_SUCCESS => Ok(Uri {
136                raw: out,
137                marker: core::marker::PhantomData,
138            }),
139            err => Err(())
140        }
141    }
142    pub fn as_relative(&self, base: &Uri<'_>, from_domain_root: bool) -> Result<Uri<'static>, ()> {
143        let mut out = Default::default();
144        match unsafe {
145            uriparser_sys::uriRemoveBaseUriA(&mut out, &self.raw, &base.raw, bool_to_uri(from_domain_root))
146        } as u32 {
147            uriparser_sys::URI_SUCCESS => Ok(Uri {
148                raw: out,
149                marker: core::marker::PhantomData,
150            }),
151            err => Err(())
152        }
153    }
154    pub fn into_owned(mut self) -> Uri<'static> {
155        unsafe {
156            uriparser_sys::uriMakeOwnerA(&mut self.raw);
157        }
158        Uri {
159            raw: self.raw,
160            marker: core::marker::PhantomData,
161        }
162    }
163    pub fn scheme(&self) -> Option<&str> {
164        unsafe { text_range_as_bytes(&self.raw.scheme) }.map(|s| core::str::from_utf8(s).unwrap())
165    }
166    pub fn userinfo(&self) -> Option<&str> {
167        unsafe { text_range_as_bytes(&self.raw.userInfo) }.map(|s| core::str::from_utf8(s).unwrap())
168    }
169    pub fn host(&self) -> Option<&str> {
170        unsafe { text_range_as_bytes(&self.raw.hostText) }.map(|s| core::str::from_utf8(s).unwrap())
171    }
172    pub fn port(&self) -> Option<&str> {
173        unsafe { text_range_as_bytes(&self.raw.portText) }.map(|s| core::str::from_utf8(s).unwrap())
174    }
175    pub fn path(&self) -> Path<'_> {
176        let head = core::ptr::NonNull::new(self.raw.pathHead).map(|ptr| unsafe { &*ptr.as_ptr() });
177        let is_absolute = self.raw.absolutePath == uriparser_sys::URI_TRUE as i32 || (self.host().is_some() && head.is_some());
178        Path {
179            is_absolute,
180            head
181        }
182    }
183
184    pub fn query(&self) -> Option<&str> {
185        unsafe { text_range_as_bytes(&self.raw.query) }.map(|s| core::str::from_utf8(s).unwrap())
186    }
187    pub fn fragment(&self) -> Option<&str> {
188        unsafe { text_range_as_bytes(&self.raw.fragment) }.map(|s| core::str::from_utf8(s).unwrap())
189    }
190}
191impl PartialEq<Uri<'_>> for Uri<'_> {
192    fn eq(&self, other: &Uri<'_>) -> bool {
193        (unsafe {
194            uriparser_sys::uriEqualsUriA(&self.raw, &other.raw)
195        }) as u32 == uriparser_sys::URI_TRUE
196    }
197}
198impl ToString for Uri<'_> {
199    fn to_string(&self) -> String {
200        let mut capacity = 0;
201        assert_eq!(unsafe {
202            uriToStringCharsRequiredA(&self.raw, &mut capacity)
203        } as u32, uriparser_sys::URI_SUCCESS);
204        let mut s = String::with_capacity(capacity as usize);
205        let mut written = 0;
206        assert_eq!(unsafe {
207            uriToStringA(s.as_bytes_mut().as_mut_ptr() as *mut _, &self.raw, capacity, &mut written)
208        } as u32, uriparser_sys::URI_SUCCESS);
209        unsafe {
210            s.as_mut_vec().set_len(written as usize);
211        }
212        s
213    }
214}
/// Borrowed view of a URI's path: a linked list of segments plus an
/// absolute/relative flag.
pub struct Path<'a> {
    // Whether the textual form starts with `/`.
    is_absolute: bool,
    // First segment of the linked list, or `None` when there are no segments.
    head: Option<&'a UriPathSegmentA>,
}
219use fmt::Write;
220impl fmt::Debug for Path<'_> {
221    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
222        f.write_char('"')?;
223        if self.is_absolute {
224            f.write_char('/')?;
225        }
226        let mut segments = self.segments();
227        if let Some(segment) = segments.next() {
228            write!(f, "{}", segment.escape_debug())?;
229            for segment in segments {
230                f.write_char('/')?;
231                write!(f, "{}", segment.escape_debug())?;
232            }
233        }
234        Ok(())
235    }
236}
237impl cmp::PartialEq<[u8]> for Path<'_> {
238    fn eq(&self, mut other: &[u8]) -> bool {
239        macro_rules! unwrap {
240            ($e:expr) => {match $e {
241                Some(v) => v,
242                None => return false
243            }};
244        }
245        if self.is_absolute {
246            other = unwrap!(other.strip_prefix(b"/"));
247        }
248        let mut segments = self.segments();
249        if let Some(segment) = segments.next() {
250            other = unwrap!(other.strip_prefix(segment.as_bytes()));
251            for segment in segments {
252                other = unwrap!(other.strip_prefix(b"/"));
253                other = unwrap!(other.strip_prefix(segment.as_bytes()));
254            }
255        }
256        true
257    }
258}
259impl cmp::PartialEq<str> for Path<'_> {
260    fn eq(&self, other: &str) -> bool {
261        self.eq(other.as_bytes())
262    }
263}
264impl cmp::PartialEq for Path<'_> {
265    fn eq(&self, other: &Self) -> bool {
266        self.is_absolute.eq(&other.is_absolute) && self.segments().eq(other.segments())
267    }
268}
269impl fmt::Display for Path<'_> {
270    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
271        if self.is_absolute {
272            f.write_char('/')?;
273        }
274        let mut segments = self.segments();
275        if let Some(segment) = segments.next() {
276            f.write_str(segment)?;
277            for segment in segments {
278                f.write_char('/')?;
279                f.write_str(segment)?;
280            }
281        }
282        Ok(())
283    }
284}
285impl<'a> Path<'a> {
286    pub fn segments(&self) -> impl Iterator<Item = &'a str> + 'a {
287        let mut next = self.head;
288        core::iter::from_fn(move || {
289            next.map(|segment| {
290                next = core::ptr::NonNull::new(segment.next).map(|ptr| unsafe { &*ptr.as_ptr() });
291                core::str::from_utf8(unsafe { text_range_as_bytes(&segment.text) }.unwrap()).unwrap()
292            })
293        })
294    }
295}
296impl Uri<'_> {
297    fn into_raw(self) -> uriparser_sys::UriUriA {
298        self.raw
299    }
300    unsafe fn from_raw(raw: uriparser_sys::UriUriA) -> Self {
301        Self {
302            raw,
303            marker: core::marker::PhantomData,
304        }
305    }
306    unsafe fn as_raw(&self) -> &uriparser_sys::UriUriA {
307        &self.raw
308    }
309    unsafe fn as_mut_raw(&mut self) -> &mut uriparser_sys::UriUriA {
310        &mut self.raw
311    }
312}
313
314impl Drop for Uri<'_> {
315    fn drop(&mut self) {
316        unsafe {
317            uriparser_sys::uriFreeUriMembersA(&mut self.raw);
318        }
319    }
320}
321
#[cfg(test)]
mod tests {
    use super::Uri;

    /// A well-formed URI exposes its query; a second `#` in the fragment is
    /// rejected.
    #[test]
    fn parsing() {
        let parsed = Uri::parse(b"https://www.youtube.com/watch?v=HOJ1NVtlnyQ").unwrap();
        let query = parsed.query();
        assert_eq!(Some("v=HOJ1NVtlnyQ"), query);

        let rejected = Uri::parse(b"foobar://abc.com/ooh##");
        rejected.expect_err("no hashes in fragment");
    }
}