anyxml_uri/
uri.rs

1use std::{
2    borrow::{Borrow, Cow},
3    ops::Deref,
4    path::Path,
5    rc::Rc,
6    str::{from_utf8, from_utf8_unchecked},
7    sync::Arc,
8};
9
10use crate::ParseRIError;
11
12#[derive(Debug, PartialEq, Eq, Hash)]
13#[repr(transparent)]
14pub struct URIStr {
15    uri: str,
16}
17
18impl URIStr {
19    fn new(s: &str) -> &Self {
20        unsafe {
21            // # Safety
22            // Since `URIStr` is a transparent newtype of `str`,
23            // the bit patterns are exactly the same and have the same features.
24            &*(s as *const str as *const Self)
25        }
26    }
27
28    /// # Reference
29    /// [5.2.  Relative Resolution](https://datatracker.ietf.org/doc/html/rfc3986#section-5.2)
30    pub fn resolve(&self, reference: &Self) -> URIString {
31        use Component::*;
32
33        assert!(
34            self.is_absolute(),
35            "'{}' is not absolute",
36            self.as_escaped_str()
37        );
38
39        let mut ref_components = reference.components().peekable();
40        if ref_components
41            .next_if(|comp| matches!(comp, Scheme(_)))
42            .is_some()
43        {
44            let mut ret = reference.to_owned();
45            ret.normalize();
46            return ret;
47        }
48
49        if ref_components
50            .next_if(|comp| matches!(comp, Authority { .. }))
51            .is_some()
52        {
53            // has authority
54            let mut ret = URIString {
55                uri: [self.scheme().unwrap(), ":", &reference.uri].concat(),
56            };
57            ret.normalize();
58            return ret;
59        }
60
61        let mut components = self.components().peekable();
62        let mut uri = String::new();
63        if let Some(Scheme(scheme)) = components.next_if(|comp| matches!(comp, Scheme(_))) {
64            uri.push_str(scheme);
65            uri.push(':');
66        }
67        if let Some(Authority {
68            userinfo,
69            host,
70            port,
71        }) = components.next_if(|comp| matches!(comp, Authority { .. }))
72        {
73            uri.push_str("//");
74            if let Some(userinfo) = userinfo {
75                uri.push_str(userinfo);
76                uri.push(':');
77            }
78            uri.push_str(host);
79            if let Some(port) = port {
80                uri.push(':');
81                uri.push_str(port);
82            }
83        }
84
85        if ref_components
86            .next_if(|comp| matches!(comp, RootSegment))
87            .is_some()
88        {
89            uri.push_str(&reference.uri);
90            let mut ret = URIString { uri };
91            ret.normalize();
92            return ret;
93        }
94
95        let mut segments = vec![];
96        let has_root = components
97            .next_if(|comp| matches!(comp, RootSegment))
98            .is_some();
99        let mut has_dot_segment = false;
100        while let Some(Segment(segment)) = components.next_if(|comp| matches!(comp, Segment(_))) {
101            segments.push(segment);
102            has_dot_segment |= segment == "." || segment == "..";
103        }
104        if has_dot_segment {
105            segments = normalize_path_segments(segments.into_iter(), has_root);
106        }
107
108        let mut has_path = false;
109        if let Some(Segment(segment)) = ref_components.next_if(|comp| matches!(comp, Segment(_))) {
110            let mut buf = vec![segment];
111            while let Some(Segment(segment)) =
112                ref_components.next_if(|comp| matches!(comp, Segment(_)))
113            {
114                buf.push(segment);
115            }
116            if buf.len() > 1 || !buf[0].is_empty() {
117                segments.pop();
118                segments.extend(buf);
119                has_path = true;
120            }
121        }
122        build_normalized_path(segments.into_iter(), has_root, &mut uri);
123
124        if let Some(Query(query)) = ref_components.next_if(|comp| matches!(comp, Query(_))) {
125            uri.push('?');
126            uri.push_str(query);
127        } else if !has_path
128            && let Some(Query(query)) = components.next_if(|comp| matches!(comp, Query(_)))
129        {
130            uri.push('?');
131            uri.push_str(query);
132        }
133
134        if let Some(Fragment(fragment)) = ref_components.next() {
135            uri.push('#');
136            uri.push_str(fragment);
137        }
138
139        URIString { uri }
140    }
141
142    pub fn as_escaped_str(&self) -> &str {
143        &self.uri
144    }
145
146    pub fn as_unescaped_str(&self) -> Option<Cow<'_, str>> {
147        unescape(&self.uri).ok()
148    }
149
150    /// # Reference
151    /// [4.3.  Absolute URI](https://datatracker.ietf.org/doc/html/rfc3986#section-4.3)
152    pub fn is_absolute(&self) -> bool {
153        self.scheme().is_some() && self.fragment().is_none()
154    }
155
156    /// # Reference
157    /// [4.2.  Relative Reference](https://datatracker.ietf.org/doc/html/rfc3986#section-4.2)
158    pub fn is_relative(&self) -> bool {
159        self.scheme().is_none()
160    }
161
162    pub fn scheme(&self) -> Option<&str> {
163        let pos = self.uri.bytes().position(is_reserved)?;
164        (self.uri.as_bytes()[pos] == b':').then_some(&self.uri[..pos])
165    }
166
167    pub fn authority(&self) -> Option<&str> {
168        let rem = self
169            .uri
170            .strip_prefix("//")
171            .or_else(|| self.uri.split_once("://").map(|p| p.1))?;
172        Some(rem.split_once('/').map(|p| p.0).unwrap_or(rem))
173    }
174
175    pub fn userinfo(&self) -> Option<&str> {
176        Some(self.authority()?.split_once('@')?.0)
177    }
178
179    pub fn host(&self) -> Option<&str> {
180        let mut auth = self.authority()?;
181        if let Some((_userinfo, rem)) = auth.split_once('@') {
182            auth = rem;
183        }
184        if let Some((host, port)) = auth.rsplit_once(':')
185            && port.bytes().all(|b| b.is_ascii_digit())
186        {
187            auth = host;
188        }
189        Some(auth)
190    }
191
192    pub fn port(&self) -> Option<&str> {
193        let (_, port) = self.authority()?.rsplit_once(':')?;
194        port.bytes().all(|b| b.is_ascii_digit()).then_some(port)
195    }
196
197    pub fn path(&self) -> &str {
198        let mut path = &self.uri;
199        if let Some(scheme) = self.scheme() {
200            // has scheme
201            path = &path[scheme.len() + 1..];
202        }
203        if let Some(rem) = path.strip_prefix("//") {
204            // has authority
205            let pos = rem.bytes().position(|b| b == b'/').unwrap_or(rem.len());
206            path = &rem[pos..]
207        }
208
209        path.split_once(['?', '#']).map(|p| p.0).unwrap_or(path)
210    }
211
212    pub fn query(&self) -> Option<&str> {
213        let pos = self.uri.bytes().position(|b| b == b'?' || b == b'#')?;
214        if self.uri.as_bytes()[pos] == b'#' {
215            return None;
216        }
217        let query = &self.uri[pos + 1..];
218        let pos = query.bytes().position(|b| b == b'#').unwrap_or(query.len());
219        Some(&query[..pos])
220    }
221
222    pub fn fragment(&self) -> Option<&str> {
223        let pos = self.uri.bytes().position(|b| b == b'#')?;
224        Some(&self.uri[pos + 1..])
225    }
226
227    pub fn components(&self) -> Components<'_> {
228        Components::new(&self.uri)
229    }
230}
231
232impl ToOwned for URIStr {
233    type Owned = URIString;
234
235    fn to_owned(&self) -> Self::Owned {
236        URIString {
237            uri: self.uri.to_owned(),
238        }
239    }
240}
241
242impl From<&URIStr> for URIString {
243    fn from(value: &URIStr) -> Self {
244        value.to_owned()
245    }
246}
247
248impl Clone for Box<URIStr> {
249    fn clone(&self) -> Self {
250        self.as_ref().into()
251    }
252}
253
254macro_rules! impl_boxed_convertion_uri_str {
255    ($( $t:ident ),*) => {
256        $(
257            impl From<&URIStr> for $t<URIStr> {
258                fn from(value: &URIStr) -> Self {
259                    let boxed: $t<str> = value.uri.into();
260                    unsafe {
261                        // # Safety
262                        // Since `URIStr` is a transparent newtype of `str`,
263                        // the bit patterns are exactly the same and have the same features.
264                        std::mem::transmute(boxed)
265                    }
266                }
267            }
268        )*
269    };
270}
271impl_boxed_convertion_uri_str!(Box, Rc, Arc);
272
273#[derive(Debug, Clone, PartialEq, Eq, Hash)]
274#[repr(transparent)]
275pub struct URIString {
276    /// Escaped URI string.
277    ///
278    /// Parts generated from UTF-8 strings can always be converted back
279    /// to the original UTF-8 byte sequence.
280    /// Similarly, the parts generated from Path can probably be converted back
281    /// to the original Path byte sequence.
282    ///
283    /// As a result of resolving URI references, there may be a mixture of parts generated
284    /// from UTF-8 strings and parts generated from Paths, so the whole may not always revert
285    /// to a UTF-8 string or Path byte sequence.
286    uri: String,
287}
288
289impl URIString {
290    pub fn parse(uri: impl AsRef<str>) -> Result<Self, ParseRIError> {
291        fn _parse(uri: &str) -> Result<URIString, ParseRIError> {
292            let uri = escape_except(uri, |b| {
293                b.is_ascii() && (is_reserved(b as u8) || is_unreserved(b as u8))
294            });
295            let mut bytes = uri.as_bytes();
296            parse_uri_reference(&mut bytes)?;
297            if !bytes.is_empty() {
298                Err(ParseRIError::NotTermination)
299            } else {
300                Ok(URIString {
301                    uri: uri.into_owned(),
302                })
303            }
304        }
305        _parse(uri.as_ref())
306    }
307
308    /// # Note
309    /// In the current implementation, paths that cannot be converted to UTF-8 strings
310    /// cannot be handled.  \
311    /// I don't think there will be any problems in most environments, but there may be
312    /// some paths that cannot be handled.
313    pub fn parse_file_path(path: impl AsRef<Path>) -> Result<Self, ParseRIError> {
314        #[cfg(target_family = "unix")]
315        fn _parse_file_path(path: &Path) -> Result<URIString, ParseRIError> {
316            let mut path_str = path.to_str().ok_or(ParseRIError::Unsupported)?.to_owned();
317            if (path.is_dir() || (path.as_os_str().as_encoded_bytes().ends_with(b"\\")))
318                && !path_str.ends_with('/')
319            {
320                path_str.push('/');
321            }
322            if path.is_absolute() {
323                path_str.insert_str(0, "file://");
324            }
325            URIString::parse(path_str)
326        }
327        #[cfg(target_family = "windows")]
328        fn _parse_file_path(path: &Path) -> Result<URIString, ParseRIError> {
329            use std::path::{Component::*, Prefix::*};
330
331            let mut path_str = String::new();
332            let mut verbatim = false;
333            for comp in path.components() {
334                match comp {
335                    Prefix(prefix) => match prefix.kind() {
336                        Verbatim(root) => {
337                            path_str.push_str("file:///");
338                            path_str.push_str(
339                                &root
340                                    .to_str()
341                                    .ok_or(ParseRIError::Unsupported)?
342                                    .replace('/', "%2F"),
343                            );
344                            verbatim = true;
345                        }
346                        VerbatimUNC(server, root) => {
347                            path_str.push_str("file://");
348                            path_str.push_str(
349                                &server
350                                    .to_str()
351                                    .ok_or(ParseRIError::Unsupported)?
352                                    .replace('/', "%2F"),
353                            );
354                            path_str.push('/');
355                            path_str.push_str(
356                                &root
357                                    .to_str()
358                                    .ok_or(ParseRIError::Unsupported)?
359                                    .replace('/', "%2F"),
360                            );
361                            verbatim = true;
362                        }
363                        VerbatimDisk(letter) => {
364                            path_str.push_str("file:");
365                            path_str.push(letter as char);
366                            path_str.push(':');
367                            verbatim = true;
368                        }
369                        DeviceNS(device) => {
370                            path_str.push_str("file:///");
371                            path_str.push_str(device.to_str().ok_or(ParseRIError::Unsupported)?);
372                        }
373                        UNC(server, root) => {
374                            path_str.push_str("file://");
375                            path_str.push_str(server.to_str().ok_or(ParseRIError::Unsupported)?);
376                            path_str.push('/');
377                            path_str.push_str(root.to_str().ok_or(ParseRIError::Unsupported)?);
378                        }
379                        Disk(letter) => {
380                            path_str.push_str("file:");
381                            path_str.push(letter as char);
382                            path_str.push(':');
383                        }
384                    },
385                    RootDir => {}
386                    CurDir => path_str.push_str("/."),
387                    ParentDir => path_str.push_str("/.."),
388                    Normal(segment) => {
389                        path_str.push('/');
390                        let segment = segment.to_str().ok_or(ParseRIError::Unsupported)?;
391                        if verbatim {
392                            path_str.push_str(&segment.replace('/', "%2F"));
393                        } else {
394                            path_str.push_str(segment);
395                        }
396                    }
397                }
398            }
399            if (path.is_dir()
400                || (path.as_os_str().as_encoded_bytes().ends_with(b"\\")
401                    || (!verbatim && path.as_os_str().as_encoded_bytes().ends_with(b"/"))))
402                && !path_str.ends_with('/')
403            {
404                path_str.push('/');
405            }
406            URIString::parse(path_str)
407        }
408        #[cfg(all(not(target_family = "unix"), not(target_family = "windows")))]
409        fn _parse_file_path(path: &Path) -> Result<URIString, ParseRIError> {
410            todo!()
411        }
412        _parse_file_path(path.as_ref())
413    }
414
415    pub fn into_boxed_uri_str(self) -> Box<URIStr> {
416        Box::from(self.as_ref())
417    }
418
419    /// # Reference
420    /// [6.2.2.  Syntax-Based Normalization](https://datatracker.ietf.org/doc/html/rfc3986#section-6.2.2).
421    pub fn normalize(&mut self) {
422        use Component::*;
423
424        let mut uri = String::with_capacity(self.uri.len());
425        let mut paths = vec![];
426        let mut query = None;
427        let mut fragment = None;
428        let mut has_root = false;
429        for comp in self.components() {
430            match comp {
431                Scheme(scheme) => {
432                    uri.push_str(&scheme.to_ascii_lowercase());
433                    uri.push(':');
434                }
435                Authority {
436                    userinfo,
437                    host,
438                    port,
439                } => {
440                    uri.push_str("//");
441                    if let Some(userinfo) = userinfo {
442                        uri.push_str(userinfo);
443                        uri.push('@');
444                    }
445                    uri.push_str(host);
446                    if let Some(port) = port {
447                        uri.push(':');
448                        uri.push_str(port);
449                    }
450                }
451                RootSegment => has_root = true,
452                Segment(segment) => paths.push(segment),
453                Query(q) => query = Some(q),
454                Fragment(f) => fragment = Some(f),
455            }
456        }
457        build_normalized_path(paths.into_iter(), has_root, &mut uri);
458        if let Some(query) = query {
459            uri.push('?');
460            uri.push_str(query);
461        }
462        if let Some(fragment) = fragment {
463            uri.push('#');
464            uri.push_str(fragment);
465        }
466        self.uri = uri;
467    }
468}
469
470impl AsRef<URIStr> for URIString {
471    fn as_ref(&self) -> &URIStr {
472        URIStr::new(&self.uri)
473    }
474}
475
476impl Borrow<URIStr> for URIString {
477    fn borrow(&self) -> &URIStr {
478        self.as_ref()
479    }
480}
481
482impl Deref for URIString {
483    type Target = URIStr;
484
485    fn deref(&self) -> &Self::Target {
486        self.as_ref()
487    }
488}
489
490macro_rules! impl_convertion_uri_string {
491    ($( $t:ty ),*) => {
492        $(
493            impl From<URIString> for $t {
494                fn from(value: URIString) -> $t {
495                    From::from(value.as_ref())
496                }
497            }
498        )*
499    };
500}
501impl_convertion_uri_string!(Box<URIStr>, Rc<URIStr>, Arc<URIStr>);
502
503fn build_normalized_path<'a>(
504    segments: impl Iterator<Item = &'a str>,
505    has_root: bool,
506    buffer: &mut String,
507) {
508    let segments = normalize_path_segments(segments, has_root);
509    if has_root {
510        buffer.push('/');
511    }
512    for (i, seg) in segments.into_iter().enumerate() {
513        if i > 0 {
514            buffer.push('/');
515        }
516        buffer.push_str(seg);
517    }
518}
519
/// Resolves the `.` and `..` segments of a path.
///
/// * `.` is dropped.
/// * `..` removes the preceding segment. If there is nothing to remove, it is dropped for
///   rooted paths (`has_root`) and kept for rootless ones.
/// * If the last input segment was `.` or `..`, an empty segment is appended so that the
///   rebuilt path ends with `/`.
fn normalize_path_segments<'a>(
    segments: impl Iterator<Item = &'a str>,
    has_root: bool,
) -> Vec<&'a str> {
    let mut resolved: Vec<&'a str> = Vec::new();
    let mut ends_with_dot = false;

    for segment in segments {
        ends_with_dot = matches!(segment, "." | "..");
        match segment {
            "." => {}
            ".." => {
                // Only an ordinary segment can be cancelled out; a kept ".." cannot.
                let can_pop = resolved.last().map_or(false, |&prev| prev != "..");
                if can_pop {
                    resolved.pop();
                } else if !has_root {
                    resolved.push(segment);
                }
            }
            other => resolved.push(other),
        }
    }

    if ends_with_dot {
        resolved.push("");
    }
    resolved
}
549
550/// # Reference
551/// [4.1.  URI Reference](https://datatracker.ietf.org/doc/html/rfc3986#section-4.1)
552///
553/// ```text
554/// URI-reference = URI / relative-ref
555/// ```
556fn parse_uri_reference(b: &mut &[u8]) -> Result<(), ParseRIError> {
557    if b.is_empty() || matches!(b[0], b'/' | b'?' | b'#') {
558        // If `b` is an empty string or starts with either '/', '?' or '#',
559        // it is definitely 'relative-ref'.
560        parse_relative_ref(b)
561    } else {
562        // Otherwise, it is necessary to distinguish between `URI` and `relative-ref`
563        // starting with `relative-part` that matches `path-noscheme`.
564
565        if !b[0].is_ascii_alphabetic() {
566            // Since `scheme` begins with at least one `ALPHA`,
567            // if it does not, it is definitely `irelative-ref`.
568            parse_relative_ref(b)
569        } else {
570            // The characters that can be used in `scheme` are very limited,
571            // so it might be quicker to try parsing `scheme` to distinguish between them?
572            // [25] scheme ::= ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
573            if let Some(&c) = b
574                .iter()
575                .find(|&&b| !b.is_ascii_alphanumeric() && !matches!(b, b'+' | b'-' | b'.'))
576                && c == b':'
577            {
578                parse_uri(b)
579            } else {
580                parse_relative_ref(b)
581            }
582        }
583    }
584}
585
586/// # Reference
587/// [3.  Syntax Components](https://datatracker.ietf.org/doc/html/rfc3986#section-3)
588///
589/// ```text
590/// URI         = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
591/// ```
592fn parse_uri(b: &mut &[u8]) -> Result<(), ParseRIError> {
593    parse_scheme(b)?;
594    *b = b
595        .strip_prefix(b":")
596        .ok_or(ParseRIError::InvalidSchemeSeparator)?;
597    parse_hier_part(b)?;
598    if let Some(query) = b.strip_prefix(b"?") {
599        *b = query;
600        parse_query(b)?;
601    }
602    if let Some(fragment) = b.strip_prefix(b"#") {
603        *b = fragment;
604        parse_fragment(b)?;
605    }
606    Ok(())
607}
608
609/// # Reference
610/// [3.1.  Scheme](https://datatracker.ietf.org/doc/html/rfc3986#section-3.1)
611///
612/// ```text
613/// scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
614/// ```
615fn parse_scheme(b: &mut &[u8]) -> Result<(), ParseRIError> {
616    if b.is_empty() || !b[0].is_ascii_alphabetic() {
617        return Err(ParseRIError::InvalidScheme);
618    }
619    let pos = b
620        .iter()
621        .position(|&b| !b.is_ascii_alphanumeric() && !matches!(b, b'+' | b'-' | b'.'))
622        .unwrap_or(b.len());
623    *b = &b[pos..];
624    Ok(())
625}
626
627/// # Reference
628/// [3.  Syntax Components](https://datatracker.ietf.org/doc/html/rfc3986#section-3)
629///
630/// ```text
631/// hier-part   = "//" authority path-abempty
632///             / path-absolute
633///             / path-rootless
634///             / path-empty
635/// ```
636fn parse_hier_part(b: &mut &[u8]) -> Result<(), ParseRIError> {
637    if let Some(rem) = b.strip_prefix(b"/") {
638        // If `b` starts with '/', `b` starts with 'authority' or `path-absolute`,
639
640        if let Some(rem) = rem.strip_prefix(b"/") {
641            // If `b` starts with '//', it should be followed by 'authority'.
642            // This is because 'path-absolute' is followed by exactly one '/' at the beginning
643            // and optionally 'segment-nz', so there cannot be two consecutive '/' characters.
644            *b = rem;
645            parse_authority(b)?;
646            parse_path_abempty(b)
647        } else {
648            // path-absolute = "/" [ segment-nz *( "/" segment ) ]
649            // segment-nz    = 1*pchar
650            parse_path_absolute(b)
651        }
652    } else {
653        // otherwise, `b` starts with 'path-rootless' or 'path-empty'
654        let mut dum = *b;
655        if parse_pchar(&mut dum).is_ok() {
656            // If 'path-rootless' follows, one or more 'pchar' should follow.
657            parse_path_rootless(b)
658        } else {
659            // If not, it is 'path-empty'.
660            // Since 'path-empty' is an empty string,
661            // we can simply return `Ok` without doing anything.
662            Ok(())
663        }
664    }
665}
666
667/// # Reference
668/// [3.2.  Authority](https://datatracker.ietf.org/doc/html/rfc3986#section-3.2)
669///
670/// ```text
671/// authority   = [ userinfo "@" ] host [ ":" port ]
672/// ```
673fn parse_authority(b: &mut &[u8]) -> Result<(), ParseRIError> {
674    if b.starts_with(b"[") {
675        // If `b` starts with '[', it is definitely an `host` that matches `IP-literal`.
676        parse_ip_literal(b)?;
677        if let Some(rem) = b.strip_prefix(b":") {
678            *b = rem;
679            parse_port(b)?;
680        }
681        return Ok(());
682    }
683
684    // If not, it may start with `userinfo`, or it may start with `host`
685    // that matches `IPv4address` or `reg-name`.
686    //
687    // If it is either `IPv4address` or `reg-name`, there is no need to consider `IPv4address`.
688    // This is because `reg-name` includes `IPv4address`. More specifically, since `unreserved`
689    // contains `DIGIT` and `.`, `IPv4address` can be regarded as a specific sequence of `unreserved`.
690    //
691    // `userinfo` and `reg-name` are rules that share characters other than colons.
692    // Therefore, they can be distinguished using the following algorithm.
693    //
694    // 1. Increment the counter as long as it matches `userinfo`.
695    // 2. If the first ":" is encountered, note its position.
696    // 3. Determine the matching rule according to the characters that did not match `userinfo`.
697    //      i.   If it is "@", the string seen so far is `userinfo`.
698    //      ii.  If it is "[" , then an `host` matching "IP-literal" should start there,
699    //           but since there is no "@" immediately before it, it is an error.
700    //      iii. In other cases, if the position of ":" is noted, the string before it is `host`;
701    //                           if not, all strings seen so far are `host`.
702    //
703    // userinfo    = *( unreserved / pct-encoded / sub-delims / ":" )
704    //
705    // reg-name    = *( unreserved / pct-encoded / sub-delims )
706    // unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
707    //
708    // IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
709    let mut colon = usize::MAX;
710    let mut now = 0;
711    let mut t = *b;
712    while !t.is_empty() {
713        let pos = t
714            .iter()
715            .position(|&b| !is_unreserved(b) && !is_sub_delims(b) && b != b'%')
716            .unwrap_or(t.len());
717        t = &t[pos..];
718        now += pos;
719        if let Some(rem) = t.strip_prefix(b":") {
720            now += 1;
721            t = rem;
722            colon = colon.min(now);
723        } else {
724            break;
725        }
726    }
727
728    debug_assert_eq!(now, b.len() - t.len());
729
730    if let Some(rem) = t.strip_prefix(b"@") {
731        *b = rem;
732        parse_host(b)?;
733        if let Some(rem) = b.strip_prefix(b":") {
734            *b = rem;
735            parse_port(b)?;
736        }
737        Ok(())
738    } else if t.starts_with(b"[") {
739        Err(ParseRIError::InvalidAuthority)
740    } else if colon < usize::MAX {
741        *b = &b[colon + 1..];
742        parse_port(b)
743    } else {
744        *b = t;
745        Ok(())
746    }
747}
748
749// This function has no use.
750// /// # Reference
751// /// [3.2.1.  User Information](https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.1)
752// ///
753// /// ```text
754// /// userinfo    = *( unreserved / pct-encoded / sub-delims / ":" )
755// /// ```
756// fn parse_userinfo(b: &mut &[u8]) -> Result<(), ParseRIError> {
757//     todo!()
758// }
759
760/// # Reference
761/// [3.2.2.  Host]
762///
763/// ```text
764/// host        = IP-literal / IPv4address / reg-name
765/// ```
766fn parse_host(b: &mut &[u8]) -> Result<(), ParseRIError> {
767    if b.starts_with(b"[") {
768        parse_ip_literal(b)
769    } else {
770        // Since `IPv4address` is covered by `reg-name`, it does not need to be considered.
771        parse_reg_name(b)
772    }
773}
774
775/// # Reference
776/// [3.2.2.  Host]
777///
778/// ```text
779/// IP-literal  = "[" ( IPv6address / IPvFuture  ) "]"
780/// ```
781fn parse_ip_literal(b: &mut &[u8]) -> Result<(), ParseRIError> {
782    *b = b.strip_prefix(b"[").ok_or(ParseRIError::InvalidIPLiteral)?;
783    if !b.is_empty() && b[0].eq_ignore_ascii_case(&b'v') {
784        parse_ipv_future(b)?;
785    } else {
786        parse_ipv6_address(b)?;
787    }
788    *b = b.strip_prefix(b"]").ok_or(ParseRIError::InvalidIPLiteral)?;
789    Ok(())
790}
791
792/// # Reference
793/// [3.2.2.  Host]
794///
795/// ```text
796/// IPvFuture   = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
797/// ```
798fn parse_ipv_future(b: &mut &[u8]) -> Result<(), ParseRIError> {
799    if b.is_empty() || !b[0].eq_ignore_ascii_case(&b'v') {
800        return Err(ParseRIError::InvalidIPvFuture);
801    }
802    *b = &b[1..];
803    let pos = b
804        .iter()
805        .position(|&b| !b.is_ascii_hexdigit())
806        .unwrap_or(b.len());
807    if !(1..=b.len() - 2).contains(&pos) {
808        return Err(ParseRIError::InvalidIPvFuture);
809    }
810    *b = &b[pos..];
811    *b = b.strip_prefix(b".").ok_or(ParseRIError::InvalidIPvFuture)?;
812    let pos = b
813        .iter()
814        .position(|&b| !is_unreserved(b) && !is_sub_delims(b) && b != b':')
815        .unwrap_or(b.len());
816    if pos == 0 {
817        return Err(ParseRIError::InvalidIPvFuture);
818    }
819    *b = &b[pos..];
820    Ok(())
821}
822
823/// # Reference
824/// [3.2.2.  Host]
825///
826/// ```text
827/// IPv6address =                            6( h16 ":" ) ls32
828///             /                       "::" 5( h16 ":" ) ls32
829///             / [               h16 ] "::" 4( h16 ":" ) ls32
830///             / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
831///             / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
832///             / [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
833///             / [ *4( h16 ":" ) h16 ] "::"              ls32
834///             / [ *5( h16 ":" ) h16 ] "::"              h16
835///             / [ *6( h16 ":" ) h16 ] "::"
836///  ls32       = ( h16 ":" h16 ) / IPv4address
837///             ; least-significant 32 bits of address
838///  h16        = 1*4HEXDIG
839///             ; 16 bits of address represented in hexadecimal
840/// ```
841fn parse_ipv6_address(b: &mut &[u8]) -> Result<(), ParseRIError> {
842    let mut cnt = 1;
843    let mut omit = false;
844    if let Some(rem) = b.strip_prefix(b":") {
845        *b = rem;
846        omit = true;
847    } else {
848        parse_h16(b)?;
849    }
850
851    while cnt + (omit as i32) < 8
852        && let Some(rem) = b.strip_prefix(b":")
853    {
854        *b = rem;
855        if b.starts_with(b":") {
856            if omit {
857                return Err(ParseRIError::InvalidIPv6address);
858            }
859            omit = true;
860            cnt += 1;
861            continue;
862        }
863
864        // It's not a smart approach, but it'll probably work...
865        //
866        // Checking `h16` first will not work because it cannot be distinguished
867        // from the first octet of the IPv4 address.
868        //
869        // Checking the positions where ':' and '.' appear also seems unlikely to work,
870        // considering cases where such characters appear in the segments of the following paths.
871        let mut dum = *b;
872        if parse_ipv4_address(&mut dum).is_ok() {
873            *b = dum;
874            // An IPv4 address consumes two hextets.
875            cnt += 2;
876            // An IPv4 address only appears at the end.
877            break;
878        } else if !b.is_empty() && b[0].is_ascii_hexdigit() {
879            parse_h16(b)?;
880        }
881    }
882
883    // If "::" is included, some hextets may be omitted, resulting in fewer than eight.
884    // Otherwise, exactly eight hextets are required.
885    if (omit && cnt <= 8) || (!omit && cnt == 8) {
886        Ok(())
887    } else {
888        Err(ParseRIError::InvalidIPv6address)
889    }
890}
891
892/// # Reference
893/// [3.2.2.  Host]
894///
895/// ```text
896///  h16        = 1*4HEXDIG
897///             ; 16 bits of address represented in hexadecimal
898/// ```
899fn parse_h16(b: &mut &[u8]) -> Result<(), ParseRIError> {
900    let pos = b
901        .iter()
902        .position(|&b| !b.is_ascii_hexdigit())
903        .unwrap_or(b.len());
904    if pos == 0 {
905        Err(ParseRIError::InvalidH16)
906    } else {
907        *b = &b[pos.min(4)..];
908        Ok(())
909    }
910}
911
912/// # Reference
913/// [3.2.2.  Host]
914///
915/// ```text
916/// IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
917/// dec-octet   = DIGIT                 ; 0-9
918///             / %x31-39 DIGIT         ; 10-99
919///             / "1" 2DIGIT            ; 100-199
920///             / "2" %x30-34 DIGIT     ; 200-249
921///             / "25" %x30-35          ; 250-255
922/// ```
923fn parse_ipv4_address(b: &mut &[u8]) -> Result<(), ParseRIError> {
924    parse_dec_octet(b)?;
925    for _ in 0..3 {
926        *b = b.strip_prefix(b".").ok_or(ParseRIError::InvalidDecOctet)?;
927        parse_dec_octet(b)?;
928    }
929    Ok(())
930}
931fn parse_dec_octet(b: &mut &[u8]) -> Result<(), ParseRIError> {
932    let len = match b {
933        [b'2', b'5', b'0'..=b'5', ..] => 3,
934        [b'2', b'0'..=b'4', b'0'..=b'9', ..] => 3,
935        [b'1', b'0'..=b'9', b'0'..=b'9', ..] => 3,
936        [b'1'..=b'9', b'0'..=b'9', ..] => 2,
937        [b'0'..=b'9', ..] => 1,
938        _ => return Err(ParseRIError::InvalidDecOctet),
939    };
940    *b = &b[len..];
941    Ok(())
942}
943
944/// # Reference
945/// [3.2.2.  Host]
946///
947/// ```text
948/// reg-name    = *( unreserved / pct-encoded / sub-delims )
949/// ```
950fn parse_reg_name(b: &mut &[u8]) -> Result<(), ParseRIError> {
951    // pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
952    // reg-name      = pchar - (":" | "@")
953    while !b.is_empty() && !matches!(b[0], b':' | b'@') && parse_pchar(b).is_ok() {}
954    Ok(())
955}
956
957/// # Reference
958/// [3.2.3.  Port](https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.3)
959///
960/// ```text
961/// port        = *DIGIT
962/// ```
963fn parse_port(b: &mut &[u8]) -> Result<(), ParseRIError> {
964    let pos = b
965        .iter()
966        .position(|&b| !b.is_ascii_digit())
967        .unwrap_or(b.len());
968    *b = &b[pos..];
969    Ok(())
970}
971
972/// # Reference
973/// [3.3.  Path](https://datatracker.ietf.org/doc/html/rfc3986#section-3.3)
974///
975/// ```text
976/// path-abempty  = *( "/" segment )
977/// ```
978fn parse_path_abempty(b: &mut &[u8]) -> Result<(), ParseRIError> {
979    while let Some(rem) = b.strip_prefix(b"/") {
980        *b = rem;
981        parse_segment(b)?;
982    }
983    Ok(())
984}
985
986/// # Reference
987/// [3.3.  Path](https://datatracker.ietf.org/doc/html/rfc3986#section-3.3)
988///
989/// ```text
990/// path-absolute = "/" [ segment-nz *( "/" segment ) ]
991/// ```
992fn parse_path_absolute(b: &mut &[u8]) -> Result<(), ParseRIError> {
993    *b = b
994        .strip_prefix(b"/")
995        .ok_or(ParseRIError::InvalidPathAbsolute)?;
996    if parse_segment_nz(b).is_ok() {
997        while let Some(rem) = b.strip_prefix(b"/") {
998            *b = rem;
999            parse_segment(b)?;
1000        }
1001    }
1002    Ok(())
1003}
1004
1005/// # Reference
1006/// [3.3.  Path](https://datatracker.ietf.org/doc/html/rfc3986#section-3.3)
1007///
1008/// ```text
1009/// path-noscheme = segment-nz-nc *( "/" segment )
1010/// ```
1011fn parse_path_noscheme(b: &mut &[u8]) -> Result<(), ParseRIError> {
1012    parse_segment_nz_nc(b)?;
1013    while let Some(rem) = b.strip_prefix(b"/") {
1014        *b = rem;
1015        parse_segment(b)?;
1016    }
1017    Ok(())
1018}
1019
1020/// # Reference
1021/// [3.3.  Path](https://datatracker.ietf.org/doc/html/rfc3986#section-3.3)
1022///
1023/// ```text
1024/// path-rootless = segment-nz *( "/" segment )
1025/// ```
1026fn parse_path_rootless(b: &mut &[u8]) -> Result<(), ParseRIError> {
1027    parse_segment_nz(b)?;
1028    while let Some(rem) = b.strip_prefix(b"/") {
1029        *b = rem;
1030        parse_segment(b)?;
1031    }
1032    Ok(())
1033}
1034
// No parser is needed for `path-empty`: it matches zero characters, so there is nothing to consume.
1036// /// # Reference
1037// /// [3.3.  Path](https://datatracker.ietf.org/doc/html/rfc3986#section-3.3)
1038// ///
1039// /// ```text
1040// /// path-empty    = 0<pchar>
1041// /// ```
1042// fn parse_path_empty(b: &mut &[u8]) -> Result<(), ParseRIError> {
1043//     todo!()
1044// }
1045
1046/// # Reference
1047/// [3.3.  Path](https://datatracker.ietf.org/doc/html/rfc3986#section-3.3)
1048///
1049/// ```text
1050/// segment       = *pchar
1051/// ```
1052fn parse_segment(b: &mut &[u8]) -> Result<(), ParseRIError> {
1053    while parse_pchar(b).is_ok() {}
1054    Ok(())
1055}
1056
1057/// # Reference
1058/// [3.3.  Path](https://datatracker.ietf.org/doc/html/rfc3986#section-3.3)
1059///
1060/// ```text
1061/// segment-nz    = 1*pchar
1062/// ```
1063fn parse_segment_nz(b: &mut &[u8]) -> Result<(), ParseRIError> {
1064    parse_pchar(b)?;
1065    while parse_pchar(b).is_ok() {}
1066    Ok(())
1067}
1068
1069/// # Reference
1070/// [3.3.  Path](https://datatracker.ietf.org/doc/html/rfc3986#section-3.3)
1071///
1072/// ```text
1073/// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
1074///                     ; non-zero-length segment without any colon ":"
1075/// ```
1076fn parse_segment_nz_nc(b: &mut &[u8]) -> Result<(), ParseRIError> {
1077    if b.is_empty() || b[0] == b':' || parse_pchar(b).is_err() {
1078        return Err(ParseRIError::InvalidSegmentNzNc);
1079    }
1080    while !b.is_empty() && b[0] != b':' && parse_pchar(b).is_ok() {}
1081    Ok(())
1082}
1083
1084/// # Reference
1085/// [3.3.  Path](https://datatracker.ietf.org/doc/html/rfc3986#section-3.3)
1086///
1087/// ```text
1088/// pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
1089/// ```
1090fn parse_pchar(b: &mut &[u8]) -> Result<(), ParseRIError> {
1091    if b.is_empty() {
1092        return Err(ParseRIError::InvalidPChar);
1093    }
1094
1095    if is_unreserved(b[0]) || is_sub_delims(b[0]) || matches!(b[0], b':' | b'@') {
1096        *b = &b[1..];
1097        Ok(())
1098    } else if b.len() >= 3 && b[0] == b'%' && b[1].is_ascii_hexdigit() && b[2].is_ascii_hexdigit() {
1099        *b = &b[3..];
1100        Ok(())
1101    } else {
1102        Err(ParseRIError::InvalidPChar)
1103    }
1104}
1105
1106/// # Reference
1107/// [3.4.  Query](https://datatracker.ietf.org/doc/html/rfc3986#section-3.4)
1108///
1109/// ```text
1110/// query       = *( pchar / "/" / "?" )
1111/// ```
1112fn parse_query(b: &mut &[u8]) -> Result<(), ParseRIError> {
1113    loop {
1114        if let Some(rem) = b.strip_prefix(b"/") {
1115            *b = rem;
1116        } else if let Some(rem) = b.strip_prefix(b"?") {
1117            *b = rem;
1118        } else if parse_pchar(b).is_ok() {
1119            // no op
1120        } else {
1121            break Ok(());
1122        }
1123    }
1124}
1125
1126/// # Reference
1127/// [3.5.  Fragment](https://datatracker.ietf.org/doc/html/rfc3986#section-3.5)
1128///
1129/// ```text
1130/// fragment    = *( pchar / "/" / "?" )
1131/// ```
1132fn parse_fragment(b: &mut &[u8]) -> Result<(), ParseRIError> {
1133    loop {
1134        if let Some(rem) = b.strip_prefix(b"/") {
1135            *b = rem;
1136        } else if let Some(rem) = b.strip_prefix(b"?") {
1137            *b = rem;
1138        } else if parse_pchar(b).is_ok() {
1139            // no op
1140        } else {
1141            break Ok(());
1142        }
1143    }
1144}
1145
1146/// # Reference
1147/// [4.2.  Relative Reference](https://datatracker.ietf.org/doc/html/rfc3986#section-4.2)
1148///
1149/// ```text
1150/// relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
1151/// ```
1152fn parse_relative_ref(b: &mut &[u8]) -> Result<(), ParseRIError> {
1153    parse_relative_part(b)?;
1154    if let Some(query) = b.strip_prefix(b"?") {
1155        *b = query;
1156        parse_query(b)?;
1157    }
1158    if let Some(fragment) = b.strip_prefix(b"#") {
1159        *b = fragment;
1160        parse_fragment(b)?;
1161    }
1162    Ok(())
1163}
1164
1165/// # Reference
1166/// [4.2.  Relative Reference](https://datatracker.ietf.org/doc/html/rfc3986#section-4.2)
1167///
1168/// ```text
1169/// relative-part = "//" authority path-abempty
1170///               / path-absolute
1171///               / path-noscheme
1172///               / path-empty
1173/// ```
1174fn parse_relative_part(b: &mut &[u8]) -> Result<(), ParseRIError> {
1175    if let Some(rem) = b.strip_prefix(b"/") {
1176        if let Some(rem) = rem.strip_prefix(b"/") {
1177            *b = rem;
1178            parse_authority(b)?;
1179            parse_path_abempty(b)
1180        } else {
1181            parse_path_absolute(b)
1182        }
1183    } else {
1184        let orig = b.len();
1185        let ret = parse_path_noscheme(b);
1186        // If no characters have been consumed, it matches `path-empty` and returns `Ok`.
1187        if orig == b.len() { Ok(()) } else { ret }
1188    }
1189}
1190
1191/// # Reference
1192/// [2.2.  Reserved Characters](https://datatracker.ietf.org/doc/html/rfc3986#section-2.2)
1193///
1194/// ```text
1195/// reserved    = gen-delims / sub-delims
1196/// ```
1197fn is_reserved(b: u8) -> bool {
1198    is_gen_delims(b) || is_sub_delims(b)
1199}
1200
/// Returns `true` when `b` is one of the general delimiters.
///
/// # Reference
/// [2.2.  Reserved Characters](https://datatracker.ietf.org/doc/html/rfc3986#section-2.2)
///
/// ```text
/// gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@"
/// ```
fn is_gen_delims(b: u8) -> bool {
    const GEN_DELIMS: &[u8] = b":/?#[]@";
    GEN_DELIMS.contains(&b)
}
1210
/// Returns `true` when `b` is one of the sub-delimiters.
///
/// # Reference
/// [2.2.  Reserved Characters](https://datatracker.ietf.org/doc/html/rfc3986#section-2.2)
///
/// ```text
/// sub-delims  = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
/// ```
fn is_sub_delims(b: u8) -> bool {
    const SUB_DELIMS: &[u8] = b"!$&'()*+,;=";
    SUB_DELIMS.contains(&b)
}
1223
/// Returns `true` when `b` is an unreserved character: an ASCII letter, an ASCII digit,
/// or one of `-`, `.`, `_`, `~`.
///
/// # Reference
/// [2.3.  Unreserved Characters](https://datatracker.ietf.org/doc/html/rfc3986#section-2.3)
///
/// ```text
/// unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
/// ```
fn is_unreserved(b: u8) -> bool {
    matches!(
        b,
        b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'.' | b'_' | b'~'
    )
}
1233
/// Percent-encoding lookup table built at compile time.
///
/// Entry `i` occupies bytes `3 * i .. 3 * i + 3` and holds the ASCII text `%XX`, where
/// `XX` is `i` written as two upper-case hexadecimal digits.
const LUT_BYTES: [u8; 256 * 3] = {
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";

    // Every entry starts with '%'; only the two digit slots need to be filled in.
    let mut table = [b'%'; 256 * 3];
    let mut byte = 0;
    while byte < 256 {
        table[3 * byte + 1] = HEX_DIGITS[byte >> 4];
        table[3 * byte + 2] = HEX_DIGITS[byte & 0xF];
        byte += 1;
    }
    table
};
1250const LUT: &str = unsafe {
1251    // # Safety
1252    // `LUT_BYTES` contains only '%' and ASCII hex digit characters.
1253    // Therefore, UTF-8 validation won't fail.
1254    from_utf8_unchecked(&LUT_BYTES)
1255};
1256
1257pub fn escape(s: &str) -> Cow<'_, str> {
1258    escape_except(s, |_| false)
1259}
1260
1261pub fn escape_bytes(b: &[u8]) -> Cow<'_, [u8]> {
1262    escape_bytes_except(b, |_| false)
1263}
1264
1265pub fn escape_except(s: &str, is_except: impl Fn(char) -> bool) -> Cow<'_, str> {
1266    let cap = s
1267        .chars()
1268        .filter_map(|c| (!is_except(c)).then_some(c.len_utf8() * 2))
1269        .sum::<usize>();
1270    if cap == 0 {
1271        return Cow::Borrowed(s);
1272    }
1273    let mut encode = [0; 6];
1274    let mut buf = String::with_capacity(s.len() + cap);
1275    for c in s.chars() {
1276        if is_except(c) {
1277            buf.push(c);
1278        } else {
1279            let encoded = c.encode_utf8(&mut encode);
1280            for b in encoded.bytes() {
1281                let index = b as usize * 3;
1282                buf.push_str(&LUT[index..index + 3]);
1283            }
1284        }
1285    }
1286    Cow::Owned(buf)
1287}
1288
1289pub fn escape_bytes_except(b: &[u8], is_except: impl Fn(u8) -> bool) -> Cow<'_, [u8]> {
1290    let cap = b.iter().copied().filter(|&b| !is_except(b)).count() * 2;
1291    if cap == 0 {
1292        return Cow::Borrowed(b);
1293    }
1294    let mut buf = Vec::with_capacity(b.len() + cap);
1295    for &b in b {
1296        if is_except(b) {
1297            buf.push(b);
1298        } else {
1299            let index = b as usize * 3;
1300            buf.extend_from_slice(&LUT_BYTES[index..index + 3]);
1301        }
1302    }
1303    Cow::Owned(buf)
1304}
1305
/// Error returned when percent-decoding fails.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum URIUnescapeError {
    /// A `%` was not followed by a valid two-digit hexadecimal escape.
    InvalidEscape,
    /// The decoded bytes are not valid UTF-8.
    Utf8Error(std::str::Utf8Error),
}

impl std::fmt::Display for URIUnescapeError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::InvalidEscape => {
                f.write_str("invalid percent-encoding: '%' must be followed by two hexadecimal digits")
            }
            Self::Utf8Error(err) => write!(f, "unescaped bytes are not valid UTF-8: {err}"),
        }
    }
}

impl std::error::Error for URIUnescapeError {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Self::InvalidEscape => None,
            Self::Utf8Error(err) => Some(err),
        }
    }
}
1310
1311impl From<std::str::Utf8Error> for URIUnescapeError {
1312    fn from(value: std::str::Utf8Error) -> Self {
1313        Self::Utf8Error(value)
1314    }
1315}
1316
1317pub fn unescape(s: &str) -> Result<Cow<'_, str>, URIUnescapeError> {
1318    if !s.contains('%') {
1319        return Ok(Cow::Borrowed(s));
1320    }
1321
1322    let mut split = s.split('%');
1323    let mut buf = String::with_capacity(s.len());
1324    buf.push_str(split.next().unwrap());
1325    let mut bytes = vec![];
1326    for chunk in split {
1327        if chunk.len() < 2 {
1328            return Err(URIUnescapeError::InvalidEscape);
1329        }
1330        let byte =
1331            u8::from_str_radix(&chunk[..2], 16).map_err(|_| URIUnescapeError::InvalidEscape)?;
1332        bytes.push(byte);
1333
1334        if chunk.len() > 2 {
1335            buf.push_str(from_utf8(&bytes)?);
1336            buf.push_str(&chunk[2..]);
1337            bytes.clear();
1338        }
1339    }
1340
1341    if !bytes.is_empty() {
1342        buf.push_str(from_utf8(&bytes)?);
1343    }
1344    Ok(Cow::Owned(buf))
1345}
1346
1347pub fn unescape_bytes(b: &[u8]) -> Result<Cow<'_, [u8]>, URIUnescapeError> {
1348    if !b.contains(&b'%') {
1349        return Ok(Cow::Borrowed(b));
1350    }
1351
1352    let mut split = b.split(|&b| b == b'%');
1353    let mut buf = Vec::with_capacity(b.len());
1354    buf.extend_from_slice(split.next().unwrap());
1355
1356    fn hexdigit_to_byte(hex: u8) -> u8 {
1357        if hex.is_ascii_digit() {
1358            hex - b'0'
1359        } else if hex.is_ascii_uppercase() {
1360            hex - b'A' + 10
1361        } else {
1362            hex - b'a' + 10
1363        }
1364    }
1365    for chunk in split {
1366        if chunk.len() < 2 || !chunk[0].is_ascii_hexdigit() || !chunk[1].is_ascii_hexdigit() {
1367            return Err(URIUnescapeError::InvalidEscape);
1368        }
1369        let hi = hexdigit_to_byte(chunk[0]);
1370        let lo = hexdigit_to_byte(chunk[1]);
1371        buf.push((hi << 4) | lo);
1372    }
1373    Ok(Cow::Owned(buf))
1374}
1375
/// Position of the [`Components`] iterator within the URI: each variant names the
/// component that will be looked for next.
#[derive(Clone, Copy, Debug)]
enum DecomposeState {
    /// Initial state: look for a leading `scheme ":"`.
    Scheme,
    /// Look for an authority introduced by `//`.
    Authority,
    /// Look for the `/` that makes the path absolute.
    Root,
    /// Yield path segments one by one.
    Path,
    /// Look for a query introduced by `?`.
    Query,
    /// Look for a fragment introduced by `#`.
    Fragment,
    /// Everything has been yielded; the iterator is exhausted.
    Finish,
}
1386
1387pub struct Components<'a> {
1388    state: DecomposeState,
1389    uri: &'a str,
1390}
1391
1392impl Components<'_> {
1393    fn new(uri: &str) -> Components<'_> {
1394        Components {
1395            state: DecomposeState::Scheme,
1396            uri,
1397        }
1398    }
1399}
1400
1401impl<'a> Iterator for Components<'a> {
1402    type Item = Component<'a>;
1403
1404    fn next(&mut self) -> Option<Self::Item> {
1405        use DecomposeState::*;
1406        loop {
1407            match self.state {
1408                Scheme => {
1409                    self.state = Authority;
1410                    let mut bytes = self.uri.as_bytes();
1411                    if parse_scheme(&mut bytes).is_ok() && bytes.starts_with(b":") {
1412                        let len = self.uri.len() - bytes.len();
1413                        let (scheme, rem) = self.uri.split_at(len);
1414                        self.uri = &rem[1..];
1415                        break Some(Component::Scheme(scheme));
1416                    }
1417                }
1418                Authority => {
1419                    self.state = Root;
1420                    if let Some(rem) = self.uri.strip_prefix("//") {
1421                        let pos = rem.bytes().position(|b| b == b'/').unwrap_or(rem.len());
1422                        let (mut authority, rem) = rem.split_at(pos);
1423                        self.uri = rem;
1424                        let mut userinfo = None;
1425                        if let Some((ui, rem)) = authority.split_once('@') {
1426                            userinfo = Some(ui);
1427                            authority = rem;
1428                        }
1429                        let mut port = None;
1430                        if let Some((host, p)) = authority.rsplit_once(':')
1431                            && p.bytes().all(|b| b.is_ascii_digit())
1432                        {
1433                            port = Some(p);
1434                            authority = host;
1435                        }
1436                        break Some(Component::Authority {
1437                            userinfo,
1438                            host: authority,
1439                            port,
1440                        });
1441                    }
1442                }
1443                Root => {
1444                    self.state = Path;
1445                    if let Some(rem) = self.uri.strip_prefix('/') {
1446                        self.uri = rem;
1447                        break Some(Component::RootSegment);
1448                    }
1449                }
1450                Path => {
1451                    let pos = self
1452                        .uri
1453                        .bytes()
1454                        .position(|b| b == b'/' || b == b'?' || b == b'#')
1455                        .unwrap_or(self.uri.len());
1456                    let (segment, rem) = self.uri.split_at(pos);
1457                    if let Some(rem) = rem.strip_prefix('/') {
1458                        self.uri = rem;
1459                    } else {
1460                        self.uri = rem;
1461                        self.state = Query;
1462                    }
1463                    break Some(Component::Segment(segment));
1464                }
1465                Query => {
1466                    self.state = Fragment;
1467                    if let Some(rem) = self.uri.strip_prefix('?') {
1468                        let pos = rem.bytes().position(|b| b == b'#').unwrap_or(rem.len());
1469                        let (query, rem) = rem.split_at(pos);
1470                        self.uri = rem;
1471                        break Some(Component::Query(query));
1472                    }
1473                }
1474                Fragment => {
1475                    debug_assert!(self.uri.is_empty() || self.uri.starts_with('#'));
1476                    self.state = Finish;
1477                    if !self.uri.is_empty() {
1478                        let (_, frag) = self.uri.split_at(1);
1479                        self.uri = "";
1480                        break Some(Component::Fragment(frag));
1481                    }
1482                }
1483                Finish => break None,
1484            }
1485        }
1486    }
1487}
1488
/// One piece of a URI, as produced by the [`Components`] iterator.
///
/// All string slices borrow from the URI being iterated and exclude the delimiters
/// (`:`, `//`, `@`, `/`, `?`, `#`) that introduce or terminate them.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Component<'a> {
    /// The scheme, without the trailing `:`.
    Scheme(&'a str),
    /// The authority that follows `//`, split into its parts.
    Authority {
        /// The text before `@`, if any.
        userinfo: Option<&'a str>,
        /// The host; may be empty.
        host: &'a str,
        /// The digits after the last `:`, if any; may be empty.
        port: Option<&'a str>,
    },
    /// The leading `/` of an absolute path.
    RootSegment,
    /// One path segment; may be empty.
    Segment(&'a str),
    /// The query, without the leading `?`.
    Query(&'a str),
    /// The fragment, without the leading `#`.
    Fragment(&'a str),
}