fluent_uri/pct_enc/
mod.rs

1//! Percent-encoding utilities.
2
3pub mod encoder;
4#[cfg(feature = "alloc")]
5mod estring;
6pub(crate) mod table;
7
8#[cfg(feature = "alloc")]
9pub use estring::EString;
10pub use table::Table;
11
12use crate::imp::PathEncoder;
13use core::{cmp::Ordering, hash, iter::FusedIterator, marker::PhantomData, str};
14use ref_cast::{ref_cast_custom, RefCastCustom};
15
16#[cfg(feature = "alloc")]
17use alloc::{
18    borrow::{Cow, ToOwned},
19    string::String,
20    vec::Vec,
21};
22
/// A trait used by [`EStr`] and [`EString`] to specify the table used for encoding.
///
/// The trait has no methods; an implementor only supplies the associated
/// [`TABLE`](Self::TABLE) constant.
///
/// # Sub-encoders
///
/// A sub-encoder `SubE` of `E` is an encoder such that `SubE::TABLE` is a [subset] of `E::TABLE`.
///
/// [subset]: Table::is_subset
pub trait Encoder: 'static {
    /// The table used for encoding.
    const TABLE: &'static Table;
}
34
35/// Percent-encoded string slices.
36///
37/// The owned counterpart of `EStr` is [`EString`]. See its documentation
38/// if you want to build a percent-encoded string from scratch.
39///
40/// # Type parameter
41///
42/// The `EStr<E>` type is parameterized over a type `E` that implements [`Encoder`].
43/// The associated constant `E::TABLE` of type [`Table`] specifies the byte patterns
44/// allowed in a string. In short, the underlying byte sequence of an `EStr<E>` slice
45/// can be formed by joining any number of the following byte sequences:
46///
47/// - `ch.encode_utf8(&mut [0; 4])` where `E::TABLE.allows(ch)`.
48/// - `[b'%', hi, lo]` where `E::TABLE.allows_pct_encoded() && hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit()`.
49///
50/// # Comparison
51///
52/// `EStr` slices are compared [lexicographically](Ord#lexicographical-comparison)
53/// by their byte values. Normalization is **not** performed prior to comparison.
54///
55/// # Examples
56///
57/// Parse key-value pairs from a query string into a hash map:
58///
59/// ```
60/// use fluent_uri::{pct_enc::EStr, UriRef};
61/// use std::collections::HashMap;
62///
63/// let s = "?name=%E5%BC%A0%E4%B8%89&speech=%C2%A1Ol%C3%A9%21";
64/// let query = UriRef::parse(s)?.query().unwrap();
65/// let map: HashMap<_, _> = query
66///     .split('&')
67///     .map(|s| s.split_once('=').unwrap_or((s, EStr::EMPTY)))
68///     .map(|(k, v)| (k.decode().to_string_lossy(), v.decode().to_string_lossy()))
69///     .collect();
70/// assert_eq!(map["name"], "张三");
71/// assert_eq!(map["speech"], "¡Olé!");
72/// # Ok::<_, fluent_uri::ParseError>(())
73/// ```
#[derive(RefCastCustom)]
#[repr(transparent)]
pub struct EStr<E: Encoder> {
    // Zero-sized marker tying the slice to its encoder `E`; it holds no data.
    encoder: PhantomData<E>,
    // The underlying string: checked against `E::TABLE` by `new`,
    // taken on trust by `new_validated`.
    inner: str,
}
80
/// Carrier type for compile-time checks relating two encoders `L` and `R`.
///
/// It is never instantiated; only its associated constants are evaluated.
///
/// The item is compiled whenever `alloc` is enabled *or* the crate is built
/// with `--cfg fluent_uri_unstable`, because `EStr::upcast` (which is gated
/// on `fluent_uri_unstable` only) evaluates
/// [`L_IS_SUB_ENCODER_OF_R`](Self::L_IS_SUB_ENCODER_OF_R) and would otherwise
/// fail to resolve `Assert` in an unstable build without `alloc`.
#[cfg(any(feature = "alloc", fluent_uri_unstable))]
struct Assert<L: Encoder, R: Encoder> {
    _marker: PhantomData<(L, R)>,
}

#[cfg(any(feature = "alloc", fluent_uri_unstable))]
impl<L: Encoder, R: Encoder> Assert<L, R> {
    /// Evaluating this constant fails compilation with "not a sub-encoder"
    /// unless `L::TABLE` is a subset of `R::TABLE`.
    const L_IS_SUB_ENCODER_OF_R: () = assert!(L::TABLE.is_subset(R::TABLE), "not a sub-encoder");
}
90
91impl<E: Encoder> EStr<E> {
    /// Compile-time check that `E::TABLE` allows percent-encoded octets.
    ///
    /// Methods that rely on this guarantee force its evaluation with
    /// `() = Self::ASSERT_ALLOWS_PCT_ENCODED;`.
    const ASSERT_ALLOWS_PCT_ENCODED: () = assert!(
        E::TABLE.allows_pct_encoded(),
        "table does not allow percent-encoded octets"
    );
96
    /// Converts a string slice to an `EStr` slice assuming validity.
    ///
    /// No check against `E::TABLE` is performed here; the function body is
    /// generated by `#[ref_cast_custom]`. Callers are expected to pass a
    /// string that is properly encoded with `E`.
    #[ref_cast_custom]
    pub(crate) const fn new_validated(s: &str) -> &Self;
100
    /// An empty `EStr` slice.
    ///
    /// Built with `new_validated` directly, i.e. without a validation pass.
    pub const EMPTY: &'static Self = Self::new_validated("");
103
104    pub(crate) fn cast<F: Encoder>(&self) -> &EStr<F> {
105        EStr::new_validated(&self.inner)
106    }
107
108    /// Converts a string slice to an `EStr` slice.
109    ///
110    /// # Panics
111    ///
112    /// Panics if the string is not properly encoded with `E`.
113    /// For a non-panicking variant, use [`new`](Self::new).
114    #[must_use]
115    pub const fn new_or_panic(s: &str) -> &Self {
116        match Self::new(s) {
117            Some(s) => s,
118            None => panic!("improperly encoded string"),
119        }
120    }
121
122    /// Converts a string slice to an `EStr` slice, returning `None` if the conversion fails.
123    #[must_use]
124    pub const fn new(s: &str) -> Option<&Self> {
125        if E::TABLE.validate(s.as_bytes()) {
126            Some(Self::new_validated(s))
127        } else {
128            None
129        }
130    }
131
132    /// Creates an `EStr` slice containing a single percent-encoded octet representing the given byte.
133    ///
134    /// # Panics
135    ///
136    /// Panics at compile time if `E::TABLE` does not [allow percent-encoded octets].
137    ///
138    /// [allow percent-encoded octets]: Table::allows_pct_encoded
139    ///
140    /// # Examples
141    ///
142    /// ```
143    /// use fluent_uri::pct_enc::{encoder::Path, EStr};
144    ///
145    /// assert_eq!(EStr::<Path>::encode_byte(b'1'), "%31");
146    /// ```
147    #[must_use]
148    pub fn encode_byte(x: u8) -> &'static Self {
149        () = Self::ASSERT_ALLOWS_PCT_ENCODED;
150        Self::new_validated(encode_byte(x))
151    }
152
    /// Yields the underlying string slice.
    ///
    /// This is a plain borrow of the inner `str`; no decoding is performed.
    #[must_use]
    pub fn as_str(&self) -> &str {
        &self.inner
    }
158
159    /// Returns the length of the `EStr` slice in bytes.
160    #[must_use]
161    pub fn len(&self) -> usize {
162        self.inner.len()
163    }
164
165    /// Checks whether the `EStr` slice is empty.
166    #[must_use]
167    pub fn is_empty(&self) -> bool {
168        self.inner.is_empty()
169    }
170
171    /// Upcasts the `EStr` slice to associate it with the given super-encoder.
172    ///
173    /// # Panics
174    ///
175    /// Panics at compile time if `E` is not a [sub-encoder](Encoder#sub-encoders) of `SuperE`.
176    ///
177    /// # Example
178    ///
179    /// ```
180    /// use fluent_uri::pct_enc::{encoder::{IPath, Path}, EStr};
181    ///
182    /// let path = EStr::<Path>::new_or_panic("foo");
183    /// let path: &EStr<IPath> = path.upcast();
184    /// ```
185    #[cfg(fluent_uri_unstable)]
186    #[must_use]
187    pub fn upcast<SuperE: Encoder>(&self) -> &EStr<SuperE> {
188        () = Assert::<E, SuperE>::L_IS_SUB_ENCODER_OF_R;
189        EStr::new_validated(self.as_str())
190    }
191
192    /// Checks whether the `EStr` slice is unencoded, i.e., does not contain `'%'`.
193    ///
194    /// # Examples
195    ///
196    /// ```
197    /// use fluent_uri::pct_enc::{encoder::Path, EStr};
198    ///
199    /// assert!(EStr::<Path>::new_or_panic("Hello!").is_unencoded());
200    /// assert!(!EStr::<Path>::new_or_panic("%C2%A1Hola%21").is_unencoded());
201    /// ```
202    #[cfg(fluent_uri_unstable)]
203    #[must_use]
204    pub fn is_unencoded(&self) -> bool {
205        !(E::TABLE.allows_pct_encoded() && self.inner.contains('%'))
206    }
207
208    /// Returns an iterator used to decode the `EStr` slice.
209    ///
210    /// Always **split before decoding**, as otherwise the data may be
211    /// mistaken for component delimiters.
212    ///
213    /// Note that the iterator will **not** decode `U+002B` (+) as `0x20` (space).
214    ///
215    /// # Panics
216    ///
217    /// Panics at compile time if `E::TABLE` does not [allow percent-encoded octets].
218    ///
219    /// [allow percent-encoded octets]: Table::allows_pct_encoded
220    ///
221    /// # Examples
222    ///
223    /// ```
224    /// use fluent_uri::pct_enc::{encoder::Path, EStr};
225    ///
226    /// let dec = EStr::<Path>::new_or_panic("%C2%A1Hola%21").decode();
227    /// assert_eq!(*dec.clone().to_bytes(), [0xc2, 0xa1, 0x48, 0x6f, 0x6c, 0x61, 0x21]);
228    /// assert_eq!(dec.to_string().unwrap(), "¡Hola!");
229    /// ```
230    pub fn decode(&self) -> Decode<'_> {
231        () = Self::ASSERT_ALLOWS_PCT_ENCODED;
232        Decode::new(&self.inner)
233    }
234
235    /// Returns an iterator over subslices of the `EStr` slice separated by the given delimiter.
236    ///
237    /// # Panics
238    ///
239    /// Panics if the delimiter is not a [reserved] character.
240    ///
241    /// [reserved]: https://datatracker.ietf.org/doc/html/rfc3986#section-2.2
242    ///
243    /// # Examples
244    ///
245    /// ```
246    /// use fluent_uri::pct_enc::{encoder::Path, EStr};
247    ///
248    /// assert!(EStr::<Path>::new_or_panic("a,b,c").split(',').eq(["a", "b", "c"]));
249    /// assert!(EStr::<Path>::new_or_panic(",").split(',').eq(["", ""]));
250    /// assert!(EStr::<Path>::EMPTY.split(',').eq([""]));
251    /// ```
252    pub fn split(&self, delim: char) -> Split<'_, E> {
253        assert!(
254            delim.is_ascii() && table::RESERVED.allows(delim),
255            "splitting with non-reserved character"
256        );
257        Split {
258            inner: self.inner.split(delim),
259            encoder: PhantomData,
260        }
261    }
262
263    /// Splits the `EStr` slice on the first occurrence of the given delimiter and
264    /// returns prefix before delimiter and suffix after delimiter.
265    ///
266    /// Returns `None` if the delimiter is not found.
267    ///
268    /// # Panics
269    ///
270    /// Panics if the delimiter is not a [reserved] character.
271    ///
272    /// [reserved]: https://datatracker.ietf.org/doc/html/rfc3986#section-2.2
273    ///
274    /// # Examples
275    ///
276    /// ```
277    /// use fluent_uri::pct_enc::{encoder::Path, EStr};
278    ///
279    /// assert_eq!(
280    ///     EStr::<Path>::new_or_panic("foo;bar;baz").split_once(';'),
281    ///     Some((EStr::new_or_panic("foo"), EStr::new_or_panic("bar;baz")))
282    /// );
283    ///
284    /// assert_eq!(EStr::<Path>::new_or_panic("foo").split_once(';'), None);
285    /// ```
286    #[must_use]
287    pub fn split_once(&self, delim: char) -> Option<(&Self, &Self)> {
288        assert!(
289            delim.is_ascii() && table::RESERVED.allows(delim),
290            "splitting with non-reserved character"
291        );
292        self.inner
293            .split_once(delim)
294            .map(|(a, b)| (Self::new_validated(a), Self::new_validated(b)))
295    }
296
297    /// Splits the `EStr` slice on the last occurrence of the given delimiter and
298    /// returns prefix before delimiter and suffix after delimiter.
299    ///
300    /// Returns `None` if the delimiter is not found.
301    ///
302    /// # Panics
303    ///
304    /// Panics if the delimiter is not a [reserved] character.
305    ///
306    /// [reserved]: https://datatracker.ietf.org/doc/html/rfc3986#section-2.2
307    ///
308    /// # Examples
309    ///
310    /// ```
311    /// use fluent_uri::pct_enc::{encoder::Path, EStr};
312    ///
313    /// assert_eq!(
314    ///     EStr::<Path>::new_or_panic("foo;bar;baz").rsplit_once(';'),
315    ///     Some((EStr::new_or_panic("foo;bar"), EStr::new_or_panic("baz")))
316    /// );
317    ///
318    /// assert_eq!(EStr::<Path>::new_or_panic("foo").rsplit_once(';'), None);
319    /// ```
320    #[must_use]
321    pub fn rsplit_once(&self, delim: char) -> Option<(&Self, &Self)> {
322        assert!(
323            delim.is_ascii() && table::RESERVED.allows(delim),
324            "splitting with non-reserved character"
325        );
326        self.inner
327            .rsplit_once(delim)
328            .map(|(a, b)| (Self::new_validated(a), Self::new_validated(b)))
329    }
330}
331
// Identity conversion: an `EStr<E>` slice can be borrowed as itself.
impl<E: Encoder> AsRef<Self> for EStr<E> {
    fn as_ref(&self) -> &Self {
        self
    }
}
337
338impl<E: Encoder> AsRef<str> for EStr<E> {
339    fn as_ref(&self) -> &str {
340        &self.inner
341    }
342}
343
344impl<E: Encoder> PartialEq for EStr<E> {
345    fn eq(&self, other: &Self) -> bool {
346        self.inner == other.inner
347    }
348}
349
350impl<E: Encoder> PartialEq<str> for EStr<E> {
351    fn eq(&self, other: &str) -> bool {
352        &self.inner == other
353    }
354}
355
356impl<E: Encoder> PartialEq<EStr<E>> for str {
357    fn eq(&self, other: &EStr<E>) -> bool {
358        self == &other.inner
359    }
360}
361
// Equality is plain string equality (see `PartialEq for EStr<E>`), hence a full equivalence.
impl<E: Encoder> Eq for EStr<E> {}
363
364impl<E: Encoder> hash::Hash for EStr<E> {
365    fn hash<H: hash::Hasher>(&self, state: &mut H) {
366        self.inner.hash(state);
367    }
368}
369
370impl<E: Encoder> PartialOrd for EStr<E> {
371    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
372        Some(self.cmp(other))
373    }
374}
375
376impl<E: Encoder> Ord for EStr<E> {
377    fn cmp(&self, other: &Self) -> Ordering {
378        self.inner.cmp(&other.inner)
379    }
380}
381
382impl<E: Encoder> Default for &EStr<E> {
383    /// Creates an empty `EStr` slice.
384    fn default() -> Self {
385        EStr::EMPTY
386    }
387}
388
#[cfg(feature = "alloc")]
impl<E: Encoder> ToOwned for EStr<E> {
    type Owned = EString<E>;

    // Copies the underlying string into a fresh `EString<E>`; no re-validation is needed.
    fn to_owned(&self) -> EString<E> {
        let buf = String::from(&self.inner);
        EString::new_validated(buf)
    }

    // Overwrites the target's buffer via `str::clone_into`.
    fn clone_into(&self, target: &mut EString<E>) {
        ToOwned::clone_into(&self.inner, &mut target.buf);
    }
}
401
402/// Extension methods for the [path] component.
403///
404/// [path]: https://datatracker.ietf.org/doc/html/rfc3986#section-3.3
405impl<E: PathEncoder> EStr<E> {
406    /// Checks whether the path is absolute, i.e., starting with `'/'`.
407    #[inline]
408    #[must_use]
409    pub fn is_absolute(&self) -> bool {
410        self.inner.starts_with('/')
411    }
412
413    /// Checks whether the path is rootless, i.e., not starting with `'/'`.
414    #[inline]
415    #[must_use]
416    pub fn is_rootless(&self) -> bool {
417        !self.inner.starts_with('/')
418    }
419
420    /// Returns an iterator over the path segments, separated by `'/'`.
421    ///
422    /// Returns `None` if the path is [rootless]. Use [`split`]
423    /// instead if you need to split a rootless path on occurrences of `'/'`.
424    ///
425    /// Note that the path can be [empty] when authority is present,
426    /// in which case this method will return `None`.
427    ///
428    /// [rootless]: Self::is_rootless
429    /// [`split`]: Self::split
430    /// [empty]: Self::is_empty
431    ///
432    /// # Examples
433    ///
434    /// ```
435    /// use fluent_uri::Uri;
436    ///
437    /// // Segments are separated by '/'.
438    /// // The empty string before a leading '/' is not a segment.
439    /// // However, segments can be empty in the other cases.
440    /// let path = Uri::parse("file:///path/to//dir/")?.path();
441    /// assert_eq!(path, "/path/to//dir/");
442    /// assert!(path.segments_if_absolute().unwrap().eq(["path", "to", "", "dir", ""]));
443    ///
444    /// let path = Uri::parse("foo:bar/baz")?.path();
445    /// assert_eq!(path, "bar/baz");
446    /// assert!(path.segments_if_absolute().is_none());
447    ///
448    /// let path = Uri::parse("http://example.com")?.path();
449    /// assert!(path.is_empty());
450    /// assert!(path.segments_if_absolute().is_none());
451    /// # Ok::<_, fluent_uri::ParseError>(())
452    /// ```
453    #[inline]
454    #[must_use]
455    pub fn segments_if_absolute(&self) -> Option<Split<'_, E>> {
456        self.inner
457            .strip_prefix('/')
458            .map(|s| Self::new_validated(s).split('/'))
459    }
460}
461
/// Builds a 256-entry lookup table indexed by byte value.
///
/// Entries for ASCII hex digits (`0-9`, `A-F`, `a-f`) hold the digit's value,
/// shifted into the high nibble when `hi` is `true`; every other entry is `0xff`.
const fn gen_octet_table(hi: bool) -> [u8; 256] {
    const UPPER_HEX: &[u8; 16] = b"0123456789ABCDEF";

    let mut table = [0xff_u8; 256];
    let mut nibble: u8 = 0;
    while nibble < 16 {
        let value = if hi { nibble << 4 } else { nibble };
        let upper = UPPER_HEX[nibble as usize];
        table[upper as usize] = value;
        // For `0-9` the lowercase form is the digit itself, so this rewrites the same slot.
        table[upper.to_ascii_lowercase() as usize] = value;
        nibble += 1;
    }
    table
}
478
/// Maps an ASCII hex digit to its value shifted into the high nibble (`0xff` for any other byte).
const OCTET_TABLE_HI: &[u8; 256] = &gen_octet_table(true);
/// Maps an ASCII hex digit to its value in the low nibble (`0xff` for any other byte).
pub(crate) const OCTET_TABLE_LO: &[u8; 256] = &gen_octet_table(false);
481
482/// Decodes a percent-encoded octet, assuming that the bytes are hexadecimal.
483pub(crate) fn decode_octet(hi: u8, lo: u8) -> u8 {
484    debug_assert!(hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit());
485    OCTET_TABLE_HI[hi as usize] | OCTET_TABLE_LO[lo as usize]
486}
487
/// An iterator used to decode an [`EStr`] slice.
///
/// This struct is created by [`EStr::decode`]. Normally you'll use the methods below
/// instead of iterating over a `Decode` manually, unless you need precise control
/// over allocation.
///
/// See the [`DecodedChunk`] type for documentation of the items yielded by this iterator.
#[derive(Clone, Debug)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct Decode<'a> {
    // The part of the input that has not been yielded yet.
    source: &'a str,
}
500
/// An item returned by the [`Decode`] iterator.
///
/// Chunks can be compared for equality, which makes it possible to check
/// the output of a `Decode` iterator with `assert_eq!`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DecodedChunk<'a> {
    /// An unencoded subslice.
    Unencoded(&'a str),
    /// A percent-encoded octet, decoded (for example, `"%20"` decoded as `0x20`).
    PctDecoded(u8),
}
509
510impl<'a> Decode<'a> {
511    pub(crate) fn new(source: &'a str) -> Self {
512        Self { source }
513    }
514
515    fn next_if_unencoded(&mut self) -> Option<&'a str> {
516        let i = self
517            .source
518            .bytes()
519            .position(|x| x == b'%')
520            .unwrap_or(self.source.len());
521
522        if i == 0 {
523            None
524        } else {
525            let s;
526            (s, self.source) = self.source.split_at(i);
527            Some(s)
528        }
529    }
530}
531
532impl<'a> Iterator for Decode<'a> {
533    type Item = DecodedChunk<'a>;
534
535    fn next(&mut self) -> Option<Self::Item> {
536        if self.source.is_empty() {
537            None
538        } else if let Some(s) = self.next_if_unencoded() {
539            Some(DecodedChunk::Unencoded(s))
540        } else {
541            let s;
542            (s, self.source) = self.source.split_at(3);
543            let x = decode_octet(s.as_bytes()[1], s.as_bytes()[2]);
544            Some(DecodedChunk::PctDecoded(x))
545        }
546    }
547}
548
// `next` returns `None` whenever the source is empty and never refills it,
// so the iterator keeps returning `None` once exhausted.
impl FusedIterator for Decode<'_> {}
550
/// A chunk passed to the callback of `Decode::decode_utf8`.
#[cfg(feature = "alloc")]
pub(crate) enum DecodedUtf8Chunk<'a, 'b> {
    /// An unencoded subslice of the source, forwarded as is.
    Unencoded(&'a str),
    /// Percent-decoded bytes taken from the decoding buffer: a `valid` UTF-8
    /// part and the `invalid` bytes reported together with it by `Utf8Chunks`.
    Decoded { valid: &'b str, invalid: &'b [u8] },
}
556
557#[cfg(feature = "alloc")]
558impl<'a> Decode<'a> {
    /// Decodes the slice chunk by chunk, reporting the UTF-8 validity of the
    /// percent-decoded bytes to `handle_chunk`.
    ///
    /// Unencoded subslices are forwarded as [`DecodedUtf8Chunk::Unencoded`].
    /// Percent-decoded bytes are collected in a 32-byte stack buffer and, when
    /// the buffer is flushed, split by `Utf8Chunks` into `valid`/`invalid`
    /// parts that are forwarded as [`DecodedUtf8Chunk::Decoded`].
    pub(crate) fn decode_utf8(self, mut handle_chunk: impl FnMut(DecodedUtf8Chunk<'a, '_>)) {
        use crate::utf8::Utf8Chunks;

        // Pending percent-decoded bytes live in `buf[..len]`.
        let mut buf = [0; 32];
        let mut len = 0;

        'decode: for chunk in self {
            match chunk {
                DecodedChunk::Unencoded(s) => {
                    // Flush pending decoded bytes first so that chunks are
                    // reported in source order.
                    if len > 0 {
                        for chunk in Utf8Chunks::new(&buf[..len]) {
                            handle_chunk(DecodedUtf8Chunk::Decoded {
                                valid: chunk.valid(),
                                invalid: chunk.invalid(),
                            });
                        }
                        len = 0;
                    }
                    handle_chunk(DecodedUtf8Chunk::Unencoded(s));
                }
                DecodedChunk::PctDecoded(x) => {
                    buf[len] = x;
                    len += 1;

                    // The buffer is full: flush it.
                    if len >= buf.len() {
                        for chunk in Utf8Chunks::new(&buf[..len]) {
                            // NOTE(review): `incomplete()` presumably flags a trailing
                            // sequence cut off by the end of the buffer that may still
                            // become valid -- confirm against `crate::utf8::Utf8Chunks`.
                            if chunk.incomplete() {
                                // Report only the valid part for now.
                                handle_chunk(DecodedUtf8Chunk::Decoded {
                                    valid: chunk.valid(),
                                    invalid: &[],
                                });

                                // Move the last `invalid_len` bytes of the buffer to its
                                // front so that later decoded bytes are appended after them.
                                let invalid_len = chunk.invalid().len();
                                buf.copy_within(len - invalid_len..len, 0);

                                len = invalid_len;
                                continue 'decode;
                            }
                            handle_chunk(DecodedUtf8Chunk::Decoded {
                                valid: chunk.valid(),
                                invalid: chunk.invalid(),
                            });
                        }
                        len = 0;
                    }
                }
            }
        }

        // Flush whatever is still buffered once the source is exhausted.
        for chunk in Utf8Chunks::new(&buf[..len]) {
            handle_chunk(DecodedUtf8Chunk::Decoded {
                valid: chunk.valid(),
                invalid: chunk.invalid(),
            });
        }
    }
615
616    fn decoded_len(&self) -> usize {
617        self.source.len() - self.source.bytes().filter(|&x| x == b'%').count() * 2
618    }
619
620    fn borrow_all_or_prep_buf(&mut self) -> Result<&'a str, String> {
621        if let Some(s) = self.next_if_unencoded() {
622            if self.source.is_empty() {
623                return Ok(s);
624            }
625            let mut buf = String::with_capacity(s.len() + self.decoded_len());
626            buf.push_str(s);
627            Err(buf)
628        } else {
629            Err(String::with_capacity(self.decoded_len()))
630        }
631    }
632
633    /// Decodes the slice to bytes.
634    ///
635    /// This method allocates only when the slice contains any percent-encoded octet.
636    #[must_use]
637    pub fn to_bytes(mut self) -> Cow<'a, [u8]> {
638        if self.source.is_empty() {
639            return Cow::Borrowed(&[]);
640        }
641
642        let mut buf = match self.borrow_all_or_prep_buf() {
643            Ok(s) => return Cow::Borrowed(s.as_bytes()),
644            Err(buf) => buf.into_bytes(),
645        };
646
647        for chunk in self {
648            match chunk {
649                DecodedChunk::Unencoded(s) => buf.extend_from_slice(s.as_bytes()),
650                DecodedChunk::PctDecoded(s) => buf.push(s),
651            }
652        }
653        Cow::Owned(buf)
654    }
655
656    /// Attempts to decode the slice to a string.
657    ///
658    /// This method allocates only when the slice contains any percent-encoded octet.
659    ///
660    /// # Errors
661    ///
662    /// Returns `Err` containing the decoded bytes if they are not valid UTF-8.
663    pub fn to_string(mut self) -> Result<Cow<'a, str>, Vec<u8>> {
664        if self.source.is_empty() {
665            return Ok(Cow::Borrowed(""));
666        }
667
668        let mut buf = match self.borrow_all_or_prep_buf() {
669            Ok(s) => return Ok(Cow::Borrowed(s)),
670            Err(buf) => Ok::<_, Vec<u8>>(buf),
671        };
672
673        self.decode_utf8(|chunk| match chunk {
674            DecodedUtf8Chunk::Unencoded(s) => match &mut buf {
675                Ok(string) => string.push_str(s),
676                Err(vec) => vec.extend_from_slice(s.as_bytes()),
677            },
678            DecodedUtf8Chunk::Decoded { valid, invalid } => match &mut buf {
679                Ok(string) => {
680                    string.push_str(valid);
681                    if !invalid.is_empty() {
682                        let mut vec = core::mem::take(string).into_bytes();
683                        vec.extend_from_slice(invalid);
684                        buf = Err(vec);
685                    }
686                }
687                Err(vec) => {
688                    vec.extend_from_slice(valid.as_bytes());
689                    vec.extend_from_slice(invalid);
690                }
691            },
692        });
693
694        match buf {
695            Ok(buf) => Ok(Cow::Owned(buf)),
696            Err(buf) => Err(buf),
697        }
698    }
699
700    /// Decodes the slice to a string, replacing any invalid UTF-8 sequences with
701    /// [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD].
702    ///
703    /// [U+FFFD]: char::REPLACEMENT_CHARACTER
704    ///
705    /// This method allocates only when the slice contains any percent-encoded octet.
706    #[must_use]
707    pub fn to_string_lossy(mut self) -> Cow<'a, str> {
708        if self.source.is_empty() {
709            return Cow::Borrowed("");
710        }
711
712        let mut buf = match self.borrow_all_or_prep_buf() {
713            Ok(s) => return Cow::Borrowed(s),
714            Err(buf) => buf,
715        };
716
717        self.decode_utf8(|chunk| match chunk {
718            DecodedUtf8Chunk::Unencoded(s) => buf.push_str(s),
719            DecodedUtf8Chunk::Decoded { valid, invalid } => {
720                buf.push_str(valid);
721                if !invalid.is_empty() {
722                    buf.push(char::REPLACEMENT_CHARACTER);
723                }
724            }
725        });
726        Cow::Owned(buf)
727    }
728}
729
/// Returns the percent-encoded form of `x`: `'%'` followed by two uppercase
/// hexadecimal digits.
///
/// The result is a slice of a table built at compile time, so no allocation
/// or formatting happens at run time.
pub(crate) fn encode_byte(x: u8) -> &'static str {
    const UPPER_HEX: &[u8; 16] = b"0123456789ABCDEF";

    // Lays out the triplets for bytes 0..=255 back to back.
    const fn build_triplets() -> [u8; 256 * 3] {
        let mut triplets = [b'%'; 256 * 3];
        let mut byte = 0;
        while byte < 256 {
            let at = byte * 3;
            triplets[at + 1] = UPPER_HEX[byte / 16];
            triplets[at + 2] = UPPER_HEX[byte % 16];
            byte += 1;
        }
        triplets
    }

    const TRIPLET_BYTES: &[u8; 256 * 3] = &build_triplets();

    const TRIPLETS: &str = match str::from_utf8(TRIPLET_BYTES) {
        Ok(text) => text,
        Err(_) => unreachable!(),
    };

    let start = usize::from(x) * 3;
    &TRIPLETS[start..start + 3]
}
752
/// An iterator used to percent-encode a string slice.
///
/// This struct is created by [`Table::encode`]. Normally you'll use [`EString::encode_str`]
/// instead, unless you need precise control over allocation.
///
/// See the [`EncodedChunk`] type for documentation of the items yielded by this iterator.
#[cfg(feature = "alloc")]
#[derive(Clone, Debug)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub(crate) struct Encode<'t, 's> {
    // Characters allowed by this table are yielded unencoded.
    table: &'t Table,
    // The part of the input that has not been fully yielded yet.
    source: &'s str,
    // Number of leading bytes of `source` that belong to the character
    // currently being percent-encoded (0 if there is none).
    enc_len: usize,
    // Index into `source` of the next of those bytes to yield.
    enc_i: usize,
}
768
#[cfg(feature = "alloc")]
impl<'t, 's> Encode<'t, 's> {
    /// Creates an encoder over `source` using `table`, with no character
    /// pending to be percent-encoded.
    pub(crate) fn new(table: &'t Table, source: &'s str) -> Self {
        Encode {
            enc_i: 0,
            enc_len: 0,
            source,
            table,
        }
    }
}
780
/// An item returned by the [`Encode`] iterator.
#[cfg(feature = "alloc")]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum EncodedChunk<'a> {
    /// An unencoded subslice (borrowed from the input).
    Unencoded(&'a str),
    /// A byte, percent-encoded (for example, `0x20` encoded as `"%20"`).
    PctEncoded(&'static str),
}
790
#[cfg(feature = "alloc")]
impl<'a> EncodedChunk<'a> {
    /// Returns the text of the chunk, whichever variant it is.
    #[must_use]
    pub fn as_str(self) -> &'a str {
        match self {
            Self::Unencoded(plain) => plain,
            Self::PctEncoded(triplet) => triplet,
        }
    }
}
801
#[cfg(feature = "alloc")]
impl<'t, 's> Iterator for Encode<'t, 's> {
    type Item = EncodedChunk<'s>;

    /// Yields either the longest run of characters allowed by the table, or
    /// one percent-encoded byte of a character that is not allowed.
    fn next(&mut self) -> Option<Self::Item> {
        // A disallowed character is partially emitted: yield its next byte.
        if self.enc_i < self.enc_len {
            let s = encode_byte(self.source.as_bytes()[self.enc_i]);
            self.enc_i += 1;
            return Some(EncodedChunk::PctEncoded(s));
        }

        // Skip the bytes of the character that has just been fully emitted (if any).
        self.source = &self.source[self.enc_len..];
        self.enc_len = 0;

        if self.source.is_empty() {
            return None;
        }

        // Find the byte index of the first character the table does not allow.
        // After a hit, `iter` is positioned right after that character.
        let mut iter = self.source.char_indices();
        let i = iter
            .find_map(|(i, ch)| (!self.table.allows(ch)).then_some(i))
            .unwrap_or(self.source.len());

        // `CharIndices::offset` sadly requires an MSRV of 1.82,
        // so we do pointer math to get the offset for now.
        if i == 0 {
            // The disallowed character comes first: record its byte length and
            // yield its first byte right away.
            self.enc_len = iter.as_str().as_ptr() as usize - self.source.as_ptr() as usize;
            self.enc_i = 1;

            let s = encode_byte(self.source.as_bytes()[0]);
            Some(EncodedChunk::PctEncoded(s))
        } else {
            // Yield the allowed prefix; `source` now starts at the disallowed
            // character, or is empty if none was found.
            let s;
            (s, self.source) = self.source.split_at(i);

            // Byte length of the disallowed character now at the front of `source`;
            // when none was found, `iter` is exhausted and `source` is empty.
            self.enc_len = iter.as_str().as_ptr() as usize - self.source.as_ptr() as usize;
            self.enc_i = 0;

            Some(EncodedChunk::Unencoded(s))
        }
    }
}
844
// Once `next` has returned `None`, `source` is empty and `enc_len` is 0,
// so every later call returns `None` as well.
#[cfg(feature = "alloc")]
impl FusedIterator for Encode<'_, '_> {}
847
/// An iterator over subslices of an [`EStr`] slice separated by a delimiter.
///
/// This struct is created by [`EStr::split`].
#[derive(Clone, Debug)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct Split<'a, E: Encoder> {
    // The plain string splitter doing the actual work.
    inner: str::Split<'a, char>,
    // Zero-sized marker remembering which encoder the yielded slices belong to.
    encoder: PhantomData<E>,
}
857
858impl<'a, E: Encoder> Iterator for Split<'a, E> {
859    type Item = &'a EStr<E>;
860
861    fn next(&mut self) -> Option<&'a EStr<E>> {
862        self.inner.next().map(EStr::new_validated)
863    }
864}
865
866impl<'a, E: Encoder> DoubleEndedIterator for Split<'a, E> {
867    fn next_back(&mut self) -> Option<&'a EStr<E>> {
868        self.inner.next_back().map(EStr::new_validated)
869    }
870}
871
// `next` only forwards to the wrapped `str::Split`, which is itself a fused iterator.
impl<E: Encoder> FusedIterator for Split<'_, E> {}
fluent_uri/pct_enc/mod.rs

fluent_uri/pct_enc/
mod.rs