fluent_uri/pct_enc/mod.rs
1//! Percent-encoding utilities.
2
3pub mod encoder;
4#[cfg(feature = "alloc")]
5mod estring;
6pub(crate) mod table;
7
8#[cfg(feature = "alloc")]
9pub use estring::EString;
10pub use table::Table;
11
12use crate::imp::PathEncoder;
13use core::{cmp::Ordering, hash, iter::FusedIterator, marker::PhantomData, str};
14use ref_cast::{ref_cast_custom, RefCastCustom};
15
16#[cfg(feature = "alloc")]
17use alloc::{
18 borrow::{Cow, ToOwned},
19 string::String,
20 vec::Vec,
21};
22
/// A trait used by [`EStr`] and [`EString`] to specify the table used for encoding.
///
/// The trait declares no methods; an implementor only supplies the
/// associated constant [`TABLE`](Self::TABLE).
///
/// # Sub-encoders
///
/// A sub-encoder `SubE` of `E` is an encoder such that `SubE::TABLE` is a [subset] of `E::TABLE`.
///
/// [subset]: Table::is_subset
pub trait Encoder: 'static {
    /// The table used for encoding.
    const TABLE: &'static Table;
}
34
/// Percent-encoded string slices.
///
/// The owned counterpart of `EStr` is [`EString`]. See its documentation
/// if you want to build a percent-encoded string from scratch.
///
/// # Type parameter
///
/// The `EStr<E>` type is parameterized over a type `E` that implements [`Encoder`].
/// The associated constant `E::TABLE` of type [`Table`] specifies the byte patterns
/// allowed in a string. In short, the underlying byte sequence of an `EStr<E>` slice
/// can be formed by joining any number of the following byte sequences:
///
/// - `ch.encode_utf8(&mut [0; 4])` where `E::TABLE.allows(ch)`.
/// - `[b'%', hi, lo]` where `E::TABLE.allows_pct_encoded() && hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit()`.
///
/// # Comparison
///
/// `EStr` slices are compared [lexicographically](Ord#lexicographical-comparison)
/// by their byte values. Normalization is **not** performed prior to comparison.
///
/// # Examples
///
/// Parse key-value pairs from a query string into a hash map:
///
/// ```
/// use fluent_uri::{pct_enc::EStr, UriRef};
/// use std::collections::HashMap;
///
/// let s = "?name=%E5%BC%A0%E4%B8%89&speech=%C2%A1Ol%C3%A9%21";
/// let query = UriRef::parse(s)?.query().unwrap();
/// let map: HashMap<_, _> = query
///     .split('&')
///     .map(|s| s.split_once('=').unwrap_or((s, EStr::EMPTY)))
///     .map(|(k, v)| (k.decode().to_string_lossy(), v.decode().to_string_lossy()))
///     .collect();
/// assert_eq!(map["name"], "张三");
/// assert_eq!(map["speech"], "¡Olé!");
/// # Ok::<_, fluent_uri::ParseError>(())
/// ```
#[derive(RefCastCustom)]
#[repr(transparent)]
pub struct EStr<E: Encoder> {
    // Zero-sized marker tying the slice to its encoder type.
    encoder: PhantomData<E>,
    // The underlying string; must stay the last field because `str` is unsized.
    inner: str,
}
80
/// Carrier type for a compile-time check that `L` is a sub-encoder of `R`.
///
/// The gate covers both configurations in which the check is needed:
/// `EStr::upcast` is compiled under `fluent_uri_unstable` regardless of the
/// `alloc` feature, so gating on `alloc` alone would leave `upcast` referring
/// to a missing type in an unstable build without `alloc`.
// NOTE(review): the `alloc` arm is kept from the original gate; presumably
// `alloc`-only code elsewhere in the crate uses this type as well — confirm.
#[cfg(any(feature = "alloc", fluent_uri_unstable))]
struct Assert<L: Encoder, R: Encoder> {
    _marker: PhantomData<(L, R)>,
}

#[cfg(any(feature = "alloc", fluent_uri_unstable))]
impl<L: Encoder, R: Encoder> Assert<L, R> {
    /// Evaluating this constant fails compilation unless `L::TABLE` is a
    /// subset of `R::TABLE`.
    const L_IS_SUB_ENCODER_OF_R: () = assert!(L::TABLE.is_subset(R::TABLE), "not a sub-encoder");
}
90
91impl<E: Encoder> EStr<E> {
    /// Compile-time guard: evaluating this constant fails compilation unless
    /// `E::TABLE` allows percent-encoded octets.
    ///
    /// Methods that need the guarantee force evaluation by writing
    /// `() = Self::ASSERT_ALLOWS_PCT_ENCODED;` at the top of their body.
    const ASSERT_ALLOWS_PCT_ENCODED: () = assert!(
        E::TABLE.allows_pct_encoded(),
        "table does not allow percent-encoded octets"
    );
96
    /// Converts a string slice to an `EStr` slice assuming validity.
    ///
    /// No check against `E::TABLE` is performed here; the caller must pass a
    /// string that is already properly encoded with `E`. The function has no
    /// hand-written body: it is supplied by the `ref_cast_custom` attribute.
    #[ref_cast_custom]
    pub(crate) const fn new_validated(s: &str) -> &Self;
100
    /// An empty `EStr` slice.
    ///
    /// Built from `""` without validation: the empty string is the join of
    /// zero allowed byte sequences.
    pub const EMPTY: &'static Self = Self::new_validated("");
103
104 pub(crate) fn cast<F: Encoder>(&self) -> &EStr<F> {
105 EStr::new_validated(&self.inner)
106 }
107
108 /// Converts a string slice to an `EStr` slice.
109 ///
110 /// # Panics
111 ///
112 /// Panics if the string is not properly encoded with `E`.
113 /// For a non-panicking variant, use [`new`](Self::new).
114 #[must_use]
115 pub const fn new_or_panic(s: &str) -> &Self {
116 match Self::new(s) {
117 Some(s) => s,
118 None => panic!("improperly encoded string"),
119 }
120 }
121
122 /// Converts a string slice to an `EStr` slice, returning `None` if the conversion fails.
123 #[must_use]
124 pub const fn new(s: &str) -> Option<&Self> {
125 if E::TABLE.validate(s.as_bytes()) {
126 Some(Self::new_validated(s))
127 } else {
128 None
129 }
130 }
131
132 /// Creates an `EStr` slice containing a single percent-encoded octet representing the given byte.
133 ///
134 /// # Panics
135 ///
136 /// Panics at compile time if `E::TABLE` does not [allow percent-encoded octets].
137 ///
138 /// [allow percent-encoded octets]: Table::allows_pct_encoded
139 ///
140 /// # Examples
141 ///
142 /// ```
143 /// use fluent_uri::pct_enc::{encoder::Path, EStr};
144 ///
145 /// assert_eq!(EStr::<Path>::encode_byte(b'1'), "%31");
146 /// ```
147 #[must_use]
148 pub fn encode_byte(x: u8) -> &'static Self {
149 () = Self::ASSERT_ALLOWS_PCT_ENCODED;
150 Self::new_validated(encode_byte(x))
151 }
152
    /// Yields the underlying string slice.
    ///
    /// The returned slice is the stored text as-is; nothing is decoded.
    #[must_use]
    pub fn as_str(&self) -> &str {
        &self.inner
    }
158
159 /// Returns the length of the `EStr` slice in bytes.
160 #[must_use]
161 pub fn len(&self) -> usize {
162 self.inner.len()
163 }
164
165 /// Checks whether the `EStr` slice is empty.
166 #[must_use]
167 pub fn is_empty(&self) -> bool {
168 self.inner.is_empty()
169 }
170
171 /// Upcasts the `EStr` slice to associate it with the given super-encoder.
172 ///
173 /// # Panics
174 ///
175 /// Panics at compile time if `E` is not a [sub-encoder](Encoder#sub-encoders) of `SuperE`.
176 ///
177 /// # Example
178 ///
179 /// ```
180 /// use fluent_uri::pct_enc::{encoder::{IPath, Path}, EStr};
181 ///
182 /// let path = EStr::<Path>::new_or_panic("foo");
183 /// let path: &EStr<IPath> = path.upcast();
184 /// ```
185 #[cfg(fluent_uri_unstable)]
186 #[must_use]
187 pub fn upcast<SuperE: Encoder>(&self) -> &EStr<SuperE> {
188 () = Assert::<E, SuperE>::L_IS_SUB_ENCODER_OF_R;
189 EStr::new_validated(self.as_str())
190 }
191
192 /// Checks whether the `EStr` slice is unencoded, i.e., does not contain `'%'`.
193 ///
194 /// # Examples
195 ///
196 /// ```
197 /// use fluent_uri::pct_enc::{encoder::Path, EStr};
198 ///
199 /// assert!(EStr::<Path>::new_or_panic("Hello!").is_unencoded());
200 /// assert!(!EStr::<Path>::new_or_panic("%C2%A1Hola%21").is_unencoded());
201 /// ```
202 #[cfg(fluent_uri_unstable)]
203 #[must_use]
204 pub fn is_unencoded(&self) -> bool {
205 !(E::TABLE.allows_pct_encoded() && self.inner.contains('%'))
206 }
207
208 /// Returns an iterator used to decode the `EStr` slice.
209 ///
210 /// Always **split before decoding**, as otherwise the data may be
211 /// mistaken for component delimiters.
212 ///
213 /// Note that the iterator will **not** decode `U+002B` (+) as `0x20` (space).
214 ///
215 /// # Panics
216 ///
217 /// Panics at compile time if `E::TABLE` does not [allow percent-encoded octets].
218 ///
219 /// [allow percent-encoded octets]: Table::allows_pct_encoded
220 ///
221 /// # Examples
222 ///
223 /// ```
224 /// use fluent_uri::pct_enc::{encoder::Path, EStr};
225 ///
226 /// let dec = EStr::<Path>::new_or_panic("%C2%A1Hola%21").decode();
227 /// assert_eq!(*dec.clone().to_bytes(), [0xc2, 0xa1, 0x48, 0x6f, 0x6c, 0x61, 0x21]);
228 /// assert_eq!(dec.to_string().unwrap(), "¡Hola!");
229 /// ```
230 pub fn decode(&self) -> Decode<'_> {
231 () = Self::ASSERT_ALLOWS_PCT_ENCODED;
232 Decode::new(&self.inner)
233 }
234
235 /// Returns an iterator over subslices of the `EStr` slice separated by the given delimiter.
236 ///
237 /// # Panics
238 ///
239 /// Panics if the delimiter is not a [reserved] character.
240 ///
241 /// [reserved]: https://datatracker.ietf.org/doc/html/rfc3986#section-2.2
242 ///
243 /// # Examples
244 ///
245 /// ```
246 /// use fluent_uri::pct_enc::{encoder::Path, EStr};
247 ///
248 /// assert!(EStr::<Path>::new_or_panic("a,b,c").split(',').eq(["a", "b", "c"]));
249 /// assert!(EStr::<Path>::new_or_panic(",").split(',').eq(["", ""]));
250 /// assert!(EStr::<Path>::EMPTY.split(',').eq([""]));
251 /// ```
252 pub fn split(&self, delim: char) -> Split<'_, E> {
253 assert!(
254 delim.is_ascii() && table::RESERVED.allows(delim),
255 "splitting with non-reserved character"
256 );
257 Split {
258 inner: self.inner.split(delim),
259 encoder: PhantomData,
260 }
261 }
262
263 /// Splits the `EStr` slice on the first occurrence of the given delimiter and
264 /// returns prefix before delimiter and suffix after delimiter.
265 ///
266 /// Returns `None` if the delimiter is not found.
267 ///
268 /// # Panics
269 ///
270 /// Panics if the delimiter is not a [reserved] character.
271 ///
272 /// [reserved]: https://datatracker.ietf.org/doc/html/rfc3986#section-2.2
273 ///
274 /// # Examples
275 ///
276 /// ```
277 /// use fluent_uri::pct_enc::{encoder::Path, EStr};
278 ///
279 /// assert_eq!(
280 /// EStr::<Path>::new_or_panic("foo;bar;baz").split_once(';'),
281 /// Some((EStr::new_or_panic("foo"), EStr::new_or_panic("bar;baz")))
282 /// );
283 ///
284 /// assert_eq!(EStr::<Path>::new_or_panic("foo").split_once(';'), None);
285 /// ```
286 #[must_use]
287 pub fn split_once(&self, delim: char) -> Option<(&Self, &Self)> {
288 assert!(
289 delim.is_ascii() && table::RESERVED.allows(delim),
290 "splitting with non-reserved character"
291 );
292 self.inner
293 .split_once(delim)
294 .map(|(a, b)| (Self::new_validated(a), Self::new_validated(b)))
295 }
296
297 /// Splits the `EStr` slice on the last occurrence of the given delimiter and
298 /// returns prefix before delimiter and suffix after delimiter.
299 ///
300 /// Returns `None` if the delimiter is not found.
301 ///
302 /// # Panics
303 ///
304 /// Panics if the delimiter is not a [reserved] character.
305 ///
306 /// [reserved]: https://datatracker.ietf.org/doc/html/rfc3986#section-2.2
307 ///
308 /// # Examples
309 ///
310 /// ```
311 /// use fluent_uri::pct_enc::{encoder::Path, EStr};
312 ///
313 /// assert_eq!(
314 /// EStr::<Path>::new_or_panic("foo;bar;baz").rsplit_once(';'),
315 /// Some((EStr::new_or_panic("foo;bar"), EStr::new_or_panic("baz")))
316 /// );
317 ///
318 /// assert_eq!(EStr::<Path>::new_or_panic("foo").rsplit_once(';'), None);
319 /// ```
320 #[must_use]
321 pub fn rsplit_once(&self, delim: char) -> Option<(&Self, &Self)> {
322 assert!(
323 delim.is_ascii() && table::RESERVED.allows(delim),
324 "splitting with non-reserved character"
325 );
326 self.inner
327 .rsplit_once(delim)
328 .map(|(a, b)| (Self::new_validated(a), Self::new_validated(b)))
329 }
330}
331
// Identity conversion: `as_ref` hands back the same slice, so an `&EStr<E>`
// satisfies an `AsRef<EStr<E>>` bound.
impl<E: Encoder> AsRef<Self> for EStr<E> {
    fn as_ref(&self) -> &Self {
        self
    }
}
337
338impl<E: Encoder> AsRef<str> for EStr<E> {
339 fn as_ref(&self) -> &str {
340 &self.inner
341 }
342}
343
344impl<E: Encoder> PartialEq for EStr<E> {
345 fn eq(&self, other: &Self) -> bool {
346 self.inner == other.inner
347 }
348}
349
350impl<E: Encoder> PartialEq<str> for EStr<E> {
351 fn eq(&self, other: &str) -> bool {
352 &self.inner == other
353 }
354}
355
356impl<E: Encoder> PartialEq<EStr<E>> for str {
357 fn eq(&self, other: &EStr<E>) -> bool {
358 self == &other.inner
359 }
360}
361
// Equality is plain string equality (see the `PartialEq` impl), so it is a
// full equivalence relation.
impl<E: Encoder> Eq for EStr<E> {}
363
364impl<E: Encoder> hash::Hash for EStr<E> {
365 fn hash<H: hash::Hasher>(&self, state: &mut H) {
366 self.inner.hash(state);
367 }
368}
369
370impl<E: Encoder> PartialOrd for EStr<E> {
371 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
372 Some(self.cmp(other))
373 }
374}
375
376impl<E: Encoder> Ord for EStr<E> {
377 fn cmp(&self, other: &Self) -> Ordering {
378 self.inner.cmp(&other.inner)
379 }
380}
381
382impl<E: Encoder> Default for &EStr<E> {
383 /// Creates an empty `EStr` slice.
384 fn default() -> Self {
385 EStr::EMPTY
386 }
387}
388
#[cfg(feature = "alloc")]
impl<E: Encoder> ToOwned for EStr<E> {
    type Owned = EString<E>;

    /// Copies the slice into a new `EString` without re-validating it.
    fn to_owned(&self) -> EString<E> {
        let owned: String = String::from(self.as_str());
        EString::new_validated(owned)
    }

    /// Overwrites `target`'s buffer with this slice's contents.
    fn clone_into(&self, target: &mut EString<E>) {
        ToOwned::clone_into(self.as_str(), &mut target.buf);
    }
}
401
402/// Extension methods for the [path] component.
403///
404/// [path]: https://datatracker.ietf.org/doc/html/rfc3986#section-3.3
405impl<E: PathEncoder> EStr<E> {
406 /// Checks whether the path is absolute, i.e., starting with `'/'`.
407 #[inline]
408 #[must_use]
409 pub fn is_absolute(&self) -> bool {
410 self.inner.starts_with('/')
411 }
412
413 /// Checks whether the path is rootless, i.e., not starting with `'/'`.
414 #[inline]
415 #[must_use]
416 pub fn is_rootless(&self) -> bool {
417 !self.inner.starts_with('/')
418 }
419
420 /// Returns an iterator over the path segments, separated by `'/'`.
421 ///
422 /// Returns `None` if the path is [rootless]. Use [`split`]
423 /// instead if you need to split a rootless path on occurrences of `'/'`.
424 ///
425 /// Note that the path can be [empty] when authority is present,
426 /// in which case this method will return `None`.
427 ///
428 /// [rootless]: Self::is_rootless
429 /// [`split`]: Self::split
430 /// [empty]: Self::is_empty
431 ///
432 /// # Examples
433 ///
434 /// ```
435 /// use fluent_uri::Uri;
436 ///
437 /// // Segments are separated by '/'.
438 /// // The empty string before a leading '/' is not a segment.
439 /// // However, segments can be empty in the other cases.
440 /// let path = Uri::parse("file:///path/to//dir/")?.path();
441 /// assert_eq!(path, "/path/to//dir/");
442 /// assert!(path.segments_if_absolute().unwrap().eq(["path", "to", "", "dir", ""]));
443 ///
444 /// let path = Uri::parse("foo:bar/baz")?.path();
445 /// assert_eq!(path, "bar/baz");
446 /// assert!(path.segments_if_absolute().is_none());
447 ///
448 /// let path = Uri::parse("http://example.com")?.path();
449 /// assert!(path.is_empty());
450 /// assert!(path.segments_if_absolute().is_none());
451 /// # Ok::<_, fluent_uri::ParseError>(())
452 /// ```
453 #[inline]
454 #[must_use]
455 pub fn segments_if_absolute(&self) -> Option<Split<'_, E>> {
456 self.inner
457 .strip_prefix('/')
458 .map(|s| Self::new_validated(s).split('/'))
459 }
460}
461
/// Builds a 256-entry lookup table mapping an ASCII hex digit to its value.
///
/// Entries for `0-9`, `A-F` and `a-f` hold the digit's value, shifted into the
/// high nibble when `hi` is `true`. Every other entry is `0xff`.
const fn gen_octet_table(hi: bool) -> [u8; 256] {
    const UPPER: &[u8; 16] = b"0123456789ABCDEF";
    const LOWER: &[u8; 16] = b"0123456789abcdef";

    let shift = if hi { 4 } else { 0 };
    let mut table = [0xff; 256];

    // `for` loops are not available in `const fn`, hence the manual counter.
    let mut value = 0;
    while value < 16 {
        // `value` is below 16, so the narrowing cast is lossless.
        let nibble = (value as u8) << shift;
        // For the decimal digits both alphabets name the same slot.
        table[UPPER[value] as usize] = nibble;
        table[LOWER[value] as usize] = nibble;
        value += 1;
    }
    table
}
478
// Lookup table for the first hex digit of an octet: values are pre-shifted
// into the high nibble; non-hex bytes map to 0xff.
const OCTET_TABLE_HI: &[u8; 256] = &gen_octet_table(true);
// Lookup table for the second hex digit of an octet: values occupy the low
// nibble; non-hex bytes map to 0xff.
pub(crate) const OCTET_TABLE_LO: &[u8; 256] = &gen_octet_table(false);
481
482/// Decodes a percent-encoded octet, assuming that the bytes are hexadecimal.
483pub(crate) fn decode_octet(hi: u8, lo: u8) -> u8 {
484 debug_assert!(hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit());
485 OCTET_TABLE_HI[hi as usize] | OCTET_TABLE_LO[lo as usize]
486}
487
/// An iterator used to decode an [`EStr`] slice.
///
/// This struct is created by [`EStr::decode`]. Normally you'll use the methods below
/// instead of iterating over a `Decode` manually, unless you need precise control
/// over allocation.
///
/// See the [`DecodedChunk`] type for documentation of the items yielded by this iterator.
#[derive(Clone, Debug)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct Decode<'a> {
    // The part of the input that has not been yielded yet.
    source: &'a str,
}
500
/// An item returned by the [`Decode`] iterator.
///
/// Each item is either a run of text that needed no decoding or a single
/// decoded byte.
#[derive(Clone, Copy, Debug)]
pub enum DecodedChunk<'a> {
    /// An unencoded subslice.
    Unencoded(&'a str),
    /// A percent-encoded octet, decoded (for example, `"%20"` decoded as `0x20`).
    PctDecoded(u8),
}
509
510impl<'a> Decode<'a> {
511 pub(crate) fn new(source: &'a str) -> Self {
512 Self { source }
513 }
514
515 fn next_if_unencoded(&mut self) -> Option<&'a str> {
516 let i = self
517 .source
518 .bytes()
519 .position(|x| x == b'%')
520 .unwrap_or(self.source.len());
521
522 if i == 0 {
523 None
524 } else {
525 let s;
526 (s, self.source) = self.source.split_at(i);
527 Some(s)
528 }
529 }
530}
531
532impl<'a> Iterator for Decode<'a> {
533 type Item = DecodedChunk<'a>;
534
535 fn next(&mut self) -> Option<Self::Item> {
536 if self.source.is_empty() {
537 None
538 } else if let Some(s) = self.next_if_unencoded() {
539 Some(DecodedChunk::Unencoded(s))
540 } else {
541 let s;
542 (s, self.source) = self.source.split_at(3);
543 let x = decode_octet(s.as_bytes()[1], s.as_bytes()[2]);
544 Some(DecodedChunk::PctDecoded(x))
545 }
546 }
547}
548
// Once `source` is empty, `next` keeps returning `None` and never refills it.
impl FusedIterator for Decode<'_> {}
550
/// A chunk passed to the callback of `Decode::decode_utf8`.
#[cfg(feature = "alloc")]
pub(crate) enum DecodedUtf8Chunk<'a, 'b> {
    /// A subslice of the source that needed no decoding.
    Unencoded(&'a str),
    /// A run of percent-decoded bytes, split into a `valid` UTF-8 part and
    /// the `invalid` bytes that follow it (possibly empty).
    Decoded { valid: &'b str, invalid: &'b [u8] },
}
556
557#[cfg(feature = "alloc")]
558impl<'a> Decode<'a> {
    /// Decodes the slice and feeds the result to `handle_chunk` piece by piece.
    ///
    /// Unencoded subslices are passed through as [`DecodedUtf8Chunk::Unencoded`].
    /// Percent-decoded bytes are staged in a 32-byte stack buffer and reported as
    /// [`DecodedUtf8Chunk::Decoded`] pieces whenever the buffer is flushed: before
    /// an unencoded subslice, when the buffer becomes full, and at the end of input.
    pub(crate) fn decode_utf8(self, mut handle_chunk: impl FnMut(DecodedUtf8Chunk<'a, '_>)) {
        use crate::utf8::Utf8Chunks;

        // Staging area for consecutive decoded bytes; `len` is its fill level.
        let mut buf = [0; 32];
        let mut len = 0;

        'decode: for chunk in self {
            match chunk {
                DecodedChunk::Unencoded(s) => {
                    // Flush staged bytes first so output stays in source order.
                    if len > 0 {
                        for chunk in Utf8Chunks::new(&buf[..len]) {
                            handle_chunk(DecodedUtf8Chunk::Decoded {
                                valid: chunk.valid(),
                                invalid: chunk.invalid(),
                            });
                        }
                        len = 0;
                    }
                    handle_chunk(DecodedUtf8Chunk::Unencoded(s));
                }
                DecodedChunk::PctDecoded(x) => {
                    buf[len] = x;
                    len += 1;

                    // Buffer full: flush it, but more decoded bytes may follow.
                    if len >= buf.len() {
                        for chunk in Utf8Chunks::new(&buf[..len]) {
                            // NOTE(review): `incomplete()` presumably flags trailing bytes that
                            // may be the start of a sequence cut off by the buffer end — confirm
                            // against `crate::utf8`.
                            if chunk.incomplete() {
                                // Report only the valid part; the trailing bytes are held back.
                                handle_chunk(DecodedUtf8Chunk::Decoded {
                                    valid: chunk.valid(),
                                    invalid: &[],
                                });

                                // Move the held-back bytes to the front of the buffer so that
                                // later decoded bytes are appended after them.
                                let invalid_len = chunk.invalid().len();
                                buf.copy_within(len - invalid_len..len, 0);

                                len = invalid_len;
                                continue 'decode;
                            }
                            handle_chunk(DecodedUtf8Chunk::Decoded {
                                valid: chunk.valid(),
                                invalid: chunk.invalid(),
                            });
                        }
                        len = 0;
                    }
                }
            }
        }

        // End of input: report whatever is still staged.
        for chunk in Utf8Chunks::new(&buf[..len]) {
            handle_chunk(DecodedUtf8Chunk::Decoded {
                valid: chunk.valid(),
                invalid: chunk.invalid(),
            });
        }
    }
615
616 fn decoded_len(&self) -> usize {
617 self.source.len() - self.source.bytes().filter(|&x| x == b'%').count() * 2
618 }
619
620 fn borrow_all_or_prep_buf(&mut self) -> Result<&'a str, String> {
621 if let Some(s) = self.next_if_unencoded() {
622 if self.source.is_empty() {
623 return Ok(s);
624 }
625 let mut buf = String::with_capacity(s.len() + self.decoded_len());
626 buf.push_str(s);
627 Err(buf)
628 } else {
629 Err(String::with_capacity(self.decoded_len()))
630 }
631 }
632
633 /// Decodes the slice to bytes.
634 ///
635 /// This method allocates only when the slice contains any percent-encoded octet.
636 #[must_use]
637 pub fn to_bytes(mut self) -> Cow<'a, [u8]> {
638 if self.source.is_empty() {
639 return Cow::Borrowed(&[]);
640 }
641
642 let mut buf = match self.borrow_all_or_prep_buf() {
643 Ok(s) => return Cow::Borrowed(s.as_bytes()),
644 Err(buf) => buf.into_bytes(),
645 };
646
647 for chunk in self {
648 match chunk {
649 DecodedChunk::Unencoded(s) => buf.extend_from_slice(s.as_bytes()),
650 DecodedChunk::PctDecoded(s) => buf.push(s),
651 }
652 }
653 Cow::Owned(buf)
654 }
655
656 /// Attempts to decode the slice to a string.
657 ///
658 /// This method allocates only when the slice contains any percent-encoded octet.
659 ///
660 /// # Errors
661 ///
662 /// Returns `Err` containing the decoded bytes if they are not valid UTF-8.
663 pub fn to_string(mut self) -> Result<Cow<'a, str>, Vec<u8>> {
664 if self.source.is_empty() {
665 return Ok(Cow::Borrowed(""));
666 }
667
668 let mut buf = match self.borrow_all_or_prep_buf() {
669 Ok(s) => return Ok(Cow::Borrowed(s)),
670 Err(buf) => Ok::<_, Vec<u8>>(buf),
671 };
672
673 self.decode_utf8(|chunk| match chunk {
674 DecodedUtf8Chunk::Unencoded(s) => match &mut buf {
675 Ok(string) => string.push_str(s),
676 Err(vec) => vec.extend_from_slice(s.as_bytes()),
677 },
678 DecodedUtf8Chunk::Decoded { valid, invalid } => match &mut buf {
679 Ok(string) => {
680 string.push_str(valid);
681 if !invalid.is_empty() {
682 let mut vec = core::mem::take(string).into_bytes();
683 vec.extend_from_slice(invalid);
684 buf = Err(vec);
685 }
686 }
687 Err(vec) => {
688 vec.extend_from_slice(valid.as_bytes());
689 vec.extend_from_slice(invalid);
690 }
691 },
692 });
693
694 match buf {
695 Ok(buf) => Ok(Cow::Owned(buf)),
696 Err(buf) => Err(buf),
697 }
698 }
699
700 /// Decodes the slice to a string, replacing any invalid UTF-8 sequences with
701 /// [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD].
702 ///
703 /// [U+FFFD]: char::REPLACEMENT_CHARACTER
704 ///
705 /// This method allocates only when the slice contains any percent-encoded octet.
706 #[must_use]
707 pub fn to_string_lossy(mut self) -> Cow<'a, str> {
708 if self.source.is_empty() {
709 return Cow::Borrowed("");
710 }
711
712 let mut buf = match self.borrow_all_or_prep_buf() {
713 Ok(s) => return Cow::Borrowed(s),
714 Err(buf) => buf,
715 };
716
717 self.decode_utf8(|chunk| match chunk {
718 DecodedUtf8Chunk::Unencoded(s) => buf.push_str(s),
719 DecodedUtf8Chunk::Decoded { valid, invalid } => {
720 buf.push_str(valid);
721 if !invalid.is_empty() {
722 buf.push(char::REPLACEMENT_CHARACTER);
723 }
724 }
725 });
726 Cow::Owned(buf)
727 }
728}
729
/// Returns the percent-encoded form of `x`: `'%'` followed by two uppercase
/// hex digits.
///
/// The result is a subslice of a 768-byte table built at compile time, so no
/// formatting or allocation happens at run time.
pub(crate) fn encode_byte(x: u8) -> &'static str {
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";

    /// Lays out `"%00%01...%FF"` as raw bytes.
    const fn build_table() -> [u8; 256 * 3] {
        // Every third byte stays `'%'`; the two after it are overwritten.
        let mut table = [b'%'; 256 * 3];
        let mut byte = 0;
        while byte < 256 {
            table[byte * 3 + 1] = HEX_DIGITS[byte / 16];
            table[byte * 3 + 2] = HEX_DIGITS[byte % 16];
            byte += 1;
        }
        table
    }

    const TABLE: &[u8; 256 * 3] = &build_table();

    const TABLE_STR: &str = match str::from_utf8(TABLE) {
        Ok(s) => s,
        Err(_) => unreachable!(),
    };

    let start = usize::from(x) * 3;
    &TABLE_STR[start..start + 3]
}
752
/// An iterator used to percent-encode a string slice.
///
/// This struct is created by [`Table::encode`]. Normally you'll use [`EString::encode_str`]
/// instead, unless you need precise control over allocation.
///
/// See the [`EncodedChunk`] type for documentation of the items yielded by this iterator.
#[cfg(feature = "alloc")]
#[derive(Clone, Debug)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub(crate) struct Encode<'t, 's> {
    // Table deciding which characters may be emitted unencoded.
    table: &'t Table,
    // Remaining input; begins with the character currently being encoded, if any.
    source: &'s str,
    // UTF-8 length of the character currently being encoded (0 if none).
    enc_len: usize,
    // Index of the next byte of that character to emit as `"%XX"`.
    enc_i: usize,
}
768
#[cfg(feature = "alloc")]
impl<'t, 's> Encode<'t, 's> {
    /// Creates an encoder over `source` that leaves characters allowed by
    /// `table` unencoded.
    ///
    /// Both counters start at zero, i.e., no character is pending encoding.
    pub(crate) fn new(table: &'t Table, source: &'s str) -> Self {
        Self {
            table,
            source,
            enc_len: 0,
            enc_i: 0,
        }
    }
}
780
/// An item returned by the [`Encode`] iterator.
///
/// Each item is either a run of input text emitted as-is or the `"%XX"`
/// form of a single input byte.
#[cfg(feature = "alloc")]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum EncodedChunk<'a> {
    /// An unencoded subslice.
    Unencoded(&'a str),
    /// A byte, percent-encoded (for example, `0x20` encoded as `"%20"`).
    PctEncoded(&'static str),
}
790
#[cfg(feature = "alloc")]
impl<'a> EncodedChunk<'a> {
    /// Returns the text of the chunk, whichever variant it is.
    #[must_use]
    pub fn as_str(self) -> &'a str {
        match self {
            Self::Unencoded(text) => text,
            // `&'static str` shortens to `&'a str`.
            Self::PctEncoded(triplet) => triplet,
        }
    }
}
801
#[cfg(feature = "alloc")]
impl<'t, 's> Iterator for Encode<'t, 's> {
    type Item = EncodedChunk<'s>;

    /// Yields the next chunk: either a run of allowed characters or one
    /// percent-encoded byte of a disallowed character.
    fn next(&mut self) -> Option<Self::Item> {
        // A disallowed character is partly emitted: continue with its next byte.
        if self.enc_i < self.enc_len {
            let s = encode_byte(self.source.as_bytes()[self.enc_i]);
            self.enc_i += 1;
            return Some(EncodedChunk::PctEncoded(s));
        }

        // Skip past the character that has just been fully encoded (if any).
        self.source = &self.source[self.enc_len..];
        self.enc_len = 0;

        if self.source.is_empty() {
            return None;
        }

        // Byte index of the first disallowed character, or the input length if
        // there is none. On a hit, `iter` is left just past that character.
        let mut iter = self.source.char_indices();
        let i = iter
            .find_map(|(i, ch)| (!self.table.allows(ch)).then_some(i))
            .unwrap_or(self.source.len());

        // `CharIndices::offset` sadly requires an MSRV of 1.82,
        // so we do pointer math to get the offset for now.
        if i == 0 {
            // The input starts with a disallowed character: record its UTF-8
            // length and emit its first byte now.
            self.enc_len = iter.as_str().as_ptr() as usize - self.source.as_ptr() as usize;
            self.enc_i = 1;

            let s = encode_byte(self.source.as_bytes()[0]);
            Some(EncodedChunk::PctEncoded(s))
        } else {
            // Emit the allowed run; `source` now starts at the disallowed
            // character, or is empty if the run reached the end.
            let s;
            (s, self.source) = self.source.split_at(i);

            // Length of the disallowed character at the new start of `source`
            // (0 when the run reached the end of the input).
            self.enc_len = iter.as_str().as_ptr() as usize - self.source.as_ptr() as usize;
            self.enc_i = 0;

            Some(EncodedChunk::Unencoded(s))
        }
    }
}
844
// After `next` returns `None`, `source` is empty and `enc_len` is 0, so every
// later call returns `None` as well.
#[cfg(feature = "alloc")]
impl FusedIterator for Encode<'_, '_> {}
847
/// An iterator over subslices of an [`EStr`] slice separated by a delimiter.
///
/// This struct is created by [`EStr::split`].
#[derive(Clone, Debug)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct Split<'a, E: Encoder> {
    // The underlying string splitter.
    inner: str::Split<'a, char>,
    // Zero-sized marker for the encoder of the yielded slices.
    encoder: PhantomData<E>,
}
857
858impl<'a, E: Encoder> Iterator for Split<'a, E> {
859 type Item = &'a EStr<E>;
860
861 fn next(&mut self) -> Option<&'a EStr<E>> {
862 self.inner.next().map(EStr::new_validated)
863 }
864}
865
866impl<'a, E: Encoder> DoubleEndedIterator for Split<'a, E> {
867 fn next_back(&mut self) -> Option<&'a EStr<E>> {
868 self.inner.next_back().map(EStr::new_validated)
869 }
870}
871
// Delegates to `str::Split`, which is itself a fused iterator.
impl<E: Encoder> FusedIterator for Split<'_, E> {}