pct_str/
lib.rs

1//! This crate provides two types, [`PctStr`] and [`PctString`], similar to [`str`] and [`String`],
2//! representing percent-encoded strings used in URL, URI, IRI, etc.
3//! You can use them to encode, decode and compare percent-encoded strings.
4//!
5//! # Basic usage
6//!
7//! You can parse/decode percent-encoded strings by building a [`PctStr`] slice over a [`str`] slice.
8//!
9//! ```
10//! use pct_str::PctStr;
11//!
12//! let pct_str = PctStr::new("Hello%20World%21").unwrap();
13//! assert_eq!(pct_str, "Hello World!");
14//!
15//! let decoded_string: String = pct_str.decode();
16//! assert_eq!(decoded_string, "Hello World!")
17//! ```
18//!
19//! To create new percent-encoded strings, use the [`PctString`] to copy or encode new strings.
20//!
21//! ```
22//! use pct_str::{PctString, URIReserved};
23//!
24//! // Copy the given percent-encoded string.
25//! let pct_string = PctString::new("Hello%20World%21").unwrap();
26//!
27//! // Encode the given regular string.
28//! let pct_string = PctString::encode("Hello World!".chars(), URIReserved);
29//!
30//! assert_eq!(pct_string.as_str(), "Hello%20World%21");
31//! ```
32//!
//! You can choose which characters are percent-encoded by the `encode` function
//! by implementing the [`Encoder`] trait.
35//!
36//! ```
37//! use pct_str::{URIReserved, PctString};
38//!
39//! struct CustomEncoder;
40//!
41//! impl pct_str::Encoder for CustomEncoder {
42//!   fn encode(&self, c: char) -> bool {
43//!     URIReserved.encode(c) || c.is_uppercase()
44//!   }
45//! }
46//!
47//! let pct_string = PctString::encode("Hello World!".chars(), CustomEncoder);
48//! assert_eq!(pct_string.as_str(), "%48ello%20%57orld%21")
49//! ```
50
51use std::borrow::Borrow;
52use std::hash;
53use std::{
54	cmp::{Ord, Ordering, PartialOrd},
55	fmt::Display,
56};
57use std::{convert::TryFrom, fmt, io, str::FromStr};
58
59/// Encoding error.
60///
61/// Raised when a given input string is not percent-encoded as expected.
62#[derive(Debug, Clone, thiserror::Error)]
63#[error("invalid percent-encoded string")]
64pub struct InvalidPctString<T>(pub T);
65
66impl<T> InvalidPctString<T> {
67	pub fn map<U>(self, f: impl FnOnce(T) -> U) -> InvalidPctString<U> {
68		InvalidPctString(f(self.0))
69	}
70}
71
72impl<'a, T: ?Sized + ToOwned> InvalidPctString<&'a T> {
73	pub fn into_owned(self) -> InvalidPctString<T::Owned> {
74		self.map(T::to_owned)
75	}
76}
77
78#[inline(always)]
79fn to_digit(b: u8) -> Result<u8, ByteError> {
80	match b {
81		// ASCII 0..=9
82		0x30..=0x39 => Ok(b - 0x30),
83		// ASCII A..=F
84		0x41..=0x46 => Ok(b - 0x37),
85		// ASCII a..=f
86		0x61..=0x66 => Ok(b - 0x57),
87		_ => Err(ByteError::InvalidByte(b)),
88	}
89}
90
/// Bytes iterator.
///
/// Yields the bytes of a percent-encoded string, with every `%XX` sequence
/// replaced by the single byte it encodes.
#[derive(Debug, Clone)]
pub struct Bytes<'a>(std::slice::Iter<'a, u8>);
95
/// Error raised while decoding the `%XX` sequences of an untrusted input.
#[derive(Debug, Clone)]
enum ByteError {
	/// A byte following `%` is not an ASCII hexadecimal digit.
	InvalidByte(u8),
	/// The input ended before both digits following `%` could be read.
	IncompleteEncoding,
}

impl From<ByteError> for io::Error {
	fn from(e: ByteError) -> Self {
		// Keep the original error as the source instead of flattening it to a string.
		io::Error::new(io::ErrorKind::InvalidData, e)
	}
}

impl Display for ByteError {
	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
		match self {
			// The offending byte is a bad hexadecimal digit, not a bad UTF-8 byte.
			ByteError::InvalidByte(b) => write!(f, "Invalid hexadecimal digit: {:#x}", b),
			ByteError::IncompleteEncoding => f.write_str("Incomplete percent-encoding segment"),
		}
	}
}

impl std::error::Error for ByteError {}
118
119impl<'a> Iterator for Bytes<'a> {
120	type Item = u8;
121
122	fn next(&mut self) -> Option<u8> {
123		if let Some(next) = self.0.next().copied() {
124			match next {
125				b'%' => {
126					let a = self.0.next().copied().unwrap();
127					let a = to_digit(a).unwrap();
128					let b = self.0.next().copied().unwrap();
129					let b = to_digit(b).unwrap();
130					let byte = a << 4 | b;
131					Some(byte)
132				}
133				_ => Some(next),
134			}
135		} else {
136			None
137		}
138	}
139}
140
141impl<'a> std::iter::FusedIterator for Bytes<'a> {}
142
/// Untrusted bytes iterator.
///
/// Wraps a source of raw bytes that has not been validated as a
/// percent-encoded string.
struct UntrustedBytes<B>(B);

impl<B> UntrustedBytes<B> {
	/// Wraps the given byte source.
	fn new(bytes: B) -> Self {
		UntrustedBytes(bytes)
	}
}
153
154impl<B: Iterator<Item = u8>> UntrustedBytes<B> {
155	fn try_next(&mut self, next: u8) -> io::Result<u8> {
156		match next {
157			b'%' => {
158				let a = self.0.next().ok_or(ByteError::IncompleteEncoding)?;
159				let a = to_digit(a)?;
160				let b = self.0.next().ok_or(ByteError::IncompleteEncoding)?;
161				let b = to_digit(b)?;
162				let byte = a << 4 | b;
163				Ok(byte)
164			}
165			_ => Ok(next),
166		}
167	}
168}
169
170impl<B: Iterator<Item = u8>> Iterator for UntrustedBytes<B> {
171	type Item = io::Result<u8>;
172
173	fn next(&mut self) -> Option<io::Result<u8>> {
174		self.0.next().map(|b| self.try_next(b))
175	}
176}
177
178impl<B: Iterator<Item = u8>> std::iter::FusedIterator for UntrustedBytes<B> {}
179
180/// Characters iterator.
181///
182/// Iterates over the encoded characters of a percent-encoded string.
183pub struct Chars<'a> {
184	inner: utf8_decode::Decoder<Bytes<'a>>,
185}
186
187impl<'a> Chars<'a> {
188	fn new(bytes: Bytes<'a>) -> Self {
189		Self {
190			inner: utf8_decode::Decoder::new(bytes),
191		}
192	}
193}
194
195impl<'a> Iterator for Chars<'a> {
196	type Item = char;
197
198	fn next(&mut self) -> Option<char> {
199		// Safe as PctStr guarantees a valid byte sequence
200		self.inner.next().map(|x| x.unwrap())
201	}
202}
203
204impl<'a> std::iter::FusedIterator for Chars<'a> {}
205
206/// Percent-Encoded string slice.
207///
208/// This is the equivalent of [`str`] for percent-encoded strings.
209/// This is an *unsized* type, meaning that it must always be used behind a
210/// pointer like `&` or [`Box`]. For an owned version of this type,
211/// see [`PctString`].
212///
213/// # Examples
214///
215/// ```
216/// use pct_str::PctStr;
217///
218/// let buffer = "Hello%20World%21";
219/// let pct_str = PctStr::new(buffer).unwrap();
220///
221/// // You can compare percent-encoded strings with a regular string.
222/// assert!(pct_str == "Hello World!");
223///
224/// // The underlying string is unchanged.
225/// assert!(pct_str.as_str() == "Hello%20World%21");
226///
227/// // Just as a regular string, you can iterate over the
228/// // encoded characters of `pct_str` with [`PctStr::chars`].
229/// for c in pct_str.chars() {
230///   print!("{}", c);
231/// }
232///
233/// // You can decode the string and every remove percent-encoded characters
234/// // with the [`PctStr::decode`] method.
235/// let decoded_string: String = pct_str.decode();
236/// println!("{}", decoded_string);
237/// ```
238pub struct PctStr([u8]);
239
240impl PctStr {
241	/// Create a new percent-encoded string slice.
242	///
243	/// The input slice is checked for correct percent-encoding.
244	/// If the test fails, a [`InvalidEncoding`] error is returned.
245	pub fn new<S: AsRef<[u8]> + ?Sized>(input: &S) -> Result<&PctStr, InvalidPctString<&S>> {
246		let input_bytes = input.as_ref();
247		if Self::validate(input_bytes.iter().copied()) {
248			Ok(unsafe { Self::new_unchecked(input_bytes) })
249		} else {
250			Err(InvalidPctString(input))
251		}
252	}
253
254	/// Create a new percent-encoded string slice without checking for correct encoding.
255	///
256	/// This is an unsafe function. The resulting string slice will have an undefined behaviour
257	/// if the input slice is not percent-encoded.
258	///
259	/// # Safety
260	///
261	/// The input `str` must be a valid percent-encoded string.
262	pub unsafe fn new_unchecked<S: AsRef<[u8]> + ?Sized>(input: &S) -> &PctStr {
263		std::mem::transmute(input.as_ref())
264	}
265
266	/// Checks that the given iterator produces a valid percent-encoded string.
267	pub fn validate(input: impl Iterator<Item = u8>) -> bool {
268		let chars = UntrustedBytes::new(input);
269		utf8_decode::UnsafeDecoder::new(chars).all(|r| r.is_ok())
270	}
271
272	/// Length of the decoded string (character count).
273	///
274	/// Computed in linear time.
275	/// This is different from the byte length, which can be retrieved using
276	/// `value.as_bytes().len()`.
277	#[inline]
278	pub fn len(&self) -> usize {
279		self.chars().count()
280	}
281
282	/// Checks if the string is empty.
283	#[inline]
284	pub fn is_empty(&self) -> bool {
285		self.0.is_empty()
286	}
287
288	/// Returns the underlying percent-encoding bytes.
289	#[inline]
290	pub fn as_bytes(&self) -> &[u8] {
291		&self.0
292	}
293
294	/// Get the underlying percent-encoded string slice.
295	#[inline]
296	pub fn as_str(&self) -> &str {
297		unsafe {
298			// SAFETY: the data has be validated, and all percent-encoded
299			//         strings are valid UTF-8 strings.
300			core::str::from_utf8_unchecked(&self.0)
301		}
302	}
303
304	/// Iterate over the encoded characters of the string.
305	#[inline]
306	pub fn chars(&self) -> Chars {
307		Chars::new(self.bytes())
308	}
309
310	/// Iterate over the encoded bytes of the string.
311	#[inline]
312	pub fn bytes(&self) -> Bytes {
313		Bytes(self.0.iter())
314	}
315
316	/// Decoding.
317	///
318	/// Return the string with the percent-encoded characters decoded.
319	pub fn decode(&self) -> String {
320		let mut decoded = String::with_capacity(self.len());
321		for c in self.chars() {
322			decoded.push(c)
323		}
324
325		decoded
326	}
327}
328
329impl PartialEq for PctStr {
330	#[inline]
331	fn eq(&self, other: &PctStr) -> bool {
332		let mut a = self.chars();
333		let mut b = other.chars();
334
335		loop {
336			match (a.next(), b.next()) {
337				(Some(a), Some(b)) if a != b => return false,
338				(Some(_), None) => return false,
339				(None, Some(_)) => return false,
340				(None, None) => break,
341				_ => (),
342			}
343		}
344
345		true
346	}
347}
348
349impl Eq for PctStr {}
350
351impl PartialEq<str> for PctStr {
352	#[inline]
353	fn eq(&self, other: &str) -> bool {
354		let mut a = self.chars();
355		let mut b = other.chars();
356
357		loop {
358			match (a.next(), b.next()) {
359				(Some(a), Some(b)) if a != b => return false,
360				(Some(_), None) => return false,
361				(None, Some(_)) => return false,
362				(None, None) => break,
363				_ => (),
364			}
365		}
366
367		true
368	}
369}
370
371impl PartialEq<PctString> for PctStr {
372	#[inline]
373	fn eq(&self, other: &PctString) -> bool {
374		let mut a = self.chars();
375		let mut b = other.chars();
376
377		loop {
378			match (a.next(), b.next()) {
379				(Some(a), Some(b)) if a != b => return false,
380				(Some(_), None) => return false,
381				(None, Some(_)) => return false,
382				(None, None) => break,
383				_ => (),
384			}
385		}
386
387		true
388	}
389}
390
391impl PartialOrd for PctStr {
392	fn partial_cmp(&self, other: &PctStr) -> Option<Ordering> {
393		Some(self.cmp(other))
394	}
395}
396
397impl Ord for PctStr {
398	fn cmp(&self, other: &PctStr) -> Ordering {
399		let mut self_chars = self.chars();
400		let mut other_chars = other.chars();
401
402		loop {
403			match (self_chars.next(), other_chars.next()) {
404				(None, None) => return Ordering::Equal,
405				(None, Some(_)) => return Ordering::Less,
406				(Some(_), None) => return Ordering::Greater,
407				(Some(a), Some(b)) => match a.cmp(&b) {
408					Ordering::Less => return Ordering::Less,
409					Ordering::Greater => return Ordering::Greater,
410					Ordering::Equal => (),
411				},
412			}
413		}
414	}
415}
416
417impl PartialOrd<PctString> for PctStr {
418	fn partial_cmp(&self, other: &PctString) -> Option<Ordering> {
419		self.partial_cmp(other.as_pct_str())
420	}
421}
422
423impl hash::Hash for PctStr {
424	#[inline]
425	fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
426		for c in self.chars() {
427			c.hash(hasher)
428		}
429	}
430}
431
432impl fmt::Display for PctStr {
433	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
434		fmt::Display::fmt(self.as_str(), f)
435	}
436}
437
438impl fmt::Debug for PctStr {
439	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
440		fmt::Debug::fmt(self.as_str(), f)
441	}
442}
443
444impl ToOwned for PctStr {
445	type Owned = PctString;
446
447	fn to_owned(&self) -> Self::Owned {
448		unsafe { PctString::new_unchecked(self.0.to_owned()) }
449	}
450}
451
452impl Borrow<str> for PctStr {
453	fn borrow(&self) -> &str {
454		self.as_str()
455	}
456}
457
458impl AsRef<str> for PctStr {
459	fn as_ref(&self) -> &str {
460		self.as_str()
461	}
462}
463
464impl Borrow<[u8]> for PctStr {
465	fn borrow(&self) -> &[u8] {
466		self.as_bytes()
467	}
468}
469
470impl AsRef<[u8]> for PctStr {
471	fn as_ref(&self) -> &[u8] {
472		self.as_bytes()
473	}
474}
475
/// Encoding predicate.
///
/// Instances of this trait are used along with the [`encode`](`PctString::encode`) function
/// to decide which character must be percent-encoded.
///
/// This crate provides a simple implementation of the trait, [`URIReserved`]
/// encoding characters reserved in the URI syntax.
///
/// Any closure or function of type `Fn(char) -> bool` is also an `Encoder`.
///
/// # Example
///
/// ```
/// use pct_str::{PctString, URIReserved};
///
/// let pct_string = PctString::encode("Hello World!".chars(), URIReserved);
/// println!("{}", pct_string.as_str()); // => Hello%20World%21
/// ```
///
/// Custom encoder implementation:
///
/// ```
/// use pct_str::{PctString, URIReserved};
///
/// struct CustomEncoder;
///
/// impl pct_str::Encoder for CustomEncoder {
///   fn encode(&self, c: char) -> bool {
///     URIReserved.encode(c) || c.is_uppercase()
///   }
/// }
///
/// let pct_string = PctString::encode("Hello World!".chars(), CustomEncoder);
/// println!("{}", pct_string.as_str()); // => %48ello%20%57orld%21
/// ```
pub trait Encoder {
	/// Decide if the given character must be encoded.
	///
	/// Note that the character `%` is always encoded even if this method returns `false` on it.
	fn encode(&self, c: char) -> bool;
}

impl<F: Fn(char) -> bool> Encoder for F {
	/// Uses the closure itself as the encoding predicate.
	fn encode(&self, c: char) -> bool {
		self(c)
	}
}
521
522/// Owned, mutable percent-encoded string.
523///
524/// This is the equivalent of [`String`] for percent-encoded strings.
525/// It implements [`Deref`](`std::ops::Deref`) to [`PctStr`] meaning that all methods on [`PctStr`] slices are
526/// available on `PctString` values as well.
527pub struct PctString(Vec<u8>);
528
529impl PctString {
530	/// Create a new owned percent-encoded string.
531	///
532	/// The input string is checked for correct percent-encoding.
533	/// If the test fails, a [`InvalidPctString`] error is returned.
534	pub fn new<B: Into<Vec<u8>>>(bytes: B) -> Result<Self, InvalidPctString<Vec<u8>>> {
535		let bytes = bytes.into();
536		if PctStr::validate(bytes.iter().copied()) {
537			Ok(Self(bytes))
538		} else {
539			Err(InvalidPctString(bytes))
540		}
541	}
542
543	pub fn from_string(string: String) -> Result<Self, InvalidPctString<String>> {
544		Self::new(string).map_err(|e| {
545			e.map(|bytes| unsafe {
546				// SAFETY: the bytes come from the UTF-8 encoded input `string`.
547				String::from_utf8_unchecked(bytes)
548			})
549		})
550	}
551
552	/// Creates a new owned percent-encoded string without validation.
553	///
554	/// # Safety
555	///
556	/// The input string must be correctly percent-encoded.
557	pub unsafe fn new_unchecked<B: Into<Vec<u8>>>(bytes: B) -> Self {
558		Self(bytes.into())
559	}
560
561	/// Encode a string into a percent-encoded string.
562	///
563	/// This function takes an [`Encoder`] instance to decide which character of the string must
564	/// be encoded.
565	///
566	/// Note that the character `%` will always be encoded regardless of the provided [`Encoder`].
567	///
568	/// # Example
569	///
570	/// ```
571	/// use pct_str::{PctString, URIReserved};
572	///
573	/// let pct_string = PctString::encode("Hello World!".chars(), URIReserved);
574	/// println!("{}", pct_string.as_str()); // => Hello World%21
575	/// ```
576	pub fn encode<E: Encoder>(src: impl Iterator<Item = char>, encoder: E) -> PctString {
577		use std::fmt::Write;
578
579		let mut buf = String::with_capacity(4);
580		let mut encoded = String::new();
581		for c in src {
582			if encoder.encode(c) || c == '%' {
583				buf.clear();
584				buf.push(c);
585				for byte in buf.bytes() {
586					write!(encoded, "%{:02X}", byte).unwrap();
587				}
588			} else {
589				encoded.push(c);
590			}
591		}
592
593		PctString(encoded.into_bytes())
594	}
595
596	/// Return this string as a borrowed percent-encoded string slice.
597	#[inline]
598	pub fn as_pct_str(&self) -> &PctStr {
599		unsafe {
600			// SAFETY: the bytes have been validated.
601			PctStr::new_unchecked(&self.0)
602		}
603	}
604
605	/// Return the internal string of the [`PctString`], consuming it
606	#[inline]
607	pub fn into_string(self) -> String {
608		unsafe {
609			// SAFETY: the bytes have been validated, and a percent-encoded
610			//         string is a valid UTF-8 string.
611			String::from_utf8_unchecked(self.0)
612		}
613	}
614
615	#[inline]
616	pub fn into_bytes(self) -> Vec<u8> {
617		self.0
618	}
619}
620
621impl std::ops::Deref for PctString {
622	type Target = PctStr;
623
624	#[inline]
625	fn deref(&self) -> &PctStr {
626		self.as_pct_str()
627	}
628}
629
630impl Borrow<PctStr> for PctString {
631	fn borrow(&self) -> &PctStr {
632		self.as_pct_str()
633	}
634}
635
636impl AsRef<PctStr> for PctString {
637	fn as_ref(&self) -> &PctStr {
638		self.as_pct_str()
639	}
640}
641
642impl Borrow<str> for PctString {
643	fn borrow(&self) -> &str {
644		self.as_str()
645	}
646}
647
648impl AsRef<str> for PctString {
649	fn as_ref(&self) -> &str {
650		self.as_str()
651	}
652}
653
654impl Borrow<[u8]> for PctString {
655	fn borrow(&self) -> &[u8] {
656		self.as_bytes()
657	}
658}
659
660impl AsRef<[u8]> for PctString {
661	fn as_ref(&self) -> &[u8] {
662		self.as_bytes()
663	}
664}
665
666impl PartialEq for PctString {
667	#[inline]
668	fn eq(&self, other: &PctString) -> bool {
669		let mut a = self.chars();
670		let mut b = other.chars();
671
672		loop {
673			match (a.next(), b.next()) {
674				(Some(a), Some(b)) if a != b => return false,
675				(Some(_), None) => return false,
676				(None, Some(_)) => return false,
677				(None, None) => break,
678				_ => (),
679			}
680		}
681
682		true
683	}
684}
685
686impl Eq for PctString {}
687
688impl PartialEq<PctStr> for PctString {
689	#[inline]
690	fn eq(&self, other: &PctStr) -> bool {
691		let mut a = self.chars();
692		let mut b = other.chars();
693
694		loop {
695			match (a.next(), b.next()) {
696				(Some(a), Some(b)) if a != b => return false,
697				(Some(_), None) => return false,
698				(None, Some(_)) => return false,
699				(None, None) => break,
700				_ => (),
701			}
702		}
703
704		true
705	}
706}
707
708impl PartialEq<&str> for PctString {
709	#[inline]
710	fn eq(&self, other: &&str) -> bool {
711		let mut a = self.chars();
712		let mut b = other.chars();
713
714		loop {
715			match (a.next(), b.next()) {
716				(Some(a), Some(b)) if a != b => return false,
717				(Some(_), None) => return false,
718				(None, Some(_)) => return false,
719				(None, None) => break,
720				_ => (),
721			}
722		}
723
724		true
725	}
726}
727
728impl PartialEq<str> for PctString {
729	#[inline]
730	fn eq(&self, other: &str) -> bool {
731		self.eq(&other)
732	}
733}
734
735impl PartialOrd for PctString {
736	fn partial_cmp(&self, other: &PctString) -> Option<Ordering> {
737		self.as_pct_str().partial_cmp(other.as_pct_str())
738	}
739}
740
741impl PartialOrd<PctStr> for PctString {
742	fn partial_cmp(&self, other: &PctStr) -> Option<Ordering> {
743		self.as_pct_str().partial_cmp(other)
744	}
745}
746
747impl hash::Hash for PctString {
748	#[inline]
749	fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
750		for c in self.chars() {
751			c.hash(hasher)
752		}
753	}
754}
755
756impl fmt::Display for PctString {
757	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
758		fmt::Display::fmt(self.as_str(), f)
759	}
760}
761
762impl fmt::Debug for PctString {
763	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
764		fmt::Debug::fmt(self.as_str(), f)
765	}
766}
767
768impl FromStr for PctString {
769	type Err = InvalidPctString<String>;
770
771	fn from_str(s: &str) -> Result<Self, Self::Err> {
772		Self::from_string(s.to_string())
773	}
774}
775
776impl TryFrom<String> for PctString {
777	type Error = InvalidPctString<String>;
778
779	fn try_from(value: String) -> Result<Self, Self::Error> {
780		Self::from_string(value)
781	}
782}
783
784impl<'a> TryFrom<&'a str> for PctString {
785	type Error = InvalidPctString<String>;
786
787	fn try_from(value: &'a str) -> Result<Self, Self::Error> {
788		Self::from_string(value.to_owned())
789	}
790}
791
792impl<'a> TryFrom<&'a str> for &'a PctStr {
793	type Error = InvalidPctString<&'a str>;
794
795	fn try_from(value: &'a str) -> Result<Self, Self::Error> {
796		PctStr::new(value)
797	}
798}
799
800/// URI-reserved characters encoder.
801///
802/// This [`Encoder`] encodes characters that are reserved in the syntax of URI according to
803/// [RFC 3986](https://tools.ietf.org/html/rfc3986).
804#[derive(Debug, Clone, Copy, PartialEq, Eq)]
805pub struct URIReserved;
806
807impl Encoder for URIReserved {
808	fn encode(&self, c: char) -> bool {
809		if !c.is_ascii_graphic() {
810			return true;
811		}
812
813		matches!(
814			c,
815			'!' | '#'
816				| '$' | '%' | '&'
817				| '\'' | '(' | ')'
818				| '*' | '+' | ','
819				| '/' | ':' | ';'
820				| '=' | '?' | '@'
821				| '[' | ']'
822		)
823	}
824}
825
826/// IRI-reserved characters encoder.
827///
828/// This [`Encoder`] encodes characters that are reserved in the syntax of IRI according to
829/// [RFC 3987](https://tools.ietf.org/html/rfc3987).
830#[derive(Debug, Clone, Copy, PartialEq, Eq)]
831pub enum IriReserved {
832	Segment,
833	SegmentNoColons,
834	Fragment,
835	Query,
836}
837
838impl Encoder for IriReserved {
839	fn encode(&self, c: char) -> bool {
840		// iunreserved
841		if c.is_ascii_alphanumeric() {
842			return false;
843		}
844
845		match c {
846			// ipchar
847			'@' => return false,
848			// iunreserved
849			'-' | '.' | '_' | '~' => return false,
850			// sub-delims
851			'!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | ';' | '=' => return false,
852			'/' | '?' => return *self != IriReserved::Query && *self != IriReserved::Fragment,
853			':' => return *self == IriReserved::SegmentNoColons,
854			_ => { /* fall through */ }
855		}
856
857		match c as u32 {
858			// ucschar
859			0xA0..=0xD7FF
860			| 0xF900..=0xFDCF
861			| 0xFDF0..=0xFFEF
862			| 0x10000..=0x1FFFD
863			| 0x20000..=0x2FFFD
864			| 0x30000..=0x3FFFD
865			| 0x40000..=0x4FFFD
866			| 0x50000..=0x5FFFD
867			| 0x60000..=0x6FFFD
868			| 0x70000..=0x7FFFD
869			| 0x80000..=0x8FFFD
870			| 0x90000..=0x9FFFD
871			| 0xA0000..=0xAFFFD
872			| 0xB0000..=0xBFFFD
873			| 0xC0000..=0xCFFFD
874			| 0xD0000..=0xDFFFD
875			| 0xE1000..=0xEFFFD => false,
876			// iprivate
877			0xE000..=0xF8FF | 0xF0000..=0xFFFFD | 0x100000..=0x10FFFD => {
878				*self != IriReserved::Query
879			}
880			_ => true,
881		}
882	}
883}
884
#[cfg(test)]
mod tests {
	use std::convert::TryInto;

	use super::*;

	// Non-ASCII letters are kept as is in an IRI segment; only the spaces are encoded.
	#[test]
	fn iri_encode_cyrillic() {
		let encoder = IriReserved::Segment;
		let pct_string = PctString::encode("традиционное польское блюдо".chars(), encoder);
		assert_eq!(&pct_string, &"традиционное польское блюдо");
		assert_eq!(&pct_string.as_str(), &"традиционное%20польское%20блюдо");
	}

	// In a segment, `?` and private-use characters are encoded.
	#[test]
	fn iri_encode_segment() {
		let encoder = IriReserved::Segment;
		let pct_string = PctString::encode(
			"?test=традиционное польское блюдо&cjk=真正&private=\u{10FFFD}".chars(),
			encoder,
		);

		assert_eq!(
			&pct_string,
			&"?test=традиционное польское блюдо&cjk=真正&private=\u{10FFFD}"
		);
		assert_eq!(
			&pct_string.as_str(),
			&"%3Ftest=традиционное%20польское%20блюдо&cjk=真正&private=%F4%8F%BF%BD"
		);
	}

	// Without any colon in the input, the result matches the plain segment encoder.
	#[test]
	fn iri_encode_segment_nocolon() {
		let encoder = IriReserved::SegmentNoColons;
		let pct_string = PctString::encode(
			"?test=традиционное польское блюдо&cjk=真正&private=\u{10FFFD}".chars(),
			encoder,
		);
		assert_eq!(
			&pct_string,
			&"?test=традиционное польское блюдо&cjk=真正&private=\u{10FFFD}"
		);
		assert_eq!(
			&pct_string.as_str(),
			&"%3Ftest=традиционное%20польское%20блюдо&cjk=真正&private=%F4%8F%BF%BD"
		);
	}

	// In a fragment, `?` is kept but private-use characters are still encoded.
	#[test]
	fn iri_encode_fragment() {
		let encoder = IriReserved::Fragment;
		let pct_string = PctString::encode(
			"?test=традиционное польское блюдо&cjk=真正&private=\u{10FFFD}".chars(),
			encoder,
		);
		assert_eq!(
			&pct_string,
			&"?test=традиционное польское блюдо&cjk=真正&private=\u{10FFFD}"
		);
		assert_eq!(
			&pct_string.as_str(),
			&"?test=традиционное%20польское%20блюдо&cjk=真正&private=%F4%8F%BF%BD"
		);
	}

	// In a query, both `?` and private-use characters are kept.
	#[test]
	fn iri_encode_query() {
		let encoder = IriReserved::Query;
		let pct_string = PctString::encode(
			"?test=традиционное польское блюдо&cjk=真正&private=\u{10FFFD}".chars(),
			encoder,
		);
		assert_eq!(
			&pct_string,
			&"?test=традиционное польское блюдо&cjk=真正&private=\u{10FFFD}"
		);
		assert_eq!(
			&pct_string.as_str(),
			&"?test=традиционное%20польское%20блюдо&cjk=真正&private=\u{10FFFD}"
		);
	}

	// The URI encoder encodes every non-ASCII character, byte by byte.
	#[test]
	fn uri_encode_cyrillic() {
		let encoder = URIReserved;
		let pct_string = PctString::encode("традиционное польское блюдо\0".chars(), encoder);
		assert_eq!(&pct_string, &"традиционное польское блюдо\0");
		assert_eq!(&pct_string.as_str(), &"%D1%82%D1%80%D0%B0%D0%B4%D0%B8%D1%86%D0%B8%D0%BE%D0%BD%D0%BD%D0%BE%D0%B5%20%D0%BF%D0%BE%D0%BB%D1%8C%D1%81%D0%BA%D0%BE%D0%B5%20%D0%B1%D0%BB%D1%8E%D0%B4%D0%BE%00");
	}

	#[test]
	fn pct_encoding_invalid() {
		// Decodes to bytes that are not valid UTF-8.
		let s = "%FF%FE%20%4F";
		assert!(PctStr::new(s).is_err());
		// Second hexadecimal digit is missing.
		let s = "%36%A";
		assert!(PctStr::new(s).is_err());
		// `%` is not a hexadecimal digit.
		let s = "%%32";
		assert!(PctStr::new(s).is_err());
		// `G` is not a hexadecimal digit.
		let s = "%G0";
		assert!(PctStr::new(s).is_err());
		// Lone `%` with no digit at all.
		let s = "%";
		assert!(PctStr::new(s).is_err());
	}

	#[test]
	fn pct_encoding_valid() {
		let s = "%00%5C%F4%8F%BF%BD%69";
		assert!(PctStr::new(s).is_ok());
		let s = "No percent.";
		assert!(PctStr::new(s).is_ok());
		// Lowercase hexadecimal digits are accepted.
		let s = "%e2%82%acwat";
		assert!(PctStr::new(s).is_ok());
	}

	#[test]
	fn try_from() {
		let s = "%00%5C%F4%8F%BF%BD%69";
		let _pcs = PctString::try_from(s).unwrap();
		let _pcs: &PctStr = s.try_into().unwrap();
	}

	// `%` is encoded even when the encoder never asks for encoding.
	#[test]
	fn encode_percent_always() {
		struct NoopEncoder;
		impl Encoder for NoopEncoder {
			fn encode(&self, _: char) -> bool {
				false
			}
		}
		let s = "%";
		let c = PctString::encode(s.chars(), NoopEncoder);
		assert_eq!(c.as_str(), "%25");
	}
}