Skip to main content

pct_str/
nsized.rs

1use core::{
2	borrow::Borrow,
3	cmp::Ordering,
4	fmt::{self, Debug, Display, Formatter},
5	hash::{Hash, Hasher},
6};
7
8use crate::{
9	InvalidPctString, PctString,
10	util::{TryEncodedBytes, to_digit},
11};
12
/// Percent-encoded string slice.
///
/// This is the equivalent of [`str`] for percent-encoded strings.
/// This is an *unsized* type, meaning that it must always be used behind a
/// pointer like `&` or [`Box`]. For an owned version of this type,
/// see [`PctString`].
///
/// The type is a transparent wrapper around the raw, still-encoded bytes,
/// which is what allows a validated `&[u8]` to be reinterpreted as a
/// `&PctStr` without copying.
///
/// # Examples
///
/// ```
/// use pct_str::PctStr;
///
/// let buffer = "Hello%20World%21";
/// let pct_str = PctStr::new(buffer).unwrap();
///
/// // You can compare percent-encoded strings with a regular string.
/// assert!(pct_str == "Hello World!");
///
/// // The underlying string is unchanged.
/// assert!(pct_str.as_str() == "Hello%20World%21");
///
/// // Just as a regular string, you can iterate over the
/// // encoded characters of `pct_str` with [`PctStr::chars`].
/// for c in pct_str.chars() {
///   print!("{}", c);
/// }
///
/// // You can decode the string and remove every percent-encoded character
/// // with the [`PctStr::decode`] method.
/// let decoded_string: String = pct_str.decode();
/// println!("{}", decoded_string);
/// ```
// `repr(transparent)` guarantees the same layout as the wrapped `[u8]`;
// the default representation gives no such guarantee, yet reference
// reinterpretation between the two types depends on it.
#[repr(transparent)]
pub struct PctStr([u8]);
46
47impl PctStr {
48	/// Create a new percent-encoded string slice.
49	///
50	/// The input slice is checked for correct percent-encoding.
51	/// If the test fails, a [`InvalidPctString`] error is returned.
52	pub fn new<S: AsRef<[u8]> + ?Sized>(input: &S) -> Result<&PctStr, InvalidPctString<&S>> {
53		let input_bytes = input.as_ref();
54		if Self::validate(input_bytes.iter().copied()) {
55			Ok(unsafe { Self::new_unchecked(input_bytes) })
56		} else {
57			Err(InvalidPctString(input))
58		}
59	}
60
61	/// Create a new percent-encoded string slice without checking for correct encoding.
62	///
63	/// This is an unsafe function. The resulting string slice will have an undefined behaviour
64	/// if the input slice is not percent-encoded.
65	///
66	/// # Safety
67	///
68	/// The input `str` must be a valid percent-encoded string.
69	pub unsafe fn new_unchecked<S: AsRef<[u8]> + ?Sized>(input: &S) -> &PctStr {
70		unsafe { std::mem::transmute::<&[u8], &PctStr>(input.as_ref()) }
71	}
72
73	/// Checks that the given iterator produces a valid percent-encoded string.
74	pub fn validate(input: impl Iterator<Item = u8>) -> bool {
75		let chars = TryEncodedBytes::new(input);
76		utf8_decode::TryDecoder::new(chars).all(|r| r.is_ok())
77	}
78
79	/// Length of the decoded string (character count).
80	///
81	/// Computed in linear time.
82	/// This is different from the byte length, which can be retrieved using
83	/// `value.as_bytes().len()`.
84	#[inline]
85	pub fn len(&self) -> usize {
86		self.chars().count()
87	}
88
89	/// Checks if the string is empty.
90	#[inline]
91	pub fn is_empty(&self) -> bool {
92		self.0.is_empty()
93	}
94
95	/// Returns the underlying percent-encoding bytes.
96	#[inline]
97	pub fn as_bytes(&self) -> &[u8] {
98		&self.0
99	}
100
101	/// Get the underlying percent-encoded string slice.
102	#[inline]
103	pub fn as_str(&self) -> &str {
104		unsafe {
105			// SAFETY: the data has be validated, and all percent-encoded
106			//         strings are valid UTF-8 strings.
107			core::str::from_utf8_unchecked(&self.0)
108		}
109	}
110
111	/// Iterate over the encoded characters of the string.
112	#[inline]
113	pub fn chars(&self) -> Chars<'_> {
114		Chars::new(self.bytes())
115	}
116
117	/// Iterate over the encoded bytes of the string.
118	#[inline]
119	pub fn bytes(&self) -> Bytes<'_> {
120		Bytes(self.0.iter())
121	}
122
123	/// Decoding.
124	///
125	/// Return the string with the percent-encoded characters decoded.
126	pub fn decode(&self) -> String {
127		let mut decoded = String::with_capacity(self.len());
128		for c in self.chars() {
129			decoded.push(c)
130		}
131
132		decoded
133	}
134}
135
136impl PartialEq for PctStr {
137	#[inline]
138	fn eq(&self, other: &PctStr) -> bool {
139		let mut a = self.chars();
140		let mut b = other.chars();
141
142		loop {
143			match (a.next(), b.next()) {
144				(Some(a), Some(b)) if a != b => return false,
145				(Some(_), None) => return false,
146				(None, Some(_)) => return false,
147				(None, None) => break,
148				_ => (),
149			}
150		}
151
152		true
153	}
154}
155
156impl Eq for PctStr {}
157
158impl PartialEq<str> for PctStr {
159	#[inline]
160	fn eq(&self, other: &str) -> bool {
161		let mut a = self.chars();
162		let mut b = other.chars();
163
164		loop {
165			match (a.next(), b.next()) {
166				(Some(a), Some(b)) if a != b => return false,
167				(Some(_), None) => return false,
168				(None, Some(_)) => return false,
169				(None, None) => break,
170				_ => (),
171			}
172		}
173
174		true
175	}
176}
177
178impl PartialEq<PctString> for PctStr {
179	#[inline]
180	fn eq(&self, other: &PctString) -> bool {
181		let mut a = self.chars();
182		let mut b = other.chars();
183
184		loop {
185			match (a.next(), b.next()) {
186				(Some(a), Some(b)) if a != b => return false,
187				(Some(_), None) => return false,
188				(None, Some(_)) => return false,
189				(None, None) => break,
190				_ => (),
191			}
192		}
193
194		true
195	}
196}
197
198impl PartialOrd for PctStr {
199	fn partial_cmp(&self, other: &PctStr) -> Option<Ordering> {
200		Some(self.cmp(other))
201	}
202}
203
204impl Ord for PctStr {
205	fn cmp(&self, other: &PctStr) -> Ordering {
206		let mut self_chars = self.chars();
207		let mut other_chars = other.chars();
208
209		loop {
210			match (self_chars.next(), other_chars.next()) {
211				(None, None) => return Ordering::Equal,
212				(None, Some(_)) => return Ordering::Less,
213				(Some(_), None) => return Ordering::Greater,
214				(Some(a), Some(b)) => match a.cmp(&b) {
215					Ordering::Less => return Ordering::Less,
216					Ordering::Greater => return Ordering::Greater,
217					Ordering::Equal => (),
218				},
219			}
220		}
221	}
222}
223
224impl PartialOrd<PctString> for PctStr {
225	fn partial_cmp(&self, other: &PctString) -> Option<Ordering> {
226		self.partial_cmp(other.as_pct_str())
227	}
228}
229
230impl Hash for PctStr {
231	#[inline]
232	fn hash<H: Hasher>(&self, hasher: &mut H) {
233		for c in self.chars() {
234			c.hash(hasher)
235		}
236	}
237}
238
239impl Display for PctStr {
240	fn fmt(&self, f: &mut Formatter) -> fmt::Result {
241		fmt::Display::fmt(self.as_str(), f)
242	}
243}
244
245impl Debug for PctStr {
246	fn fmt(&self, f: &mut Formatter) -> fmt::Result {
247		Debug::fmt(self.as_str(), f)
248	}
249}
250
251impl ToOwned for PctStr {
252	type Owned = PctString;
253
254	fn to_owned(&self) -> Self::Owned {
255		unsafe { PctString::new_unchecked(self.0.to_owned()) }
256	}
257}
258
259impl Borrow<str> for PctStr {
260	fn borrow(&self) -> &str {
261		self.as_str()
262	}
263}
264
265impl AsRef<str> for PctStr {
266	fn as_ref(&self) -> &str {
267		self.as_str()
268	}
269}
270
271impl AsRef<[u8]> for PctStr {
272	fn as_ref(&self) -> &[u8] {
273		self.as_bytes()
274	}
275}
276
/// Bytes iterator.
///
/// Walks the raw bytes of a percent-encoded string and yields the decoded
/// bytes: each `%XX` sequence comes out as the single byte it encodes,
/// every other byte is passed through unchanged.
pub struct Bytes<'a>(std::slice::Iter<'a, u8>);
281
282impl<'a> Iterator for Bytes<'a> {
283	type Item = u8;
284
285	fn next(&mut self) -> Option<u8> {
286		if let Some(next) = self.0.next().copied() {
287			match next {
288				b'%' => {
289					let a = self.0.next().copied().unwrap();
290					let a = to_digit(a).unwrap();
291					let b = self.0.next().copied().unwrap();
292					let b = to_digit(b).unwrap();
293					let byte = a << 4 | b;
294					Some(byte)
295				}
296				_ => Some(next),
297			}
298		} else {
299			None
300		}
301	}
302}
303
304impl<'a> std::iter::FusedIterator for Bytes<'a> {}
305
306/// Characters iterator.
307///
308/// Iterates over the decoded characters of a percent-encoded string.
309pub struct Chars<'a> {
310	inner: utf8_decode::Decoder<Bytes<'a>>,
311}
312
313impl<'a> Chars<'a> {
314	fn new(bytes: Bytes<'a>) -> Self {
315		Self {
316			inner: utf8_decode::Decoder::new(bytes),
317		}
318	}
319}
320
321impl<'a> Iterator for Chars<'a> {
322	type Item = char;
323
324	fn next(&mut self) -> Option<char> {
325		// Safe as PctStr guarantees a valid byte sequence
326		self.inner.next().map(|x| x.unwrap())
327	}
328}
329
330impl<'a> std::iter::FusedIterator for Chars<'a> {}