Skip to main content

pct_str/
nsized.rs

1use core::{
2	borrow::Borrow,
3	cmp::Ordering,
4	fmt::{self, Debug, Display, Formatter},
5	hash::{Hash, Hasher},
6};
7
8use crate::{
9	InvalidPctString,
10	util::{TryEncodedBytes, to_digit},
11};
12
13#[cfg(feature = "std")]
14use crate::PctString;
15
/// Percent-Encoded string slice.
///
/// This is the equivalent of [`str`] for percent-encoded strings.
/// This is an *unsized* type, meaning that it must always be used behind a
/// pointer like `&` or [`Box`]. For an owned version of this type,
/// see [`PctString`].
///
/// The wrapped bytes are the *encoded* form of the string. The type is
/// `#[repr(transparent)]` so that a `&[u8]` can be soundly reinterpreted as a
/// `&PctStr` (this is what [`PctStr::new_unchecked`] does).
///
/// # Examples
///
/// ```
/// use pct_str::PctStr;
///
/// let buffer = "Hello%20World%21";
/// let pct_str = PctStr::new(buffer).unwrap();
///
/// // You can compare percent-encoded strings with a regular string.
/// assert!(pct_str == "Hello World!");
///
/// // The underlying string is unchanged.
/// assert!(pct_str.as_str() == "Hello%20World%21");
///
/// // Just as a regular string, you can iterate over the
/// // decoded characters of `pct_str` with [`PctStr::chars`].
/// for c in pct_str.chars() {
///   print!("{}", c);
/// }
///
/// // You can decode the string, replacing every percent-encoded sequence
/// // with the character it stands for, using the [`PctStr::decode`] method.
/// let decoded_string: String = pct_str.decode();
/// println!("{}", decoded_string);
/// ```
#[repr(transparent)]
pub struct PctStr([u8]);
49
50impl PctStr {
51	/// Create a new percent-encoded string slice.
52	///
53	/// The input slice is checked for correct percent-encoding.
54	/// If the test fails, a [`InvalidPctString`] error is returned.
55	pub fn new<S: AsRef<[u8]> + ?Sized>(input: &S) -> Result<&PctStr, InvalidPctString<&S>> {
56		let input_bytes = input.as_ref();
57		if Self::validate(input_bytes.iter().copied()) {
58			Ok(unsafe { Self::new_unchecked(input_bytes) })
59		} else {
60			Err(InvalidPctString(input))
61		}
62	}
63
64	/// Create a new percent-encoded string slice without checking for correct encoding.
65	///
66	/// This is an unsafe function. The resulting string slice will have an undefined behaviour
67	/// if the input slice is not percent-encoded.
68	///
69	/// # Safety
70	///
71	/// The input `str` must be a valid percent-encoded string.
72	pub unsafe fn new_unchecked<S: AsRef<[u8]> + ?Sized>(input: &S) -> &PctStr {
73		unsafe { core::mem::transmute::<&[u8], &PctStr>(input.as_ref()) }
74	}
75
76	/// Checks that the given iterator produces a valid percent-encoded string.
77	pub fn validate(input: impl Iterator<Item = u8>) -> bool {
78		let chars = TryEncodedBytes::new(input);
79		utf8_decode::TryDecoder::new(chars).all(|r| r.is_ok())
80	}
81
82	/// Length of the decoded string (character count).
83	///
84	/// Computed in linear time.
85	/// This is different from the byte length, which can be retrieved using
86	/// `value.as_bytes().len()`.
87	#[inline]
88	pub fn len(&self) -> usize {
89		self.chars().count()
90	}
91
92	/// Checks if the string is empty.
93	#[inline]
94	pub fn is_empty(&self) -> bool {
95		self.0.is_empty()
96	}
97
98	/// Returns the underlying percent-encoding bytes.
99	#[inline]
100	pub fn as_bytes(&self) -> &[u8] {
101		&self.0
102	}
103
104	/// Get the underlying percent-encoded string slice.
105	#[inline]
106	pub fn as_str(&self) -> &str {
107		unsafe {
108			// SAFETY: the data has be validated, and all percent-encoded
109			//         strings are valid UTF-8 strings.
110			core::str::from_utf8_unchecked(&self.0)
111		}
112	}
113
114	/// Iterate over the encoded characters of the string.
115	#[inline]
116	pub fn chars(&self) -> Chars<'_> {
117		Chars::new(self.bytes())
118	}
119
120	/// Iterate over the encoded bytes of the string.
121	#[inline]
122	pub fn bytes(&self) -> Bytes<'_> {
123		Bytes(self.0.iter())
124	}
125
126	/// Decoding.
127	///
128	/// Return the string with the percent-encoded characters decoded.
129	#[cfg(feature = "std")]
130	pub fn decode(&self) -> String {
131		let mut decoded = String::with_capacity(self.len());
132		for c in self.chars() {
133			decoded.push(c)
134		}
135
136		decoded
137	}
138}
139
140impl PartialEq for PctStr {
141	#[inline]
142	fn eq(&self, other: &PctStr) -> bool {
143		let mut a = self.chars();
144		let mut b = other.chars();
145
146		loop {
147			match (a.next(), b.next()) {
148				(Some(a), Some(b)) if a != b => return false,
149				(Some(_), None) => return false,
150				(None, Some(_)) => return false,
151				(None, None) => break,
152				_ => (),
153			}
154		}
155
156		true
157	}
158}
159
160impl Eq for PctStr {}
161
162impl PartialEq<str> for PctStr {
163	#[inline]
164	fn eq(&self, other: &str) -> bool {
165		let mut a = self.chars();
166		let mut b = other.chars();
167
168		loop {
169			match (a.next(), b.next()) {
170				(Some(a), Some(b)) if a != b => return false,
171				(Some(_), None) => return false,
172				(None, Some(_)) => return false,
173				(None, None) => break,
174				_ => (),
175			}
176		}
177
178		true
179	}
180}
181
/// Comparison with the owned flavour: both sides are compared through the
/// characters yielded by their `chars()` iterators.
#[cfg(feature = "std")]
impl PartialEq<PctString> for PctStr {
	#[inline]
	fn eq(&self, other: &PctString) -> bool {
		let lhs = self.chars();
		let rhs = other.chars();
		Iterator::eq(lhs, rhs)
	}
}
202
203impl PartialOrd for PctStr {
204	fn partial_cmp(&self, other: &PctStr) -> Option<Ordering> {
205		Some(self.cmp(other))
206	}
207}
208
209impl Ord for PctStr {
210	fn cmp(&self, other: &PctStr) -> Ordering {
211		let mut self_chars = self.chars();
212		let mut other_chars = other.chars();
213
214		loop {
215			match (self_chars.next(), other_chars.next()) {
216				(None, None) => return Ordering::Equal,
217				(None, Some(_)) => return Ordering::Less,
218				(Some(_), None) => return Ordering::Greater,
219				(Some(a), Some(b)) => match a.cmp(&b) {
220					Ordering::Less => return Ordering::Less,
221					Ordering::Greater => return Ordering::Greater,
222					Ordering::Equal => (),
223				},
224			}
225		}
226	}
227}
228
/// Ordering against the owned flavour: the owned string is viewed as a
/// `PctStr` and the slice-to-slice comparison is reused.
#[cfg(feature = "std")]
impl PartialOrd<PctString> for PctStr {
	fn partial_cmp(&self, other: &PctString) -> Option<Ordering> {
		let other: &PctStr = other.as_pct_str();
		PartialOrd::partial_cmp(self, other)
	}
}
235
236impl Hash for PctStr {
237	#[inline]
238	fn hash<H: Hasher>(&self, hasher: &mut H) {
239		for c in self.chars() {
240			c.hash(hasher)
241		}
242	}
243}
244
245impl Display for PctStr {
246	fn fmt(&self, f: &mut Formatter) -> fmt::Result {
247		fmt::Display::fmt(self.as_str(), f)
248	}
249}
250
251impl Debug for PctStr {
252	fn fmt(&self, f: &mut Formatter) -> fmt::Result {
253		Debug::fmt(self.as_str(), f)
254	}
255}
256
/// Owned counterpart: copies the encoded bytes into a [`PctString`].
#[cfg(feature = "std")]
impl ToOwned for PctStr {
	type Owned = PctString;

	fn to_owned(&self) -> Self::Owned {
		let encoded = self.0.to_vec();
		// SAFETY: the bytes are copied verbatim from an existing `PctStr`, so
		// they are as valid as the value they come from.
		unsafe { PctString::new_unchecked(encoded) }
	}
}
265
266impl Borrow<str> for PctStr {
267	fn borrow(&self) -> &str {
268		self.as_str()
269	}
270}
271
272impl AsRef<str> for PctStr {
273	fn as_ref(&self) -> &str {
274		self.as_str()
275	}
276}
277
278impl AsRef<[u8]> for PctStr {
279	fn as_ref(&self) -> &[u8] {
280		self.as_bytes()
281	}
282}
283
284/// Bytes iterator.
285///
286/// Iterates over the decoded bytes of a percent-encoded string.
287pub struct Bytes<'a>(core::slice::Iter<'a, u8>);
288
289impl<'a> Iterator for Bytes<'a> {
290	type Item = u8;
291
292	fn next(&mut self) -> Option<u8> {
293		if let Some(next) = self.0.next().copied() {
294			match next {
295				b'%' => {
296					let a = self.0.next().copied().unwrap();
297					let a = to_digit(a).unwrap();
298					let b = self.0.next().copied().unwrap();
299					let b = to_digit(b).unwrap();
300					let byte = a << 4 | b;
301					Some(byte)
302				}
303				_ => Some(next),
304			}
305		} else {
306			None
307		}
308	}
309}
310
311impl<'a> core::iter::FusedIterator for Bytes<'a> {}
312
313/// Characters iterator.
314///
315/// Iterates over the decoded characters of a percent-encoded string.
316pub struct Chars<'a> {
317	inner: utf8_decode::Decoder<Bytes<'a>>,
318}
319
320impl<'a> Chars<'a> {
321	fn new(bytes: Bytes<'a>) -> Self {
322		Self {
323			inner: utf8_decode::Decoder::new(bytes),
324		}
325	}
326}
327
328impl<'a> Iterator for Chars<'a> {
329	type Item = char;
330
331	fn next(&mut self) -> Option<char> {
332		// Safe as PctStr guarantees a valid byte sequence
333		self.inner.next().map(|x| x.unwrap())
334	}
335}
336
337impl<'a> core::iter::FusedIterator for Chars<'a> {}