1use std::borrow::Borrow;
52use std::hash;
53use std::{
54 cmp::{Ord, Ordering, PartialOrd},
55 fmt::Display,
56};
57use std::{convert::TryFrom, fmt, io, str::FromStr};
58
/// Error returned when an input is rejected as a percent-encoded string.
///
/// The rejected input is stored in the public field so that the caller can
/// get it back.
#[derive(Debug, Clone, thiserror::Error)]
#[error("invalid percent-encoded string")]
pub struct InvalidPctString<T>(pub T);
65
66impl<T> InvalidPctString<T> {
67 pub fn map<U>(self, f: impl FnOnce(T) -> U) -> InvalidPctString<U> {
68 InvalidPctString(f(self.0))
69 }
70}
71
72impl<'a, T: ?Sized + ToOwned> InvalidPctString<&'a T> {
73 pub fn into_owned(self) -> InvalidPctString<T::Owned> {
74 self.map(T::to_owned)
75 }
76}
77
78#[inline(always)]
79fn to_digit(b: u8) -> Result<u8, ByteError> {
80 match b {
81 0x30..=0x39 => Ok(b - 0x30),
83 0x41..=0x46 => Ok(b - 0x37),
85 0x61..=0x66 => Ok(b - 0x57),
87 _ => Err(ByteError::InvalidByte(b)),
88 }
89}
90
/// Iterator over the decoded bytes of a percent-encoded string.
///
/// Wraps an iterator over the raw bytes; each `%XX` triplet is yielded as a
/// single byte.
pub struct Bytes<'a>(std::slice::Iter<'a, u8>);
95
/// Failure encountered while decoding a `%XX` triplet from untrusted input.
#[derive(Debug, Clone)]
enum ByteError {
    /// A byte following `%` was not a hexadecimal digit.
    InvalidByte(u8),
    /// The input ended before both hexadecimal digits of a triplet were read.
    IncompleteEncoding,
}
101
102impl From<ByteError> for io::Error {
103 fn from(e: ByteError) -> Self {
104 io::Error::new(io::ErrorKind::InvalidData, e.to_string())
105 }
106}
107
108impl Display for ByteError {
109 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
110 match self {
111 ByteError::InvalidByte(b) => write!(f, "Invalid UTF-8 byte: {:#x}", b),
112 ByteError::IncompleteEncoding => f.write_str("Incomplete percent-encoding segment"),
113 }
114 }
115}
116
// `ByteError` has no underlying source, so the default `Error` methods suffice.
impl std::error::Error for ByteError {}
118
119impl<'a> Iterator for Bytes<'a> {
120 type Item = u8;
121
122 fn next(&mut self) -> Option<u8> {
123 if let Some(next) = self.0.next().copied() {
124 match next {
125 b'%' => {
126 let a = self.0.next().copied().unwrap();
127 let a = to_digit(a).unwrap();
128 let b = self.0.next().copied().unwrap();
129 let b = to_digit(b).unwrap();
130 let byte = a << 4 | b;
131 Some(byte)
132 }
133 _ => Some(next),
134 }
135 } else {
136 None
137 }
138 }
139}
140
// `next` returns `None` only when the underlying slice iterator does, and
// `std::slice::Iter` is itself fused.
impl<'a> std::iter::FusedIterator for Bytes<'a> {}
142
/// Decoding iterator over raw bytes that have not been validated.
///
/// Malformed `%XX` triplets are reported as errors instead of panicking.
struct UntrustedBytes<B>(B);
147
148impl<B> UntrustedBytes<B> {
149 fn new(bytes: B) -> Self {
150 Self(bytes)
151 }
152}
153
154impl<B: Iterator<Item = u8>> UntrustedBytes<B> {
155 fn try_next(&mut self, next: u8) -> io::Result<u8> {
156 match next {
157 b'%' => {
158 let a = self.0.next().ok_or(ByteError::IncompleteEncoding)?;
159 let a = to_digit(a)?;
160 let b = self.0.next().ok_or(ByteError::IncompleteEncoding)?;
161 let b = to_digit(b)?;
162 let byte = a << 4 | b;
163 Ok(byte)
164 }
165 _ => Ok(next),
166 }
167 }
168}
169
170impl<B: Iterator<Item = u8>> Iterator for UntrustedBytes<B> {
171 type Item = io::Result<u8>;
172
173 fn next(&mut self) -> Option<io::Result<u8>> {
174 self.0.next().map(|b| self.try_next(b))
175 }
176}
177
// NOTE(review): `next` returns `None` exactly when the wrapped iterator does,
// so this marker appears to rely on `B` itself being fused — confirm.
impl<B: Iterator<Item = u8>> std::iter::FusedIterator for UntrustedBytes<B> {}
179
/// Iterator over the decoded characters of a percent-encoded string.
pub struct Chars<'a> {
    /// UTF-8 decoder fed with the percent-decoded bytes.
    inner: utf8_decode::Decoder<Bytes<'a>>,
}
186
187impl<'a> Chars<'a> {
188 fn new(bytes: Bytes<'a>) -> Self {
189 Self {
190 inner: utf8_decode::Decoder::new(bytes),
191 }
192 }
193}
194
195impl<'a> Iterator for Chars<'a> {
196 type Item = char;
197
198 fn next(&mut self) -> Option<char> {
199 self.inner.next().map(|x| x.unwrap())
201 }
202}
203
// NOTE(review): presumably sound because the decoder stops once `Bytes` is
// exhausted — confirm against `utf8_decode::Decoder`.
impl<'a> std::iter::FusedIterator for Chars<'a> {}
205
/// Borrowed percent-encoded string slice.
///
/// Thin wrapper around the raw (still encoded) bytes.
///
/// `#[repr(transparent)]` guarantees that this type has the same layout as
/// `[u8]`, which is what makes reinterpreting a `&[u8]` as a `&PctStr` well
/// defined.
#[repr(transparent)]
pub struct PctStr([u8]);
239
240impl PctStr {
241 pub fn new<S: AsRef<[u8]> + ?Sized>(input: &S) -> Result<&PctStr, InvalidPctString<&S>> {
246 let input_bytes = input.as_ref();
247 if Self::validate(input_bytes.iter().copied()) {
248 Ok(unsafe { Self::new_unchecked(input_bytes) })
249 } else {
250 Err(InvalidPctString(input))
251 }
252 }
253
254 pub unsafe fn new_unchecked<S: AsRef<[u8]> + ?Sized>(input: &S) -> &PctStr {
263 std::mem::transmute(input.as_ref())
264 }
265
266 pub fn validate(input: impl Iterator<Item = u8>) -> bool {
268 let chars = UntrustedBytes::new(input);
269 utf8_decode::UnsafeDecoder::new(chars).all(|r| r.is_ok())
270 }
271
272 #[inline]
278 pub fn len(&self) -> usize {
279 self.chars().count()
280 }
281
282 #[inline]
284 pub fn is_empty(&self) -> bool {
285 self.0.is_empty()
286 }
287
288 #[inline]
290 pub fn as_bytes(&self) -> &[u8] {
291 &self.0
292 }
293
294 #[inline]
296 pub fn as_str(&self) -> &str {
297 unsafe {
298 core::str::from_utf8_unchecked(&self.0)
301 }
302 }
303
304 #[inline]
306 pub fn chars(&self) -> Chars {
307 Chars::new(self.bytes())
308 }
309
310 #[inline]
312 pub fn bytes(&self) -> Bytes {
313 Bytes(self.0.iter())
314 }
315
316 pub fn decode(&self) -> String {
320 let mut decoded = String::with_capacity(self.len());
321 for c in self.chars() {
322 decoded.push(c)
323 }
324
325 decoded
326 }
327}
328
329impl PartialEq for PctStr {
330 #[inline]
331 fn eq(&self, other: &PctStr) -> bool {
332 let mut a = self.chars();
333 let mut b = other.chars();
334
335 loop {
336 match (a.next(), b.next()) {
337 (Some(a), Some(b)) if a != b => return false,
338 (Some(_), None) => return false,
339 (None, Some(_)) => return false,
340 (None, None) => break,
341 _ => (),
342 }
343 }
344
345 true
346 }
347}
348
// Equality compares decoded `char` sequences, which is a total equivalence.
impl Eq for PctStr {}
350
351impl PartialEq<str> for PctStr {
352 #[inline]
353 fn eq(&self, other: &str) -> bool {
354 let mut a = self.chars();
355 let mut b = other.chars();
356
357 loop {
358 match (a.next(), b.next()) {
359 (Some(a), Some(b)) if a != b => return false,
360 (Some(_), None) => return false,
361 (None, Some(_)) => return false,
362 (None, None) => break,
363 _ => (),
364 }
365 }
366
367 true
368 }
369}
370
371impl PartialEq<PctString> for PctStr {
372 #[inline]
373 fn eq(&self, other: &PctString) -> bool {
374 let mut a = self.chars();
375 let mut b = other.chars();
376
377 loop {
378 match (a.next(), b.next()) {
379 (Some(a), Some(b)) if a != b => return false,
380 (Some(_), None) => return false,
381 (None, Some(_)) => return false,
382 (None, None) => break,
383 _ => (),
384 }
385 }
386
387 true
388 }
389}
390
391impl PartialOrd for PctStr {
392 fn partial_cmp(&self, other: &PctStr) -> Option<Ordering> {
393 Some(self.cmp(other))
394 }
395}
396
397impl Ord for PctStr {
398 fn cmp(&self, other: &PctStr) -> Ordering {
399 let mut self_chars = self.chars();
400 let mut other_chars = other.chars();
401
402 loop {
403 match (self_chars.next(), other_chars.next()) {
404 (None, None) => return Ordering::Equal,
405 (None, Some(_)) => return Ordering::Less,
406 (Some(_), None) => return Ordering::Greater,
407 (Some(a), Some(b)) => match a.cmp(&b) {
408 Ordering::Less => return Ordering::Less,
409 Ordering::Greater => return Ordering::Greater,
410 Ordering::Equal => (),
411 },
412 }
413 }
414 }
415}
416
417impl PartialOrd<PctString> for PctStr {
418 fn partial_cmp(&self, other: &PctString) -> Option<Ordering> {
419 self.partial_cmp(other.as_pct_str())
420 }
421}
422
423impl hash::Hash for PctStr {
424 #[inline]
425 fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
426 for c in self.chars() {
427 c.hash(hasher)
428 }
429 }
430}
431
432impl fmt::Display for PctStr {
433 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
434 fmt::Display::fmt(self.as_str(), f)
435 }
436}
437
438impl fmt::Debug for PctStr {
439 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
440 fmt::Debug::fmt(self.as_str(), f)
441 }
442}
443
444impl ToOwned for PctStr {
445 type Owned = PctString;
446
447 fn to_owned(&self) -> Self::Owned {
448 unsafe { PctString::new_unchecked(self.0.to_owned()) }
449 }
450}
451
452impl Borrow<str> for PctStr {
453 fn borrow(&self) -> &str {
454 self.as_str()
455 }
456}
457
458impl AsRef<str> for PctStr {
459 fn as_ref(&self) -> &str {
460 self.as_str()
461 }
462}
463
464impl Borrow<[u8]> for PctStr {
465 fn borrow(&self) -> &[u8] {
466 self.as_bytes()
467 }
468}
469
470impl AsRef<[u8]> for PctStr {
471 fn as_ref(&self) -> &[u8] {
472 self.as_bytes()
473 }
474}
475
/// Policy deciding which characters get percent-encoded.
pub trait Encoder {
    /// Returns `true` if `c` must be percent-encoded.
    fn encode(&self, c: char) -> bool;
}
515
516impl<F: Fn(char) -> bool> Encoder for F {
517 fn encode(&self, c: char) -> bool {
518 self(c)
519 }
520}
521
/// Owned percent-encoded string.
///
/// Stores the raw (still encoded) bytes.
pub struct PctString(Vec<u8>);
528
529impl PctString {
530 pub fn new<B: Into<Vec<u8>>>(bytes: B) -> Result<Self, InvalidPctString<Vec<u8>>> {
535 let bytes = bytes.into();
536 if PctStr::validate(bytes.iter().copied()) {
537 Ok(Self(bytes))
538 } else {
539 Err(InvalidPctString(bytes))
540 }
541 }
542
543 pub fn from_string(string: String) -> Result<Self, InvalidPctString<String>> {
544 Self::new(string).map_err(|e| {
545 e.map(|bytes| unsafe {
546 String::from_utf8_unchecked(bytes)
548 })
549 })
550 }
551
552 pub unsafe fn new_unchecked<B: Into<Vec<u8>>>(bytes: B) -> Self {
558 Self(bytes.into())
559 }
560
561 pub fn encode<E: Encoder>(src: impl Iterator<Item = char>, encoder: E) -> PctString {
577 use std::fmt::Write;
578
579 let mut buf = String::with_capacity(4);
580 let mut encoded = String::new();
581 for c in src {
582 if encoder.encode(c) || c == '%' {
583 buf.clear();
584 buf.push(c);
585 for byte in buf.bytes() {
586 write!(encoded, "%{:02X}", byte).unwrap();
587 }
588 } else {
589 encoded.push(c);
590 }
591 }
592
593 PctString(encoded.into_bytes())
594 }
595
596 #[inline]
598 pub fn as_pct_str(&self) -> &PctStr {
599 unsafe {
600 PctStr::new_unchecked(&self.0)
602 }
603 }
604
605 #[inline]
607 pub fn into_string(self) -> String {
608 unsafe {
609 String::from_utf8_unchecked(self.0)
612 }
613 }
614
615 #[inline]
616 pub fn into_bytes(self) -> Vec<u8> {
617 self.0
618 }
619}
620
621impl std::ops::Deref for PctString {
622 type Target = PctStr;
623
624 #[inline]
625 fn deref(&self) -> &PctStr {
626 self.as_pct_str()
627 }
628}
629
630impl Borrow<PctStr> for PctString {
631 fn borrow(&self) -> &PctStr {
632 self.as_pct_str()
633 }
634}
635
636impl AsRef<PctStr> for PctString {
637 fn as_ref(&self) -> &PctStr {
638 self.as_pct_str()
639 }
640}
641
642impl Borrow<str> for PctString {
643 fn borrow(&self) -> &str {
644 self.as_str()
645 }
646}
647
648impl AsRef<str> for PctString {
649 fn as_ref(&self) -> &str {
650 self.as_str()
651 }
652}
653
654impl Borrow<[u8]> for PctString {
655 fn borrow(&self) -> &[u8] {
656 self.as_bytes()
657 }
658}
659
660impl AsRef<[u8]> for PctString {
661 fn as_ref(&self) -> &[u8] {
662 self.as_bytes()
663 }
664}
665
666impl PartialEq for PctString {
667 #[inline]
668 fn eq(&self, other: &PctString) -> bool {
669 let mut a = self.chars();
670 let mut b = other.chars();
671
672 loop {
673 match (a.next(), b.next()) {
674 (Some(a), Some(b)) if a != b => return false,
675 (Some(_), None) => return false,
676 (None, Some(_)) => return false,
677 (None, None) => break,
678 _ => (),
679 }
680 }
681
682 true
683 }
684}
685
// Equality compares decoded `char` sequences, which is a total equivalence.
impl Eq for PctString {}
687
688impl PartialEq<PctStr> for PctString {
689 #[inline]
690 fn eq(&self, other: &PctStr) -> bool {
691 let mut a = self.chars();
692 let mut b = other.chars();
693
694 loop {
695 match (a.next(), b.next()) {
696 (Some(a), Some(b)) if a != b => return false,
697 (Some(_), None) => return false,
698 (None, Some(_)) => return false,
699 (None, None) => break,
700 _ => (),
701 }
702 }
703
704 true
705 }
706}
707
708impl PartialEq<&str> for PctString {
709 #[inline]
710 fn eq(&self, other: &&str) -> bool {
711 let mut a = self.chars();
712 let mut b = other.chars();
713
714 loop {
715 match (a.next(), b.next()) {
716 (Some(a), Some(b)) if a != b => return false,
717 (Some(_), None) => return false,
718 (None, Some(_)) => return false,
719 (None, None) => break,
720 _ => (),
721 }
722 }
723
724 true
725 }
726}
727
728impl PartialEq<str> for PctString {
729 #[inline]
730 fn eq(&self, other: &str) -> bool {
731 self.eq(&other)
732 }
733}
734
735impl PartialOrd for PctString {
736 fn partial_cmp(&self, other: &PctString) -> Option<Ordering> {
737 self.as_pct_str().partial_cmp(other.as_pct_str())
738 }
739}
740
741impl PartialOrd<PctStr> for PctString {
742 fn partial_cmp(&self, other: &PctStr) -> Option<Ordering> {
743 self.as_pct_str().partial_cmp(other)
744 }
745}
746
747impl hash::Hash for PctString {
748 #[inline]
749 fn hash<H: hash::Hasher>(&self, hasher: &mut H) {
750 for c in self.chars() {
751 c.hash(hasher)
752 }
753 }
754}
755
756impl fmt::Display for PctString {
757 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
758 fmt::Display::fmt(self.as_str(), f)
759 }
760}
761
762impl fmt::Debug for PctString {
763 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
764 fmt::Debug::fmt(self.as_str(), f)
765 }
766}
767
768impl FromStr for PctString {
769 type Err = InvalidPctString<String>;
770
771 fn from_str(s: &str) -> Result<Self, Self::Err> {
772 Self::from_string(s.to_string())
773 }
774}
775
776impl TryFrom<String> for PctString {
777 type Error = InvalidPctString<String>;
778
779 fn try_from(value: String) -> Result<Self, Self::Error> {
780 Self::from_string(value)
781 }
782}
783
784impl<'a> TryFrom<&'a str> for PctString {
785 type Error = InvalidPctString<String>;
786
787 fn try_from(value: &'a str) -> Result<Self, Self::Error> {
788 Self::from_string(value.to_owned())
789 }
790}
791
792impl<'a> TryFrom<&'a str> for &'a PctStr {
793 type Error = InvalidPctString<&'a str>;
794
795 fn try_from(value: &'a str) -> Result<Self, Self::Error> {
796 PctStr::new(value)
797 }
798}
799
/// Encoder for URI strings.
///
/// Its `Encoder` implementation encodes every character that is not an ASCII
/// graphic character, plus a fixed set of reserved ASCII characters.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct URIReserved;
806
807impl Encoder for URIReserved {
808 fn encode(&self, c: char) -> bool {
809 if !c.is_ascii_graphic() {
810 return true;
811 }
812
813 matches!(
814 c,
815 '!' | '#'
816 | '$' | '%' | '&'
817 | '\'' | '(' | ')'
818 | '*' | '+' | ','
819 | '/' | ':' | ';'
820 | '=' | '?' | '@'
821 | '[' | ']'
822 )
823 }
824}
825
/// Encoder for IRI strings; the variant selects the IRI component the
/// encoded text is meant for.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IriReserved {
    /// Path segment: `/` and `?` are encoded, `:` is kept.
    Segment,
    /// Path segment in which `:` is encoded as well.
    SegmentNoColons,
    /// Fragment: `/` and `?` are kept.
    Fragment,
    /// Query: `/` and `?` are kept, and so are private-use code points.
    Query,
}
837
838impl Encoder for IriReserved {
839 fn encode(&self, c: char) -> bool {
840 if c.is_ascii_alphanumeric() {
842 return false;
843 }
844
845 match c {
846 '@' => return false,
848 '-' | '.' | '_' | '~' => return false,
850 '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | ';' | '=' => return false,
852 '/' | '?' => return *self != IriReserved::Query && *self != IriReserved::Fragment,
853 ':' => return *self == IriReserved::SegmentNoColons,
854 _ => { }
855 }
856
857 match c as u32 {
858 0xA0..=0xD7FF
860 | 0xF900..=0xFDCF
861 | 0xFDF0..=0xFFEF
862 | 0x10000..=0x1FFFD
863 | 0x20000..=0x2FFFD
864 | 0x30000..=0x3FFFD
865 | 0x40000..=0x4FFFD
866 | 0x50000..=0x5FFFD
867 | 0x60000..=0x6FFFD
868 | 0x70000..=0x7FFFD
869 | 0x80000..=0x8FFFD
870 | 0x90000..=0x9FFFD
871 | 0xA0000..=0xAFFFD
872 | 0xB0000..=0xBFFFD
873 | 0xC0000..=0xCFFFD
874 | 0xD0000..=0xDFFFD
875 | 0xE1000..=0xEFFFD => false,
876 0xE000..=0xF8FF | 0xF0000..=0xFFFFD | 0x100000..=0x10FFFD => {
878 *self != IriReserved::Query
879 }
880 _ => true,
881 }
882 }
883}
884
#[cfg(test)]
mod tests {
    use std::convert::TryInto;

    use super::*;

    // In the encoding tests, the first assertion compares decoded content and
    // the second one compares the raw encoded text.

    #[test]
    fn iri_encode_cyrillic() {
        let encoder = IriReserved::Segment;
        let pct_string = PctString::encode("традиционное польское блюдо".chars(), encoder);
        assert_eq!(&pct_string, &"традиционное польское блюдо");
        assert_eq!(&pct_string.as_str(), &"традиционное%20польское%20блюдо");
    }

    #[test]
    fn iri_encode_segment() {
        let encoder = IriReserved::Segment;
        let pct_string = PctString::encode(
            "?test=традиционное польское блюдо&cjk=真正&private=\u{10FFFD}".chars(),
            encoder,
        );

        assert_eq!(
            &pct_string,
            &"?test=традиционное польское блюдо&cjk=真正&private=\u{10FFFD}"
        );
        assert_eq!(
            &pct_string.as_str(),
            &"%3Ftest=традиционное%20польское%20блюдо&cjk=真正&private=%F4%8F%BF%BD"
        );
    }

    #[test]
    fn iri_encode_segment_nocolon() {
        let encoder = IriReserved::SegmentNoColons;
        let pct_string = PctString::encode(
            "?test=традиционное польское блюдо&cjk=真正&private=\u{10FFFD}".chars(),
            encoder,
        );
        assert_eq!(
            &pct_string,
            &"?test=традиционное польское блюдо&cjk=真正&private=\u{10FFFD}"
        );
        assert_eq!(
            &pct_string.as_str(),
            &"%3Ftest=традиционное%20польское%20блюдо&cjk=真正&private=%F4%8F%BF%BD"
        );
    }

    #[test]
    fn iri_encode_fragment() {
        let encoder = IriReserved::Fragment;
        let pct_string = PctString::encode(
            "?test=традиционное польское блюдо&cjk=真正&private=\u{10FFFD}".chars(),
            encoder,
        );
        assert_eq!(
            &pct_string,
            &"?test=традиционное польское блюдо&cjk=真正&private=\u{10FFFD}"
        );
        assert_eq!(
            &pct_string.as_str(),
            &"?test=традиционное%20польское%20блюдо&cjk=真正&private=%F4%8F%BF%BD"
        );
    }

    #[test]
    fn iri_encode_query() {
        let encoder = IriReserved::Query;
        let pct_string = PctString::encode(
            "?test=традиционное польское блюдо&cjk=真正&private=\u{10FFFD}".chars(),
            encoder,
        );
        assert_eq!(
            &pct_string,
            &"?test=традиционное польское блюдо&cjk=真正&private=\u{10FFFD}"
        );
        assert_eq!(
            &pct_string.as_str(),
            &"?test=традиционное%20польское%20блюдо&cjk=真正&private=\u{10FFFD}"
        );
    }

    #[test]
    fn uri_encode_cyrillic() {
        let encoder = URIReserved;
        let pct_string = PctString::encode("традиционное польское блюдо\0".chars(), encoder);
        assert_eq!(&pct_string, &"традиционное польское блюдо\0");
        assert_eq!(&pct_string.as_str(), &"%D1%82%D1%80%D0%B0%D0%B4%D0%B8%D1%86%D0%B8%D0%BE%D0%BD%D0%BD%D0%BE%D0%B5%20%D0%BF%D0%BE%D0%BB%D1%8C%D1%81%D0%BA%D0%BE%D0%B5%20%D0%B1%D0%BB%D1%8E%D0%B4%D0%BE%00");
    }

    #[test]
    fn pct_encoding_invalid() {
        // Decodes to bytes that are not valid UTF-8.
        let s = "%FF%FE%20%4F";
        assert!(PctStr::new(s).is_err());
        // Triplet cut short by the end of the input.
        let s = "%36%A";
        assert!(PctStr::new(s).is_err());
        // `%` followed by a byte that is not a hexadecimal digit.
        let s = "%%32";
        assert!(PctStr::new(s).is_err());
        // A letter just past the hexadecimal range.
        let s = "%G0";
        assert!(PctStr::new(s).is_err());
        // Lone `%` with nothing after it.
        let s = "%";
        assert!(PctStr::new(s).is_err());
    }

    #[test]
    fn pct_encoding_valid() {
        let s = "%00%5C%F4%8F%BF%BD%69";
        assert!(PctStr::new(s).is_ok());
        let s = "No percent.";
        assert!(PctStr::new(s).is_ok());
        // Lowercase hexadecimal digits are accepted too.
        let s = "%e2%82%acwat";
        assert!(PctStr::new(s).is_ok());
    }

    #[test]
    fn try_from() {
        let s = "%00%5C%F4%8F%BF%BD%69";
        let _pcs = PctString::try_from(s).unwrap();
        let _pcs: &PctStr = s.try_into().unwrap();
    }

    #[test]
    fn encode_percent_always() {
        // `%` must be encoded even when the encoder asks for nothing.
        struct NoopEncoder;
        impl Encoder for NoopEncoder {
            fn encode(&self, _: char) -> bool {
                false
            }
        }
        let s = "%";
        let c = PctString::encode(s.chars(), NoopEncoder);
        assert_eq!(c.as_str(), "%25");
    }
}