Skip to main content

pct_str/encoder/
iri.rs

1use super::Encoder;
2
/// Encoder for the characters that the IRI syntax reserves.
///
/// Each variant of this [`Encoder`] names the IRI component the encoded text
/// is destined for, following the grammar of
/// [RFC 3987](https://tools.ietf.org/html/rfc3987). Unreserved characters are
/// never encoded; the variant only decides which of the remaining characters
/// may additionally be left untouched.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum IriReserved {
	/// Target any IRI component: every character that is not unreserved is
	/// encoded, without exception.
	Any,

	/// Target a path segment: sub-delimiters and `@` are kept as-is.
	Path,

	/// Target a query: sub-delimiters, private-use characters and
	/// `:`, `@`, `/`, `?` are kept as-is.
	Query,

	/// Target a fragment: sub-delimiters and `:`, `@`, `/`, `?` are kept
	/// as-is.
	Fragment,
}
21
22impl IriReserved {
23	fn is_reserved_but_safe(&self, c: char) -> bool {
24		match self {
25			Self::Any => false,
26			Self::Path => is_sub_delim(c) || c == '@',
27			Self::Query => is_sub_delim(c) || is_iprivate(c) || matches!(c, ':' | '@' | '/' | '?'),
28			Self::Fragment => is_sub_delim(c) || matches!(c, ':' | '@' | '/' | '?'),
29		}
30	}
31}
32
33impl Encoder for IriReserved {
34	fn encode(&self, c: char) -> bool {
35		!is_iunreserved(c) && !self.is_reserved_but_safe(c)
36	}
37}
38
/// Returns `true` if `c` is one of the eleven sub-delimiter characters
/// `!`, `$`, `&`, `'`, `(`, `)`, `*`, `+`, `,`, `;` or `=`.
fn is_sub_delim(c: char) -> bool {
	// The full sub-delimiter set, spelled out as a lookup string.
	const SUB_DELIMS: &str = "!$&'()*+,;=";

	SUB_DELIMS.contains(c)
}
45
/// Returns `true` if `c` lies in one of the three private-use ranges:
/// U+E000..=U+F8FF, U+F0000..=U+FFFFD or U+100000..=U+10FFFD.
fn is_iprivate(c: char) -> bool {
	let code_point = u32::from(c);

	let in_bmp_range = (0xE000..=0xF8FF).contains(&code_point);
	// Planes 15 and 16, each without its last two code points.
	let in_plane_15 = (0xF_0000..=0xF_FFFD).contains(&code_point);
	let in_plane_16 = (0x10_0000..=0x10_FFFD).contains(&code_point);

	in_bmp_range || in_plane_15 || in_plane_16
}
49
50fn is_iunreserved(c: char) -> bool {
51	c.is_ascii_alphanumeric() || matches!(c, '-' | '.' | '_' | '~') || is_ucschar(c)
52}
53
/// Returns `true` if `c` belongs to one of the accepted non-ASCII ranges:
///
/// - U+00A0..=U+D7FF, U+F900..=U+FDCF and U+FDF0..=U+FFEF in plane 0;
/// - all of planes 1 through 13 except the last two code points of each
///   plane (U+xFFFE and U+xFFFF);
/// - U+E1000..=U+EFFFD in plane 14.
fn is_ucschar(c: char) -> bool {
	let code_point = u32::from(c);

	// Dispatch on the plane number (the bits above the low 16).
	match code_point >> 16 {
		0x0 => matches!(
			code_point,
			0x00A0..=0xD7FF | 0xF900..=0xFDCF | 0xFDF0..=0xFFEF
		),
		// Planes 1..=13 share one shape: everything up to xFFFD is accepted.
		0x1..=0xD => (code_point & 0xFFFF) <= 0xFFFD,
		0xE => matches!(code_point, 0xE_1000..=0xE_FFFD),
		_ => false,
	}
}
75
#[cfg(test)]
mod tests {
	use crate::PctString;

	use super::IriReserved;

	/// Non-ASCII letters pass through unchanged; only the spaces are encoded.
	#[test]
	fn iri_encode_cyrillic() {
		let encoder = IriReserved::Path;
		let pct_string = PctString::encode("традиционное польское блюдо".chars(), encoder);
		assert_eq!(&pct_string, &"традиционное польское блюдо");
		assert_eq!(&pct_string.as_str(), &"традиционное%20польское%20блюдо");
	}

	/// In a path segment `?`, spaces and private-use characters are encoded,
	/// while the sub-delimiters `=` and `&` are kept.
	#[test]
	fn iri_encode_segment() {
		let encoder = IriReserved::Path;
		let pct_string = PctString::encode(
			"?test=традиционное польское блюдо&cjk=真正&private=\u{10FFFD}".chars(),
			encoder,
		);

		assert_eq!(
			&pct_string,
			&"?test=традиционное польское блюдо&cjk=真正&private=\u{10FFFD}"
		);
		assert_eq!(
			&pct_string.as_str(),
			&"%3Ftest=традиционное%20польское%20блюдо&cjk=真正&private=%F4%8F%BF%BD"
		);
	}

	/// `Path` does not keep `:` as-is, so an encoded segment never contains a
	/// raw colon. The input deliberately contains one so that this test does
	/// not merely repeat `iri_encode_segment`.
	#[test]
	fn iri_encode_segment_nocolon() {
		let encoder = IriReserved::Path;
		let pct_string = PctString::encode("ns:традиционное блюдо".chars(), encoder);
		assert_eq!(&pct_string, &"ns:традиционное блюдо");
		assert_eq!(&pct_string.as_str(), &"ns%3Aтрадиционное%20блюдо");
	}

	/// In a fragment `?` is kept, but private-use characters are still
	/// encoded.
	#[test]
	fn iri_encode_fragment() {
		let encoder = IriReserved::Fragment;
		let pct_string = PctString::encode(
			"?test=традиционное польское блюдо&cjk=真正&private=\u{10FFFD}".chars(),
			encoder,
		);
		assert_eq!(
			&pct_string,
			&"?test=традиционное польское блюдо&cjk=真正&private=\u{10FFFD}"
		);
		assert_eq!(
			&pct_string.as_str(),
			&"?test=традиционное%20польское%20блюдо&cjk=真正&private=%F4%8F%BF%BD"
		);
	}

	/// In a query both `?` and private-use characters are kept.
	#[test]
	fn iri_encode_query() {
		let encoder = IriReserved::Query;
		let pct_string = PctString::encode(
			"?test=традиционное польское блюдо&cjk=真正&private=\u{10FFFD}".chars(),
			encoder,
		);
		assert_eq!(
			&pct_string,
			&"?test=традиционное польское блюдо&cjk=真正&private=\u{10FFFD}"
		);
		assert_eq!(
			&pct_string.as_str(),
			&"?test=традиционное%20польское%20блюдо&cjk=真正&private=\u{10FFFD}"
		);
	}
}