1use crate::{
4 component::Scheme,
5 imp::{HostMeta, Meta, RiMaybeRef, RmrRef},
6 parse,
7 pct_enc::{
8 self,
9 encoder::{Data, IData},
10 Decode, DecodedChunk, DecodedUtf8Chunk, Encode, EncodedChunk, Encoder, Table,
11 },
12 resolve,
13};
14use alloc::string::String;
15use borrow_or_share::Bos;
16use core::{
17 fmt::{self, Write},
18 num::NonZeroUsize,
19};
20
/// Error returned when normalization of a reference fails.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum NormalizeError {
    /// Removing dot segments from the path reported an underflow while the
    /// normalizer was configured not to allow it.
    PathUnderflow,
}

impl fmt::Display for NormalizeError {
    /// Writes a short, fixed, human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            Self::PathUnderflow => f.write_str("underflow occurred in path resolution"),
        }
    }
}

// The crate-level `Error` trait is only implemented when the `impl-error`
// feature is enabled.
#[cfg(feature = "impl-error")]
impl crate::Error for NormalizeError {}
41
42#[derive(Clone, Copy)]
44#[allow(missing_debug_implementations)]
45#[must_use]
46pub struct Normalizer {
47 allow_path_underflow: bool,
48 default_port_f: fn(&Scheme) -> Option<u16>,
49}
50
51impl Normalizer {
52 pub fn new() -> Self {
54 Self {
55 allow_path_underflow: true,
56 default_port_f: Scheme::default_port,
57 }
58 }
59
60 pub fn allow_path_underflow(mut self, value: bool) -> Self {
78 self.allow_path_underflow = value;
79 self
80 }
81
82 pub fn default_port_with(mut self, f: fn(&Scheme) -> Option<u16>) -> Self {
106 self.default_port_f = f;
107 self
108 }
109
110 pub fn normalize<R: RiMaybeRef>(&self, r: &R) -> Result<R::WithVal<String>, NormalizeError>
121 where
122 R::Val: Bos<str>,
123 {
124 normalize(
125 r.make_ref(),
126 R::CONSTRAINTS.ascii_only,
127 self.allow_path_underflow,
128 self.default_port_f,
129 )
130 .map(RiMaybeRef::from_pair)
131 }
132}
133
134impl Default for Normalizer {
135 fn default() -> Self {
136 Self::new()
137 }
138}
139
140pub(crate) fn normalize(
141 r: RmrRef<'_, '_>,
142 ascii_only: bool,
143 allow_path_underflow: bool,
144 default_port_f: fn(&Scheme) -> Option<u16>,
145) -> Result<(String, Meta), NormalizeError> {
146 let mut buf = String::with_capacity(r.as_str().len());
149
150 let path = r.path().as_str();
151 let mut path_buf = String::with_capacity(path.len());
152
153 let data_table = if ascii_only {
154 Data::TABLE
155 } else {
156 IData::TABLE
157 };
158
159 if r.has_scheme() && path.starts_with('/') {
160 normalize_estr(&mut buf, path, false, data_table);
161
162 let underflow_occurred = resolve::remove_dot_segments(&mut path_buf, 0, &[&buf]);
163 if underflow_occurred && !allow_path_underflow {
164 return Err(NormalizeError::PathUnderflow);
165 }
166
167 buf.clear();
168 } else {
169 normalize_estr(&mut path_buf, path, false, data_table);
171 }
172
173 let mut meta = Meta::default();
174
175 if let Some(scheme) = r.scheme_opt() {
176 buf.push_str(scheme.as_str());
177 buf.make_ascii_lowercase();
178 meta.scheme_end = NonZeroUsize::new(buf.len());
179 buf.push(':');
180 }
181
182 if let Some(auth) = r.authority() {
183 buf.push_str("//");
184
185 if let Some(userinfo) = auth.userinfo() {
186 normalize_estr(&mut buf, userinfo.as_str(), false, data_table);
187 buf.push('@');
188 }
189
190 let mut auth_meta = auth.meta();
191 auth_meta.host_bounds.0 = buf.len();
192 match auth_meta.host_meta {
193 HostMeta::Ipv4(..) => buf.push_str(auth.host()),
195 #[cfg(feature = "net")]
196 HostMeta::Ipv6(addr) => write!(buf, "[{addr}]").unwrap(),
197 #[cfg(not(feature = "net"))]
198 HostMeta::Ipv6() => {
199 buf.push('[');
200 write_v6(&mut buf, parse::parse_v6(&auth.host().as_bytes()[1..]));
201 buf.push(']');
202 }
203 HostMeta::IpvFuture => {
204 let start = buf.len();
205 buf.push_str(auth.host());
206
207 buf[start..].make_ascii_lowercase();
208 }
209 HostMeta::RegName => {
210 let start = buf.len();
211 let host = auth.host();
212 normalize_estr(&mut buf, host, true, data_table);
213
214 if buf.len() < start + host.len() {
215 auth_meta.host_meta = parse::parse_v4_or_reg_name(&buf.as_bytes()[start..]);
217 }
218 }
219 }
220 auth_meta.host_bounds.1 = buf.len();
221 meta.auth_meta = Some(auth_meta);
222
223 if let Some(port) = auth.port() {
224 if !port.is_empty() {
225 let mut eq_default = false;
226 if let Some(scheme) = r.scheme_opt() {
227 if let Some(default) = default_port_f(scheme) {
228 eq_default = port.as_str().parse().ok() == Some(default);
229 }
230 }
231 if !eq_default {
232 buf.push(':');
233 buf.push_str(port.as_str());
234 }
235 }
236 }
237 }
238
239 meta.path_bounds.0 = buf.len();
240 if r.has_scheme() && !r.has_authority() && path_buf.starts_with("//") {
242 buf.push_str("/.");
243 }
244 buf.push_str(&path_buf);
245 meta.path_bounds.1 = buf.len();
246
247 if let Some(query) = r.query() {
248 buf.push('?');
249
250 const IQUERY_DATA: &Table = &IData::TABLE.or_iprivate();
251 let query_data_table = if ascii_only { Data::TABLE } else { IQUERY_DATA };
252
253 normalize_estr(&mut buf, query.as_str(), false, query_data_table);
254 meta.query_end = NonZeroUsize::new(buf.len());
255 }
256
257 if let Some(fragment) = r.fragment() {
258 buf.push('#');
259 normalize_estr(&mut buf, fragment.as_str(), false, data_table);
260 }
261
262 Ok((buf, meta))
263}
264
265fn normalize_estr(buf: &mut String, s: &str, to_ascii_lowercase: bool, table: &Table) {
266 if table.allows_non_ascii() {
267 Decode::new(s).decode_utf8(|chunk| match chunk {
268 DecodedUtf8Chunk::Unencoded(s) => {
269 let i = buf.len();
270 buf.push_str(s);
271 if to_ascii_lowercase {
272 buf[i..].make_ascii_lowercase();
273 }
274 }
275 DecodedUtf8Chunk::Decoded { valid, invalid } => {
276 for chunk in Encode::new(table, valid) {
277 match chunk {
278 EncodedChunk::Unencoded(s) => {
279 let i = buf.len();
280 buf.push_str(s);
281 if to_ascii_lowercase {
282 buf[i..].make_ascii_lowercase();
283 }
284 }
285 EncodedChunk::PctEncoded(s) => buf.push_str(s),
286 }
287 }
288 for &x in invalid {
289 buf.push_str(pct_enc::encode_byte(x));
290 }
291 }
292 });
293 } else {
294 for chunk in Decode::new(s) {
295 match chunk {
296 DecodedChunk::Unencoded(s) => {
297 let i = buf.len();
298 buf.push_str(s);
299 if to_ascii_lowercase {
300 buf[i..].make_ascii_lowercase();
301 }
302 }
303 DecodedChunk::PctDecoded(mut x) => {
304 if table.allows_ascii(x) {
305 if to_ascii_lowercase {
306 x.make_ascii_lowercase();
307 }
308 buf.push(x as char);
309 } else {
310 buf.push_str(pct_enc::encode_byte(x));
311 }
312 }
313 }
314 }
315 }
316}
317
/// Appends the textual form of an IPv6 address, given as eight 16-bit
/// `segments`, to `buf` (without surrounding brackets).
///
/// * `0:0:0:0:0:ffff:a.b.c.d` addresses are written as `::ffff:a.b.c.d`.
/// * Otherwise groups are written in lowercase hex without leading zeros, and
///   the first longest run of two or more zero groups is collapsed to `::`.
#[cfg(not(feature = "net"))]
fn write_v6(buf: &mut String, segments: [u16; 8]) {
    // IPv4-mapped form: the last two groups are shown as a dotted quad.
    if let [0, 0, 0, 0, 0, 0xffff, hi, lo] = segments {
        let [a, b] = hi.to_be_bytes();
        let [c, d] = lo.to_be_bytes();
        write!(buf, "::ffff:{a}.{b}.{c}.{d}").unwrap();
        return;
    }

    /// Writes `groups` as colon-separated lowercase hex; writes nothing for
    /// an empty slice.
    #[inline]
    fn write_groups(buf: &mut String, groups: &[u16]) {
        for (i, group) in groups.iter().enumerate() {
            if i > 0 {
                buf.push(':');
            }
            write!(buf, "{group:x}").unwrap();
        }
    }

    // Find the first longest run of zero groups. The strict `>` keeps the
    // earliest run when two runs tie in length.
    let mut best_start = 0;
    let mut best_len = 0;
    let mut run_len = 0;
    for (i, &segment) in segments.iter().enumerate() {
        if segment != 0 {
            run_len = 0;
            continue;
        }
        run_len += 1;
        if run_len > best_len {
            best_len = run_len;
            best_start = i + 1 - run_len;
        }
    }

    if best_len > 1 {
        write_groups(buf, &segments[..best_start]);
        buf.push_str("::");
        write_groups(buf, &segments[best_start + best_len..]);
    } else {
        // A single zero group is never collapsed.
        write_groups(buf, &segments);
    }
}