1use core::fmt;
4use core::ops::Range;
5
6use crate::parser::str::{find_split_hole, rfind};
7use crate::spec::{Spec, UriSpec};
8
9use super::pct_case::PctCaseNormalized;
10use super::{Error, NormalizationMode, NormalizationOp};
11
12#[derive(Debug, Clone, Copy)]
14pub(crate) enum Path<'a> {
15 Done(&'a str),
17 NeedsProcessing(PathToNormalize<'a>),
19}
20
21#[derive(Debug, Clone, Copy)]
27pub(crate) struct PathToNormalize<'a>(Option<&'a str>, &'a str);
28
29impl<'a> PathToNormalize<'a> {
30 #[inline]
32 #[must_use]
33 pub(crate) fn from_single_path(path: &'a str) -> Self {
34 Self(None, path)
35 }
36
37 #[must_use]
39 pub(crate) fn from_paths_to_be_resolved(base: &'a str, reference: &'a str) -> Self {
40 if reference.starts_with('/') {
41 return Self(None, reference);
42 }
43
44 match rfind(base.as_bytes(), b'/') {
45 Some(last_slash_pos) => Self(Some(&base[..=last_slash_pos]), reference),
46 None => Self(None, reference),
47 }
48 }
49
50 #[inline]
52 #[must_use]
53 fn is_empty(&self) -> bool {
54 self.0.is_none() && self.1.is_empty()
56 }
57
58 #[inline]
60 #[must_use]
61 pub(super) fn len(&self) -> usize {
62 self.len_prefix() + self.1.len()
63 }
64
65 #[inline]
69 #[must_use]
70 fn len_prefix(&self) -> usize {
71 self.0.map_or(0, |s| s.len())
72 }
73
74 #[must_use]
76 fn byte_at(&self, mut i: usize) -> Option<u8> {
77 if let Some(prefix) = self.0 {
78 if i < prefix.len() {
79 return Some(prefix.as_bytes()[i]);
80 }
81 i -= prefix.len();
82 }
83 self.1.as_bytes().get(i).copied()
84 }
85
86 #[must_use]
88 fn find_next_slash(&self, scan_start: usize) -> Option<usize> {
89 if let Some(prefix) = self.0 {
90 let prefix_len = prefix.len();
91 if scan_start < prefix_len {
92 prefix[scan_start..].find('/').map(|rel| rel + scan_start)
93 } else {
94 let local_i = scan_start - prefix_len;
95 self.1[local_i..].find('/').map(|rel| rel + scan_start)
96 }
97 } else {
98 self.1[scan_start..].find('/').map(|rel| rel + scan_start)
99 }
100 }
101
102 fn remove_start(&mut self, len: usize) {
104 if let Some(prefix) = self.0 {
105 if let Some(suffix_trim_len) = len.checked_sub(prefix.len()) {
106 self.0 = None;
107 self.1 = &self.1[suffix_trim_len..];
108 } else {
109 self.0 = Some(&prefix[len..]);
110 }
111 } else {
112 self.1 = &self.1[len..];
113 }
114 }
115
116 fn remove_ignorable_prefix(&mut self) {
122 while let Some(seg) = PathSegmentsIter::new(self).next() {
123 if seg.has_leading_slash {
124 break;
126 }
127 match seg.kind(self) {
128 SegmentKind::Dot | SegmentKind::DotDot => {
129 let skip = self.len().min(seg.range.end + 1);
131 self.remove_start(skip);
132 }
133 SegmentKind::Normal => break,
134 }
135 }
136 }
137}
138
139impl PathToNormalize<'_> {
140 pub(crate) fn fmt_write_normalize<S: Spec, W: fmt::Write>(
142 &self,
143 f: &mut W,
144 op: NormalizationOp,
145 authority_is_present: bool,
146 ) -> fmt::Result {
147 debug_assert!(
148 self.0.map_or(true, |s| s.ends_with('/')),
149 "[validity] the prefix field of `PathToNormalize` should end with a slash"
150 );
151
152 if self.is_empty() {
153 return Ok(());
154 }
155
156 if (op.mode == NormalizationMode::PreserveAuthoritylessRelativePath)
157 && !authority_is_present
158 && self.byte_at(0) != Some(b'/')
159 {
160 debug_assert!(
163 op.mode.case_pct_normalization(),
164 "[consistency] case/pct normalization should still be applied"
165 );
166 if let Some(prefix) = self.0 {
167 write!(f, "{}", PctCaseNormalized::<S>::new(prefix))?;
168 }
169 write!(f, "{}", PctCaseNormalized::<S>::new(self.1))?;
170 return Ok(());
171 }
172
173 let mut rest = *self;
174
175 rest.remove_ignorable_prefix();
180 if rest.is_empty() {
181 if authority_is_present {
185 f.write_char('/')?;
186 }
187 return Ok(());
188 }
189
190 let mut only_a_slash_is_written = None;
194 let mut too_deep_area_may_have_dot_segments = true;
195 while !rest.is_empty() && too_deep_area_may_have_dot_segments {
196 const QUEUE_SIZE: usize = 8;
200
201 {
202 let mut skipped_len = 0;
204 for seg in PathSegmentsIter::new(&rest) {
205 match seg.kind(&rest) {
206 SegmentKind::Dot | SegmentKind::DotDot => {
207 debug_assert!(
208 seg.has_leading_slash,
209 "[consistency] `.` or `..` segments without a
210 leading slash have already been skipped"
211 );
212 skipped_len = seg.range.end;
213 }
214 _ => break,
215 }
216 }
217 rest.remove_start(skipped_len);
218 if rest.is_empty() {
219 if !authority_is_present && (only_a_slash_is_written == Some(true)) {
222 f.write_str(".//")?;
226 } else {
227 f.write_char('/')?;
228 }
229 break;
230 }
231 }
232
233 let mut queue: [Option<&'_ str>; QUEUE_SIZE] = Default::default();
234 let mut level: usize = 0;
235 let mut first_segment_has_leading_slash = false;
236
237 let mut end = 0;
239 for seg in PathSegmentsIter::new(&rest) {
240 let kind = seg.kind(&rest);
241 match kind {
242 SegmentKind::Dot => {
243 too_deep_area_may_have_dot_segments = true;
244 }
245 SegmentKind::DotDot => {
246 level = level.saturating_sub(1);
247 too_deep_area_may_have_dot_segments = true;
248 if level < queue.len() {
249 queue[level] = None;
250 }
251 }
252 SegmentKind::Normal => {
253 if level < queue.len() {
254 queue[level] = Some(seg.segment(&rest));
255 too_deep_area_may_have_dot_segments = false;
256 end = seg.range.end;
257 if level == 0 {
258 first_segment_has_leading_slash = seg.has_leading_slash;
259 }
260 }
261 level += 1;
262 }
263 }
264 }
265
266 for segname in queue.iter().flatten() {
268 Self::emit_segment::<S, _>(
269 f,
270 &mut only_a_slash_is_written,
271 first_segment_has_leading_slash,
272 segname,
273 authority_is_present,
274 op,
275 )?;
276 }
277
278 rest.remove_start(end);
279 }
280
281 if !rest.is_empty() {
282 assert!(
284 !too_deep_area_may_have_dot_segments,
285 "[consistency] loop condition of the previous loop"
286 );
287 for seg in PathSegmentsIter::new(&rest) {
289 assert_eq!(
290 seg.kind(&rest),
291 SegmentKind::Normal,
292 "[consistency] already confirmed that there are no more dot segments"
293 );
294 let segname = seg.segment(&rest);
295 Self::emit_segment::<S, _>(
296 f,
297 &mut only_a_slash_is_written,
298 seg.has_leading_slash,
299 segname,
300 authority_is_present,
301 op,
302 )?;
303 }
304 }
305
306 Ok(())
307 }
308
309 fn emit_segment<S: Spec, W: fmt::Write>(
313 f: &mut W,
314 only_a_slash_is_written: &mut Option<bool>,
315 first_segment_has_leading_slash: bool,
316 segname: &str,
317 authority_is_present: bool,
318 op: NormalizationOp,
319 ) -> fmt::Result {
320 match *only_a_slash_is_written {
323 None => {
324 if first_segment_has_leading_slash {
328 f.write_char('/')?;
329 }
330 *only_a_slash_is_written =
331 Some(first_segment_has_leading_slash && segname.is_empty());
332 }
333 Some(only_a_slash) => {
334 if only_a_slash && !authority_is_present {
335 f.write_str("./")?;
341 *only_a_slash_is_written = Some(false);
342 }
343 f.write_char('/')?;
344 }
345 }
346
347 if op.mode.case_pct_normalization() {
349 write!(f, "{}", PctCaseNormalized::<S>::new(segname))
350 } else {
351 f.write_str(segname)
352 }
353 }
354
355 pub(crate) fn ensure_rfc3986_normalizable_with_authority_absent(&self) -> Result<(), Error> {
359 #[derive(Default)]
361 struct PrefixRetriever {
362 buf: [u8; 3],
364 cursor: usize,
366 }
367 impl PrefixRetriever {
368 #[inline]
370 #[must_use]
371 fn as_bytes(&self) -> &[u8] {
372 &self.buf[..self.cursor]
373 }
374 }
375 impl fmt::Write for PrefixRetriever {
376 fn write_str(&mut self, s: &str) -> fmt::Result {
377 if !s.is_empty() && (self.cursor >= self.buf.len()) {
378 return Err(fmt::Error);
380 }
381 self.buf[self.cursor..]
382 .iter_mut()
383 .zip(s.bytes())
384 .for_each(|(dest, src)| *dest = src);
385 self.cursor = self.cursor.saturating_add(s.len()).min(self.buf.len());
386 Ok(())
387 }
388 }
389
390 let mut prefix = PrefixRetriever::default();
391 let _ = self.fmt_write_normalize::<UriSpec, _>(
394 &mut prefix,
395 NormalizationOp {
396 mode: NormalizationMode::None,
397 },
398 false,
400 );
401
402 if prefix.as_bytes() == b"/./" {
403 Err(Error::new())
404 } else {
405 Ok(())
406 }
407 }
408}
409
410#[derive(Debug, Clone, Copy)]
412pub(crate) enum PathCharacteristic {
413 CommonAbsolute,
415 CommonRelative,
417 RelativeFirstSegmentHasColon,
419 StartsWithDoubleSlash,
421}
422
423impl PathCharacteristic {
424 #[inline]
426 #[must_use]
427 pub(crate) fn is_absolute(self) -> bool {
428 matches!(self, Self::CommonAbsolute | Self::StartsWithDoubleSlash)
429 }
430
431 pub(crate) fn from_path_to_display<S: Spec>(
433 path: &PathToNormalize<'_>,
434 op: NormalizationOp,
435 authority_is_present: bool,
436 ) -> Self {
437 #[derive(Default, Clone, Copy)]
439 struct Writer {
440 result: Option<PathCharacteristic>,
442 is_absolute: Option<bool>,
444 }
445 impl fmt::Write for Writer {
446 fn write_str(&mut self, mut s: &str) -> fmt::Result {
447 if self.result.is_some() {
448 return Err(fmt::Error);
450 }
451 while !s.is_empty() {
452 if self.is_absolute.is_none() {
453 match s.strip_prefix('/') {
455 Some(rest) => {
456 self.is_absolute = Some(true);
457 s = rest;
458 }
459 None => {
460 self.is_absolute = Some(false);
461 }
462 }
463 continue;
464 }
465 if self.is_absolute == Some(true) {
466 let result = if s.starts_with('/') {
467 PathCharacteristic::StartsWithDoubleSlash
468 } else {
469 PathCharacteristic::CommonAbsolute
470 };
471 self.result = Some(result);
472 return Err(fmt::Error);
473 }
474 match find_split_hole(s, b'/') {
476 Some((first_seg, _rest)) => {
477 let result = if first_seg.contains(':') {
478 PathCharacteristic::RelativeFirstSegmentHasColon
479 } else {
480 PathCharacteristic::CommonRelative
481 };
482 self.result = Some(result);
483 return Err(fmt::Error);
484 }
485 None => {
486 if s.contains(':') {
488 self.result =
489 Some(PathCharacteristic::RelativeFirstSegmentHasColon);
490 return Err(fmt::Error);
491 }
492 break;
493 }
494 }
495 }
496 Ok(())
497 }
498 }
499
500 let mut writer = Writer::default();
501 match path.fmt_write_normalize::<S, _>(&mut writer, op, authority_is_present) {
502 Ok(_) => PathCharacteristic::CommonRelative,
504 Err(_) => writer
505 .result
506 .expect("[consistency] the formatting quits early by `Err` when the check is done"),
507 }
508 }
509}
510
511#[derive(Debug, Clone, Copy, PartialEq, Eq)]
513enum SegmentKind {
514 Dot,
516 DotDot,
518 Normal,
520}
521
522impl SegmentKind {
523 #[must_use]
525 fn from_segment(s: &str) -> Self {
526 match s {
527 "." | "%2E" | "%2e" => SegmentKind::Dot,
528 ".." | ".%2E" | ".%2e" | "%2E." | "%2E%2E" | "%2E%2e" | "%2e." | "%2e%2E"
529 | "%2e%2e" => SegmentKind::DotDot,
530 _ => SegmentKind::Normal,
531 }
532 }
533}
534
535#[derive(Debug, Clone)]
537struct PathSegment {
538 has_leading_slash: bool,
540 range: Range<usize>,
542}
543
544impl PathSegment {
545 #[inline]
547 #[must_use]
548 fn segment<'a>(&self, path: &PathToNormalize<'a>) -> &'a str {
549 if let Some(prefix) = path.0 {
550 let prefix_len = prefix.len();
551 if self.range.end <= prefix_len {
552 &prefix[self.range.clone()]
553 } else {
554 let range = (self.range.start - prefix_len)..(self.range.end - prefix_len);
555 &path.1[range]
556 }
557 } else {
558 &path.1[self.range.clone()]
559 }
560 }
561
562 #[inline]
564 #[must_use]
565 fn kind(&self, path: &PathToNormalize<'_>) -> SegmentKind {
566 SegmentKind::from_segment(self.segment(path))
567 }
568}
569
570struct PathSegmentsIter<'a> {
572 path: &'a PathToNormalize<'a>,
574 cursor: usize,
576}
577
578impl<'a> PathSegmentsIter<'a> {
579 #[inline]
581 #[must_use]
582 fn new(path: &'a PathToNormalize<'a>) -> Self {
583 Self { path, cursor: 0 }
584 }
585}
586
587impl Iterator for PathSegmentsIter<'_> {
588 type Item = PathSegment;
589
590 fn next(&mut self) -> Option<Self::Item> {
591 let path_len = self.path.len();
592 if self.cursor >= path_len {
593 return None;
594 }
595 let has_leading_slash = self.path.byte_at(self.cursor) == Some(b'/');
596
597 let prefix_len = self.path.len_prefix();
598 if (prefix_len != 0) && (self.cursor == prefix_len - 1) {
599 debug_assert!(has_leading_slash);
600 let end = self.path.1.find('/').unwrap_or(self.path.1.len()) + prefix_len;
601 self.cursor = end;
602 return Some(PathSegment {
603 has_leading_slash,
604 range: prefix_len..end,
605 });
606 }
607
608 if has_leading_slash {
609 self.cursor += 1;
611 };
612 let start = self.cursor;
613 self.cursor = self.path.find_next_slash(self.cursor).unwrap_or(path_len);
614
615 Some(PathSegment {
616 has_leading_slash,
617 range: start..self.cursor,
618 })
619 }
620}