salvo_core/routing/filters/
path.rs

1//! Path filter implementation.
2
3use std::collections::HashMap;
4use std::fmt::{self, Debug, Formatter};
5use std::sync::{Arc, LazyLock};
6
7use indexmap::IndexSet;
8use parking_lot::RwLock;
9use regex::Regex;
10
11use crate::async_trait;
12use crate::http::Request;
13use crate::routing::{Filter, PathState};
14
15/// PathWisp
16pub trait PathWisp: Send + Sync + fmt::Debug + 'static {
17    #[doc(hidden)]
18    fn type_id(&self) -> std::any::TypeId {
19        std::any::TypeId::of::<Self>()
20    }
21    #[doc(hidden)]
22    fn type_name(&self) -> &'static str {
23        std::any::type_name::<Self>()
24    }
25    /// Validate the wisp. Panic if invalid.
26    fn validate(&self) -> Result<(), String> {
27        Ok(())
28    }
29    /// Detect is that path matched.
30    fn detect(&self, state: &mut PathState) -> bool;
31}
/// Factory used for the function form of a path parameter, `{name:sign(args)}`.
///
/// The path parser looks a builder up by `sign` and calls [`WispBuilder::build`] with the
/// pieces it scanned.
pub trait WispBuilder: Send + Sync {
    /// Builds the wisp for one path parameter.
    ///
    /// * `name` - the parameter name written before `:`.
    /// * `sign` - the identifier written after `:`, i.e. the key this builder was looked up by.
    /// * `args` - the comma separated, trimmed arguments written inside `(...)` or `[...]`;
    ///   empty when no argument list was given.
    ///
    /// Returns `Err` with a message when the arguments cannot be turned into a wisp.
    fn build(&self, name: String, sign: String, args: Vec<String>) -> Result<WispKind, String>;
}
37
38type WispBuilderMap = RwLock<HashMap<String, Arc<Box<dyn WispBuilder>>>>;
39static WISP_BUILDERS: LazyLock<WispBuilderMap> = LazyLock::new(|| {
40    let mut map: HashMap<String, Arc<Box<dyn WispBuilder>>> = HashMap::with_capacity(8);
41    map.insert(
42        "num".into(),
43        Arc::new(Box::new(CharsWispBuilder::new(is_num))),
44    );
45    map.insert(
46        "hex".into(),
47        Arc::new(Box::new(CharsWispBuilder::new(is_hex))),
48    );
49    RwLock::new(map)
50});
51
/// Returns `true` when `ch` is an ASCII decimal digit (`0`-`9`).
#[inline]
fn is_num(ch: char) -> bool {
    matches!(ch, '0'..='9')
}
/// Returns `true` when `ch` is an ASCII hexadecimal digit (`0`-`9`, `a`-`f`, `A`-`F`).
#[inline]
fn is_hex(ch: char) -> bool {
    matches!(ch, '0'..='9' | 'a'..='f' | 'A'..='F')
}
60
/// Enum of all wisp kinds.
///
/// Each variant wraps one concrete wisp type. The `PathWisp` and `Debug` implementations of
/// this enum forward to the wrapped value.
pub enum WispKind {
    /// Wraps a [`ConstWisp`].
    Const(ConstWisp),
    /// Wraps a [`NamedWisp`].
    Named(NamedWisp),
    /// Wraps a [`CharsWisp`].
    Chars(CharsWisp),
    /// Wraps a [`RegexWisp`].
    Regex(RegexWisp),
    /// Wraps a [`CombWisp`].
    Comb(CombWisp),
}
74impl PathWisp for WispKind {
75    #[inline]
76    fn validate(&self) -> Result<(), String> {
77        match self {
78            Self::Const(wisp) => wisp.validate(),
79            Self::Named(wisp) => wisp.validate(),
80            Self::Chars(wisp) => wisp.validate(),
81            Self::Regex(wisp) => wisp.validate(),
82            Self::Comb(wisp) => wisp.validate(),
83        }
84    }
85    #[inline]
86    fn detect(&self, state: &mut PathState) -> bool {
87        match self {
88            Self::Const(wisp) => wisp.detect(state),
89            Self::Named(wisp) => wisp.detect(state),
90            Self::Chars(wisp) => wisp.detect(state),
91            Self::Regex(wisp) => wisp.detect(state),
92            Self::Comb(wisp) => wisp.detect(state),
93        }
94    }
95}
96impl Debug for WispKind {
97    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
98        match self {
99            Self::Const(wisp) => wisp.fmt(f),
100            Self::Named(wisp) => wisp.fmt(f),
101            Self::Chars(wisp) => wisp.fmt(f),
102            Self::Regex(wisp) => wisp.fmt(f),
103            Self::Comb(wisp) => wisp.fmt(f),
104        }
105    }
106}
107impl From<ConstWisp> for WispKind {
108    #[inline]
109    fn from(wisp: ConstWisp) -> Self {
110        Self::Const(wisp)
111    }
112}
113impl From<NamedWisp> for WispKind {
114    #[inline]
115    fn from(wisp: NamedWisp) -> Self {
116        Self::Named(wisp)
117    }
118}
119impl From<CharsWisp> for WispKind {
120    #[inline]
121    fn from(wisp: CharsWisp) -> Self {
122        Self::Chars(wisp)
123    }
124}
125impl From<RegexWisp> for WispKind {
126    #[inline]
127    fn from(wisp: RegexWisp) -> Self {
128        Self::Regex(wisp)
129    }
130}
131impl From<CombWisp> for WispKind {
132    #[inline]
133    fn from(wisp: CombWisp) -> Self {
134        Self::Comb(wisp)
135    }
136}
137
138/// RegexWispBuilder
139#[derive(Debug)]
140pub struct RegexWispBuilder(Regex);
141impl RegexWispBuilder {
142    /// Create new `RegexWispBuilder`.
143    #[inline]
144    #[must_use]
145    pub fn new(checker: Regex) -> Self {
146        Self(checker)
147    }
148}
149impl WispBuilder for RegexWispBuilder {
150    fn build(&self, name: String, _sign: String, _args: Vec<String>) -> Result<WispKind, String> {
151        Ok(RegexWisp {
152            name,
153            regex: self.0.clone(),
154        }
155        .into())
156    }
157}
158
/// Builder that produces [`CharsWisp`]s which all share one character checker.
pub struct CharsWispBuilder(Arc<dyn Fn(char) -> bool + Send + Sync + 'static>);
impl CharsWispBuilder {
    /// Creates a builder from `checker`, the predicate deciding which characters are accepted.
    #[inline]
    pub fn new<C>(checker: C) -> Self
    where
        C: Fn(char) -> bool + Send + Sync + 'static,
    {
        let shared: Arc<dyn Fn(char) -> bool + Send + Sync + 'static> = Arc::new(checker);
        CharsWispBuilder(shared)
    }
}
impl Debug for CharsWispBuilder {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        // The checker closure cannot be printed, so only the type name is shown.
        f.write_str("CharsWispBuilder")
    }
}
176impl WispBuilder for CharsWispBuilder {
177    fn build(&self, name: String, _sign: String, args: Vec<String>) -> Result<WispKind, String> {
178        if args.is_empty() {
179            return Ok(CharsWisp {
180                name,
181                checker: self.0.clone(),
182                min_width: 1,
183                max_width: None,
184            }
185            .into());
186        }
187        let ps = args[0]
188            .splitn(2, "..")
189            .map(|s| s.trim())
190            .collect::<Vec<_>>();
191        let (min_width, max_width) = if ps.is_empty() {
192            (1, None)
193        } else {
194            let min = if ps[0].is_empty() {
195                1
196            } else {
197                let min = ps[0]
198                    .parse::<usize>()
199                    .map_err(|_| format!("parse range for {name} failed"))?;
200                if min < 1 {
201                    return Err("min_width must greater or equal to 1".to_owned());
202                }
203                min
204            };
205            if ps.len() == 1 {
206                (min, None)
207            } else {
208                let max = ps[1];
209                if max.is_empty() {
210                    (min, None)
211                } else {
212                    let trimmed_max = max.trim_start_matches('=');
213                    let max = if trimmed_max == max {
214                        let max = trimmed_max
215                            .parse::<usize>()
216                            .map_err(|_| format!("parse range for {name} failed"))?;
217                        if max <= 1 {
218                            return Err("min_width must greater than 1".to_owned());
219                        }
220                        max - 1
221                    } else {
222                        let max = trimmed_max
223                            .parse::<usize>()
224                            .map_err(|_| format!("parse range for {name} failed"))?;
225                        if max < 1 {
226                            return Err("min_width must greater or equal to 1".to_owned());
227                        }
228                        max
229                    };
230                    (min, Some(max))
231                }
232            }
233        };
234        Ok(CharsWisp {
235            name,
236            checker: self.0.clone(),
237            min_width,
238            max_width,
239        }
240        .into())
241    }
242}
243
/// Chars wisp matches characters in a URL segment with a checker function and stores them
/// under a parameter name.
pub struct CharsWisp {
    /// Parameter name the matched text is stored under.
    name: String,
    /// Predicate deciding whether a character is accepted.
    checker: Arc<dyn Fn(char) -> bool + Send + Sync + 'static>,
    /// Minimum number of accepted characters required for a match.
    min_width: usize,
    /// Maximum number of accepted characters taken; `None` means no limit.
    max_width: Option<usize>,
}
impl Debug for CharsWisp {
    /// Prints the name and width limits; the checker closure is left out.
    #[inline]
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let Self {
            name,
            min_width,
            max_width,
            checker: _,
        } = self;
        f.write_fmt(format_args!(
            "CharsWisp {{ name: {:?}, min_width: {:?}, max_width: {:?} }}",
            name, min_width, max_width
        ))
    }
}
261impl PathWisp for CharsWisp {
262    fn detect(&self, state: &mut PathState) -> bool {
263        let Some(picked) = state.pick() else {
264            return false;
265        };
266        if let Some(max_width) = self.max_width {
267            let mut chars = Vec::with_capacity(max_width);
268            for ch in picked.chars() {
269                if (self.checker)(ch) {
270                    chars.push(ch);
271                }
272                if chars.len() == max_width {
273                    state.forward(max_width);
274                    state.params.insert(&self.name, chars.into_iter().collect());
275                    #[cfg(feature = "matched-path")]
276                    state.matched_parts.push(format!("{{{}}}", self.name));
277                    return true;
278                }
279            }
280            if chars.len() >= self.min_width {
281                state.forward(chars.len());
282                state.params.insert(&self.name, chars.into_iter().collect());
283                #[cfg(feature = "matched-path")]
284                state.matched_parts.push(format!("{{{}}}", self.name));
285                true
286            } else {
287                false
288            }
289        } else {
290            let mut chars = Vec::with_capacity(16);
291            for ch in picked.chars() {
292                if (self.checker)(ch) {
293                    chars.push(ch);
294                }
295            }
296            if chars.len() >= self.min_width {
297                state.forward(chars.len());
298                state.params.insert(&self.name, chars.into_iter().collect());
299                #[cfg(feature = "matched-path")]
300                state.matched_parts.push(format!("{{{}}}", self.name));
301                true
302            } else {
303                false
304            }
305        }
306    }
307}
308
309/// Comb wisp is a group of other kind of wisps in the same url segment.
310#[derive(Debug)]
311pub struct CombWisp {
312    names: Vec<String>,
313    comb_regex: Regex,
314    wild_regex: Option<Regex>,
315    wild_start: Option<String>,
316}
317impl CombWisp {
318    /// Create new `CombWisp`.
319    ///
320    /// # Panics
321    /// If contains unsupported `WispKind``.
322    pub fn new(wisps: Vec<WispKind>) -> Result<Self, String> {
323        let mut comb_regex = "^".to_owned();
324        let mut names = Vec::with_capacity(wisps.len());
325        let mut is_prev_named = false;
326        let mut is_greedy = false;
327        let mut wild_start = None;
328        let mut wild_regex = None;
329        let any_chars_regex = Regex::new(".*").expect("regex should worked");
330        for wisp in wisps {
331            match wisp {
332                WispKind::Const(wisp) => {
333                    if is_greedy {
334                        return Err(format!(
335                            "ConstWisp `{}` follows a greedy wisp in CombWisp",
336                            wisp.0
337                        ));
338                    }
339                    is_prev_named = false;
340                    comb_regex.push_str(&regex::escape(&wisp.0))
341                }
342                WispKind::Named(wisp) => {
343                    if is_greedy {
344                        return Err(format!(
345                            "NamedWisp `{}` follows a greedy wisp in CombWisp",
346                            wisp.0
347                        ));
348                    }
349                    if is_prev_named {
350                        return Err(format!(
351                            "NamedWisp `{}` should not be added after another NamedWisp when it is CombWisp's children",
352                            wisp.0
353                        ));
354                    }
355                    is_prev_named = true;
356                    if wisp.0.starts_with('*') {
357                        is_greedy = true;
358                        let (star_mark, name) = crate::routing::split_wild_name(&wisp.0);
359                        wild_regex = Some(any_chars_regex.clone());
360                        wild_start = Some(star_mark.to_owned());
361                        names.push(name.to_owned());
362                    } else {
363                        comb_regex.push_str(&format!("(?<{}>.*)", &regex::escape(&wisp.0)));
364                        names.push(wisp.0);
365                    }
366                }
367                WispKind::Regex(wisp) => {
368                    if is_greedy {
369                        return Err(format!(
370                            "RegexWisp `{}` follows a greedy wisp in CombWisp",
371                            wisp.name
372                        ));
373                    }
374                    is_prev_named = false;
375                    if wisp.name.starts_with('*') {
376                        is_greedy = true;
377                        let (star_mark, name) = crate::routing::split_wild_name(&wisp.name);
378                        wild_regex = Some(wisp.regex);
379                        wild_start = Some(star_mark.to_owned());
380                        names.push(name.to_owned());
381                    } else {
382                        let regex = wisp
383                            .regex
384                            .as_str()
385                            .trim_start_matches('^')
386                            .trim_end_matches('$');
387                        comb_regex.push_str(&format!("(?<{}>{})", wisp.name, regex));
388                        names.push(wisp.name);
389                    }
390                }
391                WispKind::Chars(wisp) => {
392                    return Err(format!(
393                        "unsupported CharsWisp `{}` add to CombWisp",
394                        wisp.name
395                    ));
396                }
397                WispKind::Comb(_) => {
398                    return Err(format!("unsupported wisp: {wisp:?} add to CombWisp"));
399                }
400            }
401        }
402        if wild_regex.is_none() {
403            comb_regex.push('$');
404        }
405        Regex::new(&comb_regex)
406            .map(|comb_regex| Self {
407                names,
408                comb_regex,
409                wild_regex,
410                wild_start,
411            })
412            .map_err(|e| format!("Regex error: {e}"))
413    }
414}
415impl PathWisp for CombWisp {
416    #[inline]
417    fn detect(&self, state: &mut PathState) -> bool {
418        let Some(picked) = state.pick().map(|s| s.to_owned()) else {
419            return false;
420        };
421        let mut wild_path = if self.wild_regex.is_some() {
422            state.all_rest().unwrap_or_default().to_string()
423        } else {
424            "".to_owned()
425        };
426        let caps = self.comb_regex.captures(&picked);
427        if let Some(caps) = caps {
428            let take_count = if self.wild_regex.is_some() {
429                self.names.len() - 1
430            } else {
431                self.names.len()
432            };
433            #[cfg(feature = "matched-path")]
434            let mut start = 0;
435            #[cfg(feature = "matched-path")]
436            let mut matched_part = "".to_owned();
437            for name in self.names.iter().take(take_count) {
438                if let Some(value) = caps.name(name) {
439                    state.params.insert(name, value.as_str().to_owned());
440                    if self.wild_regex.is_some() {
441                        wild_path = wild_path.trim_start_matches(value.as_str()).to_owned();
442                    }
443                    #[cfg(feature = "matched-path")]
444                    {
445                        if value.start() > start {
446                            matched_part.push_str(&picked[start..value.start()]);
447                        }
448                        matched_part.push_str(&format!("{{{name}}}"));
449                        start = value.end();
450                    }
451                } else {
452                    return false;
453                }
454            }
455            #[cfg(feature = "matched-path")]
456            {
457                if start < picked.len() {
458                    matched_part.push_str(&picked[start..]);
459                }
460                if !matched_part.is_empty() {
461                    state.matched_parts.push(matched_part);
462                }
463            }
464            let len = if let Some(cap) = caps.get(0) {
465                cap.as_str().len()
466            } else {
467                return false;
468            };
469            state.forward(len);
470        } else {
471            return false;
472        }
473        if let (Some(wild_name), Some(wild_regex), Some(wild_start)) = (
474            self.names.last(),
475            self.wild_regex.as_ref(),
476            self.wild_start.as_ref(),
477        ) {
478            if wild_start.starts_with("*?")
479                && wild_path
480                    .trim_start_matches('/')
481                    .trim_end_matches('/')
482                    .contains('/')
483            {
484                return false;
485            }
486            if !wild_path.is_empty() || !wild_start.starts_with("*+") {
487                let cap = wild_regex.captures(&wild_path).and_then(|caps| caps.get(0));
488                if let Some(cap) = cap {
489                    let cap = cap.as_str().to_owned();
490                    state.forward(cap.len());
491                    state.params.insert(wild_name, cap);
492                    #[cfg(feature = "matched-path")]
493                    state.matched_parts.push(format!("{{{wild_name}}}"));
494                    true
495                } else {
496                    false
497                }
498            } else {
499                false
500            }
501        } else {
502            true
503        }
504    }
505}
506
507/// Named wisp match part in url segment and give it a name.
508#[derive(Debug, Eq, PartialEq)]
509pub struct NamedWisp(pub String);
510impl PathWisp for NamedWisp {
511    #[inline]
512    fn detect(&self, state: &mut PathState) -> bool {
513        if self.0.starts_with('*') {
514            let rest = state.all_rest().unwrap_or_default();
515            if self.0.starts_with("*?")
516                && rest
517                    .trim_start_matches('/')
518                    .trim_end_matches('/')
519                    .contains('/')
520            {
521                return false;
522            }
523            if !rest.is_empty() || !self.0.starts_with("*+") {
524                let rest = rest.to_string();
525                state.params.insert(&self.0, rest);
526                state.cursor.0 = state.parts.len();
527                #[cfg(feature = "matched-path")]
528                state.matched_parts.push(format!("{{{}}}", self.0));
529                true
530            } else {
531                false
532            }
533        } else {
534            let picked = state.pick();
535            if picked.is_none() {
536                return false;
537            }
538            let picked = picked.expect("picked should not be `None`").to_owned();
539            state.forward(picked.len());
540            state.params.insert(&self.0, picked);
541            #[cfg(feature = "matched-path")]
542            state.matched_parts.push(format!("{{{}}}", self.0));
543            true
544        }
545    }
546}
547
548/// Regex wisp match part in url segment use regex pattern and give it a name.
549#[derive(Debug)]
550#[non_exhaustive]
551pub struct RegexWisp {
552    /// The name of the wisp.
553    pub name: String,
554    /// The regex pattern.
555    pub regex: Regex,
556}
557impl RegexWisp {
558    #[inline]
559    fn new(name: String, regex: &str) -> Result<Self, String> {
560        let regex = if !regex.starts_with('^') {
561            &*format!("^{regex}")
562        } else {
563            regex
564        };
565        let regex = if !regex.ends_with('$') {
566            &*format!("{regex}$")
567        } else {
568            regex
569        };
570        Ok(Self {
571            name,
572            regex: Regex::new(regex).map_err(|e| format!("invalid regex: `{regex}`, {e}"))?,
573        })
574    }
575}
576impl PartialEq for RegexWisp {
577    #[inline]
578    fn eq(&self, other: &Self) -> bool {
579        self.regex.as_str() == other.regex.as_str()
580    }
581}
582impl PathWisp for RegexWisp {
583    #[inline]
584    fn detect(&self, state: &mut PathState) -> bool {
585        if self.name.starts_with('*') {
586            let rest = state.all_rest().unwrap_or_default();
587            if self.name.starts_with("*?")
588                && rest
589                    .trim_start_matches('/')
590                    .trim_end_matches('/')
591                    .contains('/')
592            {
593                return false;
594            }
595            if !rest.is_empty() || !self.name.starts_with("*+") {
596                let cap = self.regex.captures(&rest).and_then(|caps| caps.get(0));
597
598                if let Some(cap) = cap {
599                    let cap = cap.as_str().to_owned();
600                    state.forward(cap.len());
601                    state.params.insert(&self.name, cap);
602                    #[cfg(feature = "matched-path")]
603                    state.matched_parts.push(format!("{{{}}}", self.name));
604                    true
605                } else {
606                    false
607                }
608            } else {
609                false
610            }
611        } else {
612            let Some(picked) = state.pick() else {
613                return false;
614            };
615            let cap = self.regex.captures(picked).and_then(|caps| caps.get(0));
616            if let Some(cap) = cap {
617                let cap = cap.as_str().to_owned();
618                state.forward(cap.len());
619                state.params.insert(&self.name, cap);
620                #[cfg(feature = "matched-path")]
621                state.matched_parts.push(format!("{{{}}}", self.name));
622                true
623            } else {
624                false
625            }
626        }
627    }
628}
629
630/// Const wisp is used for match the const string in the path.
631#[derive(Eq, PartialEq, Debug)]
632pub struct ConstWisp(pub String);
633impl PathWisp for ConstWisp {
634    #[inline]
635    fn detect(&self, state: &mut PathState) -> bool {
636        let Some(picked) = state.pick() else {
637            return false;
638        };
639        if picked.starts_with(&self.0) {
640            state.forward(self.0.len());
641            #[cfg(feature = "matched-path")]
642            state.matched_parts.push(self.0.clone());
643            true
644        } else {
645            false
646        }
647    }
648}
649
/// Cursor-based parser that turns a raw route path into wisps.
struct PathParser {
    /// Index into `path` of the character currently under the cursor.
    offset: usize,
    /// Characters of the raw path with leading `/` removed (see `new`).
    path: Vec<char>,
}
654impl PathParser {
655    #[inline]
656    fn new(raw_value: &str) -> Self {
657        Self {
658            offset: 0,
659            path: raw_value.trim_start_matches('/').chars().collect(),
660        }
661    }
662    #[inline]
663    fn next(&mut self, skip_blanks: bool) -> Option<char> {
664        if self.offset < self.path.len() - 1 {
665            self.offset += 1;
666            if skip_blanks {
667                self.skip_blanks();
668            }
669            Some(self.path[self.offset])
670        } else {
671            self.offset = self.path.len();
672            None
673        }
674    }
675    #[inline]
676    fn peek(&self, skip_blanks: bool) -> Option<char> {
677        if self.offset < self.path.len() - 1 {
678            if skip_blanks {
679                let mut offset = self.offset + 1;
680                let mut ch = self.path[offset];
681                while ch == ' ' || ch == '\t' {
682                    offset += 1;
683                    if offset >= self.path.len() {
684                        return None;
685                    }
686                    ch = self.path[offset]
687                }
688                Some(ch)
689            } else {
690                Some(self.path[self.offset + 1])
691            }
692        } else {
693            None
694        }
695    }
696    #[inline]
697    fn curr(&self) -> Option<char> {
698        self.path.get(self.offset).copied()
699    }
700    #[inline]
701    fn scan_ident(&mut self) -> Result<String, String> {
702        let mut ident = "".to_owned();
703        let mut ch = self
704            .curr()
705            .ok_or_else(|| "current position is out of index when scan ident".to_owned())?;
706        while !['/', ':', '|', '{', '}', '<', '>', '[', ']', '(', ')'].contains(&ch) {
707            ident.push(ch);
708            if let Some(c) = self.next(false) {
709                ch = c;
710            } else {
711                break;
712            }
713        }
714        if ident.is_empty() {
715            Err("ident segment is empty".to_owned())
716        } else {
717            Ok(ident)
718        }
719    }
720    #[inline]
721    fn scan_regex(&mut self) -> Result<String, String> {
722        let mut regex = "".to_owned();
723        let mut ch = self
724            .curr()
725            .ok_or_else(|| "current position is out of index when scan regex".to_owned())?;
726        let mut escaping = false;
727        let mut brace_opening = false;
728        loop {
729            regex.push(ch);
730            if let Some(c) = self.next(false) {
731                ch = c;
732                if ch == '{' && !escaping {
733                    brace_opening = true;
734                } else if ch == '}' && !escaping {
735                    if !brace_opening {
736                        break;
737                    }
738                    brace_opening = false;
739                }
740                escaping = !escaping && ch == '\\';
741            } else {
742                break;
743            }
744        }
745        if regex.is_empty() {
746            Err("regex segment is empty".to_owned())
747        } else {
748            Ok(regex)
749        }
750    }
751    #[inline]
752    fn scan_const(&mut self) -> Result<String, String> {
753        let mut cnst = "".to_owned();
754        let mut ch = self
755            .curr()
756            .ok_or_else(|| "current position is out of index when scan const".to_owned())?;
757        while ch != '/' {
758            if ch == '{' || ch == '}' {
759                // match `{{` or `}}`
760                if self.peek(false) == Some(ch) {
761                    self.next(false);
762                } else {
763                    return Ok(cnst);
764                }
765            }
766            cnst.push(ch);
767            if let Some(c) = self.next(false) {
768                ch = c;
769            } else {
770                break;
771            }
772        }
773        if cnst.is_empty() {
774            Err("const segment is empty".to_owned())
775        } else {
776            Ok(cnst)
777        }
778    }
779    #[inline]
780    fn skip_blanks(&mut self) {
781        if let Some(mut ch) = self.curr() {
782            while ch == ' ' || ch == '\t' {
783                if self.offset < self.path.len() - 1 {
784                    self.offset += 1;
785                    ch = self.path[self.offset];
786                } else {
787                    break;
788                }
789            }
790        }
791    }
792    #[inline]
793    fn skip_slashes(&mut self) {
794        if let Some(mut ch) = self.curr() {
795            while ch == '/' {
796                if let Some(c) = self.next(false) {
797                    ch = c;
798                } else {
799                    break;
800                }
801            }
802        }
803    }
804    fn scan_wisps(&mut self) -> Result<Vec<WispKind>, String> {
805        let mut ch = self
806            .curr()
807            .ok_or_else(|| "current position is out of index when scan part".to_owned())?;
808        let mut wisps: Vec<WispKind> = vec![];
809        while ch != '/' {
810            if ch == '{' {
811                if self.peek(false) == Some('{') {
812                    let part = self.scan_const().unwrap_or_default();
813                    if part.is_empty() {
814                        return Err("const part is empty string".to_owned());
815                    }
816                    wisps.push(ConstWisp(part).into());
817                    continue;
818                }
819                self.next(true)
820                    .ok_or_else(|| "char is needed after <".to_owned())?;
821                let name = self.scan_ident()?;
822                if name.is_empty() {
823                    return Err("name is empty string".to_owned());
824                }
825                self.skip_blanks();
826                ch = self
827                    .curr()
828                    .ok_or_else(|| "current position is out of index".to_owned())?;
829                if ch == ':' {
830                    //start to scan fn part
831                    self.next(false);
832                    let sign = self.scan_ident()?;
833                    self.skip_blanks();
834                    let lb = self
835                        .curr()
836                        .ok_or_else(|| "path ended unexpectedly".to_owned())?;
837                    let args = if lb == '[' || lb == '(' {
838                        let rb = if lb == '[' { ']' } else { ')' };
839                        let mut args = "".to_owned();
840                        ch = self.next(true).ok_or_else(|| {
841                            "current position is out of index when scan ident".to_owned()
842                        })?;
843                        while ch != rb {
844                            args.push(ch);
845                            if let Some(c) = self.next(false) {
846                                ch = c;
847                            } else {
848                                break;
849                            }
850                        }
851                        if self.next(false).is_none() {
852                            return Err(format!("ended unexpectedly, should end with: {rb}"));
853                        }
854                        if args.is_empty() {
855                            vec![]
856                        } else {
857                            args.split(',').map(|s| s.trim().to_owned()).collect()
858                        }
859                    } else if lb == '}' {
860                        vec![]
861                    } else {
862                        return Err(format!(
863                            "except any char of '/,[,(', but found {:?} at offset: {}",
864                            self.curr(),
865                            self.offset
866                        ));
867                    };
868                    let builders = WISP_BUILDERS.read();
869                    let builder = builders
870                        .get(&sign)
871                        .ok_or_else(|| {
872                            format!("WISP_BUILDERS does not contains fn part with sign {sign}")
873                        })?
874                        .clone();
875
876                    wisps.push(builder.build(name, sign, args)?);
877                } else if ch == '|' {
878                    // start to scan regex part
879                    self.next(false);
880                    let regex = &self.scan_regex()?;
881                    wisps.push(RegexWisp::new(name, regex)?.into());
882                } else if ch == '}' {
883                    wisps.push(NamedWisp(name).into());
884                }
885                if let Some(c) = self.curr() {
886                    if c != '}' {
887                        return Err(format!(
888                            "except '}}' to end regex part or fn part, but found {:?} at offset: {}",
889                            c, self.offset
890                        ));
891                    } else {
892                        self.next(false);
893                    }
894                } else {
895                    break;
896                }
897            } else {
898                let part = self.scan_const().unwrap_or_default();
899                if part.is_empty() {
900                    return Err("const part is empty string".to_owned());
901                }
902                wisps.push(ConstWisp(part).into());
903            }
904            if let Some(c) = self.curr() {
905                if c == '/' {
906                    break;
907                }
908                ch = c;
909            } else {
910                break;
911            }
912        }
913        Ok(wisps)
914    }
915
916    fn parse(&mut self) -> Result<Vec<WispKind>, String> {
917        let mut wisps: Vec<WispKind> = vec![];
918        if self.path.is_empty() {
919            return Ok(wisps);
920        }
921        loop {
922            self.skip_slashes();
923            if self.curr().map(|c| c == '/').unwrap_or(false) {
924                return Err(format!(
925                    "'/' is not allowed after '/' at offset `{}`",
926                    self.offset
927                ));
928            }
929            let mut scanned = self.scan_wisps()?;
930            if scanned.len() > 1 {
931                wisps.push(CombWisp::new(scanned)?.into());
932            } else if let Some(wisp) = scanned.pop() {
933                wisps.push(wisp);
934            } else {
935                return Err("scan parts is empty".to_owned());
936            }
937            if self.curr().map(|c| c != '/').unwrap_or(false) {
938                return Err(format!(
939                    "expect '/', but found {:?} at offset `{}`",
940                    self.curr(),
941                    self.offset
942                ));
943            }
944            if self.next(true).is_none() {
945                break;
946            }
947        }
948        let mut all_names = IndexSet::new();
949        self.validate(&wisps, &mut all_names)?;
950        Ok(wisps)
951    }
952    fn validate(&self, wisps: &[WispKind], all_names: &mut IndexSet<String>) -> Result<(), String> {
953        if !wisps.is_empty() {
954            let wild_name = all_names.iter().find(|v| v.starts_with('*'));
955            if let Some(wild_name) = wild_name {
956                return Err(format!(
957                    "wildcard name `{}` must added at the last in url: `{}`",
958                    wild_name,
959                    self.path.iter().collect::<String>()
960                ));
961            }
962        }
963        for (index, wisp) in wisps.iter().enumerate() {
964            let name = match wisp {
965                WispKind::Named(wisp) => Some(&wisp.0),
966                WispKind::Chars(wisp) => Some(&wisp.name),
967                WispKind::Regex(wisp) => Some(&wisp.name),
968                _ => None,
969            };
970
971            if let Some(name) = name {
972                if name.starts_with('*') && index != wisps.len() - 1 {
973                    return Err(format!(
974                        "wildcard name `{}` must added at the last in url: `{}`",
975                        name,
976                        self.path.iter().collect::<String>()
977                    ));
978                }
979                if all_names.contains(name) {
980                    return Err(format!(
981                        "name `{}` is duplicated with previous one in url: `{}`",
982                        name,
983                        self.path.iter().collect::<String>()
984                    ));
985                }
986                all_names.insert(name.clone());
987            }
988        }
989        let wild_names = all_names
990            .iter()
991            .filter(|v| v.starts_with('*'))
992            .map(|c| &**c)
993            .collect::<Vec<_>>();
994        if wild_names.len() > 1 {
995            return Err(format!(
996                "many wildcard names: `[{}]` found in url: {}, only one wildcard name is allowed",
997                wild_names.join(", "),
998                self.path.iter().collect::<String>()
999            ));
1000        } else if let Some(wild_name) = wild_names.first() {
1001            if wild_name != all_names.last().expect("all_names should not be empty") {
1002                return Err(format!(
1003                    "wildcard name: `{}` should be the last one in url: `{}`",
1004                    wild_name,
1005                    self.path.iter().collect::<String>()
1006                ));
1007            }
1008        }
1009        Ok(())
1010    }
1011}
1012
/// Filter request by its path information.
///
/// Built from a path pattern string via [`PathFilter::new`], which parses the
/// pattern into a list of wisps that are later matched against a `PathState`.
pub struct PathFilter {
    /// The pattern string exactly as it was passed to `new`; shown by the `Debug` impl.
    raw_value: String,
    /// The wisps produced by parsing `raw_value`, matched in order by `detect`.
    path_wisps: Vec<WispKind>,
}
1018
1019impl Debug for PathFilter {
1020    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
1021        write!(f, "path:{}", &self.raw_value)
1022    }
1023}
1024#[async_trait]
1025impl Filter for PathFilter {
1026    #[inline]
1027    async fn filter(&self, _req: &mut Request, state: &mut PathState) -> bool {
1028        self.detect(state)
1029    }
1030}
1031impl PathFilter {
1032    /// Create new `PathFilter`.
1033    #[inline]
1034    pub fn new(value: impl Into<String>) -> Self {
1035        let raw_value = value.into();
1036        if raw_value.is_empty() {
1037            tracing::warn!("you should not add empty string as path filter");
1038        } else if raw_value == "/" {
1039            tracing::warn!("you should not add '/' as path filter");
1040        }
1041        let mut parser = PathParser::new(&raw_value);
1042        let path_wisps = match parser.parse() {
1043            Ok(path_wisps) => path_wisps,
1044            Err(e) => {
1045                panic!("{e}, raw_value: {raw_value}");
1046            }
1047        };
1048        Self {
1049            raw_value,
1050            path_wisps,
1051        }
1052    }
1053    /// Register new path wisp builder.
1054    #[inline]
1055    pub fn register_wisp_builder<B>(name: impl Into<String>, builder: B)
1056    where
1057        B: WispBuilder + 'static,
1058    {
1059        let mut builders = WISP_BUILDERS.write();
1060        builders.insert(name.into(), Arc::new(Box::new(builder)));
1061    }
1062    /// Register new path part regex.
1063    #[inline]
1064    pub fn register_wisp_regex(name: impl Into<String>, regex: Regex) {
1065        let mut builders = WISP_BUILDERS.write();
1066        builders.insert(
1067            name.into(),
1068            Arc::new(Box::new(RegexWispBuilder::new(regex))),
1069        );
1070    }
1071    /// Detect is that path is match.
1072    pub fn detect(&self, state: &mut PathState) -> bool {
1073        let original_cursor = state.cursor;
1074        for ps in &self.path_wisps {
1075            let row = state.cursor.0;
1076            if ps.detect(state) {
1077                if row == state.cursor.0 && row != state.parts.len() {
1078                    state.cursor = original_cursor;
1079                    return false;
1080                }
1081            } else {
1082                state.cursor = original_cursor;
1083                return false;
1084            }
1085        }
1086        true
1087    }
1088}
1089
#[cfg(test)]
mod tests {
    //! Tests for `PathParser::parse` (pattern -> wisps) and `PathFilter::detect`
    //! (wisps matched against a concrete request path).

    use super::PathParser;
    use crate::routing::{PathFilter, PathState};

    // ---- parsing: expected wisps are compared through their `Debug` output ----

    #[test]
    fn test_parse_empty() {
        let segments = PathParser::new("").parse().unwrap();
        assert!(segments.is_empty());
    }
    #[test]
    fn test_parse_root() {
        let segments = PathParser::new("/").parse().unwrap();
        assert!(segments.is_empty());
    }

    #[test]
    fn test_parse_single_const() {
        let segments = PathParser::new("/hello").parse().unwrap();
        assert_eq!(format!("{segments:?}"), r#"[ConstWisp("hello")]"#);
    }
    #[test]
    fn test_parse_multi_const() {
        let segments = PathParser::new("/hello/world").parse().unwrap();
        assert_eq!(
            format!("{segments:?}"),
            r#"[ConstWisp("hello"), ConstWisp("world")]"#
        );
    }
    #[test]
    fn test_parse_single_regex() {
        let segments = PathParser::new(r"/{abc|\d+}").parse().unwrap();
        assert_eq!(
            format!("{segments:?}"),
            r#"[RegexWisp { name: "abc", regex: Regex("^\\d+$") }]"#
        );
    }
    #[test]
    fn test_parse_wildcard_regex() {
        let segments = PathParser::new(r"/{abc|\d+\.+}").parse().unwrap();
        assert_eq!(
            format!("{segments:?}"),
            r#"[RegexWisp { name: "abc", regex: Regex("^\\d+\\.+$") }]"#
        );
    }
    #[test]
    fn test_parse_single_regex_with_prefix() {
        let segments = PathParser::new(r"/prefix_{abc|\d+}").parse().unwrap();
        assert_eq!(
            format!("{segments:?}"),
            r#"[CombWisp { names: ["abc"], comb_regex: Regex("^prefix_(?<abc>\\d+)$"), wild_regex: None, wild_start: None }]"#
        );
    }
    #[test]
    fn test_parse_single_regex_with_suffix() {
        let segments = PathParser::new(r"/{abc|\d+}_suffix.png").parse().unwrap();
        assert_eq!(
            format!("{segments:?}"),
            r#"[CombWisp { names: ["abc"], comb_regex: Regex("^(?<abc>\\d+)_suffix\\.png$"), wild_regex: None, wild_start: None }]"#
        );
    }
    #[test]
    fn test_parse_single_regex_with_prefix_and_suffix() {
        let segments = PathParser::new(r"/prefix{abc|\d+}suffix.png")
            .parse()
            .unwrap();
        assert_eq!(
            format!("{segments:?}"),
            r#"[CombWisp { names: ["abc"], comb_regex: Regex("^prefix(?<abc>\\d+)suffix\\.png$"), wild_regex: None, wild_start: None }]"#
        );
    }
    #[test]
    fn test_parse_dot_after_param() {
        let segments = PathParser::new(r"/{pid}/show/{table_name}.bu")
            .parse()
            .unwrap();
        assert_eq!(
            format!("{segments:?}"),
            r#"[NamedWisp("pid"), ConstWisp("show"), CombWisp { names: ["table_name"], comb_regex: Regex("^(?<table_name>.*)\\.bu$"), wild_regex: None, wild_start: None }]"#
        );
    }
    #[test]
    fn test_parse_multi_regex() {
        let segments = PathParser::new(r"/first{id}/prefix{abc|\d+}")
            .parse()
            .unwrap();
        assert_eq!(
            format!("{segments:?}"),
            r#"[CombWisp { names: ["id"], comb_regex: Regex("^first(?<id>.*)$"), wild_regex: None, wild_start: None }, CombWisp { names: ["abc"], comb_regex: Regex("^prefix(?<abc>\\d+)$"), wild_regex: None, wild_start: None }]"#
        );
    }
    // NOTE(review): this test uses the same pattern and expectation as
    // `test_parse_multi_regex`; kept as-is so no existing test is dropped.
    #[test]
    fn test_parse_multi_regex_with_prefix() {
        let segments = PathParser::new(r"/first{id}/prefix{abc|\d+}")
            .parse()
            .unwrap();
        assert_eq!(
            format!("{segments:?}"),
            r#"[CombWisp { names: ["id"], comb_regex: Regex("^first(?<id>.*)$"), wild_regex: None, wild_start: None }, CombWisp { names: ["abc"], comb_regex: Regex("^prefix(?<abc>\\d+)$"), wild_regex: None, wild_start: None }]"#
        );
    }
    #[test]
    fn test_parse_multi_regex_with_suffix() {
        let segments = PathParser::new(r"/first{id|\d+}/prefix{abc|\d+}")
            .parse()
            .unwrap();
        assert_eq!(
            format!("{segments:?}"),
            r#"[CombWisp { names: ["id"], comb_regex: Regex("^first(?<id>\\d+)$"), wild_regex: None, wild_start: None }, CombWisp { names: ["abc"], comb_regex: Regex("^prefix(?<abc>\\d+)$"), wild_regex: None, wild_start: None }]"#
        );
    }
    #[test]
    fn test_parse_multi_regex_with_prefix_and_suffix() {
        let segments = PathParser::new(r"/first{id}/prefix{abc|\d+}ext")
            .parse()
            .unwrap();
        assert_eq!(
            format!("{segments:?}"),
            r#"[CombWisp { names: ["id"], comb_regex: Regex("^first(?<id>.*)$"), wild_regex: None, wild_start: None }, CombWisp { names: ["abc"], comb_regex: Regex("^prefix(?<abc>\\d+)ext$"), wild_regex: None, wild_start: None }]"#
        );
    }
    #[test]
    fn test_parse_rest() {
        let segments = PathParser::new(r"/first{id}/{**rest}").parse().unwrap();
        assert_eq!(
            format!("{segments:?}"),
            r#"[CombWisp { names: ["id"], comb_regex: Regex("^first(?<id>.*)$"), wild_regex: None, wild_start: None }, NamedWisp("**rest")]"#
        );

        let segments = PathParser::new(r"/first{id}/{*+rest}").parse().unwrap();
        assert_eq!(
            format!("{segments:?}"),
            r#"[CombWisp { names: ["id"], comb_regex: Regex("^first(?<id>.*)$"), wild_regex: None, wild_start: None }, NamedWisp("*+rest")]"#
        );

        let segments = PathParser::new(r"/first{id}/{*?rest}").parse().unwrap();
        assert_eq!(
            format!("{segments:?}"),
            r#"[CombWisp { names: ["id"], comb_regex: Regex("^first(?<id>.*)$"), wild_regex: None, wild_start: None }, NamedWisp("*?rest")]"#
        );
    }
    #[test]
    fn test_parse_num() {
        assert!(PathParser::new(r"/first{id:num}").parse().is_err());
    }
    #[test]
    fn test_parse_named_follow_another_panic() {
        assert!(PathParser::new(r"/first{id}{id2}ext2").parse().is_err());
    }

    #[test]
    fn test_parse_comb_1() {
        let segments = PathParser::new(r"/first{id}world{**rest}").parse().unwrap();
        assert_eq!(
            format!("{segments:?}"),
            r#"[CombWisp { names: ["id", "rest"], comb_regex: Regex("^first(?<id>.*)world"), wild_regex: Some(Regex(".*")), wild_start: Some("**") }]"#
        );

        let filter = PathFilter::new("/first{id}world{**rest}");
        let mut state = PathState::new("first123world.ext");
        assert!(filter.detect(&mut state));
    }

    #[test]
    fn test_parse_comb_2() {
        let filter = PathFilter::new("/abc/hello{id}world{**rest}");
        let mut state = PathState::new("abc/hello123world.ext");
        assert!(filter.detect(&mut state));
    }

    #[test]
    fn test_parse_comb_3() {
        let filter = PathFilter::new("/{id}/{name}!hello.bu");
        let mut state = PathState::new("123/gold!hello.bu");
        assert!(filter.detect(&mut state));
    }
    #[test]
    fn test_parse_comb_4() {
        let filter = PathFilter::new("/abc/l{**rest}");
        let mut state = PathState::new("abc/llo1");
        assert!(filter.detect(&mut state));

        let mut state = PathState::new("abc/hello1");
        assert!(!filter.detect(&mut state));
    }
    #[test]
    fn test_parse_comb_5() {
        let filter = PathFilter::new(r"/abc/t{**rest|\d+}");
        let mut state = PathState::new("abc/t11");
        assert!(!filter.detect(&mut state));

        let mut state = PathState::new("abc/tlo1");
        assert!(!filter.detect(&mut state));
        let mut state = PathState::new("abc/t11a");
        assert!(!filter.detect(&mut state));
    }

    // ---- parsing: patterns that must be rejected ----

    #[test]
    fn test_parse_rest2_failed() {
        assert!(
            PathParser::new(r"/first{id}{*ext}/{**rest}")
                .parse()
                .is_err()
        );
    }

    #[test]
    fn test_parse_rest_failed1() {
        assert!(
            PathParser::new(r"/first{id}ext2/{**rest}{id}")
                .parse()
                .is_err()
        );
    }
    #[test]
    fn test_parse_rest_failed2() {
        assert!(
            PathParser::new(r"/first{id}ext2/{**rest}wefwe")
                .parse()
                .is_err()
        );
    }
    #[test]
    fn test_parse_many_slashes() {
        let wisps = PathParser::new(r"/first///second//{id}").parse().unwrap();
        assert_eq!(wisps.len(), 3);
    }

    // ---- detection against concrete request paths ----

    #[test]
    fn test_detect_consts() {
        let filter = PathFilter::new("/hello/world");
        let mut state = PathState::new("hello/world");
        assert!(filter.detect(&mut state));
    }
    #[test]
    fn test_detect_consts0() {
        let filter = PathFilter::new("/hello/world/");
        let mut state = PathState::new("hello/world");
        assert!(filter.detect(&mut state));
    }
    #[test]
    fn test_detect_consts1() {
        let filter = PathFilter::new("/hello/world");
        let mut state = PathState::new("hello/world/");
        assert!(filter.detect(&mut state));
    }
    #[test]
    fn test_detect_consts2() {
        let filter = PathFilter::new("/hello/world2");
        let mut state = PathState::new("hello/world");
        assert!(!filter.detect(&mut state));
    }

    #[test]
    fn test_detect_const_and_named() {
        let filter = PathFilter::new("/hello/world{id}");
        let mut state = PathState::new("hello/worldabc");
        // The outcome used to be discarded, so this test could only fail by panicking.
        assert!(filter.detect(&mut state));
    }

    #[test]
    fn test_detect_many() {
        let filter = PathFilter::new("/users/{id}/emails");
        let mut state = PathState::new("/users/29/emails");
        assert!(filter.detect(&mut state));
    }
    #[test]
    fn test_detect_many_slashes() {
        let filter = PathFilter::new("/users/{id}/emails");
        let mut state = PathState::new("/users///29//emails");
        assert!(filter.detect(&mut state));
    }
    #[test]
    fn test_detect_named_regex() {
        PathFilter::register_wisp_regex(
            "guid",
            regex::Regex::new("[0-9a-fA-F]{8}-([0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12}").unwrap(),
        );
        let filter = PathFilter::new("/users/{id:guid}");
        // 'h' is not a hex digit, so this must not match the registered pattern.
        let mut state = PathState::new("/users/123e4567-h89b-12d3-a456-9AC7CBDCEE52");
        assert!(!filter.detect(&mut state));

        let mut state = PathState::new("/users/123e4567-e89b-12d3-a456-9AC7CBDCEE52");
        assert!(filter.detect(&mut state));
        assert_eq!(
            state.matched_parts,
            vec!["users".to_owned(), "{id}".to_owned()]
        );
    }
    #[test]
    fn test_detect_wildcard() {
        // `**`: matches with or without remaining path.
        let filter = PathFilter::new("/users/{id}/{**rest}");
        let mut state = PathState::new("/users/12/facebook/insights/23");
        assert!(filter.detect(&mut state));
        assert_eq!(
            state.matched_parts,
            vec!["users".to_owned(), "{id}".to_owned(), "{**rest}".to_owned()]
        );
        let mut state = PathState::new("/users/12/");
        assert!(filter.detect(&mut state));
        let mut state = PathState::new("/users/12");
        assert!(filter.detect(&mut state));
        assert_eq!(
            state.matched_parts,
            vec!["users".to_owned(), "{id}".to_owned(), "{**rest}".to_owned()]
        );

        // `*+`: requires some remaining path.
        let filter = PathFilter::new("/users/{id}/{*+rest}");
        let mut state = PathState::new("/users/12/facebook/insights/23");
        assert!(filter.detect(&mut state));
        let mut state = PathState::new("/users/12/");
        assert!(!filter.detect(&mut state));
        let mut state = PathState::new("/users/12");
        assert!(!filter.detect(&mut state));

        // `*?`: accepts no remaining path or a single extra part, but not several.
        let filter = PathFilter::new("/users/{id}/{*?rest}");
        let mut state = PathState::new("/users/12/facebook/insights/23");
        assert!(!filter.detect(&mut state));
        let mut state = PathState::new("/users/12/");
        assert!(filter.detect(&mut state));
        let mut state = PathState::new("/users/12");
        assert!(filter.detect(&mut state));
        let mut state = PathState::new("/users/12/abc");
        assert!(filter.detect(&mut state));
        assert_eq!(
            state.matched_parts,
            vec!["users".to_owned(), "{id}".to_owned(), "{*?rest}".to_owned()]
        );
    }
}