url_pattern/
lib.rs

1// SPDX-License-Identifier: MIT
2
3/*!
4url-pattern is a VERY INCOMPLETE implementation of the WHATWG [URL Pattern](https://urlpattern.spec.whatwg.org/) standard.
5Seriously **DON'T USE THIS** (yet)!
6*/
7
8mod parser;
9mod tokenizer;
10
11use crate::parser::{Modifier, Parser, Part};
12use crate::tokenizer::{tokenize, Policy};
13
/// Options that control how a pattern string is compiled into a regular expression.
///
/// See <https://urlpattern.spec.whatwg.org/#options>.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct Options {
    /// Character that a segment wildcard must not match. When `None`, the
    /// generated segment wildcard uses an empty negated class (`[^]+?`).
    pub delimiter: Option<char>,
    /// Not read by the regexp generation in this file; it is only forwarded to
    /// the parser. NOTE(review): presumably the spec's "prefix code point" —
    /// confirm against the parser module.
    pub prefix: Option<char>,
    /// Not consulted by the regexp generation in this file.
    /// NOTE(review): presumably the spec's "ignore case" flag — confirm where
    /// (or whether) it is applied.
    pub ignore_case: bool,
}
21
22/// https://urlpattern.spec.whatwg.org/#generate-a-segment-wildcard-regexp
23fn generate_segment_wildcard_regexp(opts: &Options) -> String {
24    format!("[^{}]+?", escape_regexp(&opts.delimiter.map_or_else(|| String::new(), |chr| chr.to_string())))
25}
26
/// Returns the regexp fragment used for `Part::FullWildcard` parts: any
/// sequence of characters, including the empty one.
///
/// See <https://urlpattern.spec.whatwg.org/#full-wildcard-regexp-value>.
fn full_wildcard_regexp() -> &'static str {
    const FULL_WILDCARD: &str = ".*";
    FULL_WILDCARD
}
31
/// Escapes `str` so that it is matched literally when embedded in a regular
/// expression.
///
/// Implements <https://urlpattern.spec.whatwg.org/#escape-a-regexp-string>:
/// each of the characters `. + * ? ^ $ { } ( ) [ ] | / \` is prefixed with a
/// backslash; every other character is copied through unchanged.
fn escape_regexp(str: &str) -> String {
    // At least as long as the input; grows only when something needs escaping.
    let mut escaped = String::with_capacity(str.len());

    for chr in str.chars() {
        let is_special = matches!(
            chr,
            '.' | '+' | '*' | '?' | '^' | '$' | '{' | '}' | '(' | ')' | '[' | ']' | '|' | '/' | '\\'
        );
        if is_special {
            escaped.push('\\');
        }
        escaped.push(chr);
    }

    escaped
}
36
37/// https://urlpattern.spec.whatwg.org/#generate-a-regular-expression-and-name-list
38fn generate_regexp(parts: &[Part], opts: &Options) -> String {
39    let mut result: String = "^".into();
40
41    for part in parts {
42        let (_, modifier, regexp, prefix, suffix) = match part {
43            // If part’s type is "fixed-text":
44            Part::FixedText { value, modifier } => {
45                result.push_str(
46                    if let Some(modifier) = modifier {
47                        format!("(?:{}){modifier}", &escape_regexp(value))
48                    } else {
49                        escape_regexp(value)
50                    }
51                    .as_ref(),
52                );
53                continue;
54            }
55            Part::SegmentWildcard {
56                name,
57                modifier,
58                prefix,
59                suffix,
60            } => (
61                name,
62                modifier,
63                generate_segment_wildcard_regexp(opts),
64                escape_regexp(prefix),
65                escape_regexp(suffix),
66            ),
67            Part::FullWildcard {
68                name,
69                modifier,
70                prefix,
71                suffix,
72            } => (
73                name,
74                modifier,
75                full_wildcard_regexp().into(),
76                escape_regexp(prefix),
77                escape_regexp(suffix),
78            ),
79            Part::RegExp { .. } => todo!()
80        };
81
82        // If part’s prefix is the empty string and part’s suffix is the empty string:
83        // ..
84        if prefix.is_empty() && suffix.is_empty() {
85            result.push_str(
86                match modifier {
87                    None => format!("({regexp})"),
88                    Some(modifier @ Modifier::Optional) => format!("({regexp}){modifier}"),
89                    Some(modifier) => format!("((?:{regexp}){modifier})"),
90                }
91                .as_ref(),
92            );
93            continue;
94        }
95
96        result.push_str(
97            match modifier {
98                None => format!("(?:{prefix}({regexp}){suffix})"),
99                Some(modifier @ Modifier::Optional) => {
100                    format!("(?:{prefix}({regexp}){suffix}){modifier}")
101                }
102                Some(Modifier::ZeroOrMore) => {
103                    format!("(?:{prefix}((?:{regexp})(?:{suffix}{prefix}(?:{regexp}))*){suffix})?")
104                }
105                Some(Modifier::OneOrMore) => {
106                    format!("(?:{prefix}((?:{regexp})(?:{suffix}{prefix}(?:{regexp}))*){suffix})")
107                }
108            }
109            .as_ref(),
110        );
111    }
112
113    result.push('$');
114
115    result
116}
117
118
119/// Parses a pattern string and returns a regular expression for matching that
120/// pattern.
121pub fn regexp_for_pattern(input: &str, options: &Options) -> String {
122    let tokens = tokenize(input, Policy::Strict);
123
124    let mut parser = Parser::new(&tokens, options);
125    parser.parse();
126
127    generate_regexp(&parser.parts, options)
128}
129
#[cfg(test)]
mod tests {
    use super::*;

    // TODO: Verify that all of these expected results are correct!

    /// Options shared by every test: `/` as delimiter and prefix, case-sensitive.
    fn slash_options() -> Options {
        Options {
            delimiter: Some('/'),
            prefix: Some('/'),
            ignore_case: false,
        }
    }

    /// Checks, in order, that each pattern compiles to its expected regexp.
    fn assert_regexps(cases: &[(&str, &str)]) {
        let opts = slash_options();
        for (pattern, expected) in cases {
            assert_eq!(regexp_for_pattern(pattern, &opts), *expected);
        }
    }

    #[test]
    fn smoke_test() {
        assert_regexps(&[
            ("abc", "^abc$"),
            ("{foo}", "^foo$"),
            ("{bar}?", "^(?:bar)?$"),
            ("/:bar", r"^(?:\/([^\/]+?))$"),
            ("/:foo/:bar", r"^(?:\/([^\/]+?))(?:\/([^\/]+?))$"),
            ("/:foo/:bar?", r"^(?:\/([^\/]+?))(?:\/([^\/]+?))?$"),
            ("/:foo?/:bar?", r"^(?:\/([^\/]+?))?(?:\/([^\/]+?))?$"),
            ("/:foo?/:bar", r"^(?:\/([^\/]+?))?(?:\/([^\/]+?))$"),
        ]);
    }

    #[test]
    fn parse_example_1() {
        // From https://urlpattern.spec.whatwg.org/#parse-example-1
        //
        // Not yet asserted (expected values still to be determined):
        //   "/:foo(bar)?", "(bar)", "/(bar)", "/(bar)?"
        assert_regexps(&[
            ("/", r"^\/$"),
            (":foo", r"^([^\/]+?)$"),
            ("/:foo", r"^(?:\/([^\/]+?))$"),
            ("/:foo?", r"^(?:\/([^\/]+?))?$"),
        ]);
    }

    #[test]
    fn parse_example_2() {
        // From https://urlpattern.spec.whatwg.org/#parsing-example-2
        //
        // Not yet asserted:
        //   "{a:foo(bar)b}?" => r"^(?:a([^\/]+?)(bar)b)?$"
        //   "{(bar)}?"       => expected value still to be determined
        assert_regexps(&[
            ("{:foo}?", r"^([^\/]+?)?$"),
            ("{ab}?", r"^(?:ab)?$"),
        ]);
    }
}
202}