Skip to main content

ploidy_core/parse/
path.rs

1use miette::SourceSpan;
2use winnow::{
3    Parser, Stateful,
4    combinator::eof,
5    error::{ContextError, ParseError},
6};
7
8use crate::arena::Arena;
9
10/// Parser input threaded with an allocation [`Arena`].
11type Input<'a> = Stateful<&'a str, &'a Arena>;
12
13/// Parses a path template, like `/v1/pets/{petId}/toy`.
14///
15/// The grammar for path templating is adapted directly from
16/// [the OpenAPI spec][spec], and supports trailing literal
17/// query parameters as an extension.
18///
19/// [spec]: https://spec.openapis.org/oas/v3.2.0.html#x4-8-2-path-templating
20pub fn parse<'a>(arena: &'a Arena, input: &'a str) -> Result<ParsedPath<'a>, BadPath> {
21    let stateful = Input {
22        input,
23        state: arena,
24    };
25    (self::parser::path, eof)
26        .map(|((segments, query), _)| ParsedPath {
27            segments: arena.alloc_slice_copy(&segments),
28            query: arena.alloc_slice_copy(&query),
29        })
30        .parse(stateful)
31        .map_err(BadPath::from_parse_error)
32}
33
34/// A parsed path template.
35#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
36pub struct ParsedPath<'a> {
37    /// The slash-delimited path segments.
38    pub segments: &'a [PathSegment<'a>],
39    /// Literal query parameters that follow the path.
40    pub query: &'a [PathQueryParameter<'a>],
41}
42
/// A single literal `name=value` query parameter taken from the query
/// string of a path template.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct PathQueryParameter<'a> {
    /// The parameter name.
    pub name: &'a str,
    /// The parameter value.
    pub value: &'a str,
}
49
50/// A slash-delimited path segment that contains zero or more
51/// template fragments.
52#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
53pub struct PathSegment<'input>(&'input [PathFragment<'input>]);
54
55impl<'input> PathSegment<'input> {
56    /// Returns the template fragments within this segment.
57    pub fn fragments(&self) -> &'input [PathFragment<'input>] {
58        self.0
59    }
60}
61
/// One piece of a path segment: either literal text or a template
/// parameter.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PathFragment<'input> {
    /// Text that appears verbatim in the path.
    Literal(&'input str),
    /// The name of a template parameter, without the surrounding braces.
    Param(&'input str),
}
70
71mod parser {
72    use super::*;
73
74    use std::borrow::Cow;
75
76    use winnow::{
77        Parser,
78        combinator::{alt, delimited, opt, preceded, repeat},
79        token::take_while,
80    };
81
82    pub fn path<'a>(
83        input: &mut Input<'a>,
84    ) -> winnow::Result<(Vec<PathSegment<'a>>, Vec<PathQueryParameter<'a>>)> {
85        let segments = template.parse_next(input)?;
86        let query = opt(preceded(
87            '?',
88            take_while(0.., is_query_char).map(|query: &str| {
89                form_urlencoded::parse(query.as_bytes())
90                    .map(|(name, value)| PathQueryParameter {
91                        name: match name {
92                            Cow::Borrowed(name) => name,
93                            Cow::Owned(name) => input.state.alloc_str(&name),
94                        },
95                        value: match value {
96                            Cow::Borrowed(value) => value,
97                            Cow::Owned(value) => input.state.alloc_str(&value),
98                        },
99                    })
100                    .collect()
101            }),
102        ))
103        .parse_next(input)?;
104        Ok((segments, query.unwrap_or_default()))
105    }
106
107    fn template<'a>(input: &mut Input<'a>) -> winnow::Result<Vec<PathSegment<'a>>> {
108        alt((
109            ('/', segment, template)
110                .map(|(_, head, tail)| std::iter::once(head).chain(tail).collect()),
111            ('/', segment).map(|(_, segment)| vec![segment]),
112            '/'.map(|_| vec![PathSegment::default()]),
113        ))
114        .parse_next(input)
115    }
116
117    fn segment<'a>(input: &mut Input<'a>) -> winnow::Result<PathSegment<'a>> {
118        repeat(1.., fragment)
119            .map(|fragments: Vec<_>| PathSegment(input.state.alloc_slice_copy(&fragments)))
120            .parse_next(input)
121    }
122
123    fn fragment<'a>(input: &mut Input<'a>) -> winnow::Result<PathFragment<'a>> {
124        alt((param, literal)).parse_next(input)
125    }
126
127    pub fn param<'a>(input: &mut Input<'a>) -> winnow::Result<PathFragment<'a>> {
128        delimited('{', take_while(1.., |c| c != '{' && c != '}'), '}')
129            .map(PathFragment::Param)
130            .parse_next(input)
131    }
132
133    pub fn literal<'a>(input: &mut Input<'a>) -> winnow::Result<PathFragment<'a>> {
134        take_while(1.., is_path_char)
135            .verify_map(|text: &str| {
136                let decoded = percent_encoding::percent_decode_str(text)
137                    .decode_utf8()
138                    .ok()?;
139                Some(PathFragment::Literal(match decoded {
140                    Cow::Borrowed(s) => s,
141                    Cow::Owned(s) => input.state.alloc_str(&s),
142                }))
143            })
144            .parse_next(input)
145    }
146
147    /// Returns whether `c` is allowed in a URL path segment per
148    /// the WHATWG URL Standard's [path percent-encode set][set].
149    ///
150    /// [set]: https://url.spec.whatwg.org/#path-percent-encode-set
151    fn is_path_char(c: char) -> bool {
152        is_query_char(c) && !matches!(c, '/' | '?' | '^' | '`' | '{' | '}')
153    }
154
155    /// Returns whether `c` is allowed in a URL query string per
156    /// the WHATWG URL Standard's [query percent-encode set][set].
157    ///
158    /// [set]: https://url.spec.whatwg.org/#query-percent-encode-set
159    fn is_query_char(c: char) -> bool {
160        !matches!(
161            c,
162            '\x00'..='\x1f' | ('\x7f'..) | ' ' | '"' | '#' | '<' | '>'
163        )
164    }
165}
166
167/// An error returned when a path template can't be parsed.
168#[derive(Debug, miette::Diagnostic, thiserror::Error)]
169#[error("invalid URL path template")]
170pub struct BadPath {
171    #[source_code]
172    code: String,
173    #[label]
174    span: SourceSpan,
175}
176
177impl BadPath {
178    fn from_parse_error(error: ParseError<Input<'_>, ContextError>) -> Self {
179        let stateful = error.input();
180        Self {
181            code: stateful.input.to_owned(),
182            span: error.char_span().into(),
183        }
184    }
185}
186
#[cfg(test)]
mod test {
    use super::*;

    use crate::tests::assert_matches;

    /// A lone `/` is one empty segment with no query parameters.
    #[test]
    fn test_root_path() {
        let arena = Arena::new();
        let ParsedPath { segments, query } = parse(&arena, "/").unwrap();

        assert_matches!(segments, [PathSegment([])]);
        assert!(query.is_empty());
    }

    /// A single literal segment.
    #[test]
    fn test_simple_literal() {
        let arena = Arena::new();
        let parsed = parse(&arena, "/users").unwrap();

        assert_matches!(
            parsed.segments,
            [PathSegment([PathFragment::Literal("users")])],
        );
    }

    /// A trailing slash adds an empty final segment.
    #[test]
    fn test_trailing_slash() {
        let arena = Arena::new();
        let parsed = parse(&arena, "/users/").unwrap();

        assert_matches!(
            parsed.segments,
            [
                PathSegment([PathFragment::Literal("users")]),
                PathSegment([]),
            ],
        );
    }

    /// A literal segment followed by a parameter segment.
    #[test]
    fn test_simple_template() {
        let arena = Arena::new();
        let parsed = parse(&arena, "/users/{userId}").unwrap();

        assert_matches!(
            parsed.segments,
            [
                PathSegment([PathFragment::Literal("users")]),
                PathSegment([PathFragment::Param("userId")]),
            ],
        );
    }

    /// Several literal segments ahead of a parameter.
    #[test]
    fn test_nested_path() {
        let arena = Arena::new();
        let parsed = parse(&arena, "/api/v1/resources/{resourceId}").unwrap();

        assert_matches!(
            parsed.segments,
            [
                PathSegment([PathFragment::Literal("api")]),
                PathSegment([PathFragment::Literal("v1")]),
                PathSegment([PathFragment::Literal("resources")]),
                PathSegment([PathFragment::Param("resourceId")]),
            ],
        );
    }

    /// Parameters may appear in more than one segment.
    #[test]
    fn test_multiple_templates() {
        let arena = Arena::new();
        let parsed = parse(&arena, "/users/{userId}/posts/{postId}").unwrap();

        assert_matches!(
            parsed.segments,
            [
                PathSegment([PathFragment::Literal("users")]),
                PathSegment([PathFragment::Param("userId")]),
                PathSegment([PathFragment::Literal("posts")]),
                PathSegment([PathFragment::Param("postId")]),
            ],
        );
    }

    /// A parameter can be followed by literal text in the same segment.
    #[test]
    fn test_literal_with_extension() {
        let arena = Arena::new();
        let template = "/v1/storage/workspace/{workspace}/documents/download/{documentId}.pdf";
        let parsed = parse(&arena, template).unwrap();

        assert_matches!(
            parsed.segments,
            [
                PathSegment([PathFragment::Literal("v1")]),
                PathSegment([PathFragment::Literal("storage")]),
                PathSegment([PathFragment::Literal("workspace")]),
                PathSegment([PathFragment::Param("workspace")]),
                PathSegment([PathFragment::Literal("documents")]),
                PathSegment([PathFragment::Literal("download")]),
                PathSegment([
                    PathFragment::Param("documentId"),
                    PathFragment::Literal(".pdf"),
                ]),
            ],
        );
    }

    /// A parameter can sit between two literals in the same segment.
    #[test]
    fn test_mixed_literal_and_param() {
        let arena = Arena::new();
        let template =
            "/v1/storage/workspace/{workspace}/documents/download/report-{documentId}.pdf";
        let parsed = parse(&arena, template).unwrap();

        assert_matches!(
            parsed.segments,
            [
                PathSegment([PathFragment::Literal("v1")]),
                PathSegment([PathFragment::Literal("storage")]),
                PathSegment([PathFragment::Literal("workspace")]),
                PathSegment([PathFragment::Param("workspace")]),
                PathSegment([PathFragment::Literal("documents")]),
                PathSegment([PathFragment::Literal("download")]),
                PathSegment([
                    PathFragment::Literal("report-"),
                    PathFragment::Param("documentId"),
                    PathFragment::Literal(".pdf"),
                ]),
            ],
        );
    }

    /// Empty segments in the middle of a path are rejected.
    #[test]
    fn test_double_slash() {
        let arena = Arena::new();
        let outcome = parse(&arena, "/users//a");

        // Empty path segments aren't allowed.
        assert!(outcome.is_err());
    }

    /// Braces can't be nested inside a parameter name.
    #[test]
    fn test_invalid_chars_in_template() {
        let arena = Arena::new();
        let outcome = parse(&arena, "/users/{user/{id}}");

        // Parameter names can contain any character except for
        // `{` and `}`, per the `template-expression-param-name` terminal.
        assert!(outcome.is_err());
    }

    /// One `name=value` pair after the path.
    #[test]
    fn test_path_with_single_query_param() {
        let arena = Arena::new();
        let ParsedPath { segments, query } = parse(&arena, "/v1/messages?beta=true").unwrap();

        assert_matches!(
            segments,
            [
                PathSegment([PathFragment::Literal("v1")]),
                PathSegment([PathFragment::Literal("messages")]),
            ],
        );
        assert_matches!(
            query,
            [PathQueryParameter {
                name: "beta",
                value: "true",
            }],
        );
    }

    /// `&`-separated pairs are kept in order.
    #[test]
    fn test_path_with_multiple_query_params() {
        let arena = Arena::new();
        let ParsedPath { segments, query } =
            parse(&arena, "/v1/items?beta=true&version=2").unwrap();

        assert_matches!(
            segments,
            [
                PathSegment([PathFragment::Literal("v1")]),
                PathSegment([PathFragment::Literal("items")]),
            ],
        );
        assert_matches!(
            query,
            [
                PathQueryParameter {
                    name: "beta",
                    value: "true",
                },
                PathQueryParameter {
                    name: "version",
                    value: "2",
                },
            ],
        );
    }

    /// A query string can follow a parameter segment.
    #[test]
    fn test_path_with_template_and_query_param() {
        let arena = Arena::new();
        let ParsedPath { segments, query } =
            parse(&arena, "/v1/models/{model_id}?beta=true").unwrap();

        assert_matches!(
            segments,
            [
                PathSegment([PathFragment::Literal("v1")]),
                PathSegment([PathFragment::Literal("models")]),
                PathSegment([PathFragment::Param("model_id")]),
            ],
        );
        assert_matches!(
            query,
            [PathQueryParameter {
                name: "beta",
                value: "true",
            }],
        );
    }

    /// A name without `=` gets an empty value.
    #[test]
    fn test_path_with_valueless_query_param() {
        let arena = Arena::new();
        let ParsedPath { segments, query } = parse(&arena, "/v1/items?beta").unwrap();

        assert_matches!(
            segments,
            [
                PathSegment([PathFragment::Literal("v1")]),
                PathSegment([PathFragment::Literal("items")]),
            ],
        );
        assert_matches!(
            query,
            [PathQueryParameter {
                name: "beta",
                value: "",
            }],
        );
    }

    /// A bare trailing `?` produces no query parameters.
    #[test]
    fn test_path_with_trailing_question_mark() {
        let arena = Arena::new();
        let ParsedPath { segments, query } = parse(&arena, "/foo?").unwrap();

        assert_matches!(segments, [PathSegment([PathFragment::Literal("foo")])]);
        assert_matches!(query, []);
    }

    /// Percent-encoded names and values are decoded.
    #[test]
    fn test_path_with_percent_encoded_query_params() {
        let arena = Arena::new();
        let ParsedPath { segments, query } = parse(&arena, "/foo?a%20b=c%20d").unwrap();

        assert_matches!(segments, [PathSegment([PathFragment::Literal("foo")])]);
        assert_matches!(
            query,
            [PathQueryParameter {
                name: "a b",
                value: "c d",
            }],
        );
    }

    /// The root path can carry a query string.
    #[test]
    fn test_root_path_with_query_param() {
        let arena = Arena::new();
        let ParsedPath { segments, query } = parse(&arena, "/?beta=true").unwrap();

        assert_matches!(segments, [PathSegment([])]);
        assert_matches!(
            query,
            [PathQueryParameter {
                name: "beta",
                value: "true",
            }],
        );
    }
}