// Source file: ploidy_core/parse/path.rs

1use std::fmt::{Display, Formatter, Result as FmtResult, Write};
2
3use itertools::Itertools;
4use miette::SourceSpan;
5use percent_encoding::{AsciiSet, CONTROLS, PercentEncode, utf8_percent_encode};
6use winnow::{
7    Parser, Stateful,
8    combinator::eof,
9    error::{ContextError, ParseError},
10};
11
12use crate::arena::Arena;
13
14/// Parser input threaded with an allocation [`Arena`].
15type Input<'a> = Stateful<&'a str, &'a Arena>;
16
17/// Parses a path template, like `/v1/pets/{petId}/toy`.
18///
19/// The grammar for path templating is adapted directly from
20/// [the OpenAPI spec][spec], and supports trailing literal
21/// query parameters as an extension.
22///
23/// [spec]: https://spec.openapis.org/oas/v3.2.0.html#x4-8-2-path-templating
24pub fn parse<'a>(arena: &'a Arena, input: &'a str) -> Result<ParsedPath<'a>, BadPath> {
25    let stateful = Input {
26        input,
27        state: arena,
28    };
29    (self::parser::path, eof)
30        .map(|((segments, query), _)| ParsedPath {
31            segments: arena.alloc_slice_copy(&segments),
32            query: arena.alloc_slice_copy(&query),
33        })
34        .parse(stateful)
35        .map_err(BadPath::from_parse_error)
36}
37
38/// A parsed path template.
39#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
40pub struct ParsedPath<'a> {
41    /// The slash-delimited path segments.
42    pub segments: &'a [PathSegment<'a>],
43    /// Literal query parameters that follow the path.
44    pub query: &'a [PathQueryParameter<'a>],
45}
46
47impl<'a> ParsedPath<'a> {
48    /// Returns the path's segments coalesced into runs.
49    #[inline]
50    pub fn runs(&self) -> PathRuns<'_, 'a> {
51        PathRuns {
52            rest: self.segments,
53        }
54    }
55}
56
57impl Display for ParsedPath<'_> {
58    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
59        for segment in self.segments {
60            f.write_char('/')?;
61            match segment {
62                PathSegment::Literal(text) => {
63                    write!(f, "{}", path_percent_encode(text))?;
64                }
65                PathSegment::Templated(fragments) => {
66                    fragments.iter().try_for_each(|fragment| match fragment {
67                        PathFragment::Literal(text) => {
68                            write!(f, "{}", path_percent_encode(text))
69                        }
70                        PathFragment::Param(name) => write!(f, "{{{name}}}"),
71                    })?;
72                }
73            }
74        }
75
76        if !self.query.is_empty() {
77            let mut serializer = form_urlencoded::Serializer::new(String::new());
78            for param in self.query {
79                serializer.append_pair(param.name, param.value);
80            }
81            f.write_char('?')?;
82            f.write_str(&serializer.finish())?;
83        }
84
85        Ok(())
86    }
87}
88
/// A literal query parameter parsed from the path template.
///
/// Both fields hold the form-urlencoded-decoded text, so a template
/// written as `?a%20b=c%20d` yields the name `a b` and the value `c d`.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub struct PathQueryParameter<'a> {
    /// The decoded parameter name.
    pub name: &'a str,
    /// The decoded parameter value; empty when the template gives the
    /// name without a value.
    pub value: &'a str,
}
95
96/// A slash-delimited path segment.
97#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
98pub enum PathSegment<'a> {
99    /// A segment containing only literal text, possibly empty.
100    Literal(&'a str),
101    /// A segment containing at least one parameter fragment.
102    Templated(&'a [PathFragment<'a>]),
103}
104
/// A fragment within a path segment.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum PathFragment<'input> {
    /// Literal text, stored percent-decoded.
    Literal(&'input str),
    /// Template parameter name, without the surrounding `{` and `}`.
    Param(&'input str),
}
113
114/// A run of path segments.
115#[derive(Clone, Debug, Eq, Hash, PartialEq)]
116pub enum PathRun<'a> {
117    /// The text of consecutive literal-only segments.
118    Literals(Vec<&'a str>),
119    /// A segment with at least one parameter fragment.
120    Templated(&'a [PathFragment<'a>]),
121}
122
123/// Iterates over a path's segments in runs.
124#[derive(Clone, Copy, Debug)]
125pub struct PathRuns<'path, 'input> {
126    rest: &'path [PathSegment<'input>],
127}
128
129impl<'path, 'input> Iterator for PathRuns<'path, 'input> {
130    type Item = PathRun<'input>;
131
132    fn next(&mut self) -> Option<Self::Item> {
133        match self.rest {
134            [] => None,
135            segments @ [PathSegment::Literal(_), ..] => {
136                let literals = segments
137                    .iter()
138                    .map_while(|segment| match segment {
139                        &PathSegment::Literal(text) => Some(text),
140                        PathSegment::Templated(_) => None,
141                    })
142                    .collect_vec();
143                self.rest = &self.rest[literals.len()..];
144                Some(PathRun::Literals(literals))
145            }
146            [PathSegment::Templated(fragments), tail @ ..] => {
147                self.rest = tail;
148                Some(PathRun::Templated(fragments))
149            }
150        }
151    }
152}
153
154mod parser {
155    use super::*;
156
157    use std::borrow::Cow;
158
159    use winnow::{
160        Parser,
161        combinator::{alt, delimited, opt, preceded, repeat},
162        token::take_while,
163    };
164
165    pub fn path<'a>(
166        input: &mut Input<'a>,
167    ) -> winnow::Result<(Vec<PathSegment<'a>>, Vec<PathQueryParameter<'a>>)> {
168        let segments = template.parse_next(input)?;
169        let query = opt(preceded(
170            '?',
171            take_while(0.., is_query_char).map(|query: &str| {
172                form_urlencoded::parse(query.as_bytes())
173                    .map(|(name, value)| PathQueryParameter {
174                        name: match name {
175                            Cow::Borrowed(name) => name,
176                            Cow::Owned(name) => input.state.alloc_str(&name),
177                        },
178                        value: match value {
179                            Cow::Borrowed(value) => value,
180                            Cow::Owned(value) => input.state.alloc_str(&value),
181                        },
182                    })
183                    .collect()
184            }),
185        ))
186        .parse_next(input)?;
187        Ok((segments, query.unwrap_or_default()))
188    }
189
190    fn template<'a>(input: &mut Input<'a>) -> winnow::Result<Vec<PathSegment<'a>>> {
191        alt((
192            ('/', segment, template)
193                .map(|(_, head, tail)| std::iter::once(head).chain(tail).collect()),
194            ('/', segment).map(|(_, segment)| vec![segment]),
195            '/'.map(|_| vec![PathSegment::Literal("")]),
196        ))
197        .parse_next(input)
198    }
199
200    fn segment<'a>(input: &mut Input<'a>) -> winnow::Result<PathSegment<'a>> {
201        repeat(1.., fragment)
202            .map(|fragments: Vec<_>| match &*fragments {
203                // Maximal munch can't produce adjacent literal fragments,
204                // so a literal-only segment has exactly one fragment.
205                [PathFragment::Literal(text)] => PathSegment::Literal(text),
206                _ => PathSegment::Templated(input.state.alloc_slice_copy(&fragments)),
207            })
208            .parse_next(input)
209    }
210
211    fn fragment<'a>(input: &mut Input<'a>) -> winnow::Result<PathFragment<'a>> {
212        alt((param, literal)).parse_next(input)
213    }
214
215    pub fn param<'a>(input: &mut Input<'a>) -> winnow::Result<PathFragment<'a>> {
216        delimited('{', take_while(1.., |c| c != '{' && c != '}'), '}')
217            .map(PathFragment::Param)
218            .parse_next(input)
219    }
220
221    pub fn literal<'a>(input: &mut Input<'a>) -> winnow::Result<PathFragment<'a>> {
222        take_while(1.., is_path_char)
223            .verify_map(|text: &str| {
224                let decoded = percent_encoding::percent_decode_str(text)
225                    .decode_utf8()
226                    .ok()?;
227                Some(PathFragment::Literal(match decoded {
228                    Cow::Borrowed(s) => s,
229                    Cow::Owned(s) => input.state.alloc_str(&s),
230                }))
231            })
232            .parse_next(input)
233    }
234
235    /// Returns whether `c` is allowed in a URL path segment per
236    /// the WHATWG URL Standard's [path percent-encode set][set].
237    ///
238    /// [set]: https://url.spec.whatwg.org/#path-percent-encode-set
239    fn is_path_char(c: char) -> bool {
240        is_query_char(c) && !matches!(c, '/' | '?' | '^' | '`' | '{' | '}')
241    }
242
243    /// Returns whether `c` is allowed in a URL query string per
244    /// the WHATWG URL Standard's [query percent-encode set][set].
245    ///
246    /// [set]: https://url.spec.whatwg.org/#query-percent-encode-set
247    fn is_query_char(c: char) -> bool {
248        !matches!(
249            c,
250            '\x00'..='\x1f' | ('\x7f'..) | ' ' | '"' | '#' | '<' | '>'
251        )
252    }
253}
254
255/// An error returned when a path template can't be parsed.
256#[derive(Debug, miette::Diagnostic, thiserror::Error)]
257#[error("invalid URL path template")]
258pub struct BadPath {
259    #[source_code]
260    code: String,
261    #[label]
262    span: SourceSpan,
263}
264
265impl BadPath {
266    fn from_parse_error(error: ParseError<Input<'_>, ContextError>) -> Self {
267        let stateful = error.input();
268        Self {
269            code: stateful.input.to_owned(),
270            span: error.char_span().into(),
271        }
272    }
273}
274
275fn path_percent_encode(text: &str) -> PercentEncode<'_> {
276    // The WHATWG URL path percent-encode set, plus `/` and `%`.
277    const PATH_SEGMENT_PERCENT_ENCODE_SET: &AsciiSet = &CONTROLS
278        .add(b' ')
279        .add(b'"')
280        .add(b'#')
281        .add(b'<')
282        .add(b'>')
283        .add(b'?')
284        .add(b'^')
285        .add(b'`')
286        .add(b'{')
287        .add(b'}')
288        .add(b'/')
289        .add(b'%');
290    utf8_percent_encode(text, PATH_SEGMENT_PERCENT_ENCODE_SET)
291}
292
#[cfg(test)]
mod test {
    use super::*;

    use crate::tests::assert_matches;

    // A lone slash is a single empty literal segment.
    #[test]
    fn test_root_path() {
        let arena = Arena::new();
        let parsed = parse(&arena, "/").unwrap();

        assert_matches!(parsed.segments, [PathSegment::Literal("")]);
        assert!(parsed.query.is_empty());
    }

    #[test]
    fn test_simple_literal() {
        let arena = Arena::new();
        let parsed = parse(&arena, "/users").unwrap();

        assert_matches!(parsed.segments, [PathSegment::Literal("users")],);
    }

    // A trailing slash adds an empty literal segment at the end.
    #[test]
    fn test_trailing_slash() {
        let arena = Arena::new();
        let parsed = parse(&arena, "/users/").unwrap();

        assert_matches!(
            parsed.segments,
            [PathSegment::Literal("users"), PathSegment::Literal(""),],
        );
    }

    #[test]
    fn test_simple_template() {
        let arena = Arena::new();
        let parsed = parse(&arena, "/users/{userId}").unwrap();

        assert_matches!(
            parsed.segments,
            [
                PathSegment::Literal("users"),
                PathSegment::Templated([PathFragment::Param("userId")]),
            ],
        );
    }

    #[test]
    fn test_nested_path() {
        let arena = Arena::new();
        let parsed = parse(&arena, "/api/v1/resources/{resourceId}").unwrap();

        assert_matches!(
            parsed.segments,
            [
                PathSegment::Literal("api"),
                PathSegment::Literal("v1"),
                PathSegment::Literal("resources"),
                PathSegment::Templated([PathFragment::Param("resourceId")]),
            ],
        );
    }

    #[test]
    fn test_multiple_templates() {
        let arena = Arena::new();
        let parsed = parse(&arena, "/users/{userId}/posts/{postId}").unwrap();

        assert_matches!(
            parsed.segments,
            [
                PathSegment::Literal("users"),
                PathSegment::Templated([PathFragment::Param("userId")]),
                PathSegment::Literal("posts"),
                PathSegment::Templated([PathFragment::Param("postId")]),
            ],
        );
    }

    // A parameter followed by literal text stays in one segment.
    #[test]
    fn test_literal_with_extension() {
        let arena = Arena::new();
        let parsed = parse(
            &arena,
            "/v1/storage/workspace/{workspace}/documents/download/{documentId}.pdf",
        )
        .unwrap();

        assert_matches!(
            parsed.segments,
            [
                PathSegment::Literal("v1"),
                PathSegment::Literal("storage"),
                PathSegment::Literal("workspace"),
                PathSegment::Templated([PathFragment::Param("workspace")]),
                PathSegment::Literal("documents"),
                PathSegment::Literal("download"),
                PathSegment::Templated([
                    PathFragment::Param("documentId"),
                    PathFragment::Literal(".pdf"),
                ]),
            ],
        );
    }

    // Consecutive literal segments merge; templated ones stand alone.
    #[test]
    fn test_runs_coalesce_literals() {
        let arena = Arena::new();
        let parsed = parse(
            &arena,
            "/v1/storage/workspace/{workspace}/documents/download/{documentId}.pdf",
        )
        .unwrap();

        let mut runs = parsed.runs();

        assert_eq!(
            runs.next(),
            Some(PathRun::Literals(vec!["v1", "storage", "workspace"])),
        );
        assert_matches!(
            runs.next(),
            Some(PathRun::Templated([PathFragment::Param("workspace")])),
        );
        assert_eq!(
            runs.next(),
            Some(PathRun::Literals(vec!["documents", "download"])),
        );
        assert_matches!(
            runs.next(),
            Some(PathRun::Templated([
                PathFragment::Param("documentId"),
                PathFragment::Literal(".pdf"),
            ])),
        );
        assert_matches!(runs.next(), None);
    }

    // The empty trailing segment is part of the literal run.
    #[test]
    fn test_runs_empty_segments() {
        let arena = Arena::new();
        let parsed = parse(&arena, "/users/").unwrap();

        let mut runs = parsed.runs();

        assert_eq!(runs.next(), Some(PathRun::Literals(vec!["users", ""])));
        assert_matches!(runs.next(), None);
    }

    #[test]
    fn test_mixed_literal_and_param() {
        let arena = Arena::new();
        let parsed = parse(
            &arena,
            "/v1/storage/workspace/{workspace}/documents/download/report-{documentId}.pdf",
        )
        .unwrap();

        assert_matches!(
            parsed.segments,
            [
                PathSegment::Literal("v1"),
                PathSegment::Literal("storage"),
                PathSegment::Literal("workspace"),
                PathSegment::Templated([PathFragment::Param("workspace")]),
                PathSegment::Literal("documents"),
                PathSegment::Literal("download"),
                PathSegment::Templated([
                    PathFragment::Literal("report-"),
                    PathFragment::Param("documentId"),
                    PathFragment::Literal(".pdf"),
                ]),
            ],
        );
    }

    #[test]
    fn test_double_slash() {
        let arena = Arena::new();
        // Empty path segments aren't allowed.
        assert!(parse(&arena, "/users//a").is_err());
    }

    #[test]
    fn test_invalid_chars_in_template() {
        let arena = Arena::new();
        // Parameter names can contain any character except for
        // `{` and `}`, per the `template-expression-param-name` terminal.
        assert!(parse(&arena, "/users/{user/{id}}").is_err());
    }

    #[test]
    fn test_path_with_single_query_param() {
        let arena = Arena::new();
        let parsed = parse(&arena, "/v1/messages?beta=true").unwrap();

        assert_matches!(
            parsed,
            ParsedPath {
                segments: [PathSegment::Literal("v1"), PathSegment::Literal("messages"),],
                query: [PathQueryParameter {
                    name: "beta",
                    value: "true",
                }],
            },
        );
    }

    #[test]
    fn test_path_with_multiple_query_params() {
        let arena = Arena::new();
        let parsed = parse(&arena, "/v1/items?beta=true&version=2").unwrap();

        assert_matches!(
            parsed,
            ParsedPath {
                segments: [PathSegment::Literal("v1"), PathSegment::Literal("items")],
                query: [
                    PathQueryParameter {
                        name: "beta",
                        value: "true",
                    },
                    PathQueryParameter {
                        name: "version",
                        value: "2",
                    },
                ],
            },
        );
    }

    #[test]
    fn test_path_with_template_and_query_param() {
        let arena = Arena::new();
        let parsed = parse(&arena, "/v1/models/{model_id}?beta=true").unwrap();

        assert_matches!(
            parsed,
            ParsedPath {
                segments: [
                    PathSegment::Literal("v1"),
                    PathSegment::Literal("models"),
                    PathSegment::Templated([PathFragment::Param("model_id")]),
                ],
                query: [PathQueryParameter {
                    name: "beta",
                    value: "true",
                }],
            },
        );
    }

    // Parameters are written back in braces; a valueless query
    // parameter is rendered with a trailing `=`.
    #[test]
    fn test_display_preserves_path_params() {
        let arena = Arena::new();
        let parsed = parse(
            &arena,
            "/v1/storage/{workspace}/documents/report-{documentId}.pdf?beta=true&expand",
        )
        .unwrap();

        assert_eq!(
            parsed.to_string(),
            "/v1/storage/{workspace}/documents/report-{documentId}.pdf?beta=true&expand="
        );
    }

    // Decoded literals are re-encoded on display.
    #[test]
    fn test_display_encodes_literals() {
        let arena = Arena::new();
        let parsed = parse(
            &arena,
            "/foo%20bar/a%2Fb?name=John%20Doe&filter=%7Bactive%7D",
        )
        .unwrap();

        assert_eq!(
            parsed.to_string(),
            "/foo%20bar/a%2Fb?name=John+Doe&filter=%7Bactive%7D"
        );
    }

    #[test]
    fn test_path_with_valueless_query_param() {
        let arena = Arena::new();
        let parsed = parse(&arena, "/v1/items?beta").unwrap();

        assert_matches!(
            parsed,
            ParsedPath {
                segments: [PathSegment::Literal("v1"), PathSegment::Literal("items"),],
                query: [PathQueryParameter {
                    name: "beta",
                    value: "",
                }],
            },
        );
    }

    // A bare `?` is accepted and yields no query parameters.
    #[test]
    fn test_path_with_trailing_question_mark() {
        let arena = Arena::new();
        let parsed = parse(&arena, "/foo?").unwrap();

        assert_matches!(
            parsed,
            ParsedPath {
                segments: [PathSegment::Literal("foo")],
                query: [],
            },
        );
    }

    // Query names and values are stored decoded.
    #[test]
    fn test_path_with_percent_encoded_query_params() {
        let arena = Arena::new();
        let parsed = parse(&arena, "/foo?a%20b=c%20d").unwrap();

        assert_matches!(
            parsed,
            ParsedPath {
                segments: [PathSegment::Literal("foo")],
                query: [PathQueryParameter {
                    name: "a b",
                    value: "c d",
                }],
            },
        );
    }

    #[test]
    fn test_root_path_with_query_param() {
        let arena = Arena::new();
        let parsed = parse(&arena, "/?beta=true").unwrap();

        assert_matches!(
            parsed,
            ParsedPath {
                segments: [PathSegment::Literal("")],
                query: [PathQueryParameter {
                    name: "beta",
                    value: "true",
                }],
            },
        );
    }
}