use std::fmt::{Display, Formatter, Result as FmtResult, Write};
use itertools::Itertools;
use miette::SourceSpan;
use percent_encoding::{AsciiSet, CONTROLS, PercentEncode, utf8_percent_encode};
use winnow::{
Parser, Stateful,
combinator::eof,
error::{ContextError, ParseError},
};
use crate::arena::Arena;
/// Parser input: the raw path text paired with the [`Arena`] that the
/// combinators in [`parser`] allocate decoded strings and fragment slices into.
type Input<'a> = Stateful<&'a str, &'a Arena>;
/// Parses a URL path template such as `/users/{userId}?beta=true`.
///
/// The whole of `input` must be consumed by the path grammar. The resulting
/// segment and query slices are copied into `arena`, so the returned
/// [`ParsedPath`] borrows from both `arena` and `input`.
///
/// # Errors
///
/// Returns a [`BadPath`] built from the parser error when `input` does not
/// match the grammar or has trailing, unparsed text.
pub fn parse<'a>(arena: &'a Arena, input: &'a str) -> Result<ParsedPath<'a>, BadPath> {
    // `eof` makes the parse fail if anything is left over after the path.
    let (segments, query) = (self::parser::path, eof)
        .map(|(parsed, _)| parsed)
        .parse(Input {
            input,
            state: arena,
        })
        .map_err(BadPath::from_parse_error)?;

    Ok(ParsedPath {
        segments: arena.alloc_slice_copy(&segments),
        query: arena.alloc_slice_copy(&query),
    })
}
/// A URL path template produced by [`parse`]: its ordered path segments plus
/// any query parameters that followed a `?`.
///
/// Both slices are allocated in the arena that was passed to [`parse`].
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub struct ParsedPath<'a> {
/// The `/`-separated segments, in the order they appear in the path.
pub segments: &'a [PathSegment<'a>],
/// Query parameters in source order; empty when the path has no query string.
pub query: &'a [PathQueryParameter<'a>],
}
impl<'a> ParsedPath<'a> {
    /// Returns an iterator over this path's segments grouped into runs:
    /// consecutive literal segments are yielded together, and every templated
    /// segment is yielded on its own.
    #[inline]
    pub fn runs(&self) -> PathRuns<'_, 'a> {
        // The iterator starts with every segment still pending.
        let rest = self.segments;
        PathRuns { rest }
    }
}
/// Renders the path back into template form.
///
/// Each segment is written as `/` followed by its contents: literal text is
/// percent-encoded with `path_percent_encode`, and parameters are written as
/// `{name}`. A non-empty query is appended as `?` followed by its
/// `application/x-www-form-urlencoded` serialization.
impl Display for ParsedPath<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
        for segment in self.segments {
            f.write_char('/')?;
            let fragments = match segment {
                PathSegment::Literal(text) => {
                    write!(f, "{}", path_percent_encode(text))?;
                    continue;
                }
                PathSegment::Templated(fragments) => fragments,
            };
            for fragment in fragments.iter() {
                match fragment {
                    PathFragment::Literal(text) => {
                        write!(f, "{}", path_percent_encode(text))?;
                    }
                    // `{{` and `}}` are escaped braces around the parameter name.
                    PathFragment::Param(name) => write!(f, "{{{name}}}")?,
                }
            }
        }

        if self.query.is_empty() {
            return Ok(());
        }

        let mut serializer = form_urlencoded::Serializer::new(String::new());
        for param in self.query {
            serializer.append_pair(param.name, param.value);
        }
        let encoded = serializer.finish();
        f.write_char('?')?;
        f.write_str(&encoded)
    }
}
/// A single `name=value` pair from the query string of a path template.
///
/// The parser fills both fields from `form_urlencoded::parse`, so they hold
/// decoded text.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub struct PathQueryParameter<'a> {
/// The decoded parameter name.
pub name: &'a str,
/// The decoded parameter value; the tests show this is `""` for a bare
/// `name` with no `=`.
pub value: &'a str,
}
/// One `/`-delimited segment of a path template.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum PathSegment<'a> {
/// A segment made of exactly one literal fragment (percent-decoded text).
/// The parser also uses `Literal("")` for the empty segment after a
/// trailing `/`.
Literal(&'a str),
/// Any other sequence of fragments, e.g. `{id}` or `report-{id}.pdf`.
Templated(&'a [PathFragment<'a>]),
}
/// A piece of a path segment: either literal text or a `{param}` placeholder.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum PathFragment<'input> {
/// Literal text, already percent-decoded by the parser.
Literal(&'input str),
/// The name of a template parameter, without the surrounding braces.
Param(&'input str),
}
/// An item yielded by [`PathRuns`]: a group of adjacent literal segments, or a
/// single templated segment.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub enum PathRun<'a> {
/// The texts of one or more consecutive [`PathSegment::Literal`] segments.
Literals(Vec<&'a str>),
/// The fragments of one [`PathSegment::Templated`] segment.
Templated(&'a [PathFragment<'a>]),
}
/// Iterator over the segments of a [`ParsedPath`], grouped into [`PathRun`]s.
///
/// Created by [`ParsedPath::runs`].
#[derive(Clone, Copy, Debug)]
pub struct PathRuns<'path, 'input> {
// Segments that have not been yielded yet.
rest: &'path [PathSegment<'input>],
}
/// Yields the remaining segments as [`PathRun`]s.
///
/// A maximal sequence of adjacent [`PathSegment::Literal`] segments is
/// collected into one [`PathRun::Literals`]; each [`PathSegment::Templated`]
/// segment becomes its own [`PathRun::Templated`].
impl<'path, 'input> Iterator for PathRuns<'path, 'input> {
    type Item = PathRun<'input>;

    fn next(&mut self) -> Option<Self::Item> {
        match self.rest {
            [] => None,
            [PathSegment::Templated(fragments), tail @ ..] => {
                self.rest = tail;
                Some(PathRun::Templated(fragments))
            }
            segments @ [PathSegment::Literal(_), ..] => {
                // Take literal texts up to (not including) the next templated
                // segment, then skip exactly that many segments.
                let literals = segments
                    .iter()
                    .map_while(|segment| match segment {
                        &PathSegment::Literal(text) => Some(text),
                        PathSegment::Templated(_) => None,
                    })
                    .collect_vec();
                self.rest = &segments[literals.len()..];
                Some(PathRun::Literals(literals))
            }
        }
    }

    /// Every run consumes at least one segment, so there are at most
    /// `rest.len()` runs left, and at least one whenever `rest` is non-empty.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.rest.len();
        (usize::from(remaining > 0), Some(remaining))
    }
}

// Once `rest` is empty, `next` returns `None` forever.
impl std::iter::FusedIterator for PathRuns<'_, '_> {}
/// winnow parsers for the path-template grammar.
///
/// Each parser takes the stateful [`Input`]; `input.state` is the arena used
/// to hold strings that had to be decoded and the fragment slices of templated
/// segments.
mod parser {
use super::*;
use std::borrow::Cow;
use winnow::{
Parser,
combinator::{alt, delimited, opt, preceded, repeat},
token::take_while,
};
/// Parses the path segments followed by an optional `?query`.
///
/// Returns the segments and the query parameters; the parameter list is empty
/// when there is no `?` (and also when nothing follows the `?`).
pub fn path<'a>(
input: &mut Input<'a>,
) -> winnow::Result<(Vec<PathSegment<'a>>, Vec<PathQueryParameter<'a>>)> {
let segments = template.parse_next(input)?;
let query = opt(preceded(
'?',
// Take the run of characters accepted by `is_query_char` (possibly
// empty) and split it into pairs with `form_urlencoded::parse`.
take_while(0.., is_query_char).map(|query: &str| {
form_urlencoded::parse(query.as_bytes())
.map(|(name, value)| PathQueryParameter {
// Borrowed values point into the input and are used as-is; owned
// values (decoding produced a new string) are copied into the arena.
name: match name {
Cow::Borrowed(name) => name,
Cow::Owned(name) => input.state.alloc_str(&name),
},
value: match value {
Cow::Borrowed(value) => value,
Cow::Owned(value) => input.state.alloc_str(&value),
},
})
.collect()
}),
))
.parse_next(input)?;
Ok((segments, query.unwrap_or_default()))
}
/// Parses one or more `/`-prefixed segments, recursively.
///
/// The alternatives are tried in order: `/segment` followed by more template,
/// then a final `/segment`, then a lone `/`, which is represented as an empty
/// literal segment. An empty segment is therefore only produced where no
/// segment text follows the `/`.
fn template<'a>(input: &mut Input<'a>) -> winnow::Result<Vec<PathSegment<'a>>> {
alt((
('/', segment, template)
.map(|(_, head, tail)| std::iter::once(head).chain(tail).collect()),
('/', segment).map(|(_, segment)| vec![segment]),
'/'.map(|_| vec![PathSegment::Literal("")]),
))
.parse_next(input)
}
/// Parses a non-empty segment as one or more fragments.
///
/// A segment that is exactly one literal fragment becomes
/// [`PathSegment::Literal`]; anything else becomes [`PathSegment::Templated`]
/// with its fragments copied into the arena.
fn segment<'a>(input: &mut Input<'a>) -> winnow::Result<PathSegment<'a>> {
repeat(1.., fragment)
.map(|fragments: Vec<_>| match &*fragments {
[PathFragment::Literal(text)] => PathSegment::Literal(text),
_ => PathSegment::Templated(input.state.alloc_slice_copy(&fragments)),
})
.parse_next(input)
}
/// Parses a single fragment: a `{param}` if possible, otherwise literal text.
fn fragment<'a>(input: &mut Input<'a>) -> winnow::Result<PathFragment<'a>> {
alt((param, literal)).parse_next(input)
}
/// Parses `{name}`, where `name` is one or more characters other than `{` and
/// `}`, and returns the name without the braces.
pub fn param<'a>(input: &mut Input<'a>) -> winnow::Result<PathFragment<'a>> {
delimited('{', take_while(1.., |c| c != '{' && c != '}'), '}')
.map(PathFragment::Param)
.parse_next(input)
}
/// Parses a non-empty run of characters accepted by `is_path_char` and
/// percent-decodes it.
///
/// The parse is rejected when the decoded bytes are not valid UTF-8. Decoded
/// text that differs from the input is copied into the arena.
pub fn literal<'a>(input: &mut Input<'a>) -> winnow::Result<PathFragment<'a>> {
take_while(1.., is_path_char)
.verify_map(|text: &str| {
let decoded = percent_encoding::percent_decode_str(text)
.decode_utf8()
.ok()?;
Some(PathFragment::Literal(match decoded {
Cow::Borrowed(s) => s,
Cow::Owned(s) => input.state.alloc_str(&s),
}))
})
.parse_next(input)
}
/// Whether `c` may appear in a literal path fragment: any character accepted
/// by `is_query_char` except `/`, `?`, `^`, `` ` ``, `{` and `}`.
fn is_path_char(c: char) -> bool {
is_query_char(c) && !matches!(c, '/' | '?' | '^' | '`' | '{' | '}')
}
/// Whether `c` may appear in the query string: rejects U+0000..=U+001F,
/// U+007F and everything above it (so all non-ASCII), space, `"`, `#`, `<`
/// and `>`.
fn is_query_char(c: char) -> bool {
!matches!(
c,
'\x00'..='\x1f' | ('\x7f'..) | ' ' | '"' | '#' | '<' | '>'
)
}
}
/// Error returned by [`parse`] when the input is not a valid path template.
///
/// Carries the offending input and the span of the failure so that it can be
/// rendered as a labelled `miette` diagnostic.
#[derive(Debug, miette::Diagnostic, thiserror::Error)]
#[error("invalid URL path template")]
pub struct BadPath {
// The complete input that failed to parse.
#[source_code]
code: String,
// The span within `code` reported by the parser error.
#[label]
span: SourceSpan,
}
impl BadPath {
    /// Builds a [`BadPath`] from a winnow parse error, keeping an owned copy
    /// of the original input text and the error's character span.
    fn from_parse_error(error: ParseError<Input<'_>, ContextError>) -> Self {
        let code = error.input().input.to_owned();
        let span = error.char_span().into();
        Self { code, span }
    }
}
/// Percent-encodes the text of a literal path segment or fragment.
///
/// On top of `CONTROLS`, the encode set contains the ASCII characters that
/// the parser refuses to accept unescaped in a literal, plus `%` itself so
/// that a decoded percent sign is written back as `%25`.
fn path_percent_encode(text: &str) -> PercentEncode<'_> {
    const LITERAL_ESCAPES: &AsciiSet = &CONTROLS
        // Escape introducer and URL delimiters.
        .add(b'%')
        .add(b'/')
        .add(b'?')
        .add(b'#')
        // Template braces.
        .add(b'{')
        .add(b'}')
        // Remaining characters rejected in literal fragments.
        .add(b' ')
        .add(b'"')
        .add(b'<')
        .add(b'>')
        .add(b'^')
        .add(b'`');
    utf8_percent_encode(text, LITERAL_ESCAPES)
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::tests::assert_matches;

    /// Path with literal runs, a bare parameter and a parameter with a suffix.
    const DOWNLOAD_PATH: &str =
        "/v1/storage/workspace/{workspace}/documents/download/{documentId}.pdf";

    /// `/` alone is a single empty literal segment with no query.
    #[test]
    fn test_root_path() {
        let arena = Arena::new();
        let parsed = parse(&arena, "/").unwrap();

        assert_matches!(parsed.segments, [PathSegment::Literal("")]);
        assert!(parsed.query.is_empty());
    }

    /// A single literal segment.
    #[test]
    fn test_simple_literal() {
        let arena = Arena::new();
        let parsed = parse(&arena, "/users").unwrap();

        assert_matches!(parsed.segments, [PathSegment::Literal("users")],);
    }

    /// A trailing slash adds an empty literal segment.
    #[test]
    fn test_trailing_slash() {
        let arena = Arena::new();
        let parsed = parse(&arena, "/users/").unwrap();

        assert_matches!(
            parsed.segments,
            [PathSegment::Literal("users"), PathSegment::Literal(""),],
        );
    }

    /// A segment consisting of one parameter is templated.
    #[test]
    fn test_simple_template() {
        let arena = Arena::new();
        let parsed = parse(&arena, "/users/{userId}").unwrap();

        assert_matches!(
            parsed.segments,
            [
                PathSegment::Literal("users"),
                PathSegment::Templated([PathFragment::Param("userId")]),
            ],
        );
    }

    /// Several literal segments followed by a parameter.
    #[test]
    fn test_nested_path() {
        let arena = Arena::new();
        let parsed = parse(&arena, "/api/v1/resources/{resourceId}").unwrap();

        assert_matches!(
            parsed.segments,
            [
                PathSegment::Literal("api"),
                PathSegment::Literal("v1"),
                PathSegment::Literal("resources"),
                PathSegment::Templated([PathFragment::Param("resourceId")]),
            ],
        );
    }

    /// Literal and templated segments may alternate.
    #[test]
    fn test_multiple_templates() {
        let arena = Arena::new();
        let parsed = parse(&arena, "/users/{userId}/posts/{postId}").unwrap();

        assert_matches!(
            parsed.segments,
            [
                PathSegment::Literal("users"),
                PathSegment::Templated([PathFragment::Param("userId")]),
                PathSegment::Literal("posts"),
                PathSegment::Templated([PathFragment::Param("postId")]),
            ],
        );
    }

    /// A parameter followed by literal text stays within one segment.
    #[test]
    fn test_literal_with_extension() {
        let arena = Arena::new();
        let parsed = parse(&arena, DOWNLOAD_PATH).unwrap();

        assert_matches!(
            parsed.segments,
            [
                PathSegment::Literal("v1"),
                PathSegment::Literal("storage"),
                PathSegment::Literal("workspace"),
                PathSegment::Templated([PathFragment::Param("workspace")]),
                PathSegment::Literal("documents"),
                PathSegment::Literal("download"),
                PathSegment::Templated([
                    PathFragment::Param("documentId"),
                    PathFragment::Literal(".pdf"),
                ]),
            ],
        );
    }

    /// `runs()` groups adjacent literal segments and yields templated ones singly.
    #[test]
    fn test_runs_coalesce_literals() {
        let arena = Arena::new();
        let parsed = parse(&arena, DOWNLOAD_PATH).unwrap();
        let mut iter = parsed.runs();

        assert_eq!(
            iter.next(),
            Some(PathRun::Literals(vec!["v1", "storage", "workspace"])),
        );
        assert_matches!(
            iter.next(),
            Some(PathRun::Templated([PathFragment::Param("workspace")])),
        );
        assert_eq!(
            iter.next(),
            Some(PathRun::Literals(vec!["documents", "download"])),
        );
        assert_matches!(
            iter.next(),
            Some(PathRun::Templated([
                PathFragment::Param("documentId"),
                PathFragment::Literal(".pdf"),
            ])),
        );
        assert_matches!(iter.next(), None);
    }

    /// The empty segment from a trailing slash joins the literal run.
    #[test]
    fn test_runs_empty_segments() {
        let arena = Arena::new();
        let parsed = parse(&arena, "/users/").unwrap();
        let mut iter = parsed.runs();

        assert_eq!(iter.next(), Some(PathRun::Literals(vec!["users", ""])));
        assert_matches!(iter.next(), None);
    }

    /// Literal text may appear on both sides of a parameter.
    #[test]
    fn test_mixed_literal_and_param() {
        let arena = Arena::new();
        let parsed = parse(
            &arena,
            "/v1/storage/workspace/{workspace}/documents/download/report-{documentId}.pdf",
        )
        .unwrap();

        assert_matches!(
            parsed.segments,
            [
                PathSegment::Literal("v1"),
                PathSegment::Literal("storage"),
                PathSegment::Literal("workspace"),
                PathSegment::Templated([PathFragment::Param("workspace")]),
                PathSegment::Literal("documents"),
                PathSegment::Literal("download"),
                PathSegment::Templated([
                    PathFragment::Literal("report-"),
                    PathFragment::Param("documentId"),
                    PathFragment::Literal(".pdf"),
                ]),
            ],
        );
    }

    /// An empty segment in the middle of a path is rejected.
    #[test]
    fn test_double_slash() {
        let arena = Arena::new();
        let outcome = parse(&arena, "/users//a");

        assert!(outcome.is_err());
    }

    /// Nested braces inside a parameter are rejected.
    #[test]
    fn test_invalid_chars_in_template() {
        let arena = Arena::new();
        let outcome = parse(&arena, "/users/{user/{id}}");

        assert!(outcome.is_err());
    }

    /// One `name=value` pair after `?`.
    #[test]
    fn test_path_with_single_query_param() {
        let arena = Arena::new();
        let parsed = parse(&arena, "/v1/messages?beta=true").unwrap();

        assert_matches!(
            parsed,
            ParsedPath {
                segments: [PathSegment::Literal("v1"), PathSegment::Literal("messages"),],
                query: [PathQueryParameter {
                    name: "beta",
                    value: "true",
                }],
            },
        );
    }

    /// Several pairs separated by `&` keep their order.
    #[test]
    fn test_path_with_multiple_query_params() {
        let arena = Arena::new();
        let parsed = parse(&arena, "/v1/items?beta=true&version=2").unwrap();

        assert_matches!(
            parsed,
            ParsedPath {
                segments: [PathSegment::Literal("v1"), PathSegment::Literal("items")],
                query: [
                    PathQueryParameter {
                        name: "beta",
                        value: "true",
                    },
                    PathQueryParameter {
                        name: "version",
                        value: "2",
                    },
                ],
            },
        );
    }

    /// A query may follow a templated segment.
    #[test]
    fn test_path_with_template_and_query_param() {
        let arena = Arena::new();
        let parsed = parse(&arena, "/v1/models/{model_id}?beta=true").unwrap();

        assert_matches!(
            parsed,
            ParsedPath {
                segments: [
                    PathSegment::Literal("v1"),
                    PathSegment::Literal("models"),
                    PathSegment::Templated([PathFragment::Param("model_id")]),
                ],
                query: [PathQueryParameter {
                    name: "beta",
                    value: "true",
                }],
            },
        );
    }

    /// `Display` writes parameters back as `{name}` and valueless params as `name=`.
    #[test]
    fn test_display_preserves_path_params() {
        let arena = Arena::new();
        let parsed = parse(
            &arena,
            "/v1/storage/{workspace}/documents/report-{documentId}.pdf?beta=true&expand",
        )
        .unwrap();

        assert_eq!(
            parsed.to_string(),
            "/v1/storage/{workspace}/documents/report-{documentId}.pdf?beta=true&expand="
        );
    }

    /// `Display` re-encodes decoded literals and form-encodes the query.
    #[test]
    fn test_display_encodes_literals() {
        let arena = Arena::new();
        let parsed = parse(
            &arena,
            "/foo%20bar/a%2Fb?name=John%20Doe&filter=%7Bactive%7D",
        )
        .unwrap();

        assert_eq!(
            parsed.to_string(),
            "/foo%20bar/a%2Fb?name=John+Doe&filter=%7Bactive%7D"
        );
    }

    /// A parameter without `=` has an empty value.
    #[test]
    fn test_path_with_valueless_query_param() {
        let arena = Arena::new();
        let parsed = parse(&arena, "/v1/items?beta").unwrap();

        assert_matches!(
            parsed,
            ParsedPath {
                segments: [PathSegment::Literal("v1"), PathSegment::Literal("items"),],
                query: [PathQueryParameter {
                    name: "beta",
                    value: "",
                }],
            },
        );
    }

    /// A bare trailing `?` yields no query parameters.
    #[test]
    fn test_path_with_trailing_question_mark() {
        let arena = Arena::new();
        let parsed = parse(&arena, "/foo?").unwrap();

        assert_matches!(
            parsed,
            ParsedPath {
                segments: [PathSegment::Literal("foo")],
                query: [],
            },
        );
    }

    /// Percent escapes in query names and values are decoded.
    #[test]
    fn test_path_with_percent_encoded_query_params() {
        let arena = Arena::new();
        let parsed = parse(&arena, "/foo?a%20b=c%20d").unwrap();

        assert_matches!(
            parsed,
            ParsedPath {
                segments: [PathSegment::Literal("foo")],
                query: [PathQueryParameter {
                    name: "a b",
                    value: "c d",
                }],
            },
        );
    }

    /// The root path may carry a query.
    #[test]
    fn test_root_path_with_query_param() {
        let arena = Arena::new();
        let parsed = parse(&arena, "/?beta=true").unwrap();

        assert_matches!(
            parsed,
            ParsedPath {
                segments: [PathSegment::Literal("")],
                query: [PathQueryParameter {
                    name: "beta",
                    value: "true",
                }],
            },
        );
    }
}