uri_template_ex/
lib.rs

1use parse_display::Display;
2use regex::{Regex, escape};
3use std::fmt::Write;
4use std::ops::Range;
5use std::str::{self, CharIndices};
6use std::sync::LazyLock;
7use std::{borrow::Cow, fmt};
8
9mod vars;
10
11mod tests_readme;
12
13pub use vars::Vars;
14
/// A parsed URI template (RFC6570 Level 2).
///
/// Built by `UriTemplate::new`; can be expanded with variables or matched
/// against a URI to capture variable values.
#[derive(Clone)]
pub struct UriTemplate {
    /// The template text exactly as it was passed to `UriTemplate::new`.
    source: String,
    /// One entry per literal character, percent-encoded triplet, or
    /// expression, in source order.
    segments: Vec<Segment>,
    /// One entry per `{...}` expression, in source order.
    exprs: Vec<Expr>,
    /// Anchored (`^...$`) regex generated from the template; used by `captures`.
    regex: Regex,
}
23impl std::fmt::Debug for UriTemplate {
24    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
25        write!(f, "\"{}\"", self.source)
26    }
27}
28impl fmt::Display for UriTemplate {
29    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
30        write!(f, "{}", self.source)
31    }
32}
33
/// One piece of a template.
///
/// A segment does not store its own offset; `Segment::expand` is handed a
/// running index into the template source and advances it.
#[derive(Debug, Clone)]
enum Segment {
    /// `len` bytes of source text that are copied to the output unchanged.
    Literals { len: usize },
    /// `len` bytes of source text whose characters are percent-encoded on output.
    LiteralsNeedEncode { len: usize },
    /// A `{...}` expression; its details are the next unread entry of the
    /// expression list passed to `Segment::expand`.
    Expr,
}
40impl Segment {
41    fn expand(
42        &self,
43        source: &str,
44        source_index: &mut usize,
45        exprs: &[Expr],
46        expr_index: &mut usize,
47        vars: &mut impl Vars,
48        out: &mut String,
49    ) {
50        match self {
51            Segment::Literals { len } => {
52                out.push_str(&source[*source_index..*source_index + len]);
53                *source_index += len;
54            }
55            Segment::LiteralsNeedEncode { len } => {
56                for c in source[*source_index..*source_index + len].chars() {
57                    encode_char(c, out);
58                }
59                *source_index += len;
60            }
61            Segment::Expr => {
62                let expr = &exprs[*expr_index];
63                expr.expand(source, *expr_index, vars, out);
64                *source_index += expr.len();
65                *expr_index += 1;
66            }
67        }
68    }
69}
70
/// A single `{...}` expression of a template.
#[derive(Debug, Clone)]
struct Expr {
    /// The operator that followed `{`, if any (`+` or `#`).
    op: Option<Operator>,
    /// Byte range of the variable name inside the template source
    /// (braces and operator excluded).
    var_name_range: Range<usize>,
}
76impl Expr {
77    fn len(&self) -> usize {
78        self.var_name_range.len() + 2 + if self.op.is_some() { 1 } else { 0 }
79    }
80    fn to_regex(&self) -> String {
81        match self.op {
82            Some(op) => {
83                let prefix = escape(op.to_prefix());
84                format!("(?:{prefix}([{RE_UNRESERVED}{RE_RESERVED}%]*))?",)
85            }
86            None => format!("([{RE_UNRESERVED}%]*)",),
87        }
88    }
89    fn expand(&self, source: &str, expr_index: usize, vars: &mut impl Vars, out: &mut String) {
90        let var_name = &source[self.var_name_range.clone()];
91        let var = vars.var(expr_index, var_name);
92        let Some(var) = var else {
93            return;
94        };
95        match self.op {
96            Some(op) => {
97                out.push_str(op.to_prefix());
98                encode_str_url(&var, out);
99            }
100            None => {
101                encode_str_unresreved(&var, out);
102            }
103        }
104    }
105}
106
/// Expression operator written directly after `{`.
#[derive(Debug, PartialEq, Clone, Copy)]
enum Operator {
    /// `+`
    Reserved,
    /// `#`
    Fragment,
}
impl Operator {
    /// Maps an operator character to its variant; any other character gives `None`.
    fn from_char(c: char) -> Option<Self> {
        if c == '+' {
            Some(Self::Reserved)
        } else if c == '#' {
            Some(Self::Fragment)
        } else {
            None
        }
    }

    /// Text written in front of the expanded value for this operator.
    fn to_prefix(self) -> &'static str {
        let prefix = match self {
            Self::Fragment => "#",
            Self::Reserved => "",
        };
        prefix
    }
}
129
130impl UriTemplate {
131    pub fn new(s: &str) -> Result<Self> {
132        let mut segments = Vec::new();
133        let mut exprs = Vec::new();
134        let mut iter = DecodedIter::new(s);
135        let mut current = iter.next();
136        let mut re = String::from("^");
137
138        'root: while let Some(d) = current {
139            match d {
140                Decoded::Char { index, ch: '{' } => {
141                    let var_start = index;
142                    let mut op = None;
143                    current = iter.next();
144                    if let Some(d) = current {
145                        if let Some(ch) = d.ch() {
146                            op = Operator::from_char(ch);
147                        }
148                        if op.is_some() {
149                            current = iter.next();
150                        }
151                    }
152                    if let Some(d) = current {
153                        let var_name_start = d.index();
154                        while let Some(d) = current {
155                            if d.ch() == Some('}') {
156                                let expr = Expr {
157                                    op,
158                                    var_name_range: var_name_start..d.index(),
159                                };
160                                re.push_str(&expr.to_regex());
161                                exprs.push(expr);
162                                segments.push(Segment::Expr);
163                                current = iter.next();
164                                continue 'root;
165                            }
166                            current = iter.next();
167                        }
168                        return Err(Error {
169                            source: s.to_string(),
170                            kind: ErrorKind::InvalidExpression,
171                            source_index: var_start,
172                        });
173                    }
174                }
175                Decoded::Char { ch, .. } => {
176                    let len = ch.len_utf8();
177                    if is_reserved(ch) || is_unreserved(ch) {
178                        segments.push(Segment::Literals { len: 1 });
179                        re.push_str(&escape(&ch.to_string()));
180                    } else {
181                        segments.push(Segment::LiteralsNeedEncode { len });
182                        let mut s0 = String::new();
183                        encode_char(ch, &mut s0);
184                        re.push_str(&escape(&s0));
185                    }
186                }
187                Decoded::Byte { s, .. } => {
188                    segments.push(Segment::Literals { len: s.len() });
189                    re.push_str(&escape(s));
190                }
191            }
192            current = iter.next();
193        }
194        re.push('$');
195        Ok(Self {
196            source: s.to_string(),
197            segments,
198            exprs,
199            regex: Regex::new(&re).unwrap(),
200        })
201    }
202
203    pub fn expand(&self, mut vars: impl Vars) -> String {
204        let mut out = String::new();
205        let mut expr_index = 0;
206        let mut source_index = 0;
207        for segment in &self.segments {
208            segment.expand(
209                &self.source,
210                &mut source_index,
211                &self.exprs,
212                &mut expr_index,
213                &mut vars,
214                &mut out,
215            );
216        }
217        out
218    }
219    pub fn captures<'a>(&'a self, input: &'a str) -> Option<Captures<'a>> {
220        let captures = self.regex.captures(input)?;
221        let mut ms = Vec::with_capacity(self.exprs.len());
222        for (expr_index, expr) in self.exprs.iter().enumerate() {
223            if let Some(m) = captures.get(expr_index + 1) {
224                ms.push(Some(Match::new(m, self.var_name(expr_index), expr.op)));
225            } else {
226                ms.push(None);
227            }
228        }
229        Some(Captures { template: self, ms })
230    }
231    fn var_name(&self, index: usize) -> &str {
232        &self.source[self.exprs[index].var_name_range.clone()]
233    }
234
235    pub fn var_names(&self) -> impl Iterator<Item = &str> {
236        (0..self.exprs.len()).map(|i| self.var_name(i))
237    }
238    pub fn find_var_name(&self, name: &str) -> Option<usize> {
239        (0..self.exprs.len()).find(|&i| self.var_name(i) == name)
240    }
241}
242
/// Returns `true` for ASCII letters, ASCII digits and `-`, `.`, `_`, `~`.
fn is_unreserved(c: char) -> bool {
    c.is_ascii_alphanumeric() || matches!(c, '-' | '.' | '_' | '~')
}
/// The same character set as `is_unreserved`, written for use inside a regex
/// character class.
const RE_UNRESERVED: &str = r"A-Za-z0-9\-._~";
247
/// Returns `true` for the URI reserved characters (gen-delims and sub-delims).
fn is_reserved(c: char) -> bool {
    const GEN_DELIMS: &str = ":/?#[]@";
    // `'` is included per https://www.rfc-editor.org/errata/eid6937
    const SUB_DELIMS: &str = "!$&'()*+,;=";
    GEN_DELIMS.contains(c) || SUB_DELIMS.contains(c)
}
/// The same character set as `is_reserved`, written for use inside a regex
/// character class.
const RE_RESERVED: &str = r":/?#\[\]@!$&'()*+,;=";
271
/// Appends the percent-encoding of `ch` to `out`: one upper-case `%XX`
/// triplet per byte of its UTF-8 encoding.
fn encode_char(ch: char, out: &mut String) {
    let mut utf8 = [0u8; 4];
    for byte in ch.encode_utf8(&mut utf8).bytes() {
        write!(out, "%{byte:02X}").unwrap();
    }
}
277fn encode_str_unresreved(s: &str, out: &mut String) {
278    for ch in s.chars() {
279        if is_unreserved(ch) {
280            out.push(ch);
281        } else {
282            encode_char(ch, out);
283        }
284    }
285}
286fn encode_str_url(s: &str, out: &mut String) {
287    let iter = DecodedIter::new(s);
288    for d in iter {
289        match d {
290            Decoded::Char { ch, .. } => {
291                if is_unreserved(ch) || is_reserved(ch) {
292                    out.push(ch);
293                } else {
294                    encode_char(ch, out);
295                }
296            }
297            Decoded::Byte { s, .. } => {
298                out.push_str(s);
299            }
300        }
301    }
302}
303
/// Accumulates the result of percent-decoding a string.
struct Decoder<'a> {
    /// The text being decoded; copied into any error that is produced.
    source: &'a str,
    /// Byte offset into the encoded text of the input consumed so far;
    /// reported as the position of an error.
    source_index: usize,
    /// Decoded text committed so far.
    out: String,
    /// Decoded bytes that have not yet been validated as UTF-8.
    bytes: Vec<u8>,
}
310impl<'a> Decoder<'a> {
311    fn new(source: &'a str, source_index: usize) -> Self {
312        Self {
313            source,
314            source_index,
315            out: String::new(),
316            bytes: Vec::new(),
317        }
318    }
319    fn push_char(&mut self, ch: char) -> Result<()> {
320        self.commit_bytes()?;
321        self.source_index += ch.len_utf8();
322        self.out.push(ch);
323        Ok(())
324    }
325    fn push_byte(&mut self, b: u8) {
326        self.bytes.push(b);
327    }
328    fn commit_bytes(&mut self) -> Result<()> {
329        for check in self.bytes.utf8_chunks() {
330            let valid = check.valid();
331            self.source_index += valid.len() * 3;
332            self.out.push_str(valid);
333            if !check.invalid().is_empty() {
334                return Err(Error::new(
335                    self.source,
336                    self.source_index,
337                    ErrorKind::InvalidUtf8,
338                ));
339            }
340        }
341        self.bytes.clear();
342        Ok(())
343    }
344    fn build(mut self) -> Result<String> {
345        self.commit_bytes()?;
346        Ok(self.out)
347    }
348}
349
350fn decode_str(s: &str, source_index: usize) -> Result<String> {
351    let mut out = Decoder::new(s, source_index);
352    for d in DecodedIter::new(s) {
353        match d {
354            Decoded::Char { ch, .. } => {
355                out.push_char(ch)?;
356            }
357            Decoded::Byte { b, .. } => {
358                out.push_byte(b);
359            }
360        }
361    }
362    out.build()
363}
364
/// Value of the ASCII hexadecimal digit `c` (either case), or `None` when `c`
/// is not a hexadecimal digit.
fn to_u8(c: char) -> Option<u8> {
    c.to_digit(16).and_then(|digit| u8::try_from(digit).ok())
}
373
/// One item produced while scanning possibly percent-encoded text.
#[derive(Clone, Copy)]
enum Decoded<'a> {
    /// A literal character located at byte offset `index`.
    Char { index: usize, ch: char },
    /// A `%XX` triplet starting at byte offset `index`: `b` is the decoded
    /// byte and `s` the triplet's source text.
    Byte { index: usize, b: u8, s: &'a str },
}
impl Decoded<'_> {
    /// The literal character, or `None` for a decoded byte.
    fn ch(&self) -> Option<char> {
        if let Self::Char { ch, .. } = *self {
            Some(ch)
        } else {
            None
        }
    }

    /// Byte offset at which this item starts in the scanned text.
    fn index(&self) -> usize {
        match *self {
            Self::Char { index, .. } | Self::Byte { index, .. } => index,
        }
    }
}
/// Iterator that scans text and yields literal characters and `%XX` triplets.
#[derive(Clone)]
struct DecodedIter<'a> {
    /// The full text being scanned; triplet source slices are taken from it.
    source: &'a str,
    /// Cursor over the characters of `source`.
    chars_indices: CharIndices<'a>,
}
impl<'a> DecodedIter<'a> {
    /// Starts scanning `source` from its beginning.
    fn new(source: &'a str) -> Self {
        let chars_indices = source.char_indices();
        Self {
            source,
            chars_indices,
        }
    }
}
406impl<'a> Iterator for DecodedIter<'a> {
407    type Item = Decoded<'a>;
408
409    fn next(&mut self) -> Option<Self::Item> {
410        let (index, ch) = self.chars_indices.next()?;
411        if ch == '%' {
412            let this = self.clone();
413            if let Some(b) = next_decoded_u8(&mut self.chars_indices) {
414                Some(Decoded::Byte {
415                    index,
416                    b,
417                    s: &self.source[index..][..3],
418                })
419            } else {
420                *self = this;
421                Some(Decoded::Char { index, ch: '%' })
422            }
423        } else {
424            Some(Decoded::Char { index, ch })
425        }
426    }
427}
428
429fn next_decoded_u8(chars_indices: &mut CharIndices) -> Option<u8> {
430    let c0 = next_hex(chars_indices)?;
431    let c1 = next_hex(chars_indices)?;
432    Some(c0 * 16 + c1)
433}
434fn next_hex(chars_indices: &mut CharIndices) -> Option<u8> {
435    let (_, c) = chars_indices.next()?;
436    to_u8(c)
437}
438
/// The variables captured by matching a URI against a `UriTemplate`.
#[derive(Debug)]
pub struct Captures<'a> {
    /// The template the match was made with; supplies the variable names.
    template: &'a UriTemplate,
    /// One slot per expression of the template, in template order; `None`
    /// when the expression did not take part in the match.
    ms: Vec<Option<Match<'a>>>,
}
444
445impl Captures<'_> {
446    pub fn empty() -> Self {
447        static DUMMY_TEMPLATE: LazyLock<UriTemplate> =
448            LazyLock::new(|| UriTemplate::new("").unwrap());
449        Self {
450            template: &DUMMY_TEMPLATE,
451            ms: Vec::new(),
452        }
453    }
454
455    pub fn name(&self, name: &str) -> Option<&Match> {
456        for (expr, m) in self.template.exprs.iter().zip(&self.ms) {
457            if &self.template.source[expr.var_name_range.clone()] == name {
458                return m.as_ref();
459            }
460        }
461        None
462    }
463    pub fn get(&self, i: usize) -> Option<&Match> {
464        self.ms.get(i)?.as_ref()
465    }
466    pub fn len(&self) -> usize {
467        self.ms.len()
468    }
469    pub fn is_empty(&self) -> bool {
470        self.ms.is_empty()
471    }
472    pub fn iter(&self) -> impl Iterator<Item = (&str, Option<&Match>)> {
473        (0..self.ms.len()).map(|i| (self.template.var_name(i), self.get(i)))
474    }
475}
476
/// The part of a matched URI that corresponds to one template expression.
#[derive(Debug)]
pub struct Match<'a> {
    /// The underlying regex capture group match.
    m: regex::Match<'a>,
    /// Name of the template variable this match belongs to.
    name: &'a str,
    /// Operator of the expression; decides how `value` interprets the text.
    op: Option<Operator>,
}
483impl<'a> Match<'a> {
484    fn new(m: regex::Match<'a>, name: &'a str, op: Option<Operator>) -> Self {
485        Self { m, name, op }
486    }
487    pub fn name(&self) -> &str {
488        self.name
489    }
490    pub fn value(&self) -> Result<Cow<str>> {
491        match self.op {
492            None => Ok(Cow::Owned(decode_str(self.m.as_str(), 0)?)),
493            Some(Operator::Reserved | Operator::Fragment) => Ok(Cow::Borrowed(self.source())),
494        }
495    }
496    pub fn source(&self) -> &str {
497        self.m.as_str()
498    }
499    pub fn start(&self) -> usize {
500        self.m.start()
501    }
502    pub fn end(&self) -> usize {
503        self.m.end()
504    }
505}
506
/// Result type whose error is this crate's [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
508
/// The reason an [`Error`] was produced.
#[derive(Clone, Copy, Debug, Display)]
enum ErrorKind {
    /// A `{` in a template was not followed by a closing `}`.
    InvalidExpression,
    /// Percent-decoded bytes did not form valid UTF-8.
    InvalidUtf8,
}
514
/// Error produced while parsing a template or decoding a matched value.
#[derive(Clone, Debug)]
pub struct Error {
    /// The text that was being processed when the error occurred.
    source: String,
    /// Byte offset into `source` at which the problem was detected.
    source_index: usize,
    /// What went wrong.
    kind: ErrorKind,
}
521
522impl Error {
523    fn new(source: &str, source_index: usize, kind: ErrorKind) -> Self {
524        Self {
525            source: source.to_string(),
526            source_index,
527            kind,
528        }
529    }
530}
531impl fmt::Display for Error {
532    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
533        writeln!(
534            f,
535            "{} (\"{} >>>> {}\")",
536            self.kind,
537            &self.source[..self.source_index],
538            &self.source[self.source_index..],
539        )
540    }
541}
542
// Marks `Error` as a standard error type; all trait methods keep their defaults.
impl std::error::Error for Error {}