iri_string/template/parser/
validate.rs

1//! Validating parsers.
2
3use crate::parser::str::{
4    find_split2_hole, find_split_hole, satisfy_chars_with_pct_encoded, starts_with_double_hexdigits,
5};
6use crate::template::components::MaybeOperator;
7use crate::template::error::{Error, ErrorKind};
8
9use crate::template::parser::char::{
10    is_ascii_literal_char, is_ascii_varchar_continue, is_ascii_varchar_start,
11};
12
13/// Returns `Ok(())` if the given string is a valid literal.
14fn validate_literal(s: &str, offset: usize) -> Result<(), Error> {
15    match s
16        .chars()
17        .position(|c| !c.is_ascii() || !is_ascii_literal_char(c as u8))
18    {
19        Some(pos) => Err(Error::new(ErrorKind::InvalidCharacter, offset + pos)),
20        None => Ok(()),
21    }
22}
23
24/// Returns `Ok(())` if the given string is a valid varspec.
25fn validate_varspec(s: &str, offset: usize) -> Result<(), Error> {
26    match find_split2_hole(s, b':', b'*') {
27        Some((maybe_varname, b':', maybe_len)) => {
28            validate_varname(maybe_varname, offset)?;
29            if !(1..=5).contains(&maybe_len.len()) {
30                return Err(Error::new(
31                    ErrorKind::InvalidExpression,
32                    offset + maybe_varname.len() + 2,
33                ));
34            }
35            if let Some(pos) = maybe_len.bytes().position(|b| !b.is_ascii_digit()) {
36                return Err(Error::new(
37                    ErrorKind::InvalidExpression,
38                    offset + maybe_varname.len() + 2 + pos,
39                ));
40            }
41        }
42        Some((maybe_varname, b'*', extra)) => {
43            validate_varname(maybe_varname, offset)?;
44            if !extra.is_empty() {
45                return Err(Error::new(
46                    ErrorKind::InvalidExpression,
47                    offset + maybe_varname.len() + 1,
48                ));
49            }
50        }
51        Some((_, sep, _)) => unreachable!("[consistency] the byte {sep:#02x} is not searched"),
52        None => validate_varname(s, offset)?,
53    }
54    Ok(())
55}
56
57/// Returns `Ok(())` if the given string is a valid varname.
58pub(crate) fn validate_varname(s: &str, offset: usize) -> Result<(), Error> {
59    let rest = match s.as_bytes().first() {
60        Some(b'%') if starts_with_double_hexdigits(&s.as_bytes()[1..]) => &s[3..],
61        Some(b) if b.is_ascii() && is_ascii_varchar_start(*b) => &s[1..],
62        _ => return Err(Error::new(ErrorKind::InvalidExpression, offset)),
63    };
64    let is_valid = satisfy_chars_with_pct_encoded(rest, is_ascii_varchar_continue, |_| false);
65    if !is_valid {
66        return Err(Error::new(ErrorKind::InvalidExpression, offset));
67    }
68    Ok(())
69}
70
71/// Returns `Ok(())` if the given string is a valid expression.
72///
73/// "Expression" here is the expression body inside `{` and `}`, but not including braces.
74fn validate_expr_body(s: &str, mut offset: usize) -> Result<(), Error> {
75    if s.is_empty() {
76        return Err(Error::new(ErrorKind::InvalidExpression, offset));
77    }
78
79    // Skip the operator.
80    let maybe_variable_list = match MaybeOperator::from_byte(s.as_bytes()[0]) {
81        Some(MaybeOperator::Operator(_)) => {
82            offset += 1;
83            &s[1..]
84        }
85        Some(MaybeOperator::Reserved(_)) => {
86            return Err(Error::new(ErrorKind::UnsupportedOperator, offset));
87        }
88        None => s,
89    };
90
91    // Validate varspecs.
92    for (spec_i, maybe_varspec) in maybe_variable_list.split(',').enumerate() {
93        if spec_i != 0 {
94            // Add the length of the leading separator `,`.
95            offset += 1;
96        }
97        validate_varspec(maybe_varspec, offset)?;
98        offset += maybe_varspec.len();
99    }
100
101    Ok(())
102}
103
104/// Validates whether the given string is valid as a URI template.
105///
106/// Returns `Ok(())` if the given string is a valid URI template.
107pub(in crate::template) fn validate_template_str(s: &str) -> Result<(), Error> {
108    let mut rest = s;
109    let mut offset = 0;
110    while !rest.is_empty() {
111        rest = match find_split2_hole(rest, b'%', b'{') {
112            Some((literal, b'%', xdigits2_and_rest)) => {
113                validate_literal(literal, offset)?;
114
115                if xdigits2_and_rest.len() < 2 {
116                    return Err(Error::new(
117                        ErrorKind::InvalidPercentEncoding,
118                        offset + literal.len(),
119                    ));
120                }
121                let (xdigits2, new_rest) = xdigits2_and_rest.split_at(2);
122                if !xdigits2.as_bytes()[0].is_ascii_hexdigit() {
123                    return Err(Error::new(
124                        ErrorKind::InvalidPercentEncoding,
125                        offset + literal.len() + 1,
126                    ));
127                }
128                if !xdigits2.as_bytes()[1].is_ascii_hexdigit() {
129                    return Err(Error::new(
130                        ErrorKind::InvalidPercentEncoding,
131                        offset + literal.len() + 2,
132                    ));
133                }
134                new_rest
135            }
136            Some((literal, b'{', expr_and_rest)) => {
137                validate_literal(literal, offset)?;
138
139                let (expr, new_rest) = match find_split_hole(expr_and_rest, b'}') {
140                    Some(v) => v,
141                    None => {
142                        return Err(Error::new(
143                            ErrorKind::ExpressionNotClosed,
144                            offset + literal.len(),
145                        ))
146                    }
147                };
148
149                // +1 is `+ "{".len()`.
150                validate_expr_body(expr, offset + literal.len() + 1)?;
151
152                new_rest
153            }
154            Some(_) => unreachable!("[consistency] searching only `%` and `{{`"),
155            None => return validate_literal(rest, offset),
156        };
157        offset = s.len() - rest.len();
158    }
159
160    Ok(())
161}