iri-string 0.7.0

IRI as string types
Documentation
//! Validating parsers.

use crate::parser::str::{
    find_split2_hole, find_split_hole, satisfy_chars_with_pct_encoded, starts_with_double_hexdigits,
};
use crate::template::components::MaybeOperator;
use crate::template::error::{Error, ErrorKind};

use crate::template::parser::char::{
    is_ascii_literal_char, is_ascii_varchar_continue, is_ascii_varchar_start,
};

/// Returns `Ok(())` if the given string is a valid literal.
fn validate_literal(s: &str, offset: usize) -> Result<(), Error> {
    match s
        .chars()
        .position(|c| !c.is_ascii() || !is_ascii_literal_char(c as u8))
    {
        Some(pos) => Err(Error::new(ErrorKind::InvalidCharacter, offset + pos)),
        None => Ok(()),
    }
}

/// Returns `Ok(())` if the given string is a valid varspec.
fn validate_varspec(s: &str, offset: usize) -> Result<(), Error> {
    match find_split2_hole(s, b':', b'*') {
        Some((maybe_varname, b':', maybe_len)) => {
            validate_varname(maybe_varname, offset)?;
            if !(1..=5).contains(&maybe_len.len()) {
                return Err(Error::new(
                    ErrorKind::InvalidExpression,
                    offset + maybe_varname.len() + 2,
                ));
            }
            if let Some(pos) = maybe_len.bytes().position(|b| !b.is_ascii_digit()) {
                return Err(Error::new(
                    ErrorKind::InvalidExpression,
                    offset + maybe_varname.len() + 2 + pos,
                ));
            }
        }
        Some((maybe_varname, b'*', extra)) => {
            validate_varname(maybe_varname, offset)?;
            if !extra.is_empty() {
                return Err(Error::new(
                    ErrorKind::InvalidExpression,
                    offset + maybe_varname.len() + 1,
                ));
            }
        }
        Some((_, sep, _)) => unreachable!("[consistency] the byte {sep:#02x} is not searched"),
        None => validate_varname(s, offset)?,
    }
    Ok(())
}

/// Returns `Ok(())` if the given string is a valid varname.
fn validate_varname(s: &str, offset: usize) -> Result<(), Error> {
    let rest = match s.as_bytes().first() {
        Some(b'%') if starts_with_double_hexdigits(&s.as_bytes()[1..]) => &s[3..],
        Some(b) if b.is_ascii() && is_ascii_varchar_start(*b) => &s[1..],
        _ => return Err(Error::new(ErrorKind::InvalidExpression, offset)),
    };
    let is_valid = satisfy_chars_with_pct_encoded(rest, is_ascii_varchar_continue, |_| false);
    if !is_valid {
        return Err(Error::new(ErrorKind::InvalidExpression, offset));
    }
    Ok(())
}

/// Returns `Ok(())` if the given string is a valid expression.
///
/// "Expression" here is the expression body inside `{` and `}`, but not including braces.
fn validate_expr_body(s: &str, mut offset: usize) -> Result<(), Error> {
    if s.is_empty() {
        return Err(Error::new(ErrorKind::InvalidExpression, offset));
    }

    // Skip the operator.
    let maybe_variable_list = match MaybeOperator::from_byte(s.as_bytes()[0]) {
        Some(MaybeOperator::Operator(_)) => {
            offset += 1;
            &s[1..]
        }
        Some(MaybeOperator::Reserved(_)) => {
            return Err(Error::new(ErrorKind::UnsupportedOperator, offset));
        }
        None => s,
    };

    // Validate varspecs.
    for (spec_i, maybe_varspec) in maybe_variable_list.split(',').enumerate() {
        if spec_i != 0 {
            // Add the length of the leading separator `,`.
            offset += 1;
        }
        validate_varspec(maybe_varspec, offset)?;
        offset += maybe_varspec.len();
    }

    Ok(())
}

/// Validates whether the given string is valid as a URI template.
///
/// Returns `Ok(())` if the given string is a valid URI template.
///
/// The template is scanned left to right, alternating between literal text,
/// percent-encoded triplets (`%` followed by two hex digits), and expressions
/// enclosed in `{` and `}`.
///
/// # Errors
///
/// * `ErrorKind::InvalidCharacter` if literal text is rejected by `validate_literal`.
/// * `ErrorKind::InvalidPercentEncoding` if `%` is not followed by two ASCII hex digits.
/// * `ErrorKind::ExpressionNotClosed` if `{` has no matching `}`.
/// * Any error returned by `validate_expr_body` for the expression body.
///
/// This function does not panic on malformed input, including input with
/// non-ASCII characters right after `%`.
pub(in crate::template) fn validate_template_str(s: &str) -> Result<(), Error> {
    let mut rest = s;
    let mut offset = 0;
    while !rest.is_empty() {
        rest = match find_split2_hole(rest, b'%', b'{') {
            Some((literal, b'%', xdigits2_and_rest)) => {
                validate_literal(literal, offset)?;

                // Position of the `%` itself.
                let pct_offset = offset + literal.len();
                // Inspect the raw bytes instead of splitting the string first: the bytes
                // after `%` may be part of a multibyte character, and slicing a `str` at a
                // non-char-boundary panics.
                let after_pct = xdigits2_and_rest.as_bytes();
                if after_pct.len() < 2 {
                    return Err(Error::new(
                        ErrorKind::InvalidPercentEncoding,
                        pct_offset,
                    ));
                }
                if !after_pct[0].is_ascii_hexdigit() {
                    return Err(Error::new(
                        ErrorKind::InvalidPercentEncoding,
                        pct_offset + 1,
                    ));
                }
                if !after_pct[1].is_ascii_hexdigit() {
                    return Err(Error::new(
                        ErrorKind::InvalidPercentEncoding,
                        pct_offset + 2,
                    ));
                }
                // Both bytes are ASCII hex digits, so index 2 is a char boundary.
                &xdigits2_and_rest[2..]
            }
            Some((literal, b'{', expr_and_rest)) => {
                validate_literal(literal, offset)?;

                let (expr, new_rest) = match find_split_hole(expr_and_rest, b'}') {
                    Some(v) => v,
                    None => {
                        // Report the position of the unclosed `{`.
                        return Err(Error::new(
                            ErrorKind::ExpressionNotClosed,
                            offset + literal.len(),
                        ))
                    }
                };

                // +1 is `+ "{".len()`.
                validate_expr_body(expr, offset + literal.len() + 1)?;

                new_rest
            }
            Some(_) => unreachable!("[consistency] searching only `%` and `{{`"),
            // No more `%` or `{`: the remainder is pure literal text.
            None => return validate_literal(rest, offset),
        };
        // `rest` is always a suffix of `s`, so this is the number of bytes consumed so far.
        offset = s.len() - rest.len();
    }

    Ok(())
}