// vyre 0.4.0
//
// GPU compute intermediate representation with a standard operation library.
// Documentation
//! Percent-decoding and parser helpers shared by security detectors.

use super::bytes::{is_base64url_char, lower_ascii};

/// Decode a single ASCII hexadecimal digit into its value in `0..=15`.
///
/// Both lowercase (`a`-`f`) and uppercase (`A`-`F`) letters are accepted.
/// Any other byte yields `None`.
#[must_use]
pub fn hex_nibble(byte: u8) -> Option<u8> {
    // Pick the byte that maps to zero for the range `byte` falls in; letters
    // are shifted by 10 so that `a`/`A` decode to ten.
    let zero_point = match byte {
        b'0'..=b'9' => b'0',
        b'a'..=b'f' => b'a' - 10,
        b'A'..=b'F' => b'A' - 10,
        _ => return None,
    };
    Some(byte - zero_point)
}

/// Percent-decode `input` and normalise every output byte with `lower_ascii`.
///
/// * `%XY`, where `X` and `Y` are both hex digits, becomes the single byte
///   `0xXY`.
/// * `+` becomes a space.
/// * Every other byte, including a `%` that is not followed by two hex
///   digits, is copied through unchanged.
///
/// Each emitted byte is passed through `lower_ascii` before it is stored.
#[must_use]
pub fn percent_decode_lower(input: &[u8]) -> Vec<u8> {
    let mut decoded = Vec::with_capacity(input.len());
    let mut rest = input;
    while let Some((&first, tail)) = rest.split_first() {
        if first == b'%' {
            // An escape needs two more bytes, and both must be hex digits.
            if let [hi, lo, after @ ..] = tail {
                if let (Some(hi), Some(lo)) = (hex_nibble(*hi), hex_nibble(*lo)) {
                    decoded.push(lower_ascii((hi << 4) | lo));
                    rest = after;
                    continue;
                }
            }
        }
        // Not a valid escape: emit the byte itself, mapping `+` to a space.
        let literal = if first == b'+' { b' ' } else { first };
        decoded.push(lower_ascii(literal));
        rest = tail;
    }
    decoded
}

/// Read up to three leading ASCII decimal digits from `input`.
///
/// Returns `(value, digits_consumed)`, or `None` when `input` does not start
/// with a digit. The value is not range-checked here, so it may exceed 255.
#[must_use]
pub fn parse_octet(input: &[u8]) -> Option<(u32, usize)> {
    let digits = input
        .iter()
        .take(3)
        .take_while(|byte| byte.is_ascii_digit())
        .count();
    if digits == 0 {
        return None;
    }
    let value = input[..digits]
        .iter()
        .fold(0u32, |acc, &digit| acc * 10 + u32::from(digit - b'0'));
    Some((value, digits))
}

/// Match a dotted-quad IPv4 address at the start of `input`.
///
/// Returns the number of bytes the address occupies, or `None` when the
/// prefix is not four `.`-separated octets. An octet is rejected when its
/// value exceeds 255 or when it has more than one digit and starts with `0`.
#[must_use]
pub fn parse_ipv4(input: &[u8]) -> Option<usize> {
    let mut cursor = 0usize;
    for octet_no in 0..4 {
        let rest = &input[cursor..];
        let (value, width) = parse_octet(rest)?;
        let has_leading_zero = width > 1 && rest[0] == b'0';
        if has_leading_zero || value > 255 {
            return None;
        }
        cursor += width;
        // The last octet is not followed by a separator.
        if octet_no == 3 {
            break;
        }
        match input.get(cursor) {
            Some(b'.') => cursor += 1,
            _ => return None,
        }
    }
    Some(cursor)
}

/// Parse an IPv6 address prefix with hex groups and optional `::`.
///
/// Scans groups of one to four hex digits separated by single `:` and allows
/// at most one `::` compression marker, which may appear at the start, between
/// groups, or at the end.
///
/// Returns the byte length of the matched address text, or `None` when:
///
/// * a second `::` is encountered, or
/// * the scan ends without either exactly eight groups or a `::` together
///   with at most seven groups.
///
/// A single `:` is consumed only when a hex digit follows it, so a trailing
/// colon (as in `"::1: text"`) is not counted in the returned length.
#[must_use]
pub fn parse_ipv6(input: &[u8]) -> Option<usize> {
    let mut index = 0usize;
    let mut groups = 0usize;
    let mut saw_double = false;
    while index < input.len() {
        if input[index..].starts_with(b"::") {
            if saw_double {
                return None;
            }
            saw_double = true;
            index += 2;
            continue;
        }
        let digits = input[index..]
            .iter()
            .take(4)
            .take_while(|byte| byte.is_ascii_hexdigit())
            .count();
        if digits == 0 {
            break;
        }
        index += digits;
        groups += 1;
        match (input.get(index), input.get(index + 1)) {
            // `::` right after a group: leave both colons in place so the
            // compression branch above sees them. Consuming one here would
            // make addresses such as `fe80::1` unparseable.
            (Some(b':'), Some(b':')) => {}
            // Ordinary separator: only step over it when another group follows.
            (Some(b':'), Some(next)) if next.is_ascii_hexdigit() => index += 1,
            _ => break,
        }
    }
    ((saw_double && groups <= 7) || groups == 8).then_some(index)
}

/// Match a JWT-shaped token at the start of `input`.
///
/// The token must consist of three segments separated by `.`, where each
/// segment is a run of at least eight bytes accepted by `is_base64url_char`.
/// Returns the byte length of the matched token, or `None` if the prefix does
/// not have that shape.
#[must_use]
pub fn parse_jwt(input: &[u8]) -> Option<usize> {
    let mut cursor = 0usize;
    for segment in 0..3 {
        let segment_len = input[cursor..]
            .iter()
            .take_while(|&&byte| is_base64url_char(byte))
            .count();
        if segment_len < 8 {
            return None;
        }
        cursor += segment_len;
        // Only the first two segments are followed by a `.` separator.
        if segment < 2 {
            if input.get(cursor) != Some(&b'.') {
                return None;
            }
            cursor += 1;
        }
    }
    Some(cursor)
}