// gha-expression-proof 1.0.0
//
// GitHub Actions expression evaluator and receipt generator for offline CI
// compatibility testing.
//
// Documentation
use anyhow::{Result, bail};
use serde_json::{Number, Value};

/// The kind of a lexical token produced by [`lex`].
#[derive(Debug, Clone, PartialEq)]
pub enum TokenKind {
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `.`
    Dot,
    /// `,`
    Comma,
    /// `*`
    Star,
    /// `!` when not immediately followed by `=`
    Bang,
    /// `<` when not immediately followed by `=`
    Lt,
    /// `<=`
    Le,
    /// `>` when not immediately followed by `=`
    Gt,
    /// `>=`
    Ge,
    /// `==`
    EqEq,
    /// `!=`
    Ne,
    /// `&&`
    AndAnd,
    /// `||`
    OrOr,
    /// An identifier, stored with its original casing.
    Ident(String),
    /// A literal value: a single-quoted string, a number, or one of the
    /// keywords `true`, `false`, `null` (matched ASCII case-insensitively).
    Literal(Value),
    /// End-of-input marker; `lex` always appends exactly one as the last token.
    Eof,
}

/// A lexical token together with its position in the source text.
#[derive(Debug, Clone, PartialEq)]
pub struct Token {
    /// What kind of token this is (and its payload, if any).
    pub kind: TokenKind,
    /// Byte offset of the token's first character in the input string.
    /// For the trailing `Eof` token this is the input's byte length.
    pub offset: usize,
}

pub fn lex(input: &str) -> Result<Vec<Token>> {
    let chars = input.char_indices().collect::<Vec<_>>();
    let mut tokens = Vec::new();
    let mut i = 0;

    while i < chars.len() {
        let (offset, ch) = chars[i];
        if ch.is_whitespace() {
            i += 1;
            continue;
        }

        let kind = match ch {
            '(' => {
                i += 1;
                TokenKind::LParen
            }
            ')' => {
                i += 1;
                TokenKind::RParen
            }
            '[' => {
                i += 1;
                TokenKind::LBracket
            }
            ']' => {
                i += 1;
                TokenKind::RBracket
            }
            '.' => {
                i += 1;
                TokenKind::Dot
            }
            ',' => {
                i += 1;
                TokenKind::Comma
            }
            '*' => {
                i += 1;
                TokenKind::Star
            }
            '!' if peek_char(&chars, i + 1) == Some('=') => {
                i += 2;
                TokenKind::Ne
            }
            '!' => {
                i += 1;
                TokenKind::Bang
            }
            '<' if peek_char(&chars, i + 1) == Some('=') => {
                i += 2;
                TokenKind::Le
            }
            '<' => {
                i += 1;
                TokenKind::Lt
            }
            '>' if peek_char(&chars, i + 1) == Some('=') => {
                i += 2;
                TokenKind::Ge
            }
            '>' => {
                i += 1;
                TokenKind::Gt
            }
            '=' if peek_char(&chars, i + 1) == Some('=') => {
                i += 2;
                TokenKind::EqEq
            }
            '&' if peek_char(&chars, i + 1) == Some('&') => {
                i += 2;
                TokenKind::AndAnd
            }
            '|' if peek_char(&chars, i + 1) == Some('|') => {
                i += 2;
                TokenKind::OrOr
            }
            '\'' => {
                let (literal, next) = string_literal(&chars, i)?;
                i = next;
                TokenKind::Literal(Value::String(literal))
            }
            '"' => bail!("double-quoted strings are not valid GitHub Actions expressions"),
            '-' | '0'..='9' => {
                let (literal, next) = number_literal(input, &chars, i)?;
                i = next;
                TokenKind::Literal(literal)
            }
            c if is_ident_start(c) => {
                let (ident, next) = identifier(input, &chars, i);
                i = next;
                match ident.to_ascii_lowercase().as_str() {
                    "true" => TokenKind::Literal(Value::Bool(true)),
                    "false" => TokenKind::Literal(Value::Bool(false)),
                    "null" => TokenKind::Literal(Value::Null),
                    _ => TokenKind::Ident(ident),
                }
            }
            _ => bail!("unexpected character `{ch}` at byte {offset}"),
        };

        tokens.push(Token { kind, offset });
    }

    tokens.push(Token {
        kind: TokenKind::Eof,
        offset: input.len(),
    });
    Ok(tokens)
}

/// Reads a single-quoted string literal whose opening quote is at
/// `chars[start]`.
///
/// A doubled quote (`''`) inside the literal stands for one literal `'`.
/// Returns the unescaped contents and the index (into `chars`) just past the
/// closing quote.
///
/// # Errors
///
/// Fails if the input ends before a closing quote is found.
fn string_literal(chars: &[(usize, char)], start: usize) -> Result<(String, usize)> {
    let mut text = String::new();
    let mut pos = start + 1;

    while let Some(current) = peek_char(chars, pos) {
        if current != '\'' {
            text.push(current);
            pos += 1;
            continue;
        }

        // A quote followed by anything other than a second quote closes the literal.
        if peek_char(chars, pos + 1) != Some('\'') {
            return Ok((text, pos + 1));
        }

        // `''` is the escape sequence for a single quote.
        text.push('\'');
        pos += 2;
    }

    bail!(
        "unterminated string literal starting at byte {}",
        chars[start].0
    )
}

/// Reads a number literal starting at `chars[start]`.
///
/// Accepts an optional leading `-`, then either a hexadecimal integer
/// (`0x` / `0X` followed by at least one hex digit) or a decimal number with
/// optional fraction and exponent parts.
///
/// Returns the parsed value and the index (into `chars`) just past the
/// literal.
///
/// # Errors
///
/// Fails when a hex literal has no digits or does not fit in a signed 64-bit
/// integer, when an exponent has no digits, or when the scanned text cannot
/// be parsed as a finite number.
fn number_literal(input: &str, chars: &[(usize, char)], start: usize) -> Result<(Value, usize)> {
    let mut i = start;
    let negative = peek_char(chars, i) == Some('-');
    if negative {
        i += 1;
    }

    if peek_char(chars, i) == Some('0') && matches!(peek_char(chars, i + 1), Some('x' | 'X')) {
        i += 2;
        let hex_start = i;
        while matches!(peek_char(chars, i), Some(c) if c.is_ascii_hexdigit()) {
            i += 1;
        }
        if i == hex_start {
            bail!("hex number literal must contain at least one digit");
        }
        // `i > hex_start` here, so `chars[hex_start]` is in bounds.
        let digits = &input[chars[hex_start].0..end_offset(input, chars, i)];
        // Parse into a wider type first so negating the magnitude cannot overflow.
        let magnitude = i128::from_str_radix(digits, 16)?;
        let signed = if negative { -magnitude } else { magnitude };
        // Checked narrowing: a plain `as i64` cast would silently wrap
        // out-of-range literals (e.g. `0x10000000000000000` would become 0).
        let value = i64::try_from(signed).map_err(|_| {
            anyhow::anyhow!(
                "hex number literal `{}` is out of range for a 64-bit integer",
                &input[chars[start].0..end_offset(input, chars, i)]
            )
        })?;
        return Ok((Value::Number(Number::from(value)), i));
    }

    // Integer part.
    while matches!(peek_char(chars, i), Some(c) if c.is_ascii_digit()) {
        i += 1;
    }
    // Optional fraction part.
    if peek_char(chars, i) == Some('.') {
        i += 1;
        while matches!(peek_char(chars, i), Some(c) if c.is_ascii_digit()) {
            i += 1;
        }
    }
    // Optional exponent part, which must carry at least one digit.
    if matches!(peek_char(chars, i), Some('e' | 'E')) {
        i += 1;
        if matches!(peek_char(chars, i), Some('+' | '-')) {
            i += 1;
        }
        let exp_start = i;
        while matches!(peek_char(chars, i), Some(c) if c.is_ascii_digit()) {
            i += 1;
        }
        if i == exp_start {
            bail!("exponent must contain at least one digit");
        }
    }

    let slice = &input[chars[start].0..end_offset(input, chars, i)];
    // Try the JSON number grammar first; if the text is not valid JSON, fall
    // back to Rust's `f64` parser and reject anything that is not a finite
    // number (`Number::from_f64` returns `None` for those).
    let value = serde_json::from_str::<Value>(slice).or_else(|_| {
        slice
            .parse::<f64>()
            .ok()
            .and_then(Number::from_f64)
            .map(Value::Number)
            .ok_or_else(|| anyhow::anyhow!("invalid number literal `{slice}`"))
    })?;
    Ok((value, i))
}

/// Reads an identifier whose first character is at `chars[start]`.
///
/// The first character is taken as-is (the caller has already checked it);
/// the identifier then extends over every following character accepted by
/// `is_ident_continue`. Returns the identifier text and the index (into
/// `chars`) just past it.
fn identifier(input: &str, chars: &[(usize, char)], start: usize) -> (String, usize) {
    let continuation_len = chars
        .iter()
        .skip(start + 1)
        .take_while(|&&(_, c)| is_ident_continue(c))
        .count();
    let end = start + 1 + continuation_len;

    let first_byte = chars[start].0;
    let last_byte = end_offset(input, chars, end);
    (input[first_byte..last_byte].to_owned(), end)
}

/// Returns `true` if `ch` may begin an identifier: an ASCII letter or `_`.
fn is_ident_start(ch: char) -> bool {
    matches!(ch, 'a'..='z' | 'A'..='Z' | '_')
}

/// Returns `true` if `ch` may appear after the first character of an
/// identifier: an ASCII letter, an ASCII digit, `_`, or `-`.
fn is_ident_continue(ch: char) -> bool {
    matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-')
}

/// Returns the character at index `i` of `chars`, or `None` when `i` is past
/// the end.
fn peek_char(chars: &[(usize, char)], i: usize) -> Option<char> {
    let &(_, ch) = chars.get(i)?;
    Some(ch)
}

/// Converts the character index `i` into a byte offset within `input`.
///
/// When `i` is past the last character, the byte length of `input` is
/// returned, so the result can be used as the exclusive end of a slice.
fn end_offset(input: &str, chars: &[(usize, char)], i: usize) -> usize {
    match chars.get(i) {
        Some(&(offset, _)) => offset,
        None => input.len(),
    }
}