vyre-conform 0.1.0

Conformance suite for vyre backends — proves byte-identical output to CPU reference
Documentation
use crate::{Convention, DataType, OpSignature, OpSpec};

/// Returns the WGSL compute shader used for the tokenize op.
///
/// The shader text mirrors the copy in `gputokenize`; it is kept inline here
/// so the conformance crate stays self-contained.
///
/// What the shader does, as written below:
/// - `source` is read as packed bytes, four per `u32`, lowest byte first
///   (see `get_char`).
/// - Each invocation handles one byte index `idx < config.length`, replays
///   the scan from byte 0 up to and including `idx`, and stores the resulting
///   `STATE_*` code in `tokens[idx]`.
///
/// The returned `String` is a fresh owned copy of the embedded literal.
fn tokenize_wgsl() -> String {
    // The shader body is runtime data: keep it byte-for-byte stable, including
    // the leading newline right after the opening delimiter.
    const SHADER: &str = r#"
const STATE_STRING: u32 = 0u;
const STATE_IDENTIFIER: u32 = 1u;
const STATE_NUMBER: u32 = 2u;
const STATE_COMMENT: u32 = 3u;
const STATE_REGEX: u32 = 4u;
const STATE_OPERATOR: u32 = 5u;
const STATE_WHITESPACE: u32 = 6u;
const STATE_UNKNOWN: u32 = 7u;

struct Config {
    length: u32,
};

@group(0) @binding(0) var<storage, read> source: array<u32>;
@group(0) @binding(1) var<storage, read_write> tokens: array<u32>;
@group(0) @binding(2) var<uniform> config: Config;

fn get_char(idx: u32) -> u32 {
    let word = source[idx / 4u];
    let shift = (idx % 4u) * 8u;
    return (word >> shift) & 0xFFu;
}

@compute @workgroup_size(64)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    let idx = global_id.x;
    if (idx >= config.length) {
        return;
    }

    var state = STATE_WHITESPACE;
    var string_quote = 0u;
    var in_escape = false;
    var in_block_comment = false;
    var block_comment_star = false;
    var in_line_comment = false;
    var in_regex = false;
    var last_significant_state = STATE_UNKNOWN;

    for (var i = 0u; i <= idx; i = i + 1u) {
        let c = get_char(i);

        if (in_escape) {
            in_escape = false;
            if (i == idx) {
                if (string_quote != 0u) { state = STATE_STRING; }
                else if (in_regex) { state = STATE_REGEX; }
                else { state = STATE_UNKNOWN; }
            }
            continue;
        }

        if (c == 92u) { // '\\'
            if (string_quote != 0u) {
                in_escape = true;
                if (i == idx) { state = STATE_STRING; }
                continue;
            } else if (in_regex) {
                in_escape = true;
                if (i == idx) { state = STATE_REGEX; }
                continue;
            }
        }

        if (in_line_comment) {
            if (c == 10u) { // '\n'
                in_line_comment = false;
                if (i == idx) { state = STATE_WHITESPACE; }
            } else {
                if (i == idx) { state = STATE_COMMENT; }
            }
            continue;
        }

        if (in_block_comment) {
            if (block_comment_star && c == 47u) { // '/'
                in_block_comment = false;
                block_comment_star = false;
                if (i == idx) { state = STATE_COMMENT; }
            } else {
                block_comment_star = (c == 42u); // '*'
                if (i == idx) { state = STATE_COMMENT; }
            }
            continue;
        }

        if (string_quote != 0u) {
            if (c == string_quote) {
                string_quote = 0u;
                if (i == idx) { state = STATE_STRING; }
                last_significant_state = STATE_STRING;
            } else {
                if (i == idx) { state = STATE_STRING; }
            }
            continue;
        }

        if (in_regex) {
            if (c == 47u) { // '/'
                in_regex = false;
                if (i == idx) { state = STATE_REGEX; }
                last_significant_state = STATE_REGEX;
            } else {
                if (i == idx) { state = STATE_REGEX; }
            }
            continue;
        }

        if (c == 47u) { // '/'
            var next_c = 0u;
            if (i + 1u < config.length) {
                next_c = get_char(i + 1u);
            }
            if (next_c == 47u) { // "//"
                in_line_comment = true;
                if (i == idx) { state = STATE_COMMENT; }
                continue;
            } else if (next_c == 42u) { // "/*"
                in_block_comment = true;
                block_comment_star = false;
                if (i == idx) { state = STATE_COMMENT; }
                continue;
            } else {
                if (last_significant_state == STATE_OPERATOR || last_significant_state == STATE_UNKNOWN || last_significant_state == STATE_WHITESPACE) {
                    in_regex = true;
                    if (i == idx) { state = STATE_REGEX; }
                    continue;
                } else {
                    if (i == idx) { state = STATE_OPERATOR; }
                    last_significant_state = STATE_OPERATOR;
                    continue;
                }
            }
        }

        if (c == 34u || c == 39u || c == 96u) { // '"', '\'', '`'
            string_quote = c;
            if (i == idx) { state = STATE_STRING; }
            continue;
        }

        let is_alpha = (c >= 65u && c <= 90u) || (c >= 97u && c <= 122u) || c == 95u || c == 36u;
        let is_digit = (c >= 48u && c <= 57u);

        if (is_alpha || (is_digit && state == STATE_IDENTIFIER)) {
            state = STATE_IDENTIFIER;
            last_significant_state = STATE_IDENTIFIER;
            continue;
        }

        if (is_digit || (c == 46u && state == STATE_NUMBER)) { // '.'
            state = STATE_NUMBER;
            last_significant_state = STATE_NUMBER;
            continue;
        }

        if (c == 32u || c == 9u || c == 10u || c == 13u) {
            state = STATE_WHITESPACE;
            continue;
        }

        let is_op = c == 40u || c == 41u || c == 123u || c == 125u || c == 91u || c == 93u ||
                    c == 61u || c == 43u || c == 45u || c == 42u || c == 37u || c == 33u ||
                    c == 60u || c == 62u || c == 38u || c == 124u || c == 94u || c == 126u ||
                    c == 63u || c == 58u || c == 44u || c == 46u || c == 59u;

        if (is_op) {
            state = STATE_OPERATOR;
            last_significant_state = STATE_OPERATOR;
            continue;
        }

        state = STATE_UNKNOWN;
        last_significant_state = STATE_UNKNOWN;
    }

    tokens[idx] = state;
}
"#;

    String::from(SHADER)
}