use crate::{Convention, DataType, OpSignature, OpSpec};
/// Returns the WGSL source of the per-byte tokenizer compute shader.
///
/// The shader reads the input text from `source` (binding 0), packed four
/// bytes per `u32` with the lowest-order byte first, and writes one `STATE_*`
/// class per input byte into `tokens` (binding 1). `config.length` (binding 2)
/// is the number of input bytes; invocations at or beyond it exit immediately.
///
/// Every invocation replays the lexer state machine from byte 0 up to its own
/// index, so invocation `idx` does `idx + 1` steps of work.
///
/// NOTE(review): the lexical rules (backtick strings, `/…/` regex literals,
/// `$` in identifiers) look like they target JavaScript-like input — confirm
/// against the caller.
fn tokenize_wgsl() -> String {
    r#"
const STATE_STRING: u32 = 0u;
const STATE_IDENTIFIER: u32 = 1u;
const STATE_NUMBER: u32 = 2u;
const STATE_COMMENT: u32 = 3u;
const STATE_REGEX: u32 = 4u;
const STATE_OPERATOR: u32 = 5u;
const STATE_WHITESPACE: u32 = 6u;
const STATE_UNKNOWN: u32 = 7u;

struct Config {
    length: u32,
};

@group(0) @binding(0) var<storage, read> source: array<u32>;
@group(0) @binding(1) var<storage, read_write> tokens: array<u32>;
@group(0) @binding(2) var<uniform> config: Config;

// Extracts byte `idx` from the packed source buffer (four bytes per u32,
// lowest-order byte first).
fn get_char(idx: u32) -> u32 {
    let word = source[idx / 4u];
    let shift = (idx % 4u) * 8u;
    return (word >> shift) & 0xFFu;
}

@compute @workgroup_size(64)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    let idx = global_id.x;
    if (idx >= config.length) {
        return;
    }

    // `state` always holds the class of the byte processed most recently.
    // It is updated on every iteration (not only on the last one) because the
    // identifier and number rules below look at the class of the previous
    // byte; leaving it stale across strings, comments, regexes or a division
    // slash would misclassify the byte that follows them.
    var state = STATE_WHITESPACE;
    var string_quote = 0u;              // active quote byte, 0 when outside a string
    var in_escape = false;              // previous byte was a backslash in a string or regex
    var in_block_comment = false;
    var block_comment_opening = false;  // next byte is the '*' of the opening slash-star
    var block_comment_star = false;     // previous comment byte was '*'
    var in_line_comment = false;
    var in_regex = false;
    // Class of the last token that matters for the regex-versus-division
    // decision; whitespace and comments never overwrite it.
    var last_significant_state = STATE_UNKNOWN;

    for (var i = 0u; i <= idx; i = i + 1u) {
        let c = get_char(i);

        // Byte following a backslash: belongs to the enclosing string or regex.
        if (in_escape) {
            in_escape = false;
            if (string_quote != 0u) {
                state = STATE_STRING;
            } else if (in_regex) {
                state = STATE_REGEX;
            } else {
                state = STATE_UNKNOWN;
            }
            continue;
        }

        // A backslash only starts an escape inside a string or a regex.
        if (c == 92u) {
            if (string_quote != 0u) {
                in_escape = true;
                state = STATE_STRING;
                continue;
            } else if (in_regex) {
                in_escape = true;
                state = STATE_REGEX;
                continue;
            }
        }

        if (in_line_comment) {
            if (c == 10u) { // newline ends the comment and counts as whitespace
                in_line_comment = false;
                state = STATE_WHITESPACE;
            } else {
                state = STATE_COMMENT;
            }
            continue;
        }

        if (in_block_comment) {
            if (block_comment_opening) {
                // This is the '*' of the opener. It must not arm the
                // terminator, otherwise slash-star-slash would be taken as a
                // complete comment.
                block_comment_opening = false;
            } else if (block_comment_star && c == 47u) { // star followed by '/'
                in_block_comment = false;
                block_comment_star = false;
            } else {
                block_comment_star = (c == 42u); // '*'
            }
            state = STATE_COMMENT;
            continue;
        }

        if (string_quote != 0u) {
            if (c == string_quote) { // closing quote is still part of the string
                string_quote = 0u;
                last_significant_state = STATE_STRING;
            }
            state = STATE_STRING;
            continue;
        }

        if (in_regex) {
            if (c == 47u) { // closing '/' is still part of the regex
                in_regex = false;
                last_significant_state = STATE_REGEX;
            }
            state = STATE_REGEX;
            continue;
        }

        if (c == 47u) { // '/'
            // One byte of lookahead, bounded by the input length.
            var next_c = 0u;
            if (i + 1u < config.length) {
                next_c = get_char(i + 1u);
            }
            if (next_c == 47u) { // line comment opener
                in_line_comment = true;
                state = STATE_COMMENT;
            } else if (next_c == 42u) { // block comment opener
                in_block_comment = true;
                block_comment_opening = true;
                block_comment_star = false;
                state = STATE_COMMENT;
            } else if (last_significant_state == STATE_OPERATOR || last_significant_state == STATE_UNKNOWN || last_significant_state == STATE_WHITESPACE) {
                // No value precedes the slash, so it opens a regex literal.
                in_regex = true;
                state = STATE_REGEX;
            } else {
                // A value precedes the slash, so it is a division operator.
                state = STATE_OPERATOR;
                last_significant_state = STATE_OPERATOR;
            }
            continue;
        }

        // Double quote, single quote or backtick opens a string.
        if (c == 34u || c == 39u || c == 96u) {
            string_quote = c;
            state = STATE_STRING;
            continue;
        }

        // Letters, '_' and '$' start or continue an identifier; digits only
        // continue one.
        let is_alpha = (c >= 65u && c <= 90u) || (c >= 97u && c <= 122u) || c == 95u || c == 36u;
        let is_digit = (c >= 48u && c <= 57u);
        if (is_alpha || (is_digit && state == STATE_IDENTIFIER)) {
            state = STATE_IDENTIFIER;
            last_significant_state = STATE_IDENTIFIER;
            continue;
        }

        // A '.' is part of a number only directly after a number byte.
        if (is_digit || (c == 46u && state == STATE_NUMBER)) {
            state = STATE_NUMBER;
            last_significant_state = STATE_NUMBER;
            continue;
        }

        // Space, tab, newline, carriage return.
        if (c == 32u || c == 9u || c == 10u || c == 13u) {
            state = STATE_WHITESPACE;
            continue;
        }

        let is_op = c == 40u || c == 41u || c == 123u || c == 125u || c == 91u || c == 93u ||
            c == 61u || c == 43u || c == 45u || c == 42u || c == 37u || c == 33u ||
            c == 60u || c == 62u || c == 38u || c == 124u || c == 94u || c == 126u ||
            c == 63u || c == 58u || c == 44u || c == 46u || c == 59u;
        if (is_op) {
            state = STATE_OPERATOR;
            last_significant_state = STATE_OPERATOR;
            continue;
        }

        state = STATE_UNKNOWN;
        last_significant_state = STATE_UNKNOWN;
    }

    tokens[idx] = state;
}
"#
    .to_string()
}