// whamm 0.1.0
//
// A framework for 'Wasm Application Monitoring and Manipulation'
// Documentation
// ==============================
// ---- High-Level Structure ----
// ==============================

// Entry point: a whole script, from start of input (SOI) to end of input (EOI).
// Supports top-level global declarations/initial assignments and probe definitions.
// Library imports may only appear before the first probe definition, and at
// least one probe definition is required.
script = {
    SOI
    ~ (lib_import | if_stmt | statement | fn_def)*
    ~ probe_def
    ~ (if_stmt | statement | fn_def | probe_def)*
    ~ EOI
}
 
// A probe definition: the rule naming where to attach, an optional predicate,
// and the block to run.
// TODO -- support comma separated list of rules: https://docs.oracle.com/cd/E23824_01/html/E22973/glghi.html#scrolltoc
// NOTE(review): PUSH(...) places the matched predicate text on pest's stack; no
// matching POP/PEEK/DROP is visible in this part of the grammar -- confirm it is intentional.
probe_def = { PROBE_RULE ~ PUSH(predicate?) ~ block }

// A predicate is an expression delimited by forward slashes, e.g. `/ x == 1 /`.
predicate = { "/" ~ expr ~ "/" }

// A user-defined function: `fn name(params) -> TYPE { ... }`.
// Original note: making the decision not to support "void" functions -- can change later.
// NOTE(review): the `-> TYPE` clause is optional in this rule, so a missing
// return type is not rejected by the grammar itself -- confirm where it is enforced.
fn_def = {
    "fn" ~ ID ~ fn_params
    ~ ("->" ~ TYPE)?
    ~ block
}
// Parenthesized, possibly empty, parameter list (silent: produces no pair of its own).
fn_params = _{ "(" ~ param_list? ~ ")" }
// One or more comma-separated parameters (silent).
param_list = _{ param ~ ("," ~ param)* }
// A single `name: TYPE` parameter.
param = { ID ~ ":" ~ TYPE }

// ===================
// ---- Libraries ----
// ===================

// Imports a library by name: `use <lib>;`
lib_import = { "use" ~ ID ~ ";" }

// Optional `@name` marker that may prefix a library call.
annotation = { "@" ~ ID }
// A call into a library: `[@annotation] <lib>.<function>(args)`.
lib_call = {
    annotation?
    ~ ID ~ "." ~ fn_call
}

// =====================
// ---- Identifiers ----
// =====================

// One colon-separated component of a probe rule. Atomic: no implicit whitespace
// or comments are allowed inside it. Besides letters, digits, `_` and `.`, it
// admits the characters * + \ ? | ! [ ] (presumably so a component can hold a
// glob/regex-like pattern -- confirm against the matcher).
PROBE_ID = @{
    ( ASCII_ALPHANUMERIC
    | "_" | "."
    | "*" | "+" | "?" | "|" | "!"
    | "[" | "]"
    | "\\"
    )+
}

// Parenthesized `name: TYPE` list that may follow a probe-rule component
// (see its use in PROBE_RULE); at least one parameter is required.
TY_BOUNDS = { "(" ~ param_list ~ ")" }

// Indexed access into a map variable: `name[key_expr]`.
get_map = { ID ~ "[" ~ expr ~ "]" }

// Words that may not be used as a bare identifier.
// Original note: disallowed IDs should have the full list of reserved names like return.
DISALLOWED_ID = _{ "return" | "if" | "while" | "else" | "map" }
// An identifier: a letter or `_` followed by letters, digits or `_` (atomic).
// A disallowed word is rejected on its own, but is accepted as the prefix of a
// longer identifier (e.g. `iffy`, `map_size`) through the second alternative.
ID = @{
    (!DISALLOWED_ID ~ (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")*)
    | (DISALLOWED_ID ~ (ASCII_ALPHANUMERIC | "_")+)
}
// Where a probe attaches: colon-separated PROBE_ID components, each optionally
// followed by TY_BOUNDS. Alternatives are tried in order, longest form first:
//   1. four components (three colons)
//   2. three components (two colons)
//   3. exactly two mandatory components, without TY_BOUNDS
// In the first two forms every component is optional, so only the colons are required.
PROBE_RULE = {
    PROBE_ID? ~ TY_BOUNDS? ~ ":" ~ PROBE_ID? ~ TY_BOUNDS? ~ ":" ~ PROBE_ID? ~ TY_BOUNDS? ~ ":" ~ PROBE_ID? ~ TY_BOUNDS?
    | PROBE_ID? ~ TY_BOUNDS? ~ ":" ~ PROBE_ID? ~ TY_BOUNDS? ~ ":" ~ PROBE_ID? ~ TY_BOUNDS? // to handle package:provider:event, e.g. wasm:func:entry
    | PROBE_ID ~ ":" ~ PROBE_ID // to handle wasm:before, wasm:after
}

// ===============
// ---- Types ----
// ===============

// Primitive type names. Each rule is atomic and matches exactly one literal.
TY_U8     = @{ "u8" }
TY_I8     = @{ "i8" }
TY_U16    = @{ "u16" }
TY_I16    = @{ "i16" }
TY_U32    = @{ "u32" }
TY_I32    = @{ "i32" }
TY_F32    = @{ "f32" }
TY_U64    = @{ "u64" }
TY_I64    = @{ "i64" }
TY_F64    = @{ "f64" }
TY_BOOL   = @{ "bool" }
TY_STRING = @{ "str" }

// a tuple that's used as a type declaration: `()` or `(T1, T2, ...)`
TY_TUPLE = {
    "(" ~ ")"
    | "(" ~ TYPE ~ ("," ~ TYPE)* ~ ")"
}
// first TYPE is the type of the key, second TYPE is the type of the value
// (`map<` is a single literal, so no whitespace is accepted between `map` and `<`)
TY_MAP = { "map<" ~ TYPE ~ "," ~ TYPE ~ ">" }

// Any type that can appear in a declaration (silent).
TYPE = _{
    TYPE_PRIMITIVE
    | TY_TUPLE
    | TY_MAP
}
// Any of the built-in primitive type names (silent).
TYPE_PRIMITIVE = _{
    TY_U8 | TY_I8
    | TY_U16 | TY_I16
    | TY_U32 | TY_I32 | TY_F32
    | TY_U64 | TY_I64 | TY_F64
    | TY_BOOL
    | TY_STRING
}
// The literal type name `unknown`.
TY_UNKNOWN = @{ "unknown" }
// TYPE extended with `unknown`; the rule name suggests it is used when parsing
// YAML definitions -- TODO confirm against the caller.
TYPE_YML = _{
    TYPE
    | TY_UNKNOWN
}

// ====================
// ---- Statements ----
// ====================

// A single `;`-terminated statement. The alternatives are tried in order, so
// the longer forms come first: decl_init is tried before special_decl/decl
// (which it starts with), and lib_call/fn_call before assignment and the
// ++/-- forms, which can also start with an ID.
statement = { ( decl_init | lib_call | fn_call | special_decl | decl | assignment | incrementor | decrementor | ret ) ~ ";" }

// control flow
// `if (cond) { ... }`, optionally continued by an `elif` chain and/or a final `else`.
if_stmt = {
    "if" ~ "(" ~ expr ~ ")" ~ block
    ~ (else_stmt | elif)?
}
// `elif (cond) { ... }`, which may itself be continued the same way.
elif = {
    "elif" ~ "(" ~ expr ~ ")" ~ block
    ~ (else_stmt | elif)?
}
else_stmt = { "else" ~ block }
// A brace-delimited sequence of statements and if-statements.
block = { "{" ~ (if_stmt | statement)* ~ "}" }
// `return` with an optional value.
ret = { "return" ~ expr? }

// calls
// A single call argument.
// `ternary` must be tried first: pest's ordered choice commits to the first
// alternative that matches, and `expr` on its own matches the condition of a
// ternary, so listing `ternary` after `expr` makes it unreachable.
arg = { ternary | tuple | expr | val } // todo -- tuple and val are redundant
// A call: `name(arg, arg, ...)`. The argument list is either empty or a
// comma-separated list; a leading comma such as `f(, x)` is not accepted.
fn_call = { ID ~ "(" ~ (arg ~ ("," ~ arg)*)? ~ ")" }

// unop
// Postfix increment / decrement of a variable or a map entry.
incrementor = { (get_map | ID) ~ "++" }
decrementor = { (get_map | ID) ~ "--" }

// var ops
// Decorator keywords that may precede a `var` declaration.
REPORT   = @{ "report" }
UNSHARED = @{ "unshared" }
var_decorators = { (REPORT | UNSHARED)+ }

// `var name: TYPE`; a name that starts with a RESERVED_KEYWORDS match is rejected.
decl = { "var" ~ !RESERVED_KEYWORDS ~ ID ~ ":" ~ TYPE }
// A declaration preceded by one or more decorators.
special_decl = { var_decorators ~ decl }
// A (possibly decorated) declaration with an initial value.
decl_init = { (special_decl | decl) ~ "=" ~ assignment_rhs }
// Assignment to a variable or a map entry.
assignment = { (get_map | ID) ~ "=" ~ assignment_rhs }

// ternary is listed first because expr alone would match a ternary's condition.
assignment_rhs = { ternary | expr }

// =====================
// ---- Expressions ----
// =====================

// `cond ? then : otherwise`
ternary = { expr ~ "?" ~ expr ~ ":" ~ expr }

// A flat sequence of operands separated by binary operators. No operator
// precedence is encoded in this rule, so it is presumably resolved by the
// consumer of the parse tree -- TODO confirm.
expr = { operand ~ (BINOP ~ operand)* }

// Unary prefix operators: `!` and `~`.
prefix = _{ neg | binary_not }
    neg = { "!" }
    binary_not = { "~" }
// Postfix operators: a cast to a primitive type, `<operand> as <type>`.
postfix = _{ cast }
    cast = { "as" ~ TYPE_PRIMITIVE }

// A literal, variable, map access or tuple. Order matters: BOOL and get_map are
// tried before ID, and FLOAT before INT.
val = _{ BOOL | get_map | ID | FLOAT | INT | STRING | tuple }
// One operand: optional prefix, then a call / parenthesized expression / value,
// then an optional cast.
operand = _{ prefix? ~ ( lib_call | fn_call | "(" ~ expr ~ ")" | val ) ~ postfix? }

// `()` or a parenthesized, comma-separated list of values.
tuple = { "(" ~ ")" | "(" ~ (val) ~ ( "," ~ val )* ~ ")" }

// Any binary operator (silent). The alternatives are tried in order, so the
// two-character tokens `<<`, `>>`, `&&` and `||` are matched before the
// one-character `<`, `>`, `&` and `|` that they start with.
BINOP = _{ SHIFTOP | LOGOP | BITOP | RELOP | SUMOP | MULOP }

    // Shift operators
    lshift = { "<<" }
    rshift = { ">>" }
    SHIFTOP = _{ lshift | rshift }

    // Logical operators
    and = { "&&" }
    or = { "||" }
    LOGOP = _{ and | or }

    // Bitwise operators
    binary_and = { "&" }
    binary_or = { "|" }
    binary_xor = { "^" }
    BITOP = _{ binary_and | binary_or | binary_xor }

    // Relational operators (`>=` / `<=` are listed before `>` / `<` so the longer token wins)
    eq = { "==" }
    ne = { "!=" }
    ge = { ">=" }
    gt = { ">" }
    le = { "<=" }
    lt = { "<" }
    RELOP = _{ eq | ne | ge | gt | le | lt }

    // Additive arithmetic operators
    // NOTE(review): the original comment labelled these "highest precedence";
    // precedence is not encoded in this grammar -- confirm against the consumer.
    add = { "+" }
    subtract = { "-" }
    SUMOP = _{ add | subtract }

    // Multiplicative arithmetic operators
    // NOTE(review): the original comment labelled these "next highest precedence" -- confirm as above.
    multiply = { "*" }
    divide = { "/" }
    modulo = { "%" }
    MULOP = _{ multiply | divide | modulo }

// ===================
// ---- Terminals ----
// ===================

// Integer literal: hexadecimal, binary or decimal, tried in that order.
INT = {
    int_hex
    | int_bin
//     | int_oct
    | int
}
    // Hexadecimal: case-insensitive `0x` prefix, then one or more hex digits
    int_hex = @{ ^"0x" ~ ASCII_HEX_DIGIT+ }
    // Binary: case-insensitive `0b` prefix, then one or more binary digits
    int_bin = @{ ^"0b" ~ ASCII_BIN_DIGIT+ }
//     int_oct = @{ "-"? ~ "0" ~ ASCII_OCT_DIGIT+ }  // Octal digit
    int = @{
        // Multiple digit, base 10 AND negative anything but 0; `_` may appear after the first digit
        "-"? ~ ASCII_NONZERO_DIGIT ~ (ASCII_DIGIT | "_")*
        // Single digit, base 10 (account for having just '0')
        | ASCII_DIGIT
    }

// Floating-point literal.
FLOAT = {
    float
}
    // Atomic. Alternatives, in order:
    //   - optionally negated `infinity`, `inf` or `NaN` (case-insensitive);
    //     `infinity` must be tried before `inf`, otherwise the shorter literal
    //     matches first and the longer one can never be reached
    //   - digits (with optional `_` separators), optional fraction, mandatory exponent
    //   - digits (with optional `_` separators) followed by `.` and optional fraction digits
    float = @{
        "-"? ~ ^"infinity" |
        "-"? ~ ^"inf" |
        "-"? ~ ^"NaN" |
        "-"? ~ ASCII_DIGIT+ ~ ("_" | ASCII_DIGIT)* ~ "."? ~ ASCII_DIGIT* ~ (^"e" ~ ("+" | "-")? ~ ASCII_DIGIT+ ) |
        "-"? ~ ASCII_DIGIT+ ~ ("_" | ASCII_DIGIT)* ~ "." ~ ASCII_DIGIT*
    }

// Boolean literal: `true` or `false`.
// The negative lookahead enforces a word boundary. `val` tries BOOL before ID,
// so without it an identifier such as `true_count` or `falsePositives` would
// match BOOL on its prefix and the enclosing expression would fail to parse.
// The lookahead consumes nothing, so the matched span is still the bare keyword.
BOOL = @{
    ("true" | "false") ~ !(ASCII_ALPHANUMERIC | "_")
}

// strings

// Single-character escapes that may follow a backslash: \n \r \t \" \' \\ \0
predefined_escape = { "n" | "r" | "t" | "\"" | "'" | "\\" | "0" }
// `\xNN`: exactly two hex digits.
// NOTE(review): HEX_DIGIT looks like pest's Unicode property rather than
// ASCII_HEX_DIGIT (which int_hex uses) -- confirm non-ASCII hex digits are intended.
hex_escape        = { "x" ~ HEX_DIGIT{2} }
// `\u{N...}`: one or more hex digits inside braces.
unicode_escape    = { "u{" ~ HEX_DIGIT+ ~ "}" }
// A backslash followed by one of the escape forms above.
escape            = { "\\" ~ (predefined_escape | hex_escape | unicode_escape) }
// Any single character other than a double quote or a backslash.
normal_char       = { !("\"" | "\\") ~ ANY }
// A double-quoted string. Compound-atomic (`$`): no implicit whitespace is
// skipped inside the quotes, while inner rules still produce their own pairs.
STRING            = ${ "\"" ~ (escape | normal_char)* ~ "\"" }

// ===========================
// ---- Reserved Keywords ----
// ===========================
// TODO -- may not need this after adding argN type assertions

// `arg` followed by one or more digits, e.g. `arg0`.
instr_arg = { "arg" ~ ASCII_DIGIT+ }

// Names rejected by the negative lookahead in `decl` (atomic).
RESERVED_KEYWORDS = @{
    instr_arg
}

// ===================
// ---- Skip-able ----
// ===================

// Line terminators: LF or CRLF.
newline = _{ "\n" | "\r\n" }
// Implicit whitespace: spaces, tabs and line terminators.
WHITESPACE = _{ " " | "\t" | newline }

// `/* ... */` comments, which may nest.
block_comment = _{ "/*" ~ (block_comment | (!"*/" ~ ANY))* ~ "*/" }
// Implicit comments: block comments, or `//` up to the end of the line.
COMMENT = _{ block_comment | ("//" ~ (!newline ~ ANY)*) }