edgevec 0.9.0

High-performance embedded vector database for Browser, Node, and Edge
Documentation
// EdgeVec Filter Expression Grammar
// ==================================
// Version: 1.0.0
// Week 23 Task W23.1.2
//
// This grammar defines the filter expression syntax for EdgeVec queries.
// It supports 27 AST node types across comparison, string, array, range,
// logical, and null-check operators.
//
// Grammar follows SQL-style syntax with case-insensitive keywords.

// =============================================================================
// TOP-LEVEL RULES
// =============================================================================

/// Root rule: one complete filter expression.
/// `SOI` and `EOI` anchor the match to the start and end of the input, so any
/// text left over after `logical_expr` makes the whole parse fail.
filter = { SOI ~ logical_expr ~ EOI }

// =============================================================================
// LOGICAL EXPRESSIONS (Precedence: OR < AND < NOT)
// =============================================================================

/// Logical expression: entry point for any boolean combination.
/// Delegates directly to `or_expr`, the lowest-precedence level; it is also
/// the rule re-entered inside parentheses by `grouped_expr`.
logical_expr = { or_expr }

/// OR expression: one `and_expr` followed by zero or more `or_op and_expr`
/// repetitions.
/// `or_op` is a silent rule, so only the `and_expr` operands show up as child
/// pairs; a single child means no OR was present.
or_expr = { and_expr ~ (or_op ~ and_expr)* }

/// AND expression: one `not_expr` followed by zero or more `and_op not_expr`
/// repetitions.
/// `and_op` is a silent rule, so only the `not_expr` operands show up as child
/// pairs; a single child means no AND was present.
and_expr = { not_expr ~ (and_op ~ not_expr)* }

/// NOT expression: either `not_op` applied to a nested `not_expr` (so negations
/// can be stacked), or a bare `primary_expr`.
/// The negated alternative is tried first; if it fails, the parser backtracks
/// and tries `primary_expr` from the same position.
/// `not_op` is silent, so a negation is recognisable only by the child pair
/// being a `not_expr` rather than a `primary_expr`.
not_expr = { not_op ~ not_expr | primary_expr }

// =============================================================================
// PRIMARY EXPRESSIONS
// =============================================================================

/// Primary expression: the leaf-level building blocks of a filter.
/// PEG choice is ordered: alternatives are tried top to bottom and the first
/// one that matches wins, so more specific patterns are listed first.
/// Every alternative except `grouped_expr` begins with `field`; when one of
/// them fails after the field, the parser backtracks to the start of the field
/// and tries the next alternative.
primary_expr = {
    grouped_expr
    | null_check
    | between_expr
    | array_op_expr
    | string_op_expr
    | set_op_expr
    | comparison_expr
}

/// Grouped expression: a full `logical_expr` wrapped in parentheses, used to
/// override the default OR < AND < NOT precedence.
/// Because the body is `logical_expr`, groups can be nested to any depth.
grouped_expr = { "(" ~ logical_expr ~ ")" }

// =============================================================================
// COMPARISON EXPRESSIONS
// =============================================================================

/// Comparison: `field comp_op value`, e.g. `price >= 10`.
/// The left-hand side must be a field; the right-hand side is any `value`,
/// which includes another field name as well as literals.
comparison_expr = { field ~ comp_op ~ value }

/// Comparison operators.
/// Ordered choice: each two-character operator is listed ahead of the
/// one-character operator it starts with (`<=` before `<`, `>=` before `>`),
/// otherwise the shorter form would match first and strand the trailing `=`.
comp_op = {
    "!="
    | ">="
    | "<="
    | "="
    | ">"
    | "<"
}

// =============================================================================
// STRING OPERATIONS
// =============================================================================

/// String operation: `field string_op string_literal`, e.g.
/// `name contains "abc"`.
/// The right-hand side must be a double-quoted string literal; numbers,
/// booleans and field references are not accepted here.
string_op_expr = { field ~ string_op ~ string_literal }

/// String operator keywords, matched case-insensitively (`^"..."`).
/// No keyword is a prefix of another, so the order of the alternatives does
/// not affect which one matches.
string_op = { ^"like" | ^"ends_with" | ^"starts_with" | ^"contains" }

// =============================================================================
// SET OPERATIONS (IN, NOT IN)
// =============================================================================

/// Set operation: `field set_op array_literal`, e.g. `status in ["a", "b"]`
/// or `status not in [1, 2]`.
/// The right-hand side must be a bracketed array literal.
set_op_expr = { field ~ set_op ~ array_literal }

/// Set operators (case-insensitive).
/// `not_in_op` is tried before `in_op`; the matched alternative appears as the
/// single child pair, which is how NOT IN is told apart from IN.
set_op = { not_in_op | in_op }

/// NOT IN operator: the keywords `not` and `in` (case-insensitive) separated by
/// at least one whitespace character.
/// The rule is atomic, so whitespace is not skipped implicitly and the
/// separator has to be spelled out with `WHITESPACE+`.
not_in_op = @{ ^"not" ~ WHITESPACE+ ~ ^"in" }

/// IN operator: the case-insensitive keyword `in`.
/// Atomic and not silent, so it yields its own token under `set_op`.
in_op = @{ ^"in" }

// =============================================================================
// ARRAY OPERATIONS (ANY, ALL, NONE)
// =============================================================================

/// Array operation: `field array_op array_literal`, e.g.
/// `tags any ["a", "b"]`.
/// The right-hand side must be a bracketed array literal.
array_op_expr = { field ~ array_op ~ array_literal }

/// Array operator keywords (`any`, `all`, `none`), matched case-insensitively.
/// Intended for array-valued metadata fields.
/// No keyword is a prefix of another, so alternative order is irrelevant.
array_op = { ^"none" | ^"all" | ^"any" }

// =============================================================================
// RANGE OPERATIONS
// =============================================================================

/// Between expression: `field BETWEEN low AND high`.
/// Both bounds are `value`s, so each may be a literal or a field reference.
/// The separator is the dedicated `and_keyword` token rather than the logical
/// `and_op`, so it is consumed here as part of the range.
between_expr = { field ~ between_op ~ value ~ and_keyword ~ value }

/// BETWEEN operator: the case-insensitive keyword `between`.
/// Atomic and not silent, so it yields its own token inside `between_expr`.
between_op = @{ ^"between" }

/// AND keyword used as the separator inside `between_expr` (case-insensitive).
/// Unlike the silent logical `and_op`, this rule is not silent and yields its
/// own token; it also accepts only the word form, not `&&`.
and_keyword = @{ ^"and" }

// =============================================================================
// NULL CHECKS
// =============================================================================

/// Null check: `field IS NULL` or `field IS NOT NULL`.
/// The variant is carried by the child of `is_null_op`.
null_check = { field ~ is_null_op }

/// IS NULL / IS NOT NULL operators.
/// `is_not_null_op` is tried first; the matched alternative appears as the
/// single child pair.
is_null_op = { is_not_null_op | is_null_only_op }

/// IS NOT NULL: the keywords `is`, `not`, `null` (case-insensitive), each
/// separated by at least one whitespace character.
/// Atomic, so the separating whitespace is matched explicitly with
/// `WHITESPACE+` instead of being skipped implicitly.
is_not_null_op = @{ ^"is" ~ WHITESPACE+ ~ ^"not" ~ WHITESPACE+ ~ ^"null" }

/// IS NULL: the keywords `is` and `null` (case-insensitive) separated by at
/// least one whitespace character.
/// Atomic, so the separating whitespace is matched explicitly with
/// `WHITESPACE+` instead of being skipped implicitly.
is_null_only_op = @{ ^"is" ~ WHITESPACE+ ~ ^"null" }

// =============================================================================
// LOGICAL OPERATORS
// =============================================================================

/// OR operator: the symbol `||` or the case-insensitive keyword `or`.
/// Silent (`_`): it is matched but produces no token in the parse tree.
or_op = _{ "||" | ^"or" }

/// AND operator: the symbol `&&` or the case-insensitive keyword `and`.
/// Silent (`_`): it is matched but produces no token in the parse tree.
and_op = _{ "&&" | ^"and" }

/// Word-boundary probe for the NOT keyword: case-insensitive `not` that is not
/// immediately followed by an identifier character.
/// Atomic, so no implicit whitespace is skipped before the negative lookahead.
/// It is only referenced from inside a lookahead, where pest emits no tokens,
/// so it never appears in the parse tree.
not_keyword = @{ ^"not" ~ !(ASCII_ALPHANUMERIC | "_") }

/// NOT operator: the case-insensitive keyword `not` or the symbol `!`.
/// Silent (`_`): it is matched but produces no token in the parse tree.
/// The keyword form is guarded by `&not_keyword` so that it only matches a
/// whole word. Without the guard, a field whose name starts with "not"
/// (e.g. `notes = "x"`) would be parsed as NOT applied to `es = "x"`.
not_op = _{ &not_keyword ~ ^"not" | "!" }

// =============================================================================
// VALUES AND LITERALS
// =============================================================================

/// Value: anything allowed on the right-hand side of an operator or inside an
/// array literal: a number, a string literal, a boolean, or a field name.
/// `boolean` is tried before `field` because the words `true` and `false`
/// also fit the identifier pattern of `field`.
value = {
    number
    | string_literal
    | boolean
    | field
}

/// String literal: double-quoted text with backslash escape sequences.
/// Atomic: whitespace between the quotes is part of the string rather than
/// being skipped, and `inner_string` produces no separate inner token.
/// The matched span includes the surrounding quote characters.
string_literal = @{ "\"" ~ inner_string ~ "\"" }

/// Inner string: the (possibly empty) content between the quotes.
/// Each step consumes either a recognised escape sequence or any single
/// character that is neither a backslash nor a double quote.
inner_string = @{ (escape_seq | !("\\" | "\"") ~ ANY)* }

/// Escape sequence: a backslash followed by one of `n`, `r`, `t`, `\` or `"`.
/// Any other character after a backslash is not a valid escape.
escape_seq = @{ "\\" ~ ("n" | "r" | "t" | "\\" | "\"") }

/// Number: integer or floating-point literal.
/// Shape: optional leading `-`, then `integer_part`, then an optional
/// `decimal_part`. There is no leading `+` and no exponent notation.
/// Atomic, so no whitespace is allowed between the sign, digits and fraction.
number = @{ "-"? ~ integer_part ~ decimal_part? }

/// Integer part of a number: either a non-zero digit followed by any number of
/// digits, or a lone `0`. Leading zeros (e.g. `007`) are therefore not
/// matched as a single integer part.
integer_part = @{ ASCII_NONZERO_DIGIT ~ ASCII_DIGIT* | "0" }

/// Decimal part of a number: a `.` followed by at least one digit.
/// A trailing dot with no digits (e.g. `1.`) does not match this rule.
/// The rule itself is mandatory in shape; it is made optional by the `?` at
/// its use site in `number`.
decimal_part = @{ "." ~ ASCII_DIGIT+ }

/// Boolean literal: `true` or `false`, matched case-insensitively.
/// Atomic with a trailing negative lookahead so the keyword only matches as a
/// whole word. An identifier that merely starts with a boolean keyword (e.g.
/// `true_count`, `falsey`) is rejected here and left for `field` to match,
/// instead of being cut off after the keyword and failing the parse.
/// The lookahead consumes nothing, so the token span is still just the keyword.
boolean = @{ (^"true" | ^"false") ~ !(ASCII_ALPHANUMERIC | "_") }

/// Array literal: square brackets around zero or more comma-separated values.
/// `[]` is accepted; a trailing comma is not.
/// Elements are `value`s, which do not include `array_literal`, so arrays
/// cannot be nested.
array_literal = { "[" ~ (value ~ ("," ~ value)*)? ~ "]" }

/// Field name: an identifier whose first character is an underscore or ASCII
/// letter, followed by any number of underscores, ASCII letters or digits.
/// Atomic, so the identifier is matched as one unbroken run of characters.
field = @{ ("_" | ASCII_ALPHA) ~ ("_" | ASCII_ALPHANUMERIC)* }

// =============================================================================
// WHITESPACE (implicit - automatically skipped between tokens)
// =============================================================================

/// Whitespace: newline, carriage return, tab or space.
/// pest skips this rule implicitly between the elements of non-atomic rules;
/// atomic rules must reference it explicitly. Silent, so it yields no tokens.
WHITESPACE = _{ "\n" | "\r" | "\t" | " " }