// Gremlin Grammar for Interstellar
// A TinkerPop-compatible Gremlin text parser using pest PEG
// ============================================================
// Whitespace and Comments (silently consumed)
// ============================================================
// Implicit whitespace: pest skips WHITESPACE between the operands of `~` and
// between repetitions in every non-atomic rule. The leading `_` makes the rule
// silent, so it never appears as a pair in the parse tree.
WHITESPACE = _{ " " | "\t" | "\r" | "\n" }
// Implicit comments, skipped in the same positions as WHITESPACE:
//   - a line comment runs from `//` up to (not including) the next newline;
//   - a block comment runs from `/*` to the first `*/` (no nesting).
COMMENT = _{ "//" ~ (!"\n" ~ ANY)* | "/*" ~ (!"*/" ~ ANY)* ~ "*/" }
// ============================================================
// Lexical Elements
// ============================================================
// String literals (both quote styles for TinkerPop compatibility).
// `$` (compound-atomic) turns off implicit whitespace/comment skipping inside
// the literal while still exposing the inner rules as pairs.
string = ${ single_quoted | double_quoted }
single_quoted = ${ "'" ~ single_inner ~ "'" }
// Body of a single-quoted string. The lookahead stops the loop at an
// unescaped `'`; the two-character escapes `\\` and `\'` are consumed as a
// unit so an escaped quote does not end the literal. `@` (atomic) yields the
// whole body as a single pair; escapes are matched here, not decoded.
single_inner = @{ (!"'" ~ ("\\\\" | "\\'" | ANY))* }
double_quoted = ${ "\"" ~ double_inner ~ "\"" }
// Body of a double-quoted string: same scheme, with `\\` and `\"` as the
// recognised escapes.
double_inner = @{ (!"\"" ~ ("\\\\" | "\\\"" | ANY))* }
// Numeric literals
// integer: optional leading minus followed by one or more ASCII digits.
integer = @{ "-"? ~ ASCII_DIGIT+ }
// float: digits on both sides of a mandatory decimal point, optionally
// followed by an exponent introduced by `e` or `E` (`^` = case-insensitive).
// The exponent may carry an explicit `+` or `-` sign, so `1.5e10`, `1.5e+10`
// and `1.5E-3` are all accepted. A literal without a decimal point (`1e5`)
// is not a float under this rule.
float = @{ "-"? ~ ASCII_DIGIT+ ~ "." ~ ASCII_DIGIT+ ~ (^"e" ~ ("+" | "-")? ~ ASCII_DIGIT+)? }
// Boolean and null
// These match the bare literal text only; no word-boundary check is made, so
// the surrounding rule's next token (`,`, `)`, `]`, ...) is what rejects a
// longer word that merely starts with one of these literals.
boolean = { "true" | "false" }
null = { "null" }
// Identifiers
// An ASCII letter followed by any number of ASCII letters, digits or `_`.
// Atomic, so the identifier is one pair with no embedded whitespace.
// Within this file it is used as a bare map key (see map_entry).
identifier = @{ ASCII_ALPHA ~ (ASCII_ALPHANUMERIC | "_")* }
// ============================================================
// Entry Points
// ============================================================
// Single traversal entry point: g.V().step1().step2()...
// SOI/EOI anchor the match to the whole input, so trailing text is an error.
// The terminal step is optional.
traversal = { SOI ~ graph_source ~ step* ~ terminal_step? ~ EOI }
// Multi-statement script entry point
// Statements are simply juxtaposed: no separator token is required between
// them (only implicit whitespace/comments). An empty script is accepted.
script = { SOI ~ statement* ~ EOI }
// A statement is either an assignment or a standalone traversal expression
// assignment is tried first; a statement that starts with `g.` falls through
// to traversal_expression because variable_name refuses the keyword `g`.
statement = { assignment | traversal_expression }
// Assignment: identifier = g.traversal().terminal()
// The terminal step is mandatory here, unlike in traversal_expression.
assignment = { variable_name ~ "=" ~ traversal_body ~ terminal_step }
// Traversal expression without assignment (just executed)
traversal_expression = { traversal_body ~ terminal_step? }
// The body of a traversal (without SOI/EOI)
traversal_body = { graph_source ~ step* }
// Variable name for assignment (distinct from identifier to avoid conflicts)
// Same shape as identifier (ASCII letter, then letters/digits/`_`), except
// that a name which IS a reserved keyword is refused. The lookahead only
// fires when the keyword is not followed by another identifier character, so
// names that merely start with a keyword (`graph`, `group1`, `Person`,
// `nullable`, `nonex`) are valid, while `g`, `true`, `P`, `none`, ... are not.
// A plain `!keyword` would be a prefix test and reject all of the former.
// The rule is atomic, so no whitespace is skipped inside the lookahead.
variable_name = @{ !(keyword ~ !(ASCII_ALPHANUMERIC | "_")) ~ ASCII_ALPHA ~ (ASCII_ALPHANUMERIC | "_")* }
// Reserved keywords that cannot be used as variable names
// Matches the keyword text only; the word-boundary check lives in
// variable_name. No alternative is a prefix of another, so the order of the
// alternatives does not affect that check.
keyword = { "g" | "true" | "false" | "null" | "P" | "TextP" | "TextQ" | "__" | "none" }
// Variable reference in from/to/V contexts
variable_ref = { variable_name }
// ============================================================
// Graph Source
// ============================================================
// Graph source: g
// Every traversal starts with the literal `g`, a dot, and one source step.
graph_source = { "g" ~ "." ~ source_step }
// Source steps
// Ordered choice over the steps that may directly follow `g.`.
source_step = {
v_step // V(), V(id), V(id, id, ...)
| e_step // E(), E(id), E(id, id, ...)
| add_v_source_step // addV(label)
| add_e_source_step // addE(label)
| inject_step // inject(value, value, ...)
| search_text_v_source_step // searchTextV(prop, query, k) [spec-55c]
| search_text_e_source_step // searchTextE(prop, query, k) [spec-55c]
}
// V() can accept: nothing (all), values (ids), or variable reference
// variable_ref is tried before value_list; only a single variable is allowed.
v_step = { "V" ~ "(" ~ (variable_ref | value_list)? ~ ")" }
// E() can accept: nothing (all), values (ids), or variable reference
// Same argument shape as v_step.
e_step = { "E" ~ "(" ~ (variable_ref | value_list)? ~ ")" }
// addV/addE as source steps take exactly one string label. The syntax is the
// same as add_v_inline_step / add_e_inline_step; the separate rule names let
// the consumer tell the source position from the mid-traversal position.
add_v_source_step = { "addV" ~ "(" ~ string ~ ")" }
add_e_source_step = { "addE" ~ "(" ~ string ~ ")" }
// inject() takes zero or more values.
inject_step = { "inject" ~ "(" ~ value_list? ~ ")" }
// spec-55c full-text search source steps.
// The query argument accepts either a bare string (sugar for TextQuery::Match)
// or a structured TextQ DSL expression.
// Arguments are (property: string, query, k: integer). The integer rule
// permits a leading minus, so the grammar itself does not restrict k's sign.
search_text_v_source_step = {
"searchTextV" ~ "(" ~ string ~ "," ~ search_text_query_arg ~ "," ~ integer ~ ")"
}
search_text_e_source_step = {
"searchTextE" ~ "(" ~ string ~ "," ~ search_text_query_arg ~ "," ~ integer ~ ")"
}
// Argument shapes for the FTS query position. Order matters for PEG: try the
// structured DSL first so that a bare string fallback only matches when the
// `TextQ.` prefix is absent.
search_text_query_arg = { text_q_expr | string }
// One or more comma-separated values (no trailing comma).
value_list = { value ~ ("," ~ value)* }
// ============================================================
// Steps
// ============================================================
// Main step dispatcher
// A step is a `.` followed by exactly one of the step rules listed below.
step = { "." ~ step_body }
// Ordered choice over every non-terminal step. Several step names are
// prefixes of others (out/outE/outV, in/inE/inV/index, has/hasLabel/hasId,
// id/identity, group/groupCount, ...). Each step rule requires "(" as the
// next token after its name, so the shorter name fails on the longer input
// and the choice simply moves on to the next alternative.
step_body = {
// Navigation (vertex to vertex) - must come before single-word matches
out_step | in_step | both_step |
// Navigation (vertex to edge)
out_e_step | in_e_step | both_e_step |
// Navigation (edge to vertex)
out_v_step | in_v_step | both_v_step | other_v_step |
// Filter (order matters - more specific first)
has_label_step | has_id_step | has_not_step | has_key_step | has_value_step | has_step |
where_step | is_step | and_step | or_step | not_step |
dedup_step | limit_step | skip_step | range_step | tail_step |
coin_step | sample_step | simple_path_step | cyclic_path_step |
// Transform
values_step | properties_step | value_map_step | element_map_step | property_map_step |
id_step | label_step | key_step | value_step |
path_step | select_step | project_step | by_step |
unfold_step | fold_step |
count_step | sum_step | max_step | min_step | mean_step | group_step | group_count_step |
order_step | math_step | constant_step | identity_step | index_step | loops_step |
text_score_step | // spec-55c: read BM25 score from traverser sack
// Branch
choose_step | union_step | coalesce_step | optional_step | local_step | branch_step | option_step |
// Repeat
repeat_step | times_step | until_step | emit_step |
// Side effect
as_step | aggregate_step | store_step | cap_step | side_effect_step | profile_step |
// Mutation
add_v_inline_step | add_e_inline_step | property_step | from_step | to_step | drop_step |
// Algorithm
shortest_path_step | k_shortest_paths_step |
bfs_traversal_step | dfs_traversal_step |
bidirectional_bfs_step | iddfs_step | with_step
}
// ============================================================
// Navigation Steps
// ============================================================
// Vertex-to-vertex and vertex-to-edge navigation: each takes an optional
// comma-separated list of edge-label strings.
out_step = { "out" ~ "(" ~ label_list? ~ ")" }
in_step = { "in" ~ "(" ~ label_list? ~ ")" }
both_step = { "both" ~ "(" ~ label_list? ~ ")" }
out_e_step = { "outE" ~ "(" ~ label_list? ~ ")" }
in_e_step = { "inE" ~ "(" ~ label_list? ~ ")" }
both_e_step = { "bothE" ~ "(" ~ label_list? ~ ")" }
// Edge-to-vertex navigation: these take no arguments.
out_v_step = { "outV" ~ "(" ~ ")" }
in_v_step = { "inV" ~ "(" ~ ")" }
both_v_step = { "bothV" ~ "(" ~ ")" }
other_v_step = { "otherV" ~ "(" ~ ")" }
// One or more comma-separated string labels (no trailing comma).
label_list = { string ~ ("," ~ string)* }
// ============================================================
// Filter Steps
// ============================================================
// has() - multiple overloads
// Order matters for PEG - try most specific first
has_step = { "has" ~ "(" ~ has_args ~ ")" }
// A PEG choice commits to the first alternative that matches and is not
// retried if the following ")" then fails, so longer forms must come first:
// has_key_only in front would match the first string of a 2- or 3-argument
// call and leave the rest unparsed.
has_args = {
has_label_key_value // has(label, key, value) - 3 args
| has_key_predicate // has(key, predicate) - 2 args, second is a predicate
| has_key_value // has(key, value) - 2 args
| has_key_only // has(key) - 1 arg
}
has_key_only = { string }
has_key_value = { string ~ "," ~ value }
has_key_predicate = { string ~ "," ~ predicate }
has_label_key_value = { string ~ "," ~ string ~ "," ~ value }
// hasLabel/hasKey take one or more strings; hasId/hasValue one or more values.
has_label_step = { "hasLabel" ~ "(" ~ string ~ ("," ~ string)* ~ ")" }
has_id_step = { "hasId" ~ "(" ~ value ~ ("," ~ value)* ~ ")" }
// hasNot takes exactly one property key.
has_not_step = { "hasNot" ~ "(" ~ string ~ ")" }
has_key_step = { "hasKey" ~ "(" ~ string ~ ("," ~ string)* ~ ")" }
has_value_step = { "hasValue" ~ "(" ~ value ~ ("," ~ value)* ~ ")" }
// where() - traversal or predicate
where_step = { "where" ~ "(" ~ where_args ~ ")" }
where_args = {
where_predicate // where(P.eq('a'))
| where_traversal // where(__.out())
}
// Wrapper rules so the consumer can tell the two where() forms apart.
where_predicate = { predicate }
where_traversal = { anonymous_traversal }
// is() - value or predicate
is_step = { "is" ~ "(" ~ is_arg ~ ")" }
// predicate is tried before a plain value.
is_arg = { predicate | value }
// Boolean combinators
// and()/or() need at least one anonymous traversal; not() takes exactly one.
and_step = { "and" ~ "(" ~ anonymous_traversal ~ ("," ~ anonymous_traversal)* ~ ")" }
or_step = { "or" ~ "(" ~ anonymous_traversal ~ ("," ~ anonymous_traversal)* ~ ")" }
not_step = { "not" ~ "(" ~ anonymous_traversal ~ ")" }
// Limiting steps
// dedup() takes at most one string argument.
dedup_step = { "dedup" ~ "(" ~ string? ~ ")" }
// Counts are `integer`, which syntactically allows a leading minus.
limit_step = { "limit" ~ "(" ~ integer ~ ")" }
skip_step = { "skip" ~ "(" ~ integer ~ ")" }
range_step = { "range" ~ "(" ~ integer ~ "," ~ integer ~ ")" }
// tail() may be called with no argument.
tail_step = { "tail" ~ "(" ~ integer? ~ ")" }
// coin() requires a `float`, i.e. a literal with a decimal point; an
// argument such as `1` or `0` does not match.
coin_step = { "coin" ~ "(" ~ float ~ ")" }
sample_step = { "sample" ~ "(" ~ integer ~ ")" }
simple_path_step = { "simplePath" ~ "(" ~ ")" }
cyclic_path_step = { "cyclicPath" ~ "(" ~ ")" }
// ============================================================
// Transform Steps
// ============================================================
// Property accessors: each takes zero or more comma-separated string keys.
values_step = { "values" ~ "(" ~ (string ~ ("," ~ string)*)? ~ ")" }
properties_step = { "properties" ~ "(" ~ (string ~ ("," ~ string)*)? ~ ")" }
value_map_step = { "valueMap" ~ "(" ~ value_map_args? ~ ")" }
// valueMap arguments: either a boolean optionally followed by string keys,
// or one or more string keys.
value_map_args = { boolean ~ ("," ~ string)* | string ~ ("," ~ string)* }
element_map_step = { "elementMap" ~ "(" ~ (string ~ ("," ~ string)*)? ~ ")" }
property_map_step = { "propertyMap" ~ "(" ~ (string ~ ("," ~ string)*)? ~ ")" }
// Argument-less element/property accessors.
id_step = { "id" ~ "(" ~ ")" }
label_step = { "label" ~ "(" ~ ")" }
key_step = { "key" ~ "(" ~ ")" }
value_step = { "value" ~ "(" ~ ")" }
path_step = { "path" ~ "(" ~ ")" }
// select()/project() need at least one string.
select_step = { "select" ~ "(" ~ string ~ ("," ~ string)* ~ ")" }
project_step = { "project" ~ "(" ~ string ~ ("," ~ string)* ~ ")" }
// by() modulator; the argument is optional.
by_step = { "by" ~ "(" ~ by_arg? ~ ")" }
// by_key_direction must precede the plain string alternative: a PEG choice
// commits to the first match, so `string` first would consume the key of
// by('name', asc) and the following ")" would fail.
by_arg = {
by_key_direction // by('name', asc)
| order_direction // by(asc) or by(desc)
| anonymous_traversal // by(__.values('name'))
| string // by('name')
}
// Sort direction, either qualified (`Order.asc`) or bare (`asc`).
order_direction = { "Order.asc" | "Order.desc" | "Order.shuffle" | "asc" | "desc" | "shuffle" }
by_key_direction = { string ~ "," ~ order_direction }
// Argument-less transform, aggregation and utility steps.
unfold_step = { "unfold" ~ "(" ~ ")" }
fold_step = { "fold" ~ "(" ~ ")" }
count_step = { "count" ~ "(" ~ ")" }
sum_step = { "sum" ~ "(" ~ ")" }
max_step = { "max" ~ "(" ~ ")" }
min_step = { "min" ~ "(" ~ ")" }
mean_step = { "mean" ~ "(" ~ ")" }
group_step = { "group" ~ "(" ~ ")" }
group_count_step = { "groupCount" ~ "(" ~ ")" }
order_step = { "order" ~ "(" ~ ")" }
// math() takes the expression as a single string; its contents are not
// parsed by this grammar.
math_step = { "math" ~ "(" ~ string ~ ")" }
constant_step = { "constant" ~ "(" ~ value ~ ")" }
identity_step = { "identity" ~ "(" ~ ")" }
index_step = { "index" ~ "(" ~ ")" }
loops_step = { "loops" ~ "(" ~ ")" }
// spec-55c: textScore() reads the BM25 relevance score from the traverser
// sack populated by searchTextV/searchTextE. Emits a Float Value per traverser.
text_score_step = { "textScore" ~ "(" ~ ")" }
// ============================================================
// Branch Steps
// ============================================================
// choose() - multiple forms
choose_step = { "choose" ~ "(" ~ choose_args ~ ")" }
// The three-argument form is tried first; the single-traversal form comes
// last because it would otherwise match the first argument of the
// three-argument form and leave the rest unparsed. A two-argument
// choose(cond, then) has no alternative here.
choose_args = {
choose_if_then_else // choose(cond, true_trav, false_trav)
| choose_predicate // choose(P.gt(25))
| choose_by_traversal // choose(__.values('type'))
}
choose_if_then_else = { anonymous_traversal ~ "," ~ anonymous_traversal ~ "," ~ anonymous_traversal }
choose_by_traversal = { anonymous_traversal }
choose_predicate = { predicate }
// union()/coalesce() need at least one anonymous traversal.
union_step = { "union" ~ "(" ~ anonymous_traversal ~ ("," ~ anonymous_traversal)* ~ ")" }
coalesce_step = { "coalesce" ~ "(" ~ anonymous_traversal ~ ("," ~ anonymous_traversal)* ~ ")" }
// optional()/local()/branch() take exactly one anonymous traversal.
optional_step = { "optional" ~ "(" ~ anonymous_traversal ~ ")" }
local_step = { "local" ~ "(" ~ anonymous_traversal ~ ")" }
branch_step = { "branch" ~ "(" ~ anonymous_traversal ~ ")" }
// option() modulator: a pick key followed by the traversal to run.
option_step = { "option" ~ "(" ~ option_args ~ ")" }
option_args = {
option_none // option(none, __.identity())
| option_key_value // option('a', __.out())
}
// The bare word `none` is the only non-value pick key accepted.
option_none = { "none" ~ "," ~ anonymous_traversal }
option_key_value = { value ~ "," ~ anonymous_traversal }
// ============================================================
// Repeat Steps
// ============================================================
// repeat() and until() take exactly one anonymous traversal.
repeat_step = { "repeat" ~ "(" ~ anonymous_traversal ~ ")" }
// times() takes a single integer loop count.
times_step = { "times" ~ "(" ~ integer ~ ")" }
until_step = { "until" ~ "(" ~ anonymous_traversal ~ ")" }
// emit() may be called bare or with one anonymous traversal.
emit_step = { "emit" ~ "(" ~ anonymous_traversal? ~ ")" }
// ============================================================
// Side Effect Steps
// ============================================================
// as()/aggregate()/store() take exactly one string key.
as_step = { "as" ~ "(" ~ string ~ ")" }
aggregate_step = { "aggregate" ~ "(" ~ string ~ ")" }
store_step = { "store" ~ "(" ~ string ~ ")" }
// cap() takes one or more string keys.
cap_step = { "cap" ~ "(" ~ string ~ ("," ~ string)* ~ ")" }
side_effect_step = { "sideEffect" ~ "(" ~ anonymous_traversal ~ ")" }
// profile() takes an optional string; it is parsed as an ordinary step
// (listed in step_body), not as a terminal step.
profile_step = { "profile" ~ "(" ~ string? ~ ")" }
// ============================================================
// Mutation Steps
// ============================================================
// Mid-traversal addV/addE: one mandatory string label. Syntactically the same
// as add_v_source_step / add_e_source_step, under a separate rule name.
add_v_inline_step = { "addV" ~ "(" ~ string ~ ")" }
add_e_inline_step = { "addE" ~ "(" ~ string ~ ")" }
property_step = { "property" ~ "(" ~ property_args ~ ")" }
// The cardinality form is tried first; it starts with a cardinality keyword,
// whereas the plain form starts with a quoted string.
property_args = {
property_cardinality // property(Cardinality.single, 'key', value)
| property_key_value // property('key', value)
}
property_cardinality = { cardinality ~ "," ~ string ~ "," ~ value }
property_key_value = { string ~ "," ~ value }
// Cardinality, either qualified (`Cardinality.single`) or bare (`single`).
cardinality = { "Cardinality.single" | "Cardinality.list" | "Cardinality.set" | "single" | "list" | "set" }
from_step = { "from" ~ "(" ~ from_to_arg ~ ")" }
to_step = { "to" ~ "(" ~ from_to_arg ~ ")" }
// from/to can accept: anonymous traversal, string label, variable reference, or value (id)
// Alternatives are tried in the order listed; variable_ref comes before value.
from_to_arg = { anonymous_traversal | string | variable_ref | value }
drop_step = { "drop" ~ "(" ~ ")" }
// ============================================================
// Algorithm Steps
// ============================================================
// shortestPath(targetId) - unweighted shortest path; followed by .by('weight') for Dijkstra
// The target is any `value`.
shortest_path_step = { "shortestPath" ~ "(" ~ value ~ ")" }
// kShortestPaths(targetId, k) - Yen's k-shortest paths; followed by .by('weight')
// k is an `integer`.
k_shortest_paths_step = { "kShortestPaths" ~ "(" ~ value ~ "," ~ integer ~ ")" }
// bfs() - breadth-first search from current vertex
bfs_traversal_step = { "bfs" ~ "(" ~ ")" }
// dfs() - depth-first search from current vertex
dfs_traversal_step = { "dfs" ~ "(" ~ ")" }
// bidirectionalBfs(targetId) - bidirectional BFS shortest path
bidirectional_bfs_step = { "bidirectionalBfs" ~ "(" ~ value ~ ")" }
// iddfs(targetId, maxDepth) - iterative deepening DFS
// maxDepth is an `integer`.
iddfs_step = { "iddfs" ~ "(" ~ value ~ "," ~ integer ~ ")" }
// with('key', value) - configuration modulator for algorithm steps
// The grammar accepts with() after any step, since it is an ordinary
// step_body alternative.
with_step = { "with" ~ "(" ~ string ~ "," ~ value ~ ")" }
// ============================================================
// Terminal Steps
// ============================================================
// A terminal step is a `.` followed by one terminal rule. Terminal names are
// not listed in step_body, so a preceding `step*` stops in front of them.
terminal_step = { "." ~ terminal_body }
terminal_body = {
next_step | to_list_step | to_set_step | iterate_step | has_next_step | explain_step
}
// next() optionally takes an integer argument.
next_step = { "next" ~ "(" ~ integer? ~ ")" }
// The remaining terminals take no arguments.
to_list_step = { "toList" ~ "(" ~ ")" }
to_set_step = { "toSet" ~ "(" ~ ")" }
iterate_step = { "iterate" ~ "(" ~ ")" }
has_next_step = { "hasNext" ~ "(" ~ ")" }
explain_step = { "explain" ~ "(" ~ ")" }
// ============================================================
// Predicates
// ============================================================
// A predicate is a geospatial predicate, a `P.` predicate or a `TextP.`
// predicate. The three families start with different text, so they cannot
// shadow each other.
predicate = { geo_predicate | p_predicate | text_p_predicate }
// P.eq(), P.neq(), P.lt(), etc.
p_predicate = { "P" ~ "." ~ p_method }
// Method names that are prefixes of others (lt/lte, gt/gte) are kept apart by
// the "(" each method requires right after its name.
p_method = {
p_eq | p_neq | p_lt | p_lte | p_gt | p_gte |
p_between | p_inside | p_outside |
p_within | p_without |
p_and | p_or | p_not
}
// Single-value comparisons.
p_eq = { "eq" ~ "(" ~ value ~ ")" }
p_neq = { "neq" ~ "(" ~ value ~ ")" }
p_lt = { "lt" ~ "(" ~ value ~ ")" }
p_lte = { "lte" ~ "(" ~ value ~ ")" }
p_gt = { "gt" ~ "(" ~ value ~ ")" }
p_gte = { "gte" ~ "(" ~ value ~ ")" }
// Range predicates: exactly two values.
p_between = { "between" ~ "(" ~ value ~ "," ~ value ~ ")" }
p_inside = { "inside" ~ "(" ~ value ~ "," ~ value ~ ")" }
p_outside = { "outside" ~ "(" ~ value ~ "," ~ value ~ ")" }
// Membership predicates: one or more values.
p_within = { "within" ~ "(" ~ value_list ~ ")" }
p_without = { "without" ~ "(" ~ value_list ~ ")" }
// Combinators in prefix form: and/or take exactly two predicates, not takes
// one. Operands may be any predicate, so they nest.
p_and = { "and" ~ "(" ~ predicate ~ "," ~ predicate ~ ")" }
p_or = { "or" ~ "(" ~ predicate ~ "," ~ predicate ~ ")" }
p_not = { "not" ~ "(" ~ predicate ~ ")" }
// TextP.containing(), TextP.startingWith(), etc.
text_p_predicate = { "TextP" ~ "." ~ text_p_method }
text_p_method = {
text_containing | text_not_containing |
text_starting_with | text_not_starting_with |
text_ending_with | text_not_ending_with |
text_regex
}
// Every TextP method takes exactly one string argument.
text_containing = { "containing" ~ "(" ~ string ~ ")" }
text_not_containing = { "notContaining" ~ "(" ~ string ~ ")" }
text_starting_with = { "startingWith" ~ "(" ~ string ~ ")" }
text_not_starting_with = { "notStartingWith" ~ "(" ~ string ~ ")" }
text_ending_with = { "endingWith" ~ "(" ~ string ~ ")" }
text_not_ending_with = { "notEndingWith" ~ "(" ~ string ~ ")" }
text_regex = { "regex" ~ "(" ~ string ~ ")" }
// ============================================================
// Geospatial Predicates (spec-56)
// ============================================================
// Geospatial predicates are written as bare function calls (no `P.` prefix).
geo_predicate = {
geo_within_distance
| geo_intersects
| geo_contained_by
| geo_bbox
}
// geo_within_distance(value, distance): the second argument is a number with
// a unit suffix (see distance_expr).
geo_within_distance = { "geo_within_distance" ~ "(" ~ value ~ "," ~ distance_expr ~ ")" }
geo_intersects = { "geo_intersects" ~ "(" ~ value ~ ")" }
geo_contained_by = { "geo_contained_by" ~ "(" ~ value ~ ")" }
// geo_bbox takes exactly four numbers.
// NOTE(review): the coordinate order is not stated in this file - confirm
// against the consumer.
geo_bbox = { "geo_bbox" ~ "(" ~ number ~ "," ~ number ~ "," ~ number ~ "," ~ number ~ ")" }
// Number helper (float or integer, used by geo rules)
// float is tried first so `1.5` is not cut short as the integer `1`.
number = { float | integer }
// Distance expression: 5km, 100m, 3.2mi, 10nmi
// Compound-atomic: no whitespace is allowed between the number and the unit.
distance_expr = ${ (float | integer) ~ distance_unit }
// Longer units are listed before their prefixes ("mi" before "m") so the
// ordered choice does not stop early on the shorter one.
distance_unit = @{ "nmi" | "km" | "mi" | "m" }
// ============================================================
// TextQ DSL (spec-55c)
//
// Builds a structured TextQuery for searchTextV/searchTextE. Maps to the
// `interstellar::storage::text::TextQuery` enum.
//
// TextQ.match('term1 term2') -> TextQuery::Match
// TextQ.matchAll('a b') -> TextQuery::MatchAll
// TextQ.phrase('exact phrase') -> TextQuery::Phrase
// TextQ.prefix('foo') -> TextQuery::Prefix
// TextQ.and(q1, q2, ...) -> TextQuery::And(vec![...])
// TextQ.or(q1, q2, ...) -> TextQuery::Or(vec![...])
// TextQ.not(q) -> TextQuery::Not(Box::new(q))
// ============================================================
// A TextQ expression is the literal `TextQ`, a dot, and one method call.
text_q_expr = { "TextQ" ~ "." ~ text_q_method }
// matchAll is listed before match; either order would work because each
// method requires "(" right after its name.
text_q_method = {
text_q_match_all
| text_q_match
| text_q_phrase
| text_q_prefix
| text_q_and
| text_q_or
| text_q_not
}
// Leaf queries: exactly one string argument each.
text_q_match = { "match" ~ "(" ~ string ~ ")" }
text_q_match_all = { "matchAll" ~ "(" ~ string ~ ")" }
text_q_phrase = { "phrase" ~ "(" ~ string ~ ")" }
text_q_prefix = { "prefix" ~ "(" ~ string ~ ")" }
// Combinators: and/or require at least two sub-queries (the `+` on the comma
// group); not takes exactly one. Operands are full TextQ expressions, so
// they nest.
text_q_and = { "and" ~ "(" ~ text_q_expr ~ ("," ~ text_q_expr)+ ~ ")" }
text_q_or = { "or" ~ "(" ~ text_q_expr ~ ("," ~ text_q_expr)+ ~ ")" }
text_q_not = { "not" ~ "(" ~ text_q_expr ~ ")" }
// ============================================================
// Anonymous Traversal
// ============================================================
// __.out(), __.in(), __.identity(), etc.
// Note: step* allows zero steps (e.g., __ used as identity)
// Only ordinary steps may follow `__`; terminal steps are not part of `step`.
anonymous_traversal = { "__" ~ step* }
// ============================================================
// Values
// ============================================================
// Literal values. Order matters for the PEG choice:
//   - float precedes integer so `1.5` is not cut short as the integer `1`;
//   - list_value precedes map_value: on a map literal list_value fails at the
//     `:` and the choice then falls back to map_value.
value = { geo_point | geo_polygon | float | integer | string | boolean | null | list_value | map_value }
// Geospatial value constructors (spec-56)
// point(n, n): exactly two numbers.
// NOTE(review): the coordinate order is not stated in this file - confirm
// against the consumer.
geo_point = { "point" ~ "(" ~ number ~ "," ~ number ~ ")" }
// polygon([[n, n], [n, n], ...]): one or more point pairs inside an outer
// bracket pair. The grammar does not check ring closure or a minimum count.
geo_polygon = { "polygon" ~ "(" ~ "[" ~ point_pair ~ ("," ~ point_pair)* ~ "]" ~ ")" }
point_pair = { "[" ~ number ~ "," ~ number ~ "]" }
// List literal: zero or more comma-separated values; `[]` is the empty list.
list_value = { "[" ~ (value ~ ("," ~ value)*)? ~ "]" }
// Maps use [:] for empty, and key:value syntax (distinct from list)
map_value = { "[" ~ ":" ~ "]" | "[" ~ map_entry ~ ("," ~ map_entry)* ~ "]" }
// A map key is a quoted string or a bare identifier.
map_entry = { (string | identifier) ~ ":" ~ value }