use crate::storage::query::ast::{
CompareOp, EdgeDirection, EdgePattern, FieldRef, Filter, GraphPattern, GraphQuery, NodePattern,
Projection, PropertyFilter, QueryExpr,
};
use crate::storage::schema::Value;
#[derive(Debug, Clone)]
pub struct GremlinError {
pub message: String,
pub position: usize,
}
impl std::fmt::Display for GremlinError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "Gremlin error at {}: {}", self.position, self.message)
}
}
impl std::error::Error for GremlinError {}
#[derive(Debug, Clone)]
pub struct GremlinTraversal {
pub source: TraversalSource,
pub steps: Vec<GremlinStep>,
}
#[derive(Debug, Clone, PartialEq)]
pub enum TraversalSource {
Graph,
Anonymous,
}
#[derive(Debug, Clone)]
pub enum GremlinStep {
V(Option<String>), E(Option<String>),
Out(Option<String>), In(Option<String>), Both(Option<String>), OutE(Option<String>), InE(Option<String>), BothE(Option<String>), OutV, InV, BothV, OtherV,
Has(String, Option<GremlinValue>), HasNot(String), HasLabel(String), HasId(String), Where(Box<GremlinTraversal>), Filter(Box<GremlinTraversal>), Dedup, Limit(u64), Skip(u64), Range(u64, u64),
Values(Vec<String>), ValueMap(Vec<String>), Id, Label, Properties(Vec<String>), Count, Sum, Min, Max, Mean, Select(Vec<String>), Project(Vec<String>), Path, SimplePath, CyclicPath,
Repeat(Box<GremlinTraversal>), Times(u32), Until(Box<GremlinTraversal>), Emit, Union(Vec<GremlinTraversal>), Choose(
Box<GremlinTraversal>,
Box<GremlinTraversal>,
Option<Box<GremlinTraversal>>,
),
Coalesce(Vec<GremlinTraversal>),
As(String), By(ByModifier), Aggregate(String), Store(String), Group, GroupCount,
ToList, ToSet, Next, Fold, }
#[derive(Debug, Clone, PartialEq)]
pub enum GremlinValue {
String(String),
Integer(i64),
Float(f64),
Boolean(bool),
Predicate(GremlinPredicate),
}
#[derive(Debug, Clone, PartialEq)]
pub enum GremlinPredicate {
Eq(Box<GremlinValue>), Neq(Box<GremlinValue>), Lt(Box<GremlinValue>), Lte(Box<GremlinValue>), Gt(Box<GremlinValue>), Gte(Box<GremlinValue>), Between(Box<GremlinValue>, Box<GremlinValue>), Inside(Box<GremlinValue>, Box<GremlinValue>), Outside(Box<GremlinValue>, Box<GremlinValue>), Within(Vec<GremlinValue>), Without(Vec<GremlinValue>), StartingWith(String), EndingWith(String), Containing(String), Regex(String), }
#[derive(Debug, Clone)]
pub enum ByModifier {
Key(String),
Traversal(Box<GremlinTraversal>),
Order(OrderDirection),
}
#[derive(Debug, Clone)]
pub enum OrderDirection {
Asc,
Desc,
}
pub struct GremlinParser<'a> {
input: &'a str,
pos: usize,
}
impl<'a> GremlinParser<'a> {
pub fn new(input: &'a str) -> Self {
Self { input, pos: 0 }
}
pub fn parse(input: &str) -> Result<GremlinTraversal, GremlinError> {
let mut parser = GremlinParser::new(input);
parser.parse_traversal()
}
fn parse_traversal(&mut self) -> Result<GremlinTraversal, GremlinError> {
self.skip_whitespace();
let source = if self.consume_if("g.") {
TraversalSource::Graph
} else if self.consume_if("__.") {
TraversalSource::Anonymous
} else if self.consume_if("__") {
TraversalSource::Anonymous
} else {
return Err(self.error("Expected 'g.' or '__' at start of traversal"));
};
let mut steps = Vec::new();
loop {
self.skip_whitespace();
if self.is_at_end() || self.peek() == Some(')') || self.peek() == Some(',') {
break;
}
self.consume_if(".");
self.skip_whitespace();
if self.is_at_end() || self.peek() == Some(')') || self.peek() == Some(',') {
break;
}
let step = self.parse_step()?;
steps.push(step);
}
Ok(GremlinTraversal { source, steps })
}
fn parse_step(&mut self) -> Result<GremlinStep, GremlinError> {
let name = self.parse_identifier()?;
match name.as_str() {
"V" => {
self.expect('(')?;
let id = self.parse_optional_string_arg()?;
self.expect(')')?;
Ok(GremlinStep::V(id))
}
"E" => {
self.expect('(')?;
let id = self.parse_optional_string_arg()?;
self.expect(')')?;
Ok(GremlinStep::E(id))
}
"out" => {
self.expect('(')?;
let label = self.parse_optional_string_arg()?;
self.expect(')')?;
Ok(GremlinStep::Out(label))
}
"in" => {
self.expect('(')?;
let label = self.parse_optional_string_arg()?;
self.expect(')')?;
Ok(GremlinStep::In(label))
}
"both" => {
self.expect('(')?;
let label = self.parse_optional_string_arg()?;
self.expect(')')?;
Ok(GremlinStep::Both(label))
}
"outE" => {
self.expect('(')?;
let label = self.parse_optional_string_arg()?;
self.expect(')')?;
Ok(GremlinStep::OutE(label))
}
"inE" => {
self.expect('(')?;
let label = self.parse_optional_string_arg()?;
self.expect(')')?;
Ok(GremlinStep::InE(label))
}
"bothE" => {
self.expect('(')?;
let label = self.parse_optional_string_arg()?;
self.expect(')')?;
Ok(GremlinStep::BothE(label))
}
"outV" => {
self.expect('(')?;
self.expect(')')?;
Ok(GremlinStep::OutV)
}
"inV" => {
self.expect('(')?;
self.expect(')')?;
Ok(GremlinStep::InV)
}
"bothV" => {
self.expect('(')?;
self.expect(')')?;
Ok(GremlinStep::BothV)
}
"otherV" => {
self.expect('(')?;
self.expect(')')?;
Ok(GremlinStep::OtherV)
}
"has" => {
self.expect('(')?;
let key = self.parse_string()?;
self.skip_whitespace();
let value = if self.consume_if(",") {
self.skip_whitespace();
Some(self.parse_value()?)
} else {
None
};
self.expect(')')?;
Ok(GremlinStep::Has(key, value))
}
"hasNot" => {
self.expect('(')?;
let key = self.parse_string()?;
self.expect(')')?;
Ok(GremlinStep::HasNot(key))
}
"hasLabel" => {
self.expect('(')?;
let label = self.parse_string()?;
self.expect(')')?;
Ok(GremlinStep::HasLabel(label))
}
"hasId" => {
self.expect('(')?;
let id = self.parse_string()?;
self.expect(')')?;
Ok(GremlinStep::HasId(id))
}
"dedup" => {
self.expect('(')?;
self.expect(')')?;
Ok(GremlinStep::Dedup)
}
"limit" => {
self.expect('(')?;
let n = self.parse_integer()? as u64;
self.expect(')')?;
Ok(GremlinStep::Limit(n))
}
"skip" => {
self.expect('(')?;
let n = self.parse_integer()? as u64;
self.expect(')')?;
Ok(GremlinStep::Skip(n))
}
"range" => {
self.expect('(')?;
let from = self.parse_integer()? as u64;
self.expect(',')?;
self.skip_whitespace();
let to = self.parse_integer()? as u64;
self.expect(')')?;
Ok(GremlinStep::Range(from, to))
}
"values" => {
self.expect('(')?;
let keys = self.parse_string_list()?;
self.expect(')')?;
Ok(GremlinStep::Values(keys))
}
"valueMap" => {
self.expect('(')?;
let keys = self.parse_string_list()?;
self.expect(')')?;
Ok(GremlinStep::ValueMap(keys))
}
"id" => {
self.expect('(')?;
self.expect(')')?;
Ok(GremlinStep::Id)
}
"label" => {
self.expect('(')?;
self.expect(')')?;
Ok(GremlinStep::Label)
}
"properties" => {
self.expect('(')?;
let keys = self.parse_string_list()?;
self.expect(')')?;
Ok(GremlinStep::Properties(keys))
}
"count" => {
self.expect('(')?;
self.expect(')')?;
Ok(GremlinStep::Count)
}
"sum" => {
self.expect('(')?;
self.expect(')')?;
Ok(GremlinStep::Sum)
}
"min" => {
self.expect('(')?;
self.expect(')')?;
Ok(GremlinStep::Min)
}
"max" => {
self.expect('(')?;
self.expect(')')?;
Ok(GremlinStep::Max)
}
"mean" => {
self.expect('(')?;
self.expect(')')?;
Ok(GremlinStep::Mean)
}
"select" => {
self.expect('(')?;
let labels = self.parse_string_list()?;
self.expect(')')?;
Ok(GremlinStep::Select(labels))
}
"project" => {
self.expect('(')?;
let keys = self.parse_string_list()?;
self.expect(')')?;
Ok(GremlinStep::Project(keys))
}
"path" => {
self.expect('(')?;
self.expect(')')?;
Ok(GremlinStep::Path)
}
"simplePath" => {
self.expect('(')?;
self.expect(')')?;
Ok(GremlinStep::SimplePath)
}
"cyclicPath" => {
self.expect('(')?;
self.expect(')')?;
Ok(GremlinStep::CyclicPath)
}
"repeat" => {
self.expect('(')?;
let inner = self.parse_inner_traversal()?;
self.expect(')')?;
Ok(GremlinStep::Repeat(Box::new(inner)))
}
"times" => {
self.expect('(')?;
let n = self.parse_integer()? as u32;
self.expect(')')?;
Ok(GremlinStep::Times(n))
}
"until" => {
self.expect('(')?;
let inner = self.parse_inner_traversal()?;
self.expect(')')?;
Ok(GremlinStep::Until(Box::new(inner)))
}
"emit" => {
self.expect('(')?;
self.expect(')')?;
Ok(GremlinStep::Emit)
}
"as" => {
self.expect('(')?;
let label = self.parse_string()?;
self.expect(')')?;
Ok(GremlinStep::As(label))
}
"aggregate" => {
self.expect('(')?;
let label = self.parse_string()?;
self.expect(')')?;
Ok(GremlinStep::Aggregate(label))
}
"store" => {
self.expect('(')?;
let label = self.parse_string()?;
self.expect(')')?;
Ok(GremlinStep::Store(label))
}
"group" => {
self.expect('(')?;
self.expect(')')?;
Ok(GremlinStep::Group)
}
"groupCount" => {
self.expect('(')?;
self.expect(')')?;
Ok(GremlinStep::GroupCount)
}
"toList" => {
self.expect('(')?;
self.expect(')')?;
Ok(GremlinStep::ToList)
}
"toSet" => {
self.expect('(')?;
self.expect(')')?;
Ok(GremlinStep::ToSet)
}
"next" => {
self.expect('(')?;
self.expect(')')?;
Ok(GremlinStep::Next)
}
"fold" => {
self.expect('(')?;
self.expect(')')?;
Ok(GremlinStep::Fold)
}
_ => Err(self.error(&format!("Unknown step: {}", name))),
}
}
fn parse_inner_traversal(&mut self) -> Result<GremlinTraversal, GremlinError> {
self.skip_whitespace();
if self.input[self.pos..].starts_with("__") || self.input[self.pos..].starts_with("g.") {
return self.parse_traversal();
}
let mut steps = Vec::new();
loop {
self.skip_whitespace();
if self.is_at_end() || self.peek() == Some(')') {
break;
}
self.consume_if(".");
self.skip_whitespace();
if self.is_at_end() || self.peek() == Some(')') {
break;
}
let step = self.parse_step()?;
steps.push(step);
}
Ok(GremlinTraversal {
source: TraversalSource::Anonymous,
steps,
})
}
fn skip_whitespace(&mut self) {
while let Some(c) = self.peek() {
if c.is_whitespace() {
self.pos += 1;
} else {
break;
}
}
}
fn peek(&self) -> Option<char> {
self.input[self.pos..].chars().next()
}
fn is_at_end(&self) -> bool {
self.pos >= self.input.len()
}
fn consume_if(&mut self, s: &str) -> bool {
if self.input[self.pos..].starts_with(s) {
self.pos += s.len();
true
} else {
false
}
}
fn expect(&mut self, c: char) -> Result<(), GremlinError> {
self.skip_whitespace();
if self.peek() == Some(c) {
self.pos += 1;
Ok(())
} else {
Err(self.error(&format!("Expected '{}', found {:?}", c, self.peek())))
}
}
fn parse_identifier(&mut self) -> Result<String, GremlinError> {
self.skip_whitespace();
let start = self.pos;
while let Some(c) = self.peek() {
if c.is_alphanumeric() || c == '_' {
self.pos += 1;
} else {
break;
}
}
if self.pos == start {
Err(self.error("Expected identifier"))
} else {
Ok(self.input[start..self.pos].to_string())
}
}
fn parse_string(&mut self) -> Result<String, GremlinError> {
self.skip_whitespace();
let quote = self.peek();
if quote != Some('\'') && quote != Some('"') {
return Err(self.error("Expected string"));
}
self.pos += 1;
let start = self.pos;
while let Some(c) = self.peek() {
if Some(c) == quote {
let s = self.input[start..self.pos].to_string();
self.pos += 1;
return Ok(s);
}
if c == '\\' {
self.pos += 2; } else {
self.pos += 1;
}
}
Err(self.error("Unterminated string"))
}
fn parse_optional_string_arg(&mut self) -> Result<Option<String>, GremlinError> {
self.skip_whitespace();
if self.peek() == Some(')') {
Ok(None)
} else if self.peek() == Some('\'') || self.peek() == Some('"') {
Ok(Some(self.parse_string()?))
} else {
let start = self.pos;
while let Some(c) = self.peek() {
if c.is_alphanumeric() || c == '_' || c == ':' || c == '.' || c == '-' {
self.pos += 1;
} else {
break;
}
}
if self.pos > start {
Ok(Some(self.input[start..self.pos].to_string()))
} else {
Ok(None)
}
}
}
fn parse_string_list(&mut self) -> Result<Vec<String>, GremlinError> {
let mut result = Vec::new();
self.skip_whitespace();
if self.peek() == Some(')') {
return Ok(result);
}
loop {
self.skip_whitespace();
if self.peek() == Some(')') {
break;
}
result.push(self.parse_string()?);
self.skip_whitespace();
if !self.consume_if(",") {
break;
}
}
Ok(result)
}
fn parse_integer(&mut self) -> Result<i64, GremlinError> {
self.skip_whitespace();
let start = self.pos;
if self.peek() == Some('-') {
self.pos += 1;
}
while let Some(c) = self.peek() {
if c.is_ascii_digit() {
self.pos += 1;
} else {
break;
}
}
let s = &self.input[start..self.pos];
s.parse()
.map_err(|_| self.error(&format!("Invalid integer: {}", s)))
}
fn parse_value(&mut self) -> Result<GremlinValue, GremlinError> {
self.skip_whitespace();
if self.peek() == Some('\'') || self.peek() == Some('"') {
return Ok(GremlinValue::String(self.parse_string()?));
}
if self.consume_if("true") {
return Ok(GremlinValue::Boolean(true));
}
if self.consume_if("false") {
return Ok(GremlinValue::Boolean(false));
}
let start = self.pos;
if self.peek() == Some('-') {
self.pos += 1;
}
while let Some(c) = self.peek() {
if c.is_ascii_digit() || c == '.' {
self.pos += 1;
} else {
break;
}
}
let s = &self.input[start..self.pos];
if s.contains('.') {
let f: f64 = s
.parse()
.map_err(|_| self.error(&format!("Invalid float: {}", s)))?;
Ok(GremlinValue::Float(f))
} else {
let i: i64 = s
.parse()
.map_err(|_| self.error(&format!("Invalid integer: {}", s)))?;
Ok(GremlinValue::Integer(i))
}
}
fn error(&self, message: &str) -> GremlinError {
GremlinError {
message: message.to_string(),
position: self.pos,
}
}
}
impl GremlinTraversal {
pub fn to_query_expr(&self) -> QueryExpr {
let mut nodes = Vec::new();
let mut edges = Vec::new();
let mut filters = Vec::new();
let mut projections = Vec::new();
let mut current_alias = "n0".to_string();
let mut alias_counter = 0;
for step in &self.steps {
match step {
GremlinStep::V(id) => {
let mut node = NodePattern {
alias: current_alias.clone(),
node_label: None,
properties: Vec::new(),
};
if let Some(id) = id {
node.properties.push(PropertyFilter {
name: "id".to_string(),
op: CompareOp::Eq,
value: Value::text(id.clone()),
});
}
nodes.push(node);
}
GremlinStep::HasLabel(label) => {
if let Some(last) = nodes.last_mut() {
let lower = label.to_lowercase();
last.node_label = Some(match lower.as_str() {
"vuln" => "vulnerability".to_string(),
"tech" => "technology".to_string(),
"cert" => "certificate".to_string(),
_ => lower,
});
}
}
GremlinStep::Has(key, value) => {
let field_ref = FieldRef::NodeProperty {
alias: current_alias.clone(),
property: key.clone(),
};
let filter = if let Some(val) = value {
Filter::Compare {
field: field_ref,
op: CompareOp::Eq,
value: match val {
GremlinValue::String(s) => Value::text(s.clone()),
GremlinValue::Integer(i) => Value::Integer(*i),
GremlinValue::Float(f) => Value::Float(*f),
GremlinValue::Boolean(b) => Value::Boolean(*b),
GremlinValue::Predicate(_) => Value::Null, },
}
} else {
Filter::IsNotNull(field_ref)
};
filters.push(filter);
}
GremlinStep::Out(label) | GremlinStep::In(label) | GremlinStep::Both(label) => {
alias_counter += 1;
let new_alias = format!("n{}", alias_counter);
let direction = match step {
GremlinStep::Out(_) => EdgeDirection::Outgoing,
GremlinStep::In(_) => EdgeDirection::Incoming,
GremlinStep::Both(_) => EdgeDirection::Both,
_ => EdgeDirection::Outgoing,
};
let edge_label = label.as_ref().map(|l| {
let lower = l.to_lowercase();
match lower.as_str() {
"hasservice" => "has_service".to_string(),
"hasendpoint" => "has_endpoint".to_string(),
"usestech" => "uses_tech".to_string(),
"authaccess" => "auth_access".to_string(),
"affectedby" => "affected_by".to_string(),
"connectsto" | "connects" => "connects_to".to_string(),
"relatedto" => "related_to".to_string(),
"hasuser" => "has_user".to_string(),
"hascert" => "has_cert".to_string(),
_ => lower,
}
});
edges.push(EdgePattern {
alias: None,
from: current_alias.clone(),
to: new_alias.clone(),
edge_label,
direction,
min_hops: 1,
max_hops: 1,
});
nodes.push(NodePattern {
alias: new_alias.clone(),
node_label: None,
properties: Vec::new(),
});
current_alias = new_alias;
}
GremlinStep::Limit(_n) => {
}
GremlinStep::Values(keys) => {
for key in keys {
projections.push(Projection::from_field(FieldRef::NodeProperty {
alias: current_alias.clone(),
property: key.clone(),
}));
}
}
GremlinStep::Count => {
projections.push(Projection::Field(
FieldRef::NodeId {
alias: current_alias.clone(),
},
Some("count".to_string()),
));
}
GremlinStep::As(label) => {
if let Some(last) = nodes.last_mut() {
last.alias = label.clone();
current_alias = label.clone();
}
}
_ => {}
}
}
if projections.is_empty() {
projections.push(Projection::from_field(FieldRef::NodeId {
alias: current_alias.clone(),
}));
}
let combined_filter = if filters.is_empty() {
None
} else {
let mut iter = filters.into_iter();
let first = iter.next().unwrap();
Some(iter.fold(first, |acc, f| Filter::And(Box::new(acc), Box::new(f))))
};
QueryExpr::Graph(GraphQuery {
alias: None,
pattern: GraphPattern { nodes, edges },
filter: combined_filter,
return_: projections,
limit: None,
})
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_parse_simple_v() {
let t = GremlinParser::parse("g.V()").unwrap();
assert_eq!(t.source, TraversalSource::Graph);
assert_eq!(t.steps.len(), 1);
assert!(matches!(t.steps[0], GremlinStep::V(None)));
}
#[test]
fn test_parse_v_with_id() {
let t = GremlinParser::parse("g.V('host:10.0.0.1')").unwrap();
assert!(matches!(&t.steps[0], GremlinStep::V(Some(id)) if id == "host:10.0.0.1"));
}
#[test]
fn test_parse_has_label() {
let t = GremlinParser::parse("g.V().hasLabel('host')").unwrap();
assert_eq!(t.steps.len(), 2);
assert!(matches!(&t.steps[1], GremlinStep::HasLabel(l) if l == "host"));
}
#[test]
fn test_parse_has_key_value() {
let t = GremlinParser::parse("g.V().has('name', 'alice')").unwrap();
assert!(matches!(
&t.steps[1],
GremlinStep::Has(k, Some(GremlinValue::String(v))) if k == "name" && v == "alice"
));
}
#[test]
fn test_parse_out() {
let t = GremlinParser::parse("g.V().out('knows')").unwrap();
assert!(matches!(&t.steps[1], GremlinStep::Out(Some(l)) if l == "knows"));
}
#[test]
fn test_parse_chain() {
let t =
GremlinParser::parse("g.V().hasLabel('host').out('connects').has('port', 22)").unwrap();
assert_eq!(t.steps.len(), 4);
}
#[test]
fn test_parse_limit() {
let t = GremlinParser::parse("g.V().limit(10)").unwrap();
assert!(matches!(t.steps[1], GremlinStep::Limit(10)));
}
#[test]
fn test_parse_count() {
let t = GremlinParser::parse("g.V().count()").unwrap();
assert!(matches!(t.steps[1], GremlinStep::Count));
}
#[test]
fn test_parse_repeat_times() {
let t = GremlinParser::parse("g.V().repeat(out()).times(3)").unwrap();
assert_eq!(t.steps.len(), 3);
assert!(matches!(&t.steps[1], GremlinStep::Repeat(_)));
assert!(matches!(t.steps[2], GremlinStep::Times(3)));
}
#[test]
fn test_parse_anonymous() {
let t = GremlinParser::parse("__.out('knows')").unwrap();
assert_eq!(t.source, TraversalSource::Anonymous);
assert!(matches!(&t.steps[0], GremlinStep::Out(Some(l)) if l == "knows"));
}
#[test]
fn test_parse_values() {
let t = GremlinParser::parse("g.V().values('name', 'age')").unwrap();
assert!(matches!(&t.steps[1], GremlinStep::Values(keys) if keys.len() == 2));
}
#[test]
fn test_to_query_expr() {
let t = GremlinParser::parse("g.V().hasLabel('host').out('connects').limit(10)").unwrap();
let expr = t.to_query_expr();
assert!(matches!(expr, QueryExpr::Graph(_)));
}
}