use super::ast::{
EdgeDirection, EdgePattern, GqlLiteral, GqlPredicate, GqlQuery, NodePattern, PathSegment,
};
use crate::gql::GqlTranslateError;
/// Lexical token produced by [`tokenise`] and consumed by [`Parser`].
///
/// Punctuation variants each correspond to exactly one source character;
/// multi-character symbols such as the `->` / `<-` edge arrows are assembled
/// by the parser from consecutive tokens.
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum Token {
/// Identifier or keyword text: starts with an alphabetic character or `_`,
/// continues with alphanumerics or `_`. Keywords are not distinguished here.
Ident(String),
/// Contents of a double-quoted string literal, with escape sequences
/// already processed by the tokeniser.
Str(String),
/// Run of ASCII digits parsed as `i64`. Never negative: a leading `-` is a
/// separate [`Token::Minus`].
Int(i64),
/// `digits '.' digits` parsed as `f64`. Never negative, for the same reason
/// as [`Token::Int`].
Float(f64),
/// `(`
LParen,
/// `)`
RParen,
/// `[`
LBracket,
/// `]`
RBracket,
/// `{`
LBrace,
/// `}`
RBrace,
/// `:` — precedes a label and separates key from value in a property filter.
Colon,
/// `.` — separates variable from property in a `WHERE` predicate.
Dot,
/// `,`
Comma,
/// `=`
Eq,
/// `-` — an edge dash, or the sign of a negative numeric literal.
Minus,
/// `>` — closes a forward edge arrow (`->`).
Gt,
/// `<` — opens a backward edge arrow (`<-`).
Lt,
}
/// Splits GQL source text into tokens.
///
/// Every token is returned together with the byte offset in `src` of its
/// first character.
///
/// Lexical rules, tried in this order for each non-blank position:
/// - whitespace is skipped; `#` starts a comment that runs to end of line;
/// - `"` opens a string literal closed by the next unescaped `"`. The escapes
///   `\n`, `\t`, `\r`, `\"` and `\\` are translated; any other escaped
///   character is kept verbatim *including* its backslash;
/// - an ASCII digit starts a number: a digit run is an `Int`, and a digit run
///   followed by `.` and at least one more digit is a `Float`;
/// - an alphabetic character or `_` starts an identifier made of
///   alphanumerics and `_`;
/// - otherwise the character must be one of the single-character punctuation
///   tokens.
///
/// # Errors
///
/// Returns `GqlTranslateError::ParseError` for an unterminated string, a
/// backslash at end of input, a numeric literal that fails to parse, or a
/// character that starts no token.
pub(crate) fn tokenise(src: &str) -> Result<Vec<(usize, Token)>, GqlTranslateError> {
    // Characters paired with their byte offsets, so scanning can use char
    // indices while errors and tokens report byte positions.
    let indexed: Vec<(usize, char)> = src.char_indices().collect();
    let total = indexed.len();
    // Byte offset of the character at `idx`; one past the last character maps
    // to the end of the source.
    let offset_of = |idx: usize| indexed.get(idx).map_or(src.len(), |&(off, _)| off);
    let char_at = |idx: usize| indexed[idx].1;

    let mut out = Vec::new();
    let mut i = 0usize;
    while i < total {
        let lead = char_at(i);
        if lead.is_whitespace() {
            i += 1;
            continue;
        }
        if lead == '#' {
            // Stop on the newline itself; the whitespace rule consumes it next.
            while i < total && char_at(i) != '\n' {
                i += 1;
            }
            continue;
        }

        let tok_start = offset_of(i);
        match lead {
            '"' => {
                // Step over the opening quote.
                i += 1;
                let mut text = String::new();
                loop {
                    let cur = match indexed.get(i) {
                        Some(&(_, ch)) => ch,
                        None => {
                            return Err(GqlTranslateError::ParseError {
                                pos: offset_of(i),
                                msg: "Unterminated string literal".to_string(),
                            });
                        }
                    };
                    i += 1;
                    match cur {
                        '"' => break,
                        '\\' => {
                            let escaped = match indexed.get(i) {
                                Some(&(_, ch)) => ch,
                                None => {
                                    return Err(GqlTranslateError::ParseError {
                                        pos: offset_of(i),
                                        msg: "Escape sequence at end of input".to_string(),
                                    });
                                }
                            };
                            i += 1;
                            match escaped {
                                'n' => text.push('\n'),
                                't' => text.push('\t'),
                                'r' => text.push('\r'),
                                '"' => text.push('"'),
                                '\\' => text.push('\\'),
                                other => {
                                    // Unknown escape: keep it exactly as written.
                                    text.push('\\');
                                    text.push(other);
                                }
                            }
                        }
                        plain => text.push(plain),
                    }
                }
                out.push((tok_start, Token::Str(text)));
            }
            digit if digit.is_ascii_digit() => {
                while i < total && char_at(i).is_ascii_digit() {
                    i += 1;
                }
                // A '.' only belongs to the number when a digit follows it.
                let is_float =
                    i + 1 < total && char_at(i) == '.' && char_at(i + 1).is_ascii_digit();
                if is_float {
                    i += 1;
                    while i < total && char_at(i).is_ascii_digit() {
                        i += 1;
                    }
                }
                let raw = &src[tok_start..offset_of(i)];
                let tok = if is_float {
                    let v: f64 = raw.parse().map_err(|_| GqlTranslateError::ParseError {
                        pos: tok_start,
                        msg: format!("Invalid float literal: {raw}"),
                    })?;
                    Token::Float(v)
                } else {
                    let v: i64 = raw.parse().map_err(|_| GqlTranslateError::ParseError {
                        pos: tok_start,
                        msg: format!("Invalid integer literal: {raw}"),
                    })?;
                    Token::Int(v)
                };
                out.push((tok_start, tok));
            }
            alpha if alpha.is_alphabetic() || alpha == '_' => {
                while i < total && (char_at(i).is_alphanumeric() || char_at(i) == '_') {
                    i += 1;
                }
                let name = src[tok_start..offset_of(i)].to_string();
                out.push((tok_start, Token::Ident(name)));
            }
            symbol => {
                let tok = match symbol {
                    '(' => Token::LParen,
                    ')' => Token::RParen,
                    '[' => Token::LBracket,
                    ']' => Token::RBracket,
                    '{' => Token::LBrace,
                    '}' => Token::RBrace,
                    ':' => Token::Colon,
                    '.' => Token::Dot,
                    ',' => Token::Comma,
                    '=' => Token::Eq,
                    '-' => Token::Minus,
                    '>' => Token::Gt,
                    '<' => Token::Lt,
                    other => {
                        return Err(GqlTranslateError::ParseError {
                            pos: tok_start,
                            msg: format!("Unexpected character: {other:?}"),
                        });
                    }
                };
                i += 1;
                out.push((tok_start, tok));
            }
        }
    }
    Ok(out)
}
/// Hand-written parser that turns a token list into a `GqlQuery`.
///
/// Construct it with `Parser::new` and call `parse_query` once.
pub(crate) struct Parser {
/// Tokens to parse, each paired with a source position used only for error
/// reporting (a byte offset when the list comes from `tokenise`).
tokens: Vec<(usize, Token)>,
/// Index into `tokens` of the next token to be consumed.
cursor: usize,
/// Number of `_anonN` names generated so far for `_` variables.
anon_counter: usize,
}
impl Parser {
    /// Creates a parser positioned before the first of `tokens`.
    pub(crate) fn new(tokens: Vec<(usize, Token)>) -> Self {
        Self {
            tokens,
            cursor: 0,
            anon_counter: 0,
        }
    }

    /// Source position to attach to an error raised at the cursor.
    ///
    /// This is the recorded position of the current token. At end of input it
    /// is the last token's recorded position plus one, or `0` when the token
    /// list is empty.
    fn current_pos(&self) -> usize {
        match self.tokens.get(self.cursor) {
            Some((offset, _)) => *offset,
            None => match self.tokens.last() {
                Some((offset, _)) => *offset + 1,
                None => 0,
            },
        }
    }

    /// Token stored at `index`, if any.
    fn token_at(&self, index: usize) -> Option<&Token> {
        self.tokens.get(index).map(|(_, tok)| tok)
    }

    /// The token at the cursor, without consuming it.
    fn peek(&self) -> Option<&Token> {
        self.token_at(self.cursor)
    }

    /// The token one past the cursor, without consuming anything.
    fn peek2(&self) -> Option<&Token> {
        self.token_at(self.cursor + 1)
    }

    /// Consumes and returns the token at the cursor; `None` at end of input.
    fn advance(&mut self) -> Option<&Token> {
        let (_, tok) = self.tokens.get(self.cursor)?;
        self.cursor += 1;
        Some(tok)
    }

    /// Consumes the next token if it is the same *variant* as `expected`
    /// (payloads are not compared); otherwise reports a parse error and leaves
    /// the cursor untouched.
    fn expect(&mut self, expected: &Token) -> Result<(), GqlTranslateError> {
        let msg = match self.peek() {
            Some(found)
                if std::mem::discriminant(found) == std::mem::discriminant(expected) =>
            {
                self.advance();
                return Ok(());
            }
            Some(found) => format!("Expected {expected:?}, found {found:?}"),
            None => format!("Expected {expected:?}, found end of input"),
        };
        Err(GqlTranslateError::ParseError {
            pos: self.current_pos(),
            msg,
        })
    }

    /// Consumes the next token if it is an identifier equal to `kw` ignoring
    /// case; otherwise reports a parse error and leaves the cursor untouched.
    fn expect_keyword(&mut self, kw: &str) -> Result<(), GqlTranslateError> {
        if self.peek_keyword(kw) {
            self.advance();
            return Ok(());
        }
        let msg = match self.peek() {
            Some(found) => format!("Expected keyword '{kw}', found {found:?}"),
            None => format!("Expected keyword '{kw}', found end of input"),
        };
        Err(GqlTranslateError::ParseError {
            pos: self.current_pos(),
            msg,
        })
    }

    /// Whether the next token is an identifier equal to `kw` ignoring case.
    fn peek_keyword(&self, kw: &str) -> bool {
        match self.peek() {
            Some(Token::Ident(id)) => id.to_uppercase() == kw.to_uppercase(),
            _ => false,
        }
    }

    /// Consumes an identifier token and returns its text.
    fn consume_ident(&mut self) -> Result<String, GqlTranslateError> {
        let msg = match self.peek() {
            Some(Token::Ident(name)) => {
                let name = name.clone();
                self.advance();
                return Ok(name);
            }
            Some(found) => format!("Expected identifier, found {found:?}"),
            None => "Expected identifier, found end of input".to_string(),
        };
        Err(GqlTranslateError::ParseError {
            pos: self.current_pos(),
            msg,
        })
    }

    /// Returns the next unused `_anonN` name (`_anon0`, `_anon1`, ...).
    fn fresh_anon(&mut self) -> String {
        let name = format!("_anon{}", self.anon_counter);
        self.anon_counter += 1;
        name
    }

    /// Parses a complete query:
    /// `MATCH <pattern> [WHERE <predicate>] RETURN <ident> {, <ident>}`.
    ///
    /// # Errors
    ///
    /// Returns a parse error if any clause is malformed or if tokens remain
    /// after the `RETURN` clause.
    pub(crate) fn parse_query(&mut self) -> Result<GqlQuery, GqlTranslateError> {
        self.expect_keyword("MATCH")?;
        let match_pattern = self.parse_graph_pattern()?;

        let where_pred = if self.peek_keyword("WHERE") {
            self.advance();
            Some(self.parse_predicate()?)
        } else {
            None
        };

        self.expect_keyword("RETURN")?;
        let return_vars = self.parse_return_clause()?;

        // Anything left over is an error: the grammar ends with RETURN.
        if self.peek().is_some() {
            return Err(GqlTranslateError::ParseError {
                pos: self.current_pos(),
                msg: format!("Unexpected token after RETURN clause: {:?}", self.peek()),
            });
        }

        Ok(GqlQuery {
            match_pattern,
            where_pred,
            return_vars,
        })
    }

    /// Parses a node followed by any number of `edge node` pairs.
    ///
    /// An edge is recognised by two-token lookahead: `-` `[` (forward) or
    /// `<` `-` (backward). Anything else ends the pattern.
    fn parse_graph_pattern(&mut self) -> Result<Vec<PathSegment>, GqlTranslateError> {
        let mut segments = vec![PathSegment::Node(self.parse_node_pattern()?)];
        while matches!(
            (self.peek(), self.peek2()),
            (Some(Token::Minus), Some(Token::LBracket)) | (Some(Token::Lt), Some(Token::Minus))
        ) {
            segments.push(PathSegment::Edge(self.parse_edge_pattern()?));
            segments.push(PathSegment::Node(self.parse_node_pattern()?));
        }
        Ok(segments)
    }

    /// Parses the optional variable name at the start of a node or edge
    /// pattern.
    ///
    /// An identifier is taken as the variable unless it is a structural
    /// keyword. The name `_` is replaced by a fresh `_anonN` name.
    fn parse_optional_var(&mut self) -> Option<String> {
        let name = match self.peek() {
            Some(Token::Ident(id)) if !self.is_structural_keyword(id) => id.clone(),
            _ => return None,
        };
        self.advance();
        if name == "_" {
            Some(self.fresh_anon())
        } else {
            Some(name)
        }
    }

    /// Parses an optional `: <ident>` label.
    fn parse_optional_label(&mut self) -> Result<Option<String>, GqlTranslateError> {
        if !matches!(self.peek(), Some(Token::Colon)) {
            return Ok(None);
        }
        self.advance();
        self.consume_ident().map(Some)
    }

    /// Parses `( [var] [: label] [{ props }] )`.
    fn parse_node_pattern(&mut self) -> Result<NodePattern, GqlTranslateError> {
        self.expect(&Token::LParen)?;
        let var = self.parse_optional_var();
        let label = self.parse_optional_label()?;
        let props = if matches!(self.peek(), Some(Token::LBrace)) {
            self.parse_prop_filter()?
        } else {
            Vec::new()
        };
        self.expect(&Token::RParen)?;
        Ok(NodePattern { var, label, props })
    }

    /// Parses a forward edge `-[ [var] [: label] ]->` or a backward edge
    /// `<-[ [var] [: label] ]-`.
    fn parse_edge_pattern(&mut self) -> Result<EdgePattern, GqlTranslateError> {
        // A leading '-' means forward; otherwise the edge must open with "<-".
        let direction = if matches!(self.peek(), Some(Token::Minus)) {
            self.advance();
            EdgeDirection::Forward
        } else {
            self.expect(&Token::Lt)?;
            self.expect(&Token::Minus)?;
            EdgeDirection::Backward
        };
        self.expect(&Token::LBracket)?;

        let var = self.parse_optional_var();
        let label = self.parse_optional_label()?;

        self.expect(&Token::RBracket)?;
        // Both directions close with '-'; only a forward edge adds '>'.
        self.expect(&Token::Minus)?;
        match direction {
            EdgeDirection::Forward => self.expect(&Token::Gt)?,
            EdgeDirection::Backward => {}
        }

        Ok(EdgePattern {
            var,
            label,
            direction,
        })
    }

    /// Parses `{ [key: literal {, key: literal}] }`; an empty `{}` is allowed.
    fn parse_prop_filter(&mut self) -> Result<Vec<(String, GqlLiteral)>, GqlTranslateError> {
        self.expect(&Token::LBrace)?;
        let mut entries = Vec::new();
        if !matches!(self.peek(), Some(Token::RBrace)) {
            loop {
                entries.push(self.parse_prop_kv()?);
                if !matches!(self.peek(), Some(Token::Comma)) {
                    break;
                }
                self.advance();
            }
        }
        self.expect(&Token::RBrace)?;
        Ok(entries)
    }

    /// Parses a single `key: literal` pair.
    fn parse_prop_kv(&mut self) -> Result<(String, GqlLiteral), GqlTranslateError> {
        let key = self.consume_ident()?;
        self.expect(&Token::Colon)?;
        let value = self.parse_literal()?;
        Ok((key, value))
    }

    /// Parses the only supported predicate form, `var.prop = literal`.
    fn parse_predicate(&mut self) -> Result<GqlPredicate, GqlTranslateError> {
        let var = self.consume_ident()?;
        self.expect(&Token::Dot)?;
        let prop = self.consume_ident()?;
        self.expect(&Token::Eq)?;
        let value = self.parse_literal()?;
        Ok(GqlPredicate { var, prop, value })
    }

    /// Parses one or more comma-separated identifiers.
    fn parse_return_clause(&mut self) -> Result<Vec<String>, GqlTranslateError> {
        let mut vars = vec![self.consume_ident()?];
        while matches!(self.peek(), Some(Token::Comma)) {
            self.advance();
            vars.push(self.consume_ident()?);
        }
        Ok(vars)
    }

    /// Parses a string, integer, float or boolean literal.
    ///
    /// A `-` is accepted as a sign only when the token right after it is a
    /// number. `true` / `false` are matched case-insensitively. An identifier
    /// that is not a boolean is consumed before the error is returned; every
    /// other error leaves the cursor on the offending token.
    fn parse_literal(&mut self) -> Result<GqlLiteral, GqlTranslateError> {
        let negate = matches!(self.peek(), Some(Token::Minus))
            && matches!(self.peek2(), Some(Token::Int(_)) | Some(Token::Float(_)));
        if negate {
            self.advance();
        }

        let pos = self.current_pos();
        let outcome = match self.peek() {
            Some(Token::Str(s)) => Ok(GqlLiteral::Str(s.clone())),
            Some(Token::Int(n)) => Ok(GqlLiteral::Int(if negate { -n } else { *n })),
            Some(Token::Float(f)) => Ok(GqlLiteral::Float(if negate { -f } else { *f })),
            Some(Token::Ident(id)) => {
                let lowered = id.to_lowercase();
                match lowered.as_str() {
                    "true" => Ok(GqlLiteral::Bool(true)),
                    "false" => Ok(GqlLiteral::Bool(false)),
                    other => Err(GqlTranslateError::ParseError {
                        pos,
                        msg: format!("Expected literal, found identifier '{other}'"),
                    }),
                }
            }
            Some(found) => {
                return Err(GqlTranslateError::ParseError {
                    pos,
                    msg: format!("Expected literal, found {found:?}"),
                });
            }
            None => {
                return Err(GqlTranslateError::ParseError {
                    pos,
                    msg: "Expected literal, found end of input".to_string(),
                });
            }
        };
        // Reached for Str / Int / Float / Ident only: the token is consumed
        // whether or not it turned out to be a valid literal.
        self.advance();
        outcome
    }

    /// Whether `id` is, ignoring case, one of the words that may not be used
    /// as a pattern variable name.
    fn is_structural_keyword(&self, id: &str) -> bool {
        const STRUCTURAL: [&str; 5] = ["MATCH", "WHERE", "RETURN", "TRUE", "FALSE"];
        let upper = id.to_uppercase();
        STRUCTURAL.contains(&upper.as_str())
    }
}
/// Parses GQL source text into a `GqlQuery`.
///
/// Tokenises `src` and then runs the parser over the resulting tokens.
///
/// # Errors
///
/// Returns the first `GqlTranslateError` raised by the tokeniser or, if
/// tokenising succeeds, by the parser.
pub fn parse_gql(src: &str) -> Result<GqlQuery, GqlTranslateError> {
    tokenise(src).and_then(|tokens| Parser::new(tokens).parse_query())
}