//! Lexer for a graph query language with vector-similarity and temporal
//! extensions: Cypher-style node and relationship patterns alongside
//! `SIMILAR TO` / `RANK BY SIMILARITY` clauses and `AS OF` / `BETWEEN`
//! time-travel qualifiers.

use std::fmt;
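/// A single lexical token of the query language.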
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
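// Keywords (recognized case-insensitively by the lexer).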
Match,
Where,
Return,
Order,
By,
Limit,
Skip,
Distinct,
Count,
Asc,
Desc,
And,
Or,
Not,
In,
Is,
Null,
True,
False,
Similar,
To,
Using,
Find,
Rank,
Similarity,
Top,
As,
Of,
Between,
Exists,
Contains,
Starts,
Ends,
With,
Cosine,
Euclidean,
DotProduct,
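// Punctuation and pattern syntax.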
LeftParen,
RightParen,
LeftBracket,
RightBracket,
LeftBrace,
RightBrace,
Colon,
Comma,
Dot,
Star,
Dash,
Arrow,
LeftArrow,
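// Comparison operators.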
Eq,
Ne,
Lt,
Le,
Gt,
Ge,
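// Literals, identifiers, and parameters.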
Identifier(String),
StringLiteral(String),
IntegerLiteral(i64),
FloatLiteral(f64),
Parameter(String),
Eof,
}
impl fmt::Display for Token {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Token::Match => write!(f, "MATCH"),
Token::Where => write!(f, "WHERE"),
Token::Return => write!(f, "RETURN"),
Token::Order => write!(f, "ORDER"),
Token::By => write!(f, "BY"),
Token::Limit => write!(f, "LIMIT"),
Token::Skip => write!(f, "SKIP"),
Token::Distinct => write!(f, "DISTINCT"),
Token::Count => write!(f, "COUNT"),
Token::Asc => write!(f, "ASC"),
Token::Desc => write!(f, "DESC"),
Token::And => write!(f, "AND"),
Token::Or => write!(f, "OR"),
Token::Not => write!(f, "NOT"),
Token::In => write!(f, "IN"),
Token::Is => write!(f, "IS"),
Token::Null => write!(f, "NULL"),
Token::True => write!(f, "TRUE"),
Token::False => write!(f, "FALSE"),
Token::Similar => write!(f, "SIMILAR"),
Token::To => write!(f, "TO"),
Token::Using => write!(f, "USING"),
Token::Find => write!(f, "FIND"),
Token::Rank => write!(f, "RANK"),
Token::Similarity => write!(f, "SIMILARITY"),
Token::Top => write!(f, "TOP"),
Token::As => write!(f, "AS"),
Token::Of => write!(f, "OF"),
Token::Between => write!(f, "BETWEEN"),
Token::Exists => write!(f, "EXISTS"),
Token::Contains => write!(f, "CONTAINS"),
Token::Starts => write!(f, "STARTS"),
Token::Ends => write!(f, "ENDS"),
Token::With => write!(f, "WITH"),
Token::Cosine => write!(f, "COSINE"),
Token::Euclidean => write!(f, "EUCLIDEAN"),
Token::DotProduct => write!(f, "DOT_PRODUCT"),
Token::LeftParen => write!(f, "("),
Token::RightParen => write!(f, ")"),
Token::LeftBracket => write!(f, "["),
Token::RightBracket => write!(f, "]"),
Token::LeftBrace => write!(f, "{{"),
Token::RightBrace => write!(f, "}}"),
Token::Colon => write!(f, ":"),
Token::Comma => write!(f, ","),
Token::Dot => write!(f, "."),
Token::Star => write!(f, "*"),
Token::Dash => write!(f, "-"),
Token::Arrow => write!(f, "->"),
Token::LeftArrow => write!(f, "<-"),
Token::Eq => write!(f, "="),
Token::Ne => write!(f, "<>"),
Token::Lt => write!(f, "<"),
Token::Le => write!(f, "<="),
Token::Gt => write!(f, ">"),
Token::Ge => write!(f, ">="),
Token::Identifier(s) => write!(f, "{}", s),
Token::StringLiteral(s) => write!(f, "'{}'", s),
Token::IntegerLiteral(n) => write!(f, "{}", n),
Token::FloatLiteral(n) => write!(f, "{}", n),
Token::Parameter(s) => write!(f, "${}", s),
Token::Eof => write!(f, "EOF"),
}
}
}
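/// A tokenization error, carrying the byte offset and the 1-based line and
/// column where lexing failed.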
#[derive(Debug, Clone, PartialEq)]
pub struct LexerError {
pub message: String,
pub position: usize,
pub line: usize,
pub column: usize,
}
impl fmt::Display for LexerError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"Lexer error at line {}, column {}: {}",
self.line, self.column, self.message
)
}
}
impl std::error::Error for LexerError {}
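/// A streaming tokenizer over a query string. Use [`Lexer::tokenize`] to lex
/// an entire input at once, or drive [`Lexer::next_token`] manually.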
pub struct Lexer<'a> {
input: &'a str,
chars: std::iter::Peekable<std::str::CharIndices<'a>>,
position: usize,
line: usize,
column: usize,
}
impl<'a> Lexer<'a> {
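/// Creates a lexer positioned at the start of `input` (line 1, column 1).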
pub fn new(input: &'a str) -> Self {
Lexer {
input,
chars: input.char_indices().peekable(),
position: 0,
line: 1,
column: 1,
}
}
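/// Tokenizes `input` in full, returning the token stream terminated by a
/// trailing [`Token::Eof`], or the first [`LexerError`] encountered.
///
/// A minimal usage sketch (marked `ignore` since the import path depends on
/// where this module lives in the crate):
///
/// ```ignore
/// let tokens = Lexer::tokenize("MATCH (n:Person) RETURN n").unwrap();
/// assert_eq!(tokens.first(), Some(&Token::Match));
/// assert_eq!(tokens.last(), Some(&Token::Eof));
/// ```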
pub fn tokenize(input: &str) -> Result<Vec<Token>, LexerError> {
let mut lexer = Lexer::new(input);
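// Rough preallocation heuristic: about one token per four input bytes.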
let mut tokens = Vec::with_capacity(input.len() / 4);
loop {
let token = lexer.next_token()?;
let is_eof = token == Token::Eof;
tokens.push(token);
if is_eof {
break;
}
}
Ok(tokens)
}
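/// Skips leading whitespace and comments, then lexes and returns the next
/// token. Once the input is exhausted this returns [`Token::Eof`], and keeps
/// doing so on every subsequent call.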
pub fn next_token(&mut self) -> Result<Token, LexerError> {
self.skip_whitespace_and_comments()?;
let Some(&(pos, ch)) = self.chars.peek() else {
return Ok(Token::Eof);
};
self.position = pos;
match ch {
'(' => {
self.advance();
Ok(Token::LeftParen)
}
')' => {
self.advance();
Ok(Token::RightParen)
}
'[' => {
self.advance();
Ok(Token::LeftBracket)
}
']' => {
self.advance();
Ok(Token::RightBracket)
}
'{' => {
self.advance();
Ok(Token::LeftBrace)
}
'}' => {
self.advance();
Ok(Token::RightBrace)
}
':' => {
self.advance();
Ok(Token::Colon)
}
',' => {
self.advance();
Ok(Token::Comma)
}
'.' => {
self.advance();
Ok(Token::Dot)
}
'*' => {
self.advance();
Ok(Token::Star)
}
'=' => {
self.advance();
Ok(Token::Eq)
}
'-' => self.read_dash_or_arrow(),
'<' => self.read_less_than(),
'>' => self.read_greater_than(),
'!' => self.read_not_equal(),
'\'' | '"' => self.read_string(),
'$' => self.read_parameter(),
'0'..='9' => self.read_number(),
'a'..='z' | 'A'..='Z' | '_' => self.read_identifier_or_keyword(),
_ => Err(self.error(format!("Unexpected character: '{}'", ch))),
}
}
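/// Consumes one character, keeping the line and column counters in sync.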
fn advance(&mut self) -> Option<(usize, char)> {
let result = self.chars.next();
if let Some((_, ch)) = result {
if ch == '\n' {
self.line += 1;
self.column = 1;
} else {
self.column += 1;
}
}
result
}
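/// Skips whitespace together with `--` and `//` line comments and
/// `/* ... */` block comments. The only error case is an unterminated block
/// comment.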
fn skip_whitespace_and_comments(&mut self) -> Result<(), LexerError> {
loop {
while let Some(&(_, ch)) = self.chars.peek() {
if ch.is_whitespace() {
self.advance();
} else {
break;
}
}
if let Some(&(idx, ch)) = self.chars.peek() {
if ch == '-' {
if self.input.as_bytes().get(idx + 1) == Some(&b'-') {
self.advance();
self.advance();
while let Some(&(_, ch)) = self.chars.peek() {
if ch == '\n' {
self.advance();
break;
}
self.advance();
}
continue;
}
} else if ch == '/' {
if let Some(&next_byte) = self.input.as_bytes().get(idx + 1) {
if next_byte == b'/' {
self.advance();
self.advance();
while let Some(&(_, ch)) = self.chars.peek() {
if ch == '\n' {
self.advance();
break;
}
self.advance();
}
continue;
} else if next_byte == b'*' {
self.advance();
self.advance();
let mut found_end = false;
loop {
match self.advance() {
Some((_, '*')) => {
if let Some(&(_, '/')) = self.chars.peek() {
self.advance();
found_end = true;
break;
}
}
None => break,
_ => {}
}
}
if !found_end {
return Err(self.error("Unterminated block comment".to_string()));
}
continue;
}
}
}
}
break;
}
Ok(())
}
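/// Disambiguates a leading `-`: `->` lexes as an arrow, a `-` directly
/// before a digit starts a negative number literal, and anything else
/// (including the `-[` / `-(` of a relationship pattern) is a bare dash.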
fn read_dash_or_arrow(&mut self) -> Result<Token, LexerError> {
self.advance();
if let Some(&(_, ch)) = self.chars.peek() {
match ch {
'>' => {
self.advance();
return Ok(Token::Arrow);
}
'[' | '(' => {
return Ok(Token::Dash);
}
'0'..='9' => {
return self.read_negative_number();
}
_ => {}
}
}
Ok(Token::Dash)
}
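/// Lexes `<`, `<=`, `<>` (not-equal), or `<-` (left arrow).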
fn read_less_than(&mut self) -> Result<Token, LexerError> {
self.advance();
if let Some(&(_, ch)) = self.chars.peek() {
match ch {
'=' => {
self.advance();
return Ok(Token::Le);
}
'>' => {
self.advance();
return Ok(Token::Ne);
}
'-' => {
self.advance();
return Ok(Token::LeftArrow);
}
_ => {}
}
}
Ok(Token::Lt)
}
fn read_greater_than(&mut self) -> Result<Token, LexerError> {
self.advance();
if let Some(&(_, '=')) = self.chars.peek() {
self.advance();
return Ok(Token::Ge);
}
Ok(Token::Gt)
}
fn read_not_equal(&mut self) -> Result<Token, LexerError> {
self.advance();
if let Some(&(_, '=')) = self.chars.peek() {
self.advance();
return Ok(Token::Ne);
}
Err(self.error("Expected '=' after '!'".to_string()))
}
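/// Reads a string literal delimited by single or double quotes. Supports
/// SQL-style doubled-quote escapes (`''` inside a single-quoted string) and
/// backslash escapes for `\n`, `\t`, `\r`, `\\`, and the active quote
/// character; any other backslash sequence is kept verbatim.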
fn read_string(&mut self) -> Result<Token, LexerError> {
let quote = self
.advance()
.map(|(_, c)| c)
.ok_or_else(|| self.error("Unexpected EOF while reading string".to_string()))?;
let mut value = String::new();
loop {
match self.advance() {
Some((_, ch)) if ch == quote => {
if let Some(&(_, next_ch)) = self.chars.peek()
&& next_ch == quote
{
value.push(quote);
self.advance();
continue;
}
break;
}
Some((_, '\\')) => {
match self.advance() {
Some((_, 'n')) => value.push('\n'),
Some((_, 't')) => value.push('\t'),
Some((_, 'r')) => value.push('\r'),
Some((_, '\\')) => value.push('\\'),
Some((_, ch)) if ch == quote => value.push(quote),
Some((_, ch)) => {
value.push('\\');
value.push(ch);
}
None => return Err(self.error("Unterminated string".to_string())),
}
}
Some((_, ch)) => value.push(ch),
None => return Err(self.error("Unterminated string".to_string())),
}
}
Ok(Token::StringLiteral(value))
}
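/// Reads a `$name` parameter; the name may contain alphanumeric characters
/// and underscores and must be non-empty.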
fn read_parameter(&mut self) -> Result<Token, LexerError> {
self.advance();
let mut name = String::new();
while let Some(&(_, ch)) = self.chars.peek() {
if ch.is_alphanumeric() || ch == '_' {
name.push(ch);
self.advance();
} else {
break;
}
}
if name.is_empty() {
return Err(self.error("Expected parameter name after '$'".to_string()));
}
Ok(Token::Parameter(name))
}
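/// Reads an integer or float literal. A `.` only counts as a decimal point
/// when a digit follows it, and an exponent (`e`/`E`) forces the literal to
/// be parsed as a float.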
fn read_number(&mut self) -> Result<Token, LexerError> {
let start_pos = self.position;
let mut has_dot = false;
let mut has_exp = false;
while let Some(&(idx, ch)) = self.chars.peek() {
match ch {
'0'..='9' => {
self.advance();
}
'.' if !has_dot && !has_exp => {
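// Consume the '.' only when a digit follows, so range syntax like
// `1..3` lexes as IntegerLiteral, Dot, Dot, IntegerLiteral.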
if self
.input
.as_bytes()
.get(idx + 1)
.is_some_and(|b| b.is_ascii_digit())
{
has_dot = true;
self.advance();
} else {
break;
}
}
'e' | 'E' if !has_exp => {
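// An exponent makes the literal a float even without a decimal point.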
has_exp = true;
has_dot = true;
self.advance();
if let Some(&(_, sign)) = self.chars.peek()
&& (sign == '+' || sign == '-')
{
self.advance();
}
}
_ => break,
}
}
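// `self.position` still marks the token start, so adding current_offset()
// yields the index one past the last digit consumed.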
let text = &self.input[start_pos..self.position + self.current_offset()];
self.parse_number(text, has_dot)
}
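/// Called once a `-` has been consumed and a digit peeked: lexes the
/// magnitude and negates it. Note that `i64::MIN` cannot be written as a
/// literal this way, because its magnitude overflows `i64` before the
/// negation is applied.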
fn read_negative_number(&mut self) -> Result<Token, LexerError> {
if let Some(&(pos, _)) = self.chars.peek() {
self.position = pos;
}
let token = self.read_number()?;
match token {
Token::IntegerLiteral(n) => Ok(Token::IntegerLiteral(-n)),
Token::FloatLiteral(f) => Ok(Token::FloatLiteral(-f)),
_ => Err(self.error("Expected number after '-'".to_string())),
}
}
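/// Bytes from `self.position` to the next unconsumed character, or to the
/// end of input. Clones the iterator because peeking would need `&mut self`.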
fn current_offset(&self) -> usize {
self.chars
.clone()
.next()
.map(|(pos, _)| pos - self.position)
.unwrap_or(self.input.len() - self.position)
}
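/// Parses the numeric text collected by `read_number` into an integer or
/// float token.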
fn parse_number(&self, text: &str, is_float: bool) -> Result<Token, LexerError> {
if is_float {
text.parse::<f64>()
.map(Token::FloatLiteral)
.map_err(|_| self.error(format!("Invalid float: {}", text)))
} else {
text.parse::<i64>()
.map(Token::IntegerLiteral)
.map_err(|_| self.error(format!("Invalid integer: {}", text)))
}
}
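/// Reads an identifier and promotes it to a keyword token when its
/// uppercased spelling matches a reserved word.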
fn read_identifier_or_keyword(&mut self) -> Result<Token, LexerError> {
let start_pos = self.position;
while let Some(&(_, ch)) = self.chars.peek() {
if ch.is_alphanumeric() || ch == '_' {
self.advance();
} else {
break;
}
}
let end_pos = self
.chars
.peek()
.map(|(pos, _)| *pos)
.unwrap_or(self.input.len());
let text = &self.input[start_pos..end_pos];
let token = match text.to_uppercase().as_str() {
"MATCH" => Token::Match,
"WHERE" => Token::Where,
"RETURN" => Token::Return,
"ORDER" => Token::Order,
"BY" => Token::By,
"LIMIT" => Token::Limit,
"SKIP" => Token::Skip,
"DISTINCT" => Token::Distinct,
"COUNT" => Token::Count,
"ASC" => Token::Asc,
"DESC" => Token::Desc,
"AND" => Token::And,
"OR" => Token::Or,
"NOT" => Token::Not,
"IN" => Token::In,
"IS" => Token::Is,
"NULL" => Token::Null,
"TRUE" => Token::True,
"FALSE" => Token::False,
"SIMILAR" => Token::Similar,
"TO" => Token::To,
"USING" => Token::Using,
"FIND" => Token::Find,
"RANK" => Token::Rank,
"SIMILARITY" => Token::Similarity,
"TOP" => Token::Top,
"AS" => Token::As,
"OF" => Token::Of,
"BETWEEN" => Token::Between,
"EXISTS" => Token::Exists,
"CONTAINS" => Token::Contains,
"STARTS" => Token::Starts,
"ENDS" => Token::Ends,
"WITH" => Token::With,
"COSINE" => Token::Cosine,
"EUCLIDEAN" => Token::Euclidean,
"DOT_PRODUCT" => Token::DotProduct,
_ => Token::Identifier(text.to_string()),
};
Ok(token)
}
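/// Builds a [`LexerError`] at the current line and column; `position` is the
/// byte offset of the most recently started token.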
fn error(&self, message: String) -> LexerError {
LexerError {
message,
position: self.position,
line: self.line,
column: self.column,
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_empty_input() {
let tokens = Lexer::tokenize("").unwrap();
assert_eq!(tokens, vec![Token::Eof]);
}
#[test]
fn test_whitespace_only() {
let tokens = Lexer::tokenize(" \n\t ").unwrap();
assert_eq!(tokens, vec![Token::Eof]);
}
#[test]
fn test_graph_keywords() {
let tokens = Lexer::tokenize("MATCH WHERE RETURN ORDER BY LIMIT SKIP").unwrap();
assert_eq!(
tokens,
vec![
Token::Match,
Token::Where,
Token::Return,
Token::Order,
Token::By,
Token::Limit,
Token::Skip,
Token::Eof
]
);
}
#[test]
fn test_keywords_case_insensitive() {
let tokens = Lexer::tokenize("match Match MATCH").unwrap();
assert_eq!(
tokens,
vec![Token::Match, Token::Match, Token::Match, Token::Eof]
);
}
#[test]
fn test_vector_keywords() {
let tokens = Lexer::tokenize("SIMILAR TO USING FIND RANK SIMILARITY TOP").unwrap();
assert_eq!(
tokens,
vec![
Token::Similar,
Token::To,
Token::Using,
Token::Find,
Token::Rank,
Token::Similarity,
Token::Top,
Token::Eof
]
);
}
#[test]
fn test_temporal_keywords() {
let tokens = Lexer::tokenize("AS OF BETWEEN").unwrap();
assert_eq!(
tokens,
vec![Token::As, Token::Of, Token::Between, Token::Eof]
);
}
#[test]
fn test_logical_keywords() {
let tokens = Lexer::tokenize("AND OR NOT IN IS NULL TRUE FALSE").unwrap();
assert_eq!(
tokens,
vec![
Token::And,
Token::Or,
Token::Not,
Token::In,
Token::Is,
Token::Null,
Token::True,
Token::False,
Token::Eof
]
);
}
#[test]
fn test_string_predicate_keywords() {
let tokens = Lexer::tokenize("EXISTS CONTAINS STARTS ENDS WITH").unwrap();
assert_eq!(
tokens,
vec![
Token::Exists,
Token::Contains,
Token::Starts,
Token::Ends,
Token::With,
Token::Eof
]
);
}
#[test]
fn test_metric_keywords() {
let tokens = Lexer::tokenize("COSINE EUCLIDEAN DOT_PRODUCT").unwrap();
assert_eq!(
tokens,
vec![
Token::Cosine,
Token::Euclidean,
Token::DotProduct,
Token::Eof
]
);
}
#[test]
fn test_result_keywords() {
let tokens = Lexer::tokenize("DISTINCT COUNT ASC DESC").unwrap();
assert_eq!(
tokens,
vec![
Token::Distinct,
Token::Count,
Token::Asc,
Token::Desc,
Token::Eof
]
);
}
#[test]
fn test_punctuation() {
let tokens = Lexer::tokenize("()[]{}:,.*").unwrap();
assert_eq!(
tokens,
vec![
Token::LeftParen,
Token::RightParen,
Token::LeftBracket,
Token::RightBracket,
Token::LeftBrace,
Token::RightBrace,
Token::Colon,
Token::Comma,
Token::Dot,
Token::Star,
Token::Eof
]
);
}
#[test]
fn test_arrows() {
let tokens = Lexer::tokenize("-> <- -").unwrap();
assert_eq!(
tokens,
vec![Token::Arrow, Token::LeftArrow, Token::Dash, Token::Eof]
);
}
#[test]
fn test_comparison_operators() {
let tokens = Lexer::tokenize("= <> != < <= > >=").unwrap();
assert_eq!(
tokens,
vec![
Token::Eq,
Token::Ne,
Token::Ne,
Token::Lt,
Token::Le,
Token::Gt,
Token::Ge,
Token::Eof
]
);
}
#[test]
fn test_identifiers() {
let tokens = Lexer::tokenize("foo bar_baz _underscore camelCase").unwrap();
assert_eq!(
tokens,
vec![
Token::Identifier("foo".to_string()),
Token::Identifier("bar_baz".to_string()),
Token::Identifier("_underscore".to_string()),
Token::Identifier("camelCase".to_string()),
Token::Eof
]
);
}
#[test]
fn test_identifier_with_numbers() {
let tokens = Lexer::tokenize("node1 var2name item_3").unwrap();
assert_eq!(
tokens,
vec![
Token::Identifier("node1".to_string()),
Token::Identifier("var2name".to_string()),
Token::Identifier("item_3".to_string()),
Token::Eof
]
);
}
#[test]
fn test_single_quoted_string() {
let tokens = Lexer::tokenize("'hello world'").unwrap();
assert_eq!(
tokens,
vec![Token::StringLiteral("hello world".to_string()), Token::Eof]
);
}
#[test]
fn test_double_quoted_string() {
let tokens = Lexer::tokenize("\"hello world\"").unwrap();
assert_eq!(
tokens,
vec![Token::StringLiteral("hello world".to_string()), Token::Eof]
);
}
#[test]
fn test_string_with_escape_sequences() {
let tokens = Lexer::tokenize("'hello\\nworld\\ttab'").unwrap();
assert_eq!(
tokens,
vec![
Token::StringLiteral("hello\nworld\ttab".to_string()),
Token::Eof
]
);
}
#[test]
fn test_string_with_escaped_quote() {
let tokens = Lexer::tokenize("'it''s escaped'").unwrap();
assert_eq!(
tokens,
vec![Token::StringLiteral("it's escaped".to_string()), Token::Eof]
);
}
#[test]
fn test_empty_string() {
let tokens = Lexer::tokenize("''").unwrap();
assert_eq!(
tokens,
vec![Token::StringLiteral("".to_string()), Token::Eof]
);
}
#[test]
fn test_integer_literals() {
let tokens = Lexer::tokenize("0 42 12345").unwrap();
assert_eq!(
tokens,
vec![
Token::IntegerLiteral(0),
Token::IntegerLiteral(42),
Token::IntegerLiteral(12345),
Token::Eof
]
);
}
#[test]
fn test_negative_integer() {
let tokens = Lexer::tokenize("-42").unwrap();
assert_eq!(tokens, vec![Token::IntegerLiteral(-42), Token::Eof]);
}
#[test]
fn test_float_literals() {
let tokens = Lexer::tokenize("2.71 0.5 10.0").unwrap();
assert_eq!(
tokens,
vec![
Token::FloatLiteral(2.71),
Token::FloatLiteral(0.5),
Token::FloatLiteral(10.0),
Token::Eof
]
);
}
#[test]
fn test_negative_float() {
let tokens = Lexer::tokenize("-2.71").unwrap();
assert_eq!(tokens, vec![Token::FloatLiteral(-2.71), Token::Eof]);
}
#[test]
fn test_scientific_notation() {
let tokens = Lexer::tokenize("1e10 2.5E-3 1e+5").unwrap();
assert_eq!(
tokens,
vec![
Token::FloatLiteral(1e10),
Token::FloatLiteral(2.5e-3),
Token::FloatLiteral(1e5),
Token::Eof
]
);
}
#[test]
fn test_parameters() {
let tokens = Lexer::tokenize("$embedding $user_id $1").unwrap();
assert_eq!(
tokens,
vec![
Token::Parameter("embedding".to_string()),
Token::Parameter("user_id".to_string()),
Token::Parameter("1".to_string()),
Token::Eof
]
);
}
#[test]
fn test_line_comment_double_dash() {
let tokens = Lexer::tokenize("MATCH -- this is a comment\nWHERE").unwrap();
assert_eq!(tokens, vec![Token::Match, Token::Where, Token::Eof]);
}
#[test]
fn test_line_comment_double_slash() {
let tokens = Lexer::tokenize("MATCH // this is a comment\nWHERE").unwrap();
assert_eq!(tokens, vec![Token::Match, Token::Where, Token::Eof]);
}
#[test]
fn test_block_comment() {
let tokens = Lexer::tokenize("MATCH /* multi\nline\ncomment */ WHERE").unwrap();
assert_eq!(tokens, vec![Token::Match, Token::Where, Token::Eof]);
}
#[test]
fn test_simple_match_query() {
let tokens = Lexer::tokenize("MATCH (n:Person) RETURN n").unwrap();
assert_eq!(
tokens,
vec![
Token::Match,
Token::LeftParen,
Token::Identifier("n".to_string()),
Token::Colon,
Token::Identifier("Person".to_string()),
Token::RightParen,
Token::Return,
Token::Identifier("n".to_string()),
Token::Eof
]
);
}
#[test]
fn test_match_with_properties() {
let tokens = Lexer::tokenize("MATCH (n:Person {name: 'Alice'})").unwrap();
assert_eq!(
tokens,
vec![
Token::Match,
Token::LeftParen,
Token::Identifier("n".to_string()),
Token::Colon,
Token::Identifier("Person".to_string()),
Token::LeftBrace,
Token::Identifier("name".to_string()),
Token::Colon,
Token::StringLiteral("Alice".to_string()),
Token::RightBrace,
Token::RightParen,
Token::Eof
]
);
}
#[test]
fn test_match_with_relationship() {
let tokens = Lexer::tokenize("MATCH (a)-[:KNOWS]->(b)").unwrap();
assert_eq!(
tokens,
vec![
Token::Match,
Token::LeftParen,
Token::Identifier("a".to_string()),
Token::RightParen,
Token::Dash,
Token::LeftBracket,
Token::Colon,
Token::Identifier("KNOWS".to_string()),
Token::RightBracket,
Token::Arrow,
Token::LeftParen,
Token::Identifier("b".to_string()),
Token::RightParen,
Token::Eof
]
);
}
#[test]
fn test_vector_search_query() {
let tokens = Lexer::tokenize("SIMILAR TO $embedding USING COSINE LIMIT 10").unwrap();
assert_eq!(
tokens,
vec![
Token::Similar,
Token::To,
Token::Parameter("embedding".to_string()),
Token::Using,
Token::Cosine,
Token::Limit,
Token::IntegerLiteral(10),
Token::Eof
]
);
}
#[test]
fn test_temporal_query() {
let tokens = Lexer::tokenize("AS OF '2024-01-15' MATCH (n)").unwrap();
assert_eq!(
tokens,
vec![
Token::As,
Token::Of,
Token::StringLiteral("2024-01-15".to_string()),
Token::Match,
Token::LeftParen,
Token::Identifier("n".to_string()),
Token::RightParen,
Token::Eof
]
);
}
#[test]
fn test_where_clause() {
let tokens = Lexer::tokenize("WHERE n.age > 18 AND n.name = 'Alice'").unwrap();
assert_eq!(
tokens,
vec![
Token::Where,
Token::Identifier("n".to_string()),
Token::Dot,
Token::Identifier("age".to_string()),
Token::Gt,
Token::IntegerLiteral(18),
Token::And,
Token::Identifier("n".to_string()),
Token::Dot,
Token::Identifier("name".to_string()),
Token::Eq,
Token::StringLiteral("Alice".to_string()),
Token::Eof
]
);
}
#[test]
fn test_hybrid_query() {
let tokens = Lexer::tokenize(
"AS OF '2024-01-01' MATCH (a:Person)-[:KNOWS]->(b) RANK BY SIMILARITY TO $embedding TOP 10",
)
.unwrap();
assert_eq!(
tokens,
vec![
Token::As,
Token::Of,
Token::StringLiteral("2024-01-01".to_string()),
Token::Match,
Token::LeftParen,
Token::Identifier("a".to_string()),
Token::Colon,
Token::Identifier("Person".to_string()),
Token::RightParen,
Token::Dash,
Token::LeftBracket,
Token::Colon,
Token::Identifier("KNOWS".to_string()),
Token::RightBracket,
Token::Arrow,
Token::LeftParen,
Token::Identifier("b".to_string()),
Token::RightParen,
Token::Rank,
Token::By,
Token::Similarity,
Token::To,
Token::Parameter("embedding".to_string()),
Token::Top,
Token::IntegerLiteral(10),
Token::Eof
]
);
}
#[test]
fn test_order_by_clause() {
let tokens = Lexer::tokenize("ORDER BY n.age DESC, n.name ASC").unwrap();
assert_eq!(
tokens,
vec![
Token::Order,
Token::By,
Token::Identifier("n".to_string()),
Token::Dot,
Token::Identifier("age".to_string()),
Token::Desc,
Token::Comma,
Token::Identifier("n".to_string()),
Token::Dot,
Token::Identifier("name".to_string()),
Token::Asc,
Token::Eof
]
);
}
#[test]
fn test_variable_length_path() {
let tokens = Lexer::tokenize("MATCH (a)-[:KNOWS*1..3]->(b)").unwrap();
assert_eq!(
tokens,
vec![
Token::Match,
Token::LeftParen,
Token::Identifier("a".to_string()),
Token::RightParen,
Token::Dash,
Token::LeftBracket,
Token::Colon,
Token::Identifier("KNOWS".to_string()),
Token::Star,
Token::IntegerLiteral(1),
Token::Dot,
Token::Dot,
Token::IntegerLiteral(3),
Token::RightBracket,
Token::Arrow,
Token::LeftParen,
Token::Identifier("b".to_string()),
Token::RightParen,
Token::Eof
]
);
}
#[test]
fn test_unterminated_string() {
let result = Lexer::tokenize("'unterminated");
assert!(result.is_err());
let err = result.unwrap_err();
assert!(err.message.contains("Unterminated string"));
}
#[test]
fn test_unexpected_character() {
let result = Lexer::tokenize("MATCH @invalid");
assert!(result.is_err());
let err = result.unwrap_err();
assert!(err.message.contains("Unexpected character"));
}
#[test]
fn test_invalid_not_equal() {
let result = Lexer::tokenize("!");
assert!(result.is_err());
}
#[test]
fn test_empty_parameter() {
let result = Lexer::tokenize("$");
assert!(result.is_err());
let err = result.unwrap_err();
assert!(err.message.contains("parameter name"));
}
#[test]
fn test_token_display() {
assert_eq!(format!("{}", Token::Match), "MATCH");
assert_eq!(format!("{}", Token::Where), "WHERE");
assert_eq!(format!("{}", Token::Return), "RETURN");
assert_eq!(format!("{}", Token::Order), "ORDER");
assert_eq!(format!("{}", Token::By), "BY");
assert_eq!(format!("{}", Token::Limit), "LIMIT");
assert_eq!(format!("{}", Token::Skip), "SKIP");
assert_eq!(format!("{}", Token::Distinct), "DISTINCT");
assert_eq!(format!("{}", Token::Count), "COUNT");
assert_eq!(format!("{}", Token::Asc), "ASC");
assert_eq!(format!("{}", Token::Desc), "DESC");
assert_eq!(format!("{}", Token::And), "AND");
assert_eq!(format!("{}", Token::Or), "OR");
assert_eq!(format!("{}", Token::Not), "NOT");
assert_eq!(format!("{}", Token::In), "IN");
assert_eq!(format!("{}", Token::Is), "IS");
assert_eq!(format!("{}", Token::Null), "NULL");
assert_eq!(format!("{}", Token::True), "TRUE");
assert_eq!(format!("{}", Token::False), "FALSE");
assert_eq!(format!("{}", Token::Similar), "SIMILAR");
assert_eq!(format!("{}", Token::To), "TO");
assert_eq!(format!("{}", Token::Using), "USING");
assert_eq!(format!("{}", Token::Find), "FIND");
assert_eq!(format!("{}", Token::Rank), "RANK");
assert_eq!(format!("{}", Token::Similarity), "SIMILARITY");
assert_eq!(format!("{}", Token::Top), "TOP");
assert_eq!(format!("{}", Token::As), "AS");
assert_eq!(format!("{}", Token::Of), "OF");
assert_eq!(format!("{}", Token::Between), "BETWEEN");
assert_eq!(format!("{}", Token::Exists), "EXISTS");
assert_eq!(format!("{}", Token::Contains), "CONTAINS");
assert_eq!(format!("{}", Token::Starts), "STARTS");
assert_eq!(format!("{}", Token::Ends), "ENDS");
assert_eq!(format!("{}", Token::With), "WITH");
assert_eq!(format!("{}", Token::Cosine), "COSINE");
assert_eq!(format!("{}", Token::Euclidean), "EUCLIDEAN");
assert_eq!(format!("{}", Token::DotProduct), "DOT_PRODUCT");
assert_eq!(format!("{}", Token::LeftParen), "(");
assert_eq!(format!("{}", Token::RightParen), ")");
assert_eq!(format!("{}", Token::LeftBracket), "[");
assert_eq!(format!("{}", Token::RightBracket), "]");
assert_eq!(format!("{}", Token::LeftBrace), "{");
assert_eq!(format!("{}", Token::RightBrace), "}");
assert_eq!(format!("{}", Token::Colon), ":");
assert_eq!(format!("{}", Token::Comma), ",");
assert_eq!(format!("{}", Token::Dot), ".");
assert_eq!(format!("{}", Token::Star), "*");
assert_eq!(format!("{}", Token::Dash), "-");
assert_eq!(format!("{}", Token::Arrow), "->");
assert_eq!(format!("{}", Token::LeftArrow), "<-");
assert_eq!(format!("{}", Token::Eq), "=");
assert_eq!(format!("{}", Token::Ne), "<>");
assert_eq!(format!("{}", Token::Lt), "<");
assert_eq!(format!("{}", Token::Le), "<=");
assert_eq!(format!("{}", Token::Gt), ">");
assert_eq!(format!("{}", Token::Ge), ">=");
assert_eq!(format!("{}", Token::Identifier("foo".to_string())), "foo");
assert_eq!(
format!("{}", Token::StringLiteral("bar".to_string())),
"'bar'"
);
assert_eq!(format!("{}", Token::IntegerLiteral(42)), "42");
assert_eq!(format!("{}", Token::FloatLiteral(2.71)), "2.71");
assert_eq!(format!("{}", Token::Parameter("p".to_string())), "$p");
assert_eq!(format!("{}", Token::Eof), "EOF");
}
#[test]
fn test_token_derive() {
let tokens = vec![
Token::Match,
Token::Where,
Token::Return,
Token::Order,
Token::By,
Token::Limit,
Token::Skip,
Token::Distinct,
Token::Count,
Token::Asc,
Token::Desc,
Token::And,
Token::Or,
Token::Not,
Token::In,
Token::Is,
Token::Null,
Token::True,
Token::False,
Token::Similar,
Token::To,
Token::Using,
Token::Find,
Token::Rank,
Token::Similarity,
Token::Top,
Token::As,
Token::Of,
Token::Between,
Token::Exists,
Token::Contains,
Token::Starts,
Token::Ends,
Token::With,
Token::Cosine,
Token::Euclidean,
Token::DotProduct,
Token::LeftParen,
Token::RightParen,
Token::LeftBracket,
Token::RightBracket,
Token::LeftBrace,
Token::RightBrace,
Token::Colon,
Token::Comma,
Token::Dot,
Token::Star,
Token::Dash,
Token::Arrow,
Token::LeftArrow,
Token::Eq,
Token::Ne,
Token::Lt,
Token::Le,
Token::Gt,
Token::Ge,
Token::Identifier("id".to_string()),
Token::StringLiteral("s".to_string()),
Token::IntegerLiteral(1),
Token::FloatLiteral(1.0),
Token::Parameter("p".to_string()),
Token::Eof,
];
for token in tokens {
let cloned = token.clone();
assert_eq!(token, cloned);
let _ = format!("{:?}", token);
}
}
}