use super::{
Ast, BitmapPredicate, ProjectionItem, QueryError, Result, VectorPredicate, WhereClause,
};
/// Parses a query of the shape `MATCH … [WHERE …] [WITH …] RETURN … [LIMIT n]`
/// into an [`Ast`].
///
/// Clause keywords are located textually with `find_keyword`, i.e.
/// case-insensitively and only on identifier boundaries. The optional clauses
/// must appear in the order `WHERE`, `WITH`, `RETURN`, `LIMIT`.
///
/// # Errors
///
/// Returns [`QueryError::InvalidSyntax`] when the query is empty, does not
/// start with `MATCH`, lacks `RETURN`, has `WHERE`/`WITH` out of order, or
/// when any individual clause fails to parse (including a missing or
/// non-`u64` `LIMIT` value).
pub(super) fn parse(query: &str) -> Result<Ast> {
    let trimmed = query.trim();
    if trimmed.is_empty() {
        return Err(QueryError::InvalidSyntax(
            "query must not be empty".to_string(),
        ));
    }

    let match_pos = require_keyword(trimmed, "MATCH", 0)?;
    if match_pos != 0 {
        return Err(QueryError::InvalidSyntax(
            "query must start with MATCH".to_string(),
        ));
    }

    // Locate every clause keyword up front; all offsets index into `trimmed`.
    let return_pos = find_keyword(trimmed, "RETURN", 0)
        .ok_or_else(|| QueryError::InvalidSyntax("missing RETURN clause".to_string()))?;
    let where_pos = find_keyword(trimmed, "WHERE", "MATCH".len());
    let with_pos = find_keyword(trimmed, "WITH", "MATCH".len());
    let limit_pos = find_keyword(trimmed, "LIMIT", return_pos + "RETURN".len());

    if let Some(where_idx) = where_pos {
        if where_idx > return_pos {
            return Err(QueryError::InvalidSyntax(
                "WHERE must appear before RETURN".to_string(),
            ));
        }
    }
    if let Some(with_idx) = with_pos {
        if with_idx > return_pos {
            return Err(QueryError::InvalidSyntax(
                "WITH must appear before RETURN".to_string(),
            ));
        }
    }
    // The WHERE body is sliced up to the WITH keyword below. Without this
    // check a WITH that precedes WHERE would produce an inverted range and
    // panic instead of reporting a syntax error.
    if let (Some(where_idx), Some(with_idx)) = (where_pos, with_pos) {
        if with_idx < where_idx {
            return Err(QueryError::InvalidSyntax(
                "WHERE must appear before WITH".to_string(),
            ));
        }
    }

    // The MATCH body runs up to whichever clause keyword comes first.
    let match_slice_end = where_pos
        .into_iter()
        .chain(with_pos)
        .fold(return_pos, usize::min);
    let match_aliases = parse_match_aliases(&trimmed["MATCH".len()..match_slice_end])?;
    let match_alias = match_aliases.first().cloned().ok_or_else(|| {
        QueryError::InvalidSyntax("MATCH must contain at least one alias".to_string())
    })?;

    let where_clause = if let Some(where_idx) = where_pos {
        let where_end = with_pos.unwrap_or(return_pos);
        let where_raw = trimmed[where_idx + "WHERE".len()..where_end].trim();
        Some(parse_where_clause(where_raw)?)
    } else {
        None
    };
    let where_predicate = where_clause_to_vector(where_clause.as_ref());
    let where_bitmap_predicate = where_clause_to_bitmap(where_clause.as_ref());

    let with_items = if let Some(with_idx) = with_pos {
        let with_raw = trimmed[with_idx + "WITH".len()..return_pos].trim();
        parse_projection_list(with_raw)?
    } else {
        Vec::new()
    };

    // RETURN extends to LIMIT when present, otherwise to the end of the query.
    let return_start = return_pos + "RETURN".len();
    let return_clause = match limit_pos {
        Some(limit_idx) => &trimmed[return_start..limit_idx],
        None => &trimmed[return_start..],
    };
    let return_items = parse_projection_list(return_clause)?;
    let return_alias = primary_return_alias(&return_items)?;

    let limit = if let Some(limit_idx) = limit_pos {
        let raw = trimmed[limit_idx + "LIMIT".len()..].trim();
        if raw.is_empty() {
            return Err(QueryError::InvalidSyntax("LIMIT missing value".to_string()));
        }
        Some(
            raw.parse::<u64>()
                .map_err(|_| QueryError::InvalidSyntax("invalid LIMIT value".to_string()))?,
        )
    } else {
        None
    };

    Ok(Ast {
        match_aliases,
        match_alias,
        where_clause,
        where_predicate,
        where_bitmap_predicate,
        with_items,
        return_items,
        return_alias,
        limit,
    })
}
/// Extracts the node alias from each comma-separated pattern of a MATCH body.
///
/// For every top-level pattern the first parenthesised group is inspected and
/// the alias is the first whitespace-delimited token that precedes any `:`
/// label separator, e.g. `(n:Person)` yields `n`.
///
/// # Errors
///
/// Returns [`QueryError::InvalidSyntax`] when there is no pattern, a pattern
/// lacks `(` or a following `)`, or the alias is empty. An anonymous node such
/// as `(:Person)` is rejected rather than having its label mistaken for the
/// alias.
fn parse_match_aliases(input: &str) -> Result<Vec<String>> {
    let patterns = split_top_level(input, ',');
    if patterns.is_empty() {
        return Err(QueryError::InvalidSyntax(
            "MATCH must contain at least one pattern".to_string(),
        ));
    }

    let mut aliases = Vec::with_capacity(patterns.len());
    for pattern in &patterns {
        let open = pattern
            .find('(')
            .ok_or_else(|| QueryError::InvalidSyntax("MATCH missing '('".to_string()))?;
        let close = pattern[open + 1..]
            .find(')')
            .map(|offset| open + 1 + offset)
            .ok_or_else(|| QueryError::InvalidSyntax("MATCH missing ')'".to_string()))?;
        let inside = &pattern[open + 1..close];

        // Only the text before the first ':' can be the alias; whatever
        // follows is a label and must never be returned as one.
        let alias = inside
            .split(':')
            .next()
            .and_then(|head| head.split_whitespace().next())
            .ok_or_else(|| {
                QueryError::InvalidSyntax("MATCH alias must not be empty".to_string())
            })?;
        aliases.push(alias.to_string());
    }
    Ok(aliases)
}
/// Parses a comma-separated projection list (the body of WITH or RETURN).
///
/// The input is split on top-level commas and each piece is handed to
/// `parse_projection_item`; the first failing item aborts the whole list.
///
/// # Errors
///
/// Returns [`QueryError::InvalidSyntax`] when the list contains no items or
/// when any item is malformed.
fn parse_projection_list(input: &str) -> Result<Vec<ProjectionItem>> {
    let segments = split_top_level(input, ',');
    if segments.is_empty() {
        return Err(QueryError::InvalidSyntax(
            "projection list must not be empty".to_string(),
        ));
    }
    segments
        .iter()
        .map(|segment| parse_projection_item(segment))
        .collect()
}
/// Parses one projection: either a bare identifier or `name(argument) [AS alias]`.
///
/// An optional trailing ` AS alias` is split off first. If the remaining
/// expression contains both `(` and `)`, it is a function projection whose
/// name is the text before the first `(` and whose argument is the text up to
/// the last `)`. With no parentheses at all it is an identifier.
///
/// # Errors
///
/// Returns [`QueryError::InvalidSyntax`] when the item is empty, the
/// parentheses are unbalanced or reversed, a function is missing its name or
/// argument, or an `AS` alias is attached to a plain identifier.
fn parse_projection_item(input: &str) -> Result<ProjectionItem> {
    let trimmed = input.trim();
    if trimmed.is_empty() {
        return Err(QueryError::InvalidSyntax(
            "projection item must not be empty".to_string(),
        ));
    }

    let (expr, alias) = split_optional_alias(trimmed);
    match (expr.find('('), expr.rfind(')')) {
        (Some(open), Some(close)) if open < close => {
            let name = expr[..open].trim();
            let argument = expr[open + 1..close].trim();
            if name.is_empty() || argument.is_empty() {
                return Err(QueryError::InvalidSyntax(
                    "function projection missing name or argument".to_string(),
                ));
            }
            Ok(ProjectionItem::Function {
                name: name.to_string(),
                argument: argument.to_string(),
                alias,
            })
        }
        (None, None) => {
            if alias.is_some() {
                return Err(QueryError::InvalidSyntax(
                    "AS alias is only valid for function projections".to_string(),
                ));
            }
            Ok(ProjectionItem::Identifier(expr.to_string()))
        }
        // Reversed parentheses, or only one of the pair present (e.g.
        // `count(n`): reject instead of passing the text on as an identifier.
        _ => Err(QueryError::InvalidSyntax(
            "function projection has invalid parentheses".to_string(),
        )),
    }
}
/// Splits `expr AS alias` into its expression and optional alias.
///
/// The last case-insensitive occurrence of `" AS "` (with single surrounding
/// spaces) is the separator. Both halves are trimmed; an empty alias is
/// reported as `None`. When no separator is present the input is returned
/// unchanged with no alias.
fn split_optional_alias(input: &str) -> (&str, Option<String>) {
    const SEPARATOR: &str = " AS ";

    // ASCII upper-casing keeps byte offsets identical, so an index found in
    // the upper-cased copy is valid for `input` as well.
    match input.to_ascii_uppercase().rfind(SEPARATOR) {
        None => (input, None),
        Some(at) => {
            let alias = input[at + SEPARATOR.len()..].trim();
            let alias = (!alias.is_empty()).then(|| alias.to_string());
            (input[..at].trim(), alias)
        }
    }
}
/// Parses the body of a WHERE clause into a [`WhereClause`].
///
/// Two shapes are recognised:
/// * `function(arg, …) <op> threshold`, chosen whenever the predicate
///   contains a `(` anywhere. The function name is the text before the first
///   `(`, and the arguments run to the last `)`.
/// * `left <op> right`, used for everything else.
///
/// # Errors
///
/// Returns [`QueryError::InvalidSyntax`] for an empty predicate, a missing or
/// misplaced `)`, a missing function name, an empty argument list, a missing
/// operator or threshold, or a missing left operand.
fn parse_where_clause(input: &str) -> Result<WhereClause> {
    let predicate = input.trim();
    if predicate.is_empty() {
        return Err(QueryError::InvalidSyntax(
            "WHERE predicate must not be empty".to_string(),
        ));
    }

    // No parenthesis at all: plain `left <op> right` comparison.
    let Some(open) = predicate.find('(') else {
        let (operator, right, operator_idx) = parse_operator_threshold_with_index(predicate)?;
        let left = predicate[..operator_idx].trim();
        if left.is_empty() {
            return Err(QueryError::InvalidSyntax(
                "WHERE predicate missing left operand".to_string(),
            ));
        }
        return Ok(WhereClause::Comparison {
            left: left.to_string(),
            operator: operator.to_string(),
            right: right.to_string(),
        });
    };

    let close = match predicate.rfind(')') {
        Some(close) if close > open => close,
        Some(_) => {
            return Err(QueryError::InvalidSyntax(
                "WHERE predicate has invalid function arguments".to_string(),
            ));
        }
        None => {
            return Err(QueryError::InvalidSyntax(
                "WHERE predicate missing ')'".to_string(),
            ));
        }
    };

    let function = predicate[..open].trim();
    if function.is_empty() {
        return Err(QueryError::InvalidSyntax(
            "WHERE predicate missing function".to_string(),
        ));
    }

    let args = split_top_level(predicate[open + 1..close].trim(), ',');
    if args.is_empty() {
        return Err(QueryError::InvalidSyntax(
            "WHERE predicate requires function args".to_string(),
        ));
    }

    // Whatever follows the closing parenthesis carries the operator and threshold.
    let (operator, threshold) = parse_operator_threshold(predicate[close + 1..].trim())?;
    Ok(WhereClause::Function {
        function: function.to_string(),
        args,
        operator: operator.to_string(),
        threshold: threshold.to_string(),
    })
}
/// Derives a [`VectorPredicate`] from a parsed WHERE clause, if it is one.
///
/// Only a function predicate whose name starts with `vector.` and which has
/// exactly two arguments qualifies; the arguments become `target` and `param`
/// in that order. Anything else yields `None`.
fn where_clause_to_vector(where_clause: Option<&WhereClause>) -> Option<VectorPredicate> {
    let Some(WhereClause::Function {
        function,
        args,
        operator,
        threshold,
    }) = where_clause
    else {
        return None;
    };
    if !function.starts_with("vector.") {
        return None;
    }
    let [target, param] = args.as_slice() else {
        return None;
    };
    Some(VectorPredicate {
        function: function.clone(),
        target: target.clone(),
        param: param.clone(),
        operator: operator.clone(),
        threshold: threshold.clone(),
    })
}
/// Derives a [`BitmapPredicate`] from a parsed WHERE clause, if it is one.
///
/// The clause must be a function predicate whose name starts with `bitmap.`,
/// with exactly two arguments, compared using `=` against `1` or `true`
/// (case-insensitive). The two arguments, with optional surrounding double
/// quotes removed, become `index_name` and `value_key`. Anything else yields
/// `None`.
fn where_clause_to_bitmap(where_clause: Option<&WhereClause>) -> Option<BitmapPredicate> {
    let Some(WhereClause::Function {
        function,
        args,
        operator,
        threshold,
    }) = where_clause
    else {
        return None;
    };
    let [index_arg, value_arg] = args.as_slice() else {
        return None;
    };
    if !function.starts_with("bitmap.")
        || operator != "="
        || !matches!(
            threshold.trim().to_ascii_lowercase().as_str(),
            "1" | "true"
        )
    {
        return None;
    }
    Some(BitmapPredicate {
        function: function.clone(),
        index_name: strip_optional_quotes(index_arg),
        value_key: strip_optional_quotes(value_arg),
    })
}
/// Trims `input` and removes one pair of enclosing double quotes, if present.
///
/// The quotes are removed only when the trimmed text both starts and ends
/// with `"` as two distinct characters; a lone `"` or a one-sided quote is
/// returned as is. Whitespace inside the quotes is preserved.
fn strip_optional_quotes(input: &str) -> String {
    let candidate = input.trim();
    candidate
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
        .unwrap_or(candidate)
        .to_string()
}
/// Computes the alias that names the first RETURN projection.
///
/// An identifier is its own alias. A function uses its explicit `AS` alias
/// when it has one and otherwise falls back to `name()`.
///
/// # Errors
///
/// Returns [`QueryError::InvalidSyntax`] when `items` is empty or the
/// resulting alias is blank.
fn primary_return_alias(items: &[ProjectionItem]) -> Result<String> {
    let Some(head) = items.first() else {
        return Err(QueryError::InvalidSyntax(
            "RETURN must contain at least one projection".to_string(),
        ));
    };
    let alias = match head {
        ProjectionItem::Identifier(value) => value.clone(),
        ProjectionItem::Function { alias, name, .. } => alias
            .clone()
            .unwrap_or_else(|| format!("{}()", name)),
    };
    if alias.trim().is_empty() {
        return Err(QueryError::InvalidSyntax(
            "RETURN alias must not be empty".to_string(),
        ));
    }
    Ok(alias)
}
/// Splits `input` into `(operator, threshold)`.
///
/// Thin wrapper over `parse_operator_threshold_with_index` that discards the
/// operator's byte offset; errors are passed through unchanged.
fn parse_operator_threshold(input: &str) -> Result<(&str, &str)> {
    parse_operator_threshold_with_index(input).map(|(operator, right, _)| (operator, right))
}
/// Finds the comparison operator in `input` and returns
/// `(operator, threshold, operator_byte_index)`.
///
/// The leftmost operator wins. Where a two-character operator (`>=`, `<=`)
/// and a one-character operator start at the same position, the longer one is
/// chosen. Selecting by position rather than by a fixed priority keeps
/// operator characters inside the right-hand value (e.g. `name = "a<=b"`)
/// from being mistaken for the predicate's operator. The threshold is the
/// trimmed text after the operator.
///
/// # Errors
///
/// Returns [`QueryError::InvalidSyntax`] when no operator is present or when
/// nothing but whitespace follows it.
fn parse_operator_threshold_with_index(input: &str) -> Result<(&str, &str, usize)> {
    // Two-character operators come first so they take precedence over their
    // one-character prefixes at the same position.
    const OPERATORS: [&str; 5] = [">=", "<=", "=", ">", "<"];

    let (idx, op) = input
        .char_indices()
        .find_map(|(idx, _)| {
            OPERATORS
                .iter()
                .copied()
                .find(|op| input[idx..].starts_with(*op))
                .map(|op| (idx, op))
        })
        .ok_or_else(|| {
            QueryError::InvalidSyntax("WHERE predicate missing operator".to_string())
        })?;

    let threshold = input[idx + op.len()..].trim();
    if threshold.is_empty() {
        return Err(QueryError::InvalidSyntax(
            "WHERE missing threshold".to_string(),
        ));
    }
    Ok((op, threshold, idx))
}
/// Splits `input` on `delim`, ignoring delimiters nested inside parentheses.
///
/// Nesting is tracked by counting `(` and `)` only; an unmatched `)` leaves
/// the depth at zero rather than going negative. Each piece is trimmed and
/// empty pieces are dropped, so the result may be empty.
fn split_top_level(input: &str, delim: char) -> Vec<String> {
    let mut nesting = 0_usize;
    let mut segment_start = 0_usize;
    let mut segments: Vec<&str> = Vec::new();

    for (pos, symbol) in input.char_indices() {
        if symbol == '(' {
            nesting = nesting.saturating_add(1);
        } else if symbol == ')' {
            nesting = nesting.saturating_sub(1);
        }
        if nesting == 0 && symbol == delim {
            segments.push(&input[segment_start..pos]);
            segment_start = pos + symbol.len_utf8();
        }
    }
    // Whatever follows the last top-level delimiter is the final piece.
    segments.push(&input[segment_start..]);

    segments
        .into_iter()
        .map(str::trim)
        .filter(|segment| !segment.is_empty())
        .map(String::from)
        .collect()
}
/// Returns the byte index of the first stand-alone occurrence of `keyword` in
/// `input` at or after byte offset `start`.
///
/// Matching is ASCII case-insensitive. An occurrence counts only when the
/// characters immediately before and after it are neither ASCII alphanumeric
/// nor `_`, so `RETURN` is not found inside `returned` or `my_return`.
/// Returns `None` when there is no such occurrence or `start` lies past the
/// end of `input`.
fn find_keyword(input: &str, keyword: &str, start: usize) -> Option<usize> {
    let is_word_char = |c: char| c.is_ascii_alphanumeric() || c == '_';

    // ASCII upper-casing preserves byte offsets, so indices found in the
    // upper-cased copies are valid for `input` too.
    let haystack = input.to_ascii_uppercase();
    let needle = keyword.to_ascii_uppercase();

    let mut cursor = start;
    while cursor <= haystack.len() {
        let hit = cursor + haystack[cursor..].find(&needle)?;
        let end = hit + needle.len();

        let glued_before = haystack[..hit].chars().next_back().is_some_and(is_word_char);
        let glued_after = haystack[end..].chars().next().is_some_and(is_word_char);
        if !glued_before && !glued_after {
            return Some(hit);
        }
        // Embedded in a longer identifier: resume just past this hit.
        cursor = hit + 1;
    }
    None
}
/// Like `find_keyword`, but a missing keyword is an error.
///
/// # Errors
///
/// Returns [`QueryError::InvalidSyntax`] with the message
/// `missing <keyword> clause` when the keyword is not found.
fn require_keyword(input: &str, keyword: &str, start: usize) -> Result<usize> {
    match find_keyword(input, keyword, start) {
        Some(position) => Ok(position),
        None => Err(QueryError::InvalidSyntax(format!(
            "missing {} clause",
            keyword
        ))),
    }
}