use super::ast::{GenerateLiteral, GenerateQuery, TemplateClause};
use super::GenerateError;
/// A lexical token produced by `tokenize`.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
/// A reserved word (`GENERATE`, `WHERE`, `PREFIX` or `ITERATOR`), stored upper-cased.
Keyword(String),
/// An opening brace, `{`.
LBrace,
/// A closing brace, `}`.
RBrace,
/// A question mark, `?`.
QuestionMark,
/// Any non-keyword word, stored with the spelling used in the input.
Ident(String),
/// The text between the quotes of a string literal; backslash escapes are kept verbatim.
StringLit(String),
/// End-of-input marker; `tokenize` appends exactly one as the final token.
Eof,
}
/// Splits a GENERATE query into a flat list of [`Token`]s.
///
/// The returned vector always ends with a single [`Token::Eof`]. Tokenizing never
/// fails: characters that do not start a token are skipped.
///
/// Lexical rules:
/// - Whitespace is skipped, and `#` starts a comment that runs to the end of the line.
/// - `{`, `}` and `?` become `LBrace`, `RBrace` and `QuestionMark`.
/// - String literals are delimited by `"` or `'`, or by three of the same quote
///   character (`"""` / `'''`). In the single-delimiter form a backslash protects
///   the following character from ending the literal. Escape sequences are NOT
///   decoded; the literal text is stored exactly as written.
/// - An unterminated literal (either form) extends to the end of the input.
/// - A word starts with a letter or `_` and continues with letters, digits, `_`
///   or `-`. `GENERATE`, `WHERE`, `PREFIX` and `ITERATOR` (ASCII case-insensitive)
///   become `Keyword` with the upper-case spelling; any other word is an `Ident`.
/// - Every other character (including `:`, `<` and `>`) is dropped.
pub fn tokenize(input: &str) -> Vec<Token> {
    let chars: Vec<char> = input.chars().collect();
    let len = chars.len();
    let mut tokens = Vec::new();
    let mut pos = 0usize;

    while pos < len {
        let ch = chars[pos];
        match ch {
            c if c.is_whitespace() => pos += 1,
            '#' => {
                // Comment: stop on the newline so the whitespace arm consumes it.
                while pos < len && chars[pos] != '\n' {
                    pos += 1;
                }
            }
            '{' => {
                tokens.push(Token::LBrace);
                pos += 1;
            }
            '}' => {
                tokens.push(Token::RBrace);
                pos += 1;
            }
            '?' => {
                tokens.push(Token::QuestionMark);
                pos += 1;
            }
            '"' | '\'' => {
                let is_triple = pos + 2 < len && chars[pos + 1] == ch && chars[pos + 2] == ch;
                if is_triple {
                    let body = &chars[pos + 3..];
                    // First run of three delimiter characters closes the literal.
                    // Without a closer, keep everything up to the end of the input
                    // instead of silently dropping the trailing characters.
                    let text_len = body
                        .windows(3)
                        .position(|w| w.iter().all(|&q| q == ch))
                        .unwrap_or(body.len());
                    tokens.push(Token::StringLit(body[..text_len].iter().collect()));
                    // Opening delimiter + text + closing delimiter. Overshooting the
                    // end on an unterminated literal just ends the outer loop.
                    pos += 3 + text_len + 3;
                } else {
                    let start = pos + 1;
                    let mut end = start;
                    while end < len && chars[end] != ch {
                        // A backslash shields the next character from closing the literal.
                        if chars[end] == '\\' && end + 1 < len {
                            end += 1;
                        }
                        end += 1;
                    }
                    tokens.push(Token::StringLit(chars[start..end].iter().collect()));
                    pos = end + 1;
                }
            }
            c if c.is_alphabetic() || c == '_' => {
                let start = pos;
                while pos < len
                    && (chars[pos].is_alphanumeric() || chars[pos] == '_' || chars[pos] == '-')
                {
                    pos += 1;
                }
                let word: String = chars[start..pos].iter().collect();
                // Keywords are ASCII; ASCII-only folding keeps non-ASCII look-alikes
                // (e.g. a dotless `ı`) from being promoted to keywords.
                let upper = word.to_ascii_uppercase();
                let is_keyword =
                    matches!(upper.as_str(), "GENERATE" | "WHERE" | "PREFIX" | "ITERATOR");
                tokens.push(if is_keyword {
                    Token::Keyword(upper)
                } else {
                    Token::Ident(word)
                });
            }
            // `:`, `<`, `>` and anything else unrecognised carries no token.
            _ => pos += 1,
        }
    }

    tokens.push(Token::Eof);
    tokens
}
/// A forward-only reader over a token slice.
///
/// `pos` is the index of the next token to be read. Reading at or past the end
/// of the slice yields [`Token::Eof`] and leaves `pos` unchanged.
struct Cursor<'a> {
    tokens: &'a [Token],
    pos: usize,
}

impl<'a> Cursor<'a> {
    /// Creates a cursor positioned on the first token of `tokens`.
    fn new(tokens: &'a [Token]) -> Self {
        Cursor { tokens, pos: 0 }
    }

    /// Returns the current token without consuming it.
    fn peek(&self) -> &Token {
        match self.tokens.get(self.pos) {
            Some(tok) => tok,
            None => &Token::Eof,
        }
    }

    /// Returns the current token and steps past it.
    fn next(&mut self) -> &Token {
        let all = self.tokens;
        match all.get(self.pos) {
            Some(tok) => {
                self.pos += 1;
                tok
            }
            None => &Token::Eof,
        }
    }

    /// Consumes one token and checks that it is the keyword `kw`
    /// (compared ASCII case-insensitively).
    ///
    /// # Errors
    /// Returns `GenerateError::ParseError` carrying the index of the offending
    /// token. The token is consumed even when it does not match.
    fn expect_keyword(&mut self, kw: &str) -> Result<(), GenerateError> {
        let at = self.pos;
        match self.next() {
            Token::Keyword(found) if found.eq_ignore_ascii_case(kw) => Ok(()),
            unexpected => Err(GenerateError::ParseError {
                pos: at,
                msg: format!("expected keyword {kw}, got {unexpected:?}"),
            }),
        }
    }

    /// Consumes one token and checks that it is an opening brace.
    ///
    /// # Errors
    /// Returns `GenerateError::ParseError` carrying the index of the offending
    /// token. The token is consumed even when it does not match.
    fn expect_lbrace(&mut self) -> Result<(), GenerateError> {
        let at = self.pos;
        match self.next() {
            Token::LBrace => Ok(()),
            unexpected => Err(GenerateError::ParseError {
                pos: at,
                msg: format!("expected '{{', got {unexpected:?}"),
            }),
        }
    }
}
/// Parses a complete GENERATE query from `input`.
///
/// The accepted shape, in order:
/// 1. zero or more `PREFIX <label> <iri>` declarations, where the label is an
///    identifier and the IRI is a string literal or an identifier;
/// 2. `GENERATE { ... }` — the tokens between the balanced braces are handed
///    to `parse_template_block`;
/// 3. an optional `ITERATOR <name>`; if the token after `ITERATOR` is neither a
///    string literal nor an identifier the iterator is recorded as `None`;
/// 4. the WHERE body, which `parse_where_body` extracts from the raw `input`
///    text rather than from the token stream.
///
/// # Errors
/// Returns `GenerateError::ParseError` when a PREFIX declaration is malformed,
/// the GENERATE keyword or its braces are missing or unbalanced, or when
/// `parse_template_block` / `parse_where_body` fail. For errors raised here,
/// `pos` is an index into the token list.
pub fn parse(input: &str) -> Result<GenerateQuery, GenerateError> {
    let tokens = tokenize(input);
    let mut cursor = Cursor::new(&tokens);

    // --- PREFIX declarations -------------------------------------------------
    let mut prefix_decls: Vec<(String, String)> = Vec::new();
    while matches!(cursor.peek(), Token::Keyword(kw) if kw == "PREFIX") {
        cursor.next(); // the PREFIX keyword itself

        let label_at = cursor.pos;
        let label = match cursor.next() {
            Token::Ident(name) => name.clone(),
            other => {
                return Err(GenerateError::ParseError {
                    pos: label_at,
                    msg: format!("expected prefix label after PREFIX, got {other:?}"),
                })
            }
        };

        let iri_at = cursor.pos;
        let iri = match cursor.next() {
            Token::StringLit(text) | Token::Ident(text) => text.clone(),
            other => {
                return Err(GenerateError::ParseError {
                    pos: iri_at,
                    msg: format!("expected IRI after prefix label, got {other:?}"),
                })
            }
        };

        prefix_decls.push((label, iri));
    }

    // --- GENERATE { ... } ----------------------------------------------------
    cursor.expect_keyword("GENERATE")?;
    cursor.expect_lbrace()?;

    // Walk to the brace that balances the one just consumed, leaving the cursor
    // on it so the template tokens are exactly `block_start..cursor.pos`.
    let block_start = cursor.pos;
    let mut depth = 1usize;
    loop {
        match cursor.peek() {
            Token::Eof => {
                return Err(GenerateError::ParseError {
                    pos: cursor.pos,
                    msg: "unexpected EOF inside GENERATE block".to_string(),
                })
            }
            Token::LBrace => depth += 1,
            Token::RBrace => {
                depth -= 1;
                if depth == 0 {
                    break;
                }
            }
            _ => {}
        }
        cursor.next();
    }
    let block_tokens = &tokens[block_start..cursor.pos];

    let close_at = cursor.pos;
    match cursor.next() {
        Token::RBrace => {}
        other => {
            return Err(GenerateError::ParseError {
                pos: close_at,
                msg: format!("expected '}}' to close GENERATE block, got {other:?}"),
            })
        }
    }

    let template = parse_template_block(block_tokens)?;

    // --- optional ITERATOR ---------------------------------------------------
    let iterator = if matches!(cursor.peek(), Token::Keyword(kw) if kw == "ITERATOR") {
        cursor.next(); // the ITERATOR keyword itself
        match cursor.next() {
            Token::StringLit(name) | Token::Ident(name) => Some(name.clone()),
            _ => None,
        }
    } else {
        None
    };

    // --- WHERE body (taken from the raw text) --------------------------------
    let where_body = parse_where_body(input)?;

    Ok(GenerateQuery {
        prefix_decls,
        template,
        where_body,
        iterator,
    })
}
/// Turns the tokens found inside `GENERATE { ... }` into template clauses.
///
/// Recognised shapes:
/// - `"text" ?var ["suffix"]` — a variable clause with `prefix = Some(text)`;
/// - `?var ["suffix"]` — a variable clause with no prefix;
/// - `"text"` not followed by `?` — a plain text clause.
///
/// A string literal directly after a variable is always taken as that
/// variable's suffix. Tokens of any other kind are skipped.
///
/// # Errors
/// Returns `GenerateError::ParseError` when `?` is not followed by an
/// identifier; `pos` is the index (within `tokens`) just after the `?`.
fn parse_template_block(tokens: &[Token]) -> Result<Vec<TemplateClause>, GenerateError> {
    let mut clauses = Vec::new();
    let mut idx = 0usize;

    while let Some(tok) = tokens.get(idx) {
        idx += 1;
        match tok {
            Token::StringLit(text) => {
                if matches!(tokens.get(idx), Some(Token::QuestionMark)) {
                    idx += 1; // step over '?'
                    let var_name = template_var_name(tokens, &mut idx)?;
                    let suffix = template_suffix(tokens, &mut idx);
                    clauses.push(TemplateClause {
                        prefix: Some(text.clone()),
                        expr: GenerateLiteral::Var(var_name),
                        suffix,
                    });
                } else {
                    clauses.push(TemplateClause {
                        prefix: None,
                        expr: GenerateLiteral::Text(text.clone()),
                        suffix: None,
                    });
                }
            }
            Token::QuestionMark => {
                let var_name = template_var_name(tokens, &mut idx)?;
                let suffix = template_suffix(tokens, &mut idx);
                clauses.push(TemplateClause {
                    prefix: None,
                    expr: GenerateLiteral::Var(var_name),
                    suffix,
                });
            }
            _ => {}
        }
    }

    Ok(clauses)
}

/// Reads the identifier that must follow a `?`, advancing `idx` past it.
fn template_var_name(tokens: &[Token], idx: &mut usize) -> Result<String, GenerateError> {
    match tokens.get(*idx) {
        Some(Token::Ident(name)) => {
            *idx += 1;
            Ok(name.clone())
        }
        other => Err(GenerateError::ParseError {
            pos: *idx,
            msg: format!("expected identifier after '?', got {other:?}"),
        }),
    }
}

/// Reads an optional string-literal suffix, advancing `idx` only when one is present.
fn template_suffix(tokens: &[Token], idx: &mut usize) -> Option<String> {
    match tokens.get(*idx) {
        Some(Token::StringLit(text)) => {
            *idx += 1;
            Some(text.clone())
        }
        _ => None,
    }
}
/// Extracts the text between the braces of the `WHERE { ... }` clause.
///
/// The search runs on the raw query text, not on tokens, so the body is
/// returned exactly as written (minus surrounding whitespace). The first
/// stand-alone `WHERE` word is used, followed by the next `{`; the body ends at
/// the `}` that balances it, so nested braces are kept.
///
/// Known limitation: string literals and comments are not recognised here, so
/// a stand-alone `WHERE` or a brace inside one of them is still taken at face
/// value.
///
/// # Errors
/// Returns `GenerateError::ParseError` (with `pos` as a byte offset into
/// `input`) when there is no `WHERE` keyword, no `{` after it, or the opening
/// brace is never closed.
fn parse_where_body(input: &str) -> Result<String, GenerateError> {
    const KEYWORD_LEN: usize = "WHERE".len();

    let where_pos = find_where_keyword(input).ok_or_else(|| GenerateError::ParseError {
        pos: 0,
        msg: "missing WHERE clause".to_string(),
    })?;

    let after_keyword = where_pos + KEYWORD_LEN;
    let brace_offset = input[after_keyword..]
        .find('{')
        .ok_or_else(|| GenerateError::ParseError {
            pos: where_pos,
            msg: "expected '{' after WHERE".to_string(),
        })?;

    // First byte after the opening brace ('{' is one byte wide).
    let body_start = after_keyword + brace_offset + 1;
    let body = &input[body_start..];

    let mut depth = 1usize;
    for (offset, ch) in body.char_indices() {
        match ch {
            '{' => depth += 1,
            '}' => {
                depth -= 1;
                if depth == 0 {
                    return Ok(body[..offset].trim().to_string());
                }
            }
            _ => {}
        }
    }

    Err(GenerateError::ParseError {
        pos: body_start,
        msg: "unclosed '{' in WHERE clause".to_string(),
    })
}

/// Returns the byte offset in `input` of the first `WHERE` (ASCII
/// case-insensitive) that is not part of a longer word.
///
/// The match is done on the original bytes: upper-casing the whole string
/// first can change byte lengths of non-ASCII characters and would yield
/// offsets that do not apply to `input`. Word characters are the ones the
/// tokenizer accepts inside an identifier (alphanumerics, `_`, `-`), so
/// `somewhere` or `where-clause` are not mistaken for the keyword.
fn find_where_keyword(input: &str) -> Option<usize> {
    const KEYWORD: &[u8] = b"WHERE";
    let is_word_char = |c: char| c.is_alphanumeric() || c == '_' || c == '-';

    input
        .as_bytes()
        .windows(KEYWORD.len())
        .enumerate()
        .filter(|(_, window)| window.eq_ignore_ascii_case(KEYWORD))
        .map(|(start, _)| start)
        .find(|&start| {
            // The window is pure ASCII, so both of its edges are char boundaries.
            let before = input[..start].chars().next_back();
            let after = input[start + KEYWORD.len()..].chars().next();
            !before.map_or(false, is_word_char) && !after.map_or(false, is_word_char)
        })
}