use crate::error::{Error, Result};
use crate::parser::ast::{
Block, ConfigBlock, CustomBlock, DataBlock, EmbeddingAnnotation, EndpointBlock, GoldFile,
Parameter, QueryBlock, SchemaBlock, SchemaField, Value,
};
use crate::parser::lexer::{LocatedToken, Token, WinnowLexer};
use std::path::{Path, PathBuf};
pub struct GoldParser;
impl GoldParser {
pub fn parse_file(path: &Path) -> Result<GoldFile> {
let content = std::fs::read_to_string(path).map_err(Error::Io)?;
Self::parse_str(&content, path)
}
pub fn parse_str(content: &str, path: &Path) -> Result<GoldFile> {
let lexer = WinnowLexer::new(path);
let tokens = lexer.tokenize(content)?;
let mut parser = Parser::new(tokens, path);
parser.parse()
}
}
/// Cursor over a lexed token stream, shared by all the parsing methods.
struct Parser {
    /// Tokens being parsed, in source order.
    tokens: Vec<LocatedToken>,
    /// Index into `tokens` of the token currently under the cursor.
    position: usize,
    /// Path of the source being parsed; copied into every parse error.
    path: PathBuf,
}
impl Parser {
fn new(tokens: Vec<LocatedToken>, path: &Path) -> Self {
Self {
tokens,
position: 0,
path: path.to_path_buf(),
}
}
/// Parses the whole token stream into a `GoldFile`.
///
/// Comments and newlines between blocks are skipped. Every other top-level
/// item must begin with `@`; anything else is reported as a parse error.
fn parse(&mut self) -> Result<GoldFile> {
    let mut file = GoldFile::new();
    loop {
        // Drop any comments/newlines that precede the next block.
        self.skip_trivia();
        if self.is_at_end() {
            break;
        }
        if !self.check(&Token::At) {
            return self.error("Expected '@' directive");
        }
        file.add_block(self.parse_block()?);
    }
    Ok(file)
}
/// Parses one `@directive` block, dispatching on the directive name.
///
/// `@crud` is a prefix annotation: it must be followed (after optional
/// comments/newlines) by `@schema`, and marks that schema as CRUD-enabled.
/// Directive names without a dedicated parser are parsed as custom blocks.
fn parse_block(&mut self) -> Result<Block> {
    self.consume(&Token::At, "Expected '@'")?;
    let directive = self.expect_identifier("Expected directive name")?;

    if directive == "crud" {
        self.skip_trivia();
        self.consume(&Token::At, "Expected '@schema' after '@crud'")?;
        let target = self.expect_identifier("Expected 'schema' after '@crud'")?;
        return if target == "schema" {
            self.parse_schema_block_with_crud(true)
        } else {
            self.error("@crud annotation can only be used with @schema")
        };
    }

    match directive.as_str() {
        "schema" => self.parse_schema_block_with_crud(false),
        "query" => self.parse_query_block(),
        "config" => self.parse_config_block(),
        "endpoint" => self.parse_endpoint_block(),
        "data" => self.parse_data_block(),
        _ => self.parse_custom_block(directive),
    }
}
/// Parses the body of a `@schema` block; the cursor must be on the schema name.
///
/// Inside the braces the following items are accepted, in any order:
/// * `format = "<string>"`,
/// * a raw-content token, stored as the schema's `content`,
/// * `name: Type` field declarations, optionally preceded by an
///   `@embedding(...)` annotation,
/// * any other `@annotation`, which is ignored up to the end of its line.
///
/// `crud` is copied into the resulting `SchemaBlock`. Fields are always
/// recorded with `nullable: false` and `default: None`.
///
/// # Errors
/// Returns a parse error when the name, braces, or a field declaration is
/// malformed, or when an `@embedding` annotation is invalid.
fn parse_schema_block_with_crud(&mut self, crud: bool) -> Result<Block> {
    let name = self.expect_identifier("Expected schema name")?;
    self.consume(&Token::LBrace, "Expected '{' after schema name")?;
    let mut fields = Vec::new();
    let mut format = None;
    let mut content = None;
    while !self.check(&Token::RBrace) && !self.is_at_end() {
        self.skip_trivia();
        if self.check(&Token::RBrace) {
            break;
        }
        if self.match_identifier("format") {
            self.consume(&Token::Equals, "Expected '=' after 'format'")?;
            format = Some(self.expect_string("Expected format string")?);
            continue;
        }
        if self.check(&Token::RawContent(String::new())) {
            content = Some(self.parse_delimited_content()?);
            continue;
        }
        let embedding_annotation = if self.check(&Token::At) {
            self.advance();
            if self.match_identifier("embedding") {
                let annotation = self.parse_embedding_annotation()?;
                // The annotated field normally starts on the following line.
                self.skip_trivia();
                Some(annotation)
            } else {
                // Unknown annotation: drop the rest of its line and restart the
                // loop. Restarting (rather than falling through) matters because
                // the cursor is now on a newline/EOF token, which would otherwise
                // be rejected as a missing field name.
                self.skip_until_newline();
                continue;
            }
        } else {
            None
        };
        let field_name = self.expect_identifier("Expected field name")?;
        self.consume(&Token::Colon, "Expected ':' after field name")?;
        let field_type = self.expect_identifier("Expected field type")?;
        fields.push(SchemaField {
            name: field_name,
            field_type,
            nullable: false,
            default: None,
            embedding_annotation,
        });
    }
    self.consume(&Token::RBrace, "Expected '}' to close schema block")?;
    Ok(Block::Schema(SchemaBlock {
        name,
        format,
        fields,
        content,
        crud,
    }))
}
/// Parses a `@query` block; the cursor must be on the query name.
///
/// The signature is `name`, an optional parameter list, and an optional
/// `-> ReturnType` (an empty string is stored when it is absent). Inside the
/// braces, `language = "<string>"` overrides the default `"hyperql"`, a
/// raw-content token becomes the query source, and any other token is skipped.
fn parse_query_block(&mut self) -> Result<Block> {
    let name = self.expect_identifier("Expected query name")?;
    let params = self.parse_parameters()?;

    let mut return_type = String::new();
    if self.check(&Token::Arrow) {
        self.advance();
        return_type = self.expect_type("Expected return type after '->'")?;
    }

    self.consume(&Token::LBrace, "Expected '{' after query signature")?;

    let mut language = "hyperql".to_string();
    let mut source = String::new();
    loop {
        if self.check(&Token::RBrace) || self.is_at_end() {
            break;
        }
        self.skip_trivia();
        if self.check(&Token::RBrace) {
            break;
        }
        if self.match_identifier("language") {
            self.consume(&Token::Equals, "Expected '=' after 'language'")?;
            language = self.expect_string("Expected language string")?;
        } else if self.check(&Token::RawContent(String::new())) {
            source = self.parse_delimited_content()?;
        } else {
            // Anything unrecognised inside the body is ignored.
            self.advance();
        }
    }

    self.consume(&Token::RBrace, "Expected '}' to close query block")?;
    Ok(Block::Query(QueryBlock {
        name,
        params,
        return_type,
        language,
        source,
    }))
}
/// Parses a `@config` block; the cursor must be on the config name.
///
/// The body is a sequence of `key = value` attributes collected into a map,
/// so a repeated key keeps the last value seen.
fn parse_config_block(&mut self) -> Result<Block> {
    let name = self.expect_identifier("Expected config name")?;
    self.consume(&Token::LBrace, "Expected '{' after config name")?;

    let mut attributes = std::collections::HashMap::new();
    loop {
        if self.check(&Token::RBrace) || self.is_at_end() {
            break;
        }
        self.skip_trivia();
        if self.check(&Token::RBrace) {
            break;
        }
        let key = self.expect_identifier("Expected attribute name")?;
        self.consume(&Token::Equals, "Expected '=' after attribute name")?;
        let value = self.parse_value()?;
        attributes.insert(key, value);
    }

    self.consume(&Token::RBrace, "Expected '}' to close config block")?;
    Ok(Block::Config(ConfigBlock { name, attributes }))
}
/// Parses an `@endpoint` block; the cursor must be on the HTTP method.
///
/// The header is an identifier (method) followed by a string (path). In the
/// body, `query = <identifier>` and `auth = <boolean>` are handled specially.
/// Every other `key = value` pair is stored in `params` only when the value
/// is a string; non-string values are parsed and then discarded.
fn parse_endpoint_block(&mut self) -> Result<Block> {
    let method = self.expect_identifier("Expected HTTP method")?;
    let path = self.expect_string("Expected endpoint path")?;
    self.consume(&Token::LBrace, "Expected '{' after endpoint path")?;

    let mut query = String::new();
    let mut auth = false;
    let mut params = std::collections::HashMap::new();
    loop {
        if self.check(&Token::RBrace) || self.is_at_end() {
            break;
        }
        self.skip_trivia();
        if self.check(&Token::RBrace) {
            break;
        }
        let attr_name = self.expect_identifier("Expected attribute name")?;
        self.consume(&Token::Equals, "Expected '=' after attribute name")?;
        if attr_name == "query" {
            query = self.expect_identifier("Expected query name")?;
        } else if attr_name == "auth" {
            auth = self.expect_boolean("Expected boolean for auth")?;
        } else {
            let value = self.parse_value()?;
            if let Some(s) = value.as_string() {
                params.insert(attr_name, s.to_string());
            }
        }
    }

    self.consume(&Token::RBrace, "Expected '}' to close endpoint block")?;
    Ok(Block::Endpoint(EndpointBlock {
        method,
        path,
        query,
        auth,
        params,
    }))
}
/// Parses a `@data` block; the cursor must be on the block name.
///
/// `format = "<string>"` overrides the default `"json"`, a raw-content token
/// becomes the block's content, and any other token in the body is skipped.
fn parse_data_block(&mut self) -> Result<Block> {
    let name = self.expect_identifier("Expected data block name")?;
    self.consume(&Token::LBrace, "Expected '{' after data block name")?;

    let mut format = "json".to_string();
    let mut content = String::new();
    loop {
        if self.check(&Token::RBrace) || self.is_at_end() {
            break;
        }
        self.skip_trivia();
        if self.check(&Token::RBrace) {
            break;
        }
        if self.match_identifier("format") {
            self.consume(&Token::Equals, "Expected '=' after 'format'")?;
            format = self.expect_string("Expected format string")?;
        } else if self.check(&Token::RawContent(String::new())) {
            content = self.parse_delimited_content()?;
        } else {
            // Unrecognised tokens inside the body are ignored.
            self.advance();
        }
    }

    self.consume(&Token::RBrace, "Expected '}' to close data block")?;
    Ok(Block::Data(DataBlock {
        name,
        format,
        content,
    }))
}
/// Parses a block whose directive has no dedicated parser.
///
/// `block_type` is the directive name already consumed by the caller. An
/// optional identifier after it becomes the block's name. The body may hold
/// a raw-content token and `key = value` attributes; any token that starts
/// neither is skipped.
fn parse_custom_block(&mut self, block_type: String) -> Result<Block> {
    let mut name = None;
    if self.check_identifier() {
        name = Some(self.expect_identifier("Expected block name")?);
    }
    self.consume(&Token::LBrace, "Expected '{' after block header")?;

    let mut attributes = std::collections::HashMap::new();
    let mut content = None;
    loop {
        if self.check(&Token::RBrace) || self.is_at_end() {
            break;
        }
        self.skip_trivia();
        if self.check(&Token::RBrace) {
            break;
        }
        if self.check(&Token::RawContent(String::new())) {
            content = Some(self.parse_delimited_content()?);
        } else if self.check_identifier() {
            let key = self.expect_identifier("Expected attribute name")?;
            self.consume(&Token::Equals, "Expected '=' after attribute name")?;
            let value = self.parse_value()?;
            attributes.insert(key, value);
        } else {
            self.advance();
        }
    }

    self.consume(&Token::RBrace, "Expected '}' to close block")?;
    Ok(Block::Custom(CustomBlock {
        block_type,
        name,
        attributes,
        content,
    }))
}
/// Returns the text of the current raw-content token and steps past it.
///
/// When the current token is not raw content, nothing is consumed and an
/// empty string is returned.
fn parse_delimited_content(&mut self) -> Result<String> {
    let raw = match &self.current().token {
        Token::RawContent(content) => content.clone(),
        _ => return Ok(String::new()),
    };
    self.advance();
    Ok(raw)
}
/// Parses an optional parenthesised list of `name: Type` parameters.
///
/// Returns an empty list without consuming anything when the current token
/// is not `(`. Parameters are separated by commas; a comma directly before
/// the closing `)` is accepted.
fn parse_parameters(&mut self) -> Result<Vec<Parameter>> {
    let mut params = Vec::new();
    if !self.check(&Token::LParen) {
        return Ok(params);
    }
    self.consume(&Token::LParen, "Expected '('")?;

    loop {
        if self.check(&Token::RParen) || self.is_at_end() {
            break;
        }
        let name = self.expect_identifier("Expected parameter name")?;
        self.consume(&Token::Colon, "Expected ':' after parameter name")?;
        let param_type = self.expect_type("Expected parameter type")?;
        params.push(Parameter { name, param_type });
        if !self.check(&Token::RParen) {
            self.consume(&Token::Comma, "Expected ',' or ')' after parameter")?;
        }
    }

    self.consume(&Token::RParen, "Expected ')' to close parameters")?;
    Ok(params)
}
fn parse_embedding_annotation(&mut self) -> Result<EmbeddingAnnotation> {
self.consume(&Token::LParen, "Expected '(' after @embedding")?;
let mut model = None;
let mut source_field = None;
let mut dimension = None;
let mut paradigm = None;
while !self.check(&Token::RParen) && !self.is_at_end() {
let key = self.expect_identifier("Expected embedding parameter name")?;
self.consume(&Token::Equals, "Expected '=' after parameter name")?;
match key.as_str() {
"model" => {
model = Some(self.expect_string("Expected model string")?);
}
"source_field" => {
source_field = Some(self.expect_string("Expected source_field string")?);
}
"dimension" => {
dimension = Some(self.expect_integer("Expected dimension integer")?);
}
"paradigm" => {
paradigm = Some(self.expect_string("Expected paradigm string")?);
}
_ => {
return self.error(&format!("Unknown embedding parameter '{}'", key));
}
}
if !self.check(&Token::RParen) {
self.consume(&Token::Comma, "Expected ',' or ')' after parameter")?;
}
}
self.consume(&Token::RParen, "Expected ')' to close @embedding")?;
let model = model.ok_or_else(|| Error::Parse {
file: self.path.clone(),
line: self.current().line,
column: self.current().column,
message: "@embedding requires 'model' parameter".to_string(),
})?;
let source_field = source_field.ok_or_else(|| Error::Parse {
file: self.path.clone(),
line: self.current().line,
column: self.current().column,
message: "@embedding requires 'source_field' parameter".to_string(),
})?;
Ok(EmbeddingAnnotation {
model,
source_field,
dimension,
paradigm,
})
}
/// Converts the current literal token (string, integer, float or boolean)
/// into a `Value` and steps past it.
///
/// Any other token is reported as a parse error and is not consumed.
fn parse_value(&mut self) -> Result<Value> {
    let value = match &self.current().token {
        Token::String(s) => Value::String(s.clone()),
        Token::Integer(i) => Value::Integer(*i),
        Token::Float(f) => Value::Float(*f),
        Token::Boolean(b) => Value::Boolean(*b),
        _ => return self.error("Expected value (string, number, or boolean)"),
    };
    self.advance();
    Ok(value)
}
/// Parses a type name, optionally followed by one generic argument in angle
/// brackets, and returns it rendered as `Name` or `Name<Inner>`.
///
/// The generic argument is itself parsed as a type, so nesting is allowed.
/// `message` is used when the leading identifier is missing.
fn expect_type(&mut self, message: &str) -> Result<String> {
    let base = self.expect_identifier(message)?;
    if !self.check(&Token::LessThan) {
        return Ok(base);
    }
    self.advance();
    let inner = self.expect_type("Expected type inside generic")?;
    self.consume(&Token::GreaterThan, "Expected '>' to close generic type")?;
    Ok(format!("{}<{}>", base, inner))
}
/// Consumes the current token if it is an identifier and returns its text;
/// otherwise reports `message` as a parse error without consuming anything.
fn expect_identifier(&mut self, message: &str) -> Result<String> {
    if let Token::Ident(s) = &self.current().token {
        let ident = s.clone();
        self.advance();
        return Ok(ident);
    }
    self.error(message)
}
/// Consumes the current token if it is a string literal and returns its
/// contents; otherwise reports `message` as a parse error.
fn expect_string(&mut self, message: &str) -> Result<String> {
    let text = match &self.current().token {
        Token::String(s) => s.clone(),
        _ => return self.error(message),
    };
    self.advance();
    Ok(text)
}
/// Consumes the current token if it is a boolean literal and returns its
/// value; otherwise reports `message` as a parse error.
fn expect_boolean(&mut self, message: &str) -> Result<bool> {
    if let Token::Boolean(b) = &self.current().token {
        let flag = *b;
        self.advance();
        return Ok(flag);
    }
    self.error(message)
}
/// Consumes the current token if it is an integer literal that fits in a
/// `usize` and returns it.
///
/// # Errors
/// Reports `message` as a parse error, without consuming the token, when the
/// current token is not an integer or when its value cannot be represented
/// as a `usize` (for example a negative number). A plain `as` cast would
/// silently wrap such values into a very large `usize`.
fn expect_integer(&mut self, message: &str) -> Result<usize> {
    let raw = match &self.current().token {
        Token::Integer(i) => *i,
        _ => return self.error(message),
    };
    // Fully qualified so the call does not depend on `TryFrom` being in the
    // prelude of the crate's edition.
    match <usize as std::convert::TryFrom<_>>::try_from(raw) {
        Ok(val) => {
            self.advance();
            Ok(val)
        }
        Err(_) => self.error(message),
    }
}
/// Reports whether the current token is the same *variant* as `token`.
///
/// Only the enum discriminant is compared, so any payload carried by `token`
/// is ignored. Always returns `false` once the cursor is on the EOF token.
fn check(&self, token: &Token) -> bool {
    !self.is_at_end()
        && std::mem::discriminant(&self.current().token) == std::mem::discriminant(token)
}
/// Reports whether the current token is an identifier, without consuming it.
fn check_identifier(&self) -> bool {
    match &self.current().token {
        Token::Ident(_) => true,
        _ => false,
    }
}
/// Consumes the current token and returns `true` when it is an identifier
/// spelled exactly `name`; otherwise leaves the cursor alone and returns
/// `false`.
fn match_identifier(&mut self, name: &str) -> bool {
    let matched = matches!(&self.current().token, Token::Ident(s) if s == name);
    if matched {
        self.advance();
    }
    matched
}
/// Steps past the current token when it is the same variant as `token`;
/// otherwise reports `message` as a parse error without consuming anything.
fn consume(&mut self, token: &Token, message: &str) -> Result<()> {
    if !self.check(token) {
        return self.error(message);
    }
    self.advance();
    Ok(())
}
/// Skips consecutive comment, doc-comment, module-doc and newline tokens.
///
/// Returns `true` when at least one token was skipped.
fn skip_trivia(&mut self) -> bool {
    let mut skipped = false;
    loop {
        let is_trivia = match self.current().token {
            Token::Comment(_) | Token::DocComment(_) | Token::ModuleDoc(_) | Token::Newline => {
                true
            }
            _ => false,
        };
        if !is_trivia {
            return skipped;
        }
        self.advance();
        skipped = true;
    }
}
/// Advances until the cursor rests on a newline or EOF token.
///
/// The terminating token itself is not consumed.
fn skip_until_newline(&mut self) {
    loop {
        let at_line_end = matches!(self.current().token, Token::Newline | Token::Eof);
        if at_line_end {
            break;
        }
        self.advance();
    }
}
/// Returns the token under the cursor, falling back to the last token when
/// the cursor has moved past the end of the stream.
///
/// # Panics
/// Panics if the token list is empty.
fn current(&self) -> &LocatedToken {
    match self.tokens.get(self.position) {
        Some(token) => token,
        None => self.tokens.last().unwrap(),
    }
}
/// Moves the cursor one token forward; a no-op once the EOF token is reached.
fn advance(&mut self) {
    if self.is_at_end() {
        return;
    }
    self.position += 1;
}
/// Reports whether the token under the cursor is the EOF token.
fn is_at_end(&self) -> bool {
    match self.current().token {
        Token::Eof => true,
        _ => false,
    }
}
/// Builds an `Err(Error::Parse)` located at the current token.
///
/// The stored message is `message` followed by a description of the token
/// that was actually found. Generic over `T` so it can be returned from any
/// parsing method.
fn error<T>(&self, message: &str) -> Result<T> {
    let at = self.current();
    let message = format!("{} (found {})", message, at.token.description());
    Err(Error::Parse {
        file: self.path.clone(),
        line: at.line,
        column: at.column,
        message,
    })
}
}
#[cfg(test)]
mod tests {
use super::*;
/// A schema with two plain fields parses into a single `schema` block.
#[test]
fn test_parse_simple_schema() {
    let input = r#"
@schema User {
id: EntityId
name: String
}
"#;
    let result = GoldParser::parse_str(input, Path::new("test.au"));
    // Print the parser's message so a failing assertion below is diagnosable.
    match &result {
        Err(e) => eprintln!("Parse error: {}", e),
        Ok(_) => {}
    }
    assert!(result.is_ok());

    let file = result.unwrap();
    assert_eq!(file.blocks.len(), 1);
    assert_eq!(file.blocks[0].block_type(), "schema");
}
/// A query with one parameter, a return type, a language and a raw body
/// parses into a `Block::Query` carrying those values.
#[test]
fn test_parse_query() {
    let input = r#"
@query get_user(id: EntityId) -> User {
language = "hyperql"
---
SELECT * FROM users WHERE id = :id
---
}
"#;
    let result = GoldParser::parse_str(input, Path::new("test.au"));
    // Print the parser's message so a failing assertion below is diagnosable.
    match &result {
        Err(e) => eprintln!("Parse error: {}", e),
        Ok(_) => {}
    }
    assert!(result.is_ok());

    let file = result.unwrap();
    assert_eq!(file.blocks.len(), 1);
    match &file.blocks[0] {
        Block::Query(query) => {
            assert_eq!(query.name, "get_user");
            assert_eq!(query.params.len(), 1);
            assert_eq!(query.return_type, "User");
            assert_eq!(query.language, "hyperql");
        }
        _ => panic!("Expected Query block"),
    }
}
/// A config block with two string attributes parses into a `Block::Config`
/// holding both of them.
#[test]
fn test_parse_config() {
    let input = r#"
@config database {
path = "./data"
engine = "manifold"
}
"#;
    let result = GoldParser::parse_str(input, Path::new("test.au"));
    // Print the parser's message so a failing assertion below is diagnosable.
    match &result {
        Err(e) => eprintln!("Parse error: {}", e),
        Ok(_) => {}
    }
    assert!(result.is_ok());

    let file = result.unwrap();
    assert_eq!(file.blocks.len(), 1);
    match &file.blocks[0] {
        Block::Config(config) => {
            assert_eq!(config.name, "database");
            assert_eq!(config.attributes.len(), 2);
        }
        _ => panic!("Expected Config block"),
    }
}
/// An endpoint with a method, path, `query` and `auth` parses into a
/// `Block::Endpoint` carrying those values.
#[test]
fn test_parse_endpoint() {
    let input = r#"
@endpoint GET "/api/users/:id" {
query = get_user
auth = true
}
"#;
    let result = GoldParser::parse_str(input, Path::new("test.au"));
    // Print the parser's message so a failing assertion below is diagnosable.
    match &result {
        Err(e) => eprintln!("Parse error: {}", e),
        Ok(_) => {}
    }
    assert!(result.is_ok());

    let file = result.unwrap();
    assert_eq!(file.blocks.len(), 1);
    match &file.blocks[0] {
        Block::Endpoint(endpoint) => {
            assert_eq!(endpoint.method, "GET");
            assert_eq!(endpoint.path, "/api/users/:id");
            assert_eq!(endpoint.query, "get_user");
            assert!(endpoint.auth);
        }
        _ => panic!("Expected Endpoint block"),
    }
}
/// Two consecutive blocks (a schema followed by a query) are both collected.
#[test]
fn test_parse_multiple_blocks() {
    let input = r#"
@schema User {
id: EntityId
name: String
}
@query get_user(id: EntityId) -> User {
language = "hyperql"
---
SELECT * FROM users WHERE id = :id
---
}
"#;
    let result = GoldParser::parse_str(input, Path::new("test.au"));
    // Print the parser's message so a failing assertion below is diagnosable.
    match &result {
        Err(e) => eprintln!("Parse error: {}", e),
        Ok(_) => {}
    }
    assert!(result.is_ok());

    let file = result.unwrap();
    assert_eq!(file.blocks.len(), 2);
}
/// An `@embedding` annotation with `model`, `source_field` and `dimension`
/// is attached to the field declared on the following line.
#[test]
fn test_parse_embedding_annotation() {
    let input = r#"
@schema Document {
id: EntityId
content: String
@embedding(model="bge-base-en-v1.5", source_field="content", dimension=768)
embedding: Vector
}
"#;
    let result = GoldParser::parse_str(input, Path::new("test.au"));
    // Print the parser's message so a failing assertion below is diagnosable.
    match &result {
        Err(e) => eprintln!("Parse error: {}", e),
        Ok(_) => {}
    }
    assert!(result.is_ok());

    let file = result.unwrap();
    assert_eq!(file.blocks.len(), 1);
    match &file.blocks[0] {
        Block::Schema(schema) => {
            assert_eq!(schema.name, "Document");
            assert_eq!(schema.fields.len(), 3);

            // The annotated field is the third one declared.
            let embedding_field = &schema.fields[2];
            assert_eq!(embedding_field.name, "embedding");
            assert_eq!(embedding_field.field_type, "Vector");
            assert!(embedding_field.embedding_annotation.is_some());

            let annotation = embedding_field.embedding_annotation.as_ref().unwrap();
            assert_eq!(annotation.model, "bge-base-en-v1.5");
            assert_eq!(annotation.source_field, "content");
            assert_eq!(annotation.dimension, Some(768));
            assert!(annotation.paradigm.is_none());
        }
        _ => panic!("Expected Schema block"),
    }
}
/// An `@embedding` annotation may give `paradigm` and omit `dimension`.
#[test]
fn test_parse_embedding_annotation_with_paradigm() {
    let input = r#"
@schema Document {
id: EntityId
content: String
@embedding(model="colbert-v2", source_field="content", paradigm="multi-vector")
embedding: Vector
}
"#;
    let result = GoldParser::parse_str(input, Path::new("test.au"));
    assert!(result.is_ok());

    let file = result.unwrap();
    match &file.blocks[0] {
        Block::Schema(schema) => {
            let embedding_field = &schema.fields[2];
            let annotation = embedding_field.embedding_annotation.as_ref().unwrap();
            assert_eq!(annotation.model, "colbert-v2");
            assert_eq!(annotation.dimension, None);
            assert_eq!(annotation.paradigm, Some("multi-vector".to_string()));
        }
        _ => panic!("Expected Schema block"),
    }
}
/// With only the two required keys, `dimension` and `paradigm` are `None`.
#[test]
fn test_parse_embedding_annotation_no_dimension() {
    let input = r#"
@schema Document {
id: EntityId
content: String
@embedding(model="bge-base-en-v1.5", source_field="content")
embedding: Vector
}
"#;
    let result = GoldParser::parse_str(input, Path::new("test.au"));
    assert!(result.is_ok());

    let file = result.unwrap();
    match &file.blocks[0] {
        Block::Schema(schema) => {
            let embedding_field = &schema.fields[2];
            let annotation = embedding_field.embedding_annotation.as_ref().unwrap();
            assert_eq!(annotation.model, "bge-base-en-v1.5");
            assert_eq!(annotation.source_field, "content");
            assert_eq!(annotation.dimension, None);
            assert_eq!(annotation.paradigm, None);
        }
        _ => panic!("Expected Schema block"),
    }
}
}