use crate::compress::CompressedParseTable;
use crate::validation::TSLanguage;
use adze_glr_core::ParseTable;
use adze_ir::Grammar;
use proc_macro2::TokenStream;
use quote::quote;
pub struct LanguageBuilder {
grammar: Grammar,
parse_table: ParseTable,
start_can_be_empty: bool,
}
impl LanguageBuilder {
pub fn new(grammar: Grammar, parse_table: ParseTable) -> Self {
Self {
grammar,
parse_table,
start_can_be_empty: false,
}
}
pub fn set_start_can_be_empty(&mut self, value: bool) {
self.start_can_be_empty = value;
}
#[must_use = "generation result must be checked"]
pub fn generate_language(&self) -> Result<TSLanguage, String> {
let compressed = CompressedParseTable::from_parse_table(&self.parse_table);
let language = self.build_language_struct(&compressed)?;
Ok(language)
}
fn build_language_struct(
&self,
compressed: &CompressedParseTable,
) -> Result<TSLanguage, String> {
let symbol_count = self.parse_table.symbol_count as u32;
let state_count = self.parse_table.state_count as u32;
let token_count = (self.grammar.tokens.len() + 1) as u32;
let external_token_count = self.grammar.externals.len() as u32;
let field_count = self.grammar.fields.len() as u32;
let alias_count = 0;
let large_state_count = 0;
let production_id_count = self.grammar.alias_sequences.len() as u32;
let max_alias_sequence_length = 0;
let symbol_names = self.build_symbol_names();
let field_names = self.build_field_names();
let symbol_metadata = self.build_symbol_metadata();
let small_parse_table = self.build_small_parse_table(compressed);
Ok(TSLanguage {
version: 15,
symbol_count,
alias_count,
token_count,
external_token_count,
state_count,
large_state_count,
production_id_count,
field_count,
max_alias_sequence_length,
parse_table: std::ptr::null(),
small_parse_table: Box::leak(Box::new(small_parse_table)).as_ptr(),
small_parse_table_map: std::ptr::null(),
parse_actions: std::ptr::null(),
symbol_names: Box::leak(Box::new(symbol_names)).as_ptr(),
field_names: if field_count > 0 {
Box::leak(Box::new(field_names)).as_ptr()
} else {
std::ptr::null()
},
field_map_slices: std::ptr::null(),
field_map_entries: std::ptr::null(),
symbol_metadata: Box::leak(Box::new(symbol_metadata)).as_ptr(),
public_symbol_map: std::ptr::null(),
alias_map: std::ptr::null(),
alias_sequences: std::ptr::null(),
lex_modes: std::ptr::null(),
lex_fn: None,
keyword_lex_fn: None,
keyword_capture_token: 0,
external_scanner_data: crate::validation::TSExternalScannerData {
states: std::ptr::null(),
symbol_map: std::ptr::null(),
create: None,
destroy: None,
scan: None,
serialize: None,
deserialize: None,
},
primary_state_ids: std::ptr::null(),
})
}
fn build_symbol_names(&self) -> Vec<*const i8> {
let mut names = Vec::new();
for (_, token) in &self.grammar.tokens {
let name = std::ffi::CString::new(token.name.clone())
.expect("symbol name must not contain NUL bytes");
names.push(Box::leak(Box::new(name)).as_ptr());
}
for (symbol_id, _) in &self.grammar.rules {
let name = std::ffi::CString::new(format!("rule_{}", symbol_id.0))
.expect("rule name must not contain NUL bytes");
names.push(Box::leak(Box::new(name)).as_ptr());
}
for external in &self.grammar.externals {
let name = std::ffi::CString::new(external.name.clone())
.expect("external name must not contain NUL bytes");
names.push(Box::leak(Box::new(name)).as_ptr());
}
names
}
fn build_field_names(&self) -> Vec<*const i8> {
let mut names = Vec::new();
let empty = std::ffi::CString::new("").expect("empty string cannot contain NUL bytes");
names.push(Box::leak(Box::new(empty)).as_ptr());
for (_, field_name) in &self.grammar.fields {
let name = std::ffi::CString::new(field_name.clone())
.expect("field name must not contain NUL bytes");
names.push(Box::leak(Box::new(name)).as_ptr());
}
names
}
fn build_symbol_metadata(&self) -> Vec<crate::validation::TSSymbolMetadata> {
let mut metadata = Vec::new();
metadata.push(crate::validation::TSSymbolMetadata {
visible: false,
named: false,
});
for (_, token) in &self.grammar.tokens {
let visible = !token.name.starts_with('_');
let named = matches!(&token.pattern, adze_ir::TokenPattern::Regex(_)) && visible;
metadata.push(crate::validation::TSSymbolMetadata { visible, named });
}
for (symbol_id, _) in &self.grammar.rules {
let rule_name = format!("rule_{}", symbol_id.0);
let visible = !rule_name.starts_with('_');
let named = visible;
metadata.push(crate::validation::TSSymbolMetadata { visible, named });
}
for external in &self.grammar.externals {
let visible = !external.name.starts_with('_');
let named = visible;
metadata.push(crate::validation::TSSymbolMetadata { visible, named });
}
metadata
}
fn build_small_parse_table(&self, _compressed: &CompressedParseTable) -> Vec<u16> {
let table_size = self.parse_table.state_count * self.parse_table.symbol_count;
vec![0xFFFE; table_size] }
pub fn generate_language_code(&self) -> TokenStream {
quote! {
pub fn language() -> *const TSLanguage {
std::ptr::null()
}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use adze_glr_core::Action;
use adze_ir::*;
fn create_test_grammar() -> Grammar {
let mut grammar = Grammar {
name: "test".to_string(),
..Default::default()
};
grammar.tokens.insert(
SymbolId(1),
Token {
name: "number".to_string(),
pattern: TokenPattern::Regex(r"\d+".to_string()),
fragile: false,
},
);
grammar.add_rule(Rule {
lhs: SymbolId(0),
rhs: vec![Symbol::Terminal(SymbolId(1))],
precedence: None,
associativity: None,
fields: vec![],
production_id: ProductionId(0),
});
grammar.fields.insert(FieldId(0), "value".to_string());
grammar
}
fn create_test_parse_table() -> ParseTable {
let mut table = crate::test_helpers::test::make_minimal_table(
vec![vec![vec![]; 2]; 3], vec![vec![crate::test_helpers::test::INVALID; 2]; 3],
vec![],
adze_ir::SymbolId(1), adze_ir::SymbolId(1), 0, );
table.action_table = vec![vec![vec![Action::Error]; table.symbol_count]; table.state_count];
table
}
#[test]
fn test_language_builder_creation() {
let grammar = create_test_grammar();
let parse_table = create_test_parse_table();
let builder = LanguageBuilder::new(grammar, parse_table);
assert!(builder.grammar.name == "test");
}
#[test]
fn test_generate_language() {
let grammar = create_test_grammar();
let parse_table = create_test_parse_table();
let builder = LanguageBuilder::new(grammar, parse_table);
let result = builder.generate_language();
assert!(result.is_ok());
let language = result.unwrap();
assert_eq!(language.version, 15);
assert_eq!(language.symbol_count, 2);
assert_eq!(language.state_count, 3);
}
#[test]
fn test_build_symbol_names() {
let grammar = create_test_grammar();
let parse_table = create_test_parse_table();
let builder = LanguageBuilder::new(grammar, parse_table);
let names = builder.build_symbol_names();
assert!(!names.is_empty());
assert!(
names.iter().any(|&name| unsafe {
std::ffi::CStr::from_ptr(name).to_str().unwrap() == "number"
})
);
}
#[test]
fn test_build_field_names() {
let grammar = create_test_grammar();
let parse_table = create_test_parse_table();
let builder = LanguageBuilder::new(grammar, parse_table);
let names = builder.build_field_names();
assert_eq!(names.len(), 2);
assert_eq!(
unsafe { std::ffi::CStr::from_ptr(names[0]).to_str().unwrap() },
""
);
assert_eq!(
unsafe { std::ffi::CStr::from_ptr(names[1]).to_str().unwrap() },
"value"
);
}
#[test]
fn test_build_symbol_metadata() {
let grammar = create_test_grammar();
let parse_table = create_test_parse_table();
let builder = LanguageBuilder::new(grammar, parse_table);
let metadata = builder.build_symbol_metadata();
assert!(!metadata.is_empty());
}
#[test]
fn test_language_generator_code() {
let grammar = create_test_grammar();
let parse_table = create_test_parse_table();
let builder = LanguageBuilder::new(grammar, parse_table);
let code = builder.generate_language_code();
let code_str = code.to_string();
assert!(code_str.contains("language"));
assert!(code_str.contains("TSLanguage"));
}
#[test]
fn test_language_with_externals() {
let mut grammar = create_test_grammar();
grammar.externals.push(ExternalToken {
name: "comment".to_string(),
symbol_id: SymbolId(100),
});
let parse_table = create_test_parse_table();
let builder = LanguageBuilder::new(grammar, parse_table);
let result = builder.generate_language();
assert!(result.is_ok());
let language = result.unwrap();
assert_eq!(language.external_token_count, 1);
}
#[test]
fn test_language_with_multiple_fields() {
let mut grammar = create_test_grammar();
grammar.fields.insert(FieldId(1), "left".to_string());
grammar.fields.insert(FieldId(2), "operator".to_string());
grammar.fields.insert(FieldId(3), "right".to_string());
let parse_table = create_test_parse_table();
let builder = LanguageBuilder::new(grammar, parse_table);
let result = builder.generate_language();
assert!(result.is_ok());
let language = result.unwrap();
assert_eq!(language.field_count, 4);
}
}