#![cfg_attr(feature = "strict_docs", allow(missing_docs))]
use adze_glr_core::ParseTable;
use adze_ir::{ExternalToken, Grammar, SymbolId};
use quote::quote;
use std::collections::HashMap;
// Release builds: tracing is compiled out. The macro accepts any tokens and
// expands to nothing, so the arguments are never evaluated.
#[cfg(not(debug_assertions))]
macro_rules! debug_trace {
    ($($fmt_args:tt)*) => {};
}

// Debug builds: forwards the arguments to `eprintln!`, but only when the
// `RUST_LOG` environment variable is set to a value containing "debug".
// The variable is re-read on every invocation; an unset or non-Unicode value
// disables the output.
#[cfg(debug_assertions)]
macro_rules! debug_trace {
    ($($fmt_args:tt)*) => {
        if matches!(std::env::var("RUST_LOG"), Ok(filter) if filter.contains("debug")) {
            eprintln!($($fmt_args)*);
        }
    };
}
/// Generates the static tables and accessor code that describe a grammar's
/// external tokens, using the external-scanner state data stored in a parse table.
pub struct ExternalScannerGenerator {
// Stored by `new`; not read by any method visible in this file.
#[allow(dead_code)]
grammar: Grammar,
/// Copy of `grammar.externals`, in the grammar's declaration order.
external_tokens: Vec<ExternalToken>,
// Maps each external token's symbol id to its index in `external_tokens`.
// Built by `new`; not read by any method visible in this file.
#[allow(dead_code)]
symbol_map: HashMap<SymbolId, usize>,
/// Parse table whose `external_scanner_states` supplies the per-state validity rows.
parse_table: ParseTable,
}
impl ExternalScannerGenerator {
pub fn new(grammar: Grammar, parse_table: ParseTable) -> Self {
let external_tokens = grammar.externals.clone();
let mut symbol_map = HashMap::new();
for (index, token) in external_tokens.iter().enumerate() {
symbol_map.insert(token.symbol_id, index);
}
Self {
grammar,
external_tokens,
symbol_map,
parse_table,
}
}
pub fn compute_state_validity(&self) -> Vec<Vec<bool>> {
self.parse_table.external_scanner_states.clone()
}
pub fn generate_state_bitmap(&self) -> Vec<Vec<bool>> {
self.parse_table.external_scanner_states.clone()
}
pub fn generate_symbol_map(&self) -> Vec<u16> {
let mut map = vec![0u16; self.external_tokens.len()];
for (token_index, token) in self.external_tokens.iter().enumerate() {
map[token_index] = token.symbol_id.0;
}
map
}
pub fn generate_scanner_interface(&self) -> proc_macro2::TokenStream {
if self.external_tokens.is_empty() {
return quote! {};
}
let state_bitmap = self.generate_state_bitmap();
let mut state_data = Vec::new();
for state in &state_bitmap {
for &valid in state {
state_data.push(valid);
}
}
let symbol_map = self.generate_symbol_map();
let external_count = self.external_tokens.len();
let state_count = state_bitmap.len();
quote! {
const EXTERNAL_TOKEN_COUNT: usize = #external_count;
const STATE_COUNT: usize = #state_count;
static EXTERNAL_SCANNER_STATES: &[bool] = &[#(#state_data),*];
static EXTERNAL_SCANNER_SYMBOL_MAP: &[u16] = &[#(#symbol_map),*];
#[allow(dead_code)]
static EXTERNAL_SCANNER_DATA: adze::ffi::TSExternalScannerData = adze::ffi::TSExternalScannerData {
states: EXTERNAL_SCANNER_STATES.as_ptr(),
symbol_map: EXTERNAL_SCANNER_SYMBOL_MAP.as_ptr(),
create: None, destroy: None,
scan: None,
serialize: None,
deserialize: None,
};
#[allow(dead_code)]
fn get_valid_external_tokens(state: usize) -> Vec<bool> {
if state >= STATE_COUNT {
return vec![false; EXTERNAL_TOKEN_COUNT];
}
let start = state * EXTERNAL_TOKEN_COUNT;
let end = start + EXTERNAL_TOKEN_COUNT;
EXTERNAL_SCANNER_STATES[start..end].to_vec()
}
}
}
pub fn has_external_tokens(&self) -> bool {
!self.external_tokens.is_empty()
}
pub fn external_token_count(&self) -> usize {
self.external_tokens.len()
}
pub fn debug_print_validity(&self) {
let state_bitmap = self.compute_state_validity();
debug_trace!("External Token Validity Matrix:");
debug_trace!("States x External Tokens");
let mut header = String::from("State |");
for token in &self.external_tokens {
header.push_str(&format!(" {} |", token.name));
}
debug_trace!("{}", header);
for (state_idx, state_validity) in state_bitmap.iter().enumerate() {
let mut row = format!("{:5} |", state_idx);
for &valid in state_validity {
row.push_str(&format!(" {:5} |", if valid { "✓" } else { " " }));
}
debug_trace!("{}", row);
}
}
}
// Unit tests for `ExternalScannerGenerator`.
#[cfg(test)]
mod tests {
use super::*;
use adze_glr_core::{Action, FirstFollowSets, build_lr1_automaton};
use adze_ir::{ProductionId, Rule, Symbol, Token, TokenPattern};
// Checks that `compute_state_validity` returns the rows stored in the parse
// table's `external_scanner_states` unchanged.
#[test]
fn test_state_validity_computation() {
let mut grammar = Grammar::new("test".to_string());
// Two external tokens: INDENT (symbol 100) and DEDENT (symbol 101).
grammar.externals.push(ExternalToken {
name: "INDENT".to_string(),
symbol_id: SymbolId(100),
});
grammar.externals.push(ExternalToken {
name: "DEDENT".to_string(),
symbol_id: SymbolId(101),
});
// NOTE(review): the positional arguments of `make_minimal_table` are defined in
// `crate::test_helpers`; presumably a 2x2 action table, a 2x2 goto table, rules,
// two symbol ids and a count — confirm against the helper's signature.
let mut parse_table = crate::test_helpers::test::make_minimal_table(
vec![vec![vec![Action::Error]; 2]; 2], vec![vec![crate::test_helpers::test::INVALID; 2]; 2],
vec![],
adze_ir::SymbolId(1), adze_ir::SymbolId(1), 0, );
// One validity row per state: state 0 accepts only the first external token,
// state 1 only the second.
parse_table.external_scanner_states = vec![
vec![true, false], vec![false, true], ];
// The index and action-table entries below are not read by
// `compute_state_validity`, which only clones `external_scanner_states`.
parse_table.symbol_to_index.insert(SymbolId(100), 0); parse_table.symbol_to_index.insert(SymbolId(101), 1);
parse_table.action_table[0][0] = vec![Action::Shift(adze_ir::StateId(1))];
parse_table.action_table[1][1] = vec![Action::Shift(adze_ir::StateId(2))];
let generator = ExternalScannerGenerator::new(grammar, parse_table);
let validity = generator.compute_state_validity();
assert_eq!(validity[0], vec![true, false]);
assert_eq!(validity[1], vec![false, true]);
}
// Checks that `generate_symbol_map` lists the external tokens' raw symbol ids
// in declaration order.
#[test]
fn test_symbol_map_generation() {
let mut grammar = Grammar::new("test".to_string());
let start_symbol = SymbolId(1);
let dummy_token_id = SymbolId(2);
// A single string token and a single rule `start -> dummy`, so that a parse
// table can be built for the grammar.
grammar.tokens.insert(
dummy_token_id,
Token {
name: "dummy".to_string(),
pattern: TokenPattern::String("dummy".to_string()),
fragile: false,
},
);
grammar.rules.insert(
start_symbol,
vec![Rule {
lhs: start_symbol,
rhs: vec![Symbol::Terminal(dummy_token_id)],
precedence: None,
associativity: None,
fields: vec![],
production_id: ProductionId(0),
}],
);
// Two external tokens with symbol ids 200 and 201.
grammar.externals.push(ExternalToken {
name: "TOKEN1".to_string(),
symbol_id: SymbolId(200),
});
grammar.externals.push(ExternalToken {
name: "TOKEN2".to_string(),
symbol_id: SymbolId(201),
});
// Build the parse table through the library's own pipeline rather than a
// hand-made table.
let first_follow = FirstFollowSets::compute(&grammar).unwrap();
let parse_table = build_lr1_automaton(&grammar, &first_follow).unwrap();
let generator = ExternalScannerGenerator::new(grammar, parse_table);
let symbol_map = generator.generate_symbol_map();
assert_eq!(symbol_map, vec![200, 201]);
}
}