use crate::external_scanner::{ExternalScanner, ScanResult};
use crate::external_scanner_ffi::{CExternalScanner, TSExternalScannerData};
use adze_ir::SymbolId;
use once_cell::sync::Lazy;
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
/// Scanner interface used through trait objects (`Box<dyn DynExternalScanner>`).
///
/// Implementors must be `Send + Sync`.
pub trait DynExternalScanner: Send + Sync {
/// Attempts to scan one token.
///
/// Returns `Some(ScanResult)` when a token was recognized and `None` otherwise.
/// NOTE(review): `valid_symbols` presumably holds one flag per external token
/// telling the scanner which tokens are acceptable — confirm against
/// `ExternalScanner`.
fn scan(
&mut self,
_lexer: &mut dyn crate::external_scanner::Lexer,
valid_symbols: &[bool],
) -> Option<ScanResult>;
/// Serializes the scanner's state into `buffer`.
fn serialize(&self, buffer: &mut Vec<u8>);
/// Restores the scanner's state from `buffer`.
fn deserialize(&mut self, buffer: &[u8]);
}
/// Adapter that owns a concrete [`ExternalScanner`] so it can be used as a
/// [`DynExternalScanner`].
struct RustScannerWrapper<S: ExternalScanner> {
/// The wrapped scanner; every trait call is forwarded to it.
scanner: S,
}
/// Stateless scanner that never recognizes a token.
///
/// `ScannerRegistry::register_c_scanner` hands this out when the C scanner
/// cannot be constructed.
struct NoopScanner;

impl DynExternalScanner for NoopScanner {
    /// Always reports "no token"; neither argument is inspected.
    fn scan(
        &mut self,
        _: &mut dyn crate::external_scanner::Lexer,
        _: &[bool],
    ) -> Option<ScanResult> {
        None
    }

    /// Nothing to persist: the buffer is left untouched.
    fn serialize(&self, _: &mut Vec<u8>) {}

    /// Nothing to restore: the buffer is ignored.
    fn deserialize(&mut self, _: &[u8]) {}
}
/// Forwards every [`DynExternalScanner`] call to the wrapped scanner unchanged.
impl<S: ExternalScanner> DynExternalScanner for RustScannerWrapper<S> {
    /// Delegates to the inner scanner's `scan` and returns its result as-is.
    fn scan(
        &mut self,
        lexer: &mut dyn crate::external_scanner::Lexer,
        valid_symbols: &[bool],
    ) -> Option<ScanResult> {
        let Self { scanner } = self;
        scanner.scan(lexer, valid_symbols)
    }

    /// Delegates to the inner scanner's `serialize`.
    fn serialize(&self, buffer: &mut Vec<u8>) {
        let Self { scanner } = self;
        scanner.serialize(buffer);
    }

    /// Delegates to the inner scanner's `deserialize`.
    fn deserialize(&mut self, buffer: &[u8]) {
        let Self { scanner } = self;
        scanner.deserialize(buffer);
    }
}
/// Adapter that exposes a [`CExternalScanner`] as a [`DynExternalScanner`].
struct CScannerWrapper {
/// Handle to the C scanner that performs the actual scanning.
scanner: CExternalScanner,
/// Lookup table indexed by the `result_symbol` the C scanner reports;
/// the entry's inner value becomes `ScanResult::symbol`.
external_tokens: Vec<SymbolId>,
}
impl DynExternalScanner for CScannerWrapper {
    /// Runs the C scanner and translates its result into a [`ScanResult`].
    ///
    /// Returns `None` when the C scanner reports no match, or when the
    /// `result_symbol` it reports is outside `external_tokens`.
    ///
    /// NOTE(review): `_lexer` is never consulted; the adapter is built over an
    /// empty slice at position 0, so the C scanner sees no real input. This
    /// looks like a placeholder — confirm and wire the lexer through.
    fn scan(
        &mut self,
        _lexer: &mut dyn crate::external_scanner::Lexer,
        valid_symbols: &[bool],
    ) -> Option<ScanResult> {
        use crate::external_scanner_ffi::RustLexerAdapter;

        let input = &[];
        let position = 0;
        let mut adapter = RustLexerAdapter::new(input, position);
        let mut ts_lexer = adapter.as_ts_lexer();

        // SAFETY: relies on the contract of `CExternalScanner::scan`, which is
        // not visible in this file — TODO confirm the required invariants.
        let matched = unsafe { self.scanner.scan(&mut ts_lexer, valid_symbols) };
        if !matched {
            return None;
        }

        // Read the reported symbol before touching `adapter` again.
        let token_index = ts_lexer.result_symbol as usize;
        let symbol_id = self.external_tokens.get(token_index)?;
        Some(ScanResult {
            symbol: symbol_id.0,
            length: adapter.token_length(),
        })
    }

    /// Asks the C scanner to write its state into `buffer`.
    fn serialize(&self, buffer: &mut Vec<u8>) {
        // SAFETY: relies on the contract of `CExternalScanner::serialize`,
        // which is not visible in this file — TODO confirm.
        unsafe {
            self.scanner.serialize(buffer);
        }
    }

    /// Asks the C scanner to restore its state from `buffer`.
    fn deserialize(&mut self, buffer: &[u8]) {
        // SAFETY: relies on the contract of `CExternalScanner::deserialize`,
        // which is not visible in this file — TODO confirm.
        unsafe {
            self.scanner.deserialize(buffer);
        }
    }
}
/// Thread-safe factory closure that produces a new boxed scanner on each call.
pub type ScannerFactory = Box<dyn Fn() -> Box<dyn DynExternalScanner> + Send + Sync>;
/// Process-wide scanner registry, created empty on first access and shared
/// behind `Arc<Mutex<_>>`.
static SCANNER_REGISTRY: Lazy<Arc<Mutex<ScannerRegistry>>> =
Lazy::new(|| Arc::new(Mutex::new(ScannerRegistry::new())));
/// Maps language names to the factories that build their external scanners.
pub struct ScannerRegistry {
/// Scanner factories keyed by language name.
scanners: HashMap<String, ScannerFactory>,
}
impl Default for ScannerRegistry {
fn default() -> Self {
Self::new()
}
}
impl ScannerRegistry {
    /// Creates a registry with no scanners registered.
    pub fn new() -> Self {
        ScannerRegistry {
            scanners: HashMap::new(),
        }
    }

    /// Registers the Rust scanner type `S` for `language`.
    ///
    /// Each scanner created for `language` is a fresh `S::default()`. Any
    /// factory previously registered under the same name is replaced.
    pub fn register_rust_scanner<S>(&mut self, language: &str)
    where
        S: ExternalScanner + Default + Send + Sync + 'static,
    {
        let factory: ScannerFactory = Box::new(|| {
            Box::new(RustScannerWrapper {
                scanner: S::default(),
            })
        });
        self.scanners.insert(language.to_string(), factory);
    }

    /// Registers a C scanner described by `data` for `language`.
    ///
    /// `external_tokens` is the table used to translate the symbol index the
    /// C scanner reports into a `SymbolId`; every created scanner gets its own
    /// clone. Any factory previously registered under the same name is
    /// replaced.
    ///
    /// If `CExternalScanner::new` yields `None` when the factory runs, the
    /// factory falls back to a `NoopScanner` instead of failing.
    pub fn register_c_scanner(
        &mut self,
        language: &str,
        data: TSExternalScannerData,
        external_tokens: Vec<SymbolId>,
    ) {
        let factory: ScannerFactory = Box::new(move || {
            // SAFETY: relies on the contract of `CExternalScanner::new`, which
            // is not visible in this file — TODO confirm the invariants that
            // `data` must uphold.
            let scanner = unsafe { CExternalScanner::new(&data) };
            if let Some(scanner) = scanner {
                Box::new(CScannerWrapper {
                    scanner,
                    external_tokens: external_tokens.clone(),
                })
            } else {
                // Deliberate best-effort: a scanner that never matches.
                Box::new(NoopScanner)
            }
        });
        self.scanners.insert(language.to_string(), factory);
    }

    /// Returns the factory registered for `language`, if any.
    pub fn get_factory(&self, language: &str) -> Option<&ScannerFactory> {
        self.scanners.get(language)
    }

    /// Builds a new scanner for `language`, or returns `None` when no factory
    /// is registered under that name.
    pub fn create_scanner(&self, language: &str) -> Option<Box<dyn DynExternalScanner>> {
        self.scanners.get(language).map(|factory| factory())
    }
}
/// Returns a new handle to the process-wide scanner registry.
///
/// Only the `Arc` is cloned; every handle refers to the same registry.
pub fn get_global_registry() -> Arc<Mutex<ScannerRegistry>> {
    Arc::clone(&SCANNER_REGISTRY)
}
/// Registers the Rust scanner type `S` for `language` in the global registry.
///
/// A poisoned registry lock is recovered rather than propagated as a panic.
pub fn register_rust_scanner<S>(language: &str)
where
    S: ExternalScanner + Default + Send + Sync + 'static,
{
    get_global_registry()
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner())
        .register_rust_scanner::<S>(language);
}
/// Registers a C scanner for `language` in the global registry.
///
/// `data` and `external_tokens` are forwarded unchanged to
/// `ScannerRegistry::register_c_scanner`. A poisoned registry lock is
/// recovered rather than propagated as a panic.
pub fn register_c_scanner(
    language: &str,
    data: TSExternalScannerData,
    external_tokens: Vec<SymbolId>,
) {
    get_global_registry()
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner())
        .register_c_scanner(language, data, external_tokens);
}
/// Builder-style helper for registering scanners in the global registry.
pub struct ExternalScannerBuilder {
/// Language name the scanners are registered under.
language: String,
/// Token table passed along when a C scanner is registered.
external_tokens: Vec<SymbolId>,
}
impl ExternalScannerBuilder {
    /// Starts a builder for `language` with an empty external-token table.
    pub fn new(language: impl Into<String>) -> Self {
        Self {
            language: language.into(),
            external_tokens: Vec::new(),
        }
    }

    /// Replaces the external-token table with `tokens`.
    pub fn with_external_tokens(self, tokens: Vec<SymbolId>) -> Self {
        Self {
            external_tokens: tokens,
            ..self
        }
    }

    /// Registers the Rust scanner type `S` for this builder's language in the
    /// global registry, then returns the builder for further chaining.
    pub fn register_rust<S>(self) -> Self
    where
        S: ExternalScanner + Default + Send + Sync + 'static,
    {
        register_rust_scanner::<S>(self.language.as_str());
        self
    }

    /// Registers a C scanner described by `data` for this builder's language
    /// in the global registry, passing a clone of the external-token table,
    /// then returns the builder for further chaining.
    pub fn register_c(self, data: TSExternalScannerData) -> Self {
        let Self {
            language,
            external_tokens,
        } = &self;
        register_c_scanner(language, data, external_tokens.clone());
        self
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::external_scanner::StringScanner;

    /// A registered language yields a scanner; an unregistered one does not.
    #[test]
    fn test_scanner_registry() {
        let mut registry = ScannerRegistry::new();
        registry.register_rust_scanner::<StringScanner>("test_lang");

        assert!(registry.create_scanner("test_lang").is_some());
        assert!(registry.create_scanner("unknown_lang").is_none());
    }

    /// The builder stores the language name and the supplied token table.
    #[test]
    fn test_scanner_builder() {
        let tokens = vec![SymbolId(100), SymbolId(101)];
        let builder = ExternalScannerBuilder::new("python").with_external_tokens(tokens);

        assert_eq!(builder.language, "python");
        assert_eq!(builder.external_tokens.len(), 2);
    }
}