#![cfg_attr(feature = "strict_docs", allow(missing_docs))]
use crate::linecol::LineCol;
use std::ffi::c_void;
use std::os::raw::{c_char, c_uint};
/// Callback with no arguments that returns an opaque payload pointer.
/// `CExternalScanner::new` stores the returned pointer as the scanner payload.
pub type CreateFn = extern "C" fn() -> *mut c_void;
/// Callback that receives the payload pointer for teardown.
/// `CExternalScanner`'s `Drop` impl invokes it.
pub type DestroyFn = extern "C" fn(payload: *mut c_void);
/// Callback that runs the scanner against `lexer` and returns a `bool`.
///
/// `valid_symbols` is a bare pointer to `bool` flags; the signature carries no length.
/// NOTE(review): presumably one flag per external token — confirm against the grammar.
pub type ScanFn =
extern "C" fn(payload: *mut c_void, lexer: *mut TSLexer, valid_symbols: *const bool) -> bool;
/// Callback that writes scanner state into `buffer` and returns the number of bytes written.
pub type SerializeFn = extern "C" fn(payload: *mut c_void, buffer: *mut c_char) -> c_uint;
/// Callback that receives `length` bytes at `buffer` from which to restore scanner state.
pub type DeserializeFn = extern "C" fn(payload: *mut c_void, buffer: *const c_char, length: c_uint);
/// Lexer callback table passed to external scanners through [`ScanFn`].
///
/// `#[repr(C)]` lays the fields out in the order declared here.
///
/// NOTE(review): the `TSLexer` declared in upstream tree-sitter's `parser.h` begins with the
/// data fields `int32_t lookahead; TSSymbol result_symbol;` and has no `context` member, so
/// this layout does not appear to match it. Confirm which header the C scanners are compiled
/// against before handing this struct across the FFI boundary.
#[repr(C)]
pub struct TSLexer {
/// Returns the current lookahead value. The in-file adapter returns the byte under the
/// cursor, or 0 at end of input.
pub lookahead: extern "C" fn(*mut TSLexer) -> u32,
/// Moves the cursor forward; `skip` tells the lexer whether the consumed input should be
/// excluded from the token extent.
pub advance: extern "C" fn(*mut TSLexer, skip: bool),
/// Records the current cursor as the end of the token.
pub mark_end: extern "C" fn(*mut TSLexer),
/// Returns the cursor's column on the current line.
pub get_column: extern "C" fn(*mut TSLexer) -> u32,
/// Reports whether the cursor sits at the start of an included range. The in-file adapter
/// always answers `false`.
pub is_at_included_range_start: extern "C" fn(*const TSLexer) -> bool,
/// Reports whether the cursor is at or past the end of input.
pub eof: extern "C" fn(*const TSLexer) -> bool,
/// Opaque pointer the callbacks use to find their state. `RustLexerAdapter::as_ts_lexer`
/// stores a pointer to the adapter here.
pub context: *mut c_void,
/// Symbol slot. `RustLexerAdapter::as_ts_lexer` initialises it from the adapter.
/// NOTE(review): presumably written by the scanner to report the recognised token — confirm.
pub result_symbol: u16,
}
// Compile-time layout guard for `TSLexer`: the build fails if the struct is smaller than
// six pointers plus two `u32`s, or if it is aligned less strictly than a pointer.
// NOTE(review): the struct as declared holds seven pointer-sized fields and a `u16`, so
// this is a lower bound only, not an exact layout check.
const _: () = {
    use core::mem::{align_of, size_of};

    const POINTER_SLOTS: usize = 6;
    const U32_SLOTS: usize = 2;
    const POINTER_BYTES: usize = POINTER_SLOTS * size_of::<*mut c_void>();
    const U32_BYTES: usize = U32_SLOTS * size_of::<u32>();

    assert!(
        size_of::<TSLexer>() >= POINTER_BYTES + U32_BYTES,
        "TSLexer size too small for required fields"
    );
    assert!(
        align_of::<TSLexer>() >= align_of::<*mut c_void>(),
        "TSLexer alignment mismatch"
    );
};
/// Description of an external scanner: two raw table pointers plus five optional callbacks.
///
/// `#[repr(C)]` keeps the fields in declaration order. Each callback is an `Option` of a
/// function pointer, so an absent callback is `None`.
#[repr(C)]
#[derive(Copy, Clone)]
pub struct TSExternalScannerData {
/// Raw pointer to `bool` entries; nothing in this file dereferences it.
/// NOTE(review): presumably the per-state valid-symbol table — confirm.
pub states: *const bool,
/// Raw pointer to `u16` entries; nothing in this file dereferences it.
/// NOTE(review): presumably maps external token indices to symbol ids — confirm.
pub symbol_map: *const u16,
/// Creates the scanner payload. `CExternalScanner::new` returns `None` when this is absent.
pub create: Option<CreateFn>,
/// Tears the payload down; invoked when a `CExternalScanner` is dropped.
pub destroy: Option<DestroyFn>,
/// Runs the scanner; `CExternalScanner::scan` returns `false` when this is absent.
pub scan: Option<ScanFn>,
/// Writes scanner state to a buffer; `CExternalScanner::serialize` returns 0 when absent.
pub serialize: Option<SerializeFn>,
/// Restores scanner state from a buffer; `CExternalScanner::deserialize` does nothing when absent.
pub deserialize: Option<DeserializeFn>,
}
// Compile-time layout guard for `TSExternalScannerData`: the struct must be at least as
// large as its seven pointer-sized fields and aligned at least as strictly as a pointer.
//
// The minimum size is derived from the target's pointer size rather than from
// `target_pointer_width` cfgs, so the guard also compiles on targets that are neither
// 32- nor 64-bit. On 32/64-bit targets the bound is unchanged (28 / 56 bytes).
const _: () = {
    use core::mem::{align_of, size_of};

    // Two table pointers plus five optional callbacks.
    const FIELD_COUNT: usize = 7;
    const MIN_SCANNER_DATA_SIZE: usize = FIELD_COUNT * size_of::<*const u8>();

    assert!(
        size_of::<TSExternalScannerData>() >= MIN_SCANNER_DATA_SIZE,
        "TSExternalScannerData size mismatch"
    );
    assert!(
        align_of::<TSExternalScannerData>() >= align_of::<*const u8>(),
        "TSExternalScannerData alignment mismatch"
    );
};
// SAFETY (NOTE(review) — not verifiable from this file): `states` and `symbol_map` are raw
// pointers, which is what stops the struct from being `Send`/`Sync` automatically. These
// impls assert that moving or sharing the table across threads is sound — presumably
// because the pointers refer to immutable, statically allocated grammar data. Confirm with
// whatever produces these tables.
unsafe impl Send for TSExternalScannerData {}
unsafe impl Sync for TSExternalScannerData {}
/// Owns an external scanner payload together with the callbacks that operate on it.
///
/// Built by `CExternalScanner::new`; dropping the value passes the payload to `destroy`
/// when that callback is present.
pub struct CExternalScanner {
/// Payload returned by the `create` callback; `new` rejects a null result, so values
/// built through `new` hold a non-null pointer.
payload: *mut c_void,
/// Teardown callback copied from the scanner table.
destroy: Option<DestroyFn>,
/// Scan callback copied from the scanner table.
scan: Option<ScanFn>,
/// Serialize callback copied from the scanner table.
serialize: Option<SerializeFn>,
/// Deserialize callback copied from the scanner table.
deserialize: Option<DeserializeFn>,
}
// SAFETY (NOTE(review) — not verifiable from this file): the raw `payload` pointer is what
// prevents the auto traits. `Send` asserts the payload may be used and destroyed on a thread
// other than the one that created it. `Sync` asserts that `serialize`, which takes `&self`,
// may be called concurrently on the same payload. Confirm both hold for the scanners in use.
unsafe impl Send for CExternalScanner {}
unsafe impl Sync for CExternalScanner {}
impl CExternalScanner {
    /// Creates a scanner by invoking the `create` callback from `data`.
    ///
    /// Returns `None` when `data` has no `create` callback or when the callback returns a
    /// null payload. The remaining callbacks are copied out of `data` unchanged.
    ///
    /// NOTE(review): a scanner whose `create` legitimately returns null (a stateless
    /// scanner) is rejected here — confirm that is intended.
    ///
    /// # Safety
    ///
    /// Every callback in `data` must be a valid function with the declared signature, and
    /// the non-`create` callbacks must accept the payload that `create` returns.
    pub unsafe fn new(data: &TSExternalScannerData) -> Option<Self> {
        let create = data.create?;
        let payload = create();
        if payload.is_null() {
            return None;
        }
        Some(CExternalScanner {
            payload,
            destroy: data.destroy,
            scan: data.scan,
            serialize: data.serialize,
            deserialize: data.deserialize,
        })
    }

    /// Runs the `scan` callback and returns its result, or `false` when there is none.
    ///
    /// # Safety
    ///
    /// `valid_symbols` must contain at least as many entries as the scan callback reads;
    /// only the slice's pointer is forwarded, not its length. `lexer` must be usable by the
    /// callback for the duration of the call.
    pub unsafe fn scan(&mut self, lexer: &mut TSLexer, valid_symbols: &[bool]) -> bool {
        match self.scan {
            Some(scan_fn) => scan_fn(self.payload, lexer as *mut TSLexer, valid_symbols.as_ptr()),
            None => false,
        }
    }

    /// Appends the scanner's serialized state to `buffer` and returns the number of bytes
    /// appended.
    ///
    /// Returns 0 and leaves `buffer` untouched when there is no `serialize` callback, when
    /// the callback reports zero bytes, or when it reports more bytes than the scratch
    /// buffer holds.
    ///
    /// # Safety
    ///
    /// The serialize callback must write at most 1024 bytes to the pointer it is given;
    /// a callback that writes more has already overrun the scratch buffer by the time its
    /// return value is checked.
    pub unsafe fn serialize(&self, buffer: &mut Vec<u8>) -> usize {
        // NOTE(review): 1024 presumably mirrors tree-sitter's serialization buffer size — confirm.
        const BUFFER_SIZE: usize = 1024;

        let serialize_fn = match self.serialize {
            Some(callback) => callback,
            None => return 0,
        };

        // A fixed-size stack buffer avoids a heap allocation on every call.
        let mut scratch = [0u8; BUFFER_SIZE];
        let written = serialize_fn(self.payload, scratch.as_mut_ptr().cast::<c_char>()) as usize;

        // `get` yields `None` when the reported length exceeds the scratch buffer.
        match scratch.get(..written) {
            Some(bytes) => {
                buffer.extend_from_slice(bytes);
                written
            }
            None => 0,
        }
    }

    /// Passes `buffer` to the `deserialize` callback; does nothing when there is none.
    ///
    /// The length handed to the callback saturates at `c_uint::MAX` instead of wrapping,
    /// so the callback is never told to read past the end of `buffer`.
    ///
    /// # Safety
    ///
    /// The deserialize callback must read no more than the length it is given.
    pub unsafe fn deserialize(&mut self, buffer: &[u8]) {
        if let Some(deserialize_fn) = self.deserialize {
            let length = buffer.len().min(c_uint::MAX as usize) as c_uint;
            deserialize_fn(self.payload, buffer.as_ptr().cast::<c_char>(), length);
        }
    }
}
impl Drop for CExternalScanner {
    /// Passes the payload to the `destroy` callback when one was supplied; otherwise does
    /// nothing.
    fn drop(&mut self) {
        let destroy_fn = match self.destroy {
            Some(callback) => callback,
            None => return,
        };
        // `DestroyFn` is a safe `extern "C" fn` type, so the compiler treats this block as
        // redundant; it is kept to mark the FFI call, hence the lint allowance.
        #[allow(unused_unsafe)]
        unsafe {
            destroy_fn(self.payload);
        }
    }
}
/// Byte-oriented lexer state that backs the [`TSLexer`] callbacks defined in this file.
pub struct RustLexerAdapter<'a> {
/// Input bytes being scanned.
input: &'a [u8],
/// Current cursor, as a byte offset into `input`.
position: usize,
/// Byte offset recorded as the end of the token; starts equal to `position` and is moved
/// by `rust_lexer_advance` (non-skip) and `rust_lexer_mark_end`.
token_end: usize,
/// Value copied into `TSLexer::result_symbol` by `as_ts_lexer`; initialised to 0.
result_symbol: u16,
/// Line counter, seeded from `LineCol` and incremented for each line break consumed.
line: u32,
/// Byte offset where the current line starts; `get_column` subtracts it from `position`.
line_start: usize, }
impl<'a> RustLexerAdapter<'a> {
    /// Creates an adapter over `input` with the cursor at byte offset `position`.
    ///
    /// The token end starts at `position`, the result symbol at 0, and the line counter and
    /// line start are taken from `LineCol::at_position`.
    pub fn new(input: &'a [u8], position: usize) -> Self {
        let (line, line_start) = Self::calculate_line_info(input, position);
        RustLexerAdapter {
            input,
            position,
            token_end: position,
            result_symbol: 0,
            line,
            line_start,
        }
    }

    /// Returns `(line, line_start)` for `position`, as reported by `LineCol`.
    fn calculate_line_info(input: &[u8], position: usize) -> (u32, usize) {
        let tracker = LineCol::at_position(input, position);
        (tracker.line as u32, tracker.line_start)
    }

    /// Returns the cursor's byte offset from the start of the current line (0 if the cursor
    /// is somehow before the recorded line start).
    pub fn get_column(&self) -> u32 {
        (self.position.saturating_sub(self.line_start)) as u32
    }

    /// Builds a [`TSLexer`] whose callbacks operate on this adapter.
    ///
    /// The returned struct stores a raw pointer to `self` in `context`, so it must not be
    /// used after the adapter is moved or dropped. `result_symbol` is copied by value;
    /// writes to the returned struct's field are not reflected back into the adapter.
    pub fn as_ts_lexer(&mut self) -> TSLexer {
        TSLexer {
            lookahead: rust_lexer_lookahead,
            advance: rust_lexer_advance,
            mark_end: rust_lexer_mark_end,
            get_column: rust_lexer_get_column,
            is_at_included_range_start: rust_lexer_is_at_included_range_start,
            eof: rust_lexer_eof,
            context: (self as *mut RustLexerAdapter).cast(),
            result_symbol: self.result_symbol,
        }
    }

    /// Returns the number of bytes between the cursor and the recorded token end.
    ///
    /// Returns 0 when the cursor has moved past the token end (for example after a
    /// skip-advance, or after `mark_end` followed by further lookahead). A plain
    /// subtraction would underflow there, panicking in debug builds and wrapping in
    /// release builds.
    ///
    /// NOTE(review): the length is measured from the *current* cursor, not from the offset
    /// the adapter was created at, so it is 0 after ordinary non-skip advances. Confirm
    /// whether callers expect the length from the token's start instead.
    pub fn token_length(&self) -> usize {
        self.token_end.saturating_sub(self.position)
    }
}
/// Reads a lexer's `context` field and reinterprets it as an adapter pointer.
///
/// # Safety
///
/// `lexer` must be non-null and valid for reads. The returned pointer is only meaningful
/// if `context` was set to point at a live `RustLexerAdapter`.
#[inline]
unsafe fn as_adapter(lexer: *mut TSLexer) -> *mut RustLexerAdapter<'static> {
    // SAFETY: the caller guarantees `lexer` is valid for reads.
    let context = unsafe { (*lexer).context };
    context.cast::<RustLexerAdapter<'static>>()
}
/// `TSLexer::lookahead` callback: returns the byte under the cursor widened to `u32`, or 0
/// when the cursor is at or past the end of input.
///
/// The value is a raw byte, not a decoded code point.
extern "C" fn rust_lexer_lookahead(lexer: *mut TSLexer) -> u32 {
    // SAFETY: relies on `lexer` being a valid `TSLexer` whose `context` points at a live
    // `RustLexerAdapter`, as set up by `RustLexerAdapter::as_ts_lexer`.
    let adapter = unsafe { &mut *as_adapter(lexer) };
    match adapter.input.get(adapter.position) {
        Some(&byte) => u32::from(byte),
        None => 0,
    }
}
/// `TSLexer::advance` callback: consumes input at the cursor and updates line tracking.
///
/// Does nothing at end of input. Otherwise the cursor moves one byte, except that a `\r`
/// immediately followed by `\n` is consumed as a pair in a single call. Either `\n` or
/// `\r` (with or without its `\n`) increments the line counter and moves the line start to
/// the new cursor. Unless `skip` is set, the token end is pulled forward to the cursor if
/// it is behind it.
///
/// NOTE(review): consuming two bytes for `\r\n` differs from upstream tree-sitter, where
/// `advance` moves one character per call — confirm scanners tolerate this. Also note the
/// token end is extended by non-skip advances even after `mark_end` has been called.
extern "C" fn rust_lexer_advance(lexer: *mut TSLexer, skip: bool) {
    // SAFETY: relies on `lexer` being a valid `TSLexer` whose `context` points at a live
    // `RustLexerAdapter`, as set up by `RustLexerAdapter::as_ts_lexer`.
    let state = unsafe { &mut *as_adapter(lexer) };

    let consumed = match state.input.get(state.position) {
        Some(&byte) => byte,
        None => return,
    };
    state.position += 1;

    match consumed {
        b'\n' => {
            state.line += 1;
            state.line_start = state.position;
        }
        b'\r' => {
            // Swallow the `\n` of a CRLF pair so the pair counts as one line break.
            if state.input.get(state.position) == Some(&b'\n') {
                state.position += 1;
            }
            state.line += 1;
            state.line_start = state.position;
        }
        _ => {}
    }

    if !skip && state.token_end < state.position {
        state.token_end = state.position;
    }
}
/// `TSLexer::mark_end` callback: sets the adapter's token end to the current cursor.
extern "C" fn rust_lexer_mark_end(lexer: *mut TSLexer) {
    // SAFETY: relies on `lexer` being a valid `TSLexer` whose `context` points at a live
    // `RustLexerAdapter`, as set up by `RustLexerAdapter::as_ts_lexer`.
    let adapter = unsafe { &mut *as_adapter(lexer) };
    let cursor = adapter.position;
    adapter.token_end = cursor;
}
extern "C" fn rust_lexer_get_column(lexer: *mut TSLexer) -> u32 {
unsafe {
let adapter = &mut *as_adapter(lexer);
adapter.get_column()
}
}
/// `TSLexer::is_at_included_range_start` callback: always returns `false` and ignores its
/// argument; the adapter keeps no included-range state.
extern "C" fn rust_lexer_is_at_included_range_start(_lexer: *const TSLexer) -> bool {
false
}
/// `TSLexer::eof` callback: returns `true` once the cursor is at or past the end of input.
extern "C" fn rust_lexer_eof(lexer: *const TSLexer) -> bool {
    // SAFETY: relies on `lexer` being a valid `TSLexer` whose `context` points at a live
    // `RustLexerAdapter`, as set up by `RustLexerAdapter::as_ts_lexer`.
    let adapter = unsafe { &*(*lexer).context.cast::<RustLexerAdapter<'static>>() };
    adapter.input.get(adapter.position).is_none()
}
/// Frees a heap-allocated [`TSLexer`]; a null pointer is ignored.
///
/// # Safety
///
/// A non-null `lexer` must have been produced by `Box::into_raw` on a `Box<TSLexer>` and
/// must not be used or freed again afterwards.
pub unsafe fn destroy_lexer(lexer: *mut TSLexer) {
    if lexer.is_null() {
        return;
    }
    // SAFETY: the caller guarantees `lexer` came from `Box::into_raw` and is not reused.
    drop(unsafe { Box::from_raw(lexer) });
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: builds an adapter at offset 0 and steps its cursor by hand, checking
    /// the bytes visible at each position. It does not exercise the lexer callbacks.
    #[test]
    fn test_rust_lexer_adapter() {
        let source: &[u8] = b"hello world";
        let mut lexer_state = RustLexerAdapter::new(source, 0);

        assert_eq!(lexer_state.position, 0);
        assert_eq!(lexer_state.input.get(lexer_state.position), Some(&b'h'));

        // Move one byte forward and keep the token end in step with the cursor.
        lexer_state.position += 1;
        lexer_state.token_end = lexer_state.position;

        assert_eq!(lexer_state.input.get(lexer_state.position), Some(&b'e'));
        assert!(lexer_state.input.len() > lexer_state.position);
    }
}