/// Number of bytes `byte_search!` examines per batch on its fast path.
///
/// A batch is only attempted while more than this many bytes remain past the current search
/// position; otherwise the macro falls back to scanning the tail one byte at a time.
pub const SEARCH_BATCH_SIZE: usize = 32;
/// Lookup table recording, for each of the 256 possible byte values, whether that byte counts
/// as a match.
///
/// The only constructor is [`SafeByteMatchTable::new`], which rejects patterns that fail its
/// UTF-8 safety check, so every existing table satisfies that check.
///
/// The table is aligned to 64 bytes (NOTE(review): presumably to start on a cache-line
/// boundary; confirm against the targets that matter).
#[repr(C, align(64))]
pub struct SafeByteMatchTable([bool; 256]);

impl SafeByteMatchTable {
    /// Builds a table from per-byte flags, where `bytes[b]` says whether byte value `b` matches.
    ///
    /// The pattern is accepted when at least one of the following holds:
    ///
    /// * every byte in `0xC0..0xF8` (UTF-8 lead bytes of multi-byte sequences) matches, or
    /// * no byte in `0x80..0xC0` (UTF-8 continuation bytes) matches.
    ///
    /// NOTE(review): the intent appears to be that a search which starts on a character boundary
    /// can only ever stop on a character boundary; confirm with the callers that rely on it.
    ///
    /// # Panics
    ///
    /// Panics if neither condition holds. When evaluated in a `const` context the panic becomes a
    /// compile-time error.
    pub const fn new(bytes: [bool; 256]) -> Self {
        let mut unicode_start_all_match = true;
        let mut unicode_cont_all_no_match = true;

        // `for` loops and iterators are not usable in a `const fn`, hence the manual index loop.
        let mut i = 0;
        while i < 256 {
            let m = bytes[i];
            if m {
                // A matching continuation byte breaks the "no continuation byte matches" property.
                if i >= 0x80 && i < 0xc0 {
                    unicode_cont_all_no_match = false;
                }
            } else if i >= 0xc0 && i < 0xf8 {
                // A non-matching lead byte breaks the "every lead byte matches" property.
                unicode_start_all_match = false;
            }
            i += 1;
        }

        assert!(
            unicode_start_all_match || unicode_cont_all_no_match,
            "Cannot create SafeByteMatchTable with an unsafe pattern"
        );
        Self(bytes)
    }

    /// Does nothing.
    ///
    /// `byte_search!` calls this on its `table` argument before searching, so passing a value
    /// without this method to the macro fails to compile.
    #[inline]
    pub const fn use_table(&self) {}

    /// Returns whether byte `b` is marked as a match in this table.
    #[inline(always)]
    pub fn matches(&self, b: u8) -> bool {
        // A `u8` widened to `usize` is at most 255 and the table has exactly 256 entries, so this
        // index is always in bounds. Safe indexing therefore needs no `unsafe`, and the bounds
        // check is trivially removable by the optimizer.
        self.0[b as usize]
    }
}
/// Builds a `SafeByteMatchTable` constant from a per-byte predicate expression.
///
/// Invoked as `safe_byte_match_table!(|b| <bool expression using b>)`. The expression is
/// expanded once for every byte value in `0u8..=255` through `seq_macro::seq!`, and the 256
/// results are passed, in byte order, to `SafeByteMatchTable::new`.
///
/// The table is stored in a `const`, so `SafeByteMatchTable::new` is evaluated at compile time
/// and a pattern it rejects becomes a compile error instead of a runtime panic.
#[macro_export]
macro_rules! safe_byte_match_table {
(|$byte:ident| $body:expr) => {{
use $crate::lexer::search::SafeByteMatchTable;
// NOTE(review): `eq_op` is presumably allowed because replacing `$byte` with a literal can turn
// the predicate into a comparison of two identical literals; confirm before removing.
#[allow(clippy::eq_op, clippy::allow_attributes)]
const TABLE: SafeByteMatchTable = seq_macro::seq!($byte in 0u8..=255 {
// The `#( ... )*` section is repeated for each value of the range, with `$byte` replaced by
// that value, producing the 256 array elements.
SafeByteMatchTable::new([#($body,)*])
});
TABLE
}};
}
/// Scans the lexer's input for the first byte accepted by a match table, then calls
/// `bump_bytes` with that byte's offset and evaluates to the byte.
///
/// Arguments, in this order:
///
/// * `lexer` - identifier of the lexer. The macro calls `input().as_str()` and
///   `input_mut().bump_bytes(n)` on it.
/// * `table` - identifier of the match table. The macro calls `use_table()` and `matches(byte)`
///   on it.
/// * `start_at` (optional, defaults to `0`) - offset into `input().as_str()` at which scanning
///   begins. It must not exceed the length of that string: the tail scan computes `len - pos`
///   and reads through a raw pointer.
/// * `continue_if: (byte, pos) expr` (optional, defaults to `false`) - evaluated at every match
///   with `byte` bound to the matching byte and `pos` to its offset. When it is `true` the match
///   is skipped and scanning resumes at the following byte.
/// * `handle_eof` - expression evaluated when the end of the input is reached without an
///   accepted match, after `bump_bytes` has been called with the full input length.
///   NOTE(review): it sits at the tail of a `loop`, so it is expected to diverge (for example
///   `return ...`); a non-diverging handler would repeat the tail scan.
#[macro_export]
macro_rules! byte_search {
// Shorthand: no `start_at`, no `continue_if`. Forwards to the `continue_if` arm below with a
// predicate that never continues.
(
lexer: $lexer:ident,
table: $table:ident,
handle_eof: $eof_handler:expr $(,)?
) => {
byte_search! {
lexer: $lexer,
table: $table,
continue_if: (_byte, _pos) false,
handle_eof: $eof_handler,
}
};
// Shorthand: explicit `start_at`, no `continue_if`. Forwards to the full arm with a predicate
// that never continues.
(
lexer: $lexer:ident,
table: $table:ident,
start_at: $start_at:expr,
handle_eof: $eof_handler:expr $(,)?
) => {
byte_search! {
lexer: $lexer,
table: $table,
start_at: $start_at,
continue_if: (_byte, _pos) false,
handle_eof: $eof_handler,
}
};
// Shorthand: explicit `continue_if`, no `start_at`. Forwards to the full arm starting at 0.
(
lexer: $lexer:ident,
table: $table:ident,
continue_if: ($byte:ident, $pos:ident) $should_continue:expr,
handle_eof: $eof_handler:expr $(,)?
) => {
byte_search! {
lexer: $lexer,
table: $table,
start_at: 0,
continue_if: ($byte, $pos) $should_continue,
handle_eof: $eof_handler,
}
};
// Full form: the actual search implementation.
(
lexer: $lexer:ident,
table: $table:ident,
start_at: $start_at:expr,
continue_if: ($byte:ident, $pos:ident) $should_continue:expr,
handle_eof: $eof_handler:expr $(,)?
) => {{
// No-op call; it only compiles if `$table` provides `use_table`.
$table.use_table();
// `$pos` is the caller-named offset into the input; it is visible to `$should_continue`.
let mut $pos = $start_at;
let bytes = $lexer.input().as_str().as_bytes();
let len = bytes.len();
// Keep only a raw pointer, which carries no borrow of `$lexer`, so `$lexer` can be used
// again below. NOTE(review): this assumes the input buffer is neither moved nor changed
// while the search runs; confirm that `$should_continue` expressions respect this.
let bytes = bytes.as_ptr();
let $byte = 'outer: loop {
let batch_end = $pos + $crate::lexer::search::SEARCH_BATCH_SIZE;
let $byte = if batch_end < len {
// Fast path: a whole batch lies strictly inside the input.
'inner: loop {
let mut i = 0;
while i < $crate::lexer::search::SEARCH_BATCH_SIZE {
// SAFETY (reasoning from the surrounding code): `i < SEARCH_BATCH_SIZE` gives
// `$pos + i < batch_end`, and `batch_end < len`, so the read stays within the `len`
// bytes the pointer was derived from.
let byte = unsafe { *bytes.add($pos + i) };
if $table.matches(byte) {
$pos += i;
break 'inner byte;
}
i += 1;
}
// No match in this batch: move past it and try the next one.
$pos = batch_end;
continue 'outer;
}
} else {
// Slow path: fewer than a full batch is guaranteed to remain, so scan the tail with an
// exact bound.
'inner: loop {
// Requires `$pos <= len`; see the `start_at` requirement in the macro documentation.
let remaining_len = len - $pos;
let mut i = 0;
while i < remaining_len {
// SAFETY (reasoning from the surrounding code): `i < len - $pos` gives `$pos + i < len`.
let byte = unsafe { *bytes.add($pos + i) };
if $table.matches(byte) {
$pos += i;
break 'inner byte;
}
i += 1;
}
// End of input reached without a match: report the whole input as consumed, then hand
// over to the caller's handler.
// NOTE(review): the safety contract of `bump_bytes` is not visible here; confirm that
// passing `len` satisfies it.
unsafe {
$lexer.input_mut().bump_bytes(len);
}
$eof_handler
}
};
// A match was found at `$pos`. If the caller's predicate wants to skip it, resume right
// after it; this unlabelled `continue` targets `'outer`, the innermost enclosing loop here.
if $should_continue {
$pos += 1;
continue;
}
break $byte;
};
// `$pos` is now the offset of the accepted matching byte.
// NOTE(review): the safety contract of `bump_bytes` is not visible here; presumably the
// offset must fall on a UTF-8 character boundary, which the table's safe-pattern check is
// meant to ensure. Confirm against `bump_bytes`.
unsafe {
$lexer.input_mut().bump_bytes($pos);
}
$byte
}};
}