#[cfg(target_arch = "x86_64")]
use crate::error::{Error, Result};
#[cfg(target_arch = "x86_64")]
use crate::TransitionTable;
#[cfg(target_arch = "x86_64")]
use matchkit::Match;
#[cfg(target_arch = "x86_64")]
/// An mmap'd, executable buffer holding the JIT-compiled scanner, together
/// with the tables needed by either the generated code or the interpreter.
pub struct ExecutableBuffer {
    /// Base of the mapping; the function entry point when `is_jit` is true.
    ptr: *mut u8,
    /// Size of the mapping in bytes (a whole number of pages).
    len: usize,
    /// Transition table retained for the interpreted fallback; `None` when JIT-compiled.
    table: Option<TransitionTable>,
    /// Whether `ptr` points at generated scanner code or a placeholder page.
    is_jit: bool,
    /// Per-state pattern id, with `0xFFFF_FFFF` marking non-accepting states.
    accept_pattern: Vec<u32>,
    /// Per-state link to the next accepting state to report at the same
    /// position; `0xFFFF_FFFF` terminates the chain.
    output_links: Vec<u32>,
}
/// ABI of the generated scanner: `fn(input_ptr, input_len, matches_ptr, max_matches) -> total match count`.
#[cfg(target_arch = "x86_64")]
type JitFn = unsafe extern "sysv64" fn(*const u8, u64, *mut Match, u64) -> u64;
#[cfg(target_arch = "x86_64")]
impl std::fmt::Debug for ExecutableBuffer {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("ExecutableBuffer")
.field("len", &self.len)
.field("is_jit", &self.is_jit)
.finish_non_exhaustive()
}
}
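// SAFETY: the mapping and tables are never mutated after construction, and the
// raw pointer is only read or executed, so cross-thread sharing is sound.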
#[cfg(target_arch = "x86_64")]
unsafe impl Send for ExecutableBuffer {}
#[cfg(target_arch = "x86_64")]
unsafe impl Sync for ExecutableBuffer {}
#[cfg(target_arch = "x86_64")]
impl Drop for ExecutableBuffer {
fn drop(&mut self) {
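        // Release the mmap'd region; nothing was mapped if ptr is null or len is 0.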
if !self.ptr.is_null() && self.len > 0 {
unsafe {
libc::munmap(self.ptr.cast::<libc::c_void>(), self.len);
}
}
}
}
#[cfg(target_arch = "x86_64")]
impl ExecutableBuffer {
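    /// Scans `input` for matches, writing them into `matches` and returning the
    /// number written (capped at `matches.len()`); use `scan_count` for totals.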
pub fn scan(&self, input: &[u8], matches: &mut [Match]) -> usize {
if self.is_jit {
self.scan_jit(input, matches)
} else {
self.scan_interpreted(input, matches)
}
}
fn scan_jit(&self, input: &[u8], matches: &mut [Match]) -> usize {
if input.is_empty() {
return 0;
}
let max_matches = matches.len();
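        // SAFETY: `ptr` was produced by `compile_x86_64`, is mapped PROT_EXEC,
        // and the generated code follows the sysv64 ABI described by `JitFn`.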
let func: JitFn = unsafe { std::mem::transmute(self.ptr) };
let count = unsafe {
func(
input.as_ptr(),
input.len() as u64,
matches.as_mut_ptr(),
max_matches as u64,
)
};
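        // The generated code returns the total match count, which can exceed
        // the caller's buffer; report only what was actually written.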
(count as usize).min(max_matches)
}
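    /// Counts every match in `input` without recording match positions.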
pub fn scan_count(&self, input: &[u8]) -> usize {
if self.is_jit {
self.scan_count_jit(input)
} else {
self.scan_count_interpreted(input)
}
}
fn scan_count_jit(&self, input: &[u8]) -> usize {
if input.is_empty() {
return 0;
}
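        // SAFETY: same contract as in `scan_jit`. Passing a null matches
        // pointer with max_matches == 0 is fine: the generated code checks the
        // capacity before every write, so it only counts.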
let func: JitFn = unsafe { std::mem::transmute(self.ptr) };
let count = unsafe { func(input.as_ptr(), input.len() as u64, std::ptr::null_mut(), 0) };
count as usize
}
fn scan_count_interpreted(&self, input: &[u8]) -> usize {
let Some(table) = self.table.as_ref() else {
return 0;
};
let mut state = 0u32;
let mut count = 0usize;
        for &byte in input {
            // One DFA step: each row is `class_count()` entries wide.
            let idx = state as usize * table.class_count() + byte as usize;
            let next = table.transitions().get(idx).copied().unwrap_or(0);
            // Bit 31 flags a transition into an accepting state; mask it off.
            let clean_next = next & 0x7FFF_FFFF;
            if self
                .accept_pattern
                .get(clean_next as usize)
                .copied()
                .unwrap_or(0xFFFF_FFFF)
                == 0xFFFF_FFFF
            {
                // Not accepting: keep walking.
                state = clean_next;
            } else {
                // Count the match here plus one for every state on the output
                // chain, each of which also ends at this byte.
                let mut output_state = clean_next;
                while output_state != 0xFFFF_FFFF {
                    count += 1;
                    output_state = self
                        .output_links
                        .get(output_state as usize)
                        .copied()
                        .unwrap_or(0xFFFF_FFFF);
                }
                // Restart from the start state after reporting matches.
                state = 0;
            }
        }
count
}
fn scan_interpreted(&self, input: &[u8], matches: &mut [Match]) -> usize {
let Some(table) = self.table.as_ref() else {
return 0;
};
let mut state = 0u32;
let mut count = 0usize;
        for (pos, &byte) in input.iter().enumerate() {
            // One DFA step: each row is `class_count()` entries wide.
            let idx = state as usize * table.class_count() + byte as usize;
            let next = table.transitions().get(idx).copied().unwrap_or(0);
            // Bit 31 flags a transition into an accepting state; mask it off.
            let clean_next = next & 0x7FFF_FFFF;
            if self
                .accept_pattern
                .get(clean_next as usize)
                .copied()
                .unwrap_or(0xFFFF_FFFF)
                == 0xFFFF_FFFF
            {
                // Not accepting: keep walking.
                state = clean_next;
            } else {
                // Report a match for the accepting state and for every state
                // on its output chain, all of which end at this byte.
                let mut output_state = clean_next;
                while output_state != 0xFFFF_FFFF {
                    let pid = self
                        .accept_pattern
                        .get(output_state as usize)
                        .copied()
                        .unwrap_or(0);
                    if count < matches.len() {
                        // end = pos + 1; start = end - pattern length, clamped at 0.
                        let end = (pos + 1) as u32;
                        let pat_len = table
                            .pattern_lengths()
                            .get(pid as usize)
                            .copied()
                            .unwrap_or(0);
                        let start = end.saturating_sub(pat_len);
                        matches[count] = Match::from_parts(pid, start, end);
                    }
                    // Count every match even when the caller's buffer is full.
                    count += 1;
                    output_state = self
                        .output_links
                        .get(output_state as usize)
                        .copied()
                        .unwrap_or(0xFFFF_FFFF);
                }
                // Restart from the start state after reporting matches.
                state = 0;
            }
        }
count.min(matches.len())
}
}
#[cfg(target_arch = "x86_64")]
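/// JIT compilation is capped at this many states; larger automatons take the
/// interpreted fallback path instead.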
const MAX_JIT_STATES: usize = 4096;
#[cfg(target_arch = "x86_64")]
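/// Emits a specialized x86-64 scanner for `table`, appends its lookup tables
/// after the code, maps the whole thing executable, and patches the absolute
/// table addresses in place.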
pub fn compile_x86_64(table: &TransitionTable, output_links: &[u32]) -> Result<ExecutableBuffer> {
if table.state_count() > 65_536 {
return Err(Error::TooManyStates {
states: table.state_count(),
max: 65_536,
});
}
if table.state_count() > MAX_JIT_STATES {
return compile_interpreted_fallback(table, output_links);
}
    // Flag transitions: bit 31 of an entry marks a transition into an
    // accepting state, so the generated code detects matches with one test.
    let mut flagged = table.transitions().to_vec();
    // Map each state to its pattern id; 0xFFFF_FFFF marks "not accepting".
    let mut accept_pattern: Vec<u32> = vec![0xFFFF_FFFF; table.state_count()];
    for &(state, pattern_id) in table.accept_states() {
        if (state as usize) < accept_pattern.len() {
            accept_pattern[state as usize] = pattern_id;
        }
    }
    for t in &mut flagged {
        let target = (*t & 0x7FFF_FFFF) as usize;
        if target < accept_pattern.len() && accept_pattern[target] != 0xFFFF_FFFF {
            *t = target as u32 | 0x8000_0000;
        } else {
            *t = target as u32;
        }
    }
    // Extend the output links to cover every state; 0xFFFF_FFFF ends a chain.
    let mut output_link = output_links.to_vec();
    if output_link.len() < table.state_count() {
        output_link.resize(table.state_count(), 0xFFFF_FFFF);
    }
    // Code buffer. Register allocation in the generated function
    // (sysv64 arguments: rdi = input, rsi = len, rdx = matches, rcx = max):
    //   r12 = input ptr   rbp = input len    r14 = matches ptr   rbx = max matches
    //   r13 = position    r15 = match count  r11d = current state
    let mut c: Vec<u8> = Vec::with_capacity(4096);
    // Prologue: save callee-saved registers.
    c.extend_from_slice(&[0x53]); // push rbx
    c.extend_from_slice(&[0x55]); // push rbp
    c.extend_from_slice(&[0x41, 0x54]); // push r12
    c.extend_from_slice(&[0x41, 0x55]); // push r13
    c.extend_from_slice(&[0x41, 0x56]); // push r14
    c.extend_from_slice(&[0x41, 0x57]); // push r15
    // Move the arguments into their long-lived homes.
    c.extend_from_slice(&[0x49, 0x89, 0xFC]); // mov r12, rdi
    c.extend_from_slice(&[0x48, 0x89, 0xF5]); // mov rbp, rsi
    c.extend_from_slice(&[0x49, 0x89, 0xD6]); // mov r14, rdx
    c.extend_from_slice(&[0x48, 0x89, 0xCB]); // mov rbx, rcx
    // position = 0, match count = 0, state = 0.
    c.extend_from_slice(&[0x45, 0x31, 0xED]); // xor r13d, r13d
    c.extend_from_slice(&[0x45, 0x31, 0xFF]); // xor r15d, r15d
    c.extend_from_slice(&[0x45, 0x31, 0xDB]); // xor r11d, r11d
    // Jump straight to the epilogue on empty input.
    c.extend_from_slice(&[0x49, 0x39, 0xED]); // cmp r13, rbp
    c.extend_from_slice(&[0x0F, 0x83]); // jae <epilogue> (patched below)
    let empty_patch = c.len();
    c.extend_from_slice(&[0; 4]);
    let scan_top = c.len();
    // Load the next byte and compute the table index: state * class_count + byte.
    c.extend_from_slice(&[0x43, 0x0F, 0xB6, 0x04, 0x2C]); // movzx eax, byte [r12 + r13]
    c.extend_from_slice(&[0x41, 0x69, 0xD3]); // imul edx, r11d, imm32
    c.extend_from_slice(&(table.class_count() as u32).to_le_bytes());
    c.extend_from_slice(&[0x01, 0xC2]); // add edx, eax
    let trans_patch = c.len();
    c.extend_from_slice(&[0x48, 0xBF]); // movabs rdi, <transition table> (patched below)
    c.extend_from_slice(&[0; 8]);
    c.extend_from_slice(&[0x8B, 0x04, 0x97]); // mov eax, [rdi + rdx*4]
    // Split the entry: low 31 bits are the next state, bit 31 is the accept flag.
    c.extend_from_slice(&[0x89, 0xC1]); // mov ecx, eax
    c.push(0x25); // and eax, imm32
    c.extend_from_slice(&0x7FFF_FFFFu32.to_le_bytes());
    c.extend_from_slice(&[0x41, 0x89, 0xC3]); // mov r11d, eax (state = next)
    c.extend_from_slice(&[0xF7, 0xC1]); // test ecx, imm32
    c.extend_from_slice(&0x8000_0000u32.to_le_bytes());
    c.extend_from_slice(&[0x0F, 0x84]); // je <skip match handling> (patched below)
    let skip_match_patch = c.len();
    c.extend_from_slice(&[0; 4]);
    // Walk the output chain, starting at the accepting state itself (r8d).
    c.extend_from_slice(&[0x45, 0x89, 0xD8]); // mov r8d, r11d
    let accept_loop = c.len();
    // If the caller's buffer is full, still count the match but skip the write.
    c.extend_from_slice(&[0x49, 0x39, 0xDF]); // cmp r15, rbx
    c.extend_from_slice(&[0x0F, 0x83]); // jae <skip write> (patched below)
    let skip_write_match_patch = c.len();
    c.extend_from_slice(&[0; 4]);
    let accept_patch = c.len();
    c.extend_from_slice(&[0x48, 0xBF]); // movabs rdi, <accept_pattern table> (patched below)
    c.extend_from_slice(&[0; 8]);
    c.extend_from_slice(&[0x42, 0x8B, 0x04, 0x87]); // mov eax, [rdi + r8*4] (pattern id)
    // Compute &matches[count]; each Match is 16 bytes wide.
    c.extend_from_slice(&[0x4C, 0x89, 0xFF]); // mov rdi, r15
    c.extend_from_slice(&[0x48, 0x6B, 0xFF, 0x10]); // imul rdi, rdi, 16
    c.extend_from_slice(&[0x4C, 0x01, 0xF7]); // add rdi, r14
    c.extend_from_slice(&[0x89, 0x07]); // mov [rdi], eax (pattern id)
    c.extend_from_slice(&[0x89, 0xC1]); // mov ecx, eax
    let patlen_patch = c.len();
    c.extend_from_slice(&[0x48, 0xBA]); // movabs rdx, <pattern_lengths table> (patched below)
    c.extend_from_slice(&[0; 8]);
    c.extend_from_slice(&[0x8B, 0x0C, 0x8A]); // mov ecx, [rdx + rcx*4] (pattern length)
    // end = position + 1; start = end - length, saturating at 0.
    c.extend_from_slice(&[0x44, 0x89, 0xEA]); // mov edx, r13d
    c.extend_from_slice(&[0x83, 0xC2, 0x01]); // add edx, 1
    c.extend_from_slice(&[0x89, 0x57, 0x08]); // mov [rdi + 8], edx (end)
    c.extend_from_slice(&[0x29, 0xCA]); // sub edx, ecx
    c.extend_from_slice(&[0x73, 0x02]); // jae +2 (no underflow)
    c.extend_from_slice(&[0x31, 0xD2]); // xor edx, edx
    c.extend_from_slice(&[0x89, 0x57, 0x04]); // mov [rdi + 4], edx (start)
    let skip_write_match_target = c.len();
    patch_rel32(&mut c, skip_write_match_patch, skip_write_match_target);
    c.extend_from_slice(&[0x49, 0xFF, 0xC7]); // inc r15 (match count)
    // Follow the output link; 0xFFFF_FFFF terminates the chain.
    let output_patch = c.len();
    c.extend_from_slice(&[0x48, 0xBF]); // movabs rdi, <output_links table> (patched below)
    c.extend_from_slice(&[0; 8]);
    c.extend_from_slice(&[0x46, 0x8B, 0x04, 0x87]); // mov r8d, [rdi + r8*4]
    c.extend_from_slice(&[0x41, 0x81, 0xF8]); // cmp r8d, imm32
    c.extend_from_slice(&0xFFFF_FFFFu32.to_le_bytes());
    c.extend_from_slice(&[0x0F, 0x85]); // jne <accept_loop>
    let accept_loop_patch = c.len();
    c.extend_from_slice(&[0; 4]);
    patch_rel32(&mut c, accept_loop_patch, accept_loop);
    // Restart from the start state after reporting matches.
    c.extend_from_slice(&[0x45, 0x31, 0xDB]); // xor r11d, r11d
    let skip_match_target = c.len();
    patch_rel32(&mut c, skip_match_patch, skip_match_target);
    // Advance, prefetch a cache line ahead, and loop while position < len.
    c.extend_from_slice(&[0x49, 0xFF, 0xC5]); // inc r13
    c.extend_from_slice(&[0x43, 0x0F, 0x18, 0x44, 0x2C, 0x40]); // prefetchnta [r12 + r13 + 64]
    c.extend_from_slice(&[0x49, 0x39, 0xED]); // cmp r13, rbp
    c.extend_from_slice(&[0x0F, 0x82]); // jb <scan_top>
    let loop_patch = c.len();
    c.extend_from_slice(&[0; 4]);
    patch_rel32(&mut c, loop_patch, scan_top);
    let epilogue = c.len();
    patch_rel32(&mut c, empty_patch, epilogue);
    // Epilogue: return the total match count and restore callee-saved registers.
    c.extend_from_slice(&[0x4C, 0x89, 0xF8]); // mov rax, r15
    c.extend_from_slice(&[0x41, 0x5F]); // pop r15
    c.extend_from_slice(&[0x41, 0x5E]); // pop r14
    c.extend_from_slice(&[0x41, 0x5D]); // pop r13
    c.extend_from_slice(&[0x41, 0x5C]); // pop r12
    c.push(0x5D); // pop rbp
    c.push(0x5B); // pop rbx
    c.push(0xC3); // ret
    // Pad with int3 so the appended data tables are 8-byte aligned.
    while c.len() % 8 != 0 {
        c.push(0xCC);
    }
    // Append the data tables directly after the code; the movabs immediates
    // above are pointed at this region once the load address is known.
    let trans_offset = c.len();
    for &t in &flagged {
        c.extend_from_slice(&t.to_le_bytes());
    }
    let accept_offset = c.len();
    for &p in &accept_pattern {
        c.extend_from_slice(&p.to_le_bytes());
    }
    let patlen_offset = c.len();
    // Keep at least one entry so the generated loads never index an empty table.
    if table.pattern_lengths().is_empty() {
        c.extend_from_slice(&0u32.to_le_bytes());
    } else {
        for &l in table.pattern_lengths() {
            c.extend_from_slice(&l.to_le_bytes());
        }
    }
    let output_offset = c.len();
    for &o in &output_link {
        c.extend_from_slice(&o.to_le_bytes());
    }
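    // Map a read-write region rounded up to whole pages; it is flipped to
    // read-execute below once the code has been copied and patched.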
let page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) as usize }.max(4096);
let alloc_size = (c.len() + page_size - 1) & !(page_size - 1);
let ptr = unsafe {
libc::mmap(
std::ptr::null_mut(),
alloc_size,
libc::PROT_READ | libc::PROT_WRITE,
libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
-1,
0,
)
};
if ptr == libc::MAP_FAILED {
return Err(Error::MemoryAllocation {
reason: format!(
"mmap(RW, {alloc_size}) failed: {}",
std::io::Error::last_os_error()
),
});
}
let buf = ptr.cast::<u8>();
unsafe {
std::ptr::copy_nonoverlapping(c.as_ptr(), buf, c.len());
}
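    // With the load address known, point the four movabs immediates at their
    // tables. The `+ 2` skips the REX prefix and opcode to reach the imm64.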
let base = buf as u64;
patch_imm64(&mut c, buf, trans_patch + 2, base + trans_offset as u64);
patch_imm64(&mut c, buf, accept_patch + 2, base + accept_offset as u64);
patch_imm64(&mut c, buf, patlen_patch + 2, base + patlen_offset as u64);
patch_imm64(&mut c, buf, output_patch + 2, base + output_offset as u64);
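    // W^X: drop write access before handing out the executable pointer.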
let prot = unsafe { libc::mprotect(ptr, alloc_size, libc::PROT_READ | libc::PROT_EXEC) };
if prot != 0 {
unsafe {
libc::munmap(ptr, alloc_size);
}
return Err(Error::MemoryAllocation {
reason: format!("mprotect(RX) failed: {}", std::io::Error::last_os_error()),
});
}
Ok(ExecutableBuffer {
ptr: buf,
len: alloc_size,
table: None,
is_jit: true,
accept_pattern,
output_links: output_link,
})
}
#[cfg(target_arch = "x86_64")]
fn patch_rel32(code: &mut [u8], site: usize, target: usize) {
    // rel32 is relative to the end of the 4-byte displacement field.
    let rel = target as isize - (site + 4) as isize;
    // The generated function is only a few hundred bytes, so this never
    // overflows; panic loudly rather than silently emit a wrong jump.
    let rel = i32::try_from(rel).expect("rel32 displacement out of range");
    code[site..site + 4].copy_from_slice(&rel.to_le_bytes());
}
#[cfg(target_arch = "x86_64")]
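/// Patches an imm64 at `offset` in both the local code vector and the
/// already-copied mapping at `buf`, keeping the two views consistent.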
fn patch_imm64(code: &mut [u8], buf: *mut u8, offset: usize, value: u64) {
let bytes = value.to_le_bytes();
code[offset..offset + 8].copy_from_slice(&bytes);
unsafe {
std::ptr::copy_nonoverlapping(bytes.as_ptr(), buf.add(offset), 8);
}
}
#[cfg(target_arch = "x86_64")]
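/// Builds an `ExecutableBuffer` that keeps the transition table and scans via
/// the interpreter. A single `ret` is still mapped so that `ptr` and `len`
/// remain valid for the shared `Drop` path.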
fn compile_interpreted_fallback(
table: &TransitionTable,
output_links: &[u32],
) -> Result<ExecutableBuffer> {
    // Placeholder code: a lone `ret`. It is never executed (`is_jit` is
    // false), but mapping it keeps the munmap in `Drop` uniform.
    const FALLBACK_CODE: [u8; 1] = [0xC3];
    let page_size = 4096usize;
let ptr = unsafe {
libc::mmap(
std::ptr::null_mut(),
page_size,
libc::PROT_READ | libc::PROT_WRITE,
libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
-1,
0,
)
};
if ptr == libc::MAP_FAILED {
return Err(Error::MemoryAllocation {
reason: format!("mmap failed: {}", std::io::Error::last_os_error()),
});
}
unsafe {
std::ptr::copy_nonoverlapping(
FALLBACK_CODE.as_ptr(),
ptr.cast::<u8>(),
FALLBACK_CODE.len(),
);
}
let prot = unsafe { libc::mprotect(ptr, page_size, libc::PROT_READ | libc::PROT_EXEC) };
if prot != 0 {
unsafe {
libc::munmap(ptr, page_size);
}
return Err(Error::MemoryAllocation {
reason: format!("mprotect failed: {}", std::io::Error::last_os_error()),
});
}
let mut accept_pattern = vec![0xFFFF_FFFF; table.state_count()];
for &(state, pid) in table.accept_states() {
if (state as usize) < accept_pattern.len() {
accept_pattern[state as usize] = pid;
}
}
let mut output_link = output_links.to_vec();
if output_link.len() < table.state_count() {
output_link.resize(table.state_count(), 0xFFFF_FFFF);
}
Ok(ExecutableBuffer {
ptr: ptr.cast::<u8>(),
len: page_size,
table: Some(table.clone()),
is_jit: false,
accept_pattern,
output_links: output_link,
})
}