use super::types::*;
use std::fmt;
/// Value used in place of a match position when a signal has no cached
/// position (see `first_position` and `position`); `FileIdOf` also pushes it
/// when the pattern has no position.
const ABORT_SENTINEL: u32 = u32::MAX;
/// Upper bound on the number of entries in a plan's `program_signal_counts`
/// that `build_runtime_programs` accepts before it reports
/// `VmError::RuntimeProgramCountTooLarge`.
pub const MAX_RUNTIME_PROGRAMS: usize = 4_096;
/// Typed failures reported by the VM evaluation entry points in this file.
///
/// The `Display` text of every variant is phrased as a "Fix: ..." instruction.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum VmError {
/// The plan's `program_signal_counts` length differs from the number of
/// programs handed to `evaluate_programs_typed`.
ProgramSignalCountMismatch {
/// Length of `plan.program_signal_counts`.
signal_count_entries: usize,
/// Length of the `programs` slice.
program_count: usize,
},
/// Some adjacent pair of `file_boundaries` is in descending order.
UnsortedFileBoundaries,
/// Some `file_boundaries` offset is greater than the byte buffer length.
BoundaryPastBuffer,
/// The plan declares more programs than `MAX_RUNTIME_PROGRAMS`.
RuntimeProgramCountTooLarge {
/// The limit that was exceeded.
max_programs: usize,
},
/// An instruction's `kind()` lookup returned `None`.
UnknownOpcode {
/// Raw opcode value of the offending instruction.
opcode: u32,
},
/// An instruction needed an operand but the value stack was empty.
StackUnderflow,
/// A reduction was asked for a width of zero or for more values than the
/// stack currently holds.
ReductionStackUnderflow {
/// Requested reduction width.
width: u32,
/// Stack length at the time of the reduction.
stack_len: usize,
},
/// More programs fired than the plan's `max_fired` allows.
FiredProgramLimitExceeded {
/// Number of programs that fired.
fired_count: usize,
/// The plan's `max_fired` limit.
max_fired: u32,
},
}
/// Renders each error as an actionable "Fix: ..." sentence.
impl fmt::Display for VmError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            // Variants without payload are plain literals.
            Self::UnsortedFileBoundaries => f.write_str(
                "Fix: sort file_boundaries in ascending order before evaluating the VM frame.",
            ),
            Self::BoundaryPastBuffer => f.write_str(
                "Fix: keep every file_boundaries offset within the file byte buffer length.",
            ),
            Self::StackUnderflow => f.write_str(
                "Fix: regenerate bytecode with enough stack operands before each VM instruction.",
            ),
            // Variants with payload interpolate their fields by name.
            Self::ProgramSignalCountMismatch {
                signal_count_entries,
                program_count,
            } => {
                write!(
                    f,
                    "Fix: make eval plan program_signal_counts match bytecode programs; got {signal_count_entries} signal-count entries for {program_count} programs."
                )
            }
            Self::RuntimeProgramCountTooLarge { max_programs } => {
                write!(
                    f,
                    "Fix: split the eval frame into bounded batches of at most {max_programs} programs."
                )
            }
            Self::UnknownOpcode { opcode } => {
                write!(
                    f,
                    "Fix: regenerate bytecode or update the VM opcode table; unknown opcode {opcode}."
                )
            }
            Self::ReductionStackUnderflow { width, stack_len } => {
                write!(
                    f,
                    "Fix: regenerate bytecode so reduction width {width} has at least {width} stack values; stack had {stack_len}."
                )
            }
            Self::FiredProgramLimitExceeded {
                fired_count,
                max_fired,
            } => {
                write!(
                    f,
                    "Fix: enforce the evaluation plan's bounded-output limit; fired {fired_count} programs, max_fired is {max_fired}."
                )
            }
        }
    }
}
// `VmError` wraps no underlying error, so the default `Error` methods are used as-is.
impl std::error::Error for VmError {}
/// Per-program match state assembled by `build_runtime_programs`.
///
/// All three vectors are sized from the program's entry in
/// `plan.program_signal_counts` and indexed by the local signal id taken from
/// `plan.signal_local_ids`.
#[derive(Debug, Clone)]
pub(crate) struct RuntimeProgram {
/// Number of match events seen per signal (saturating).
pub counts: Vec<u32>,
/// Cached match start offsets per signal, at most
/// `plan.max_cached_positions` entries each, in match-event order.
pub positions: Vec<Vec<u32>>,
/// Match lengths (`end - start`, saturating) parallel to `positions`.
pub lengths: Vec<Vec<u32>>,
}
/// String-error facade over [`evaluate_programs_typed`].
///
/// Returns one fired/not-fired flag per program. On failure the typed
/// [`VmError`] is rendered through its `Display` implementation.
#[inline]
pub(crate) fn evaluate_programs(
    programs: &[ConformProgram],
    plan: ConformEvaluationPlan<'_>,
    file_bytes: &[u8],
    matches: &[ConformMatchEvent],
    file_ctx: ConformFileContext,
) -> Result<Vec<bool>, String> {
    match evaluate_programs_typed(programs, plan, file_bytes, matches, file_ctx) {
        Ok(fired) => Ok(fired),
        Err(vm_error) => Err(vm_error.to_string()),
    }
}
/// Evaluates every program against the match events of one frame.
///
/// Steps, in order:
/// 1. reject a plan whose `program_signal_counts` length differs from
///    `programs.len()`;
/// 2. validate `plan.file_boundaries` against the byte buffer;
/// 3. build the per-program runtime state and enrich the file context;
/// 4. run each program, stopping at the first VM error;
/// 5. reject the result if more than `plan.max_fired` programs fired.
///
/// # Errors
/// Any [`VmError`] raised by the steps above.
#[inline]
pub(crate) fn evaluate_programs_typed(
    programs: &[ConformProgram],
    plan: ConformEvaluationPlan<'_>,
    file_bytes: &[u8],
    matches: &[ConformMatchEvent],
    file_ctx: ConformFileContext,
) -> Result<Vec<bool>, VmError> {
    let signal_count_entries = plan.program_signal_counts.len();
    let program_count = programs.len();
    if signal_count_entries != program_count {
        return Err(VmError::ProgramSignalCountMismatch {
            signal_count_entries,
            program_count,
        });
    }

    validate_boundaries(plan.file_boundaries, file_bytes.len())?;
    let runtimes = build_runtime_programs(&plan, matches)?;
    let file_ctx = enrich_file_context(file_ctx, &plan, matches);

    // `runtimes` has one entry per `program_signal_counts` entry, which the
    // check above ties to `programs.len()`, so the zip pairs every program.
    let mut fired = Vec::with_capacity(program_count);
    for (program, runtime) in programs.iter().zip(runtimes.iter()) {
        let did_fire = evaluate_program(
            program,
            runtime,
            file_bytes,
            plan.file_boundaries,
            file_ctx,
        )?;
        fired.push(did_fire);
    }

    let fired_count = fired.iter().filter(|did_fire| **did_fire).count();
    if fired_count > plan.max_fired as usize {
        return Err(VmError::FiredProgramLimitExceeded {
            fired_count,
            max_fired: plan.max_fired,
        });
    }
    Ok(fired)
}
/// Checks that `boundaries` is non-decreasing and that no offset exceeds
/// `file_len`.
///
/// # Errors
/// * [`VmError::UnsortedFileBoundaries`] when an adjacent pair descends.
/// * [`VmError::BoundaryPastBuffer`] when an offset is larger than `file_len`;
///   a diagnostic is emitted through `debug_diagnostic` first.
fn validate_boundaries(boundaries: &[u32], file_len: usize) -> Result<(), VmError> {
    let ascending = boundaries.windows(2).all(|pair| pair[0] <= pair[1]);
    if !ascending {
        return Err(VmError::UnsortedFileBoundaries);
    }

    let within_buffer = boundaries
        .iter()
        .all(|&offset| offset as usize <= file_len);
    if within_buffer {
        return Ok(());
    }

    debug_diagnostic(format_args!(
        "signature boundary mismatch: file_len={file_len}, boundaries={boundaries:?}"
    ));
    Err(VmError::BoundaryPastBuffer)
}
/// Emits `args` as a `tracing` debug event, but only when the environment
/// variable `VYRE_CONFORM_DEBUG_DIAGNOSTICS` is set to exactly `1`.
///
/// The variable is read on every call.
#[inline]
pub(crate) fn debug_diagnostic(args: std::fmt::Arguments<'_>) {
    let Some(flag) = std::env::var_os("VYRE_CONFORM_DEBUG_DIAGNOSTICS") else {
        return;
    };
    if flag.as_os_str() == std::ffi::OsStr::new("1") {
        tracing::debug!("{args}");
    }
}
/// Builds the per-program match state for one evaluation frame.
///
/// One [`RuntimeProgram`] is created per entry of `plan.program_signal_counts`,
/// each sized to that entry. Every match event is then fanned out through
/// `plan.signal_to_programs[signal_id] = [start, count]` into the
/// `plan.program_list` / `plan.signal_local_ids` entries at offsets
/// `start..start + count`:
///
/// * the target signal's count is incremented (saturating);
/// * the match start and length are cached while fewer than
///   `plan.max_cached_positions` positions are stored for that signal.
///
/// Matches whose signal id is listed in `plan.sentinel_signal_ids`, or that
/// point at unknown signals, programs or local signal slots, are ignored.
///
/// # Errors
/// [`VmError::RuntimeProgramCountTooLarge`] when the plan declares more than
/// [`MAX_RUNTIME_PROGRAMS`] programs.
#[inline]
pub(crate) fn build_runtime_programs(
    plan: &ConformEvaluationPlan<'_>,
    matches: &[ConformMatchEvent],
) -> Result<Vec<RuntimeProgram>, VmError> {
    if plan.program_signal_counts.len() > MAX_RUNTIME_PROGRAMS {
        return Err(VmError::RuntimeProgramCountTooLarge {
            max_programs: MAX_RUNTIME_PROGRAMS,
        });
    }
    let mut out: Vec<RuntimeProgram> = plan
        .program_signal_counts
        .iter()
        .map(|&signal_count| RuntimeProgram {
            counts: vec![0; signal_count],
            positions: vec![Vec::new(); signal_count],
            lengths: vec![Vec::new(); signal_count],
        })
        .collect();
    for matched in matches {
        if plan.sentinel_signal_ids.contains(&matched.signal_id) {
            continue;
        }
        let Some([start, count]) = plan.signal_to_programs.get(matched.signal_id as usize) else {
            continue;
        };
        for offset in *start..start.saturating_add(*count) {
            // Offsets only grow, so once either lookup table is exhausted no
            // later offset can resolve. Stop here instead of spinning through
            // the rest of a (possibly huge, malformed) `count`.
            let (Some(&program_id), Some(&signal_id)) = (
                plan.program_list.get(offset as usize),
                plan.signal_local_ids.get(offset as usize),
            ) else {
                break;
            };
            // An unknown program or local slot only invalidates this entry;
            // later offsets may still be valid.
            let Some(program) = out.get_mut(program_id as usize) else {
                continue;
            };
            let slot = signal_id as usize;
            let Some(count_slot) = program.counts.get_mut(slot) else {
                continue;
            };
            *count_slot = count_slot.saturating_add(1);
            // `positions` and `lengths` have the same length as `counts`, so
            // `slot` is in range for both.
            if program.positions[slot].len() < plan.max_cached_positions {
                program.positions[slot].push(matched.start);
                program.lengths[slot].push(matched.end.saturating_sub(matched.start));
            }
        }
    }
    Ok(out)
}
/// Fills the match-derived fields of `file_ctx` and returns it.
///
/// Match events are tallied per signal id (saturating), skipping ids listed in
/// `plan.sentinel_signal_ids` and ids outside `plan.signal_to_programs`.
/// From that tally:
///
/// * `unique_pattern_count` is the number of signals with at least one hit;
/// * `total_match_count` is the sum of all per-signal hits, saturating at
///   `u32::MAX` like the per-signal counters rather than overflowing.
fn enrich_file_context(
    mut file_ctx: ConformFileContext,
    plan: &ConformEvaluationPlan<'_>,
    matches: &[ConformMatchEvent],
) -> ConformFileContext {
    let mut hit_counts = vec![0u32; plan.signal_to_programs.len()];
    for matched in matches {
        if plan.sentinel_signal_ids.contains(&matched.signal_id) {
            continue;
        }
        if let Some(slot) = hit_counts.get_mut(matched.signal_id as usize) {
            *slot = slot.saturating_add(1);
        }
    }
    file_ctx.unique_pattern_count = hit_counts.iter().filter(|&&count| count != 0).count() as u32;
    // `Iterator::sum` panics on overflow in debug builds and wraps in release;
    // fold with saturating addition so the total clamps instead.
    file_ctx.total_match_count = hit_counts
        .iter()
        .fold(0u32, |total, &count| total.saturating_add(count));
    file_ctx
}
/// Runs one program's instructions against a `u32` value stack and reports
/// whether the program fired.
///
/// Booleans are represented on the stack as `0` / `1`; any non-zero value is
/// treated as true by the logical opcodes.
///
/// Returns `Ok(true)` when a `Halt` instruction is reached with a non-zero
/// value on top of the stack, `Ok(false)` when `Halt` is reached with a zero
/// top or an empty stack, and `Ok(false)` when the instructions run out
/// without a `Halt`.
///
/// # Errors
/// * `VmError::UnknownOpcode` when `instruction.kind()` returns `None`.
/// * `VmError::StackUnderflow` when an operand is popped from an empty stack.
/// * `VmError::ReductionStackUnderflow` from `reduce` (zero width or too few
///   values).
fn evaluate_program(
program: &ConformProgram,
runtime: &RuntimeProgram,
file_bytes: &[u8],
file_boundaries: &[u32],
file_ctx: ConformFileContext,
) -> Result<bool, VmError> {
let mut stack = Vec::with_capacity(32);
for instruction in &program.instructions {
let Some(kind) = instruction.kind() else {
return Err(VmError::UnknownOpcode {
opcode: instruction.opcode,
});
};
match kind {
// --- Constants and per-signal match data -------------------------------
ConformOpcode::PushTrue => stack.push(1),
ConformOpcode::PushFalse => stack.push(0),
ConformOpcode::PushImmediate => stack.push(instruction.operand),
ConformOpcode::PushFileSize => stack.push(file_ctx.file_size),
ConformOpcode::PushStringMatched => {
stack.push(u32::from(count(runtime, instruction.operand) != 0));
}
ConformOpcode::PushStringCount => stack.push(count(runtime, instruction.operand)),
// The operand names the signal; the popped value selects which cached
// occurrence to read.
ConformOpcode::PushStringOffset => {
let slot = pop(&mut stack)?;
stack.push(position(runtime, instruction.operand, slot));
}
ConformOpcode::PushStringLength => {
let slot = pop(&mut stack)?;
stack.push(length(runtime, instruction.operand, slot));
}
// Both opcodes push the number of this program's signals with at least one hit.
ConformOpcode::PushNumStrings | ConformOpcode::PushEntryCount => {
stack.push(runtime.counts.iter().filter(|&&c| c != 0).count() as u32);
}
// --- File-context fields ----------------------------------------------
ConformOpcode::UniquePatternCount => stack.push(file_ctx.unique_pattern_count),
ConformOpcode::TotalMatchCount => stack.push(file_ctx.total_match_count),
ConformOpcode::PushEntropy => stack.push(file_ctx.entropy_bucket),
ConformOpcode::PushIsPe => stack.push(file_ctx.is_pe),
ConformOpcode::PushIsDll => stack.push(file_ctx.is_dll),
ConformOpcode::PushIs64bit => stack.push(file_ctx.is_64bit),
ConformOpcode::PushHasSignature => stack.push(file_ctx.has_signature),
ConformOpcode::PushMagicU32 => stack.push(file_ctx.magic_u32),
ConformOpcode::PushNumSections => stack.push(file_ctx.num_sections),
ConformOpcode::PushNumImports => stack.push(file_ctx.num_imports),
ConformOpcode::PushEntryPoint => stack.push(file_ctx.entry_point_rva),
ConformOpcode::PushFileAge => stack.push(file_ctx.file_age_seconds),
// --- Logic and comparisons (right operand is popped first) -------------
ConformOpcode::And => binary_bool(&mut stack, |a, b| a != 0 && b != 0)?,
ConformOpcode::Or => binary_bool(&mut stack, |a, b| a != 0 || b != 0)?,
ConformOpcode::Not => {
let value = pop(&mut stack)?;
stack.push(u32::from(value == 0));
}
ConformOpcode::Eq => binary_bool(&mut stack, |a, b| a == b)?,
ConformOpcode::Neq => binary_bool(&mut stack, |a, b| a != b)?,
ConformOpcode::Lt => binary_bool(&mut stack, |a, b| a < b)?,
ConformOpcode::Gt => binary_bool(&mut stack, |a, b| a > b)?,
ConformOpcode::Lte => binary_bool(&mut stack, |a, b| a <= b)?,
ConformOpcode::Gte => binary_bool(&mut stack, |a, b| a >= b)?,
// --- Arithmetic: wrapping; division and modulo by zero yield 0 ---------
ConformOpcode::Add => binary_value(&mut stack, u32::wrapping_add)?,
ConformOpcode::Sub => binary_value(&mut stack, u32::wrapping_sub)?,
ConformOpcode::Mul => binary_value(&mut stack, u32::wrapping_mul)?,
ConformOpcode::Div => binary_value(&mut stack, |a, b| if b == 0 { 0 } else { a / b })?,
ConformOpcode::Mod => binary_value(&mut stack, |a, b| if b == 0 { 0 } else { a % b })?,
ConformOpcode::BitAnd => binary_value(&mut stack, |a, b| a & b)?,
ConformOpcode::BitOr => binary_value(&mut stack, |a, b| a | b)?,
ConformOpcode::BitXor => binary_value(&mut stack, |a, b| a ^ b)?,
// Shift amounts are masked to 0..=31 so the shift can never overflow.
ConformOpcode::Shl => binary_value(&mut stack, |a, b| a << (b & 31))?,
ConformOpcode::Shr => binary_value(&mut stack, |a, b| a >> (b & 31))?,
// --- Reductions over the top `width` stack values ----------------------
ConformOpcode::AnyOf => reduce(&mut stack, instruction.operand, |values| {
values.iter().any(|&v| v != 0)
})?,
ConformOpcode::AllOf => reduce(&mut stack, instruction.operand, |values| {
values.iter().all(|&v| v != 0)
})?,
// Operand packs the threshold in the low 16 bits and the width in the
// high 16 bits.
ConformOpcode::CountOf => {
let needed = instruction.operand & 0xFFFF;
let width = instruction.operand >> 16;
reduce(&mut stack, width, |values| {
values.iter().filter(|&&v| v != 0).count() as u32 >= needed
})?;
}
// --- Position predicates on the operand's signal -----------------------
ConformOpcode::StringAt => {
let wanted = pop(&mut stack)?;
stack.push(u32::from(
positions(runtime, instruction.operand).contains(&wanted),
));
}
// Inclusive range test: true if any cached position lies in lo..=hi.
ConformOpcode::StringIn => {
let hi = pop(&mut stack)?;
let lo = pop(&mut stack)?;
stack.push(u32::from(
positions(runtime, instruction.operand)
.iter()
.any(|&pos| pos >= lo && pos <= hi),
));
}
// --- Relations between signals whose ids are popped from the stack -----
// True when both signals have a cached position and a's first one is
// strictly before b's first one.
ConformOpcode::MatchOrder => {
let b = pop(&mut stack)?;
let a = pop(&mut stack)?;
let pa = first_position(runtime, a);
let pb = first_position(runtime, b);
stack.push(u32::from(
pa != ABORT_SENTINEL && pb != ABORT_SENTINEL && pa < pb,
));
}
ConformOpcode::MatchDistance => {
let b = pop(&mut stack)?;
let a = pop(&mut stack)?;
stack.push(match_distance(runtime, a, b));
}
// True when any cached position of c lies between (inclusive) the
// first positions of a and b, in either order.
ConformOpcode::MatchBetween => {
let c = pop(&mut stack)?;
let b = pop(&mut stack)?;
let a = pop(&mut stack)?;
let pa = first_position(runtime, a);
let pb = first_position(runtime, b);
let lo = pa.min(pb);
let hi = pa.max(pb);
stack.push(u32::from(
pa != ABORT_SENTINEL
&& pb != ABORT_SENTINEL
&& positions(runtime, c).iter().any(|&p| p >= lo && p <= hi),
));
}
// Both opcodes push 0 when either signal has no cached position.
ConformOpcode::SameFile | ConformOpcode::DifferentFile => {
let b = pop(&mut stack)?;
let a = pop(&mut stack)?;
let pa = first_position(runtime, a);
let pb = first_position(runtime, b);
let same = pa != ABORT_SENTINEL
&& pb != ABORT_SENTINEL
&& file_id_at(file_boundaries, pa) == file_id_at(file_boundaries, pb);
stack.push(u32::from(if kind == ConformOpcode::SameFile {
same
} else {
!same && pa != ABORT_SENTINEL && pb != ABORT_SENTINEL
}));
}
// Pushes ABORT_SENTINEL when the pattern has no cached position.
ConformOpcode::FileIdOf => {
let pattern = pop(&mut stack)?;
let pos = first_position(runtime, pattern);
stack.push(if pos == ABORT_SENTINEL {
ABORT_SENTINEL
} else {
file_id_at(file_boundaries, pos)
});
}
// True when a and c first occur in the same file and b first occurs in
// a different one; all three must have a cached position.
ConformOpcode::CrossFileChain => {
let c = pop(&mut stack)?;
let b = pop(&mut stack)?;
let a = pop(&mut stack)?;
let pa = first_position(runtime, a);
let pb = first_position(runtime, b);
let pc = first_position(runtime, c);
let fa = file_id_at(file_boundaries, pa);
stack.push(u32::from(
pa != ABORT_SENTINEL
&& pb != ABORT_SENTINEL
&& pc != ABORT_SENTINEL
&& fa == file_id_at(file_boundaries, pc)
&& fa != file_id_at(file_boundaries, pb),
));
}
// --- Raw byte access: out-of-range offsets read as 0 -------------------
ConformOpcode::ReadByteAt | ConformOpcode::ByteAt => {
let offset = pop(&mut stack)? as usize;
stack.push(file_bytes.get(offset).copied().unwrap_or(0) as u32);
}
// The program's result is the truthiness of the top of the stack.
ConformOpcode::Halt => return Ok(stack.last().copied().unwrap_or(0) != 0),
}
}
// Instructions ran out without a `Halt`: the program does not fire.
Ok(false)
}
/// Removes and returns the top stack value.
///
/// # Errors
/// [`VmError::StackUnderflow`] when the stack is empty.
fn pop(stack: &mut Vec<u32>) -> Result<u32, VmError> {
    match stack.pop() {
        Some(top) => Ok(top),
        None => Err(VmError::StackUnderflow),
    }
}
/// Pops the right operand, then the left operand, and pushes `f(left, right)`
/// as `1` or `0`.
///
/// # Errors
/// [`VmError::StackUnderflow`] when fewer than two values are available; a
/// value popped before the failure is not restored.
fn binary_bool(stack: &mut Vec<u32>, f: impl FnOnce(u32, u32) -> bool) -> Result<(), VmError> {
    let rhs = pop(stack)?;
    let lhs = pop(stack)?;
    let verdict = f(lhs, rhs);
    stack.push(u32::from(verdict));
    Ok(())
}
/// Pops the right operand, then the left operand, and pushes `f(left, right)`.
///
/// # Errors
/// [`VmError::StackUnderflow`] when fewer than two values are available; a
/// value popped before the failure is not restored.
fn binary_value(stack: &mut Vec<u32>, f: impl FnOnce(u32, u32) -> u32) -> Result<(), VmError> {
    let rhs = pop(stack)?;
    let lhs = pop(stack)?;
    let result = f(lhs, rhs);
    stack.push(result);
    Ok(())
}
/// Replaces the top `width` stack values with the single `0`/`1` result of
/// `f` applied to them (in stack order, oldest first).
///
/// # Errors
/// [`VmError::ReductionStackUnderflow`] when `width` is zero or larger than
/// the current stack length; the stack is left untouched in that case.
fn reduce(stack: &mut Vec<u32>, width: u32, f: impl FnOnce(&[u32]) -> bool) -> Result<(), VmError> {
    let depth = stack.len();
    let base = match depth.checked_sub(width as usize) {
        Some(base) if width != 0 => base,
        _ => {
            return Err(VmError::ReductionStackUnderflow {
                width,
                stack_len: depth,
            })
        }
    };
    let verdict = f(&stack[base..]);
    stack.truncate(base);
    stack.push(u32::from(verdict));
    Ok(())
}
/// Match count recorded for `signal`, or `0` when the signal index is out of
/// range for this program.
fn count(runtime: &RuntimeProgram, signal: u32) -> u32 {
    match runtime.counts.get(signal as usize) {
        Some(&hits) => hits,
        None => 0,
    }
}
/// Cached match positions for `signal`, or an empty slice when the signal
/// index is out of range for this program.
fn positions(runtime: &RuntimeProgram, signal: u32) -> &[u32] {
    match runtime.positions.get(signal as usize) {
        Some(cached) => cached.as_slice(),
        None => &[],
    }
}
/// First cached position for `signal`, or `ABORT_SENTINEL` when none is
/// cached.
fn first_position(runtime: &RuntimeProgram, signal: u32) -> u32 {
    match positions(runtime, signal) {
        [head, ..] => *head,
        [] => ABORT_SENTINEL,
    }
}
/// Cached position number `slot` for `signal`, or `ABORT_SENTINEL` when the
/// signal or the slot is out of range.
fn position(runtime: &RuntimeProgram, signal: u32, slot: u32) -> u32 {
    match positions(runtime, signal).get(slot as usize) {
        Some(&offset) => offset,
        None => ABORT_SENTINEL,
    }
}
/// Cached match length number `slot` for `signal`, or `0` when the signal or
/// the slot is out of range.
fn length(runtime: &RuntimeProgram, signal: u32, slot: u32) -> u32 {
    let Some(per_signal) = runtime.lengths.get(signal as usize) else {
        return 0;
    };
    match per_signal.get(slot as usize) {
        Some(&len) => len,
        None => 0,
    }
}
fn match_distance(runtime: &RuntimeProgram, a: u32, b: u32) -> u32 {
let mut best = ABORT_SENTINEL;
let a_positions = positions(runtime, a);
let b_positions = positions(runtime, b);
for (i, &a_start) in a_positions.iter().enumerate() {
let a_end = a_start.saturating_add(length(runtime, a, i as u32));
for (j, &b_start) in b_positions.iter().enumerate() {
let b_end = b_start.saturating_add(length(runtime, b, j as u32));
let gap = if a_end < b_start {
b_start - a_end
} else {
a_start.saturating_sub(b_end)
};
best = best.min(gap);
}
}
if best == ABORT_SENTINEL {
0
} else {
best
}
}
/// Index of the last boundary that is `<= offset`, found by binary search.
///
/// Returns `0` when `boundaries` is empty or when `offset` lies before the
/// first boundary. `boundaries` is expected to be sorted ascending.
fn file_id_at(boundaries: &[u32], offset: u32) -> u32 {
    // The comparator never reports `Equal`, so the search always lands on the
    // first index whose boundary is greater than `offset`.
    let started = boundaries
        .binary_search_by(|&start| {
            if start <= offset {
                std::cmp::Ordering::Less
            } else {
                std::cmp::Ordering::Greater
            }
        })
        .unwrap_or_else(|insert_at| insert_at);
    match started {
        0 => 0,
        n => (n - 1) as u32,
    }
}