use crate::unified_trace::{SyscallSpan, UnifiedTrace};
/// Outcome of comparing the observable syscalls of an original trace with
/// those of a transpiled trace.
#[derive(Debug, Clone, PartialEq)]
pub enum ValidationResult {
/// The two traces were judged equivalent within the validator's tolerance.
Pass {
/// Similarity score of the comparison (fraction of compared syscall pairs that matched).
confidence: f64,
/// Number of syscall pairs that were found equivalent.
matched_syscalls: usize,
/// Runtime comparison between the original and the transpiled trace.
performance: PerformanceComparison,
},
/// The two traces were judged not equivalent.
Fail {
/// Position at which the traces diverged.
divergence_point: DivergencePoint,
/// Human-readable description of why validation failed.
explanation: String,
},
}
/// Runtime (and optionally memory) comparison between an original program
/// and its transpiled counterpart.
#[derive(Debug, Clone, PartialEq)]
pub struct PerformanceComparison {
    /// Runtime of the original program, in nanoseconds.
    pub original_runtime_nanos: u64,
    /// Runtime of the transpiled program, in nanoseconds.
    pub transpiled_runtime_nanos: u64,
    /// `original / transpiled`; values above `1.0` mean the transpiled
    /// program was faster. `1.0` when the transpiled runtime is zero.
    pub speedup: f64,
    /// Memory comparison, present only after [`PerformanceComparison::with_memory`].
    pub memory_delta: Option<MemoryDelta>,
}

impl PerformanceComparison {
    /// Builds a comparison from the two runtimes, without memory information.
    ///
    /// A transpiled runtime of zero yields a neutral speedup of `1.0` rather
    /// than dividing by zero.
    pub fn new(original_runtime_nanos: u64, transpiled_runtime_nanos: u64) -> Self {
        let speedup = match transpiled_runtime_nanos {
            0 => 1.0,
            nanos => original_runtime_nanos as f64 / nanos as f64,
        };

        Self {
            original_runtime_nanos,
            transpiled_runtime_nanos,
            speedup,
            memory_delta: None,
        }
    }

    /// Attaches memory usage figures and returns the updated comparison.
    ///
    /// The reduction percentage is `(original - transpiled) / original * 100`
    /// (negative when the transpiled program uses more memory), or `0.0`
    /// when `original_bytes` is zero.
    pub fn with_memory(mut self, original_bytes: usize, transpiled_bytes: usize) -> Self {
        let reduction_percentage = if original_bytes == 0 {
            0.0
        } else {
            let before = original_bytes as f64;
            let after = transpiled_bytes as f64;
            ((before - after) / before) * 100.0
        };

        self.memory_delta = Some(MemoryDelta {
            original_bytes,
            transpiled_bytes,
            reduction_percentage,
        });
        self
    }
}

/// Memory usage of the original and transpiled programs.
#[derive(Debug, Clone, PartialEq)]
pub struct MemoryDelta {
    /// Memory used by the original program, in bytes.
    pub original_bytes: usize,
    /// Memory used by the transpiled program, in bytes.
    pub transpiled_bytes: usize,
    /// Relative reduction in percent; positive when the transpiled program uses less memory.
    pub reduction_percentage: f64,
}
/// Location and description of the point where two traces stopped agreeing.
#[derive(Debug, Clone, PartialEq)]
pub struct DivergencePoint {
/// Index into the sequence of observable syscalls at which the traces diverged.
pub syscall_index: usize,
/// Textual description of the original trace's syscall at that position.
pub original_syscall: String,
/// Textual description of the transpiled trace's syscall at that position.
pub transpiled_syscall: String,
}
/// A syscall reduced to the fields used when comparing traces.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ObservableSyscall {
/// Syscall name, e.g. `"open"`.
pub name: String,
/// Argument values in order (argument names are not kept).
pub args: Vec<String>,
/// Value returned by the syscall; negative values are treated as failure
/// by `is_equivalent`.
pub return_value: i64,
}
impl ObservableSyscall {
pub fn from_syscall_span(span: &SyscallSpan) -> Self {
ObservableSyscall {
name: span.name.to_string(),
args: span.args.iter().map(|(_, v)| v.clone()).collect(),
return_value: span.return_value,
}
}
pub fn is_equivalent(&self, other: &ObservableSyscall) -> bool {
if self.name != other.name {
return false;
}
let self_success = self.return_value >= 0;
let other_success = other.return_value >= 0;
if self_success != other_success {
return false;
}
true
}
}
/// Compares the observable syscalls of two traces and decides whether they
/// are equivalent within a configurable tolerance.
///
/// Derives `Debug`, `Clone` and `PartialEq` like the other public types in
/// this module, so a validator can be logged, duplicated and compared.
#[derive(Debug, Clone, PartialEq)]
pub struct SemanticValidator {
    /// Allowed fraction of mismatch, applied both to the difference in trace
    /// length and to the share of syscall pairs that may differ.
    tolerance: f64,
}
impl SemanticValidator {
/// Creates a validator with the default tolerance of `0.05` (5%).
pub fn new() -> Self {
SemanticValidator { tolerance: 0.05 }
}
/// Creates a validator with a custom tolerance.
///
/// `tolerance` is clamped to the range `0.0..=1.0`. A NaN tolerance is
/// replaced by `0.0`, the strictest setting.
pub fn with_tolerance(tolerance: f64) -> Self {
    // `f64::clamp` returns NaN for a NaN input. Every threshold comparison
    // against a NaN tolerance evaluates to false, which would make the
    // validator reject even identical traces, so fall back to the strictest
    // valid tolerance instead.
    let tolerance = if tolerance.is_nan() { 0.0 } else { tolerance.clamp(0.0, 1.0) };
    SemanticValidator { tolerance }
}
/// Returns the tolerance this validator was configured with.
pub fn tolerance(&self) -> f64 {
self.tolerance
}
pub fn validate(&self, original: &UnifiedTrace, transpiled: &UnifiedTrace) -> ValidationResult {
let obs_original = self.filter_observable_syscalls(original);
let obs_transpiled = self.filter_observable_syscalls(transpiled);
let diff = self.diff_with_tolerance(&obs_original, &obs_transpiled);
if diff.is_equivalent {
let performance = self.calculate_performance(original, transpiled);
ValidationResult::Pass {
confidence: diff.similarity_score,
matched_syscalls: diff.matched_count,
performance,
}
} else {
ValidationResult::Fail {
divergence_point: diff.divergence_point.unwrap_or_else(|| DivergencePoint {
syscall_index: 0,
original_syscall: "unknown".to_string(),
transpiled_syscall: "unknown".to_string(),
}),
explanation: diff.explanation,
}
}
}
/// Extracts, in trace order, the syscalls whose name is on the fixed
/// allow-list of observable operations; every other syscall is dropped.
fn filter_observable_syscalls(&self, trace: &UnifiedTrace) -> Vec<ObservableSyscall> {
    /// Names of the syscalls that take part in the comparison.
    const OBSERVABLE: &[&str] = &[
        // Files and directories.
        "open",
        "openat",
        "read",
        "write",
        "close",
        "stat",
        "fstat",
        "lstat",
        "lseek",
        "pread",
        "pwrite",
        "readv",
        "writev",
        "fsync",
        "fdatasync",
        "rename",
        "unlink",
        "mkdir",
        "rmdir",
        "chmod",
        "chown",
        "truncate",
        "ftruncate",
        // Sockets.
        "socket",
        "connect",
        "bind",
        "listen",
        "accept",
        "send",
        "recv",
        "sendto",
        "recvfrom",
        "sendmsg",
        "recvmsg",
        "shutdown",
        // Processes.
        "fork",
        "vfork",
        "clone",
        "exec",
        "execve",
        "wait",
        "waitpid",
        "exit",
        "kill",
        // Pipes and descriptor duplication.
        "pipe",
        "pipe2",
        "dup",
        "dup2",
        "dup3",
    ];

    let mut kept = Vec::new();
    for span in trace.syscall_spans.iter() {
        let name: &str = &span.name;
        if OBSERVABLE.contains(&name) {
            kept.push(ObservableSyscall::from_syscall_span(span));
        }
    }
    kept
}
fn diff_with_tolerance(
&self,
original: &[ObservableSyscall],
transpiled: &[ObservableSyscall],
) -> TraceDiff {
let orig_len = original.len();
let trans_len = transpiled.len();
let max_len = orig_len.max(trans_len) as f64;
let length_diff = (orig_len as f64 - trans_len as f64).abs();
let length_similarity = if max_len > 0.0 { 1.0 - (length_diff / max_len) } else { 1.0 };
if length_similarity < (1.0 - self.tolerance) {
return TraceDiff {
is_equivalent: false,
similarity_score: length_similarity,
matched_count: 0,
divergence_point: Some(DivergencePoint {
syscall_index: orig_len.min(trans_len),
original_syscall: format!("<end of trace, {orig_len} syscalls>"),
transpiled_syscall: format!("<end of trace, {trans_len} syscalls>"),
}),
explanation: format!(
"Length mismatch: original={orig_len}, transpiled={trans_len} (diff={length_diff})"
),
};
}
let mut matched = 0;
let mut divergence_point = None;
for (i, (orig, trans)) in original.iter().zip(transpiled.iter()).enumerate() {
if orig.is_equivalent(trans) {
matched += 1;
} else if divergence_point.is_none() {
divergence_point = Some(DivergencePoint {
syscall_index: i,
original_syscall: format!("{} -> {}", orig.name, orig.return_value),
transpiled_syscall: format!("{} -> {}", trans.name, trans.return_value),
});
}
}
let min_len = orig_len.min(trans_len);
let match_rate = if min_len > 0 { matched as f64 / min_len as f64 } else { 1.0 };
let is_equivalent = match_rate >= (1.0 - self.tolerance);
let explanation = if is_equivalent {
format!(
"Traces are equivalent: {}/{} syscalls matched ({:.1}%)",
matched,
min_len,
match_rate * 100.0
)
} else {
format!(
"Traces diverged: only {}/{} syscalls matched ({:.1}%)",
matched,
min_len,
match_rate * 100.0
)
};
TraceDiff {
is_equivalent,
similarity_score: match_rate,
matched_count: matched,
divergence_point,
explanation,
}
}
/// Compares the total syscall time of the two traces.
///
/// Each runtime is the sum of `duration_nanos` over all syscall spans of
/// the trace, not only the observable ones.
fn calculate_performance(
    &self,
    original: &UnifiedTrace,
    transpiled: &UnifiedTrace,
) -> PerformanceComparison {
    let total_nanos = |trace: &UnifiedTrace| -> u64 {
        trace.syscall_spans.iter().map(|span| span.duration_nanos).sum()
    };

    PerformanceComparison::new(total_nanos(original), total_nanos(transpiled))
}
}
/// The default validator is the one produced by `SemanticValidator::new`.
impl Default for SemanticValidator {
fn default() -> Self {
Self::new()
}
}
/// Internal result of comparing two observable-syscall sequences.
struct TraceDiff {
/// Whether the sequences were judged equivalent within the tolerance.
is_equivalent: bool,
/// Length similarity when the length check failed, otherwise the match rate.
similarity_score: f64,
/// Number of syscall pairs found equivalent (0 when the length check failed).
matched_count: usize,
/// First mismatching position, or the end of the shorter trace on a length
/// mismatch; `None` when every compared pair matched.
divergence_point: Option<DivergencePoint>,
/// Human-readable summary of the comparison.
explanation: String,
}
// Compile-time checks that the public result types implement `Send` and `Sync`.
static_assertions::assert_impl_all!(ValidationResult: Send, Sync);
static_assertions::assert_impl_all!(PerformanceComparison: Send, Sync);
static_assertions::assert_impl_all!(MemoryDelta: Send, Sync);
static_assertions::assert_impl_all!(DivergencePoint: Send, Sync);
static_assertions::assert_impl_all!(ObservableSyscall: Send, Sync);
// Unit tests for the validator, its helper types and the comparison rules.
#[cfg(test)]
mod tests {
use super::*;
use crate::trace_context::LamportClock;
use std::borrow::Cow;
// Creates an empty trace for the given pid and process name.
fn create_test_trace(pid: i32, name: &str) -> UnifiedTrace {
UnifiedTrace::new(pid, name.to_string())
}
// Appends a syscall span with the given name and return value (and no
// arguments) to `trace`. Despite the helper's name, the syscall does not have
// to be on the observable list (see `test_filter_observable`).
fn add_observable_syscall(trace: &mut UnifiedTrace, name: &'static str, return_value: i64) {
let parent_id = trace.process_span.span_id;
let syscall = SyscallSpan::new(
parent_id,
Cow::Borrowed(name),
vec![],
return_value,
trace.clock.now(),
1000,
None,
&trace.clock,
);
trace.add_syscall(syscall);
}
// `new` uses the default tolerance of 0.05.
#[test]
fn test_validator_default() {
let validator = SemanticValidator::new();
assert!((validator.tolerance() - 0.05).abs() < 1e-6);
}
// An in-range tolerance is stored unchanged.
#[test]
fn test_validator_with_tolerance() {
let validator = SemanticValidator::with_tolerance(0.1);
assert!((validator.tolerance() - 0.1).abs() < 1e-6);
}
// Tolerances above 1.0 are clamped to 1.0.
#[test]
fn test_tolerance_clamp_high() {
let validator = SemanticValidator::with_tolerance(1.5);
assert!((validator.tolerance() - 1.0).abs() < 1e-6);
}
// Negative tolerances are clamped to 0.0.
#[test]
fn test_tolerance_clamp_low() {
let validator = SemanticValidator::with_tolerance(-0.5);
assert!((validator.tolerance() - 0.0).abs() < 1e-6);
}
// Same name and both successful: equivalent even though the arguments and
// the exact return values differ.
#[test]
fn test_observable_syscall_equality() {
let syscall1 = ObservableSyscall {
name: "read".to_string(),
args: vec!["3".to_string(), "buf".to_string(), "100".to_string()],
return_value: 100,
};
let syscall2 = ObservableSyscall {
name: "read".to_string(),
args: vec!["4".to_string(), "buf".to_string(), "100".to_string()],
return_value: 150, };
assert!(syscall1.is_equivalent(&syscall2));
}
// Different syscall names are never equivalent.
#[test]
fn test_observable_syscall_different_name() {
let syscall1 =
ObservableSyscall { name: "read".to_string(), args: vec![], return_value: 100 };
let syscall2 =
ObservableSyscall { name: "write".to_string(), args: vec![], return_value: 100 };
assert!(!syscall1.is_equivalent(&syscall2));
}
// A successful call (non-negative return) is not equivalent to a failed one
// (negative return), even with the same name.
#[test]
fn test_observable_syscall_success_vs_failure() {
let syscall1 = ObservableSyscall {
name: "open".to_string(),
args: vec![],
return_value: 3, };
let syscall2 = ObservableSyscall {
name: "open".to_string(),
args: vec![],
return_value: -1, };
assert!(!syscall1.is_equivalent(&syscall2));
}
// Two traces with the same syscall sequence pass with all three calls matched;
// the differing return value of `open` (3 vs 4) does not matter.
#[test]
fn test_validate_identical() {
let mut trace1 = create_test_trace(1000, "test1");
let mut trace2 = create_test_trace(2000, "test2");
add_observable_syscall(&mut trace1, "open", 3);
add_observable_syscall(&mut trace1, "read", 100);
add_observable_syscall(&mut trace1, "close", 0);
add_observable_syscall(&mut trace2, "open", 4); add_observable_syscall(&mut trace2, "read", 100);
add_observable_syscall(&mut trace2, "close", 0);
trace1.end_process(0);
trace2.end_process(0);
let validator = SemanticValidator::new();
let result = validator.validate(&trace1, &trace2);
match result {
ValidationResult::Pass { confidence, matched_syscalls, .. } => {
assert!(confidence >= 0.95);
assert_eq!(matched_syscalls, 3);
}
ValidationResult::Fail { .. } => panic!("Expected Pass"),
}
}
// The second syscall differs (read vs write), so only 1 of 2 pairs match and
// validation fails with the divergence reported at index 1.
#[test]
fn test_validate_divergent() {
let mut trace1 = create_test_trace(1000, "test1");
let mut trace2 = create_test_trace(2000, "test2");
add_observable_syscall(&mut trace1, "open", 3);
add_observable_syscall(&mut trace1, "read", 100);
add_observable_syscall(&mut trace2, "open", 4);
add_observable_syscall(&mut trace2, "write", 100);
let validator = SemanticValidator::new();
let result = validator.validate(&trace1, &trace2);
match result {
ValidationResult::Pass { .. } => panic!("Expected Fail"),
ValidationResult::Fail { divergence_point, .. } => {
assert_eq!(divergence_point.syscall_index, 1);
}
}
}
// `mmap` and `futex` are not on the observable list and are filtered out;
// the remaining syscalls keep their order.
#[test]
fn test_filter_observable() {
let mut trace = create_test_trace(1000, "test");
add_observable_syscall(&mut trace, "open", 3);
add_observable_syscall(&mut trace, "read", 100);
add_observable_syscall(&mut trace, "mmap", 0x1000);
add_observable_syscall(&mut trace, "futex", 0);
add_observable_syscall(&mut trace, "close", 0);
let validator = SemanticValidator::new();
let observable = validator.filter_observable_syscalls(&trace);
assert_eq!(observable.len(), 3);
assert_eq!(observable[0].name, "open");
assert_eq!(observable[1].name, "read");
assert_eq!(observable[2].name, "close");
}
// Halving the runtime gives a speedup of 2.0 and no memory information.
#[test]
fn test_performance_comparison() {
let perf = PerformanceComparison::new(1000000, 500000);
assert_eq!(perf.original_runtime_nanos, 1000000);
assert_eq!(perf.transpiled_runtime_nanos, 500000);
assert!((perf.speedup - 2.0).abs() < 1e-6);
assert!(perf.memory_delta.is_none());
}
// Going from 1,000,000 to 600,000 bytes is a 40% reduction.
#[test]
fn test_performance_with_memory() {
let perf = PerformanceComparison::new(1000000, 500000).with_memory(1000000, 600000);
assert!(perf.memory_delta.is_some());
let mem = perf.memory_delta.expect("test");
assert_eq!(mem.original_bytes, 1000000);
assert_eq!(mem.transpiled_bytes, 600000);
assert!((mem.reduction_percentage - 40.0).abs() < 0.1);
}
// `Default` yields the same tolerance as `new`.
#[test]
fn test_default_trait() {
let validator: SemanticValidator = Default::default();
assert!((validator.tolerance() - 0.05).abs() < 1e-6);
}
// 20 vs 21 syscalls: the length similarity (1 - 1/21, about 0.952) is still
// within the default 5% tolerance, so validation passes.
#[test]
fn test_validate_length_within_tolerance() {
let mut trace1 = create_test_trace(1000, "test1");
let mut trace2 = create_test_trace(2000, "test2");
for _ in 0..20 {
add_observable_syscall(&mut trace1, "read", 100);
}
for _ in 0..21 {
add_observable_syscall(&mut trace2, "read", 100);
}
trace1.end_process(0);
trace2.end_process(0);
let validator = SemanticValidator::new();
let result = validator.validate(&trace1, &trace2);
match result {
ValidationResult::Pass { .. } => {} ValidationResult::Fail { explanation, .. } => {
panic!("Expected Pass, got Fail: {}", explanation)
}
}
}
// 10 vs 20 syscalls: the length similarity is 0.5, far outside the default
// tolerance, so validation fails on the length check.
#[test]
fn test_validate_length_beyond_tolerance() {
let mut trace1 = create_test_trace(1000, "test1");
let mut trace2 = create_test_trace(2000, "test2");
for _ in 0..10 {
add_observable_syscall(&mut trace1, "read", 100);
}
for _ in 0..20 {
add_observable_syscall(&mut trace2, "read", 100);
}
let validator = SemanticValidator::new();
let result = validator.validate(&trace1, &trace2);
match result {
ValidationResult::Pass { .. } => panic!("Expected Fail"),
ValidationResult::Fail { .. } => {} }
}
// Two traces without any syscalls are equivalent with confidence 1.0.
#[test]
fn test_empty_traces_equivalent() {
let mut trace1 = create_test_trace(1000, "test1");
let mut trace2 = create_test_trace(2000, "test2");
trace1.end_process(0);
trace2.end_process(0);
let validator = SemanticValidator::new();
let result = validator.validate(&trace1, &trace2);
match result {
ValidationResult::Pass { confidence, .. } => {
assert!((confidence - 1.0).abs() < 1e-6);
}
ValidationResult::Fail { .. } => panic!("Expected Pass"),
}
}
// Conversion from a span keeps the name, the number of arguments and the
// return value.
#[test]
fn test_from_syscall_span() {
let clock = LamportClock::new();
let span = SyscallSpan::new(
1,
Cow::Borrowed("open"),
vec![
(Cow::Borrowed("path"), "/tmp/test.txt".to_string()),
(Cow::Borrowed("flags"), "O_RDONLY".to_string()),
],
3,
clock.now(),
1000,
None,
&clock,
);
let obs = ObservableSyscall::from_syscall_span(&span);
assert_eq!(obs.name, "open");
assert_eq!(obs.args.len(), 2);
assert_eq!(obs.return_value, 3);
}
// The third syscall differs (write vs close); 2 of 3 pairs match, which is
// below the default threshold, and the divergence point names both syscalls.
#[test]
fn test_divergence_point_details() {
let mut trace1 = create_test_trace(1000, "test1");
let mut trace2 = create_test_trace(2000, "test2");
add_observable_syscall(&mut trace1, "open", 3);
add_observable_syscall(&mut trace1, "read", 100);
add_observable_syscall(&mut trace1, "write", 50);
add_observable_syscall(&mut trace2, "open", 4);
add_observable_syscall(&mut trace2, "read", 100);
add_observable_syscall(&mut trace2, "close", 0);
let validator = SemanticValidator::new();
let result = validator.validate(&trace1, &trace2);
match result {
ValidationResult::Pass { .. } => panic!("Expected Fail"),
ValidationResult::Fail { divergence_point, .. } => {
assert_eq!(divergence_point.syscall_index, 2);
assert!(divergence_point.original_syscall.contains("write"));
assert!(divergence_point.transpiled_syscall.contains("close"));
}
}
}
// 8 of 10 pairs match (80%): this fails with a 5% tolerance but passes with
// a 25% tolerance.
#[test]
fn test_high_tolerance() {
let mut trace1 = create_test_trace(1000, "test1");
let mut trace2 = create_test_trace(2000, "test2");
for _ in 0..10 {
add_observable_syscall(&mut trace1, "read", 100);
}
for _ in 0..8 {
add_observable_syscall(&mut trace2, "read", 100);
}
add_observable_syscall(&mut trace2, "write", 100);
add_observable_syscall(&mut trace2, "write", 100);
let validator1 = SemanticValidator::with_tolerance(0.05);
let result1 = validator1.validate(&trace1, &trace2);
assert!(matches!(result1, ValidationResult::Fail { .. }));
let validator2 = SemanticValidator::with_tolerance(0.25);
let result2 = validator2.validate(&trace1, &trace2);
assert!(matches!(result2, ValidationResult::Pass { .. }));
}
// `MemoryDelta` is a plain data holder: fields read back as written.
#[test]
fn test_memory_delta() {
let mem =
MemoryDelta { original_bytes: 1000, transpiled_bytes: 800, reduction_percentage: 20.0 };
assert_eq!(mem.original_bytes, 1000);
assert_eq!(mem.transpiled_bytes, 800);
assert!((mem.reduction_percentage - 20.0).abs() < 1e-6);
}
}