use super::{ModelConfig, ModelFixture, ModelFormat};
/// Execution device used for a forward pass.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Device {
    /// Host CPU execution.
    Cpu,
    /// NVIDIA GPU execution via CUDA.
    Cuda,
}

impl std::fmt::Display for Device {
    /// Renders the device as the uppercase label used in report strings.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Device::Cpu => "CPU",
            Device::Cuda => "CUDA",
        };
        f.write_str(label)
    }
}
/// Output of a single forward pass, tagged with its provenance.
#[derive(Debug)]
pub struct ForwardResult {
    /// Raw logits from the model's final layer.
    pub logits: Vec<f32>,
    /// On-disk model format that produced these logits.
    pub format: ModelFormat,
    /// Device that executed the forward pass.
    pub device: Device,
    /// Number of input tokens consumed.
    pub tokens_processed: usize,
}

impl ForwardResult {
    /// True if any logit is NaN.
    pub fn has_nan(&self) -> bool {
        self.logits.iter().any(|x| x.is_nan())
    }

    /// True if any logit is +/- infinity.
    pub fn has_inf(&self) -> bool {
        self.logits.iter().any(|x| x.is_infinite())
    }

    /// Index of the largest logit, or `None` when the output is empty.
    ///
    /// NaN comparisons fall back to `Equal`, so the result is unspecified
    /// (but still `Some`) when NaNs are present; callers should gate on
    /// `has_nan` first.
    pub fn argmax(&self) -> Option<usize> {
        self.logits
            .iter()
            .enumerate()
            .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
            .map(|(idx, _)| idx)
    }

    /// Euclidean (L2) distance between the two logit vectors.
    ///
    /// Returns `f32::INFINITY` when the lengths differ, so mismatched
    /// outputs always fail downstream tolerance checks.
    pub fn l2_distance(&self, other: &ForwardResult) -> f32 {
        if self.logits.len() != other.logits.len() {
            return f32::INFINITY;
        }
        self.logits
            .iter()
            .zip(other.logits.iter())
            .map(|(a, b)| (a - b).powi(2))
            .sum::<f32>()
            .sqrt()
    }

    /// L2 distance relative to `other`'s norm, as a percentage.
    ///
    /// When `other` is (near-)zero the ratio is undefined: report 0% if the
    /// two vectors also coincide (distance ~ 0), and infinity otherwise.
    /// Previously two identical all-zero outputs reported infinity, which
    /// spuriously falsified the F021 parity check.
    pub fn relative_l2(&self, other: &ForwardResult) -> f32 {
        let norm: f32 = other.logits.iter().map(|x| x.powi(2)).sum::<f32>().sqrt();
        let dist = self.l2_distance(other);
        if norm < 1e-10 {
            return if dist < 1e-10 { 0.0 } else { f32::INFINITY };
        }
        dist / norm * 100.0
    }
}
/// Outcome of a single falsification check.
#[derive(Debug)]
pub struct FalsificationResult {
    /// Stable check identifier (e.g. "F012").
    pub id: &'static str,
    /// Human-readable description of the check.
    pub description: &'static str,
    /// Whether the check was corroborated (true) or falsified (false).
    pub passed: bool,
    /// Diagnostic details; "Corroborated" on success.
    pub details: String,
}

impl FalsificationResult {
    /// Builds a passing (corroborated) result.
    pub fn pass(id: &'static str, desc: &'static str) -> Self {
        FalsificationResult {
            passed: true,
            details: String::from("Corroborated"),
            id,
            description: desc,
        }
    }

    /// Builds a failing (falsified) result carrying diagnostic details.
    pub fn fail(id: &'static str, desc: &'static str, details: String) -> Self {
        FalsificationResult {
            passed: false,
            details,
            id,
            description: desc,
        }
    }
}
/// Runs the single-output sanity checks (F012 NaN, F013 Inf, F099 empty)
/// against one forward pass and returns one result per check.
pub fn falsify(result: &ForwardResult) -> Vec<FalsificationResult> {
    // All three checks share the same shape; build each from one closure so
    // the ID/description/detail wiring cannot drift between them.
    let check = |id: &'static str,
                 pass_desc: &'static str,
                 fail_desc: &'static str,
                 falsified: bool,
                 what: &str| {
        if falsified {
            FalsificationResult::fail(
                id,
                fail_desc,
                format!(
                    "FALSIFIED: {}:{} produced {} logits",
                    result.format, result.device, what
                ),
            )
        } else {
            FalsificationResult::pass(id, pass_desc)
        }
    };
    vec![
        check(
            "F012",
            "No NaN in output",
            "Output contains NaN",
            result.has_nan(),
            "NaN",
        ),
        check(
            "F013",
            "No Inf in output",
            "Output contains Inf",
            result.has_inf(),
            "Inf",
        ),
        check(
            "F099",
            "Output is non-empty",
            "Output is empty",
            result.logits.is_empty(),
            "empty",
        ),
    ]
}
/// Compares two forward passes for parity: F015 (argmax agreement) and
/// F021 (relative L2 distance within 10%).
pub fn falsify_parity(a: &ForwardResult, b: &ForwardResult) -> Vec<FalsificationResult> {
    let mut checks = Vec::with_capacity(2);

    // F015: the top-1 token must agree between the two runs.
    let (top_a, top_b) = (a.argmax(), b.argmax());
    let argmax_verdict = if top_a == top_b {
        FalsificationResult::pass("F015", "Argmax matches")
    } else {
        FalsificationResult::fail(
            "F015",
            "Argmax mismatch",
            format!(
                "FALSIFIED: {}:{} argmax={:?} vs {}:{} argmax={:?}",
                a.format, a.device, top_a, b.format, b.device, top_b
            ),
        )
    };
    checks.push(argmax_verdict);

    // F021: relative L2 distance must not exceed 10%. Note the polarity:
    // only a strictly-greater comparison falsifies, so a NaN distance does
    // not trip this check (NaN is caught separately by F012).
    let l2_pct = a.relative_l2(b);
    let l2_verdict = if l2_pct > 10.0 {
        FalsificationResult::fail(
            "F021",
            "L2 distance > 10%",
            format!(
                "FALSIFIED: {}:{} vs {}:{} L2={:.2}%",
                a.format, a.device, b.format, b.device, l2_pct
            ),
        )
    } else {
        FalsificationResult::pass("F021", "L2 within tolerance")
    };
    checks.push(l2_verdict);

    checks
}
use crate::error::Result;
/// Runs a CPU forward pass over a GGUF model fixture.
///
/// Thin convenience wrapper that resolves the fixture's on-disk path and
/// delegates to [`forward_gguf_cpu_path`].
pub fn forward_gguf_cpu(fixture: &ModelFixture, tokens: &[u32]) -> Result<ForwardResult> {
    forward_gguf_cpu_path(fixture.path(), tokens)
}
/// Runs a CPU forward pass over the GGUF model at `path`.
///
/// Memory-maps the file, materializes an owned quantized model from the
/// mapping, and evaluates it on `tokens`.
pub fn forward_gguf_cpu_path(path: &std::path::Path, tokens: &[u32]) -> Result<ForwardResult> {
    use crate::gguf::{MappedGGUFModel, OwnedQuantizedModel};
    let mmap = MappedGGUFModel::from_path(path)?;
    let quantized = OwnedQuantizedModel::from_mapped(&mmap)?;
    Ok(ForwardResult {
        logits: quantized.forward(tokens)?,
        format: ModelFormat::Gguf,
        device: Device::Cpu,
        tokens_processed: tokens.len(),
    })
}
/// Runs a CPU forward pass over an APR model fixture.
///
/// Thin convenience wrapper that resolves the fixture's on-disk path and
/// delegates to [`forward_apr_cpu_path`].
pub fn forward_apr_cpu(fixture: &ModelFixture, tokens: &[u32]) -> Result<ForwardResult> {
    forward_apr_cpu_path(fixture.path(), tokens)
}
/// Runs a CPU forward pass over the APR (v2) model at `path`.
pub fn forward_apr_cpu_path(path: &std::path::Path, tokens: &[u32]) -> Result<ForwardResult> {
    use crate::apr::AprV2Model;
    let apr = AprV2Model::load(path)?;
    Ok(ForwardResult {
        logits: apr.forward(tokens)?,
        format: ModelFormat::Apr,
        device: Device::Cpu,
        tokens_processed: tokens.len(),
    })
}
/// Runs a CPU forward pass over a SafeTensors model fixture.
///
/// Converts the SafeTensors checkpoint into the internal transformer
/// representation, then evaluates it on `tokens`.
pub fn forward_safetensors_cpu(fixture: &ModelFixture, tokens: &[u32]) -> Result<ForwardResult> {
    use crate::safetensors_infer::SafetensorsToAprConverter;
    let model = SafetensorsToAprConverter::convert(fixture.path())?;
    let logits = model.forward(tokens)?;
    Ok(ForwardResult {
        tokens_processed: tokens.len(),
        logits,
        format: ModelFormat::SafeTensors,
        device: Device::Cpu,
    })
}
/// Runs a CUDA forward pass over a GGUF model fixture.
///
/// Loads the quantized model on the CPU first, then wraps it in a CUDA
/// executor on device ordinal 0 (tests assume a single-GPU environment).
/// Only compiled when the `cuda` feature is enabled.
#[cfg(feature = "cuda")]
pub fn forward_gguf_cuda(fixture: &ModelFixture, tokens: &[u32]) -> Result<ForwardResult> {
    use crate::gguf::{MappedGGUFModel, OwnedQuantizedModel, OwnedQuantizedModelCuda};
    let mapped = MappedGGUFModel::from_path(fixture.path())?;
    let cpu_model = OwnedQuantizedModel::from_mapped(&mapped)?;
    // Device ordinal 0; `mut` because forward_cuda mutates executor state.
    let mut cuda_model = OwnedQuantizedModelCuda::new(cpu_model, 0)?;
    let logits = cuda_model.forward_cuda(tokens)?;
    Ok(ForwardResult {
        logits,
        format: ModelFormat::Gguf,
        device: Device::Cuda,
        tokens_processed: tokens.len(),
    })
}
/// Runs a CUDA forward pass over an APR (v2) model fixture.
///
/// Loads the model on the CPU first, then wraps it in a CUDA executor on
/// device ordinal 0. Only compiled when the `cuda` feature is enabled.
#[cfg(feature = "cuda")]
pub fn forward_apr_cuda(fixture: &ModelFixture, tokens: &[u32]) -> Result<ForwardResult> {
    use crate::apr::{AprV2Model, AprV2ModelCuda};
    let apr_model = AprV2Model::load(fixture.path())?;
    // Device ordinal 0; `mut` because forward_cuda mutates executor state.
    let mut cuda_model = AprV2ModelCuda::new(apr_model, 0)?;
    let logits = cuda_model.forward_cuda(tokens)?;
    Ok(ForwardResult {
        logits,
        format: ModelFormat::Apr,
        device: Device::Cuda,
        tokens_processed: tokens.len(),
    })
}
/// SafeTensors CUDA forward pass — intentionally unimplemented.
///
/// Always returns `UnsupportedOperation` so parity suites can detect and
/// skip this path until the SafeTensors CUDA pipeline is exposed through
/// the public API. Only compiled when the `cuda` feature is enabled.
#[cfg(feature = "cuda")]
pub fn forward_safetensors_cuda(_fixture: &ModelFixture, _tokens: &[u32]) -> Result<ForwardResult> {
    Err(crate::error::RealizarError::UnsupportedOperation {
        operation: "forward_safetensors_cuda".to_string(),
        reason: "SafeTensors CUDA path not yet wired to public API".to_string(),
    })
}
include!("falsification_tests_t005_safetensors.rs");