#![allow(non_camel_case_types)]
use std::collections::HashMap;
use std::fmt;
/// One discrete choice that can be taken while converting a model.
///
/// Values are `Copy` and hashable; `TarantulaTracker` uses them as the keys
/// of its per-decision pass/fail statistics.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ConversionDecision {
    // Variants named `Quant…` (meaning is carried by the name only here).
    QuantQ4_0_Block32,
    QuantQ4_K_Block32,
    QuantQ4_K_Block256,
    QuantQ6_K_Block256,
    QuantQ8_0_Block32,
    // Variants named `Dtype…`.
    DtypeF16,
    DtypeF32,
    DtypeBF16,
    // Variants named `Layout…` / `Transpose…`.
    LayoutRowMajor,
    LayoutColMajor,
    TransposeDims,
    TransposeNone,
    // Vocabulary, header and metadata related variants.
    VocabMerge,
    HeaderRewrite,
    MetadataPreserve,
    GgufMagicWrite,
    SafeTensorsJsonWrite,
    AprV2HeaderWrite,
    // Variants named `Tensor…`.
    TensorZeroPad,
    TensorTruncate,
    TensorReshape,
}

impl fmt::Display for ConversionDecision {
    /// Writes the variant using its derived `Debug` representation, so the
    /// displayed text is the variant identifier itself.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_fmt(format_args!("{self:?}"))
    }
}
/// Pass/fail tally kept for a single decision.
#[derive(Debug, Clone, Default)]
pub struct DecisionStats {
    /// Number of recorded passing outcomes.
    pub passed: usize,
    /// Number of recorded failing outcomes.
    pub failed: usize,
}

impl DecisionStats {
    /// Returns the fraction of recorded outcomes that passed.
    ///
    /// Yields `0.0` when nothing has been recorded, so the result is never
    /// the NaN a `0 / 0` division would produce.
    #[must_use]
    pub fn pass_rate(&self) -> f32 {
        match self.passed + self.failed {
            0 => 0.0,
            observed => self.passed as f32 / observed as f32,
        }
    }
}
/// Accumulates pass/fail outcomes per [`ConversionDecision`] and derives a
/// Tarantula-style suspiciousness score and a triage [`Priority`] from them.
#[derive(Debug, Clone, Default)]
pub struct TarantulaTracker {
    /// Per-decision tallies; a decision gets an entry the first time it is
    /// passed to `record_pass` or `record_fail`.
    stats: HashMap<ConversionDecision, DecisionStats>,
    /// Number of `record_pass` calls.
    total_passed: usize,
    /// Number of `record_fail` calls.
    total_failed: usize,
}

impl TarantulaTracker {
    /// Creates an empty tracker (same as `Default::default()`).
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Records one passing run in which `decisions` were taken.
    ///
    /// The run counter is incremented once; each decision's `passed` tally is
    /// incremented once per occurrence in the slice.
    pub fn record_pass(&mut self, decisions: &[ConversionDecision]) {
        self.total_passed += 1;
        for &decision in decisions {
            self.stats.entry(decision).or_default().passed += 1;
        }
    }

    /// Records one failing run in which `decisions` were taken.
    ///
    /// The run counter is incremented once; each decision's `failed` tally is
    /// incremented once per occurrence in the slice.
    pub fn record_fail(&mut self, decisions: &[ConversionDecision]) {
        self.total_failed += 1;
        for &decision in decisions {
            self.stats.entry(decision).or_default().failed += 1;
        }
    }

    /// Returns the suspiciousness of `decision` in `[0.0, 1.0]`.
    ///
    /// Computed as `f / (f + p)` where `f = failed(decision) / total_failed`
    /// and `p = passed(decision) / total_passed`.
    ///
    /// Returns `0.0` when no failing run or no passing run has been recorded
    /// yet, and when the decision has never been recorded at all.
    #[must_use]
    pub fn suspiciousness(&self, decision: ConversionDecision) -> f32 {
        if self.total_failed == 0 || self.total_passed == 0 {
            return 0.0;
        }
        // Borrow the two counters instead of cloning a whole `DecisionStats`;
        // an unseen decision behaves like an all-zero tally.
        let (failed, passed) = self
            .stats
            .get(&decision)
            .map_or((0, 0), |s| (s.failed, s.passed));
        let failed_ratio = failed as f32 / self.total_failed as f32;
        let passed_ratio = passed as f32 / self.total_passed as f32;
        let denominator = failed_ratio + passed_ratio;
        if denominator == 0.0 {
            0.0
        } else {
            failed_ratio / denominator
        }
    }

    /// Returns every recorded decision paired with its suspiciousness, most
    /// suspicious first.
    ///
    /// Decisions with equal scores are ordered by their declaration order in
    /// [`ConversionDecision`], so the result is reproducible across runs
    /// rather than following `HashMap` iteration order.
    #[must_use]
    pub fn ranked_suspiciousness(&self) -> Vec<(ConversionDecision, f32)> {
        let mut ranked: Vec<_> = self
            .stats
            .keys()
            .map(|&d| (d, self.suspiciousness(d)))
            .collect();
        ranked.sort_by(|a, b| {
            b.1.partial_cmp(&a.1)
                .unwrap_or(std::cmp::Ordering::Equal)
                // Tie-break on the enum discriminant for a deterministic order.
                .then_with(|| (a.0 as usize).cmp(&(b.0 as usize)))
        });
        ranked
    }

    /// Maps the observed failure rate of `decision` to a [`Priority`].
    ///
    /// The failure rate is `failed / (passed + failed)` for that decision:
    /// above 20% is `Critical`, above 10% is `High`, above 5% is `Medium`,
    /// anything else is `Low`. Decisions with no recorded outcomes are `Low`.
    #[must_use]
    pub fn priority(&self, decision: ConversionDecision) -> Priority {
        let stats = match self.stats.get(&decision) {
            Some(stats) => stats,
            None => return Priority::Low,
        };
        let total = stats.passed + stats.failed;
        if total == 0 {
            return Priority::Low;
        }
        let failure_rate = stats.failed as f32 / total as f32;
        if failure_rate > 0.20 {
            Priority::Critical
        } else if failure_rate > 0.10 {
            Priority::High
        } else if failure_rate > 0.05 {
            Priority::Medium
        } else {
            Priority::Low
        }
    }
}
/// Triage level returned by `TarantulaTracker::priority`.
///
/// Variants are declared from least to most urgent, so the derived `Ord`
/// yields `Low < Medium < High < Critical`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Priority {
/// Failure rate of at most 5%, or no outcomes recorded for the decision.
Low,
/// Failure rate above 5% and at most 10%.
Medium,
/// Failure rate above 10% and at most 20%.
High,
/// Failure rate above 20%.
Critical,
}
/// Summary statistics of a tensor's values, as produced by
/// [`TensorFeatures::from_data`].
///
/// NaN and infinite elements are counted separately and excluded from every
/// other statistic except the denominator of `sparsity`.
#[derive(Debug, Clone, Default)]
pub struct TensorFeatures {
    /// Mean of the finite elements (`0.0` when there are none).
    pub mean: f32,
    /// Sample standard deviation (`n - 1` denominator) of the finite elements.
    pub std: f32,
    /// Smallest finite element (`0.0` when there are none).
    pub min: f32,
    /// Largest finite element (`0.0` when there are none).
    pub max: f32,
    /// Excess kurtosis (fourth standardized moment minus 3).
    pub kurtosis: f32,
    /// Third standardized moment.
    pub skewness: f32,
    /// Number of exactly-zero elements divided by the total element count.
    pub sparsity: f32,
    /// Sum of absolute values of the finite elements.
    pub l1_norm: f32,
    /// Euclidean norm of the finite elements.
    pub l2_norm: f32,
    /// Largest absolute value among the finite elements.
    pub inf_norm: f32,
    /// Number of NaN elements, stored as a float.
    pub nan_count: f32,
    /// Number of infinite elements, stored as a float.
    pub inf_count: f32,
}

impl TensorFeatures {
    /// Computes the statistics of `data` in two passes.
    ///
    /// An empty slice yields `Self::default()` (all fields zero). All
    /// accumulation is done in `f64`; only the final results are narrowed to
    /// `f32`, so large or tiny finite inputs do not overflow or underflow in
    /// intermediate steps.
    #[must_use]
    pub fn from_data(data: &[f32]) -> Self {
        if data.is_empty() {
            return Self::default();
        }

        let mut sum = 0.0f64;
        let mut min = f32::INFINITY;
        let mut max = f32::NEG_INFINITY;
        let mut zeros = 0usize;
        let mut nans = 0usize;
        let mut infs = 0usize;
        let mut l1 = 0.0f64;
        let mut l2 = 0.0f64;
        let mut inf_norm = 0.0f32;

        // Pass 1: counts, extrema, sum and norms over the finite elements.
        for &x in data {
            if x.is_nan() {
                nans += 1;
                continue;
            }
            if x.is_infinite() {
                infs += 1;
                continue;
            }
            sum += f64::from(x);
            if x < min {
                min = x;
            }
            if x > max {
                max = x;
            }
            if x == 0.0 {
                zeros += 1;
            }
            let abs_x = x.abs();
            let abs_wide = f64::from(abs_x);
            l1 += abs_wide;
            // Square in f64: `abs_x * abs_x` overflows f32 once |x| > ~1.8e19.
            l2 += abs_wide * abs_wide;
            if abs_x > inf_norm {
                inf_norm = abs_x;
            }
        }

        // Integer arithmetic keeps the count exact beyond 2^24 elements.
        let valid = data.len() - nans - infs;
        let valid_n = valid as f64;
        let mean = if valid > 0 {
            (sum / valid_n) as f32
        } else {
            0.0
        };

        // Pass 2: central moments about the (f32-rounded) mean. Subtracting
        // the rounded mean keeps constant tensors at exactly zero variance.
        let mean_wide = f64::from(mean);
        let mut m2 = 0.0f64;
        let mut m3 = 0.0f64;
        let mut m4 = 0.0f64;
        for &x in data {
            if x.is_nan() || x.is_infinite() {
                continue;
            }
            let diff = f64::from(x) - mean_wide;
            m2 += diff * diff;
            m3 += diff * diff * diff;
            m4 += diff * diff * diff * diff;
        }

        let variance = if valid > 1 {
            (m2 / (valid_n - 1.0)) as f32
        } else {
            0.0
        };
        let std = variance.sqrt();

        let skewness = if std > 0.0 && valid > 0 {
            // Cube in f64: `std * std * std` underflows to 0 in f32 for tiny
            // spreads, which would turn the ratio into NaN or infinity.
            let std_wide = f64::from(std);
            ((m3 / valid_n) / (std_wide * std_wide * std_wide)) as f32
        } else {
            0.0
        };
        let kurtosis = if std > 0.0 && valid > 0 {
            let var_wide = f64::from(variance);
            ((m4 / valid_n) / (var_wide * var_wide)) as f32 - 3.0
        } else {
            0.0
        };

        Self {
            mean,
            std,
            // `min`/`max` keep their infinite seeds only when no finite
            // element was seen; report 0.0 in that case.
            min: if min.is_finite() { min } else { 0.0 },
            max: if max.is_finite() { max } else { 0.0 },
            kurtosis,
            skewness,
            sparsity: zeros as f32 / data.len() as f32,
            l1_norm: l1 as f32,
            // Take the root before narrowing so a large sum of squares does
            // not saturate to infinity.
            l2_norm: l2.sqrt() as f32,
            inf_norm,
            nan_count: nans as f32,
            inf_count: infs as f32,
        }
    }

    /// Returns the twelve features as a flat vector in declaration order:
    /// mean, std, min, max, kurtosis, skewness, sparsity, L1, L2, infinity
    /// norm, NaN count, infinity count.
    #[must_use]
    pub fn to_vec(&self) -> Vec<f32> {
        vec![
            self.mean,
            self.std,
            self.min,
            self.max,
            self.kurtosis,
            self.skewness,
            self.sparsity,
            self.l1_norm,
            self.l2_norm,
            self.inf_norm,
            self.nan_count,
            self.inf_count,
        ]
    }

    /// Returns the first violation found, or `None` when the features look
    /// clean.
    ///
    /// Checks are made in this order: any NaN elements, any infinite
    /// elements, then a zero standard deviation combined with a non-zero mean.
    #[must_use]
    pub fn has_jidoka_violation(&self) -> Option<JidokaViolation> {
        if self.nan_count > 0.0 {
            return Some(JidokaViolation::NaN {
                count: self.nan_count as usize,
            });
        }
        if self.inf_count > 0.0 {
            return Some(JidokaViolation::Inf {
                count: self.inf_count as usize,
            });
        }
        if self.std == 0.0 && self.mean != 0.0 {
            return Some(JidokaViolation::ZeroVariance { mean: self.mean });
        }
        None
    }
}
include!("jidoka.rs");
include!("error_pattern.rs");
include!("canary.rs");