impl ConvertReport {
    /// Assemble a conversion report from the original size and the converted
    /// artifact on disk.
    ///
    /// The converted size is taken from the output file's metadata; if the
    /// file cannot be stat'ed, both the size and the reduction ratio fall
    /// back to zero.
    fn build(
        original_size: usize,
        output_path: &Path,
        tensor_count: usize,
        quantization: Option<QuantizationType>,
        compression: Option<Compression>,
    ) -> Self {
        let converted_size = match fs::metadata(output_path) {
            Ok(meta) => meta.len() as usize,
            Err(_) => 0,
        };
        // Guard against division by zero when the output file is missing/empty.
        let reduction_ratio = match converted_size {
            0 => 0.0,
            nonzero => original_size as f64 / nonzero as f64,
        };
        Self {
            original_size,
            converted_size,
            tensor_count,
            quantization,
            compression,
            reduction_ratio,
        }
    }

    /// Human-readable size reduction (e.g. `"42.5%"`), or `"N/A"` when either
    /// size is unknown (zero).
    #[must_use]
    pub fn reduction_percent(&self) -> String {
        if self.original_size == 0 || self.converted_size == 0 {
            return "N/A".to_string();
        }
        let kept = self.converted_size as f64 / self.original_size as f64;
        format!("{:.1}%", 100.0 * (1.0 - kept))
    }
}
/// Detect a model container format, preferring magic-byte sniffing over the
/// file extension (the extension is only a fallback when sniffing fails).
fn detect_format(path: &Path) -> Result<crate::format::rosetta::FormatType> {
    use crate::format::rosetta::FormatType;
    match FormatType::from_magic(path) {
        Ok(format) => Ok(format),
        Err(_) => FormatType::from_extension(path),
    }
}
pub fn load_model_tensors(path: &Path) -> Result<BTreeMap<String, (Vec<f32>, Vec<usize>)>> {
use crate::format::rosetta::FormatType;
let format = detect_format(path)?;
match format {
FormatType::SafeTensors => load_safetensors_tensors(path),
FormatType::Apr => load_apr_tensors_f32(path),
FormatType::Gguf => load_gguf_tensors_f32(path),
}
}
/// Load all tensors from a model file together with provenance metadata:
/// whether the f32 values are native or were dequantized from a quantized
/// representation.
pub(crate) fn load_model_tensors_provenance(path: &Path) -> Result<TensorProvenance> {
    use crate::format::rosetta::FormatType;
    match detect_format(path)? {
        // SafeTensors data is loaded as native f32.
        FormatType::SafeTensors => {
            let tensors = load_safetensors_tensors(path)?;
            Ok(TensorProvenance::Native(NativeF32Tensors::new(tensors)))
        }
        FormatType::Apr => {
            // Detect quantization first, then load ONCE. The previous version
            // called load_apr_tensors_f32 in both branches, duplicating the
            // full file read/dequantize path.
            let quant = export::detect_apr_quantization(path);
            let tensors = load_apr_tensors_f32(path)?;
            Ok(match quant {
                Some(q) => TensorProvenance::Dequantized(DequantizedTensors::new(tensors, q)),
                None => TensorProvenance::Native(NativeF32Tensors::new(tensors)),
            })
        }
        FormatType::Gguf => {
            let tensors = load_gguf_tensors_f32(path)?;
            // NOTE(review): provenance is hard-coded to Q4K regardless of the
            // GGUF file's actual quantization type — confirm whether the real
            // type should be detected from the file here.
            Ok(TensorProvenance::Dequantized(DequantizedTensors::new(
                tensors,
                QuantizationType::Q4K,
            )))
        }
    }
}
/// Load every tensor from a GGUF file, dequantized to f32.
///
/// Emits per-tensor progress on stderr (interactive terminals only),
/// validates each tensor for NaN/Inf/explosive values, then maps each shape
/// through the GGUF→APR import contract. Returns `name -> (data, shape)`.
fn load_gguf_tensors_f32(path: &Path) -> Result<BTreeMap<String, (Vec<f32>, Vec<usize>)>> {
    let reader = GgufReader::from_file(path)?;
    use std::io::{IsTerminal, Write};
    // Only render the progress line when stderr is a TTY, so piped/redirected
    // logs stay clean.
    let is_tty = std::io::stderr().is_terminal();
    let tensors = reader.get_all_tensors_f32_with_progress(|current, total, name| {
        if is_tty {
            // "\r" rewrites the same terminal line on each callback.
            eprint!("\r [{current}/{total}] Dequantizing {name}");
            let _ = std::io::stderr().flush();
        }
    })?;
    if is_tty {
        // Blank out the progress line with spaces, then return the cursor.
        eprint!("\r{}\r", " ".repeat(72));
        let _ = std::io::stderr().flush();
    }
    // Stop-the-line validation: reject NaN/Inf/corrupt tensors before import.
    for (name, (data, _shape)) in &tensors {
        validate_tensor_values(name, data)?;
    }
    use crate::format::layout_contract::enforce_import_contract;
    // Normalize every tensor shape to the APR layout contract. GGUF data is
    // expected to already be in the correct order, so the contract must never
    // request a data transpose — that would indicate a contract bug.
    let tensors = tensors
        .into_iter()
        .map(|(name, (data, shape))| {
            let (apr_shape, needs_data_transpose) = enforce_import_contract(&name, &shape, 0, 0);
            assert!(
                !needs_data_transpose,
                "CONTRACT BUG: enforce_import_contract returned needs_data_transpose=true for '{}'. \
                GGUF→APR NEVER needs data transpose.",
                name
            );
            (name, (data, apr_shape))
        })
        .collect();
    Ok(tensors)
}
/// Load every tensor from an APR v2 file, dequantized to f32, as
/// `name -> (data, shape)`. Each tensor is validated before being returned.
fn load_apr_tensors_f32(path: &Path) -> Result<BTreeMap<String, (Vec<f32>, Vec<usize>)>> {
    use crate::format::v2::AprV2Reader;
    // Read and parse the whole container up front.
    let bytes = fs::read(path).map_err(|e| AprenderError::FormatError {
        message: format!("Failed to read APR file: {e}"),
    })?;
    let reader = AprV2Reader::from_bytes(&bytes).map_err(|e| AprenderError::FormatError {
        message: format!("Failed to parse APR file: {e:?}"),
    })?;
    let mut out = BTreeMap::new();
    for name in reader.tensor_names() {
        // The index entry supplies the shape; the data is dequantized to f32.
        let shape = reader
            .get_tensor(name)
            .ok_or_else(|| AprenderError::FormatError {
                message: format!("Tensor '{}' missing from index", name),
            })?
            .shape
            .clone();
        let values = reader
            .get_tensor_as_f32(name)
            .ok_or_else(|| AprenderError::FormatError {
                message: format!("Failed to dequantize tensor '{}'", name),
            })?;
        // Reject NaN/Inf/corrupted tensors before they propagate downstream.
        validate_tensor_values(name, &values)?;
        out.insert(name.to_string(), (values, shape));
    }
    Ok(out)
}
/// Validate tensor values before passing them downstream (PMAT-187).
///
/// Rejects tensors containing NaN or Inf, and tensors whose mean magnitude
/// exceeds 100 (a sign of corrupted quantization scale factors). Empty
/// tensors are accepted as-is.
///
/// # Errors
/// Returns `AprenderError::FormatError` describing the first defect found.
pub(crate) fn validate_tensor_values(name: &str, data: &[f32]) -> Result<()> {
    if data.is_empty() {
        return Ok(());
    }
    // Single pass: count defects and accumulate the sum of finite values.
    let mut nan_count = 0usize;
    let mut inf_count = 0usize;
    let mut sum: f64 = 0.0;
    for &value in data {
        if value.is_nan() {
            nan_count += 1;
        } else if value.is_infinite() {
            inf_count += 1;
        } else {
            sum += f64::from(value);
        }
    }
    if nan_count > 0 {
        return Err(AprenderError::FormatError {
            message: format!(
                "PMAT-187: Tensor '{}' contains {} NaN values (data corruption detected). \
                Toyota Way: Stop the line - do not pass defects downstream.",
                name, nan_count
            ),
        });
    }
    if inf_count > 0 {
        return Err(AprenderError::FormatError {
            message: format!(
                "PMAT-187: Tensor '{}' contains {} Inf values (numerical overflow detected). \
                Toyota Way: Stop the line - do not pass defects downstream.",
                name, inf_count
            ),
        });
    }
    // Past the early returns, nan_count == inf_count == 0, so every element
    // contributed to `sum`, and the empty-check guarantees data.len() > 0.
    // (The previous `data.len() - nan_count - inf_count` was dead arithmetic:
    // it always equaled data.len().)
    let mean = sum / data.len() as f64;
    if mean.abs() > 100.0 {
        return Err(AprenderError::FormatError {
            message: format!(
                "PMAT-187: Tensor '{}' has explosive mean={:.2e} (expected [-100, 100]). \
                This indicates corrupted quantization scale factors. \
                Toyota Way: Stop the line - do not pass defects downstream.",
                name, mean
            ),
        });
    }
    Ok(())
}
/// Decode a packed little-endian IEEE f16 byte stream into f32 values.
/// Any trailing odd byte is ignored; `_num_elements` is unused (the length
/// is derived from the byte slice).
#[cfg(test)]
fn dequantize_f16_to_f32(bytes: &[u8], _num_elements: usize) -> Vec<f32> {
    let mut out = Vec::with_capacity(bytes.len() / 2);
    for pair in bytes.chunks_exact(2) {
        let bits = u16::from_le_bytes([pair[0], pair[1]]);
        out.push(f16_to_f32(bits));
    }
    out
}
/// Decode a packed little-endian bf16 byte stream into f32 values.
/// bf16 is the top 16 bits of an f32, so widening is a single shift.
/// Any trailing odd byte is ignored; `_num_elements` is unused.
#[cfg(test)]
fn dequantize_bf16_to_f32(bytes: &[u8], _num_elements: usize) -> Vec<f32> {
    let mut out = Vec::with_capacity(bytes.len() / 2);
    for pair in bytes.chunks_exact(2) {
        let high = u16::from_le_bytes([pair[0], pair[1]]) as u32;
        out.push(f32::from_bits(high << 16));
    }
    out
}
/// Decode GGUF Q8_0 blocks (f16 scale + 32 signed bytes per block) to f32.
/// Incomplete trailing blocks are skipped; output is capped at
/// `num_elements`. NaN/Inf/subnormal scales are flushed to zero.
#[cfg(test)]
fn dequantize_q8_0_to_f32(bytes: &[u8], num_elements: usize) -> Vec<f32> {
    const BLOCK_SIZE: usize = 32;
    const BLOCK_BYTES: usize = 2 + 32;
    let num_blocks = (num_elements + BLOCK_SIZE - 1) / BLOCK_SIZE;
    let mut result = Vec::with_capacity(num_elements);
    // chunks_exact yields only complete blocks, matching the original's
    // bounds check; take() caps at the number of blocks actually needed.
    'blocks: for block in bytes.chunks_exact(BLOCK_BYTES).take(num_blocks) {
        let raw_scale = f16_to_f32(u16::from_le_bytes([block[0], block[1]]));
        // Flush degenerate scales so corrupt blocks decode to zeros.
        let scale = if raw_scale.is_nan() || raw_scale.is_infinite() || raw_scale.abs() < F16_MIN_NORMAL
        {
            0.0
        } else {
            raw_scale
        };
        for &quant in &block[2..] {
            if result.len() >= num_elements {
                break 'blocks;
            }
            result.push((quant as i8) as f32 * scale);
        }
    }
    result
}
/// Total in-memory size in bytes of all tensors (f32 elements, 4 bytes each).
pub(crate) fn calculate_tensor_size(tensors: &BTreeMap<String, (Vec<f32>, Vec<usize>)>) -> usize {
    let mut total = 0usize;
    for (data, _shape) in tensors.values() {
        total += data.len() * std::mem::size_of::<f32>();
    }
    total
}
/// Quantize all tensors with the requested scheme, returning f32 buffers that
/// carry the quantization round-trip error.
///
/// Embedding and LM-head weights are exempted and kept at full precision;
/// everything else is quantized and (for Q4K) immediately dequantized back.
pub(crate) fn quantize_tensors(
    tensors: &NativeF32Tensors,
    quant_type: &QuantizationType,
) -> Result<NativeF32Tensors> {
    contract_pre_quantization_bounds!();
    // Names that must stay full precision (embeddings + output head).
    let keep_full_precision = |name: &str| {
        name.contains("embed_tokens")
            || name.contains("token_embd")
            || name.contains("wte")
            || name.contains("word_embeddings")
            || name.contains("lm_head")
            || name == "output.weight"
    };
    let mut quantized = BTreeMap::new();
    for (name, (data, shape)) in tensors.as_ref() {
        let new_data = if keep_full_precision(name) {
            data.clone()
        } else {
            match quant_type {
                QuantizationType::Fp16 => quantize_fp16(data),
                QuantizationType::Int8 => quantize_int8(data),
                QuantizationType::Int4 => quantize_int4(data),
                // Q4K round-trips through the packed representation.
                QuantizationType::Q4K => dequantize_q4_k_to_f32(&quantize_q4_k(data), data.len()),
            }
        };
        quantized.insert(name.clone(), (new_data, shape.clone()));
    }
    let out = NativeF32Tensors::new(quantized);
    contract_post_quantization_bounds!(&out);
    Ok(out)
}
/// Simulate fp16 storage: round-trip each value through f16 bits and back,
/// keeping the result in an f32 buffer that carries the precision loss.
fn quantize_fp16(data: &[f32]) -> Vec<f32> {
    let mut out = Vec::with_capacity(data.len());
    for &v in data {
        out.push(f16_to_f32(f32_to_f16(v)));
    }
    out
}
/// Encode a normal f32 (already split into fields) as f16 bits.
///
/// * `sign` — f16 sign bit, already positioned at 0x8000.
/// * `new_exp` — exponent rebased to the f16 bias (f32 exp − 127 + 15).
/// * `mantissa` — the 23-bit f32 mantissa.
///
/// Values above the f16 range become infinity; in-range normals are packed
/// by truncating the mantissa (no rounding, preserving existing behavior);
/// values below the normal range are encoded as round-half-up subnormals,
/// underflowing to signed zero.
fn f32_normal_to_f16(sign: u16, new_exp: i32, mantissa: u32) -> u16 {
    // Exponent 31 is the f16 Inf/NaN encoding: overflow saturates to infinity.
    if new_exp >= 31 {
        return sign | 0x7C00;
    }
    // Normal range: pack exponent and the top 10 mantissa bits.
    if new_exp > 0 {
        return sign | ((new_exp as u16) << 10) | ((mantissa >> 13) as u16);
    }
    // Subnormal range: restore the implicit leading 1, then shift down so the
    // value is expressed in units of 2^-24 (the smallest f16 subnormal).
    let full_mantissa = mantissa | 0x800000;
    let shift = 14 - new_exp;
    if shift > 24 {
        // Too small to round up to even the smallest subnormal: signed zero.
        return sign;
    }
    // Round half-up at the bit that falls off the bottom.
    let round_bit = 1u32 << (shift - 1);
    let subnormal = (full_mantissa.saturating_add(round_bit) >> shift) as u16;
    // BUG FIX: rounding can carry into bit 10 — the subnormal overflows to
    // the smallest normal (0x0400). The previous `& 0x3FF` mask dropped that
    // carry, collapsing f32 values just below 2^-14 to zero instead of
    // 2^-14. `subnormal` is bounded by 0x400 (full_mantissa < 2^24 and
    // round_bit = 2^(shift-1)), so OR-ing it in unmasked is always a valid
    // f16 encoding.
    sign | subnormal
}
/// Convert an f32 to IEEE 754 binary16 bits.
///
/// Zeros and f32 subnormals flush to signed zero (they are far below the f16
/// normal range); infinities and NaNs keep their class, with NaNs forced
/// quiet; all other values are rebased via [`f32_normal_to_f16`].
fn f32_to_f16(value: f32) -> u16 {
    let bits = value.to_bits();
    let sign = ((bits >> 16) & 0x8000) as u16;
    let exp = (bits >> 23) & 0xFF;
    let mantissa = bits & 0x7FFFFF;
    match exp {
        // Zero or f32 subnormal: flush to signed zero.
        0 => sign,
        // Inf / NaN: exponent all-ones; set a quiet-NaN payload bit if NaN.
        0xFF => {
            let payload = if mantissa != 0 { 0x0200 } else { 0 };
            sign | 0x7C00 | payload
        }
        // Normal value: rebias the exponent from 127 to 15.
        _ => f32_normal_to_f16(sign, exp as i32 - 127 + 15, mantissa),
    }
}