pub mod gguf_mobile;
pub mod int4;
pub use gguf_mobile::{
GgufHeader, GgufLayerInfo, GgufMobileConfig, GgufMobileLoader, GgufQuantType, GgufReader,
};
pub use int4::{
pack_int4, unpack_int4, Int4Config, Int4Gemv, Int4QuantConfig, Int4Tensor, MobileQuantError,
QuantizationMetrics,
};
#[cfg(test)]
mod tests {
    use super::*;

    // ---- int4 pack / unpack ----

    #[test]
    fn test_mod_pack_unpack_roundtrip_full_range() {
        // Every representable symmetric int4 value survives a pack/unpack cycle.
        let originals: Vec<i8> = (-7..=7).collect();
        let packed_bytes = pack_int4(&originals);
        let restored = unpack_int4(&packed_bytes, originals.len());
        assert_eq!(restored, originals, "full [-7,7] roundtrip failed");
    }

    #[test]
    fn test_mod_pack_int4_zero_values() {
        // An all-zero slice packs and unpacks to itself.
        let zeros = vec![0i8; 4];
        let packed_bytes = pack_int4(&zeros);
        assert_eq!(unpack_int4(&packed_bytes, 4), zeros);
    }

    #[test]
    fn test_mod_pack_int4_single_value() {
        // An odd element count (one lone nibble) still round-trips.
        let packed_bytes = pack_int4(&[3i8]);
        let restored = unpack_int4(&packed_bytes, 1);
        assert_eq!(restored[0], 3i8);
    }

    #[test]
    fn test_mod_unpack_count_shorter_than_packed() {
        // Requesting fewer elements than were packed truncates the output.
        let source: Vec<i8> = vec![-1, 2, -3, 4];
        let packed_bytes = pack_int4(&source);
        let truncated = unpack_int4(&packed_bytes, 2);
        assert_eq!(truncated.len(), 2);
        assert_eq!(truncated[0], -1);
        assert_eq!(truncated[1], 2);
    }

    // ---- Int4Config ----

    #[test]
    fn test_mod_int4_config_default_group_size() {
        assert_eq!(Int4Config::default().group_size, 128);
    }

    #[test]
    fn test_mod_int4_config_symmetric_default() {
        // Defaults: symmetric quantization, hence no zero point.
        let defaults = Int4Config::default();
        assert!(defaults.symmetric);
        assert!(!defaults.zero_point);
    }

    #[test]
    fn test_mod_int4_config_per_channel_off_default() {
        assert!(!Int4Config::default().per_channel);
    }

    #[test]
    fn test_mod_int4_config_custom() {
        // Every field is settable away from its default.
        let custom = Int4Config {
            group_size: 64,
            zero_point: true,
            symmetric: false,
            per_channel: true,
        };
        assert_eq!(custom.group_size, 64);
        assert!(custom.zero_point);
        assert!(!custom.symmetric);
        assert!(custom.per_channel);
    }

    // ---- MobileQuantError ----

    #[test]
    fn test_mod_mobile_quant_error_empty_input() {
        // Quantizing an empty slice must be rejected up front.
        let outcome = Int4Tensor::from_config(&[], &Int4Config::default());
        assert!(matches!(outcome, Err(MobileQuantError::EmptyInput)));
    }

    #[test]
    fn test_mod_mobile_quant_error_group_size_zero() {
        // group_size == 0 is invalid and the offending value is echoed back.
        let bad_cfg = Int4Config {
            group_size: 0,
            ..Default::default()
        };
        let outcome = Int4Tensor::from_config(&[1.0, 2.0], &bad_cfg);
        assert!(matches!(outcome, Err(MobileQuantError::InvalidGroupSize(0))));
    }

    #[test]
    fn test_mod_mobile_quant_error_shape_mismatch_message() {
        // The Display output should surface both the expected and actual counts.
        let err = MobileQuantError::ShapeMismatch {
            expected: 10,
            got: 5,
        };
        let rendered = err.to_string();
        assert!(rendered.contains("10") && rendered.contains('5'));
    }

    // ---- Int4Gemv ----

    #[test]
    fn test_mod_gemv_output_shape_2x4() {
        // Output vector length equals the number of matrix rows.
        let (rows, cols) = (2usize, 4usize);
        let weights = vec![0x88u8; rows * cols.div_ceil(2)];
        let row_scales = vec![1.0f32; rows];
        let activations = vec![1.0f32; cols];
        let product = Int4Gemv::compute(&weights, &row_scales, &activations, rows, cols);
        assert_eq!(product.len(), rows);
    }

    #[test]
    fn test_mod_gemv_zero_input_produces_zero() {
        // A zero activation vector annihilates every dot product.
        let (rows, cols) = (3usize, 6usize);
        let weights = vec![0xAAu8; rows * cols.div_ceil(2)];
        let row_scales = vec![1.0f32; rows];
        let activations = vec![0.0f32; cols];
        let product = Int4Gemv::compute(&weights, &row_scales, &activations, rows, cols);
        product.iter().for_each(|&v| {
            assert!(v.abs() < 1e-6, "with zero input, output should be zero, got {v}");
        });
    }

    #[test]
    fn test_mod_gemv_scale_zero_produces_zero() {
        // Zero per-row scales zero out the whole result regardless of input.
        let (rows, cols) = (2usize, 4usize);
        let weights = vec![0xAAu8; rows * cols.div_ceil(2)];
        let row_scales = vec![0.0f32; rows];
        let activations = vec![1.0f32; cols];
        let product = Int4Gemv::compute(&weights, &row_scales, &activations, rows, cols);
        product.iter().for_each(|&v| {
            assert!(v.abs() < 1e-6, "with scale=0, output should be zero, got {v}");
        });
    }

    // ---- GgufMobileConfig / GgufLayerInfo ----

    #[test]
    fn test_mod_gguf_mobile_config_default() {
        // Defaults: positive size budget, mmap on, no offloaded layers.
        let defaults = GgufMobileConfig::default();
        assert!(defaults.max_model_size_mb > 0.0);
        assert!(defaults.mmap);
        assert_eq!(defaults.offload_layers, 0);
    }

    #[test]
    fn test_mod_gguf_mobile_config_custom() {
        let custom = GgufMobileConfig {
            max_model_size_mb: 512.0,
            offload_layers: 4,
            mmap: false,
        };
        assert_eq!(custom.max_model_size_mb, 512.0);
        assert_eq!(custom.offload_layers, 4);
        assert!(!custom.mmap);
    }

    #[test]
    fn test_mod_gguf_layer_info_construction() {
        // The constructor stores each field verbatim.
        let layer = GgufLayerInfo::new("layer.weight", GgufQuantType::Q4_0, 1024, vec![32, 32]);
        assert_eq!(layer.name, "layer.weight");
        assert_eq!(layer.quant_type, GgufQuantType::Q4_0);
        assert_eq!(layer.size_bytes, 1024);
        assert_eq!(layer.tensor_shape, vec![32, 32]);
    }

    // ---- GgufMobileLoader memory helpers ----

    #[test]
    fn test_mod_estimate_memory_requirement_empty() {
        // No layers means no memory.
        assert_eq!(GgufMobileLoader::estimate_memory_requirement(&[]), 0);
    }

    #[test]
    fn test_mod_estimate_memory_requirement_sum() {
        // The estimate is the plain sum of per-layer byte sizes.
        let layers = [
            GgufLayerInfo::new("a", GgufQuantType::Q4_0, 1000, vec![]),
            GgufLayerInfo::new("b", GgufQuantType::Q8_0, 2000, vec![]),
            GgufLayerInfo::new("c", GgufQuantType::F16, 3000, vec![]),
        ];
        assert_eq!(GgufMobileLoader::estimate_memory_requirement(&layers), 6000);
    }

    #[test]
    fn test_mod_layers_that_fit_all_fit() {
        // A 10 MB budget comfortably holds 300 bytes, so both indices come back.
        let layers = [
            GgufLayerInfo::new("a", GgufQuantType::Q4_0, 100, vec![]),
            GgufLayerInfo::new("b", GgufQuantType::Q4_0, 200, vec![]),
        ];
        let selected = GgufMobileLoader::layers_that_fit(&layers, 10.0);
        assert_eq!(selected, vec![0, 1]);
    }

    #[test]
    fn test_mod_layers_that_fit_tight_budget() {
        // A budget of exactly one byte admits only the 1-byte layer.
        let one_byte_as_mb = 1.0 / (1024.0 * 1024.0);
        let layers = [
            GgufLayerInfo::new("tiny", GgufQuantType::Q4_0, 1, vec![]),
            GgufLayerInfo::new("big", GgufQuantType::F32, 10 * 1024 * 1024, vec![]),
        ];
        let selected = GgufMobileLoader::layers_that_fit(&layers, one_byte_as_mb);
        assert_eq!(selected, vec![0]);
    }

    #[test]
    fn test_mod_layers_that_fit_none_fit() {
        // A 100 MB layer can never fit under a 0.001 MB budget.
        let layers = [GgufLayerInfo::new(
            "huge",
            GgufQuantType::F32,
            100 * 1024 * 1024,
            vec![],
        )];
        let selected = GgufMobileLoader::layers_that_fit(&layers, 0.001);
        assert!(selected.is_empty());
    }

    // ---- effective bits per weight (one case per quant type) ----

    #[test]
    fn test_mod_effective_bits_q4_0() {
        let bits = GgufMobileLoader::effective_bits_per_weight(GgufQuantType::Q4_0);
        assert_eq!(bits, 4.5);
    }

    #[test]
    fn test_mod_effective_bits_q4_1() {
        let bits = GgufMobileLoader::effective_bits_per_weight(GgufQuantType::Q4_1);
        assert_eq!(bits, 5.0);
    }

    #[test]
    fn test_mod_effective_bits_q5_0() {
        let bits = GgufMobileLoader::effective_bits_per_weight(GgufQuantType::Q5_0);
        assert_eq!(bits, 5.5);
    }

    #[test]
    fn test_mod_effective_bits_q5_1() {
        let bits = GgufMobileLoader::effective_bits_per_weight(GgufQuantType::Q5_1);
        assert_eq!(bits, 6.0);
    }

    #[test]
    fn test_mod_effective_bits_q8_0() {
        let bits = GgufMobileLoader::effective_bits_per_weight(GgufQuantType::Q8_0);
        assert_eq!(bits, 8.5);
    }

    #[test]
    fn test_mod_effective_bits_f16() {
        let bits = GgufMobileLoader::effective_bits_per_weight(GgufQuantType::F16);
        assert_eq!(bits, 16.0);
    }

    #[test]
    fn test_mod_effective_bits_f32() {
        let bits = GgufMobileLoader::effective_bits_per_weight(GgufQuantType::F32);
        assert_eq!(bits, 32.0);
    }

    // ---- compression ratios vs f32 ----

    #[test]
    fn test_mod_compression_ratio_f32() {
        // F32 compared against itself is the identity: ratio of 1.
        let ratio = GgufMobileLoader::compression_ratio_vs_f32(GgufQuantType::F32);
        assert!((ratio - 1.0).abs() < 1e-5, "expected 1.0, got {ratio}");
    }

    #[test]
    fn test_mod_compression_ratio_q8_0() {
        let ratio = GgufMobileLoader::compression_ratio_vs_f32(GgufQuantType::Q8_0);
        assert!(ratio > 3.5 && ratio < 4.0, "Q8_0 ratio unexpected: {ratio}");
    }

    #[test]
    fn test_mod_compression_ratio_q4_0() {
        let ratio = GgufMobileLoader::compression_ratio_vs_f32(GgufQuantType::Q4_0);
        assert!(ratio > 7.0 && ratio < 7.5, "Q4_0 ratio unexpected: {ratio}");
    }

    // ---- re-export smoke tests ----

    #[test]
    fn test_mod_reexports_work() {
        // Touch each re-exported item so the `pub use` lines are exercised.
        let _cfg = Int4QuantConfig::default();
        let quant_cfg = Int4QuantConfig {
            group_size: 4,
            ..Default::default()
        };
        let _tensor_result = Int4Tensor::quantize(&[1.0f32, 2.0, 3.0, 4.0], &[4], &quant_cfg);
        let _qt = GgufQuantType::Q4_0;
    }

    #[test]
    fn test_mod_gguf_reader_reexport() {
        use gguf_mobile::make_minimal_gguf;
        // A minimal synthetic GGUF buffer parses and reports its architecture.
        let bytes = make_minimal_gguf("test");
        let reader = GgufReader::from_bytes(&bytes).expect("should parse");
        assert_eq!(reader.architecture(), Some("test"));
    }

    #[test]
    fn test_mod_int4_compression_ratio_large() {
        // A large, smoothly varying tensor should approach the compression
        // achievable with 4-bit weights (ratio above 7x vs f32).
        let count = 8192usize;
        let samples: Vec<f32> = (0..count).map(|i| i as f32 * 0.0001 - 0.4096).collect();
        let quant_cfg = Int4QuantConfig {
            group_size: 128,
            ..Default::default()
        };
        let tensor = Int4Tensor::quantize(&samples, &[count], &quant_cfg).expect("quantize");
        let ratio = tensor.compression_ratio();
        assert!(ratio > 7.0, "expected ratio > 7.0 for large tensor, got {ratio}");
    }
}