use std::collections::BTreeMap;
use std::fs;
use std::path::Path;

pub use trueno_quant::F16_MIN_NORMAL;
pub(crate) use trueno_quant::{dequantize_q4_k_to_f32, quantize_q4_k, quantize_q4_k_matrix};

use crate::error::{AprenderError, Result};
use crate::format::gguf::{
    load_gguf_raw, load_gguf_with_tokenizer, GgufModelConfig, GgufRawTensor, GgufReader,
    GgufTokenizer,
};
use crate::format::v2::{AprV2Metadata, AprV2Writer, QuantizationMetadata};
use crate::format::Compression;
use crate::serialization::safetensors::{save_safetensors, SafeTensorsMetadata, TensorMetadata};

#[cfg(feature = "hf-hub-integration")]
pub use crate::format::converter_types::parse_import_error;
pub use crate::format::converter_types::{
    detect_sharded_model, Architecture, DequantizedTensors, ImportError, ImportOptions,
    NativeF32Tensors, QuantizationType, ShardedIndex, Source, TensorExpectation, TensorProvenance,
    ValidationConfig,
};

pub(crate) use import::infer_model_config_from_tensors;
use import::load_safetensors_tensors;
pub(crate) use import::map_tensor_names;
pub use import::sanitize_hf_json;

#[cfg(test)]
pub(crate) use crate::format::validation::{AprValidator, TensorStats};
#[cfg(test)]
pub(crate) use import::{
    compute_std, compute_tensor_stats, parse_tokenizer_json, validate_single_tensor,
    TensorAccumulator,
};
#[cfg(test)]
pub(crate) use merge::calculate_merge_weights;
#[cfg(test)]
pub(crate) use std::path::PathBuf;
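/// Builder-style converter from external checkpoint formats to APR.
///
/// Sketch of intended usage (the source path is illustrative, and `convert`
/// currently always returns a `FormatError` until GH-80 lands):
///
/// ```ignore
/// let bytes = AprConverter::new()
///     .source("./model.safetensors")?
///     .architecture(Architecture::Auto)
///     .validate(ValidationConfig::Strict)
///     .quantize(QuantizationType::Q4K)
///     .convert()?;
/// ```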
#[derive(Debug)]
pub struct AprConverter {
source: Option<Source>,
architecture: Architecture,
validation: ValidationConfig,
quantize: Option<QuantizationType>,
compress: Option<Compression>,
}
impl AprConverter {
#[must_use]
pub fn new() -> Self {
Self {
source: None,
architecture: Architecture::Auto,
validation: ValidationConfig::Strict,
quantize: None,
compress: None,
}
}
    /// Sets the conversion source.
    ///
    /// # Errors
    /// Returns an error if the source string cannot be parsed.
    pub fn source(mut self, source: &str) -> Result<Self> {
        self.source = Some(Source::parse(source)?);
        Ok(self)
    }
#[must_use]
pub fn architecture(mut self, arch: Architecture) -> Self {
self.architecture = arch;
self
}
#[must_use]
pub fn validate(mut self, config: ValidationConfig) -> Self {
self.validation = config;
self
}
#[must_use]
pub fn quantize(mut self, quant: QuantizationType) -> Self {
self.quantize = Some(quant);
self
}
#[must_use]
pub fn compress(mut self, comp: Compression) -> Self {
self.compress = Some(comp);
self
}
    /// Runs the conversion, returning the serialized APR bytes.
    ///
    /// # Errors
    /// Returns an error if no source was set. Currently always errors:
    /// conversion through this builder is not yet implemented (GH-80).
    pub fn convert(self) -> Result<Vec<u8>> {
        let source = self.source.ok_or_else(|| AprenderError::FormatError {
            message: "No source specified".to_string(),
        })?;
        Err(AprenderError::FormatError {
            message: format!("Conversion from {source:?} not yet implemented - see GH-80"),
        })
    }
}
impl Default for AprConverter {
fn default() -> Self {
Self::new()
}
}
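/// Options for [`apr_convert`].
///
/// Defaults to no quantization, no compression, with validation enabled.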
#[derive(Debug, Clone)]
pub struct ConvertOptions {
pub quantize: Option<QuantizationType>,
pub compress: Option<Compression>,
pub validate: bool,
}
impl Default for ConvertOptions {
fn default() -> Self {
Self {
quantize: None,
compress: None,
validate: true,
}
}
}
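/// GH-181: attempts a raw byte pass-through for GGUF sources that already
/// contain K-quant tensors (Q4_K, Q5_K, or Q6_K).
///
/// Returns `Ok(None)` when the input is not readable as GGUF or holds no
/// K-quant tensors, so the caller can fall back to the standard convert path.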
fn try_gguf_q4k_passthrough(
    input_path: &Path,
    output_path: &Path,
    options: &ConvertOptions,
) -> Result<Option<ConvertReport>> {
    // Not readable as GGUF: let the caller fall through to the normal path.
    let Ok(raw_result) = load_gguf_raw(input_path) else {
        return Ok(None);
    };
    // GGML dtype codes: 12 = Q4_K, 13 = Q5_K, 14 = Q6_K.
    let has_q4k = raw_result
        .tensors
        .values()
        .any(|t| t.dtype == 12 || t.dtype == 13 || t.dtype == 14);
    if !has_q4k {
        return Ok(None);
    }
    eprintln!("[GH-181] Detected Q4K source, using raw byte pass-through");
    // Remap GGUF tensor names to APR canonical names; the quantized bytes
    // themselves are copied verbatim.
    let mapped_tensors: BTreeMap<String, GgufRawTensor> = raw_result
        .tensors
        .into_iter()
        .map(|(name, tensor)| {
            let mapped_name = Architecture::Qwen2.map_name(&name);
            (mapped_name, tensor)
        })
        .collect();
let original_size: usize = mapped_tensors.values().map(|t| t.data.len()).sum();
let original_count = mapped_tensors.len();
let import_opts = ImportOptions {
architecture: Architecture::Qwen2,
allow_no_config: true,
..Default::default()
};
write_apr_file_raw(
&mapped_tensors,
output_path,
&import_opts,
Some(&raw_result.tokenizer),
Some(&raw_result.model_config),
)?;
Ok(Some(ConvertReport::build(
original_size,
output_path,
original_count,
options.quantize,
options.compress,
)))
}
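/// PMAT-113: extracts the model config and embedded tokenizer from a GGUF
/// file, returning `(None, None)` if extraction fails.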
fn extract_gguf_config(input_path: &Path) -> (Option<GgufModelConfig>, Option<GgufTokenizer>) {
match load_gguf_with_tokenizer(input_path) {
Ok(result) => {
eprintln!(
"[PMAT-113] Extracted tokenizer with {} vocabulary tokens",
result.tokenizer.vocabulary.len()
);
(Some(result.model_config), Some(result.tokenizer))
}
        // Extraction is best-effort: proceed without config or tokenizer.
        Err(_) => (None, None),
}
}
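/// PMAT-205: maps GGUF tensor names to the APR canonical naming scheme via
/// [`map_tensor_names`], logging a small sample for inspection.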
fn apply_gguf_name_mapping(
tensors: BTreeMap<String, (Vec<f32>, Vec<usize>)>,
) -> BTreeMap<String, (Vec<f32>, Vec<usize>)> {
eprintln!(
"[PMAT-205] Mapping {} GGUF tensor names to APR canonical format...",
tensors.len()
);
let mapped = map_tensor_names(&tensors, Architecture::Qwen2);
    // Log a small sample of mapped names as a sanity check.
    for (i, name) in mapped.keys().take(5).enumerate() {
        eprintln!("[PMAT-205] {i}: {name}");
    }
mapped
}
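/// Routes tensor saving based on whether GGUF config metadata is available.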
fn save_output(
tensors: &BTreeMap<String, (Vec<f32>, Vec<usize>)>,
output_path: &Path,
compression: Option<Compression>,
gguf_config: Option<&GgufModelConfig>,
gguf_tokenizer: Option<&GgufTokenizer>,
quantize: Option<QuantizationType>,
) -> Result<()> {
if let Some(config) = gguf_config {
save_model_tensors_with_gguf_config_and_tokenizer(
tensors,
output_path,
compression,
config,
gguf_tokenizer,
quantize,
)
} else {
save_model_tensors(tensors, output_path, compression, quantize)
}
}
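/// Converts the model file at `input` into an APR file at `output`. GGUF
/// sources additionally get tokenizer extraction and tensor-name mapping.
///
/// A usage sketch (paths are illustrative):
///
/// ```ignore
/// let report = apr_convert(
///     "model.gguf",
///     "model.apr",
///     ConvertOptions {
///         quantize: Some(QuantizationType::Q4K),
///         ..Default::default()
///     },
/// )?;
/// eprintln!("{} -> {} bytes", report.original_size, report.converted_size);
/// ```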
pub fn apr_convert<P: AsRef<Path>>(
input: P,
output: P,
options: ConvertOptions,
) -> Result<ConvertReport> {
let input_path = input.as_ref();
let output_path = output.as_ref();
    // Prefer magic-byte detection; fall back to the `.gguf` extension when
    // the file cannot be read.
    let is_gguf = crate::format::rosetta::FormatType::from_magic(input_path)
        .map(|f| matches!(f, crate::format::rosetta::FormatType::Gguf))
        .unwrap_or_else(|_| input_path.extension().and_then(|e| e.to_str()) == Some("gguf"));
    // Fast path (GH-181): a K-quantized GGUF source with Q4K output requested
    // can be copied byte-for-byte, avoiding a lossy dequantize/requantize
    // round-trip.
    if is_gguf && options.quantize == Some(QuantizationType::Q4K) {
        if let Some(report) = try_gguf_q4k_passthrough(input_path, output_path, &options)? {
            return Ok(report);
        }
    }
let (gguf_config, gguf_tokenizer) = if is_gguf {
extract_gguf_config(input_path)
} else {
(None, None)
};
let tensors = load_model_tensors(input_path)?;
let original_size = fs::metadata(input_path)
.map(|m| m.len() as usize)
.unwrap_or_else(|_| calculate_tensor_size(&tensors));
let original_count = tensors.len();
let tensors = if is_gguf {
apply_gguf_name_mapping(tensors)
} else {
tensors
};
    // Q4K output goes through a dedicated save path.
    if options.quantize == Some(QuantizationType::Q4K) {
        save_model_tensors_q4k(&tensors, output_path, gguf_tokenizer.as_ref())?;
return Ok(ConvertReport::build(
original_size,
output_path,
original_count,
options.quantize,
options.compress,
));
}
save_output(
&tensors,
output_path,
options.compress,
gguf_config.as_ref(),
gguf_tokenizer.as_ref(),
options.quantize,
)?;
Ok(ConvertReport::build(
original_size,
output_path,
original_count,
options.quantize,
options.compress,
))
}
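/// Summary of a completed conversion, built by `ConvertReport::build`.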
#[derive(Debug, Clone)]
pub struct ConvertReport {
    /// Size of the source file in bytes (or the computed tensor size when
    /// file metadata is unavailable).
    pub original_size: usize,
    /// Size of the converted output (see `ConvertReport::build`).
    pub converted_size: usize,
    /// Number of tensors in the source model.
    pub tensor_count: usize,
    /// Quantization applied during conversion, if any.
    pub quantization: Option<QuantizationType>,
    /// Compression applied during conversion, if any.
    pub compression: Option<Compression>,
    /// Size reduction ratio computed by `ConvertReport::build`.
    pub reduction_ratio: f64,
}
include!("convert_report.rs");
include!("f16_convert.rs");
include!("infer_q4k_config.rs");