/// Decode an IEEE 754 half-precision value from its raw bit pattern into `f32`.
///
/// Handles all four encoding classes: signed zero, subnormals, normals,
/// and infinity/NaN.
fn f16_to_f32(bits: u16) -> f32 {
    let sign_bit = (u32::from(bits) & 0x8000) << 16;
    let exp_field = (bits >> 10) & 0x1F;
    let frac = u32::from(bits & 0x3FF);
    let out_bits = if exp_field == 31 {
        // Infinity (frac == 0) or NaN (frac != 0); NaN payload widens into f32.
        sign_bit | 0x7F80_0000 | (frac << 13)
    } else if exp_field != 0 {
        // Normal number: rebias exponent from 15 to 127, widen mantissa to 23 bits.
        sign_bit | ((u32::from(exp_field) + 127 - 15) << 23) | (frac << 13)
    } else if frac == 0 {
        // Signed zero.
        sign_bit
    } else {
        // Subnormal: normalize so the implicit leading bit (bit 10) is set.
        // `frac` is nonzero and at most 10 bits wide, so its highest set bit
        // sits at index `31 - leading_zeros`; shifting left by
        // `leading_zeros - 21` moves it to index 10.
        let shift = frac.leading_zeros() - 21;
        let normalized = frac << shift;
        // 113 - shift == 127 - 15 + 1 - shift: rebias plus normalization steps.
        sign_bit | ((113 - shift) << 23) | ((normalized & 0x3FF) << 13)
    };
    f32::from_bits(out_bits)
}
/// Largest absolute value in `data`; 0.0 for an empty slice.
#[inline]
fn max_abs_value(data: &[f32]) -> f32 {
    let mut peak = 0.0f32;
    for &v in data {
        // f32::max ignores a NaN operand, matching the fold-based original.
        peak = peak.max(v.abs());
    }
    peak
}
/// Symmetric round-trip quantization: scale values onto the integer grid
/// `[min_level, max_level]`, round, then map the levels back to floats.
///
/// The scale is derived from the slice's peak magnitude, so the largest
/// element lands exactly on `max_level`. Empty input yields an empty vector;
/// an all-zero slice yields all zeros (no divide-by-zero).
fn symmetric_quantize_dequantize(data: &[f32], max_level: f32, min_level: f32) -> Vec<f32> {
    if data.is_empty() {
        return Vec::new();
    }
    let peak = data.iter().fold(0.0f32, |acc, &v| acc.max(v.abs()));
    if peak == 0.0 {
        return vec![0.0; data.len()];
    }
    let scale = peak / max_level;
    let mut out = Vec::with_capacity(data.len());
    for &v in data {
        // Round to the nearest level, clamp into the grid, then dequantize.
        let level = (v / scale).round().clamp(min_level, max_level) as i8;
        out.push(f32::from(level) * scale);
    }
    out
}
/// Int8 round-trip quantization over the symmetric level range [-127, 127].
fn quantize_int8(data: &[f32]) -> Vec<f32> {
    symmetric_quantize_dequantize(data, 127.0, -127.0)
}
/// Int4 round-trip quantization over the two's-complement range [-8, 7].
fn quantize_int4(data: &[f32]) -> Vec<f32> {
    symmetric_quantize_dequantize(data, 7.0, -8.0)
}
/// Whether a named rank-2 tensor is a projection weight that requires a
/// transpose during conversion. Non-matrix shapes never qualify.
#[allow(dead_code)]
fn needs_transpose(name: &str, shape: &[usize]) -> bool {
    // Only rank-2 tensors are transpose candidates.
    if !matches!(shape, [_, _]) {
        return false;
    }
    // Substring patterns covering both GGUF-style and HF-style weight names.
    const PROJECTION_WEIGHTS: [&str; 16] = [
        "attn_output.weight",
        "attn_k.weight",
        "attn_q.weight",
        "attn_v.weight",
        "ffn_gate.weight",
        "ffn_up.weight",
        "ffn_down.weight",
        "output.weight",
        "lm_head.weight",
        "q_proj.weight",
        "k_proj.weight",
        "v_proj.weight",
        "o_proj.weight",
        "gate_proj.weight",
        "up_proj.weight",
        "down_proj.weight",
    ];
    PROJECTION_WEIGHTS.iter().any(|p| name.contains(p))
}
/// True when `name` contains at least one of `patterns` as a substring.
#[inline]
fn name_matches_any(name: &str, patterns: &[&str]) -> bool {
    for pattern in patterns {
        if name.contains(pattern) {
            return true;
        }
    }
    false
}
const NORM_BIAS_PATTERNS: &[&str] = &["bias", "layernorm", "layer_norm", "norm.weight"];
pub(super) fn should_skip_quantization(name: &str, element_count: usize) -> bool {
name_matches_any(name, NORM_BIAS_PATTERNS) || element_count < 1024
}
/// Add one tensor to `writer`, encoded according to `quantize`.
///
/// Integer schemes (Int8/Int4/Q4K) fall back to f32 for tensors matched by
/// `should_skip_quantization` (norm/bias/small tensors); Fp16 is always
/// applied regardless of the skip heuristic.
fn add_tensor_with_quantization(
    writer: &mut AprV2Writer,
    name: &str,
    shape: &[usize],
    data: &[f32],
    quantize: Option<QuantizationType>,
) {
    let keep_f32 = should_skip_quantization(name, data.len());
    match quantize {
        Some(QuantizationType::Fp16) => writer.add_f16_tensor(name, shape.to_vec(), data),
        Some(QuantizationType::Int8) if !keep_f32 => {
            writer.add_q8_tensor(name, shape.to_vec(), data);
        }
        Some(QuantizationType::Int4) if !keep_f32 => {
            writer.add_q4_tensor(name, shape.to_vec(), data);
        }
        Some(QuantizationType::Q4K) if !keep_f32 => {
            writer.add_q4k_raw_tensor(name, shape.to_vec(), quantize_q4_k(data));
        }
        // Skipped integer quantization, or no quantization requested: raw f32.
        Some(QuantizationType::Int8 | QuantizationType::Int4 | QuantizationType::Q4K) | None => {
            writer.add_f32_tensor(name, shape.to_vec(), data);
        }
    }
}
/// Save converted tensors to `output`, choosing the container by extension:
/// `.apr` goes through the APR v2 writer; anything else is written as
/// safetensors (quantized when `quantize` is set).
fn save_model_tensors(
    tensors: &BTreeMap<String, (Vec<f32>, Vec<usize>)>,
    output: &Path,
    compression: Option<Compression>,
    quantize: Option<QuantizationType>,
) -> Result<()> {
    if output.extension().and_then(|e| e.to_str()) == Some("apr") {
        return save_model_tensors_with_config(tensors, output, compression, quantize);
    }
    match quantize {
        Some(quant) => save_safetensors_quantized(tensors, output, quant),
        None => save_safetensors(output, tensors).map_err(|e| AprenderError::FormatError {
            message: format!("Failed to save converted model: {e}"),
        }),
    }
}
/// Write `tensors` to `output` in safetensors layout, quantizing each tensor
/// per `quant` unless `should_skip_quantization` keeps it in f32.
///
/// File layout produced: an 8-byte little-endian JSON header length, the JSON
/// metadata (dtype / shape / byte offsets per tensor), then the concatenated
/// tensor payloads. Offsets are relative to the start of the payload region.
/// NOTE(review): no alignment padding is inserted after the JSON header —
/// confirm downstream readers do not require 8-byte-aligned payloads.
fn save_safetensors_quantized(
    tensors: &BTreeMap<String, (Vec<f32>, Vec<usize>)>,
    output: &Path,
    quant: QuantizationType,
) -> Result<()> {
    let mut metadata = SafeTensorsMetadata::new();
    let mut raw_data = Vec::new();
    let mut current_offset = 0;
    // BTreeMap iteration is sorted by name, so offsets are deterministic.
    for (name, (data, shape)) in tensors {
        let (dtype_str, tensor_bytes) = if should_skip_quantization(name, data.len()) {
            // Precision-sensitive or tiny tensor: keep raw little-endian f32.
            let bytes: Vec<u8> = data.iter().flat_map(|f| f.to_le_bytes()).collect();
            ("F32", bytes)
        } else {
            quantize_for_safetensors(data, quant)
        };
        let start_offset = current_offset;
        let end_offset = current_offset + tensor_bytes.len();
        metadata.insert(
            name.clone(),
            TensorMetadata {
                dtype: dtype_str.to_string(),
                shape: shape.clone(),
                data_offsets: [start_offset, end_offset],
            },
        );
        raw_data.extend_from_slice(&tensor_bytes);
        current_offset = end_offset;
    }
    let metadata_json =
        serde_json::to_string(&metadata).map_err(|e| AprenderError::FormatError {
            message: format!("JSON serialization failed: {e}"),
        })?;
    let header_bytes = metadata_json.as_bytes();
    let header_len = header_bytes.len() as u64;
    // Assemble the whole file in memory: length prefix, header, payloads.
    let mut file_data = Vec::new();
    file_data.extend_from_slice(&header_len.to_le_bytes());
    file_data.extend_from_slice(header_bytes);
    file_data.extend_from_slice(&raw_data);
    fs::write(output, file_data).map_err(|e| AprenderError::FormatError {
        message: format!("Failed to write output file: {e}"),
    })
}
/// Map one tensor to its safetensors dtype tag plus encoded bytes.
///
/// Q4K has no native safetensors representation, so it degrades to plain
/// int8 here; Int4 packs two signed nibbles per byte and is tagged "U8".
fn quantize_for_safetensors(data: &[f32], quant: QuantizationType) -> (&'static str, Vec<u8>) {
    match quant {
        QuantizationType::Int4 => ("U8", quantize_int4_packed(data)),
        QuantizationType::Int8 | QuantizationType::Q4K => {
            ("I8", symmetric_quantize_i8(data, 127.0))
        }
        QuantizationType::Fp16 => ("F16", f32_slice_to_f16_le_bytes(data)),
    }
}
/// Quantize to int8 bytes with a symmetric scale that maps the slice's peak
/// magnitude to `max_val`. An all-zero (or empty) input uses scale 1.0, so
/// every element quantizes to 0.
fn symmetric_quantize_i8(data: &[f32], max_val: f32) -> Vec<u8> {
    let peak = data.iter().fold(0.0f32, |acc, &v| acc.max(v.abs()));
    let scale = if peak > 0.0 { max_val / peak } else { 1.0 };
    let mut out = Vec::with_capacity(data.len());
    for &v in data {
        // Round, clamp to the i8 range, and reinterpret the sign bits as u8.
        out.push((v * scale).round().clamp(-128.0, 127.0) as i8 as u8);
    }
    out
}
/// Quantize to 4-bit levels in [-8, 7], stored offset-binary (0..=15) and
/// packed two per byte: first value in the low nibble, second in the high.
/// An odd trailing element is paired with a zero high nibble.
fn quantize_int4_packed(data: &[f32]) -> Vec<u8> {
    let peak = data.iter().fold(0.0f32, |acc, &v| acc.max(v.abs()));
    let scale = if peak > 0.0 { 7.0 / peak } else { 1.0 };
    // Offset-binary nibble for one value: level + 8 fits in 4 bits (0..=15).
    let nibble = |v: f32| ((v * scale).round().clamp(-8.0, 7.0) as i8 + 8) as u8 & 0x0F;
    let mut packed = Vec::with_capacity((data.len() + 1) / 2);
    for pair in data.chunks(2) {
        let low = nibble(pair[0]);
        let high = pair.get(1).map_or(0, |&v| nibble(v));
        packed.push(low | (high << 4));
    }
    packed
}
/// Encode an `f32` as IEEE 754 half-precision bits (round-toward-zero).
///
/// Handles the full f16 range: NaN/infinity, overflow to infinity, normals,
/// subnormal results (f32 exponent fields 103..=112), and underflow to signed
/// zero. Extra mantissa bits are truncated, not rounded to nearest.
///
/// Fix: the previous version flushed f16-subnormal-range magnitudes
/// (2^-24 ..< 2^-14) to zero, breaking the roundtrip with `f16_to_f32`,
/// which does decode subnormals.
fn f32_to_f16_bits(value: f32) -> u16 {
    let bits = value.to_bits();
    let sign = (bits >> 16) & 0x8000;
    let exponent = ((bits >> 23) & 0xFF) as i32;
    let mantissa = bits & 0x007F_FFFF;
    let f16 = if exponent == 0xFF {
        // Infinity or NaN; set a quiet-NaN payload bit so NaN stays NaN.
        sign | 0x7C00 | if mantissa != 0 { 0x0200 } else { 0 }
    } else if exponent > 142 {
        // Magnitude exceeds the f16 normal range (unbiased exp > 15): infinity.
        sign | 0x7C00
    } else if exponent >= 113 {
        // Normal f16: rebias exponent (127 -> 15), truncate mantissa to 10 bits.
        sign | (((exponent - 112) as u32) << 10) | ((mantissa >> 13) & 0x3FF)
    } else if exponent >= 103 {
        // Subnormal f16: restore the implicit leading bit to form the 24-bit
        // significand, then shift it down so the result encodes frac * 2^-24.
        let significand = mantissa | 0x0080_0000;
        sign | (significand >> (126 - exponent))
    } else {
        // Below the smallest f16 subnormal (2^-24): signed zero.
        sign
    };
    f16 as u16
}
/// Convert an f32 slice into little-endian f16 bytes (two bytes per element).
fn f32_slice_to_f16_le_bytes(data: &[f32]) -> Vec<u8> {
    let mut bytes = Vec::with_capacity(data.len() * 2);
    for &v in data {
        bytes.extend_from_slice(&f32_to_f16_bits(v).to_le_bytes());
    }
    bytes
}
/// Write `tensors` to `output` as an APR v2 file, inferring model metadata
/// from the tensor names/shapes when possible. `_compression` is accepted for
/// interface parity but currently unused.
fn save_model_tensors_with_config(
    tensors: &BTreeMap<String, (Vec<f32>, Vec<usize>)>,
    output: &Path,
    _compression: Option<Compression>,
    quantize: Option<QuantizationType>,
) -> Result<()> {
    let mut metadata = AprV2Metadata::new("unknown");
    metadata.original_format = Some("safetensors".to_string());
    if let Some(cfg) = infer_model_config_from_tensors(tensors) {
        // NOTE(review): model_type is hard-coded regardless of the inferred
        // config — presumably all converted models are qwen2-family today;
        // confirm before reusing this path for other architectures.
        metadata.model_type = "qwen2".to_string();
        metadata.hidden_size = cfg.hidden_size;
        metadata.num_layers = cfg.num_layers;
        metadata.vocab_size = cfg.vocab_size;
        metadata.num_heads = cfg.num_heads;
        metadata.num_kv_heads = cfg.num_kv_heads;
        metadata.intermediate_size = cfg.intermediate_size;
    }
    let mut writer = AprV2Writer::new(metadata);
    for (name, (data, shape)) in tensors {
        add_tensor_with_quantization(&mut writer, name, shape, data, quantize);
    }
    let apr_bytes = writer.write().map_err(|e| AprenderError::FormatError {
        message: format!("Failed to write APR format: {e}"),
    })?;
    fs::write(output, apr_bytes).map_err(|e| AprenderError::FormatError {
        message: format!("Failed to write output file: {e}"),
    })
}
include!("inferred_q4k_config.rs");