/// Gathers the raw bytes, shapes, and GGML dtype for every source tensor of a
/// fusion rule at the given layer.
///
/// Returns `None` if any source tensor (or its raw data) is missing, if a
/// source dtype cannot be mapped to GGML, or if the sources disagree on dtype:
/// the caller concatenates `all_bytes` into a single fused tensor, which is
/// only meaningful when every source shares one element type. (The original
/// code kept the *last* dtype seen, silently corrupting mixed-dtype fusions.)
fn collect_raw_fusion_sources(
    rule: &FusionExportRule,
    layer: usize,
    reader: &crate::format::v2::AprV2Reader,
) -> Option<(Vec<u8>, Vec<Vec<usize>>, crate::format::gguf::GgmlType)> {
    let mut all_bytes: Vec<u8> = Vec::new();
    let mut all_shapes: Vec<Vec<usize>> = Vec::new();
    let mut dtype: Option<crate::format::gguf::GgmlType> = None;
    for apr_suffix in &rule.apr_suffixes {
        let apr_name = format!("model.layers.{layer}.{apr_suffix}");
        let entry = reader.get_tensor(&apr_name)?;
        let raw = reader.get_tensor_data(&apr_name)?;
        let tensor_dtype = apr_dtype_to_ggml(entry.dtype)?;
        // Reject mixed-dtype fusion. Discriminant comparison is used so this
        // works whether or not GgmlType derives PartialEq; its variants are
        // unit-like, so discriminant equality is value equality.
        if let Some(ref prev) = dtype {
            if std::mem::discriminant(prev) != std::mem::discriminant(&tensor_dtype) {
                return None;
            }
        }
        dtype = Some(tensor_dtype);
        all_bytes.extend_from_slice(raw);
        all_shapes.push(entry.shape.clone());
    }
    // Preserve the original fallback: an empty suffix list reports F32
    // (downstream shape computation rejects the empty shape list anyway).
    Some((all_bytes, all_shapes, dtype.unwrap_or(crate::format::gguf::GgmlType::F32)))
}
/// Builds fused GGUF tensors (raw byte concatenation) for every fusion rule
/// across every detected layer.
///
/// Any (rule, layer) pair whose source tensors are absent or whose fused
/// shape cannot be computed is silently skipped.
fn build_fused_tensors_raw(
    mapper: &GgufNameMapper,
    reader: &crate::format::v2::AprV2Reader,
) -> Vec<crate::format::gguf::GgufTensor> {
    use crate::format::gguf::GgufTensor;
    let rules = mapper.fusion_rules();
    if rules.is_empty() {
        return Vec::new();
    }
    let names = reader.tensor_names();
    let num_layers = detect_num_layers_from_names(names.iter().map(|s| s.as_ref()));
    let mut out = Vec::new();
    for rule in rules {
        for layer in 0..num_layers {
            // Missing sources for this (rule, layer) → nothing to fuse here.
            let Some((bytes, shapes, dtype)) = collect_raw_fusion_sources(rule, layer, reader)
            else {
                continue;
            };
            // Incompatible source shapes → skip rather than emit garbage.
            if let Some(fused_shape) = compute_fused_shape(&shapes) {
                out.push(GgufTensor {
                    name: format!("blk.{layer}.{}", rule.gguf_suffix),
                    shape: shape_to_gguf(&fused_shape),
                    dtype,
                    data: bytes,
                });
            }
        }
    }
    out
}
/// Target container formats understood by the model exporter.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ExportFormat {
    SafeTensors,
    Gguf,
    Mlx,
    Onnx,
    OpenVino,
    CoreMl,
    TorchScript,
}

impl std::str::FromStr for ExportFormat {
    type Err = String;

    /// Parses a format name case-insensitively, accepting common aliases
    /// (`st`, `ov`, `mlpackage`, `pt`, `torch`).
    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        let normalized = s.to_lowercase();
        match normalized.as_str() {
            "torchscript" | "pt" | "torch" => Ok(Self::TorchScript),
            "coreml" | "mlpackage" => Ok(Self::CoreMl),
            "openvino" | "ov" => Ok(Self::OpenVino),
            "safetensors" | "st" => Ok(Self::SafeTensors),
            "onnx" => Ok(Self::Onnx),
            "gguf" => Ok(Self::Gguf),
            "mlx" => Ok(Self::Mlx),
            _ => Err(format!("Unknown export format: {s}")),
        }
    }
}

impl ExportFormat {
    /// Conventional file extension for this format (OpenVINO uses its
    /// `.xml` model descriptor; CoreML its `.mlpackage` bundle).
    #[must_use]
    pub fn extension(&self) -> &'static str {
        match self {
            Self::TorchScript => "pt",
            Self::CoreMl => "mlpackage",
            Self::OpenVino => "xml",
            Self::Onnx => "onnx",
            Self::Mlx => "mlx",
            Self::Gguf => "gguf",
            Self::SafeTensors => "safetensors",
        }
    }

    /// Whether this exporter currently implements the format.
    #[must_use]
    pub fn is_supported(&self) -> bool {
        matches!(self, Self::SafeTensors | Self::Gguf | Self::Mlx)
    }

    /// Human-readable name for display in messages and listings.
    #[must_use]
    pub fn display_name(&self) -> &'static str {
        match self {
            Self::TorchScript => "TorchScript",
            Self::CoreMl => "CoreML",
            Self::OpenVino => "OpenVINO",
            Self::Onnx => "ONNX",
            Self::Mlx => "MLX",
            Self::Gguf => "GGUF",
            Self::SafeTensors => "SafeTensors",
        }
    }

    /// Every known format, supported or not.
    #[must_use]
    pub fn all() -> &'static [ExportFormat] {
        &[
            Self::SafeTensors,
            Self::Gguf,
            Self::Mlx,
            Self::Onnx,
            Self::OpenVino,
            Self::CoreMl,
            Self::TorchScript,
        ]
    }
}
/// Caller-supplied options controlling a model export.
#[derive(Debug, Clone)]
pub struct ExportOptions {
    /// Output container format.
    pub format: ExportFormat,
    /// Quantization to apply during export; `None` keeps original precision.
    pub quantize: Option<QuantizationType>,
    /// Include tokenizer data in the exported artifact.
    pub include_tokenizer: bool,
    /// Include model configuration in the exported artifact.
    pub include_config: bool,
    /// Bypass the pre-export completeness check when `true`.
    pub skip_completeness_check: bool,
}

impl Default for ExportOptions {
    /// Defaults: SafeTensors output, no quantization, tokenizer and config
    /// included, completeness check enabled.
    fn default() -> Self {
        Self {
            format: ExportFormat::SafeTensors,
            quantize: None,
            include_tokenizer: true,
            include_config: true,
            skip_completeness_check: false,
        }
    }
}
/// Summary of a completed export run.
#[derive(Debug, Clone)]
pub struct ExportReport {
    // Size of the source model (presumably bytes — confirm at the call site
    // that populates this report).
    pub original_size: usize,
    // Size of the exported artifact, in the same units as `original_size`.
    pub exported_size: usize,
    // Number of tensors written to the output.
    pub tensor_count: usize,
    // Format that was produced.
    pub format: ExportFormat,
    // Quantization applied, if any.
    pub quantization: Option<QuantizationType>,
}
/// Newtype wrapper proving GGUF metadata has passed `validate`: it contains
/// `general.architecture`, and `tokenizer.ggml.tokens` /
/// `tokenizer.ggml.model` are either both present or both absent, with the
/// token table deduplicated. Constructed only via `validate`, so holding one
/// is evidence the checks ran.
#[derive(Debug)]
pub(crate) struct ValidatedGgufMetadata {
    inner: Vec<(String, crate::format::gguf::GgufValue)>,
}
/// Replaces duplicate entries in the GGUF `tokenizer.ggml.tokens` table with
/// unique `[PAD{index}]` placeholders (GH-277).
///
/// The first occurrence of a token is kept; later occurrences are renamed.
/// `metadata` is left untouched when the key is absent, when its value is not
/// an `ArrayString`, or when no duplicates exist.
pub(crate) fn dedup_token_table(metadata: &mut [(String, crate::format::gguf::GgufValue)]) {
    // Locate the token table; nothing to do if it is missing.
    let Some(pos) = metadata
        .iter()
        .position(|(k, _)| k == "tokenizer.ggml.tokens")
    else {
        return;
    };
    let crate::format::gguf::GgufValue::ArrayString(tokens) = &metadata[pos].1 else {
        return;
    };
    // Borrow &str keys from `tokens` instead of cloning each unique token
    // into the set; `insert` returning false means "already seen", giving a
    // single hash lookup where contains() + insert() did two.
    let mut seen: std::collections::HashSet<&str> =
        std::collections::HashSet::with_capacity(tokens.len());
    let mut dedup_count = 0u32;
    let deduped: Vec<String> = tokens
        .iter()
        .enumerate()
        .map(|(idx, tok)| {
            if seen.insert(tok.as_str()) {
                tok.clone()
            } else {
                dedup_count += 1;
                format!("[PAD{idx}]")
            }
        })
        .collect();
    if dedup_count > 0 {
        eprintln!("[GH-277] Deduped {dedup_count} duplicate token(s) → [PAD{{id}}] format");
        metadata[pos] = (
            "tokenizer.ggml.tokens".to_string(),
            crate::format::gguf::GgufValue::ArrayString(deduped),
        );
    }
}
impl ValidatedGgufMetadata {
    /// Validates structural invariants of GGUF export metadata, deduplicates
    /// the token table, and wraps the result.
    ///
    /// # Errors
    /// Returns a `FormatError` when `general.architecture` is missing, or
    /// when exactly one of `tokenizer.ggml.tokens` / `tokenizer.ggml.model`
    /// is present (they must appear together or not at all).
    pub(crate) fn validate(
        mut metadata: Vec<(String, crate::format::gguf::GgufValue)>,
    ) -> Result<Self> {
        let has_key = |k: &str| metadata.iter().any(|(name, _)| name == k);
        if !has_key("general.architecture") {
            return Err(AprenderError::FormatError {
                message: "[GH-253-4] GGUF export missing required key: general.architecture"
                    .to_string(),
            });
        }
        // The two tokenizer keys are all-or-nothing; reject a lone one.
        match (has_key("tokenizer.ggml.tokens"), has_key("tokenizer.ggml.model")) {
            (true, false) => {
                return Err(AprenderError::FormatError {
                    message:
                        "[GH-253-4] GGUF export has tokenizer.ggml.tokens but missing tokenizer.ggml.model"
                            .to_string(),
                });
            }
            (false, true) => {
                return Err(AprenderError::FormatError {
                    message:
                        "[GH-253-4] GGUF export has tokenizer.ggml.model but missing tokenizer.ggml.tokens"
                            .to_string(),
                });
            }
            _ => {}
        }
        dedup_token_table(&mut metadata);
        Ok(Self { inner: metadata })
    }

    /// Borrows the validated key/value pairs.
    pub(crate) fn as_slice(&self) -> &[(String, crate::format::gguf::GgufValue)] {
        &self.inner
    }
}