use crate::error::{AprenderError, Result};
use crate::format::converter_types::{
Architecture, ImportError, ImportOptions, QuantizationType, Source, TensorExpectation,
ValidationConfig,
};
use crate::format::gguf::{
load_gguf_raw, load_gguf_with_tokenizer, GgufModelConfig, GgufRawTensor, GgufTokenizer,
};
use crate::format::layout_contract::{contract, enforce_import_contract};
use crate::format::sharded::ShardIndex;
use crate::format::validation::{AprValidator, TensorStats, ValidationReport};
use crate::serialization::safetensors::{MappedSafeTensors, UserMetadata};
use provable_contracts_macros::{ensures, requires};
use std::collections::BTreeMap;
use super::{validate_tensor_values, write_apr_file, write_apr_file_raw};
use std::fs;
use std::path::{Path, PathBuf};
#[cfg(feature = "hf-hub-integration")]
use crate::format::converter_types::parse_import_error;
/// Import a model from `source` (local path, HuggingFace `org/repo`, or URL)
/// and write it as an APR file at `output`, returning a validation report.
///
/// Pipeline: resolve the source to a local file → detect format →
/// (a) GGUF raw fast-path, (b) sharded-SafeTensors streaming path, or
/// (c) in-memory load → infer and verify architecture → map tensor names →
/// enforce the GGUF→APR layout contract → validate → write.
#[requires(!source.is_empty())]
pub fn apr_import<P: AsRef<Path>>(
    source: &str,
    output: P,
    options: ImportOptions,
) -> Result<ValidationReport> {
    let parsed_source = Source::parse(source)?;
    let output_path = output.as_ref();
    let local_path = resolve_source(&parsed_source, options.cache)?;
    // Prefer magic-byte sniffing to decide GGUF-ness; fall back to the file
    // extension if the magic cannot be read.
    let is_gguf = crate::format::rosetta::FormatType::from_magic(&local_path)
        .map(|f| matches!(f, crate::format::rosetta::FormatType::Gguf))
        .unwrap_or_else(|_| local_path.extension().and_then(|e| e.to_str()) == Some("gguf"));
    if is_gguf {
        // Raw (quantization-preserving) import first; `Ok(None)` means the
        // failure was recoverable and we fall through to dequant→requant.
        if let Some(report) = try_gguf_raw_import(&local_path, output_path, &options)? {
            return Ok(report);
        }
    }
    let is_sharded_safetensors = local_path
        .file_name()
        .is_some_and(|n| n.to_string_lossy().ends_with(".index.json"));
    if is_sharded_safetensors {
        // Sharded models are streamed shard-by-shard to bound memory use.
        return streaming_sharded_import(&local_path, output_path, &options);
    }
    let mut load_result = load_source_tensors(&local_path, &options)?;
    // LLM imports may need a tokenizer pulled from the HF cache when the
    // model file itself did not carry one.
    if options.architecture.is_llm() || options.architecture == Architecture::Auto {
        resolve_hf_tokenizer_fallback(&mut load_result, &parsed_source);
    }
    if let Some(config) = load_result.model_config.as_ref() {
        config.warn_out_of_bounds();
    }
    // Architecture resolution: explicit user choice wins, else config
    // metadata; the result is then checked against tensor-name evidence.
    let metadata_arch = infer_architecture(
        &options.architecture,
        load_result
            .model_config
            .as_ref()
            .and_then(|c| c.architecture.as_deref()),
    );
    let user_specified = options.architecture != Architecture::Auto;
    let effective_arch = verify_architecture_from_tensor_evidence(
        metadata_arch,
        load_result.tensors.keys().map(String::as_str),
        user_specified,
    );
    warn_unverified_architecture(&effective_arch, options.strict)?;
    let mut mapped_tensors = map_tensor_names(&load_result.tensors, effective_arch);
    // GPT-2 / GPT-NeoX store Q, K and V fused into one tensor; split them.
    if effective_arch == Architecture::Gpt2 {
        Architecture::split_gpt2_fused_qkv(&mut mapped_tensors);
    }
    if effective_arch == Architecture::GptNeoX {
        Architecture::split_neox_fused_qkv(&mut mapped_tensors);
    }
    if is_gguf {
        // GGUF dequant fallback path: apply the GGUF→APR shape contract to
        // every tensor. Shape-only transform — a data transpose is never
        // required on this direction (GH-208, BUG-IMPORT-001).
        let vocab_size_for_contract = load_result
            .model_config
            .as_ref()
            .and_then(|c| c.vocab_size)
            .unwrap_or(0);
        let hidden_dim_for_contract = load_result
            .model_config
            .as_ref()
            .and_then(|c| c.hidden_size)
            .unwrap_or(0);
        mapped_tensors = mapped_tensors
            .into_iter()
            .map(|(name, (data, shape))| {
                let (apr_shape, needs_data_transpose) = enforce_import_contract(
                    &name,
                    &shape,
                    vocab_size_for_contract,
                    hidden_dim_for_contract,
                );
                assert!(
                    !needs_data_transpose,
                    "CONTRACT BUG: enforce_import_contract returned needs_data_transpose=true for '{}'. \
                     GGUF→APR NEVER needs data transpose. See GH-208, BUG-IMPORT-001.",
                    name
                );
                (name, (data, apr_shape))
            })
            .collect();
        let transformed_count = mapped_tensors.len();
        eprintln!(
            "[BUG-IMPORT-001] Applied GGUF→APR shape contract to {} tensors (dequant fallback path)",
            transformed_count
        );
    }
    // Carry F16/BF16 tensors through as raw bytes, renamed to APR names.
    let mapped_f16_raw: BTreeMap<String, (Vec<u8>, Vec<usize>, bool)> = load_result
        .f16_raw_tensors
        .iter()
        .map(|(name, (bytes, shape, is_bf16))| {
            let mapped_name = effective_arch.map_name(name);
            (mapped_name, (bytes.clone(), shape.clone(), *is_bf16))
        })
        .collect();
    let layout_contract = contract();
    let vocab_size = load_result
        .model_config
        .as_ref()
        .and_then(|c| c.vocab_size)
        .unwrap_or(0);
    let hidden_dim = load_result
        .model_config
        .as_ref()
        .and_then(|c| c.hidden_size)
        .unwrap_or(0);
    validate_contract_f32(
        &layout_contract,
        &mapped_tensors,
        vocab_size,
        hidden_dim,
        options.strict,
    )?;
    // GH-279 gate: refuse to write an APR file missing required tensors.
    // With a config we use its layer count; otherwise infer from names.
    if let Some(config) = load_result.model_config.as_ref() {
        enforce_arch_completeness_gate_f32(&effective_arch, &mapped_tensors, config)?;
    } else {
        enforce_arch_completeness_gate_inferred(&effective_arch, &mapped_tensors)?;
    }
    let validation_result = validate_tensors(&mapped_tensors, &options)?;
    // Only LLM architectures embed a tokenizer in the APR output.
    let tokenizer_for_write = if effective_arch.is_llm() {
        load_result.tokenizer.as_ref()
    } else {
        None
    };
    write_apr_file(
        &mapped_tensors,
        &mapped_f16_raw,
        output_path,
        &options,
        tokenizer_for_write,
        load_result.model_config.as_ref(),
        &load_result.user_metadata,
    )?;
    Ok(validation_result)
}
/// Attempt the raw (quantization-preserving) GGUF import.
///
/// Returns `Ok(Some(report))` on success, `Ok(None)` when the failure is a
/// known-recoverable dtype limitation (caller falls back to the
/// dequant→requant path), and `Err` for anything else.
fn try_gguf_raw_import(
    path: &Path,
    output_path: &Path,
    options: &ImportOptions,
) -> Result<Option<ValidationReport>> {
    let err = match apr_import_gguf_raw(path, output_path, options) {
        Ok(report) => return Ok(Some(report)),
        Err(e) => e,
    };
    let msg = err.to_string();
    let recoverable =
        msg.contains("cannot represent exactly") || msg.contains("not yet supported");
    if !recoverable {
        return Err(err);
    }
    eprintln!(
        "[GH-375] Raw import failed ({}), falling back to dequant→requant path",
        msg.lines().next().unwrap_or("unsupported dtype")
    );
    Ok(None)
}
/// Raw GGUF import: copies quantized tensor payloads into an APR file
/// without dequantizing, preserving the source quantization exactly.
///
/// Errors propagate to the caller; `try_gguf_raw_import` decides which
/// failures are recoverable via the dequant fallback path.
#[requires(gguf_path.exists())]
pub(crate) fn apr_import_gguf_raw(
    gguf_path: &Path,
    output_path: &Path,
    options: &ImportOptions,
) -> Result<ValidationReport> {
    let raw_result = load_gguf_raw(gguf_path)?;
    raw_result.model_config.warn_out_of_bounds();
    // Resolve which tokenizer to embed, taking the GGUF's own tokenizer and
    // an optional explicit tokenizer path into account.
    let effective_tokenizer = resolve_gguf_tokenizer(
        &raw_result.tokenizer,
        gguf_path,
        options.tokenizer_path.as_deref(),
    )?;
    let metadata_arch = resolve_and_log_architecture(
        &options.architecture,
        raw_result.model_config.architecture.as_deref(),
        options.strict,
    )?;
    // Cross-check metadata architecture against tensor-name evidence.
    let user_specified_gguf = options.architecture != Architecture::Auto;
    let effective_arch = verify_architecture_from_tensor_evidence(
        metadata_arch,
        raw_result.tensors.keys().map(String::as_str),
        user_specified_gguf,
    );
    let mapped_tensors = map_and_enforce_raw_tensors(
        raw_result.tensors,
        &effective_arch,
        &raw_result.model_config,
    )?;
    enforce_arch_completeness_gate(&effective_arch, &mapped_tensors, &raw_result.model_config)?;
    // The raw path performs no value-level validation; a fixed score of 85
    // marks "raw import" in the report.
    let mut validation_result = ValidationReport::new();
    validation_result.total_score = 85;
    write_apr_file_raw(
        &mapped_tensors,
        output_path,
        options,
        Some(&effective_tokenizer),
        Some(&raw_result.model_config),
    )?;
    Ok(validation_result)
}
/// Infer the effective architecture from the user's choice and the GGUF
/// metadata, log the detection, and run the unverified-architecture check.
fn resolve_and_log_architecture(
    user_arch: &Architecture,
    gguf_arch: Option<&str>,
    strict: bool,
) -> Result<Architecture> {
    let resolved = infer_architecture(user_arch, gguf_arch);
    let detected = resolved != Architecture::Auto;
    if detected {
        eprintln!(
            "[PMAT-222] Auto-detected architecture: {:?} (tensor names will be mapped)",
            resolved
        );
    }
    warn_unverified_architecture(&resolved, strict)?;
    Ok(resolved)
}
#[requires(!tensors.is_empty())]
#[ensures(ret.is_ok())]
fn map_and_enforce_raw_tensors(
tensors: BTreeMap<String, GgufRawTensor>,
effective_arch: &Architecture,
model_config: &crate::format::gguf::GgufModelConfig,
) -> Result<BTreeMap<String, GgufRawTensor>> {
use crate::format::layout_contract::enforce_import_contract;
let mut mapped: BTreeMap<String, GgufRawTensor> = tensors
.into_iter()
.map(|(name, tensor)| (effective_arch.map_name(&name), tensor))
.collect();
if *effective_arch == Architecture::Gpt2 {
Architecture::split_gpt2_fused_qkv_raw(&mut mapped);
}
if *effective_arch == Architecture::GptNeoX {
Architecture::split_neox_fused_qkv_raw(&mut mapped);
}
let vocab_size = model_config.vocab_size.unwrap_or(0);
let hidden_dim = model_config.hidden_size.unwrap_or(0);
if vocab_size == 0 || hidden_dim == 0 {
return Err(AprenderError::FormatError {
message: format!(
"CONTRACT ENFORCEMENT FAILED: Missing vocab_size ({}) or hidden_dim ({}). \
Cannot validate tensor layouts without model config. \
This GGUF file may be malformed.",
vocab_size, hidden_dim
),
});
}
let mapped: BTreeMap<String, GgufRawTensor> = mapped
.into_iter()
.map(|(name, mut tensor)| {
let (apr_shape, needs_data_transpose) =
enforce_import_contract(&name, &tensor.shape, vocab_size, hidden_dim);
assert!(
!needs_data_transpose,
"CONTRACT BUG: enforce_import_contract returned needs_data_transpose=true for '{}'. \
GGUF→APR NEVER needs data transpose. See GH-208.",
name
);
tensor.shape = apr_shape;
(name, tensor)
})
.collect();
eprintln!(
"[CONTRACT-ENFORCED] {} tensors transformed via tensor-layout-v1.yaml (vocab={}, hidden={})",
mapped.len(),
vocab_size,
hidden_dim
);
Ok(mapped)
}
/// GH-279 gate for the raw GGUF path: verify the tensor set is complete for
/// the detected architecture. Passes trivially when the architecture has no
/// completeness key, the config has no layer count, or the tensor names do
/// not look like a layered transformer.
fn enforce_arch_completeness_gate(
    arch: &Architecture,
    tensors: &BTreeMap<String, GgufRawTensor>,
    config: &GgufModelConfig,
) -> Result<()> {
    let (Some(arch_key), Some(num_layers)) = (arch.completeness_key(), config.num_layers) else {
        return Ok(());
    };
    let names: Vec<&str> = tensors.keys().map(String::as_str).collect();
    let looks_layered = names
        .iter()
        .any(|n| n.contains("model.layers.") || n.contains("blk."));
    if !looks_layered {
        return Ok(());
    }
    crate::format::layout_contract::enforce_architecture_completeness(&names, arch_key, num_layers)
        .map_err(|e| AprenderError::FormatError {
            message: format!("GH-279 architecture completeness gate: {e}"),
        })
}
/// GH-279 gate for the f32 (dequantized) path — same policy as
/// `enforce_arch_completeness_gate`, but over `(data, shape)` tensors.
fn enforce_arch_completeness_gate_f32(
    arch: &Architecture,
    tensors: &BTreeMap<String, (Vec<f32>, Vec<usize>)>,
    config: &GgufModelConfig,
) -> Result<()> {
    // No completeness key or no configured layer count: nothing to check.
    let Some(arch_key) = arch.completeness_key() else {
        return Ok(());
    };
    let Some(num_layers) = config.num_layers else {
        return Ok(());
    };
    let names: Vec<&str> = tensors.keys().map(String::as_str).collect();
    // Only layered transformer tensor sets are subject to the gate.
    if !names
        .iter()
        .any(|n| n.contains("model.layers.") || n.contains("blk."))
    {
        return Ok(());
    }
    crate::format::layout_contract::enforce_architecture_completeness(&names, arch_key, num_layers)
        .map_err(|e| AprenderError::FormatError {
            message: format!("GH-279 architecture completeness gate: {e}"),
        })
}
/// GH-279 gate for imports with no model config: the layer count is inferred
/// from the tensor names themselves; zero inferred layers skips the check.
fn enforce_arch_completeness_gate_inferred(
    arch: &Architecture,
    tensors: &BTreeMap<String, (Vec<f32>, Vec<usize>)>,
) -> Result<()> {
    let Some(arch_key) = arch.completeness_key() else {
        return Ok(());
    };
    match infer_num_layers_from_tensor_names(tensors.keys().map(String::as_str)) {
        0 => Ok(()),
        num_layers => {
            let names: Vec<&str> = tensors.keys().map(String::as_str).collect();
            crate::format::layout_contract::enforce_architecture_completeness(
                &names, arch_key, num_layers,
            )
            .map_err(|e| AprenderError::FormatError {
                message: format!("GH-279 architecture completeness gate (inferred): {e}"),
            })
        }
    }
}
/// Infer the transformer layer count from tensor names of the form
/// `blk.<i>.*` or `model.layers.<i>.*`: returns max index + 1, or 0 when no
/// layer-indexed tensor is present.
fn infer_num_layers_from_tensor_names<'a>(names: impl Iterator<Item = &'a str>) -> usize {
    names
        .filter_map(|name| {
            // Accept either the GGUF ("blk.") or HF ("model.layers.") prefix.
            let rest = name
                .strip_prefix("blk.")
                .or_else(|| name.strip_prefix("model.layers."))?;
            rest.split('.').next()?.parse::<usize>().ok()
        })
        .max()
        .map_or(0, |max_idx| max_idx + 1)
}
/// Fill in `load_result.tokenizer` from the HuggingFace cache when the model
/// source is an HF repo and no tokenizer was loaded from the model itself.
fn resolve_hf_tokenizer_fallback(load_result: &mut SourceLoadResult, source: &Source) {
    let Source::HuggingFace { org, repo, .. } = source else {
        return;
    };
    if load_result.tokenizer.is_none() {
        if let Some(tokenizer_path) = find_in_cache(org, repo, "tokenizer.json") {
            load_result.tokenizer = load_tokenizer_from_json(&tokenizer_path);
        }
    }
}
/// Dispatch a parsed `Source` to the matching resolver, yielding a local
/// file path to import from.
pub(crate) fn resolve_source(source: &Source, cache: bool) -> Result<PathBuf> {
    match source {
        Source::Url(url) => resolve_url_source(url),
        Source::Local(path) => resolve_local_source(path),
        Source::HuggingFace { org, repo, file } => {
            resolve_hf_source(org, repo, file.as_ref(), cache)
        }
    }
}
/// Resolve a local filesystem source: reject missing paths, delegate
/// directories to `resolve_local_directory`, and pass files through.
fn resolve_local_source(path: &Path) -> Result<PathBuf> {
    if !path.exists() {
        return Err(AprenderError::from(ImportError::NotFound {
            resource: path.display().to_string(),
            status: 0,
        }));
    }
    if path.is_dir() {
        resolve_local_directory(path)
    } else {
        Ok(path.to_path_buf())
    }
}
/// Pick the model file inside a local directory: the sharded index takes
/// priority over a single-file model; error when neither exists.
fn resolve_local_directory(path: &Path) -> Result<PathBuf> {
    for candidate in ["model.safetensors.index.json", "model.safetensors"] {
        let candidate_path = path.join(candidate);
        if candidate_path.exists() {
            return Ok(candidate_path);
        }
    }
    Err(AprenderError::FormatError {
        message: format!(
            "Directory {} contains no model.safetensors.index.json or model.safetensors",
            path.display()
        ),
    })
}
/// Resolve a HuggingFace `org/repo[/file]` source to a local file path.
///
/// Checks the local HF cache first (when `cache` is true), falling back to
/// the sharded index for unpinned SafeTensors repos; otherwise downloads via
/// hf-hub when that feature is enabled, or returns an error explaining how
/// to fetch the file manually.
fn resolve_hf_source(org: &str, repo: &str, file: Option<&String>, cache: bool) -> Result<PathBuf> {
    // Default filename: GGUF repos are assumed to ship "model.gguf",
    // everything else "model.safetensors".
    let filename = file.map(String::as_str).unwrap_or_else(|| {
        if repo.to_lowercase().contains("gguf") {
            "model.gguf"
        } else {
            "model.safetensors"
        }
    });
    if cache {
        if let Some(path) = find_hf_in_cache(org, repo, file, filename) {
            return Ok(path);
        }
        // Unpinned SafeTensors repo may be sharded: try the shard index too.
        if file.is_none() && filename == "model.safetensors" {
            if let Some(path) = find_in_cache(org, repo, "model.safetensors.index.json") {
                return Ok(path);
            }
        }
    }
    #[cfg(feature = "hf-hub-integration")]
    {
        let repo_id = format!("{org}/{repo}");
        download_from_hf(&repo_id, filename)
    }
    #[cfg(not(feature = "hf-hub-integration"))]
    Err(AprenderError::FormatError {
        // BUG FIX: the manual-download hint previously printed the literal
        // "(unknown)" where the resolved filename belongs.
        message: format!(
            "HuggingFace model not found in cache. Download manually:\n\
             huggingface-cli download {org}/{repo} {filename}\n\
             Or provide a local path to the SafeTensors/GGUF file.",
        ),
    })
}
/// Search the local HF cache for a model file. For GGUF repos with no
/// explicit file, common quantization filename patterns are probed before
/// falling back to the default `filename`.
fn find_hf_in_cache(
    org: &str,
    repo: &str,
    file: Option<&String>,
    filename: &str,
) -> Option<PathBuf> {
    let repo_lower = repo.to_lowercase();
    if file.is_none() && repo_lower.contains("gguf") {
        // Strip the "-gguf"/"_gguf" suffix to get the base model name.
        let base_name = repo_lower.replace("-gguf", "").replace("_gguf", "");
        let candidates = [
            format!("{base_name}-q4_k_m.gguf"),
            format!("{base_name}-q4_k.gguf"),
            format!("{base_name}-q8_0.gguf"),
            "model.gguf".to_string(),
        ];
        let hit = candidates
            .iter()
            .find_map(|pattern| find_in_cache(org, repo, pattern));
        if hit.is_some() {
            return hit;
        }
    }
    find_in_cache(org, repo, filename)
}
/// Direct URL downloads are not implemented; always returns a clear error.
fn resolve_url_source(url: &str) -> Result<PathBuf> {
    let message = format!("URL download not yet implemented: {url}");
    Err(AprenderError::FormatError { message })
}
/// Write one tensor through the streaming writer with the requested
/// quantization. Tensors flagged by `should_skip_quantization` are stored as
/// F32 even when Int8/Int4/Q4K was requested; Fp16 is never skipped.
fn streaming_dispatch_quantize(
    writer: &mut crate::format::v2::AprV2StreamingWriter,
    name: &str,
    data: &[f32],
    shape: Vec<usize>,
    quantize: Option<QuantizationType>,
) -> std::result::Result<(), crate::format::v2::V2FormatError> {
    let should_skip = super::should_skip_quantization(name, data.len());
    // Arm order matters: the guarded arms must precede the catch-all arm for
    // Int8|Int4|Q4K, which handles the "requested but skipped" case.
    match quantize {
        Some(QuantizationType::Fp16) => writer.add_f16_tensor(name, shape, data),
        Some(QuantizationType::Int8) if !should_skip => writer.add_q8_tensor(name, shape, data),
        Some(QuantizationType::Int4) if !should_skip => writer.add_q4_tensor(name, shape, data),
        Some(QuantizationType::Q4K) if !should_skip => {
            // Q4K is quantized here and handed to the writer as raw bytes.
            let q4k_bytes = super::quantize_q4_k_matrix(data, &shape);
            writer.add_q4k_raw_tensor(name, shape, &q4k_bytes)
        }
        // Quantization requested but skipped for this tensor: keep full F32.
        Some(QuantizationType::Int8 | QuantizationType::Int4 | QuantizationType::Q4K) => {
            writer.add_f32_tensor(name, shape, data)
        }
        None => writer.add_f32_tensor(name, shape, data),
    }
}
/// Stream a sharded SafeTensors model (addressed by its `*.index.json`)
/// directly into an APR v2 file, one shard at a time, without holding all
/// tensors in memory at once.
///
/// F16/BF16 tensors pass through as raw bytes when no re-quantization is
/// requested; otherwise each tensor is decoded to f32 and dispatched via
/// `streaming_dispatch_quantize`. If no `lm_head.weight` is seen, the token
/// embedding is duplicated into it (weight tying).
fn streaming_sharded_import(
    index_path: &Path,
    output_path: &Path,
    options: &ImportOptions,
) -> Result<ValidationReport> {
    use crate::format::v2::{AprV2Metadata, AprV2StreamingWriter};
    let content = fs::read_to_string(index_path).map_err(|e| AprenderError::FormatError {
        message: format!("Failed to read shard index {}: {e}", index_path.display()),
    })?;
    let index = ShardIndex::from_json(&content)?;
    if index.shard_count() == 0 {
        return Err(AprenderError::FormatError {
            message: "Shard index contains no shard files".to_string(),
        });
    }
    // Shard files are resolved relative to the (canonicalized) index dir.
    let canonical_index =
        std::fs::canonicalize(index_path).unwrap_or_else(|_| index_path.to_path_buf());
    let base_dir = canonical_index
        .parent()
        .ok_or_else(|| AprenderError::FormatError {
            message: format!(
                "Cannot determine parent directory of {}",
                index_path.display()
            ),
        })?;
    // NOTE(review): both helpers receive the index JSON path; presumably they
    // derive sibling config.json/tokenizer.json internally — confirm.
    let sibling_path = base_dir.join("model.safetensors.index.json");
    let model_config = load_model_config_from_json(&sibling_path);
    let tokenizer = load_tokenizer_from_json(&sibling_path);
    if model_config.is_none() && !options.allow_no_config {
        return Err(AprenderError::FormatError {
            message: format!(
                "config.json not found at {}. Use --allow-no-config to proceed without it.",
                base_dir.join("config.json").display()
            ),
        });
    }
    let metadata_arch = infer_architecture(
        &options.architecture,
        model_config
            .as_ref()
            .and_then(|c| c.architecture.as_deref()),
    );
    // APR metadata is assembled from the model config; param_count is written
    // as 0 on this streaming path (never recomputed below).
    let param_count = 0u64;
    let mut custom = std::collections::HashMap::new();
    if let Some(ref tok) = tokenizer {
        super::write::insert_f32_tokenizer_metadata(tok, &mut custom);
    }
    let metadata = AprV2Metadata {
        model_type: format!("{metadata_arch:?}"),
        name: Some(
            output_path
                .file_stem()
                .and_then(|s| s.to_str())
                .unwrap_or("model")
                .to_string(),
        ),
        param_count,
        custom,
        architecture: model_config.as_ref().and_then(|c| c.architecture.clone()),
        hidden_size: model_config.as_ref().and_then(|c| c.hidden_size),
        num_layers: model_config.as_ref().and_then(|c| c.num_layers),
        num_heads: model_config.as_ref().and_then(|c| c.num_heads),
        num_kv_heads: model_config.as_ref().and_then(|c| c.num_kv_heads),
        vocab_size: model_config.as_ref().and_then(|c| c.vocab_size),
        intermediate_size: model_config.as_ref().and_then(|c| c.intermediate_size),
        max_position_embeddings: model_config
            .as_ref()
            .and_then(|c| c.max_position_embeddings),
        rope_theta: model_config.as_ref().and_then(|c| c.rope_theta),
        rope_type: model_config.as_ref().and_then(|c| c.rope_type),
        rms_norm_eps: model_config.as_ref().and_then(|c| c.rms_norm_eps),
        ..Default::default()
    };
    eprintln!(
        "[realizar#136] Streaming import: {} shards, {} tensors → {}",
        index.shard_count(),
        index.tensor_count(),
        output_path.display(),
    );
    let mut writer =
        AprV2StreamingWriter::new(metadata).map_err(|e| AprenderError::FormatError {
            message: format!("Failed to create streaming writer: {e}"),
        })?;
    let mut total_tensors = 0usize;
    let mut f16_passthrough = 0usize;
    // Track lm_head presence and where the embedding lives, for weight tying.
    let mut has_lm_head = false;
    let mut embed_info: Option<(String, String)> = None;
    for shard_file in index.shard_files() {
        let shard_path = base_dir.join(shard_file);
        if !shard_path.exists() {
            return Err(AprenderError::FormatError {
                message: format!(
                    "Shard file {} not found at {}",
                    shard_file,
                    shard_path.display()
                ),
            });
        }
        let mapped =
            MappedSafeTensors::open(&shard_path).map_err(|e| AprenderError::FormatError {
                message: format!("Failed to mmap shard {shard_file}: {e}"),
            })?;
        let names: Vec<String> = mapped
            .tensor_names()
            .iter()
            .map(|&s| (*s).to_string())
            .collect();
        let mut shard_f16 = 0usize;
        for name in &names {
            // Skip bookkeeping entries such as "__metadata__".
            if name.starts_with("__") {
                continue;
            }
            let meta = mapped
                .get_metadata(name)
                .ok_or_else(|| AprenderError::FormatError {
                    message: format!("Tensor metadata not found for '{name}'"),
                })?;
            let mapped_name = metadata_arch.map_name(name);
            if mapped_name == "lm_head.weight" || mapped_name == "output.weight" {
                has_lm_head = true;
            }
            if mapped_name.contains("embed_tokens.weight")
                || mapped_name == "token_embd.weight"
                || mapped_name == "wte.weight"
            {
                embed_info = Some((shard_file.clone(), name.clone()));
            }
            let is_bf16 = meta.dtype == "BF16";
            let is_f16 = meta.dtype == "F16" || is_bf16;
            // F16/BF16 passthrough: copy raw bytes when no quantization (or
            // Fp16) is requested, avoiding a decode/re-encode round trip.
            if is_f16 && matches!(options.quantize, None | Some(QuantizationType::Fp16)) {
                if let Some(raw_bytes) = mapped.get_tensor_bytes(name) {
                    writer
                        .add_raw_f16_tensor(&mapped_name, meta.shape.clone(), raw_bytes, is_bf16)
                        .map_err(|e| AprenderError::FormatError {
                            message: format!("Failed to write tensor '{mapped_name}': {e}"),
                        })?;
                    shard_f16 += 1;
                    total_tensors += 1;
                    continue;
                }
            }
            // Fallback: decode to f32 and run the quantization dispatch.
            let data = mapped
                .get_tensor(name)
                .map_err(|e| AprenderError::FormatError {
                    message: format!("Failed to extract tensor '{name}': {e}"),
                })?;
            streaming_dispatch_quantize(
                &mut writer,
                &mapped_name,
                &data,
                meta.shape.clone(),
                options.quantize,
            )
            .map_err(|e| AprenderError::FormatError {
                message: format!("Failed to write tensor '{mapped_name}': {e}"),
            })?;
            total_tensors += 1;
        }
        // NOTE(review): names.len() includes any "__"-prefixed entries that
        // were skipped above, so this per-shard "quantized" count can
        // overcount — log-only, does not affect the written file.
        let shard_quantized = names.len() - shard_f16;
        f16_passthrough += shard_f16;
        eprintln!(
            "[realizar#136] Shard {shard_file}: {} tensors ({shard_f16} F16 passthrough, {shard_quantized} quantized)",
            names.len(),
        );
    }
    // Weight tying: no lm_head seen, so duplicate the token embedding as
    // lm_head.weight (re-opening the shard that held the embedding).
    if !has_lm_head {
        if let Some((embed_shard, embed_name)) = &embed_info {
            let shard_path = base_dir.join(embed_shard);
            let mapped =
                MappedSafeTensors::open(&shard_path).map_err(|e| AprenderError::FormatError {
                    message: format!("Failed to re-mmap shard for weight tying: {e}"),
                })?;
            let meta =
                mapped
                    .get_metadata(embed_name)
                    .ok_or_else(|| AprenderError::FormatError {
                        message: "Embedding tensor metadata not found for weight tying".to_string(),
                    })?;
            let data = mapped
                .get_tensor(embed_name)
                .map_err(|e| AprenderError::FormatError {
                    message: format!("Failed to extract embedding for weight tying: {e}"),
                })?;
            streaming_dispatch_quantize(
                &mut writer,
                "lm_head.weight",
                &data,
                meta.shape.clone(),
                options.quantize,
            )
            .map_err(|e| AprenderError::FormatError {
                message: format!("Failed to write tied lm_head.weight: {e}"),
            })?;
            total_tensors += 1;
            eprintln!("[realizar#136] Weight tying: duplicated {embed_name} → lm_head.weight");
        }
    }
    let total_quantized = total_tensors - f16_passthrough;
    eprintln!(
        "[realizar#136] Streaming write complete: {} tensors ({} F16 passthrough, {} quantized), {:.1} GB data",
        total_tensors,
        f16_passthrough,
        total_quantized,
        writer.data_bytes_written() as f64 / 1_073_741_824.0,
    );
    writer
        .finalize(output_path)
        .map_err(|e| AprenderError::FormatError {
            message: format!("Failed to finalize APR file: {e}"),
        })?;
    // Best-effort size report; metadata failure degrades to 0, not an error.
    let file_size = fs::metadata(output_path).map(|m| m.len()).unwrap_or(0);
    eprintln!(
        "[realizar#136] Written {} ({:.1} GB)",
        output_path.display(),
        file_size as f64 / 1_073_741_824.0,
    );
    Ok(ValidationReport::new())
}
include!("import_include_01.rs");