use std::collections::BTreeMap;
use std::path::Path;
use crate::error::Result;
use super::{GgufReader, TensorDataMap};
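
/// Load all tensors from a GGUF file, converting each to `f32`.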
pub fn load_gguf_tensors<P: AsRef<Path>>(path: P) -> Result<TensorDataMap> {
    let reader = GgufReader::from_file(path)?;
    reader.get_all_tensors_f32()
}
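
/// Tokenizer data extracted from GGUF metadata: vocabulary, merges,
/// special-token ids, chat template, and related settings.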
#[derive(Debug, Clone, Default)]
pub struct GgufTokenizer {
    pub vocabulary: Vec<String>,
    pub merges: Vec<String>,
    /// Tokenizer model identifier from the GGUF metadata, when present.
    pub model_type: Option<String>,
    pub bos_token_id: Option<u32>,
    pub eos_token_id: Option<u32>,
    pub architecture: Option<String>,
    pub model_name: Option<String>,
    /// Per-token type ids, when available.
    pub token_type: Vec<i32>,
    pub padding_token_id: Option<u32>,
    pub add_bos_token: Option<bool>,
    pub chat_template: Option<String>,
    /// Pre-tokenizer identifier, when present.
    pub pre_type: Option<String>,
    /// Per-token scores, when available.
    pub scores: Vec<f32>,
}

impl GgufTokenizer {
    /// Returns `true` if a vocabulary was found in the GGUF metadata.
    #[must_use]
    pub fn has_vocabulary(&self) -> bool {
        !self.vocabulary.is_empty()
    }

    /// Number of entries in the vocabulary.
    #[must_use]
    pub fn vocab_size(&self) -> usize {
        self.vocabulary.len()
    }
}
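
/// Model hyperparameters read from GGUF metadata; fields are `None` when the
/// corresponding metadata is unavailable.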
#[derive(Debug, Clone, Default)]
pub struct GgufModelConfig {
    pub architecture: Option<String>,
    pub hidden_size: Option<usize>,
    pub num_layers: Option<usize>,
    pub num_heads: Option<usize>,
    pub num_kv_heads: Option<usize>,
    pub vocab_size: Option<usize>,
    pub intermediate_size: Option<usize>,
    pub max_position_embeddings: Option<usize>,
    pub rope_theta: Option<f32>,
    pub rms_norm_eps: Option<f32>,
    pub rope_type: Option<u32>,
    pub head_dim: Option<usize>,
    pub num_experts: Option<usize>,
    pub num_experts_per_tok: Option<usize>,
    pub moe_intermediate_size: Option<usize>,
}

impl GgufModelConfig {
    /// Warn on stderr about any configuration values outside the expected bounds.
    ///
    /// Out-of-range values are only reported, never rejected.
    pub fn warn_out_of_bounds(&self) {
        contract_pre_architecture_config_invariants!();
        Self::check_usize_bound(self.hidden_size, 1, 65_536, "hidden_size");
        Self::check_usize_bound(self.num_layers, 1, 256, "num_layers");
        Self::check_usize_bound(self.num_heads, 1, 256, "num_heads");
        Self::check_usize_bound(self.num_kv_heads, 1, 256, "num_kv_heads");
        Self::check_usize_bound(self.vocab_size, 1, 1_000_000, "vocab_size");
        Self::check_usize_bound(self.intermediate_size, 1, 262_144, "intermediate_size");
        Self::check_usize_bound(
            self.max_position_embeddings,
            0,
            2_097_152,
            "max_position_embeddings",
        );
        if let Some(theta) = self.rope_theta {
            if theta > 0.0 && theta < 1.0 {
                eprintln!(
                    "Warning: rope_theta {theta} below minimum 1.0 (model-metadata-bounds-v1)"
                );
            }
            if theta > 100_000_000.0 {
                eprintln!(
                    "Warning: rope_theta {theta} exceeds max 100000000.0 (model-metadata-bounds-v1)"
                );
            }
        }
        if let Some(eps) = self.rms_norm_eps {
            if eps > 0.0 && eps < 1e-10 {
                eprintln!(
                    "Warning: rms_norm_eps {eps} below minimum 1e-10 (model-metadata-bounds-v1)"
                );
            }
            if eps > 0.01 {
                eprintln!(
                    "Warning: rms_norm_eps {eps} exceeds max 0.01 (model-metadata-bounds-v1)"
                );
            }
        }
        contract_post_architecture_config_invariants!(&());
    }

    /// Print a warning if `value` is present and falls outside `[min, max]`.
    fn check_usize_bound(value: Option<usize>, min: usize, max: usize, field: &str) {
        if let Some(v) = value {
            if v < min {
                eprintln!("Warning: {field} {v} below minimum {min} (model-metadata-bounds-v1)");
            }
            if v > max {
                eprintln!("Warning: {field} {v} exceeds max {max} (model-metadata-bounds-v1)");
            }
        }
    }
}
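
/// Everything produced by [`load_gguf_with_tokenizer`]: f32 tensors, tokenizer
/// data, and the model configuration.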
#[derive(Debug)]
pub struct GgufLoadResult {
    pub tensors: TensorDataMap,
    pub tokenizer: GgufTokenizer,
    pub model_config: GgufModelConfig,
}
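
/// Load a GGUF file, returning f32 tensors together with tokenizer and model metadata.
///
/// Illustrative usage (the path is a placeholder):
/// ```ignore
/// let result = load_gguf_with_tokenizer("model.gguf")?;
/// println!("layers = {:?}", result.model_config.num_layers);
/// ```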
pub fn load_gguf_with_tokenizer<P: AsRef<Path>>(path: P) -> Result<GgufLoadResult> {
    let reader = GgufReader::from_file(path)?;
    let tensors = reader.get_all_tensors_f32()?;
    let tokenizer = GgufTokenizer {
        vocabulary: reader.vocabulary().unwrap_or_default(),
        merges: reader.merges().unwrap_or_default(),
        model_type: reader.tokenizer_model(),
        bos_token_id: reader.bos_token_id(),
        eos_token_id: reader.eos_token_id(),
        architecture: reader.architecture(),
        model_name: reader.model_name(),
        pre_type: reader.pre_tokenizer_type(),
        ..Default::default()
    };
    let arch = reader.architecture();
    // Qwen-family architectures get RoPE type 2; all others default to 0.
    let rope_type = match arch.as_deref() {
        Some("qwen2" | "qwen2.5" | "qwen") => Some(2),
        _ => Some(0),
    };
    let model_config = GgufModelConfig {
        architecture: arch,
        hidden_size: reader.hidden_size(),
        num_layers: reader.num_layers(),
        num_heads: reader.num_heads(),
        num_kv_heads: reader.num_kv_heads(),
        vocab_size: reader.vocab_size(),
        intermediate_size: reader.intermediate_size(),
        max_position_embeddings: reader.context_length(),
        rope_theta: reader.rope_theta(),
        rms_norm_eps: reader.rms_norm_eps(),
        rope_type,
        head_dim: None,
        num_experts: None,
        num_experts_per_tok: None,
        moe_intermediate_size: None,
    };
    Ok(GgufLoadResult {
        tensors,
        tokenizer,
        model_config,
    })
}
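
/// A tensor in its on-disk GGUF encoding: raw bytes, shape, and the GGUF dtype id.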
#[derive(Debug, Clone)]
pub struct GgufRawTensor {
    pub data: Vec<u8>,
    pub shape: Vec<usize>,
    pub dtype: u32,
}
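
/// Everything produced by [`load_gguf_raw`]: tensors left in their raw encoding,
/// tokenizer data, model configuration, and metadata rendered as strings.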
#[derive(Debug)]
pub struct GgufRawLoadResult {
    pub tensors: BTreeMap<String, GgufRawTensor>,
    pub tokenizer: GgufTokenizer,
    pub model_config: GgufModelConfig,
    pub raw_metadata: BTreeMap<String, String>,
}
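
/// Load a GGUF file without converting tensors: data keeps its on-disk byte layout.
///
/// Illustrative usage (the path is a placeholder):
/// ```ignore
/// let raw = load_gguf_raw("model.gguf")?;
/// for (name, tensor) in &raw.tensors {
///     println!("{name}: shape {:?}, dtype {}", tensor.shape, tensor.dtype);
/// }
/// ```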
pub fn load_gguf_raw<P: AsRef<Path>>(path: P) -> Result<GgufRawLoadResult> {
    let reader = GgufReader::from_file(path)?;
    let raw_tensors = reader.get_all_tensors_raw()?;
    let mut tensors = BTreeMap::new();
    for (name, (data, shape, dtype)) in raw_tensors {
        tensors.insert(name, GgufRawTensor { data, shape, dtype });
    }
    let tokenizer = GgufTokenizer {
        vocabulary: reader.vocabulary().unwrap_or_default(),
        merges: reader.merges().unwrap_or_default(),
        model_type: reader.tokenizer_model(),
        bos_token_id: reader.bos_token_id(),
        eos_token_id: reader.eos_token_id(),
        architecture: reader.architecture(),
        model_name: reader.model_name(),
        token_type: reader.token_type().unwrap_or_default(),
        padding_token_id: reader.padding_token_id(),
        add_bos_token: reader.add_bos_token(),
        chat_template: reader.chat_template(),
        pre_type: reader.pre_tokenizer_type(),
        scores: Vec::new(),
    };
    let arch = reader.architecture();
    // Same RoPE-type selection as `load_gguf_with_tokenizer`.
    let rope_type = match arch.as_deref() {
        Some("qwen2" | "qwen2.5" | "qwen") => Some(2),
        _ => Some(0),
    };
    let model_config = GgufModelConfig {
        architecture: arch,
        hidden_size: reader.hidden_size(),
        num_layers: reader.num_layers(),
        num_heads: reader.num_heads(),
        num_kv_heads: reader.num_kv_heads(),
        vocab_size: reader.vocab_size(),
        intermediate_size: reader.intermediate_size(),
        max_position_embeddings: reader.context_length(),
        rope_theta: reader.rope_theta(),
        rms_norm_eps: reader.rms_norm_eps(),
        rope_type,
        head_dim: None,
        num_experts: None,
        num_experts_per_tok: None,
        moe_intermediate_size: None,
    };
    let raw_metadata: BTreeMap<String, String> = reader
        .metadata
        .iter()
        .map(|(k, v)| (k.clone(), gguf_value_display(v)))
        .collect();
    Ok(GgufRawLoadResult {
        tensors,
        tokenizer,
        model_config,
        raw_metadata,
    })
}
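
/// Render a GGUF metadata value as a short display string; arrays are
/// summarized by their length.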
fn gguf_value_display(v: &crate::format::gguf::types::GgufValue) -> String {
    use crate::format::gguf::types::GgufValue;
    match v {
        GgufValue::Uint8(n) => n.to_string(),
        GgufValue::Int8(n) => n.to_string(),
        GgufValue::Uint16(n) => n.to_string(),
        GgufValue::Int16(n) => n.to_string(),
        GgufValue::Uint32(n) => n.to_string(),
        GgufValue::Int32(n) => n.to_string(),
        GgufValue::Float32(n) => n.to_string(),
        GgufValue::Bool(b) => b.to_string(),
        GgufValue::String(s) => s.clone(),
        GgufValue::Uint64(n) => n.to_string(),
        GgufValue::Int64(n) => n.to_string(),
        GgufValue::Float64(n) => n.to_string(),
        GgufValue::ArrayUint32(a) => format!("[len={}]", a.len()),
        GgufValue::ArrayInt32(a) => format!("[len={}]", a.len()),
        GgufValue::ArrayFloat32(a) => format!("[len={}]", a.len()),
        GgufValue::ArrayString(a) => format!("[len={}]", a.len()),
    }
}

#[cfg(test)]
#[path = "api_tests.rs"]
mod tests;