use std::io::{Cursor, Write};
use realizar::gguf::{GGUFModel, GGUFValue, GGUF_MAGIC, GGUF_TYPE_F32, GGUF_TYPE_Q4_0};
/// Demo entry point: builds a synthetic GGUF file in memory, loads it with
/// `GGUFModel::from_bytes`, and walks through its header, metadata, tensor
/// descriptors, and tensor data (both F32 and Q4_0-dequantized).
fn main() {
    println!("=== GGUF Loading Example ===\n");

    // Build a self-contained GGUF byte buffer so the example needs no files on disk.
    println!("--- Creating Example GGUF File ---");
    let gguf_data = create_example_gguf_model();
    println!("Created GGUF file: {} bytes", gguf_data.len());
    println!();

    // Parse the buffer; a panic here indicates the generated bytes are malformed.
    println!("--- Loading GGUF Model ---");
    let model = GGUFModel::from_bytes(&gguf_data).expect("Failed to load GGUF model");
    println!("Successfully loaded model");
    println!(" - Number of tensors: {}", model.tensors.len());
    println!(" - Metadata entries: {}", model.metadata.len());
    println!();

    // Header fields: magic is checked against the crate constant for display only
    // (from_bytes already validated it, or it would have failed above).
    println!("--- GGUF Header ---");
    println!(
        "Magic: 0x{:08X} ({})",
        model.header.magic,
        if model.header.magic == GGUF_MAGIC {
            "valid ✓"
        } else {
            "invalid ✗"
        }
    );
    println!("Version: {}", model.header.version);
    println!("Tensor count: {}", model.header.tensor_count);
    println!("Metadata count: {}", model.header.metadata_count);
    println!();

    // Pretty-print each metadata entry by variant; unhandled variants fall
    // through to the Debug formatting in the catch-all arm.
    println!("--- Metadata ---");
    for (key, value) in &model.metadata {
        match value {
            GGUFValue::String(s) => println!("{}: \"{}\"", key, s),
            GGUFValue::UInt32(v) => println!("{}: {}", key, v),
            GGUFValue::UInt64(v) => println!("{}: {}", key, v),
            GGUFValue::Float32(v) => println!("{}: {}", key, v),
            GGUFValue::Bool(v) => println!("{}: {}", key, v),
            GGUFValue::Array(arr) => println!("{}: [array with {} elements]", key, arr.len()),
            _ => println!("{}: {:?}", key, value),
        }
    }
    println!();

    // Dump each tensor descriptor: name, shape, quantization type, element
    // count (product of dims), and byte offset into the data section.
    println!("--- Tensor Information ---");
    for tensor in &model.tensors {
        println!("Tensor: {}", tensor.name);
        println!(" - Dimensions: {:?}", tensor.dims);
        println!(
            " - Quantization type: {} ({})",
            tensor.qtype,
            match tensor.qtype {
                GGUF_TYPE_F32 => "F32 (unquantized)",
                GGUF_TYPE_Q4_0 => "Q4_0 (4-bit)",
                _ => "other",
            }
        );
        let total_elements: u64 = tensor.dims.iter().product();
        println!(" - Total elements: {}", total_elements);
        println!(" - Data offset: {}", tensor.offset);
        println!();
    }

    // Extract the unquantized F32 tensor. `5.min(len)` guards the slice
    // against tensors with fewer than 5 elements.
    println!("--- Extracting Tensor Data ---");
    match model.get_tensor_f32("embedding.weight", &gguf_data) {
        Ok(weights) => {
            println!("Embedding weights (F32):");
            println!(" - Length: {}", weights.len());
            println!(" - First 5 values: {:?}", &weights[..5.min(weights.len())]);
            // min/max via fold from the appropriate infinities; mean as sum/len.
            println!(
                " - Stats: min={:.4}, max={:.4}, mean={:.4}",
                weights.iter().copied().fold(f32::INFINITY, f32::min),
                weights.iter().copied().fold(f32::NEG_INFINITY, f32::max),
                weights.iter().sum::<f32>() / weights.len() as f32
            );
        },
        Err(e) => println!(" Failed to extract embedding.weight: {}", e),
    }
    println!();

    // Extract the Q4_0 tensor; get_tensor_f32 dequantizes it to F32 for us.
    match model.get_tensor_f32("layer.0.weight", &gguf_data) {
        Ok(dequantized) => {
            println!("Layer 0 weights (Q4_0 dequantized to F32):");
            println!(" - Length: {}", dequantized.len());
            println!(
                " - First 5 values: {:?}",
                &dequantized[..5.min(dequantized.len())]
            );
            println!(
                " - Stats: min={:.4}, max={:.4}, mean={:.4}",
                dequantized.iter().copied().fold(f32::INFINITY, f32::min),
                dequantized
                    .iter()
                    .copied()
                    .fold(f32::NEG_INFINITY, f32::max),
                dequantized.iter().sum::<f32>() / dequantized.len() as f32
            );
        },
        Err(e) => println!(" Failed to extract layer.0.weight: {}", e),
    }
    println!();

    println!("--- Summary ---");
    println!("GGUF format features demonstrated:");
    println!(" ✓ Binary format parsing (magic number, version)");
    println!(" ✓ Metadata extraction (key-value pairs)");
    println!(" ✓ Tensor information (shapes, types, offsets)");
    println!(" ✓ Unquantized tensor data (F32)");
    println!(" ✓ Quantized tensor data (Q4_0 with dequantization)");
    println!();
    println!("This demonstrates llama.cpp/Ollama model compatibility!");
    println!();
    println!("=== GGUF Loading Complete ===");
}
/// Builds a minimal, valid GGUF v3 file entirely in memory.
///
/// Layout produced:
/// - header: magic, version 3, tensor count 2, metadata count 3
/// - metadata: `model.name` (string), `vocab.size` (u32), `hidden.size` (u32)
/// - tensor infos: `embedding.weight` ([1000, 256], F32, offset 0) and
///   `layer.0.weight` ([256, 256], Q4_0, offset = size of the F32 tensor)
/// - tensor data section, padded to 32-byte alignment
///
/// Returns the complete file as a byte vector.
fn create_example_gguf_model() -> Vec<u8> {
    let mut buffer = Vec::new();
    let mut cursor = Cursor::new(&mut buffer);

    // --- Header ---
    cursor
        .write_all(&GGUF_MAGIC.to_le_bytes())
        .expect("Failed to write GGUF magic number");
    cursor
        .write_all(&3u32.to_le_bytes())
        .expect("Failed to write GGUF version");
    cursor
        .write_all(&2u64.to_le_bytes())
        .expect("Failed to write tensor count");
    cursor
        .write_all(&3u64.to_le_bytes())
        .expect("Failed to write metadata count");

    // --- Metadata: key, value-type tag, then value ---
    // Type tags follow the GGUF spec: 8 = string, 4 = uint32.
    write_string(&mut cursor, "model.name");
    cursor
        .write_all(&8u32.to_le_bytes())
        .expect("Failed to write metadata type for model.name");
    write_string(&mut cursor, "demo-model");
    write_string(&mut cursor, "vocab.size");
    cursor
        .write_all(&4u32.to_le_bytes())
        .expect("Failed to write metadata type for vocab.size");
    cursor
        .write_all(&1000u32.to_le_bytes())
        .expect("Failed to write vocab size value");
    write_string(&mut cursor, "hidden.size");
    cursor
        .write_all(&4u32.to_le_bytes())
        .expect("Failed to write metadata type for hidden.size");
    cursor
        .write_all(&256u32.to_le_bytes())
        .expect("Failed to write hidden size value");

    // --- Tensor info: name, n_dims, dims, type, offset into data section ---
    write_string(&mut cursor, "embedding.weight");
    cursor
        .write_all(&2u32.to_le_bytes())
        .expect("Failed to write n_dims for embedding.weight");
    cursor
        .write_all(&1000u64.to_le_bytes())
        .expect("Failed to write dim 0 for embedding.weight");
    cursor
        .write_all(&256u64.to_le_bytes())
        .expect("Failed to write dim 1 for embedding.weight");
    cursor
        .write_all(&GGUF_TYPE_F32.to_le_bytes())
        .expect("Failed to write tensor type for embedding.weight");
    cursor
        .write_all(&0u64.to_le_bytes())
        .expect("Failed to write offset for embedding.weight");

    write_string(&mut cursor, "layer.0.weight");
    cursor
        .write_all(&2u32.to_le_bytes())
        .expect("Failed to write n_dims for layer.0.weight");
    cursor
        .write_all(&256u64.to_le_bytes())
        .expect("Failed to write dim 0 for layer.0.weight");
    cursor
        .write_all(&256u64.to_le_bytes())
        .expect("Failed to write dim 1 for layer.0.weight");
    cursor
        .write_all(&GGUF_TYPE_Q4_0.to_le_bytes())
        .expect("Failed to write tensor type for layer.0.weight");
    // Second tensor starts right after the first: 1000 * 256 F32 values, 4 bytes each.
    let tensor1_size: u64 = 1000 * 256 * 4;
    cursor
        .write_all(&tensor1_size.to_le_bytes())
        .expect("Failed to write offset for layer.0.weight");

    // --- Pad to 32-byte alignment before the tensor data section ---
    let alignment = 32;
    let current_pos = buffer.len();
    let padding = (alignment - (current_pos % alignment)) % alignment;
    buffer.resize(current_pos + padding, 0);

    // --- embedding.weight data: 1000 x 256 F32 values ---
    for i in 0..1000 {
        for j in 0..256 {
            let value = ((i + j) as f32) * 0.01;
            buffer.extend_from_slice(&value.to_le_bytes());
        }
    }

    // --- layer.0.weight data: Q4_0 blocks of 32 elements each ---
    // Each block is a 2-byte f16 scale followed by 16 bytes of packed 4-bit quants.
    let num_elements = 256 * 256;
    let num_blocks = num_elements / 32;
    for _ in 0..num_blocks {
        // f16 scale of 1.0 is 0x3C00 (little-endian [0x00, 0x3C]).
        // BUG FIX: the previous code wrote `&1.0f32.to_le_bytes()[..2]`, i.e. the
        // two low LE bytes of the f32 1.0 (0x3F800000), which are [0x00, 0x00] —
        // an f16 scale of 0.0 that dequantizes every weight in the block to zero.
        buffer.extend_from_slice(&0x3C00u16.to_le_bytes());
        // 16 bytes of nibble-packed quants; 0x12 packs quant values 2 and 1.
        buffer.extend(std::iter::repeat_n(0x12u8, 16));
    }
    buffer
}
/// Serializes `s` in GGUF string form: an 8-byte little-endian length prefix
/// (byte length, not char count) followed by the string's raw UTF-8 bytes.
///
/// Panics if a write fails, which cannot happen for an in-memory `Vec` target.
fn write_string(cursor: &mut Cursor<&mut Vec<u8>>, s: &str) {
    let payload = s.as_bytes();
    let length_prefix = (payload.len() as u64).to_le_bytes();
    cursor
        .write_all(&length_prefix)
        .expect("Failed to write string length");
    cursor
        .write_all(payload)
        .expect("Failed to write string bytes");
}