ternlang_compress/format.rs
1// Model format I/O — read source models, write .tern files.
2//
3// Currently two stubs are provided:
4// - GgufLoader — reads GGUF files (used by Ollama / llama.cpp)
5// - SafeTensorsLoader — reads HuggingFace safetensors
6//
7// Both are thin interfaces. The actual parsing is deferred to candle's
8// readers or purpose-built crates once this crate reaches Phase 12 integration.
9// Enable the `gguf` or `safetensors` Cargo feature to activate each loader.
10//
11// The .tern output format is bincode-serialised TernModel (see model.rs).
12// This keeps it self-contained and dependency-light for now. A proper
13// GGUF-Q-ternary type can be registered once llama.cpp upstream ternary quant
14// lands, at which point we swap bincode → GGUF writer here.
15
16use std::io::{Read, Write};
17use std::path::Path;
18use anyhow::Result;
19use crate::model::TernModel;
20
21// ─── .tern writer / reader ────────────────────────────────────────────────────
22
23/// Write a TernModel to a `.tern` file (bincode format).
24pub fn write_tern<W: Write>(writer: &mut W, model: &TernModel) -> Result<()> {
25 let bytes = bincode::serialize(model)?;
26 writer.write_all(&bytes)?;
27 Ok(())
28}
29
30/// Read a TernModel from a `.tern` file.
31pub fn read_tern<R: Read>(reader: &mut R) -> Result<TernModel> {
32 let mut bytes = Vec::new();
33 reader.read_to_end(&mut bytes)?;
34 let model = bincode::deserialize(&bytes)?;
35 Ok(model)
36}
37
38/// Convenience: write to a file path.
39pub fn save_tern(path: &Path, model: &TernModel) -> Result<()> {
40 let mut f = std::fs::File::create(path)?;
41 write_tern(&mut f, model)
42}
43
44/// Convenience: read from a file path.
45pub fn load_tern(path: &Path) -> Result<TernModel> {
46 let mut f = std::fs::File::open(path)?;
47 read_tern(&mut f)
48}
49
50// ─── GGUF loader stub ─────────────────────────────────────────────────────────
51
52/// Loads a GGUF model and returns its weight tensors as `(name, f32_weights, shape)`.
53///
54/// TODO (Phase 12): Implement using candle's GGUF reader or a direct parser.
55/// The GGUF format stores tensors with their quant type, shape, and data.
56/// For now this is a stub — the real implementation should:
57/// 1. Open the file (mmap for large models)
58/// 2. Parse GGUF header (magic + metadata KV + tensor index)
59/// 3. For each tensor: dequantize to f32, return (name, data, shape)
60///
61/// Feature gate: compile with `--features gguf` once implemented.
62#[allow(dead_code)]
63pub fn load_gguf(_path: &Path) -> Result<Vec<(String, Vec<f32>, Vec<usize>)>> {
64 // Dequantization note: GGUF supports Q4_0, Q4_1, Q8_0, F16, F32, etc.
65 // We dequant to f32 first, then re-quantize to ternary.
66 // Direct F16→ternary (without f32 intermediate) would reduce peak memory —
67 // worth implementing once the pipeline is validated on Llama-3.2-1B.
68 anyhow::bail!(
69 "GGUF loader not yet implemented. \
70 Enable the `gguf` feature and implement load_gguf() in format.rs. \
71 See TODO comment above for the implementation guide."
72 )
73}
74
75// ─── SafeTensors loader stub ──────────────────────────────────────────────────
76
77/// Loads a HuggingFace safetensors model directory.
78///
79/// TODO (Phase 12): Implement using the `safetensors` crate (MIT licensed).
80/// Typical layout: model.safetensors or model-00001-of-NNNNN.safetensors.
81/// The safetensors format is straightforward: a JSON header + raw tensor data.
82///
83/// Feature gate: compile with `--features safetensors` once implemented.
84#[allow(dead_code)]
85pub fn load_safetensors(_dir: &Path) -> Result<Vec<(String, Vec<f32>, Vec<usize>)>> {
86 anyhow::bail!(
87 "SafeTensors loader not yet implemented. \
88 Enable the `safetensors` feature and implement load_safetensors() in format.rs."
89 )
90}
91
92// ─── Mock loader for testing ──────────────────────────────────────────────────
93
/// Generate a synthetic model for testing the pipeline without a real model file.
///
/// Produces `n_layers` entries of `(name, weights, shape)` where:
/// - `name` is `model.layers.{i}.mlp.weight` for layer index `i`,
/// - `weights` holds `hidden * hidden` pseudo-random values in `[-2.0, 2.0]`,
/// - `shape` is `[hidden, hidden]`.
///
/// The output is fully determined by the arguments: the same
/// `(n_layers, hidden, seed)` always yields the same layers. The generator
/// state is shared across layers, so each layer gets different values.
pub fn synthetic_layers(
    n_layers: usize,
    hidden: usize,
    seed: u64,
) -> Vec<(String, Vec<f32>, Vec<usize>)> {
    // Simple deterministic 64-bit LCG for reproducible tests (no rand dep needed).
    // `seed + 1` is what the generator is started from.
    let mut state = seed.wrapping_add(1);
    let mut next_f32 = move || -> f32 {
        state = state
            .wrapping_mul(6364136223846793005)
            .wrapping_add(1442695040888963407);
        // Use the high 32 bits of the state. Shifting by 33 would leave only
        // 31 bits, capping the ratio below at 0.5 and confining every weight
        // to the negative half of the range.
        let bits = (state >> 32) as u32;
        // Map [0, u32::MAX] onto [-2.0, 2.0] to get a realistic weight spread.
        (bits as f32 / u32::MAX as f32) * 4.0 - 2.0
    };

    (0..n_layers)
        .map(|i| {
            let name = format!("model.layers.{i}.mlp.weight");
            let weights: Vec<f32> = (0..hidden * hidden).map(|_| next_f32()).collect();
            let shape = vec![hidden, hidden];
            (name, weights, shape)
        })
        .collect()
}