ternlang_compress/
format.rs

1// Model format I/O — read source models, write .tern files.
2//
3// Currently two stubs are provided:
4//   - GgufLoader   — reads GGUF files (used by Ollama / llama.cpp)
5//   - SafeTensorsLoader — reads HuggingFace safetensors
6//
7// Both are thin interfaces.  The actual parsing is deferred to candle's
8// readers or purpose-built crates once this crate reaches Phase 12 integration.
9// Enable the `gguf` or `safetensors` Cargo feature to activate each loader.
10//
11// The .tern output format is bincode-serialised TernModel (see model.rs).
12// This keeps it self-contained and dependency-light for now.  A proper
13// GGUF-Q-ternary type can be registered once llama.cpp upstream ternary quant
14// lands, at which point we swap bincode → GGUF writer here.
15
16use std::io::{Read, Write};
17use std::path::Path;
18use anyhow::Result;
19use crate::model::TernModel;
20
21// ─── .tern writer / reader ────────────────────────────────────────────────────
22
23/// Write a TernModel to a `.tern` file (bincode format).
24pub fn write_tern<W: Write>(writer: &mut W, model: &TernModel) -> Result<()> {
25    let bytes = bincode::serialize(model)?;
26    writer.write_all(&bytes)?;
27    Ok(())
28}
29
30/// Read a TernModel from a `.tern` file.
31pub fn read_tern<R: Read>(reader: &mut R) -> Result<TernModel> {
32    let mut bytes = Vec::new();
33    reader.read_to_end(&mut bytes)?;
34    let model = bincode::deserialize(&bytes)?;
35    Ok(model)
36}
37
38/// Convenience: write to a file path.
39pub fn save_tern(path: &Path, model: &TernModel) -> Result<()> {
40    let mut f = std::fs::File::create(path)?;
41    write_tern(&mut f, model)
42}
43
44/// Convenience: read from a file path.
45pub fn load_tern(path: &Path) -> Result<TernModel> {
46    let mut f = std::fs::File::open(path)?;
47    read_tern(&mut f)
48}
49
50// ─── GGUF loader stub ─────────────────────────────────────────────────────────
51
52/// Loads a GGUF model and returns its weight tensors as `(name, f32_weights, shape)`.
53///
54/// TODO (Phase 12): Implement using candle's GGUF reader or a direct parser.
55///       The GGUF format stores tensors with their quant type, shape, and data.
56///       For now this is a stub — the real implementation should:
57///         1. Open the file (mmap for large models)
58///         2. Parse GGUF header (magic + metadata KV + tensor index)
59///         3. For each tensor: dequantize to f32, return (name, data, shape)
60///
61/// Feature gate: compile with `--features gguf` once implemented.
62#[allow(dead_code)]
63pub fn load_gguf(_path: &Path) -> Result<Vec<(String, Vec<f32>, Vec<usize>)>> {
64    // Dequantization note: GGUF supports Q4_0, Q4_1, Q8_0, F16, F32, etc.
65    // We dequant to f32 first, then re-quantize to ternary.
66    // Direct F16→ternary (without f32 intermediate) would reduce peak memory —
67    // worth implementing once the pipeline is validated on Llama-3.2-1B.
68    anyhow::bail!(
69        "GGUF loader not yet implemented. \
70         Enable the `gguf` feature and implement load_gguf() in format.rs. \
71         See TODO comment above for the implementation guide."
72    )
73}
74
75// ─── SafeTensors loader stub ──────────────────────────────────────────────────
76
77/// Loads a HuggingFace safetensors model directory.
78///
79/// TODO (Phase 12): Implement using the `safetensors` crate (MIT licensed).
80///       Typical layout: model.safetensors or model-00001-of-NNNNN.safetensors.
81///       The safetensors format is straightforward: a JSON header + raw tensor data.
82///
83/// Feature gate: compile with `--features safetensors` once implemented.
84#[allow(dead_code)]
85pub fn load_safetensors(_dir: &Path) -> Result<Vec<(String, Vec<f32>, Vec<usize>)>> {
86    anyhow::bail!(
87        "SafeTensors loader not yet implemented. \
88         Enable the `safetensors` feature and implement load_safetensors() in format.rs."
89    )
90}
91
92// ─── Mock loader for testing ──────────────────────────────────────────────────
93
/// Generate a synthetic model for exercising the pipeline without a real model file.
///
/// Produces `n_layers` entries of the form `(name, weights, shape)` where
/// `name` is `model.layers.{i}.mlp.weight`, `weights` holds `hidden * hidden`
/// pseudo-random values in `[-2.0, 2.0]`, and `shape` is `[hidden, hidden]`.
///
/// The values come from a small deterministic LCG seeded by `seed`, so the
/// same arguments always produce the same output and no `rand` dependency is
/// needed.
pub fn synthetic_layers(
    n_layers: usize,
    hidden:   usize,
    seed:     u64,
) -> Vec<(String, Vec<f32>, Vec<usize>)> {
    // 64-bit LCG multiplier / increment (the constants used by Knuth's MMIX).
    const LCG_MUL: u64 = 6364136223846793005;
    const LCG_INC: u64 = 1442695040888963407;

    let mut state = seed.wrapping_add(1);
    let mut next_f32 = move || -> f32 {
        state = state.wrapping_mul(LCG_MUL).wrapping_add(LCG_INC);
        // Use the full upper 32 bits of the state. Shifting by 33 would leave
        // only 31 bits, so `bits / u32::MAX` could never reach 0.5 and every
        // weight would land in [-2.0, 0.0) instead of the intended range.
        let bits = (state >> 32) as u32;
        // Map [0, u32::MAX] onto [-2.0, 2.0] for a realistic weight spread.
        (bits as f32 / u32::MAX as f32) * 4.0 - 2.0
    };

    let n_weights = hidden * hidden;
    (0..n_layers)
        .map(|i| {
            let name = format!("model.layers.{i}.mlp.weight");
            let weights: Vec<f32> = (0..n_weights).map(|_| next_f32()).collect();
            (name, weights, vec![hidden, hidden])
        })
        .collect()
}
ternlang_compress/format.rs

ternlang_compress/
format.rs