oxibonsai-core 0.1.2

GGUF Q1_0_g128 loader, tensor types, and configuration for OxiBonsai
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
//! Forward-compatibility layer for GGUF v2/v3 and future GGMLv4 formats.
//!
//! This module provides:
//! - [`GgufVersion`] — typed version enum with capability queries
//! - [`ExtendedQuantType`] — extended quantization type awareness (including unknown IDs)
//! - [`GgufCompatReport`] — compatibility report for a parsed GGUF file
//! - [`check_gguf_header`] — validate a raw GGUF magic+version header
//! - [`build_compat_report`] — construct a report from parsed file metadata
//! - [`CompatError`] — error type for compatibility operations

use thiserror::Error;

// ── GGUF magic constant ──────────────────────────────────────────────────────

/// Four-byte file signature, ASCII "GGUF", that [`check_gguf_header`] expects
/// at offset 0 of the header.
const GGUF_MAGIC_BYTES: &[u8; 4] = b"GGUF";

/// Smallest slice length [`check_gguf_header`] accepts: the 4-byte magic
/// followed by the 4-byte version field.
const GGUF_MIN_HEADER_BYTES: usize = 8;

// ── GgufVersion ─────────────────────────────────────────────────────────────

/// GGUF container format versions recognised by this module.
///
/// Each variant carries its on-disk version number as the discriminant and
/// the variants are declared in ascending order, so the derived ordering
/// satisfies V1 < V2 < V3.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum GgufVersion {
    /// Format version 1 (raw value `1`).
    V1 = 1,
    /// Format version 2 (raw value `2`); the first version for which
    /// [`supports_f16_kv`](GgufVersion::supports_f16_kv) reports `true`.
    V2 = 2,
    /// Format version 3 (raw value `3`); the first version for which
    /// [`supports_aligned_tensors`](GgufVersion::supports_aligned_tensors)
    /// reports `true`.
    V3 = 3,
}

impl GgufVersion {
    /// Map the raw `u32` version field of a GGUF header onto a variant.
    ///
    /// Yields `None` when the number does not correspond to any variant.
    ///
    /// # Examples
    /// ```
    /// use oxibonsai_core::GgufVersion;
    /// assert_eq!(GgufVersion::from_u32(2), Some(GgufVersion::V2));
    /// assert_eq!(GgufVersion::from_u32(99), None);
    /// ```
    pub fn from_u32(v: u32) -> Option<Self> {
        // Search the variants by their raw number so the mapping has a single
        // source of truth (the enum discriminants).
        [Self::V1, Self::V2, Self::V3]
            .iter()
            .copied()
            .find(|candidate| candidate.to_u32() == v)
    }

    /// The raw `u32` written in the version field of a GGUF header.
    pub fn to_u32(self) -> u32 {
        self as u32
    }

    /// `true` for versions that support F16 key-value cache metadata,
    /// i.e. v2 and every later version.
    pub fn supports_f16_kv(&self) -> bool {
        self.to_u32() >= Self::V2.to_u32()
    }

    /// `true` for versions that mandate 32-byte-aligned tensor data sections,
    /// i.e. v3 and every later version.
    pub fn supports_aligned_tensors(&self) -> bool {
        self.to_u32() >= Self::V3.to_u32()
    }
}

impl std::fmt::Display for GgufVersion {
    /// Renders the version as `v` followed by its raw number, e.g. `v3`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_fmt(format_args!("v{}", *self as u32))
    }
}

// ── ExtendedQuantType ────────────────────────────────────────────────────────

/// Tensor quantization types that the compatibility layer can name.
///
/// The set is wider than what OxiBonsai can execute: its purpose is
/// reporting, not decoding. A type ID with no dedicated variant is kept as
/// [`Unknown(u32)`](ExtendedQuantType::Unknown) instead of being rejected, so
/// files from newer format revisions can still be inspected.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
// Variant names mirror the conventional quantization labels (`Q4_0`, `Q6_K`, …).
#[allow(non_camel_case_types)]
pub enum ExtendedQuantType {
    /// 32-bit IEEE 754 float — type id 0.
    F32,
    /// 16-bit IEEE 754 half-float — type id 1.
    F16,
    /// 4-bit quantization, variant 0 — type id 2.
    Q4_0,
    /// 4-bit quantization, variant 1 — type id 3.
    Q4_1,
    /// 5-bit quantization, variant 0 — type id 6.
    Q5_0,
    /// 5-bit quantization, variant 1 — type id 7.
    Q5_1,
    /// 8-bit quantization, variant 0 — type id 8.
    Q8_0,
    /// 8-bit quantization, variant 1 — type id 9.
    Q8_1,
    /// 2-bit K-quant — type id 10.
    Q2_K,
    /// 3-bit K-quant — type id 11.
    Q3_K,
    /// 4-bit K-quant (the Q4_K_M family) — type id 12.
    Q4_K,
    /// 5-bit K-quant (the Q5_K_M family) — type id 13.
    Q5_K,
    /// 6-bit K-quant — type id 14.
    Q6_K,
    /// 8-bit K-quant — type id 15.
    Q8_K,
    /// OxiBonsai's native 1-bit format, 128 sign bits plus an FP16 group
    /// scale — type id 41.
    Q1_0_G128,
    /// A type ID that has no dedicated variant; the raw ID is carried along
    /// so it can still be reported.
    Unknown(u32),
}

impl ExtendedQuantType {
    /// All recognised variants, listed in ascending type-ID order.
    const KNOWN: [Self; 15] = [
        Self::F32,
        Self::F16,
        Self::Q4_0,
        Self::Q4_1,
        Self::Q5_0,
        Self::Q5_1,
        Self::Q8_0,
        Self::Q8_1,
        Self::Q2_K,
        Self::Q3_K,
        Self::Q4_K,
        Self::Q5_K,
        Self::Q6_K,
        Self::Q8_K,
        Self::Q1_0_G128,
    ];

    /// Interpret a raw GGUF tensor type ID.
    ///
    /// IDs without a dedicated variant are wrapped in
    /// [`Unknown`](ExtendedQuantType::Unknown); this function never fails.
    pub fn from_u32(v: u32) -> Self {
        // Look the ID up through `to_u32` so both directions of the mapping
        // are defined in exactly one place.
        Self::KNOWN
            .iter()
            .copied()
            .find(|candidate| candidate.to_u32() == v)
            .unwrap_or(Self::Unknown(v))
    }

    /// The raw GGUF tensor type ID of this quantization type.
    ///
    /// For [`Unknown`](ExtendedQuantType::Unknown) this is the carried ID.
    pub fn to_u32(self) -> u32 {
        match self {
            // Plain float types.
            Self::F32 => 0,
            Self::F16 => 1,
            // Legacy 32-weight block formats (IDs 4 and 5 are not mapped).
            Self::Q4_0 => 2,
            Self::Q4_1 => 3,
            Self::Q5_0 => 6,
            Self::Q5_1 => 7,
            Self::Q8_0 => 8,
            Self::Q8_1 => 9,
            // K-quants.
            Self::Q2_K => 10,
            Self::Q3_K => 11,
            Self::Q4_K => 12,
            Self::Q5_K => 13,
            Self::Q6_K => 14,
            Self::Q8_K => 15,
            // OxiBonsai native format.
            Self::Q1_0_G128 => 41,
            Self::Unknown(raw) => raw,
        }
    }

    /// Approximate storage cost in bits per weight.
    ///
    /// Figures are `block_bytes * 8 / weights_per_block`, so per-block
    /// scale/min overhead is amortised over the block. Unknown types report
    /// `0.0`.
    pub fn bits_per_weight(self) -> f32 {
        match self {
            Self::F32 => 32.0,
            Self::F16 => 16.0,
            // 128 weights: 16 bytes of sign bits + 2-byte scale = 18 bytes.
            Self::Q1_0_G128 => 1.125,
            // 256 weights in an 84-byte block.
            Self::Q2_K => 2.625,
            // 256 weights in a 110-byte block.
            Self::Q3_K => 3.4375,
            // Q4_0: 32 weights in 18 bytes; Q4_K: 256 weights in 144 bytes.
            Self::Q4_0 | Self::Q4_K => 4.5,
            // 32 weights in 20 bytes (scale + min + nibbles).
            Self::Q4_1 => 5.0,
            // Q5_0: 32 weights in 22 bytes; Q5_K: 256 weights in 176 bytes.
            Self::Q5_0 | Self::Q5_K => 5.5,
            // 32 weights in 24 bytes.
            Self::Q5_1 => 6.0,
            // 256 weights in a 210-byte block.
            Self::Q6_K => 6.5625,
            // Q8_0: 32 weights in 34 bytes.
            // NOTE(review): Q8_1 stores an extra per-block term, so its true
            // figure is higher than Q8_0's; 8.5 is reported for both on
            // purpose and is kept unchanged here.
            Self::Q8_0 | Self::Q8_1 => 8.5,
            // 256 weights in a 292-byte block.
            Self::Q8_K => 9.125,
            Self::Unknown(_) => 0.0,
        }
    }

    /// `true` for every variant except [`Unknown`](ExtendedQuantType::Unknown).
    pub fn is_known(self) -> bool {
        !matches!(self, Self::Unknown(_))
    }

    /// Static display label for this quantization type.
    ///
    /// Every [`Unknown`](ExtendedQuantType::Unknown) value yields the fixed
    /// label `"Unknown"` because a `&'static str` cannot embed the ID; use
    /// [`to_u32`](ExtendedQuantType::to_u32) or the `Display` impl when the
    /// ID is needed.
    pub fn name(self) -> &'static str {
        match self {
            Self::F32 => "F32",
            Self::F16 => "F16",
            Self::Q4_0 => "Q4_0",
            Self::Q4_1 => "Q4_1",
            Self::Q5_0 => "Q5_0",
            Self::Q5_1 => "Q5_1",
            Self::Q8_0 => "Q8_0",
            Self::Q8_1 => "Q8_1",
            Self::Q2_K => "Q2_K",
            Self::Q3_K => "Q3_K",
            Self::Q4_K => "Q4_K",
            Self::Q5_K => "Q5_K",
            Self::Q6_K => "Q6_K",
            Self::Q8_K => "Q8_K",
            Self::Q1_0_G128 => "Q1_0_G128",
            Self::Unknown(_) => "Unknown",
        }
    }
}

impl std::fmt::Display for ExtendedQuantType {
    /// Known types print their [`name`](ExtendedQuantType::name); unknown
    /// types print as `Unknown(<id>)`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        if let Self::Unknown(raw) = *self {
            write!(f, "Unknown({})", raw)
        } else {
            f.write_str(self.name())
        }
    }
}

// ── GgufCompatReport ─────────────────────────────────────────────────────────

/// Compatibility report produced after inspecting a GGUF file.
///
/// Records what was found in the file (version, counts, unrecognised
/// quantization type IDs), any warnings gathered along the way, and whether
/// OxiBonsai expects to be able to load it.
#[derive(Debug, Clone)]
pub struct GgufCompatReport {
    /// The GGUF format version detected in the file.
    pub version: GgufVersion,
    /// Number of tensors present in the file.
    pub tensor_count: u64,
    /// Number of metadata key-value pairs present in the file.
    pub metadata_count: u64,
    /// Raw quantization type IDs that were not recognised, in the order they
    /// were recorded (duplicates are kept).
    pub unknown_quant_types: Vec<u32>,
    /// Human-readable warnings accumulated during compatibility checking.
    pub warnings: Vec<String>,
    /// Whether OxiBonsai believes it can load this file.
    ///
    /// Starts as `true`; [`finalize`](GgufCompatReport::finalize) clears it
    /// when unknown quantization types have been recorded.
    pub is_loadable: bool,
}

impl GgufCompatReport {
    /// Create a report for a file of the given version with the given tensor
    /// and metadata counts.
    ///
    /// The new report has no warnings, no unknown quantization types, and
    /// [`is_loadable`](GgufCompatReport::is_loadable) set to `true`.
    pub fn new(version: GgufVersion, tensor_count: u64, metadata_count: u64) -> Self {
        Self {
            version,
            tensor_count,
            metadata_count,
            unknown_quant_types: Vec::new(),
            warnings: Vec::new(),
            is_loadable: true,
        }
    }

    /// Append a human-readable warning to the report.
    pub fn add_warning(&mut self, msg: impl Into<String>) {
        self.warnings.push(msg.into());
    }

    /// Record an unrecognised quantization type ID.
    ///
    /// Duplicates are stored as-is; call sites should de-duplicate if needed.
    pub fn add_unknown_quant(&mut self, quant_id: u32) {
        self.unknown_quant_types.push(quant_id);
    }

    /// Finalise the report.
    ///
    /// Warnings alone never block loading. If any unknown quantization types
    /// were recorded, OxiBonsai cannot decode those tensors, so `is_loadable`
    /// is set to `false` and a synthesised warning describing them is added.
    ///
    /// Calling this more than once is safe: the synthesised warning is only
    /// appended when an identical warning is not already present, so repeated
    /// calls do not pile up duplicates.
    pub fn finalize(&mut self) {
        if self.unknown_quant_types.is_empty() {
            return;
        }

        self.is_loadable = false;

        let warning = format!(
            "file contains {} tensor(s) with unrecognised quantization type(s): {:?}",
            self.unknown_quant_types.len(),
            self.unknown_quant_types,
        );
        // Guard against repeated `finalize` calls duplicating the message.
        if !self.warnings.contains(&warning) {
            self.warnings.push(warning);
        }
    }

    /// Return a single-line human-readable summary of the report.
    pub fn summary(&self) -> String {
        format!(
            "GGUF {} | tensors={} metadata={} | unknown_quants={} | warnings={} | loadable={}",
            self.version,
            self.tensor_count,
            self.metadata_count,
            self.unknown_quant_types.len(),
            self.warnings.len(),
            self.is_loadable,
        )
    }
}

// ── check_gguf_header ────────────────────────────────────────────────────────

/// Check the GGUF magic number and decode the format version from the start
/// of a raw byte slice.
///
/// Only the first 8 bytes are examined:
/// ```text
/// bytes[0..4]  — ASCII "GGUF"
/// bytes[4..8]  — version as little-endian u32
/// ```
///
/// # Errors
///
/// - [`CompatError::TruncatedHeader`] if `bytes.len() < 8`
/// - [`CompatError::InvalidMagic`] if the first four bytes are not `b"GGUF"`
/// - [`CompatError::UnsupportedVersion`] if the version field is not 1, 2, or 3
pub fn check_gguf_header(bytes: &[u8]) -> Result<GgufVersion, CompatError> {
    let available = bytes.len();
    if available < GGUF_MIN_HEADER_BYTES {
        return Err(CompatError::TruncatedHeader {
            need: GGUF_MIN_HEADER_BYTES,
            got: available,
        });
    }

    // The length guard above makes both the split and the 4-byte slice below
    // in-bounds.
    let (magic, after_magic) = bytes.split_at(4);
    if magic != GGUF_MAGIC_BYTES {
        return Err(CompatError::InvalidMagic(magic.to_vec()));
    }

    // The version field is a little-endian u32 immediately after the magic.
    let mut version_field = [0u8; 4];
    version_field.copy_from_slice(&after_magic[..4]);
    let raw_version = u32::from_le_bytes(version_field);

    match GgufVersion::from_u32(raw_version) {
        Some(version) => Ok(version),
        None => Err(CompatError::UnsupportedVersion(raw_version)),
    }
}

// ── build_compat_report ──────────────────────────────────────────────────────

/// Assemble a finalised [`GgufCompatReport`] from values a GGUF reader has
/// already extracted.
///
/// `tensor_quant_type_ids` is expected to hold one raw quantization type ID
/// per tensor. Every ID that [`ExtendedQuantType`] does not recognise is
/// recorded on the report, and because the report is finalised before it is
/// returned, any such ID leaves [`GgufCompatReport::is_loadable`] set to
/// `false`.
///
/// When `version_u32` is not a supported GGUF version, the report uses
/// [`GgufVersion::V3`] as a stand-in and carries an informational warning
/// naming the original number.
pub fn build_compat_report(
    version_u32: u32,
    tensor_count: u64,
    metadata_count: u64,
    tensor_quant_type_ids: &[u32],
) -> GgufCompatReport {
    let recognised = GgufVersion::from_u32(version_u32);

    let mut report = GgufCompatReport::new(
        recognised.unwrap_or(GgufVersion::V3),
        tensor_count,
        metadata_count,
    );

    if recognised.is_none() {
        report.add_warning(format!(
            "GGUF version {} is not explicitly supported; treating as v3 for structural parsing",
            version_u32
        ));
    }

    // Record, in input order, every tensor type ID that has no known variant.
    tensor_quant_type_ids
        .iter()
        .copied()
        .filter(|&id| !ExtendedQuantType::from_u32(id).is_known())
        .for_each(|id| report.add_unknown_quant(id));

    report.finalize();
    report
}

// ── CompatError ──────────────────────────────────────────────────────────────

/// Errors produced by the forward-compatibility layer.
///
/// All three variants are returned by [`check_gguf_header`].
#[derive(Debug, Error)]
pub enum CompatError {
    /// The first four bytes did not spell "GGUF".
    ///
    /// The payload is a copy of the four bytes that were found instead.
    #[error("invalid GGUF magic: expected GGUF, got {0:?}")]
    InvalidMagic(Vec<u8>),

    /// The version field holds a value not in {1, 2, 3}.
    ///
    /// The payload is the raw version number that was read.
    #[error("unsupported GGUF version: {0}")]
    UnsupportedVersion(u32),

    /// The byte slice was too short to contain the full header.
    ///
    /// `need` is the minimum number of bytes required and `got` is the
    /// length of the slice that was supplied.
    #[error("truncated header: need at least {need} bytes, got {got}")]
    TruncatedHeader { need: usize, got: usize },
}

// ── Inline unit tests ─────────────────────────────────────────────────────────

#[cfg(test)]
mod tests {
    use super::*;

    /// Every version converts to the raw number stored in the file header.
    #[test]
    fn gguf_version_round_trips_to_u32() {
        let expected = [
            (GgufVersion::V1, 1u32),
            (GgufVersion::V2, 2),
            (GgufVersion::V3, 3),
        ];
        for (version, raw) in expected.iter().copied() {
            assert_eq!(version.to_u32(), raw);
        }
    }

    /// Versions display as `v` followed by their number.
    #[test]
    fn gguf_version_display_format() {
        let expected = [
            (GgufVersion::V1, "v1"),
            (GgufVersion::V2, "v2"),
            (GgufVersion::V3, "v3"),
        ];
        for (version, text) in expected.iter().copied() {
            assert_eq!(version.to_string(), text);
        }
    }

    /// The display form of an unknown quant type exposes the raw ID.
    #[test]
    fn extended_quant_unknown_display_includes_id() {
        let rendered = format!("{}", ExtendedQuantType::Unknown(999));
        assert!(rendered.contains("999"));
    }

    /// Spot-check the variant → raw type ID mapping, including `Unknown`.
    #[test]
    fn extended_quant_roundtrip_to_u32() {
        let expected = [
            (ExtendedQuantType::F32, 0u32),
            (ExtendedQuantType::F16, 1),
            (ExtendedQuantType::Q1_0_G128, 41),
            (ExtendedQuantType::Unknown(99), 99),
        ];
        for (quant, raw) in expected.iter().copied() {
            assert_eq!(quant.to_u32(), raw);
        }
    }
}