// oxibonsai_core/lib.rs — crate root
//! # oxibonsai-core
//!
//! GGUF Q1_0_g128 format parser, tensor types, and model configuration
//! for OxiBonsai — the Pure Rust 1-bit LLM inference engine.
//!
//! This crate provides the foundational data types and parsing logic used
//! by the rest of the OxiBonsai stack:
//!
//! - **GGUF v3 binary format parsing** — header, metadata key-value store,
//!   and tensor info directory (see [`gguf`]).
//! - **Q1_0_g128 block type** — the 18-byte packed representation used for
//!   1-bit weights (see [`tensor::BlockQ1_0G128`]).
//! - **Memory-mapped tensor loading** — zero-copy access to weight data
//!   from disk via `memmap2`.
//! - **Model configuration** — [`config::Qwen3Config`] extracted from GGUF
//!   metadata or constructed for known Bonsai variants (8B, 4B, 1.7B).
//!
//! ## GGUF Q1_0_g128 Format
//!
//! Each block is 18 bytes: 2-byte FP16 scale + 16 bytes (128 sign bits).
//! Weight = bit ? +scale : -scale. Effective 1.125 bits per weight
//! (18 bytes * 8 = 144 bits covering 128 weights).
//!
//! ## Crate Organisation
//!
//! | Module | Purpose |
//! |--------|---------|
//! | [`config`] | `Qwen3Config` with named constructors for each variant |
//! | [`gguf`] | Low-level GGUF v3 reader (header, metadata, tensors) |
//! | [`quant_fp8`] | FP8 block types (`BlockFP8E4M3`, `BlockFP8E5M2`) and encode/decode helpers |
//! | [`quant_k`] | K-quant block types (`BlockQ2K`, `BlockQ3K`, `BlockQ4K`, `BlockQ8K`) |
//! | [`quant_k_ext`] | Extended K-quant block types (`BlockQ5K`, `BlockQ6K`) |
//! | [`quant_std`] | Standard quant block types (`BlockQ4_0`, `BlockQ8_0`) |
//! | [`quant_ternary`] | `BlockTQ2_0_g128`, `BlockTQ2_0`, `TernaryCode` — ternary block types |
//! | [`tensor`] | `BlockQ1_0G128` and `OneBitTensor` types |
//! | [`error`] | `BonsaiError` / `BonsaiResult` |
33pub mod config;
34pub mod error;
35pub mod gguf;
36pub mod quant_fp8;
37pub mod quant_k;
38pub mod quant_k_ext;
39pub mod quant_std;
40pub mod quant_ternary;
41pub mod tensor;
42
43pub use config::Qwen3Config;
44pub use error::{BonsaiError, BonsaiResult};
45pub use gguf::compat::{
46    build_compat_report, check_gguf_header, CompatError, ExtendedQuantType, GgufCompatReport,
47    GgufVersion,
48};
49pub use gguf::header::GgufHeader;
50pub use gguf::metadata::{MetadataStore, MetadataValue};
51pub use gguf::model_card::keys as model_card_keys;
52pub use gguf::model_card::{extract_known_fields, extract_model_card, ModelCard};
53pub use gguf::streaming::{
54    GgufStreamParser, GgufValue, StreamState, StreamedGguf, StreamedTensorInfo,
55};
56pub use gguf::tensor_info::{TensorInfo, TensorStore};
57pub use gguf::types::{GgufTensorType, GgufValueType};
58pub use gguf::writer::MetadataWriteValue;
59pub use gguf::writer::{GgufWriter, TensorEntry, TensorType, WriteError};
60pub use quant_fp8::{
61    fp8_e4m3_decode, fp8_e4m3_encode, fp8_e5m2_decode, fp8_e5m2_encode, BlockFP8E4M3, BlockFP8E5M2,
62    BLOCK_FP8_BYTES, FP8_E4M3_MAX, FP8_E5M2_MAX, QK_FP8,
63};
64pub use quant_k::{
65    BlockQ2K, BlockQ3K, BlockQ4K, BlockQ8K, BLOCK_Q2_K_BYTES, BLOCK_Q3K_BYTES, BLOCK_Q4_K_BYTES,
66    BLOCK_Q8K_BYTES,
67};
68pub use quant_k_ext::{BlockQ5K, BlockQ6K, BLOCK_Q5K_BYTES, BLOCK_Q6K_BYTES};
69pub use quant_std::{BlockQ4_0, BlockQ8_0, BLOCK_Q4_0_BYTES, BLOCK_Q8_0_BYTES, QK_Q4_0, QK_Q8_0};
70pub use quant_ternary::{
71    BlockTQ2_0, BlockTQ2_0_g128, TernaryCode, BLOCK_TQ2_0_BYTES, BLOCK_TQ2_0_G128_BYTES, QK_TQ2_0,
72    QK_TQ2_0_G128,
73};
74pub use tensor::{BlockQ1_0G128, OneBitTensor};