//! GGUF-compatible quantization formats (Q4_0, Q8_0)
//!
//! Implements quantization formats compatible with llama.cpp and GGUF:
//! - Q4_0: 4-bit quantization with per-block f16 scale (32 elements/block)
//! - Q8_0: 8-bit quantization with per-block f16 scale (32 elements/block)
//!
//! Block structure:
//! - Q4_0: 2 bytes scale (f16) + 16 bytes data (32 × 4-bit) = 18 bytes/block
//! - Q8_0: 2 bytes scale (f16) + 32 bytes data (32 × 8-bit) = 34 bytes/block
pub use Q4_0;
pub use Q8_0;
pub use GGUFQuantType;
/// Number of elements grouped into one GGUF quantization block.
///
/// This is the standard block length used by llama.cpp. Both `Q4_0` and `Q8_0` use it:
/// each block carries one f16 scale plus 32 quantized values, which gives 18 bytes per
/// block for Q4_0 (4-bit values) and 34 bytes per block for Q8_0 (8-bit values).
pub const GGUF_BLOCK_SIZE: usize = 32;