// ruvllm/quantize/mod.rs

//! Quantization Pipeline for RuvLTRA Models
//!
//! This module provides quantization capabilities for converting full-precision
//! models to optimized quantized formats suitable for edge inference on Apple Silicon.
//!
//! ## Supported Quantization Formats
//!
//! | Format | Bits | Memory (0.5B) | Quality | Use Case |
//! |--------|------|---------------|---------|----------|
//! | Q4_K_M | 4.5  | ~300 MB       | Good    | Best quality/size tradeoff |
//! | Q5_K_M | 5.5  | ~375 MB       | Better  | Higher quality, still compact |
//! | Q8_0   | 8.5  | ~500 MB       | Best    | Near-lossless quantization |
//!
//! ## Apple Neural Engine (ANE) Optimization
//!
//! The quantization pipeline produces weights optimized for ANE inference:
//! - 16-byte aligned weight layouts
//! - Blocked quantization compatible with ANE tile operations
//! - Optimized memory access patterns for M4 Pro's unified memory
//!
//! ## Example
//!
//! ```rust,ignore
//! use ruvllm::quantize::{RuvltraQuantizer, QuantConfig, TargetFormat};
//! use std::path::Path;
//!
//! // Create quantizer for Q4_K_M format
//! let config = QuantConfig::default()
//!     .with_format(TargetFormat::Q4_K_M)
//!     .with_ane_optimization(true);
//!
//! let quantizer = RuvltraQuantizer::new(config)?;
//!
//! // Quantize a model
//! quantizer.quantize_model(
//!     Path::new("qwen-0.5b.safetensors"),
//!     Path::new("ruvltra-small-q4.gguf"),
//! )?;
//! ```

41mod ruvltra_quant;
42
43pub use ruvltra_quant::{
44    // Core quantizer
45    RuvltraQuantizer,
46    QuantConfig,
47    TargetFormat,
48
49    // Quantization functions
50    quantize_ruvltra_q4,
51    quantize_ruvltra_q5,
52    quantize_ruvltra_q8,
53    dequantize_for_ane,
54
55    // Memory estimation
56    estimate_memory_q4,
57    estimate_memory_q5,
58    estimate_memory_q8,
59    MemoryEstimate,
60
61    // Block types
62    Q4KMBlock,
63    Q5KMBlock,
64    Q8Block,
65
66    // Progress tracking
67    QuantProgress,
68    QuantStats,
69};