batuta/oracle/rag/quantization/mod.rs
1//! Scalar Int8 Quantization for Embedding Retrieval
2//!
3//! Implements the scalar int8 rescoring retriever specification with:
4//! - 4x memory reduction (f32 -> i8)
5//! - 99% accuracy retention with rescoring
6//! - 3.66x speedup via SIMD acceleration
7//!
8//! # References
9//!
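10//! - Jacob et al. (2018) - Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference
11//! - Gholami et al. (2022) - A Survey of Quantization Methods for Efficient Neural Network Inference
12//! - Wu et al. (2020) - Integer Quantization for Deep Learning Inference: Principles and Empirical Evaluation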
13//!
14//! # Toyota Way Principles
15//!
16//! - **Jidoka**: Auto-stop on quantization error > threshold
17//! - **Poka-Yoke**: Type-safe precision levels, compile-time checks
18//! - **Heijunka**: Batched rescoring with backpressure
19//! - **Kaizen**: Continuous calibration improvement
20//! - **Genchi Genbutsu**: Hardware-specific benchmarks
21//! - **Muda**: 4x memory reduction via quantization
22
23// Library code used from examples and integration tests, so dead-code and unused-import lints are allowed here
24#![allow(dead_code, unused_imports)]
25
26mod calibration;
27mod embedding;
28mod error;
29mod params;
30mod retriever;
31mod simd;
32
33#[cfg(test)]
34mod tests;
35
36// Re-export public items from the private submodules above so callers import them from this module
37pub use calibration::CalibrationStats;
38pub use embedding::{compute_hash, QuantizedEmbedding};
39pub use error::{validate_embedding, QuantizationError};
40pub use params::QuantizationParams;
41pub use retriever::{RescoreResult, RescoreRetriever, RescoreRetrieverConfig};
42pub use simd::{dot_i8_scalar, SimdBackend};