bitpolar/lib.rs
1//! # bitpolar
2//!
3//! Zero-overhead vector quantization for semantic search and KV cache compression.
4//!
5//! Implements three algorithms from Google Research:
6//! - **TurboQuant** (ICLR 2026) — two-stage composition for near-optimal compression
7//! - **PolarQuant** (AISTATS 2026) — polar coordinate encoding with lossless radii
8//! - **QJL** (AAAI 2025) — 1-bit Johnson-Lindenstrauss sketching
9//!
10//! ## Key Properties
11//!
12//! - **Data-oblivious**: No training, no codebooks, no calibration data
13//! - **Deterministic**: Fully defined by 4 integers: `(dimension, bits, projections, seed)`
14//! - **Provably unbiased**: Inner product estimates are unbiased at 3+ bits
15//! - **Near-optimal**: Within ~2.7x of the Shannon rate-distortion limit
16//! - **Instant indexing**: Vectors compress on arrival — no offline training
17//!
18//! ## Quick Start
19//!
20//! ```rust
21//! use bitpolar::TurboQuantizer;
22//! use bitpolar::traits::VectorQuantizer;
23//!
24//! // Create quantizer from 4 integers — no training needed
25//! let q = TurboQuantizer::new(128, 4, 32, 42).unwrap();
26//!
27//! // Encode a vector
28//! let vector = vec![0.1_f32; 128];
29//! let code = q.encode(&vector).unwrap();
30//!
31//! // Estimate inner product without decompression
32//! let query = vec![0.05_f32; 128];
33//! let score = q.inner_product_estimate(&code, &query).unwrap();
34//! println!("Estimated IP: {score}");
35//!
36//! // Decode back to approximate vector
37//! let reconstructed = q.decode(&code);
38//! assert_eq!(reconstructed.len(), 128);
39//! ```
40//!
41//! ## Architecture
42//!
43//! ```text
44//! Input f32 vector
45//!           │
46//!           ▼
47//! ┌──────────────────┐
48//! │ Random Rotation  │  Haar-distributed orthogonal matrix (QR of Gaussian)
49//! │ (StoredRotation) │  Spreads energy uniformly across coordinates
50//! └─────────┬────────┘
51//!           │
52//!           ▼
53//! ┌──────────────────┐
54//! │ PolarQuant       │  Groups d dims into d/2 pairs → polar coords
55//! │ (Stage 1)        │  Radii: lossless f32 | Angles: b-bit quantized
56//! └─────────┬────────┘
57//!           │
58//!           ▼
59//! ┌──────────────────┐
60//! │ QJL Residual     │  Sketches reconstruction error
61//! │ (Stage 2)        │  1 sign bit per projection → unbiased correction
62//! └─────────┬────────┘
63//!           │
64//!           ▼
65//! TurboCode { polar: PolarCode, residual_sketch: QjlSketch }
66//! ```
67//!
68//! ## Feature Flags
69//!
70//! | Feature | Default | Description |
71//! |---------|---------|-------------|
72//! | `std` | Yes | Standard library (nalgebra QR decomposition) |
73//! | `serde-support` | Yes | Serde serialization for all types |
74//! | `simd` | No | Hand-tuned NEON/AVX2 kernels |
75//! | `parallel` | No | Parallel batch operations via rayon |
76//! | `tracing-support` | No | OpenTelemetry-compatible instrumentation |
77//! | `ffi` | No | C FFI exports |
78
79#![warn(clippy::all)]
80#![warn(missing_docs)]
81// `unsafe` is forbidden crate-wide unless the `simd` or `ffi` feature is enabled.
82#![cfg_attr(not(any(feature = "simd", feature = "ffi")), forbid(unsafe_code))]
83// Build as `no_std` whenever the `std` feature is disabled.
84// In that case the `alloc` feature links the `alloc` crate (Vec/String without full std).
85#![cfg_attr(not(feature = "std"), no_std)]
86
87#[cfg(all(not(feature = "std"), feature = "alloc"))]
88extern crate alloc;
89
90// ============================================================================
91// Compatibility layer (std/alloc/no_std)
92// ============================================================================
93
94/// Crate-internal compatibility layer for switching between `std`, `alloc`, and `no_std` builds.
95/// All modules import `Vec`, math functions, etc. from here.
96pub(crate) mod compat;
97
98// ============================================================================
99// Core modules (always available)
100// ============================================================================
101
102/// Error types — fallible public APIs return `Result<T, TurboQuantError>`
103pub mod error;
104
105/// Core traits for ecosystem integration: `VectorQuantizer`, `BatchQuantizer`, etc.
106pub mod traits;
107
108/// Compression statistics and quality metrics — exports `BatchStats`, `DistortionMetrics`
109pub mod stats;
110
111/// Haar-distributed orthogonal rotation matrix (O(d²) memory) — exports `StoredRotation`
112pub mod rotation;
113
114/// Walsh-Hadamard Transform rotation (O(d) memory, O(d log d) time) — exports `WhtRotation`
115pub mod wht;
116
117/// Lloyd-Max optimal scalar quantizer for the N(0,1) distribution (crate-internal)
118pub(crate) mod codebook;
119
120/// PolarQuant: polar coordinate vector encoding (Stage 1) — exports `PolarQuantizer`, `PolarCode`
121pub mod polar;
122
123/// Quantized Johnson-Lindenstrauss 1-bit sketching (Stage 2) — exports `QjlQuantizer`, `QjlSketch`
124pub mod qjl;
125
126/// TurboQuantizer: two-stage composition (Polar + QJL) — exports `TurboQuantizer`, `TurboCode`
127pub mod turbo;
128
129/// KV cache compressor for transformer attention — exports `KvCacheCompressor`, `MultiHeadKvCache`
130pub mod kv_cache;
131
132/// Online distortion tracking for quality monitoring — exports `DistortionTracker`
133pub mod distortion;
134
135/// Tiered quantization: hot, warm, and cold storage tiers — exports `TieredQuantization`, `Tier`
136pub mod tiered;
137
138/// Resilient quantization with automatic primary→fallback strategy — exports `ResilientQuantizer`
139pub mod resilient;
140
141/// Oversampled approximate nearest-neighbor search with exact re-ranking — exports `OversampledSearch`
142pub mod search;
143
144/// Adaptive per-vector bit-width selection with promote/demote
145pub mod adaptive;
146
147/// Prometheus-compatible metrics export for monitoring
148pub mod metrics;
149
150// ============================================================================
151// Optional modules (behind feature flags)
152// ============================================================================
153
154/// SIMD-accelerated kernels (NEON on aarch64, AVX2 on x86_64); requires the `simd` feature.
155#[cfg(feature = "simd")]
156pub mod simd;
157
158/// C FFI exports for cross-language bindings; requires the `ffi` feature.
159///
160/// Enable with `features = ["ffi"]` in your `Cargo.toml`.
161/// Use `cbindgen` to generate the corresponding C header.
162#[cfg(feature = "ffi")]
163pub mod ffi;
164
165// ============================================================================
166// Public re-exports — the primary API surface
167// ============================================================================
168
169// Error type and the crate's `Result`
170pub use error::{Result, TurboQuantError};
171
172// Core quantizers and their code types (Stage 1, Stage 2, and the two-stage composition)
173pub use polar::{PolarCode, PolarQuantizer};
174pub use qjl::{QjlQuantizer, QjlSketch};
175pub use turbo::{TurboCode, TurboQuantizer};
176
177// KV cache compression
178pub use kv_cache::{KvCacheCompressor, KvCacheConfig, MultiHeadKvCache};
179
180// Distortion tracking and compression statistics
181pub use distortion::DistortionTracker;
182pub use stats::{BatchStats, DistortionMetrics};
183
184// Rotations
185pub use rotation::StoredRotation;
186pub use wht::WhtRotation;
187
188// Tiered storage, resilient fallback, and oversampled search
189pub use tiered::{Tier, TieredCode, TieredQuantization};
190pub use resilient::{ResilientCode, ResilientQuantizer};
191pub use search::OversampledSearch;
192
193/// Version of the compact binary format shared by all serialized codes.
194///
195/// Incremented whenever the wire format changes in a backward-incompatible way.
196/// Readers must reject any version they do not understand.
197pub const COMPACT_FORMAT_VERSION: u8 = 0x01;