lnmp_quant/adaptive.rs

//! Adaptive quantization module
//!
//! This module provides functionality to automatically select the best quantization scheme
//! based on high-level requirements (accuracy vs. compression).
//!
//! # Performance
//! Adaptive quantization adds negligible overhead compared to using a scheme directly:
//! the selection logic is a single `match` on an enum, which the compiler can inline.
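//!
//! # Example
//!
//! A minimal usage sketch (mirrors the doctest on [`quantize_adaptive`] below):
//!
//! ```
//! use lnmp_quant::adaptive::{quantize_adaptive, AccuracyTarget};
//! use lnmp_embedding::Vector;
//!
//! let vec = Vector::from_f32(vec![0.1, 0.2, 0.3]);
//! let q = quantize_adaptive(&vec, AccuracyTarget::High).unwrap();
//! ```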
use crate::encode::quantize_embedding;
use crate::error::QuantError;
use crate::scheme::QuantScheme;
use crate::vector::QuantizedVector;
use lnmp_embedding::Vector;

/// Target accuracy levels for adaptive quantization
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AccuracyTarget {
    /// Maximum accuracy (~99.9%), uses FP16 (2x compression)
    Maximum,
    /// High accuracy (~99%), uses QInt8 (4x compression)
    High,
    /// Balanced accuracy (~95-97%), uses QInt4 (8x compression)
    Balanced,
    /// Compact storage (~85-90%), uses Binary (32x compression)
    Compact,
}

/// Target compression levels for adaptive quantization
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompressionTarget {
    /// Conservative compression (2x), prioritizes accuracy (FP16)
    Conservative,
    /// Moderate compression (4x), good balance (QInt8)
    Moderate,
    /// Aggressive compression (8x), prioritizes size (QInt4)
    Aggressive,
    /// Maximum compression (32x), minimal size (Binary)
    Maximum,
}

/// Quantize an embedding based on an accuracy target
///
/// # Arguments
/// * `emb` - The embedding vector to quantize
/// * `target` - The desired accuracy level
///
/// # Returns
/// * `Ok(QuantizedVector)` - The quantized vector using the selected scheme
/// * `Err(QuantError)` - If quantization fails
///
/// # Example
/// ```
/// use lnmp_quant::adaptive::{quantize_adaptive, AccuracyTarget};
/// use lnmp_embedding::Vector;
///
/// let vec = Vector::from_f32(vec![0.1, 0.2, 0.3]);
/// let q = quantize_adaptive(&vec, AccuracyTarget::High).unwrap();
/// ```
pub fn quantize_adaptive(
    emb: &Vector,
    target: AccuracyTarget,
) -> Result<QuantizedVector, QuantError> {
    // Map the requested accuracy level to a concrete quantization scheme.
    let scheme = match target {
        AccuracyTarget::Maximum => QuantScheme::FP16Passthrough,
        AccuracyTarget::High => QuantScheme::QInt8,
        AccuracyTarget::Balanced => QuantScheme::QInt4,
        AccuracyTarget::Compact => QuantScheme::Binary,
    };

    quantize_embedding(emb, scheme)
}

/// Quantize an embedding based on a compression target
///
/// # Arguments
/// * `emb` - The embedding vector to quantize
/// * `target` - The desired compression level
///
/// # Returns
/// * `Ok(QuantizedVector)` - The quantized vector using the selected scheme
/// * `Err(QuantError)` - If quantization fails
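///
/// # Example
/// A minimal sketch mirroring the [`quantize_adaptive`] doctest above:
/// ```
/// use lnmp_quant::adaptive::{quantize_with_target, CompressionTarget};
/// use lnmp_embedding::Vector;
///
/// let vec = Vector::from_f32(vec![0.1, 0.2, 0.3]);
/// let q = quantize_with_target(&vec, CompressionTarget::Moderate).unwrap();
/// ```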
pub fn quantize_with_target(
    emb: &Vector,
    target: CompressionTarget,
) -> Result<QuantizedVector, QuantError> {
    // Map the requested compression level to a concrete quantization scheme.
    let scheme = match target {
        CompressionTarget::Conservative => QuantScheme::FP16Passthrough,
        CompressionTarget::Moderate => QuantScheme::QInt8,
        CompressionTarget::Aggressive => QuantScheme::QInt4,
        CompressionTarget::Maximum => QuantScheme::Binary,
    };

    quantize_embedding(emb, scheme)
}

#[cfg(test)]
mod tests {
    use super::*;
    use lnmp_embedding::Vector;

    #[test]
    fn test_adaptive_accuracy_selection() {
        let vec = Vector::from_f32(vec![0.1, 0.2, 0.3, 0.4]);

        // Maximum -> FP16
        let q = quantize_adaptive(&vec, AccuracyTarget::Maximum).unwrap();
        assert_eq!(q.scheme, QuantScheme::FP16Passthrough);

        // High -> QInt8
        let q = quantize_adaptive(&vec, AccuracyTarget::High).unwrap();
        assert_eq!(q.scheme, QuantScheme::QInt8);

        // Balanced -> QInt4
        let q = quantize_adaptive(&vec, AccuracyTarget::Balanced).unwrap();
        assert_eq!(q.scheme, QuantScheme::QInt4);

        // Compact -> Binary
        let q = quantize_adaptive(&vec, AccuracyTarget::Compact).unwrap();
        assert_eq!(q.scheme, QuantScheme::Binary);
    }

    #[test]
    fn test_adaptive_compression_selection() {
        let vec = Vector::from_f32(vec![0.1, 0.2, 0.3, 0.4]);

        // Conservative -> FP16
        let q = quantize_with_target(&vec, CompressionTarget::Conservative).unwrap();
        assert_eq!(q.scheme, QuantScheme::FP16Passthrough);

        // Moderate -> QInt8
        let q = quantize_with_target(&vec, CompressionTarget::Moderate).unwrap();
        assert_eq!(q.scheme, QuantScheme::QInt8);

        // Aggressive -> QInt4
        let q = quantize_with_target(&vec, CompressionTarget::Aggressive).unwrap();
        assert_eq!(q.scheme, QuantScheme::QInt4);

        // Maximum -> Binary
        let q = quantize_with_target(&vec, CompressionTarget::Maximum).unwrap();
        assert_eq!(q.scheme, QuantScheme::Binary);
    }
}