1use crate::encode::quantize_embedding;
12use crate::error::QuantError;
13use crate::scheme::QuantScheme;
14use crate::vector::QuantizedVector;
15use lnmp_embedding::Vector;
16
/// Accuracy preference used by [`quantize_adaptive`] to pick a quantization scheme.
///
/// Each variant corresponds to exactly one `QuantScheme`; the mapping is
/// listed on the individual variants below.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum AccuracyTarget {
    /// Mapped to `QuantScheme::FP16Passthrough` by [`quantize_adaptive`].
    Maximum,
    /// Mapped to `QuantScheme::QInt8` by [`quantize_adaptive`].
    High,
    /// Mapped to `QuantScheme::QInt4` by [`quantize_adaptive`].
    Balanced,
    /// Mapped to `QuantScheme::Binary` by [`quantize_adaptive`].
    Compact,
}
29
/// Compression preference used by [`quantize_with_target`] to pick a quantization scheme.
///
/// Each variant corresponds to exactly one `QuantScheme`; the mapping is
/// listed on the individual variants below.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CompressionTarget {
    /// Mapped to `QuantScheme::FP16Passthrough` by [`quantize_with_target`].
    Conservative,
    /// Mapped to `QuantScheme::QInt8` by [`quantize_with_target`].
    Moderate,
    /// Mapped to `QuantScheme::QInt4` by [`quantize_with_target`].
    Aggressive,
    /// Mapped to `QuantScheme::Binary` by [`quantize_with_target`].
    Maximum,
}
42
43pub fn quantize_adaptive(
62 emb: &Vector,
63 target: AccuracyTarget,
64) -> Result<QuantizedVector, QuantError> {
65 let scheme = match target {
66 AccuracyTarget::Maximum => QuantScheme::FP16Passthrough,
67 AccuracyTarget::High => QuantScheme::QInt8,
68 AccuracyTarget::Balanced => QuantScheme::QInt4,
69 AccuracyTarget::Compact => QuantScheme::Binary,
70 };
71
72 quantize_embedding(emb, scheme)
73}
74
75pub fn quantize_with_target(
85 emb: &Vector,
86 target: CompressionTarget,
87) -> Result<QuantizedVector, QuantError> {
88 let scheme = match target {
89 CompressionTarget::Conservative => QuantScheme::FP16Passthrough,
90 CompressionTarget::Moderate => QuantScheme::QInt8,
91 CompressionTarget::Aggressive => QuantScheme::QInt4,
92 CompressionTarget::Maximum => QuantScheme::Binary,
93 };
94
95 quantize_embedding(emb, scheme)
96}
97
#[cfg(test)]
mod tests {
    use super::*;
    use lnmp_embedding::Vector;

    /// Every `AccuracyTarget` variant must yield a vector tagged with its mapped scheme.
    #[test]
    fn test_adaptive_accuracy_selection() {
        let embedding = Vector::from_f32(vec![0.1, 0.2, 0.3, 0.4]);
        // Quantize the shared fixture for one target; any error fails the test.
        let quantized = |target: AccuracyTarget| quantize_adaptive(&embedding, target).unwrap();

        assert_eq!(
            quantized(AccuracyTarget::Maximum).scheme,
            QuantScheme::FP16Passthrough
        );
        assert_eq!(quantized(AccuracyTarget::High).scheme, QuantScheme::QInt8);
        assert_eq!(
            quantized(AccuracyTarget::Balanced).scheme,
            QuantScheme::QInt4
        );
        assert_eq!(
            quantized(AccuracyTarget::Compact).scheme,
            QuantScheme::Binary
        );
    }

    /// Every `CompressionTarget` variant must yield a vector tagged with its mapped scheme.
    #[test]
    fn test_adaptive_compression_selection() {
        let embedding = Vector::from_f32(vec![0.1, 0.2, 0.3, 0.4]);
        // Quantize the shared fixture for one target; any error fails the test.
        let quantized =
            |target: CompressionTarget| quantize_with_target(&embedding, target).unwrap();

        assert_eq!(
            quantized(CompressionTarget::Conservative).scheme,
            QuantScheme::FP16Passthrough
        );
        assert_eq!(
            quantized(CompressionTarget::Moderate).scheme,
            QuantScheme::QInt8
        );
        assert_eq!(
            quantized(CompressionTarget::Aggressive).scheme,
            QuantScheme::QInt4
        );
        assert_eq!(
            quantized(CompressionTarget::Maximum).scheme,
            QuantScheme::Binary
        );
    }
}