lnmp_quant/lib.rs
//! # lnmp-quant
//!
//! Quantization and compression for LNMP embedding vectors.
//!
//! This crate provides efficient quantization schemes that compress embedding vectors
//! while maintaining high semantic accuracy. Four schemes are available:
//! - QInt8: 4x compression with ~99% accuracy
//! - QInt4: 8x compression with ~95-97% accuracy
//! - Binary: 32x compression with ~85-90% similarity
//! - FP16: 2x compression, near-lossless
//!
//! ## Quick Start
//!
//! ```rust
//! use lnmp_quant::{quantize_embedding, dequantize_embedding, QuantScheme};
//! use lnmp_embedding::Vector;
//!
//! // Create an embedding
//! let embedding = Vector::from_f32(vec![0.12, -0.45, 0.33]);
//!
//! // Quantize to QInt8
//! let quantized = quantize_embedding(&embedding, QuantScheme::QInt8).unwrap();
//! println!("Original size: {} bytes", embedding.dim * 4);
//! println!("Quantized size: {} bytes", quantized.data_size());
//! println!("Compression ratio: {:.1}x", quantized.compression_ratio());
//!
//! // Dequantize back
//! let restored = dequantize_embedding(&quantized).unwrap();
//! ```
//!
//! ## Quantization Schemes
//!
//! - **QInt8**: 8-bit signed integer quantization (4x compression, ~99% accuracy)
//! - **QInt4**: 4-bit packed quantization (8x compression, ~95-97% accuracy)
//! - **Binary**: 1-bit sign-based quantization (32x compression, ~85-90% similarity)
//! - **FP16**: Half-precision float (2x compression, ~99.9% accuracy, near-lossless)
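//!
//! The sketch below compares several schemes on the same vector. It is marked
//! `ignore` because only the `QInt8` variant name is confirmed by the Quick
//! Start above; the `QInt4` and `Binary` variant names are assumed from the
//! scheme list, so check `QuantScheme` for the exact spelling.
//!
//! ```rust,ignore
//! use lnmp_quant::{quantize_embedding, QuantScheme};
//! use lnmp_embedding::Vector;
//!
//! let embedding = Vector::from_f32(vec![0.1; 1536]);
//!
//! // `QuantScheme::QInt8` is confirmed; the other variant names are assumed.
//! let schemes = [
//!     ("qint8", QuantScheme::QInt8),
//!     ("qint4", QuantScheme::QInt4),
//!     ("binary", QuantScheme::Binary),
//! ];
//! for (name, scheme) in schemes {
//!     let q = quantize_embedding(&embedding, scheme).unwrap();
//!     println!("{name}: {} bytes ({:.1}x)", q.data_size(), q.compression_ratio());
//! }
//! ```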

pub mod adaptive;
pub mod batch;
pub mod binary;
pub mod decode;
pub mod encode;
pub mod error;
pub mod fp16;
pub mod metrics;
pub mod qint4;
pub mod scheme;
pub mod vector;

// Re-export main types and functions
pub use decode::dequantize_embedding;
pub use encode::quantize_embedding;
pub use error::QuantError;
pub use metrics::QuantMetrics;
pub use scheme::QuantScheme;
pub use vector::QuantizedVector;

#[cfg(test)]
mod tests {
    use super::*;
    use lnmp_embedding::{SimilarityMetric, Vector};

    #[test]
    fn test_basic_roundtrip() {
        let original = Vector::from_f32(vec![0.1, 0.2, 0.3, 0.4, 0.5]);
        let quantized = quantize_embedding(&original, QuantScheme::QInt8).unwrap();
        let restored = dequantize_embedding(&quantized).unwrap();

        let similarity = original
            .similarity(&restored, SimilarityMetric::Cosine)
            .unwrap();

        assert!(similarity > 0.95);
    }

    #[test]
    fn test_compression_ratio() {
        let original = Vector::from_f32(vec![0.1; 1536]);
        let quantized = quantize_embedding(&original, QuantScheme::QInt8).unwrap();

        // Original: 1536 * 4 = 6144 bytes
        // Quantized: 1536 * 1 = 1536 bytes
        assert_eq!(quantized.compression_ratio(), 4.0);
    }
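
    // A hedged sketch of a size check: this assumes `data_size()` returns the
    // packed payload size in bytes and that QInt8 stores one byte per
    // dimension, which the exact 4.0x ratio asserted above implies.
    #[test]
    fn test_qint8_data_size() {
        let original = Vector::from_f32(vec![0.1; 1536]);
        let quantized = quantize_embedding(&original, QuantScheme::QInt8).unwrap();

        // 1536 dims * 1 byte per dim = 1536 bytes (assumed layout).
        assert_eq!(quantized.data_size(), 1536);
    }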
}