Skip to main content

sensorlm/quantization/
mod.rs

1//! Model quantisation.
2//!
3//! | Module | Contents |
4//! |--------|----------|
5//! | [`int8`] | INT8 post-training quantisation, FP16 export, calibration |
6//!
7//! # Supported schemes
8//!
9//! | Scheme | Memory saving (vs. FP32) | Quality loss |
10//! |--------|---------------|--------------|
11//! | FP16 (half precision) | 2× | Negligible |
12//! | INT8 symmetric per-tensor | 4× | Small (~0.5–1 % on retrieval tasks) |
13//! | INT8 symmetric per-channel | 4× | Very small (~0.1–0.3 %) |
14//!
15//! # Workflow
16//!
17//! ```no_run
18//! use sensorlm::quantization::int8::{quantize_model_weights, QuantizedModel};
19//! use std::path::Path;
20//!
21//! // Extract weights from a trained model (pseudo-code):
22//! // let weights = extract_linear_weights(&trained_model);
23//! // let qm = quantize_model_weights(config_json, weights.into_iter());
24//! // qm.save(Path::new("model_int8.json")).unwrap();
25//! ```
26
27pub mod int8;