pub struct QuantizedTensor<T> { /* private fields */ }
Expand description
Quantized tensor with INT8 data and metadata.
Stores quantized values along with the information needed to dequantize them back to FP32.
§Invariants (Enforced at Construction)
- INV-1: `data.len() == metadata.numel()`
- INV-2: `metadata.scale > 0.0`
- INV-3: All values in `data` are in range `[qmin, qmax]`
§Example
ⓘ
use ruvector_cnn::quantize::{QuantizedTensor, QuantizationParams, QuantizationMode};
let fp32_data = vec![1.0, 2.0, -1.0, 0.5];
let shape = vec![4];
let params = QuantizationParams::from_minmax(-2.0, 2.0, QuantizationMode::Symmetric)?;
// Quantize
let quantized = QuantizedTensor::<i8>::quantize(&fp32_data, &shape, &params)?;
// Dequantize
let dequantized = quantized.dequantize()?;
Implementations§
Source§impl QuantizedTensor<i8>
impl QuantizedTensor<i8>
Sourcepub fn quantize(
fp32_data: &[f32],
shape: &[usize],
params: &QuantizationParams,
) -> CnnResult<Self>
pub fn quantize( fp32_data: &[f32], shape: &[usize], params: &QuantizationParams, ) -> CnnResult<Self>
Quantize FP32 data to INT8.
§Arguments
- `fp32_data` - Input FP32 values
- `shape` - Tensor shape
- `params` - Quantization parameters
§Returns
Quantized INT8 tensor.
§Example
ⓘ
let fp32 = vec![1.0, 2.0, -1.0];
let shape = vec![3];
let params = QuantizationParams::from_minmax(-2.0, 2.0, QuantizationMode::Symmetric)?;
let quantized = QuantizedTensor::quantize(&fp32, &shape, &params)?;
Sourcepub fn dequantize(&self) -> CnnResult<Vec<f32>>
pub fn dequantize(&self) -> CnnResult<Vec<f32>>
Sourcepub fn metadata(&self) -> &QuantizationMetadata
pub fn metadata(&self) -> &QuantizationMetadata
Get reference to metadata.
Sourcepub fn zero_point(&self) -> i32
pub fn zero_point(&self) -> i32
Get zero point.
Sourcepub fn check_bounds(&self, qmin: i8, qmax: i8) -> bool
pub fn check_bounds(&self, qmin: i8, qmax: i8) -> bool
Check bounds invariant: all values in [qmin, qmax].
This is a sanity check to ensure data hasn’t been corrupted.
Should always return true for properly constructed tensors.
Trait Implementations§
Source§impl<T: Clone> Clone for QuantizedTensor<T>
impl<T: Clone> Clone for QuantizedTensor<T>
Source§fn clone(&self) -> QuantizedTensor<T>
fn clone(&self) -> QuantizedTensor<T>
Returns a duplicate of the value. Read more
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from `source`. Read more
Source§impl<T: Debug> Debug for QuantizedTensor<T>
impl<T: Debug> Debug for QuantizedTensor<T>
Source§impl<'de, T> Deserialize<'de> for QuantizedTensor<T> where
T: Deserialize<'de>,
impl<'de, T> Deserialize<'de> for QuantizedTensor<T> where
T: Deserialize<'de>,
Source§fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,
Deserialize this value from the given Serde deserializer. Read more
Auto Trait Implementations§
impl<T> Freeze for QuantizedTensor<T>
impl<T> RefUnwindSafe for QuantizedTensor<T> where
T: RefUnwindSafe,
impl<T> Send for QuantizedTensor<T> where
T: Send,
impl<T> Sync for QuantizedTensor<T> where
T: Sync,
impl<T> Unpin for QuantizedTensor<T> where
T: Unpin,
impl<T> UnsafeUnpin for QuantizedTensor<T>
impl<T> UnwindSafe for QuantizedTensor<T> where
T: UnwindSafe,
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more