pub trait QuantMethod:
Send
+ Sync
+ Debug
+ QuantizedSerde {
Show 17 methods
// Required methods
fn new(method: QuantMethodConfig) -> Result<Self>
where Self: Sized;
fn dequantize_w(&self) -> Result<Tensor>;
fn forward_raw(&self, a: &Tensor) -> Result<Tensor>;
fn quantized_act_type(&self) -> Option<DType>;
fn dtype_and_device(&self) -> (DType, Device);
fn add_delta_w(&self, delta: &Tensor) -> Result<Arc<dyn QuantMethod>>;
fn apply_isq(
self: Arc<Self>,
dtype: Option<IsqType>,
device: Device,
n_quantized: &AtomicUsize,
imatrix_weight: Option<Vec<f32>>,
guard: QuantizeOntoGuard,
) -> Result<Arc<dyn QuantMethod>>;
// Provided methods
fn forward(&self, a: &Tensor) -> Result<Tensor> { ... }
fn gather_forward(&self, a: &Tensor, indices: &Tensor) -> Result<Tensor> { ... }
fn gather_forward_raw(
&self,
_a: &Tensor,
_indices: &Tensor,
) -> Result<Tensor> { ... }
fn afq_inner(&self) -> Option<AfqInner<'_>> { ... }
fn unquant_weight_bias(&self) -> Option<(Tensor, Option<Tensor>)> { ... }
fn has_bias(&self) -> bool { ... }
fn begin_track_stats(&mut self) -> Result<()> { ... }
fn end_track_stats(&self) -> Result<Tensor> { ... }
fn is_distributed(&self) -> Option<DistributedKind> { ... }
fn dummy_info(&self) -> Option<&DummyLayerInfo> { ... }
}
Quantized method for a quantized matmul.
Required Methods§
fn new(method: QuantMethodConfig) -> Result<Self>
where
    Self: Sized,
fn dequantize_w(&self) -> Result<Tensor>
fn forward_raw(&self, a: &Tensor) -> Result<Tensor>
Raw matmul without dtype casting. Implementors override this.
Callers should use `forward` instead.
fn quantized_act_type(&self) -> Option<DType>
If this is a quantized method, returns the activation dtype.
fn dtype_and_device(&self) -> (DType, Device)
Returns the weight dtype and device.
fn add_delta_w(&self, delta: &Tensor) -> Result<Arc<dyn QuantMethod>>
Add a delta weight from LoRA to the weights. The delta should be prescaled with alpha.
fn apply_isq(
    self: Arc<Self>,
    dtype: Option<IsqType>,
    device: Device,
    n_quantized: &AtomicUsize,
    imatrix_weight: Option<Vec<f32>>,
    guard: QuantizeOntoGuard,
) -> Result<Arc<dyn QuantMethod>>
If the quant is backed by a qmatmul.
Provided Methods§
fn forward(&self, a: &Tensor) -> Result<Tensor>
Compute matmul of `self` and `a`. `self` should contain the weights.
Automatically casts to the required quantization activation type and back.
fn gather_forward(&self, a: &Tensor, indices: &Tensor) -> Result<Tensor>
Compute gather matmul of `self` and `a`. `self` should contain the weights.
Automatically casts to the required quantization activation type and back.
If `a` is (n_tokens, n_experts, cols) and the `self` weights are (n_experts, rows, cols),
then `indices` is (n_tokens, n_experts).
fn gather_forward_raw(&self, _a: &Tensor, _indices: &Tensor) -> Result<Tensor>
Raw gather matmul without dtype casting. Implementors override this.
Callers should use `gather_forward` instead.
fn afq_inner(&self) -> Option<AfqInner<'_>>
If this is an AFQ layer, return its (w_q, scales, biases, bits, group_size). Used by Metal fused QKV / gate-up paths.
fn unquant_weight_bias(&self) -> Option<(Tensor, Option<Tensor>)>
fn has_bias(&self) -> bool
fn begin_track_stats(&mut self) -> Result<()>
Begin tracking stats into an `ImatrixLayerStats`.
fn end_track_stats(&self) -> Result<Tensor>
End tracking stats into an `ImatrixLayerStats`. Returns the computed imatrix.
fn is_distributed(&self) -> Option<DistributedKind>
fn dummy_info(&self) -> Option<&DummyLayerInfo>
Trait Implementations§
Dyn Compatibility§
This trait is dyn compatible.
In older versions of Rust, dyn compatibility was called "object safety".