pub struct MXFP4Layer { /* private fields */ }
Implementations§
Source§impl MXFP4Layer
impl MXFP4Layer
Source§pub fn quantize(
weight: &Tensor,
bias: Option<Tensor>,
device: &Device,
) -> Result<Arc<dyn QuantMethod>>
pub fn quantize( weight: &Tensor, bias: Option<Tensor>, device: &Device, ) -> Result<Arc<dyn QuantMethod>>
Quantize an unquantized weight tensor to MXFP4 format.
weight shape: [N, K], bias shape: [N] (optional)
pub fn linear_b( in_dim: usize, out_dim: usize, config: &QuantizedConfig, bias: bool, vb: ShardedVarBuilder, ) -> Result<Arc<dyn QuantMethod>>
pub fn packed_linear_b( num_local_experts: usize, in_dim: usize, out_dim: usize, config: &QuantizedConfig, bias: bool, vb: ShardedVarBuilder, ) -> Result<Arc<dyn QuantMethod>>
Source§pub fn packed_gptoss_linear(
num_local_experts: usize,
in_dim: usize,
out_dim: usize,
bias: bool,
name: &str,
vb: ShardedVarBuilder,
) -> Result<Arc<dyn QuantMethod>>
pub fn packed_gptoss_linear( num_local_experts: usize, in_dim: usize, out_dim: usize, bias: bool, name: &str, vb: ShardedVarBuilder, ) -> Result<Arc<dyn QuantMethod>>
Load GPT-OSS style MXFP4 experts (combined gate_up_proj format).
GPT-OSS stores tensors as:
- {name}_blocks: [num_experts, out_dim, num_blocks, 16], where 16 bytes = 32 FP4 values
- {name}_scales: [num_experts, out_dim, num_blocks]
- {name}_bias: [num_experts, out_dim]
This function loads and reshapes the 4D blocks tensor to 3D [num_experts, out_dim, in_dim/2].
Trait Implementations§
Source§impl Debug for MXFP4Layer
impl Debug for MXFP4Layer
Source§impl QuantMethod for MXFP4Layer
impl QuantMethod for MXFP4Layer
fn new(method: QuantMethodConfig) -> Result<Self> where
Self: Sized,
fn dequantize_w(&self) -> Result<Tensor>
Source§fn forward_raw(&self, x: &Tensor) -> Result<Tensor>
fn forward_raw(&self, x: &Tensor) -> Result<Tensor>
Raw matmul without dtype casting. Implementors override this.
Callers should use forward instead.
Source§fn gather_forward_raw(&self, x: &Tensor, indices: &Tensor) -> Result<Tensor>
fn gather_forward_raw(&self, x: &Tensor, indices: &Tensor) -> Result<Tensor>
Raw gather matmul without dtype casting. Implementors override this.
Callers should use gather_forward instead.
Source§fn quantized_act_type(&self) -> Option<DType>
fn quantized_act_type(&self) -> Option<DType>
If a quantized method, return the activation dtype.
Source§fn add_delta_w(&self, _delta: &Tensor) -> Result<Arc<dyn QuantMethod>>
fn add_delta_w(&self, _delta: &Tensor) -> Result<Arc<dyn QuantMethod>>
Add a delta weight from LoRA to the weights. This should be prescaled with alpha.
Source§fn dtype_and_device(&self) -> (DType, Device)
fn dtype_and_device(&self) -> (DType, Device)
Weight dtype and device
Source§fn apply_isq(
self: Arc<Self>,
_dtype: Option<IsqType>,
_device: Device,
_n_quantized: &AtomicUsize,
_imatrix_weight: Option<Vec<f32>>,
_guard: QuantizeOntoGuard,
) -> Result<Arc<dyn QuantMethod>>
fn apply_isq( self: Arc<Self>, _dtype: Option<IsqType>, _device: Device, _n_quantized: &AtomicUsize, _imatrix_weight: Option<Vec<f32>>, _guard: QuantizeOntoGuard, ) -> Result<Arc<dyn QuantMethod>>
If the quant is backed by a qmatmul.
Source§fn forward(&self, a: &Tensor) -> Result<Tensor>
fn forward(&self, a: &Tensor) -> Result<Tensor>
Compute matmul of self and a. self should contain the weights.
Automatically casts to the required quantization activation type and back.
Source§fn gather_forward(&self, a: &Tensor, indices: &Tensor) -> Result<Tensor>
fn gather_forward(&self, a: &Tensor, indices: &Tensor) -> Result<Tensor>
Compute gather matmul of self and a. self should contain the weights.
Automatically casts to the required quantization activation type and back. Read more
Source§fn afq_inner(&self) -> Option<AfqInner<'_>>
fn afq_inner(&self) -> Option<AfqInner<'_>>
If this is an AFQ layer, return its (w_q, scales, biases, bits, group_size).
Used by Metal fused QKV / gate-up paths.
fn unquant_weight_bias(&self) -> Option<(Tensor, Option<Tensor>)>
fn has_bias(&self) -> bool
Source§fn begin_track_stats(&mut self) -> Result<()>
fn begin_track_stats(&mut self) -> Result<()>
Begin tracking stats into an ImatrixLayerStats
Source§fn end_track_stats(&self) -> Result<Tensor>
fn end_track_stats(&self) -> Result<Tensor>
End tracking stats into an ImatrixLayerStats. Returns the computed imatrix.
fn is_distributed(&self) -> Option<DistributedKind>
fn dummy_info(&self) -> Option<&DummyLayerInfo>
Source§impl QuantizedSerde for MXFP4Layer
impl QuantizedSerde for MXFP4Layer
fn name(&self) -> &'static str
fn isq_serde_supported(&self) -> bool
fn serialize(&self) -> Result<Cow<'_, [u8]>>
Source§fn serialize_with_bias(&self, bias: Option<Tensor>) -> Result<Cow<'_, [u8]>>
fn serialize_with_bias(&self, bias: Option<Tensor>) -> Result<Cow<'_, [u8]>>
NOT meant for external calling
fn deserialize(
data: Cow<'_, [u8]>,
device: &Device,
_comm: &Arc<Comm>,
guard: QuantizeOntoGuard,
) -> Result<Arc<dyn QuantMethod>> where
Self: Sized,
fn deserialize_ext_bias(
data: Cow<'_, [u8]>,
device: &Device,
guard: QuantizeOntoGuard,
) -> Result<(Arc<dyn QuantMethod>, Option<Tensor>)> where
Self: Sized,
Auto Trait Implementations§
impl Freeze for MXFP4Layer
impl !RefUnwindSafe for MXFP4Layer
impl Send for MXFP4Layer
impl Sync for MXFP4Layer
impl Unpin for MXFP4Layer
impl UnsafeUnpin for MXFP4Layer
impl !UnwindSafe for MXFP4Layer
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more