pub struct ReplicatedLayer(/* private fields */);
Expand description
This layer has no parallelization.
Implementations§
Source§impl ReplicatedLayer
impl ReplicatedLayer
pub fn from_linear(lin: Linear) -> Result<Arc<dyn QuantMethod>>
pub fn new( in_dim: usize, out_dim: usize, config: &Option<QuantizedConfig>, bias: bool, vb: ShardedVarBuilder, ) -> Result<Arc<dyn QuantMethod>>
pub fn new_layers_matformer_indices( in_dim: usize, out_dim: usize, kept_layers_indices: Option<&Tensor>, orig_num_hidden_layers: usize, config: &Option<QuantizedConfig>, bias: bool, vb: ShardedVarBuilder, ) -> Result<Arc<dyn QuantMethod>>
Trait Implementations§
Source§impl Debug for ReplicatedLayer
impl Debug for ReplicatedLayer
Source§impl QuantMethod for ReplicatedLayer
impl QuantMethod for ReplicatedLayer
fn new(_method: QuantMethodConfig) -> Result<Self> where
Self: Sized,
Source§fn forward_raw(&self, a: &Tensor) -> Result<Tensor>
fn forward_raw(&self, a: &Tensor) -> Result<Tensor>
Raw matmul without dtype casting. Implementors override this.
Callers should use `forward` instead.
Source§fn add_delta_w(&self, delta: &Tensor) -> Result<Arc<dyn QuantMethod>>
fn add_delta_w(&self, delta: &Tensor) -> Result<Arc<dyn QuantMethod>>
Add a delta weight from LoRA to the weights. This should be prescaled with alpha.
fn dequantize_w(&self) -> Result<Tensor>
Source§fn dtype_and_device(&self) -> (DType, Device)
fn dtype_and_device(&self) -> (DType, Device)
Weight dtype and device
Source§fn begin_track_stats(&mut self) -> Result<()>
fn begin_track_stats(&mut self) -> Result<()>
Begin tracking stats into an ImatrixLayerStats
Source§fn end_track_stats(&self) -> Result<Tensor>
fn end_track_stats(&self) -> Result<Tensor>
End tracking stats into an ImatrixLayerStats. Returns the computed imatrix.
Source§fn quantized_act_type(&self) -> Option<DType>
fn quantized_act_type(&self) -> Option<DType>
If a quantized method, return the activation dtype.
fn unquant_weight_bias(&self) -> Option<(Tensor, Option<Tensor>)>
fn has_bias(&self) -> bool
Source§fn apply_isq(
self: Arc<Self>,
dtype: Option<IsqType>,
device: Device,
n_quantized: &AtomicUsize,
imatrix_weight: Option<Vec<f32>>,
guard: QuantizeOntoGuard,
) -> Result<Arc<dyn QuantMethod>>
fn apply_isq( self: Arc<Self>, dtype: Option<IsqType>, device: Device, n_quantized: &AtomicUsize, imatrix_weight: Option<Vec<f32>>, guard: QuantizeOntoGuard, ) -> Result<Arc<dyn QuantMethod>>
If the quant is backed by a qmatmul.
fn is_distributed(&self) -> Option<DistributedKind>
Source§fn forward(&self, a: &Tensor) -> Result<Tensor>
fn forward(&self, a: &Tensor) -> Result<Tensor>
Compute matmul of `self` and `a`. `self` should contain the weights.
Automatically casts to the required quantization activation type and back.
Source§fn gather_forward(&self, a: &Tensor, indices: &Tensor) -> Result<Tensor>
fn gather_forward(&self, a: &Tensor, indices: &Tensor) -> Result<Tensor>
Compute gather matmul of `self` and `a`. `self` should contain the weights.
Automatically casts to the required quantization activation type and back. Read more
Source§fn gather_forward_raw(&self, _a: &Tensor, _indices: &Tensor) -> Result<Tensor>
fn gather_forward_raw(&self, _a: &Tensor, _indices: &Tensor) -> Result<Tensor>
Raw gather matmul without dtype casting. Implementors override this.
Callers should use `gather_forward` instead.
Source§fn afq_inner(&self) -> Option<AfqInner<'_>>
fn afq_inner(&self) -> Option<AfqInner<'_>>
If this is an AFQ layer, return its (w_q, scales, biases, bits, group_size).
Used by Metal fused QKV / gate-up paths.
fn dummy_info(&self) -> Option<&DummyLayerInfo>
Source§impl QuantizedSerde for ReplicatedLayer
impl QuantizedSerde for ReplicatedLayer
fn isq_serde_supported(&self) -> bool
fn name(&self) -> &'static str
fn serialize(&self) -> Result<Cow<'_, [u8]>>
fn deserialize(
data: Cow<'_, [u8]>,
device: &Device,
comm: &Arc<Comm>,
guard: QuantizeOntoGuard,
) -> Result<Arc<dyn QuantMethod>> where
Self: Sized,
fn deserialize_ext_bias(
_data: Cow<'_, [u8]>,
_device: &Device,
_guard: QuantizeOntoGuard,
) -> Result<(Arc<dyn QuantMethod>, Option<Tensor>)> where
Self: Sized,
Auto Trait Implementations§
impl Freeze for ReplicatedLayer
impl !RefUnwindSafe for ReplicatedLayer
impl Send for ReplicatedLayer
impl Sync for ReplicatedLayer
impl Unpin for ReplicatedLayer
impl UnsafeUnpin for ReplicatedLayer
impl !UnwindSafe for ReplicatedLayer
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts `self` into a `Left` variant of `Either<Self, Self>` if `into_left` is `true`.
Converts `self` into a `Right` variant of `Either<Self, Self>` otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts `self` into a `Left` variant of `Either<Self, Self>` if `into_left(&self)` returns `true`.
Converts `self` into a `Right` variant of `Either<Self, Self>` otherwise. Read more