pub struct LlamaModel {
pub metadata: ModelMetadata,
pub layers: Vec<LlamaLayer>,
pub embed_tokens: Embedding,
pub norm: RMSNorm,
pub lm_head: Option<Linear>,
}
Expand description
Llama model for sparse inference
Fields§
§metadata: ModelMetadata
§layers: Vec<LlamaLayer>
§embed_tokens: Embedding
§norm: RMSNorm
§lm_head: Option<Linear>
Trait Implementations§
Source§impl ModelRunner for LlamaModel
impl ModelRunner for LlamaModel
Source§fn forward(
&self,
input: &ModelInput,
config: &InferenceConfig,
) -> Result<ModelOutput, SparseInferenceError>
fn forward( &self, input: &ModelInput, config: &InferenceConfig, ) -> Result<ModelOutput, SparseInferenceError>
Forward pass with optional sparse computation
Source§fn get_predictor(&self, layer_idx: usize) -> Option<&LowRankPredictor>
fn get_predictor(&self, layer_idx: usize) -> Option<&LowRankPredictor>
Get predictor for a specific layer (if available)
Source§fn calibrate(
&mut self,
samples: &[ModelInput],
) -> Result<CalibrationStats, SparseInferenceError>
fn calibrate( &mut self, samples: &[ModelInput], ) -> Result<CalibrationStats, SparseInferenceError>
Calibrate predictors with sample data
Source§fn metadata(&self) -> &ModelMetadata
fn metadata(&self) -> &ModelMetadata
Get model metadata
Auto Trait Implementations§
impl Freeze for LlamaModel
impl RefUnwindSafe for LlamaModel
impl Send for LlamaModel
impl Sync for LlamaModel
impl Unpin for LlamaModel
impl UnwindSafe for LlamaModel
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
T: ?Sized,
impl<T> BorrowMut<T> for T
where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value.