ruvector_sparse_inference::model::runners

Struct LlamaModel

pub struct LlamaModel {
    pub metadata: ModelMetadata,
    pub layers: Vec<LlamaLayer>,
    pub embed_tokens: Embedding,
    pub norm: RMSNorm,
    pub lm_head: Option<Linear>,
}

Expand description

Llama model for sparse inference

Fields§

§metadata: ModelMetadata
§layers: Vec<LlamaLayer>
§embed_tokens: Embedding
§norm: RMSNorm
§lm_head: Option<Linear>

Trait Implementations§

Source §

impl ModelRunner for LlamaModel

Source §

fn forward( &self, input: &ModelInput, config: &InferenceConfig, ) -> Result<ModelOutput, SparseInferenceError>

Forward pass with optional sparse computation

Source §

fn get_predictor(&self, layer_idx: usize) -> Option<&LowRankPredictor>

Get predictor for a specific layer (if available)

Source §

fn calibrate( &mut self, samples: &[ModelInput], ) -> Result<CalibrationStats, SparseInferenceError>

Calibrate predictors with sample data

Source §

fn metadata(&self) -> &ModelMetadata

Get model metadata

Auto Trait Implementations§

§

impl UnwindSafe for LlamaModel

Blanket Implementations§

Source §

impl<T> Any for T
where T: 'static + ?Sized,

Source §

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

Source §

impl<T> Borrow<T> for T
where T: ?Sized,

Source §

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

Source §

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source §

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

Source §

impl<T> From<T> for T

Source §

fn from(t: T) -> T

Returns the argument unchanged.

Source §

impl<T> Instrument for T

Source §

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more

Source §

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more

Source §

impl<T, U> Into<U> for T
where U: From<T>,

Source §

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source §

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source §

type Error = Infallible

The type returned in the event of a conversion error.

Source §

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

Source §

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source §

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

Source §

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.

Source §

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source §

fn vzip(self) -> V

Source §

impl<T> WithSubscriber for T

Source §

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more

Source §

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more

LlamaModel

Struct LlamaModel Copy item path

Fields§

Trait Implementations§

impl ModelRunner for LlamaModel

fn forward( &self, input: &ModelInput, config: &InferenceConfig, ) -> Result<ModelOutput, SparseInferenceError>

fn get_predictor(&self, layer_idx: usize) -> Option<&LowRankPredictor>

fn calibrate( &mut self, samples: &[ModelInput], ) -> Result<CalibrationStats, SparseInferenceError>

fn metadata(&self) -> &ModelMetadata

Auto Trait Implementations§

impl Freeze for LlamaModel

impl RefUnwindSafe for LlamaModel

impl Send for LlamaModel

impl Sync for LlamaModel

impl Unpin for LlamaModel

impl UnwindSafe for LlamaModel

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> From<T> for T

fn from(t: T) -> T

impl<T> Instrument for T

fn instrument(self, span: Span) -> Instrumented<Self>

fn in_current_span(self) -> Instrumented<Self>

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

fn vzip(self) -> V

impl<T> WithSubscriber for T

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self> where S: Into<Dispatch>,

fn with_current_subscriber(self) -> WithDispatch<Self>

Struct LlamaModel

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,