pub struct SparseInferenceBackend { /* private fields */ }
Expand description
Sparse inference backend for RuvLLM integration
Implementations§
Source§impl SparseInferenceBackend
impl SparseInferenceBackend
Source pub fn new(
num_layers: usize,
hidden_dim: usize,
intermediate_dim: usize,
vocab_size: usize,
sparsity_ratio: f32,
) -> Result<Self>
pub fn new( num_layers: usize, hidden_dim: usize, intermediate_dim: usize, vocab_size: usize, sparsity_ratio: f32, ) -> Result<Self>
Create a new sparse inference backend
Source pub fn from_gguf_bytes(data: &[u8]) -> Result<Self>
pub fn from_gguf_bytes(data: &[u8]) -> Result<Self>
Create from GGUF model bytes
Source pub fn next_token(
&mut self,
input_ids: &[u32],
kv_cache: &mut KVCache,
) -> Result<u32>
pub fn next_token( &mut self, input_ids: &[u32], kv_cache: &mut KVCache, ) -> Result<u32>
Generate the next token
Source pub fn generate(
&mut self,
input_ids: &[u32],
config: &GenerationConfig,
) -> Result<Vec<u32>>
pub fn generate( &mut self, input_ids: &[u32], config: &GenerationConfig, ) -> Result<Vec<u32>>
Generate multiple tokens
Source pub fn metadata(&self) -> &ModelMetadata
pub fn metadata(&self) -> &ModelMetadata
Get model metadata
Source pub fn generation_stats(&self) -> &GenerationStats
pub fn generation_stats(&self) -> &GenerationStats
Get generation statistics
Source pub fn set_sparsity(&mut self, threshold: f32)
pub fn set_sparsity(&mut self, threshold: f32)
Set sparsity threshold
Trait Implementations§
Source§impl InferenceBackend for SparseInferenceBackend
impl InferenceBackend for SparseInferenceBackend
Auto Trait Implementations§
impl Freeze for SparseInferenceBackend
impl RefUnwindSafe for SparseInferenceBackend
impl Send for SparseInferenceBackend
impl Sync for SparseInferenceBackend
impl Unpin for SparseInferenceBackend
impl UnwindSafe for SparseInferenceBackend
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more