pub struct InferenceContext {
pub kv_cache: KVCache,
pub backend: Arc<dyn Backend>,
pub position: usize,
pub recurrent_state: Option<RecurrentState>,
pub tq_cache: Option<TurboQuantKVCache>,
}
Expand description
Context for model inference
Fields§
kv_cache: KVCache — KV cache for attention.
backend: Arc<dyn Backend> — Backend to use for computation.
position: usize — Current position in the sequence.
recurrent_state: Option<RecurrentState> — Recurrent state for delta-net layers (None if the model has no SSM layers).
tq_cache: Option<TurboQuantKVCache> — Optional TurboQuant-compressed KV cache (replaces the f32 cache for attention).
Implementations§
Source§impl InferenceContext
impl InferenceContext
Source pub fn new(config: &ModelConfig, backend: Arc<dyn Backend>) -> Self
pub fn new(config: &ModelConfig, backend: Arc<dyn Backend>) -> Self
Create a new inference context
Source pub fn new_with_cache_type(
config: &ModelConfig,
backend: Arc<dyn Backend>,
cache_type: KVCacheType,
) -> Self
pub fn new_with_cache_type( config: &ModelConfig, backend: Arc<dyn Backend>, cache_type: KVCacheType, ) -> Self
Create inference context with a specific KV cache type.
Source pub fn new_with_recurrent(
config: &ModelConfig,
backend: Arc<dyn Backend>,
is_recurrent: &[bool],
rc: &RecurrentConfig,
) -> Self
pub fn new_with_recurrent( config: &ModelConfig, backend: Arc<dyn Backend>, is_recurrent: &[bool], rc: &RecurrentConfig, ) -> Self
Create an inference context with recurrent state for SSM layers.
is_recurrent[i] indicates whether layer i is recurrent (DeltaNet or Mamba).
Source pub fn has_turboquant(&self) -> bool
pub fn has_turboquant(&self) -> bool
Whether TurboQuant KV cache is active.
Auto Trait Implementations§
impl !RefUnwindSafe for InferenceContext
impl !UnwindSafe for InferenceContext
impl Freeze for InferenceContext
impl Send for InferenceContext
impl Sync for InferenceContext
impl Unpin for InferenceContext
impl UnsafeUnpin for InferenceContext
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
impl<A, B, T> HttpServerConnExec<A, B> for T where
B: Body,
Source§impl<T> Instrument for T
impl<T> Instrument for T
Source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
Source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more