pub struct LmSessionCaches {
pub projector: HashMap<usize, CompiledGraph>,
/* private fields */
}
Cached LM graphs + weight snapshot for one runner device.
Fields
projector: HashMap<usize, CompiledGraph>
Implementations
impl LmSessionCaches
pub fn new(device: Device, max_past: usize) -> LmSessionCaches
pub fn reset_gpu_kv(&mut self)
Invalidate GPU K/V handle bindings (after MTP block or host KV rewrite).
pub fn reset_decode_after_mtp(&mut self)
Drop decode/MTP GPU bindings after an MTP block advanced past_len.
pub fn sync_kv_from_gpu(
    &mut self,
    cfg: &LocateAnythingConfig,
    past_len: usize,
    kv: &mut LayerKvCache,
) -> Result<(), Error>
Copy GPU-resident K/V into kv before a host-path MTP forward.
pub fn ensure_lm_store(
    &mut self,
    store: Arc<LocateAnythingWeightStore>,
) -> Arc<LocateAnythingWeightStore>
Pin mmap-backed LM weights for compile caches (no full f32 snapshot in RAM).
pub fn projector_graph( &mut self, n_tokens: usize, build: impl FnOnce() -> Result<CompiledGraph, Error>, ) -> Result<&mut CompiledGraph, Error>
pub fn prefill_with_kv( &mut self, cfg: &LocateAnythingConfig, seq: usize, inputs_embeds: &[f32], ) -> Result<(Vec<f32>, Vec<Vec<f32>>), Error>
pub fn mtp_logits( &mut self, cfg: &LocateAnythingConfig, past_len: usize, q_len: usize, inputs_embeds: &[f32], full_mask_2d: &[f32], full_seq: usize, rope_cos: &[f32], rope_sin: &[f32], kv: &mut LayerKvCache, ) -> Result<(Vec<f32>, LayerKvCache), Error>
pub fn decode_step_in_place(
    &mut self,
    cfg: &LocateAnythingConfig,
    past_len: usize,
    token: u32,
    rope_cos: &[f32],
    rope_sin: &[f32],
    mtp_window: Option<(usize, usize)>,
    kv: &mut LayerKvCache,
) -> Result<Vec<f32>, Error>
Single-token (or MTP-mask) decode; updates kv in place and returns logits only.
Auto Trait Implementations
impl !RefUnwindSafe for LmSessionCaches
impl !Sync for LmSessionCaches
impl !UnwindSafe for LmSessionCaches
impl Freeze for LmSessionCaches
impl Send for LmSessionCaches
impl Unpin for LmSessionCaches
impl UnsafeUnpin for LmSessionCaches
Blanket Implementations
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value.
impl<ST, DT> CastableFrom<ST, Initialized, Initialized> for DT
impl<ST, DT> CastableFrom<ST, Uninit, Uninit> for DT
impl<T> IntoEither for T
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts `self` into a `Left` variant of `Either<Self, Self>` if `into_left` is `true`. Converts `self` into a `Right` variant of `Either<Self, Self>` otherwise.
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts `self` into a `Left` variant of `Either<Self, Self>` if `into_left(&self)` returns `true`. Converts `self` into a `Right` variant of `Either<Self, Self>` otherwise.