pub struct TalkerEngine { /* private fields */ }
Implementations§
Source§impl TalkerEngine
impl TalkerEngine
pub fn open( store: &Qwen3TtsWeightStore, talker: &TalkerConfig, device: Device, ) -> Result<TalkerEngine, Error>
pub fn open_at( model_dir: &Path, store: &Qwen3TtsWeightStore, talker: &TalkerConfig, device: Device, ) -> Result<TalkerEngine, Error>
pub fn open_with_weights( model_dir: &Path, store: &Qwen3TtsWeightStore, talker: &TalkerConfig, weights: HashMap<String, (Vec<f32>, Vec<usize>)>, device: Device, ) -> Result<TalkerEngine, Error>
pub fn open_with_weights_and_profiles( _model_dir: &Path, store: &Qwen3TtsWeightStore, talker: &TalkerConfig, weights: HashMap<String, (Vec<f32>, Vec<usize>)>, device: Device, prefill_profile: CompileProfile, decode_profile: CompileProfile, ) -> Result<TalkerEngine, Error>
Sourcepub fn decode_bucket_upper(&self, past_seq: usize) -> usize
pub fn decode_bucket_upper(&self, past_seq: usize) -> usize
Bucket upper bound for talker decode at past_seq (power-of-two ladder).
Sourcepub fn precompile_decode_bucket_for_past(
&mut self,
past_seq: usize,
) -> Result<(), Error>
pub fn precompile_decode_bucket_for_past( &mut self, past_seq: usize, ) -> Result<(), Error>
Pre-compile the single decode bucket that contains past_seq (cheap first-frame insurance).
Sourcepub fn preinstall_gpu_kv_horizon(&mut self, horizon: usize) -> Result<(), Error>
pub fn preinstall_gpu_kv_horizon(&mut self, horizon: usize) -> Result<(), Error>
Pre-bind GPU K/V handles for horizon buckets (boundary crossings by default).
Sourcepub fn preinstall_gpu_kv_current(&mut self) -> Result<(), Error>
pub fn preinstall_gpu_kv_current(&mut self) -> Result<(), Error>
Bind GPU K/V for the current prefill bucket (no dry decode).
Skips re-upload when handles are already live for this bucket — rebinding from
host kv would clobber GPU-updated prefix rows after the first decode step.
Sourcepub fn warmup_bucket_executions_from(
&mut self,
from_horizon: usize,
new_horizon: usize,
) -> Result<(), Error>
pub fn warmup_bucket_executions_from( &mut self, from_horizon: usize, new_horizon: usize, ) -> Result<(), Error>
Dry-run only bucket boundaries in (from_horizon, new_horizon] not yet warmed.
Sourcepub fn warmup_bucket_executions(&mut self, horizon: usize) -> Result<(), Error>
pub fn warmup_bucket_executions(&mut self, horizon: usize) -> Result<(), Error>
Dry decode to warm Metal/CUDA graphs (restores KV after).
Sourcepub fn precompile_decode_buckets_up_to(
&mut self,
horizon: usize,
parent: Option<&Progress>,
) -> Result<(), Error>
pub fn precompile_decode_buckets_up_to( &mut self, horizon: usize, parent: Option<&Progress>, ) -> Result<(), Error>
Pre-compile decode buckets with upper <= horizon (skips unused large past lengths).
Sourcepub fn warmup(&mut self, prefill_seq: usize) -> Result<(), Error>
pub fn warmup(&mut self, prefill_seq: usize) -> Result<(), Error>
Warm compile caches (prefill + one decode step). Call before timed runs.
Sourcepub fn warmup_embeds(
&mut self,
embeds: ArrayBase<ViewRepr<&f32>, Dim<[usize; 2]>>,
max_frames: usize,
) -> Result<ArrayBase<OwnedRepr<f32>, Dim<[usize; 2]>>, Error>
pub fn warmup_embeds( &mut self, embeds: ArrayBase<ViewRepr<&f32>, Dim<[usize; 2]>>, max_frames: usize, ) -> Result<ArrayBase<OwnedRepr<f32>, Dim<[usize; 2]>>, Error>
Warm prefill (and optionally one decode step when buckets are fully lazy).
Sourcepub fn warm_eager_decode_rope(&mut self) -> Result<(), Error>
pub fn warm_eager_decode_rope(&mut self) -> Result<(), Error>
Precompute eager decode RoPE bank after prefill (rope_delta must be set).
Sourcepub fn ensure_eager_horizon(&mut self, horizon: usize)
pub fn ensure_eager_horizon(&mut self, horizon: usize)
Grow eager attention scratch + RoPE bank for a horizon larger than the default (256). No-op when no eager talker is loaded or the buffers already fit.
pub fn reset_kv(&mut self)
Sourcepub fn ensure_prefill_compiled(&mut self, seq: usize) -> Result<(), Error>
pub fn ensure_prefill_compiled(&mut self, seq: usize) -> Result<(), Error>
Compile prefill graph for seq (no-op when cached or eager).
pub fn prefill( &mut self, embeds: ArrayBase<ViewRepr<&f32>, Dim<[usize; 2]>>, ) -> Result<ArrayBase<OwnedRepr<f32>, Dim<[usize; 2]>>, Error>
pub fn past_len(&self) -> usize
pub fn rope_delta(&self) -> i64
KV decode step; updates Self::last_hidden without sampling.
pub fn decode_step( &mut self, embed: ArrayBase<ViewRepr<&f32>, Dim<[usize; 1]>>, ) -> Result<(ArrayBase<OwnedRepr<f32>, Dim<[usize; 1]>>, u32), Error>
Sourcepub fn import_fused_decode_outputs(
&mut self,
hidden_vec: &[f32],
layers_k: &[Vec<f32>],
layers_v: &[Vec<f32>],
past_seq: usize,
) -> Result<(), Error>
pub fn import_fused_decode_outputs( &mut self, hidden_vec: &[f32], layers_k: &[Vec<f32>], layers_v: &[Vec<f32>], past_seq: usize, ) -> Result<(), Error>
Import bucketed decode outputs from a fused codec-frame graph run.
KV decode; writes the last hidden row into hidden_out.
pub fn is_eager(&self) -> bool
pub fn uses_gpu_kv(&self) -> bool
pub fn codec_eos(&self) -> u32
pub fn codec_head(&self) -> ArrayBase<ViewRepr<&f32>, Dim<[usize; 2]>>
pub fn codec_head_flat(&self) -> (&[f32], usize, usize)
Sourcepub fn kv_state(&self) -> LayerKvCache
pub fn kv_state(&self) -> LayerKvCache
Snapshot host K/V after prefill (parity / isolation tests).
Sourcepub fn restore_kv_state(&mut self, kv: LayerKvCache, rope_delta: i64)
pub fn restore_kv_state(&mut self, kv: LayerKvCache, rope_delta: i64)
Restore host K/V, past_len, and MRoPE delta (parity / isolation tests).
Auto Trait Implementations§
impl !RefUnwindSafe for TalkerEngine
impl !Sync for TalkerEngine
impl !UnwindSafe for TalkerEngine
impl Freeze for TalkerEngine
impl Send for TalkerEngine
impl Unpin for TalkerEngine
impl UnsafeUnpin for TalkerEngine
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
impl<ST, DT> CastableFrom<ST, Initialized, Initialized> for DT
impl<ST, DT> CastableFrom<ST, Uninit, Uninit> for DT
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more