pub struct SampledTokenClassifier<'model> { /* private fields */ }
Implementations§
Source§impl<'model> SampledTokenClassifier<'model>
impl<'model> SampledTokenClassifier<'model>
pub fn new(model: &'model LlamaModel, markers: StreamingMarkers) -> Self
Sourcepub fn ingest(&mut self, token: LlamaToken) -> Vec<IngestOutcome>
pub fn ingest(&mut self, token: LlamaToken) -> Vec<IngestOutcome>
Ingest one sampled token. Returns the outcomes that have finalised this turn — typically a single outcome, occasionally zero (the classifier is holding back tokens that may yet form a marker), or several when a buffered marker prefix diverges and the held-back tokens flush.
Each IngestOutcome carries both the SampledToken variant for
classification and the decoded visible_piece for streaming. Marker
boundaries get an empty visible_piece so their text never reaches
user-visible streams.
Sourcepub fn ingest_prompt_token(&mut self, token: LlamaToken)
pub fn ingest_prompt_token(&mut self, token: LlamaToken)
Replay one prompt token through the marker state machine so that the
section at end-of-prompt reflects the chat template’s rendered tail
(e.g. for Qwen3.5/3.6 with enable_thinking=false the prompt ends with
a closed empty <think>...</think> block, leaving the section in
Content; with enable_thinking=true it ends inside an open <think>,
leaving the section in Reasoning).
Prompt tokens never produce IngestOutcomes and never increment usage
counters — they are not generated content.
pub fn ingest_prompt_tokens(&mut self, tokens: &[LlamaToken])
Sourcepub fn flush(&mut self) -> Vec<IngestOutcome>
pub fn flush(&mut self) -> Vec<IngestOutcome>
Drain every still-buffered token. Call once at end of generation (EOG)
to make sure no decoded text is silently dropped. After flush() the
classifier behaves as if freshly constructed in terms of buffer state.
Sourcepub fn sample(
&mut self,
sampler: &mut LlamaSampler,
context: &LlamaContext<'_>,
idx: i32,
) -> Result<(LlamaToken, Vec<IngestOutcome>), SampleError>
pub fn sample( &mut self, sampler: &mut LlamaSampler, context: &LlamaContext<'_>, idx: i32, ) -> Result<(LlamaToken, Vec<IngestOutcome>), SampleError>
§Errors
Forwards LlamaSampler::sample errors verbatim. Nothing is recorded on failure.
Returns the raw sampled token (for downstream batch.add / is_eog_token
calls) alongside the outcomes that finalised this turn — see
Self::ingest for buffering semantics.
Sourcepub fn feed_prompt_to_batch(
&mut self,
batch: &mut LlamaBatch<'_>,
token: LlamaToken,
position: llama_pos,
seq_ids: &[llama_seq_id],
logits: bool,
) -> Result<(), BatchAddError>
pub fn feed_prompt_to_batch( &mut self, batch: &mut LlamaBatch<'_>, token: LlamaToken, position: llama_pos, seq_ids: &[llama_seq_id], logits: bool, ) -> Result<(), BatchAddError>
§Errors
Forwards LlamaBatch::add errors verbatim. Nothing is staged on failure.
Sourcepub fn feed_prompt_sequence_to_batch(
&mut self,
batch: &mut LlamaBatch<'_>,
tokens: &[LlamaToken],
seq_id: llama_seq_id,
logits_all: bool,
) -> Result<(), BatchAddError>
pub fn feed_prompt_sequence_to_batch( &mut self, batch: &mut LlamaBatch<'_>, tokens: &[LlamaToken], seq_id: llama_seq_id, logits_all: bool, ) -> Result<(), BatchAddError>
§Errors
Forwards LlamaBatch::add_sequence errors verbatim. Nothing is staged on failure.
pub const fn commit_prompt_tokens(&mut self) -> u64
pub const fn discard_pending_prompt_tokens(&mut self) -> u64
pub const fn pending_prompt_tokens(&self) -> u64
Sourcepub fn eval_multimodal_chunks(
&mut self,
chunks: &MtmdInputChunks,
mtmd_ctx: &MtmdContext,
llama_ctx: &LlamaContext<'_>,
start_position: llama_pos,
seq_id: llama_seq_id,
n_batch: i32,
logits_last: bool,
) -> Result<llama_pos, EvalMultimodalChunksError>
pub fn eval_multimodal_chunks( &mut self, chunks: &MtmdInputChunks, mtmd_ctx: &MtmdContext, llama_ctx: &LlamaContext<'_>, start_position: llama_pos, seq_id: llama_seq_id, n_batch: i32, logits_last: bool, ) -> Result<llama_pos, EvalMultimodalChunksError>
§Errors
Returns EvalMultimodalChunksError::EvalFailed when the underlying
eval_chunks call fails (no counters move),
EvalMultimodalChunksError::UnknownChunkType when a chunk reports a
type unknown to this binding, or
EvalMultimodalChunksError::ChunkOutOfBounds when a valid index returns
None from chunks.get.
pub const fn record_prompt_tokens(&mut self, count: u64)
pub const fn record_input_image_tokens(&mut self, count: u64)
pub const fn record_input_audio_tokens(&mut self, count: u64)
Sourcepub const fn record_cached_prompt_tokens(
&mut self,
count: u64,
) -> Result<(), TokenUsageError>
pub const fn record_cached_prompt_tokens( &mut self, count: u64, ) -> Result<(), TokenUsageError>
§Errors
Forwards TokenUsageError::CachedExceedsPrompt when the running cached total would
exceed the prompt total.
pub const fn usage(&self) -> &TokenUsage
pub fn into_usage(self) -> TokenUsage
pub const fn current_section(&self) -> SampledTokenSection
pub const fn markers(&self) -> &StreamingMarkers
Auto Trait Implementations§
impl<'model> Freeze for SampledTokenClassifier<'model>
impl<'model> RefUnwindSafe for SampledTokenClassifier<'model>
impl<'model> Send for SampledTokenClassifier<'model>
impl<'model> Sync for SampledTokenClassifier<'model>
impl<'model> Unpin for SampledTokenClassifier<'model>
impl<'model> UnsafeUnpin for SampledTokenClassifier<'model>
impl<'model> UnwindSafe for SampledTokenClassifier<'model>
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Source§impl<T> IntoEither for T
impl<T> IntoEither for T
Source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left is true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more
Source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self>
if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self>
otherwise. Read more