1use std::collections::VecDeque;
2
3use llama_cpp_bindings_sys::llama_pos;
4use llama_cpp_bindings_sys::llama_seq_id;
5
6use llama_cpp_bindings_types::TokenUsage;
7use llama_cpp_bindings_types::TokenUsageError;
8
9use crate::batch_add_error::BatchAddError;
10use crate::context::LlamaContext;
11use crate::error::EvalMultimodalChunksError;
12use crate::error::SampleError;
13use crate::llama_batch::LlamaBatch;
14use crate::model::LlamaModel;
15use crate::mtmd::MtmdContext;
16use crate::mtmd::MtmdInputChunks;
17use crate::sampled_token::SampledToken;
18use crate::sampling::LlamaSampler;
19use crate::streaming_json_probe::JsonProbeOutcome;
20use crate::streaming_markers::{MarkerKind, StreamingMarkers};
21use crate::token::LlamaToken;
22
23pub use crate::ingest_outcome::IngestOutcome;
24pub use crate::sampled_token_section::SampledTokenSection;
25
/// A token parked in the classifier's look-back queue until its section is settled.
#[derive(Clone, Debug)]
struct PendingToken {
    /// The token exactly as it was handed to the classifier.
    token: LlamaToken,
    /// Decoded text of the token; left empty for tokens ingested from the prompt.
    decoded: String,
    /// Section the token will be reported under. Initialised from the classifier's
    /// current section and rewritten when the token turns out to belong to a marker
    /// span or to a committed JSON probe.
    section: SampledTokenSection,
    /// Set once the token is part of a matched marker; its visible piece is then
    /// emitted as an empty string.
    is_boundary: bool,
    /// Set for prompt tokens: they take part in marker matching but never produce
    /// an outcome or a usage record.
    is_from_prompt: bool,
    /// Set while the JSON probe is withholding the token; draining stops when such
    /// a token reaches the front of the queue.
    is_held_for_probe: bool,
}
35
/// Text accumulated by an in-progress JSON probe.
#[derive(Clone, Debug)]
struct JsonProbeState {
    /// Concatenation of the decoded pieces held since the probe engaged; it is
    /// re-validated as a JSON prefix after every appended piece.
    held_text: String,
}
40
/// Whether the classifier is currently withholding tokens to test them as JSON.
#[derive(Clone, Debug)]
enum ProbeMode {
    /// No probe is running; tokens flow through the normal look-back queue.
    Idle,
    /// A probe is running and carries the text collected so far.
    Active(JsonProbeState),
}
46
/// Classifies a stream of tokens into reasoning / content / tool-call sections by
/// matching configured marker token sequences, and keeps token usage counters.
pub struct SampledTokenClassifier<'model> {
    /// Model used to turn tokens into text pieces.
    model: &'model LlamaModel,
    /// Marker token sequences that open and close the reasoning and tool-call sections.
    markers: StreamingMarkers,
    /// Decoder state passed to `token_to_piece` on every decode call.
    decoder: encoding_rs::Decoder,
    /// Look-back queue of tokens that have been ingested but not yet emitted.
    pending: VecDeque<PendingToken>,
    /// Section that newly ingested tokens are assigned to.
    section: SampledTokenSection,
    /// Prompt tokens fed to a batch but not yet committed to `usage`.
    pending_prompt_tokens: u64,
    /// Running usage counters.
    usage: TokenUsage,
    /// State of the JSON probe.
    probe_mode: ProbeMode,
}
57
58impl<'model> SampledTokenClassifier<'model> {
59 #[must_use]
60 pub fn new(model: &'model LlamaModel, markers: StreamingMarkers) -> Self {
61 Self {
62 model,
63 markers,
64 decoder: encoding_rs::UTF_8.new_decoder(),
65 pending: VecDeque::new(),
66 section: SampledTokenSection::Pending,
67 pending_prompt_tokens: 0,
68 usage: TokenUsage::new(),
69 probe_mode: ProbeMode::Idle,
70 }
71 }
72
73 pub fn ingest(&mut self, token: LlamaToken) -> Vec<IngestOutcome> {
83 if !self.markers.has_any() {
84 self.usage.record_undeterminable_token();
85 let piece = self.decode(token);
86 return vec![IngestOutcome {
87 sampled_token: SampledToken::Undeterminable(token),
88 visible_piece: piece.clone(),
89 raw_piece: piece,
90 }];
91 }
92
93 let decoded = self.decode(token);
94 self.pending.push_back(PendingToken {
95 token,
96 decoded: decoded.clone(),
97 section: self.section,
98 is_boundary: false,
99 is_from_prompt: false,
100 is_held_for_probe: false,
101 });
102
103 self.try_consume_marker_at_tail();
104
105 let probe_was_active = matches!(self.probe_mode, ProbeMode::Active(_));
106 let mut outcomes = if probe_was_active && self.section_disengages_probe() {
107 self.abandon_probe()
108 } else {
109 self.update_probe(&decoded)
110 };
111
112 outcomes.extend(self.drain_overflow());
113 outcomes
114 }
115
116 const fn section_disengages_probe(&self) -> bool {
117 matches!(
118 self.section,
119 SampledTokenSection::ToolCall | SampledTokenSection::Reasoning
120 )
121 }
122
123 pub fn ingest_prompt_token(&mut self, token: LlamaToken) {
133 if !self.markers.has_any() {
134 return;
135 }
136
137 self.pending.push_back(PendingToken {
138 token,
139 decoded: String::new(),
140 section: self.section,
141 is_boundary: false,
142 is_from_prompt: true,
143 is_held_for_probe: false,
144 });
145
146 self.try_consume_marker_at_tail();
147 self.drain_overflow();
148 }
149
150 pub fn ingest_prompt_tokens(&mut self, tokens: &[LlamaToken]) {
151 if !self.markers.has_any() {
152 return;
153 }
154 for &token in tokens {
155 self.ingest_prompt_token(token);
156 }
157 }
158
159 pub fn flush(&mut self) -> Vec<IngestOutcome> {
163 self.probe_mode = ProbeMode::Idle;
164 let mut outcomes = Vec::with_capacity(self.pending.len());
165 while let Some(entry) = self.pending.pop_front() {
166 if entry.is_from_prompt {
167 continue;
168 }
169 outcomes.push(self.finalize_entry(entry));
170 }
171 outcomes
172 }
173
174 fn decode(&mut self, token: LlamaToken) -> String {
175 match self.model.token_to_piece(
176 &SampledToken::Content(token),
177 &mut self.decoder,
178 true,
179 None,
180 ) {
181 Ok(piece) => piece,
182 Err(detokenize_error) => {
183 log::debug!(
184 "token_to_piece failed during classification, dropping piece: {detokenize_error}",
185 );
186 String::new()
187 }
188 }
189 }
190
191 fn try_consume_marker_at_tail(&mut self) {
192 const PROBE_KINDS: &[MarkerKind] = &[
193 MarkerKind::ReasoningOpen,
194 MarkerKind::ReasoningClose,
195 MarkerKind::ToolCallOpen,
196 MarkerKind::ToolCallClose,
197 ];
198
199 for &kind in PROBE_KINDS {
200 let Some(marker) = self.markers.lookup(kind) else {
201 continue;
202 };
203 if marker.is_empty() || self.pending.len() < marker.len() {
204 continue;
205 }
206 let span_start = self.pending.len() - marker.len();
207 let matches = self
208 .pending
209 .iter()
210 .skip(span_start)
211 .zip(marker)
212 .all(|(entry, marker_token)| entry.token == *marker_token);
213 if matches {
214 self.mark_marker_span(span_start, kind);
215 return;
216 }
217 }
218 }
219
220 fn mark_marker_span(&mut self, span_start: usize, kind: MarkerKind) {
221 let next_section = match kind {
222 MarkerKind::ReasoningOpen => SampledTokenSection::Reasoning,
223 MarkerKind::ReasoningClose | MarkerKind::ToolCallClose => SampledTokenSection::Content,
224 MarkerKind::ToolCallOpen => SampledTokenSection::ToolCall,
225 };
226 let span_section = match kind {
227 MarkerKind::ReasoningOpen => SampledTokenSection::Reasoning,
228 MarkerKind::ToolCallOpen => SampledTokenSection::ToolCall,
229 MarkerKind::ReasoningClose => {
230 if self.section == SampledTokenSection::Reasoning {
231 SampledTokenSection::Reasoning
232 } else {
233 SampledTokenSection::Content
234 }
235 }
236 MarkerKind::ToolCallClose => {
237 if self.section == SampledTokenSection::ToolCall {
238 SampledTokenSection::ToolCall
239 } else {
240 SampledTokenSection::Content
241 }
242 }
243 };
244
245 for entry in self.pending.iter_mut().skip(span_start) {
246 entry.is_boundary = true;
247 entry.section = span_section;
248 }
249
250 self.section = next_section;
251 }
252
253 fn drain_overflow(&mut self) -> Vec<IngestOutcome> {
254 let lookback = self.markers.max_token_len().saturating_sub(1);
255 let mut outcomes = Vec::new();
256
257 loop {
258 let Some(front) = self.pending.front() else {
259 break;
260 };
261 if front.is_held_for_probe {
262 break;
263 }
264 let probe_held = self
265 .pending
266 .iter()
267 .filter(|entry| entry.is_held_for_probe)
268 .count();
269 let drainable = self.pending.len().saturating_sub(probe_held);
270 let beyond_lookback = drainable > lookback;
271 if !front.is_boundary && !beyond_lookback {
272 break;
273 }
274 let Some(entry) = self.pending.pop_front() else {
275 break;
276 };
277 if entry.is_from_prompt {
278 continue;
279 }
280 outcomes.push(self.finalize_entry(entry));
281 }
282
283 outcomes
284 }
285
286 fn update_probe(&mut self, piece: &str) -> Vec<IngestOutcome> {
287 let probe_active = matches!(self.probe_mode, ProbeMode::Active(_));
288 if !probe_active {
289 if !self.section_allows_probe_engagement() {
290 return Vec::new();
291 }
292 if !piece.trim_start().starts_with('{') {
293 return Vec::new();
294 }
295 if let Some(entry) = self.pending.back_mut() {
296 entry.is_held_for_probe = true;
297 }
298 self.probe_mode = ProbeMode::Active(JsonProbeState {
299 held_text: piece.to_owned(),
300 });
301 return self.evaluate_probe();
302 }
303
304 if let Some(entry) = self.pending.back_mut() {
305 entry.is_held_for_probe = true;
306 }
307 if let ProbeMode::Active(state) = &mut self.probe_mode {
308 state.held_text.push_str(piece);
309 }
310 self.evaluate_probe()
311 }
312
313 const fn section_allows_probe_engagement(&self) -> bool {
314 matches!(
315 self.section,
316 SampledTokenSection::Content | SampledTokenSection::Pending
317 )
318 }
319
320 fn evaluate_probe(&mut self) -> Vec<IngestOutcome> {
321 let outcome = match &self.probe_mode {
322 ProbeMode::Active(state) => JsonProbeOutcome::validate_prefix(&state.held_text),
323 ProbeMode::Idle => return Vec::new(),
324 };
325 match outcome {
326 JsonProbeOutcome::StillPossiblyValid => Vec::new(),
327 JsonProbeOutcome::CompletedValid => self.commit_probe_as_tool_call(),
328 JsonProbeOutcome::Failed => self.abandon_probe(),
329 }
330 }
331
332 fn commit_probe_as_tool_call(&mut self) -> Vec<IngestOutcome> {
333 if !matches!(self.probe_mode, ProbeMode::Active(_)) {
334 return Vec::new();
335 }
336 self.probe_mode = ProbeMode::Idle;
337 self.section = SampledTokenSection::Content;
338
339 let drained: Vec<_> = self.pending.drain(..).collect();
340 let mut outcomes = Vec::new();
341 for mut entry in drained {
342 if entry.is_held_for_probe {
343 entry.section = SampledTokenSection::ToolCall;
344 entry.is_held_for_probe = false;
345 if !entry.is_from_prompt {
346 outcomes.push(self.finalize_entry(entry));
347 }
348 } else {
349 self.pending.push_back(entry);
350 }
351 }
352 outcomes
353 }
354
355 fn abandon_probe(&mut self) -> Vec<IngestOutcome> {
356 if !matches!(self.probe_mode, ProbeMode::Active(_)) {
357 return Vec::new();
358 }
359 self.probe_mode = ProbeMode::Idle;
360
361 let drained: Vec<_> = self.pending.drain(..).collect();
362 let mut outcomes = Vec::new();
363 for mut entry in drained {
364 if entry.is_held_for_probe {
365 entry.is_held_for_probe = false;
366 if !entry.is_from_prompt {
367 outcomes.push(self.finalize_entry(entry));
368 }
369 } else {
370 self.pending.push_back(entry);
371 }
372 }
373 outcomes
374 }
375
376 fn finalize_entry(&mut self, entry: PendingToken) -> IngestOutcome {
377 let section = entry.section;
378 match section {
379 SampledTokenSection::Reasoning => self.usage.record_reasoning_token(),
380 SampledTokenSection::Content => self.usage.record_content_token(),
381 SampledTokenSection::ToolCall => self.usage.record_tool_call_token(),
382 SampledTokenSection::Pending => self.usage.record_undeterminable_token(),
383 }
384
385 let sampled_token = match section {
386 SampledTokenSection::Reasoning => SampledToken::Reasoning(entry.token),
387 SampledTokenSection::Content => SampledToken::Content(entry.token),
388 SampledTokenSection::ToolCall => SampledToken::ToolCall(entry.token),
389 SampledTokenSection::Pending => SampledToken::Undeterminable(entry.token),
390 };
391
392 let visible_piece = if entry.is_boundary {
393 String::new()
394 } else {
395 entry.decoded.clone()
396 };
397
398 IngestOutcome {
399 sampled_token,
400 visible_piece,
401 raw_piece: entry.decoded,
402 }
403 }
404
405 pub fn sample(
412 &mut self,
413 sampler: &mut LlamaSampler,
414 context: &LlamaContext,
415 idx: i32,
416 ) -> Result<(LlamaToken, Vec<IngestOutcome>), SampleError> {
417 let raw = sampler.sample(context, idx)?;
418 let outcomes = self.ingest(raw);
419
420 Ok((raw, outcomes))
421 }
422
423 pub fn feed_prompt_to_batch(
426 &mut self,
427 batch: &mut LlamaBatch,
428 token: LlamaToken,
429 position: llama_pos,
430 seq_ids: &[llama_seq_id],
431 logits: bool,
432 ) -> Result<(), BatchAddError> {
433 batch.add(&SampledToken::Content(token), position, seq_ids, logits)?;
434 self.ingest_prompt_token(token);
435 self.pending_prompt_tokens = self.pending_prompt_tokens.saturating_add(1);
436
437 Ok(())
438 }
439
440 pub fn feed_prompt_sequence_to_batch(
443 &mut self,
444 batch: &mut LlamaBatch,
445 tokens: &[LlamaToken],
446 seq_id: llama_seq_id,
447 logits_all: bool,
448 ) -> Result<(), BatchAddError> {
449 batch.add_sequence(tokens, seq_id, logits_all)?;
450 self.ingest_prompt_tokens(tokens);
451 self.pending_prompt_tokens = self
452 .pending_prompt_tokens
453 .saturating_add(tokens.len() as u64);
454
455 Ok(())
456 }
457
458 pub const fn commit_prompt_tokens(&mut self) -> u64 {
459 let promoted = self.pending_prompt_tokens;
460 self.usage.record_prompt_tokens(promoted);
461 self.pending_prompt_tokens = 0;
462
463 promoted
464 }
465
466 pub const fn discard_pending_prompt_tokens(&mut self) -> u64 {
467 let discarded = self.pending_prompt_tokens;
468 self.pending_prompt_tokens = 0;
469
470 discarded
471 }
472
    /// Number of prompt tokens fed to a batch that have been neither committed
    /// nor discarded yet.
    #[must_use]
    pub const fn pending_prompt_tokens(&self) -> u64 {
        self.pending_prompt_tokens
    }
477
478 #[expect(
486 clippy::too_many_arguments,
487 reason = "thin wrapper over MtmdInputChunks::eval_chunks; parameter shape mirrors the underlying API"
488 )]
489 pub fn eval_multimodal_chunks(
490 &mut self,
491 chunks: &MtmdInputChunks,
492 mtmd_ctx: &MtmdContext,
493 llama_ctx: &LlamaContext,
494 start_position: llama_pos,
495 seq_id: llama_seq_id,
496 n_batch: i32,
497 logits_last: bool,
498 ) -> Result<llama_pos, EvalMultimodalChunksError> {
499 let chunk_count = chunks.len();
500 let mut next_position = start_position;
501
502 for index in 0..chunk_count {
503 let chunk = chunks
504 .get(index)
505 .ok_or(EvalMultimodalChunksError::ChunkOutOfBounds(index))?;
506 let logits_for_this_chunk = logits_last && index + 1 == chunk_count;
507
508 next_position = chunk.eval_single(
509 mtmd_ctx,
510 llama_ctx,
511 next_position,
512 seq_id,
513 n_batch,
514 logits_for_this_chunk,
515 )?;
516 crate::ingest_prompt_chunk::ingest_prompt_chunk(self, &chunk)?;
517 }
518
519 Ok(next_position)
520 }
521
    /// Records `count` prompt tokens directly in the usage counters, bypassing
    /// the pending prompt token count.
    pub const fn record_prompt_tokens(&mut self, count: u64) {
        self.usage.record_prompt_tokens(count);
    }
525
    /// Records `count` input image tokens in the usage counters.
    pub const fn record_input_image_tokens(&mut self, count: u64) {
        self.usage.record_input_image_tokens(count);
    }
529
    /// Records `count` input audio tokens in the usage counters.
    pub const fn record_input_audio_tokens(&mut self, count: u64) {
        self.usage.record_input_audio_tokens(count);
    }
533
    /// Records `count` cached prompt tokens in the usage counters.
    ///
    /// # Errors
    ///
    /// Returns whatever error `TokenUsage::record_cached_prompt_tokens` reports.
    // NOTE(review): presumably rejected when the cached count would exceed the
    // recorded prompt total — confirm against `TokenUsage`.
    pub const fn record_cached_prompt_tokens(&mut self, count: u64) -> Result<(), TokenUsageError> {
        self.usage.record_cached_prompt_tokens(count)
    }
540
    /// Borrows the usage counters accumulated so far.
    #[must_use]
    pub const fn usage(&self) -> &TokenUsage {
        &self.usage
    }
545
    /// Consumes the classifier and returns its usage counters.
    ///
    /// Tokens still sitting in the pending queue are not finalized by this call.
    #[must_use]
    pub fn into_usage(self) -> TokenUsage {
        self.usage
    }
550
    /// Section that the next ingested token will be assigned to.
    #[must_use]
    pub const fn current_section(&self) -> SampledTokenSection {
        self.section
    }
555
    /// Borrows the marker configuration this classifier was created with.
    #[must_use]
    pub const fn markers(&self) -> &StreamingMarkers {
        &self.markers
    }
560}
561
562#[cfg(test)]
563mod tests {
564 use super::PendingToken;
565 use super::ProbeMode;
566 use super::SampledTokenClassifier;
567 use crate::ingest_outcome::IngestOutcome;
568 use crate::sampled_token::SampledToken;
569 use crate::sampled_token_section::SampledTokenSection;
570 use crate::streaming_markers::StreamingMarkers;
571 use crate::token::LlamaToken;
572
    /// Shorthand for building a `LlamaToken` from a raw id.
    fn token(id: i32) -> LlamaToken {
        LlamaToken::new(id)
    }
576
    /// Builds markers with the given reasoning open/close sequences and no
    /// tool-call markers.
    fn markers_with(
        reasoning_open: Option<Vec<LlamaToken>>,
        reasoning_close: Option<Vec<LlamaToken>>,
    ) -> StreamingMarkers {
        StreamingMarkers {
            reasoning_open,
            reasoning_close,
            tool_call_open: None,
            tool_call_close: None,
        }
    }
588
    /// Builds a classifier without a real model so that queue, marker and probe
    /// logic can be tested in isolation.
    ///
    /// The `model` reference is fabricated from a dangling pointer, so tests must
    /// only exercise code paths that never read `self.model` — in this file that
    /// means nothing that reaches `decode`.
    fn synthetic_classifier(markers: StreamingMarkers) -> SampledTokenClassifier<'static> {
        SampledTokenClassifier {
            // NOTE(review): Rust requires references to be valid even when they are
            // never read, so creating `&LlamaModel` from a dangling pointer is likely
            // undefined behaviour unless `LlamaModel` is zero-sized — confirm (e.g.
            // under Miri) or replace with a real fixture.
            model: unsafe { &*std::ptr::NonNull::<crate::model::LlamaModel>::dangling().as_ptr() },
            markers,
            decoder: encoding_rs::UTF_8.new_decoder(),
            pending: std::collections::VecDeque::new(),
            section: SampledTokenSection::Pending,
            pending_prompt_tokens: 0,
            usage: llama_cpp_bindings_types::TokenUsage::new(),
            probe_mode: ProbeMode::Idle,
        }
    }
601
602 fn push_pending(classifier: &mut SampledTokenClassifier<'_>, token_id: i32, decoded: &str) {
603 classifier.pending.push_back(PendingToken {
604 token: token(token_id),
605 decoded: decoded.to_owned(),
606 section: classifier.section,
607 is_boundary: false,
608 is_from_prompt: false,
609 is_held_for_probe: false,
610 });
611 }
612
613 fn push_pending_from_prompt(classifier: &mut SampledTokenClassifier<'_>, token_id: i32) {
614 classifier.pending.push_back(PendingToken {
615 token: token(token_id),
616 decoded: String::new(),
617 section: classifier.section,
618 is_boundary: false,
619 is_from_prompt: true,
620 is_held_for_probe: false,
621 });
622 }
623
624 fn push_and_probe(
625 classifier: &mut SampledTokenClassifier<'_>,
626 token_id: i32,
627 decoded: &str,
628 ) -> Vec<IngestOutcome> {
629 push_pending(classifier, token_id, decoded);
630 classifier.try_consume_marker_at_tail();
631 let probe_was_active = matches!(classifier.probe_mode, ProbeMode::Active(_));
632 let mut outcomes = if probe_was_active && classifier.section_disengages_probe() {
633 classifier.abandon_probe()
634 } else {
635 classifier.update_probe(decoded)
636 };
637 outcomes.extend(classifier.drain_overflow());
638 outcomes
639 }
640
641 fn outcome_pieces(outcomes: &[IngestOutcome]) -> Vec<&str> {
642 outcomes
643 .iter()
644 .map(|outcome| outcome.visible_piece.as_str())
645 .collect()
646 }
647
648 fn outcome_sections(outcomes: &[IngestOutcome]) -> Vec<SampledTokenSection> {
649 outcomes
650 .iter()
651 .map(|outcome| match outcome.sampled_token {
652 SampledToken::Reasoning(_) => SampledTokenSection::Reasoning,
653 SampledToken::Content(_) => SampledTokenSection::Content,
654 SampledToken::ToolCall(_) => SampledTokenSection::ToolCall,
655 SampledToken::Undeterminable(_) => SampledTokenSection::Pending,
656 })
657 .collect()
658 }
659
660 #[test]
661 fn single_token_close_marker_when_already_in_reasoning_emits_empty_piece_for_marker() {
662 let markers = markers_with(Some(vec![token(100)]), Some(vec![token(200)]));
663 let mut classifier = synthetic_classifier(markers);
664 classifier.section = SampledTokenSection::Reasoning;
665
666 push_pending(&mut classifier, 7, "step");
667 classifier.try_consume_marker_at_tail();
668 let mut outcomes = classifier.drain_overflow();
669
670 push_pending(&mut classifier, 200, "</think>");
671 classifier.try_consume_marker_at_tail();
672 outcomes.extend(classifier.drain_overflow());
673
674 push_pending(&mut classifier, 9, "Hi");
675 classifier.try_consume_marker_at_tail();
676 outcomes.extend(classifier.drain_overflow());
677
678 outcomes.extend(classifier.flush());
679
680 assert_eq!(
681 outcome_sections(&outcomes),
682 vec![
683 SampledTokenSection::Reasoning,
684 SampledTokenSection::Reasoning,
685 SampledTokenSection::Content,
686 ],
687 );
688 assert_eq!(outcome_pieces(&outcomes), vec!["step", "", "Hi"]);
689 assert_eq!(classifier.section, SampledTokenSection::Content);
690 }
691
    /// Every token of a three-token close marker gets an empty visible piece,
    /// while the tokens before and after it keep their text.
    #[test]
    fn multi_token_close_marker_suppresses_every_marker_token() {
        let markers = markers_with(
            Some(vec![token(100)]),
            Some(vec![token(200), token(201), token(202)]),
        );
        let mut classifier = synthetic_classifier(markers);
        classifier.section = SampledTokenSection::Reasoning;

        let mut outcomes = Vec::new();
        for (id, decoded) in [(7, "r"), (200, "</"), (201, "thi"), (202, "nk>"), (9, "OK")] {
            push_pending(&mut classifier, id, decoded);
            classifier.try_consume_marker_at_tail();
            outcomes.extend(classifier.drain_overflow());
        }
        outcomes.extend(classifier.flush());

        assert_eq!(outcome_pieces(&outcomes), vec!["r", "", "", "", "OK"]);
        assert_eq!(classifier.section, SampledTokenSection::Content);
    }
712
    /// Tokens that start a multi-token marker but then diverge are emitted with
    /// their text intact and the section does not change.
    #[test]
    fn marker_prefix_that_diverges_does_not_suppress_buffered_tokens() {
        let markers = markers_with(
            Some(vec![token(100)]),
            Some(vec![token(200), token(201), token(202)]),
        );
        let mut classifier = synthetic_classifier(markers);
        classifier.section = SampledTokenSection::Reasoning;

        let mut outcomes = Vec::new();
        // 300 breaks the 200, 201, 202 close sequence.
        for (id, decoded) in [(7, "r"), (200, "a"), (201, "b"), (300, "x")] {
            push_pending(&mut classifier, id, decoded);
            classifier.try_consume_marker_at_tail();
            outcomes.extend(classifier.drain_overflow());
        }
        outcomes.extend(classifier.flush());

        assert_eq!(outcome_pieces(&outcomes), vec!["r", "a", "b", "x"]);
        assert!(
            outcomes
                .iter()
                .all(|outcome| matches!(outcome.sampled_token, SampledToken::Reasoning(_)))
        );
        assert_eq!(classifier.section, SampledTokenSection::Reasoning);
    }
738
    /// An open marker immediately followed by a close marker yields two reasoning
    /// outcomes with empty visible pieces, then content resumes.
    #[test]
    fn open_then_close_back_to_back_emits_two_empty_pieces_around_zero_content() {
        let markers = markers_with(Some(vec![token(100)]), Some(vec![token(200)]));
        let mut classifier = synthetic_classifier(markers);
        classifier.section = SampledTokenSection::Content;

        let mut outcomes = Vec::new();
        for (id, decoded) in [(100, "<think>"), (200, "</think>"), (9, "Hi")] {
            push_pending(&mut classifier, id, decoded);
            classifier.try_consume_marker_at_tail();
            outcomes.extend(classifier.drain_overflow());
        }
        outcomes.extend(classifier.flush());

        assert_eq!(
            outcome_sections(&outcomes),
            vec![
                SampledTokenSection::Reasoning,
                SampledTokenSection::Reasoning,
                SampledTokenSection::Content,
            ],
        );
        assert_eq!(outcome_pieces(&outcomes), vec!["", "", "Hi"]);
        assert_eq!(classifier.section, SampledTokenSection::Content);
    }
764
    /// A reasoning close marker that arrives while already in content is
    /// attributed to content and leaves the section at content.
    #[test]
    fn spurious_reasoning_close_in_content_section_classifies_as_content() {
        let markers = markers_with(Some(vec![token(100)]), Some(vec![token(200)]));
        let mut classifier = synthetic_classifier(markers);
        classifier.section = SampledTokenSection::Content;

        push_pending(&mut classifier, 200, "</think>");
        classifier.try_consume_marker_at_tail();
        let outcomes = classifier.drain_overflow();

        assert_eq!(
            outcome_sections(&outcomes),
            vec![SampledTokenSection::Content],
        );
        assert_eq!(classifier.section, SampledTokenSection::Content);
    }
781
    /// A tool-call close marker seen while in the tool-call section is attributed
    /// to the tool call and moves the classifier to content.
    // NOTE(review): the name says `in_reasoning_section`, but the scenario starts
    // in `ToolCall`. Starting in `Reasoning`, `mark_marker_span` would attribute
    // the marker to `Content`, so the name looks stale — consider renaming.
    #[test]
    fn spurious_tool_call_close_in_reasoning_section_classifies_as_tool_call() {
        let markers = StreamingMarkers {
            reasoning_open: Some(vec![token(100)]),
            reasoning_close: Some(vec![token(200)]),
            tool_call_open: Some(vec![token(300)]),
            tool_call_close: Some(vec![token(400)]),
        };
        let mut classifier = synthetic_classifier(markers);
        classifier.section = SampledTokenSection::ToolCall;

        push_pending(&mut classifier, 400, "</tool_call>");
        classifier.try_consume_marker_at_tail();
        let outcomes = classifier.drain_overflow();

        assert_eq!(
            outcome_sections(&outcomes),
            vec![SampledTokenSection::ToolCall],
        );
        assert_eq!(classifier.section, SampledTokenSection::Content);
    }
803
804 #[test]
805 fn flush_drains_remaining_pending_at_eog() {
806 let markers = markers_with(
807 Some(vec![token(100)]),
808 Some(vec![token(200), token(201), token(202)]),
809 );
810 let mut classifier = synthetic_classifier(markers);
811 classifier.section = SampledTokenSection::Reasoning;
812
813 push_pending(&mut classifier, 7, "abc");
814 push_pending(&mut classifier, 200, "</");
815 push_pending(&mut classifier, 201, "th");
816
817 let outcomes = classifier.flush();
818
819 assert_eq!(outcome_pieces(&outcomes), vec!["abc", "</", "th"]);
820 assert!(classifier.pending.is_empty());
821 }
822
    /// Tokens queued while the section is still `Pending` are flushed as
    /// undeterminable with their text visible.
    // The tokens are pushed straight into the queue, so the classification comes
    // from the `Pending` section they were queued under, not from the no-marker
    // shortcut in `ingest` (which needs a real model to decode).
    #[test]
    fn no_markers_marks_each_token_undeterminable_with_visible_piece() {
        let markers = StreamingMarkers::default();
        let mut classifier = synthetic_classifier(markers);

        push_pending(&mut classifier, 1, "h");
        push_pending(&mut classifier, 2, "i");
        let outcomes = classifier.flush();

        assert_eq!(outcome_pieces(&outcomes), vec!["h", "i"]);
        assert_eq!(
            outcome_sections(&outcomes),
            vec![SampledTokenSection::Pending, SampledTokenSection::Pending],
        );
    }
838
839 #[test]
840 fn ingest_prompt_tokens_without_markers_is_noop() {
841 let markers = StreamingMarkers::default();
842 let mut classifier = synthetic_classifier(markers);
843
844 push_pending_from_prompt(&mut classifier, 7);
845 push_pending_from_prompt(&mut classifier, 8);
846
847 assert_eq!(classifier.section, SampledTokenSection::Pending);
848 assert_eq!(classifier.usage().reasoning_tokens, 0);
849 assert_eq!(classifier.usage().content_tokens, 0);
850 assert_eq!(classifier.usage().tool_call_tokens, 0);
851 assert_eq!(classifier.usage().undeterminable_tokens, 0);
852 }
853
    /// A prompt containing an open marker, a token and a close marker leaves the
    /// classifier in content without recording any generated-token usage.
    #[test]
    fn ingest_prompt_tokens_through_open_close_pair_ends_in_content() {
        let markers = markers_with(Some(vec![token(100)]), Some(vec![token(200)]));
        let mut classifier = synthetic_classifier(markers);

        // Same steps as `ingest_prompt_token` performs for each token.
        for token_id in [100, 7, 200] {
            push_pending_from_prompt(&mut classifier, token_id);
            classifier.try_consume_marker_at_tail();
            classifier.drain_overflow();
        }

        assert_eq!(classifier.section, SampledTokenSection::Content);
        assert_eq!(classifier.usage().reasoning_tokens, 0);
        assert_eq!(classifier.usage().content_tokens, 0);
        assert_eq!(classifier.usage().tool_call_tokens, 0);
        assert_eq!(classifier.usage().undeterminable_tokens, 0);
    }
871
    /// A prompt that opens reasoning without closing it leaves the classifier in
    /// the reasoning section.
    #[test]
    fn ingest_prompt_tokens_through_open_only_ends_in_reasoning() {
        let markers = markers_with(Some(vec![token(100)]), Some(vec![token(200)]));
        let mut classifier = synthetic_classifier(markers);

        for token_id in [100, 7] {
            push_pending_from_prompt(&mut classifier, token_id);
            classifier.try_consume_marker_at_tail();
            classifier.drain_overflow();
        }

        assert_eq!(classifier.section, SampledTokenSection::Reasoning);
        assert_eq!(classifier.usage().reasoning_tokens, 0);
        assert_eq!(classifier.usage().content_tokens, 0);
    }
887
    /// Prompt tokens, including a multi-token close marker, never produce
    /// outcomes and never touch the generated-token usage counters.
    #[test]
    fn ingest_prompt_tokens_does_not_record_usage() {
        let markers = markers_with(
            Some(vec![token(100)]),
            Some(vec![token(200), token(201), token(202)]),
        );
        let mut classifier = synthetic_classifier(markers);

        for token_id in [100, 7, 8, 9, 200, 201, 202, 11] {
            push_pending_from_prompt(&mut classifier, token_id);
            classifier.try_consume_marker_at_tail();
            classifier.drain_overflow();
        }
        // Whatever is still queued is prompt-only, so flushing yields nothing.
        let drained = classifier.flush();
        assert!(drained.is_empty());

        assert_eq!(classifier.usage().reasoning_tokens, 0);
        assert_eq!(classifier.usage().content_tokens, 0);
        assert_eq!(classifier.usage().tool_call_tokens, 0);
        assert_eq!(classifier.usage().undeterminable_tokens, 0);
    }
909
910 #[test]
911 fn prompt_token_completing_marker_with_generated_token_is_suppressed_correctly() {
912 let markers = markers_with(
913 Some(vec![token(100)]),
914 Some(vec![token(200), token(201), token(202)]),
915 );
916 let mut classifier = synthetic_classifier(markers);
917 classifier.section = SampledTokenSection::Reasoning;
918
919 for token_id in [200, 201] {
920 push_pending_from_prompt(&mut classifier, token_id);
921 classifier.try_consume_marker_at_tail();
922 classifier.drain_overflow();
923 }
924
925 assert_eq!(classifier.section, SampledTokenSection::Reasoning);
926 assert_eq!(classifier.pending.len(), 2);
927
928 classifier.pending.push_back(PendingToken {
929 token: token(202),
930 decoded: "k>".to_owned(),
931 section: classifier.section,
932 is_boundary: false,
933 is_from_prompt: false,
934 is_held_for_probe: false,
935 });
936 classifier.try_consume_marker_at_tail();
937 let outcomes = classifier.drain_overflow();
938
939 assert_eq!(outcomes.len(), 1);
940 assert!(matches!(
941 outcomes[0].sampled_token,
942 SampledToken::Reasoning(_)
943 ));
944 assert_eq!(outcomes[0].visible_piece, "");
945 assert_eq!(outcomes[0].raw_piece, "k>");
946
947 assert_eq!(classifier.section, SampledTokenSection::Content);
948 assert_eq!(classifier.usage().reasoning_tokens, 1);
949 assert_eq!(classifier.usage().content_tokens, 0);
950 }
951
    /// Two complete open/close round trips in the prompt leave the classifier in
    /// content with no generated-token usage recorded.
    #[test]
    fn ingest_prompt_tokens_with_multiple_round_trips_ends_in_content() {
        let markers = markers_with(Some(vec![token(100)]), Some(vec![token(200)]));
        let mut classifier = synthetic_classifier(markers);

        for token_id in [100, 7, 200, 100, 8, 200] {
            push_pending_from_prompt(&mut classifier, token_id);
            classifier.try_consume_marker_at_tail();
            classifier.drain_overflow();
        }

        assert_eq!(classifier.section, SampledTokenSection::Content);
        assert_eq!(classifier.usage().reasoning_tokens, 0);
        assert_eq!(classifier.usage().content_tokens, 0);
        assert_eq!(classifier.usage().tool_call_tokens, 0);
        assert_eq!(classifier.usage().undeterminable_tokens, 0);
    }
969
    /// A freshly built classifier starts in the `Pending` section even when
    /// markers are configured.
    #[test]
    fn ingest_prompt_tokens_initial_section_is_always_pending() {
        let markers = markers_with(Some(vec![token(100)]), Some(vec![token(200)]));
        let classifier = synthetic_classifier(markers);

        assert_eq!(classifier.section, SampledTokenSection::Pending);
    }
977
978 #[test]
979 fn ingest_prompt_tokens_then_drain_for_generated_token_classifies_correctly() {
980 let markers = markers_with(Some(vec![token(100)]), Some(vec![token(200)]));
981 let mut classifier = synthetic_classifier(markers);
982
983 for token_id in [100, 7, 200] {
984 push_pending_from_prompt(&mut classifier, token_id);
985 classifier.try_consume_marker_at_tail();
986 classifier.drain_overflow();
987 }
988
989 assert_eq!(classifier.section, SampledTokenSection::Content);
990 assert_eq!(classifier.usage().reasoning_tokens, 0);
991 assert_eq!(classifier.usage().content_tokens, 0);
992
993 classifier.pending.push_back(PendingToken {
994 token: token(50),
995 decoded: "hi".to_owned(),
996 section: classifier.section,
997 is_boundary: false,
998 is_from_prompt: false,
999 is_held_for_probe: false,
1000 });
1001 classifier.try_consume_marker_at_tail();
1002 let outcomes = classifier.drain_overflow();
1003
1004 assert_eq!(outcomes.len(), 1);
1005 assert!(matches!(
1006 outcomes[0].sampled_token,
1007 SampledToken::Content(_)
1008 ));
1009 assert_eq!(outcomes[0].visible_piece, "hi");
1010 assert_eq!(classifier.usage().content_tokens, 1);
1011 assert_eq!(classifier.usage().reasoning_tokens, 0);
1012 assert_eq!(classifier.usage().undeterminable_tokens, 0);
1013 }
1014
    /// A stray close marker inside content is still treated as a boundary: it is
    /// counted as content but its visible piece is empty.
    #[test]
    fn close_marker_in_content_section_is_suppressed_as_boundary() {
        let markers = markers_with(Some(vec![token(100)]), Some(vec![token(200)]));
        let mut classifier = synthetic_classifier(markers);
        classifier.section = SampledTokenSection::Content;

        let mut outcomes = Vec::new();
        for (id, decoded) in [(7, "hi"), (200, "</think>"), (8, "ok")] {
            push_pending(&mut classifier, id, decoded);
            classifier.try_consume_marker_at_tail();
            outcomes.extend(classifier.drain_overflow());
        }
        outcomes.extend(classifier.flush());

        assert_eq!(
            outcome_sections(&outcomes),
            vec![
                SampledTokenSection::Content,
                SampledTokenSection::Content,
                SampledTokenSection::Content,
            ],
        );
        assert_eq!(outcome_pieces(&outcomes), vec!["hi", "", "ok"]);
        assert_eq!(classifier.section, SampledTokenSection::Content);
    }
1040
    /// A repeated open marker while already reasoning is hidden from the visible
    /// output and the section stays reasoning.
    #[test]
    fn open_marker_in_reasoning_section_is_suppressed_as_boundary() {
        let markers = markers_with(Some(vec![token(100)]), Some(vec![token(200)]));
        let mut classifier = synthetic_classifier(markers);
        classifier.section = SampledTokenSection::Reasoning;

        let mut outcomes = Vec::new();
        for (id, decoded) in [(7, "step1"), (100, "<think>"), (8, "step2")] {
            push_pending(&mut classifier, id, decoded);
            classifier.try_consume_marker_at_tail();
            outcomes.extend(classifier.drain_overflow());
        }
        outcomes.extend(classifier.flush());

        assert_eq!(outcome_pieces(&outcomes), vec!["step1", "", "step2"]);
        assert_eq!(classifier.section, SampledTokenSection::Reasoning);
    }
1058
    /// `record_prompt_tokens` is reflected in `usage().prompt_tokens`.
    #[test]
    fn record_prompt_tokens_updates_usage() {
        let markers = markers_with(None, None);
        let mut classifier = synthetic_classifier(markers);

        classifier.record_prompt_tokens(7);

        assert_eq!(classifier.usage().prompt_tokens, 7);
    }
1068
    /// Recording fewer cached prompt tokens than prompt tokens succeeds and is
    /// reflected in `usage().cached_prompt_tokens`.
    #[test]
    fn record_cached_prompt_tokens_updates_usage_when_under_limit() {
        let markers = markers_with(None, None);
        let mut classifier = synthetic_classifier(markers);
        classifier.record_prompt_tokens(10);

        classifier.record_cached_prompt_tokens(3).unwrap();

        assert_eq!(classifier.usage().cached_prompt_tokens, 3);
    }
1079
    /// Recording more cached prompt tokens (5) than prompt tokens (2) is rejected.
    #[test]
    fn record_cached_prompt_tokens_returns_error_when_over_prompt_total() {
        let markers = markers_with(None, None);
        let mut classifier = synthetic_classifier(markers);
        classifier.record_prompt_tokens(2);

        let result = classifier.record_cached_prompt_tokens(5);

        assert!(result.is_err());
    }
1090
1091 #[test]
1092 fn markers_accessor_returns_configured_markers() {
1093 let configured = markers_with(Some(vec![token(1)]), Some(vec![token(2)]));
1094 let classifier = synthetic_classifier(configured);
1095
1096 let returned = classifier.markers();
1097
1098 assert_eq!(returned.reasoning_open.as_deref(), Some(&[token(1)][..]));
1099 assert_eq!(returned.reasoning_close.as_deref(), Some(&[token(2)][..]));
1100 }
1101
1102 #[test]
1103 fn into_usage_consumes_classifier_and_yields_usage_snapshot() {
1104 let markers = markers_with(None, None);
1105 let mut classifier = synthetic_classifier(markers);
1106 classifier.record_prompt_tokens(11);
1107
1108 let usage = classifier.into_usage();
1109
1110 assert_eq!(usage.prompt_tokens, 11);
1111 }
1112
1113 #[test]
1114 fn spurious_tool_call_close_in_content_section_classifies_as_content() {
1115 let mut markers = markers_with(None, None);
1116 markers.tool_call_close = Some(vec![token(300)]);
1117 let mut classifier = synthetic_classifier(markers);
1118 classifier.section = SampledTokenSection::Content;
1119
1120 push_pending(&mut classifier, 300, "</tool_call>");
1121 classifier.try_consume_marker_at_tail();
1122 let outcomes = classifier.drain_overflow();
1123
1124 assert_eq!(
1125 outcome_sections(&outcomes),
1126 vec![SampledTokenSection::Content],
1127 );
1128 assert_eq!(classifier.section, SampledTokenSection::Content);
1129 }
1130
1131 fn markers_with_tool_call_open(tool_call_open: Vec<LlamaToken>) -> StreamingMarkers {
1132 StreamingMarkers {
1133 reasoning_open: None,
1134 reasoning_close: None,
1135 tool_call_open: Some(tool_call_open),
1136 tool_call_close: None,
1137 }
1138 }
1139
1140 fn feed_json_string(
1141 classifier: &mut SampledTokenClassifier<'_>,
1142 text: &str,
1143 starting_token_id: i32,
1144 ) -> Vec<IngestOutcome> {
1145 let mut outcomes = Vec::new();
1146 for (offset, ch) in text.char_indices() {
1147 let token_id = starting_token_id + i32::try_from(offset).unwrap_or(i32::MAX);
1148 let mut buffer = [0_u8; 4];
1149 let chunk = ch.encode_utf8(&mut buffer);
1150 outcomes.extend(push_and_probe(classifier, token_id, chunk));
1151 }
1152 outcomes
1153 }
1154
1155 #[test]
1156 fn json_probe_engages_when_first_non_whitespace_is_open_brace_in_content() {
1157 let markers = markers_with_tool_call_open(vec![token(900)]);
1158 let mut classifier = synthetic_classifier(markers);
1159 classifier.section = SampledTokenSection::Content;
1160
1161 push_and_probe(&mut classifier, 1, "{");
1162
1163 assert!(matches!(classifier.probe_mode, ProbeMode::Active(_)));
1164 }
1165
1166 #[test]
1167 fn json_probe_releases_tokens_as_tool_call_when_signature_matches() {
1168 let markers = markers_with_tool_call_open(vec![token(900)]);
1169 let mut classifier = synthetic_classifier(markers);
1170 classifier.section = SampledTokenSection::Content;
1171
1172 let outcomes = feed_json_string(&mut classifier, r#"{"name":"f","arguments":{}}"#, 100);
1173
1174 assert!(!outcomes.is_empty());
1175 assert!(
1176 outcomes
1177 .iter()
1178 .all(|outcome| matches!(outcome.sampled_token, SampledToken::ToolCall(_))),
1179 "every emitted outcome should be ToolCall, got {:?}",
1180 outcome_sections(&outcomes),
1181 );
1182 assert!(matches!(classifier.probe_mode, ProbeMode::Idle));
1183 }
1184
1185 #[test]
1186 fn json_probe_releases_tokens_as_content_when_signature_does_not_match() {
1187 let markers = markers_with_tool_call_open(vec![token(900)]);
1188 let mut classifier = synthetic_classifier(markers);
1189 classifier.section = SampledTokenSection::Content;
1190
1191 let outcomes = feed_json_string(&mut classifier, r#"{"foo":"bar"}"#, 100);
1192
1193 assert!(
1194 outcomes
1195 .iter()
1196 .all(|outcome| matches!(outcome.sampled_token, SampledToken::Content(_))),
1197 "every emitted outcome should be Content, got {:?}",
1198 outcome_sections(&outcomes),
1199 );
1200 assert!(matches!(classifier.probe_mode, ProbeMode::Idle));
1201 }
1202
1203 #[test]
1204 fn json_probe_releases_tokens_as_content_when_extra_top_level_key() {
1205 let markers = markers_with_tool_call_open(vec![token(900)]);
1206 let mut classifier = synthetic_classifier(markers);
1207 classifier.section = SampledTokenSection::Content;
1208
1209 let outcomes = feed_json_string(
1210 &mut classifier,
1211 r#"{"name":"f","arguments":{},"extra":1}"#,
1212 100,
1213 );
1214
1215 assert!(
1216 outcomes
1217 .iter()
1218 .all(|outcome| matches!(outcome.sampled_token, SampledToken::Content(_))),
1219 );
1220 }
1221
1222 #[test]
1223 fn json_probe_releases_tokens_as_content_when_arguments_is_not_object() {
1224 let markers = markers_with_tool_call_open(vec![token(900)]);
1225 let mut classifier = synthetic_classifier(markers);
1226 classifier.section = SampledTokenSection::Content;
1227
1228 let outcomes = feed_json_string(&mut classifier, r#"{"name":"f","arguments":"hi"}"#, 100);
1229
1230 assert!(
1231 outcomes
1232 .iter()
1233 .all(|outcome| matches!(outcome.sampled_token, SampledToken::Content(_))),
1234 );
1235 }
1236
1237 #[test]
1238 fn json_probe_handles_strings_with_quoted_braces_in_arguments() {
1239 let markers = markers_with_tool_call_open(vec![token(900)]);
1240 let mut classifier = synthetic_classifier(markers);
1241 classifier.section = SampledTokenSection::Content;
1242
1243 let outcomes = feed_json_string(
1244 &mut classifier,
1245 r#"{"name":"f","arguments":{"q":"a } b"}}"#,
1246 100,
1247 );
1248
1249 assert!(
1250 outcomes
1251 .iter()
1252 .all(|outcome| matches!(outcome.sampled_token, SampledToken::ToolCall(_))),
1253 );
1254 }
1255
1256 #[test]
1257 fn json_probe_handles_escaped_quotes_in_string_values() {
1258 let markers = markers_with_tool_call_open(vec![token(900)]);
1259 let mut classifier = synthetic_classifier(markers);
1260 classifier.section = SampledTokenSection::Content;
1261
1262 let outcomes = feed_json_string(
1263 &mut classifier,
1264 r#"{"name":"f","arguments":{"q":"he said \"hi\""}}"#,
1265 100,
1266 );
1267
1268 assert!(
1269 outcomes
1270 .iter()
1271 .all(|outcome| matches!(outcome.sampled_token, SampledToken::ToolCall(_))),
1272 );
1273 }
1274
1275 #[test]
1276 fn json_probe_handles_unicode_letters_in_strings() {
1277 let markers = markers_with_tool_call_open(vec![token(900)]);
1278 let mut classifier = synthetic_classifier(markers);
1279 classifier.section = SampledTokenSection::Content;
1280
1281 let outcomes = feed_json_string(
1282 &mut classifier,
1283 r#"{"name":"日本語","arguments":{"city":"パリ"}}"#,
1284 100,
1285 );
1286
1287 assert!(
1288 outcomes
1289 .iter()
1290 .all(|outcome| matches!(outcome.sampled_token, SampledToken::ToolCall(_))),
1291 );
1292 }
1293
1294 #[test]
1295 fn json_probe_handles_nested_objects() {
1296 let markers = markers_with_tool_call_open(vec![token(900)]);
1297 let mut classifier = synthetic_classifier(markers);
1298 classifier.section = SampledTokenSection::Content;
1299
1300 let outcomes = feed_json_string(
1301 &mut classifier,
1302 r#"{"name":"f","arguments":{"a":{"b":{"c":1}}}}"#,
1303 100,
1304 );
1305
1306 assert!(
1307 outcomes
1308 .iter()
1309 .all(|outcome| matches!(outcome.sampled_token, SampledToken::ToolCall(_))),
1310 );
1311 }
1312
1313 #[test]
1314 fn json_probe_handles_arrays_inside_arguments() {
1315 let markers = markers_with_tool_call_open(vec![token(900)]);
1316 let mut classifier = synthetic_classifier(markers);
1317 classifier.section = SampledTokenSection::Content;
1318
1319 let outcomes = feed_json_string(
1320 &mut classifier,
1321 r#"{"name":"f","arguments":{"items":[1,2,3]}}"#,
1322 100,
1323 );
1324
1325 assert!(
1326 outcomes
1327 .iter()
1328 .all(|outcome| matches!(outcome.sampled_token, SampledToken::ToolCall(_))),
1329 );
1330 }
1331
1332 #[test]
1333 fn json_probe_does_not_engage_when_first_byte_is_close_brace() {
1334 let markers = markers_with_tool_call_open(vec![token(900)]);
1335 let mut classifier = synthetic_classifier(markers);
1336 classifier.section = SampledTokenSection::Content;
1337
1338 let outcomes = feed_json_string(&mut classifier, "}}", 100);
1339
1340 assert!(matches!(classifier.probe_mode, ProbeMode::Idle));
1341 assert!(
1342 outcomes
1343 .iter()
1344 .all(|outcome| matches!(outcome.sampled_token, SampledToken::Content(_))),
1345 );
1346 }
1347
1348 #[test]
1349 fn json_probe_does_not_engage_in_reasoning_section() {
1350 let markers = StreamingMarkers {
1351 reasoning_open: Some(vec![token(800)]),
1352 reasoning_close: Some(vec![token(801)]),
1353 tool_call_open: Some(vec![token(900)]),
1354 tool_call_close: None,
1355 };
1356 let mut classifier = synthetic_classifier(markers);
1357 classifier.section = SampledTokenSection::Reasoning;
1358
1359 push_and_probe(&mut classifier, 1, "{");
1360
1361 assert!(matches!(classifier.probe_mode, ProbeMode::Idle));
1362 }
1363
1364 #[test]
1365 fn json_probe_does_not_engage_in_tool_call_section() {
1366 let markers = markers_with_tool_call_open(vec![token(900)]);
1367 let mut classifier = synthetic_classifier(markers);
1368 classifier.section = SampledTokenSection::ToolCall;
1369
1370 push_and_probe(&mut classifier, 1, "{");
1371
1372 assert!(matches!(classifier.probe_mode, ProbeMode::Idle));
1373 }
1374
1375 #[test]
1376 fn marker_probe_takes_precedence_when_both_could_match() {
1377 let markers = markers_with_tool_call_open(vec![token(900)]);
1378 let mut classifier = synthetic_classifier(markers);
1379 classifier.section = SampledTokenSection::Content;
1380
1381 let mut outcomes = Vec::new();
1382 outcomes.extend(push_and_probe(&mut classifier, 1, "{"));
1383 outcomes.extend(push_and_probe(&mut classifier, 900, r#"""#));
1384
1385 assert_eq!(classifier.section, SampledTokenSection::ToolCall);
1386 assert_eq!(outcome_pieces(&outcomes), vec!["{", ""]);
1387 assert_eq!(
1388 outcome_sections(&outcomes),
1389 vec![SampledTokenSection::Content, SampledTokenSection::ToolCall],
1390 );
1391 }
1392
1393 #[test]
1394 fn json_probe_consumes_two_consecutive_objects_separately() {
1395 let markers = markers_with_tool_call_open(vec![token(900)]);
1396 let mut classifier = synthetic_classifier(markers);
1397 classifier.section = SampledTokenSection::Content;
1398
1399 let mut outcomes = Vec::new();
1400 outcomes.extend(feed_json_string(
1401 &mut classifier,
1402 r#"{"name":"a","arguments":{}}"#,
1403 100,
1404 ));
1405 outcomes.extend(feed_json_string(
1406 &mut classifier,
1407 r#"{"name":"b","arguments":{"x":1}}"#,
1408 200,
1409 ));
1410
1411 assert!(
1412 outcomes
1413 .iter()
1414 .all(|outcome| matches!(outcome.sampled_token, SampledToken::ToolCall(_))),
1415 "two consecutive markerless tool calls must both classify as ToolCall, got {:?}",
1416 outcome_sections(&outcomes),
1417 );
1418 }
1419
1420 #[test]
1421 fn json_probe_with_leading_whitespace_then_open_brace_classifies_whitespace_as_content_and_json_as_tool_call()
1422 {
1423 let markers = markers_with_tool_call_open(vec![token(900)]);
1424 let mut classifier = synthetic_classifier(markers);
1425 classifier.section = SampledTokenSection::Content;
1426
1427 let outcomes = feed_json_string(
1428 &mut classifier,
1429 "\n {\"name\":\"f\",\"arguments\":{}}",
1430 100,
1431 );
1432
1433 let tool_call_count = outcomes
1434 .iter()
1435 .filter(|outcome| matches!(outcome.sampled_token, SampledToken::ToolCall(_)))
1436 .count();
1437 let content_count = outcomes
1438 .iter()
1439 .filter(|outcome| matches!(outcome.sampled_token, SampledToken::Content(_)))
1440 .count();
1441 assert_eq!(
1442 content_count, 3,
1443 "leading `\\n ` should classify as content"
1444 );
1445 assert!(
1446 tool_call_count > 0,
1447 "the JSON object should classify as ToolCall",
1448 );
1449 assert_eq!(content_count + tool_call_count, outcomes.len());
1450 }
1451
1452 #[test]
1453 fn json_probe_records_tool_call_token_usage_on_commit() {
1454 let markers = markers_with_tool_call_open(vec![token(900)]);
1455 let mut classifier = synthetic_classifier(markers);
1456 classifier.section = SampledTokenSection::Content;
1457
1458 let json = r#"{"name":"f","arguments":{}}"#;
1459 let outcomes = feed_json_string(&mut classifier, json, 100);
1460
1461 let emitted = outcomes.len();
1462 let usage = classifier.usage();
1463 assert_eq!(usage.tool_call_tokens, emitted as u64);
1464 assert_eq!(usage.content_tokens, 0);
1465 }
1466
1467 #[test]
1468 fn json_probe_records_content_token_usage_on_abandon() {
1469 let markers = markers_with_tool_call_open(vec![token(900)]);
1470 let mut classifier = synthetic_classifier(markers);
1471 classifier.section = SampledTokenSection::Content;
1472
1473 let json = r#"{"foo":"bar"}"#;
1474 let outcomes = feed_json_string(&mut classifier, json, 100);
1475
1476 let emitted = outcomes.len();
1477 let usage = classifier.usage();
1478 assert_eq!(usage.content_tokens, emitted as u64);
1479 assert_eq!(usage.tool_call_tokens, 0);
1480 }
1481
1482 #[test]
1483 fn flush_during_active_json_probe_releases_held_tokens_as_content() {
1484 let markers = markers_with_tool_call_open(vec![token(900)]);
1485 let mut classifier = synthetic_classifier(markers);
1486 classifier.section = SampledTokenSection::Content;
1487
1488 push_and_probe(&mut classifier, 1, "{");
1489 push_and_probe(&mut classifier, 2, r#""name""#);
1490 assert!(matches!(classifier.probe_mode, ProbeMode::Active(_)));
1491
1492 let outcomes = classifier.flush();
1493
1494 assert!(
1495 outcomes
1496 .iter()
1497 .all(|outcome| matches!(outcome.sampled_token, SampledToken::Content(_))),
1498 "mid-probe flush must release held tokens as Content, got {:?}",
1499 outcome_sections(&outcomes),
1500 );
1501 assert!(matches!(classifier.probe_mode, ProbeMode::Idle));
1502 }
1503}