Skip to main content

atomr_infer_core/
tokens.rs

//! Output side: the streaming token chunks runners emit and the
//! `RequestActor` accumulates.
3
4use serde::{Deserialize, Serialize};
5
6/// One streamed chunk. Local runtimes emit one per generated token (or
7/// per micro-batch); remote runtimes emit one per provider SSE event.
8#[derive(Debug, Clone, Serialize, Deserialize)]
9pub struct TokenChunk {
10    pub request_id: String,
11    pub text_delta: String,
12    /// Tool-call delta (provider-specific JSON). Carried opaquely
13    /// through the runtime; only the gateway interprets it.
14    #[serde(default, skip_serializing_if = "Option::is_none")]
15    pub tool_call_delta: Option<serde_json::Value>,
16    #[serde(default, skip_serializing_if = "Option::is_none")]
17    pub usage: Option<TokenUsage>,
18    #[serde(default, skip_serializing_if = "Option::is_none")]
19    pub finish_reason: Option<FinishReason>,
20}
21
22#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
23#[serde(rename_all = "snake_case")]
24pub enum FinishReason {
25    Stop,
26    Length,
27    ToolCalls,
28    ContentFilter,
29    Error,
30}
31
32#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
33pub struct TokenUsage {
34    pub input_tokens: u32,
35    pub output_tokens: u32,
36    /// Model-reported reasoning tokens (o1-style). Optional.
37    #[serde(default)]
38    pub reasoning_tokens: u32,
39    /// Cached prefix tokens, if the provider reports them (Anthropic
40    /// prompt-caching, OpenAI cached input).
41    #[serde(default)]
42    pub cached_tokens: u32,
43}
44
45impl TokenUsage {
46    pub fn add(&mut self, other: TokenUsage) {
47        self.input_tokens += other.input_tokens;
48        self.output_tokens += other.output_tokens;
49        self.reasoning_tokens += other.reasoning_tokens;
50        self.cached_tokens += other.cached_tokens;
51    }
52}
53
54/// Aggregate of one request's full output. Built by the `RequestActor`
55/// from the chunk stream; emitted to the upstream client as either an
56/// SSE response (streaming) or a single JSON body (unary).
57#[derive(Debug, Clone, Default, Serialize, Deserialize)]
58pub struct Tokens {
59    pub request_id: String,
60    pub text: String,
61    pub usage: TokenUsage,
62    pub finish_reason: Option<FinishReason>,
63}
64
65impl Tokens {
66    pub fn append(&mut self, chunk: &TokenChunk) {
67        self.text.push_str(&chunk.text_delta);
68        if let Some(u) = chunk.usage {
69            self.usage.add(u);
70        }
71        if let Some(r) = chunk.finish_reason {
72            self.finish_reason = Some(r);
73        }
74    }
75}