Skip to main content

atomr_infer_core/
tokens.rs

//! Output side: the streaming token chunks runners emit and the
//! `RequestActor` accumulates.
3
4use serde::{Deserialize, Serialize};
5
6/// One streamed chunk. Local runtimes emit one per generated token (or
7/// per micro-batch); remote runtimes emit one per provider SSE event.
8#[derive(Debug, Clone, Serialize, Deserialize)]
9pub struct TokenChunk {
10    pub request_id: String,
11    pub text_delta: String,
12    /// Tool-call delta (provider-specific JSON). Carried opaquely
13    /// through the runtime; only the gateway interprets it.
14    #[serde(default, skip_serializing_if = "Option::is_none")]
15    pub tool_call_delta: Option<serde_json::Value>,
16    #[serde(default, skip_serializing_if = "Option::is_none")]
17    pub usage: Option<TokenUsage>,
18    #[serde(default, skip_serializing_if = "Option::is_none")]
19    pub finish_reason: Option<FinishReason>,
20}
21
22#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
23#[serde(rename_all = "snake_case")]
24#[non_exhaustive]
25pub enum FinishReason {
26    Stop,
27    Length,
28    ToolCalls,
29    ContentFilter,
30    Error,
31}
32
33#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
34pub struct TokenUsage {
35    pub input_tokens: u32,
36    pub output_tokens: u32,
37    /// Model-reported reasoning tokens (o1-style). Optional.
38    #[serde(default)]
39    pub reasoning_tokens: u32,
40    /// Cached prefix tokens, if the provider reports them (Anthropic
41    /// prompt-caching, OpenAI cached input).
42    #[serde(default)]
43    pub cached_tokens: u32,
44}
45
46impl TokenUsage {
47    pub fn add(&mut self, other: TokenUsage) {
48        self.input_tokens += other.input_tokens;
49        self.output_tokens += other.output_tokens;
50        self.reasoning_tokens += other.reasoning_tokens;
51        self.cached_tokens += other.cached_tokens;
52    }
53}
54
55/// Aggregate of one request's full output. Built by the `RequestActor`
56/// from the chunk stream; emitted to the upstream client as either an
57/// SSE response (streaming) or a single JSON body (unary).
58#[derive(Debug, Clone, Default, Serialize, Deserialize)]
59pub struct Tokens {
60    pub request_id: String,
61    pub text: String,
62    pub usage: TokenUsage,
63    pub finish_reason: Option<FinishReason>,
64}
65
66impl Tokens {
67    pub fn append(&mut self, chunk: &TokenChunk) {
68        self.text.push_str(&chunk.text_delta);
69        if let Some(u) = chunk.usage {
70            self.usage.add(u);
71        }
72        if let Some(r) = chunk.finish_reason {
73            self.finish_reason = Some(r);
74        }
75    }
76}