defect_core/llm/chunk.rs
1//! Unified shape for streaming output chunks.
2
3use serde::{Deserialize, Serialize};
4
5/// A single event produced by a provider's streaming generation.
6///
7/// Semantic constraints:
8/// - The first event of a stream is always [`ProviderChunk::MessageStart`]
9/// - [`ProviderChunk::Stop`] is the last semantic event; only [`ProviderChunk::Usage`]
10/// may appear after it
11/// - [`ProviderChunk::ToolUseArgsDelta`] chunks with the same `tool_use_id` are
12/// concatenated in arrival order to form the complete argument JSON; concurrent tool
13/// uses are accumulated per id, independent of wire order
14/// - [`ProviderChunk::Usage`] may arrive multiple times; callers should accumulate each
15/// field
16#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
17#[serde(tag = "type", rename_all = "snake_case")]
18pub enum ProviderChunk {
19 /// First event of the stream, containing only session-level metadata.
20 MessageStart { id: String, model: String },
21
22 /// Incremental assistant text.
23 TextDelta { text: String },
24
25 /// Thinking-chain text delta (unified abstraction for Anthropic extended thinking,
26 /// DeepSeek `reasoning_content`, o1-style reasoning, etc.).
27 ThinkingDelta { text: String },
28
29 /// Thinking chain signature (Anthropic verification data for preserving thinking
30 /// across multiple turns).
31 /// Not text; should not be merged with [`ProviderChunk::ThinkingDelta`].
32 ThinkingSignature { signature: String },
33
34 /// Tool call start: declares a new `tool_use`; subsequent
35 /// [`ProviderChunk::ToolUseArgsDelta`] and [`ProviderChunk::ToolUseEnd`]
36 /// are linked to this call via `id`.
37 ToolUseStart { id: String, name: String },
38
39 /// A fragment of tool-use arguments.
40 ///
41 /// `fragment` is a raw byte slice; it is **not guaranteed to be a valid JSON
42 /// substring**. Callers must wait until the corresponding
43 /// [`ProviderChunk::ToolUseEnd`] is received before parsing the complete payload as
44 /// JSON.
45 ToolUseArgsDelta { id: String, fragment: String },
46
47 /// Tool call end: all `ArgsDelta` for this `id` have been sent.
48 ToolUseEnd { id: String },
49
50 /// The generation has ended.
51 Stop { reason: StopReason },
52
53 /// Token usage statistics. May arrive multiple times in a single stream; callers
54 /// should accumulate rather than overwrite.
55 Usage(Usage),
56}
57
58/// Semantic category of generation termination.
59#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
60#[serde(rename_all = "snake_case")]
61pub enum StopReason {
62 /// The model naturally ended the current turn.
63 EndTurn,
64 /// Hit the max_tokens limit.
65 MaxTokens,
66 /// Hit stop_sequence.
67 StopSequence,
68 /// Model requested a tool call; the caller should proceed with a subsequent
69 /// `tool_use` turn.
70 ToolUse,
71 /// Refusal due to safety policy.
72 Refusal,
73}
74
75/// Token usage statistics. Each field is `Option` to indicate that the provider does not
76/// report that field.
77///
78/// When the provider sends multiple responses, the caller should sum each field
79/// individually (treating `Option::None` as 0).
80#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
81pub struct Usage {
82 pub input_tokens: Option<u64>,
83 pub output_tokens: Option<u64>,
84 pub cache_read_input_tokens: Option<u64>,
85 pub cache_creation_input_tokens: Option<u64>,
86}