defect_agent/llm/chunk.rs
1//! Unified shape for streaming output chunks.
2
3use serde::{Deserialize, Serialize};
4
5/// A single event produced by a provider's streaming generation.
6///
7/// Semantic constraints:
8/// - The first event of a stream is always [`ProviderChunk::MessageStart`]
9/// - [`ProviderChunk::Stop`] is the last semantic event; only [`ProviderChunk::Usage`]
10/// may appear after it
11/// - [`ProviderChunk::ToolUseArgsDelta`] chunks with the same `tool_use_id` are
12/// concatenated in arrival order to form the complete argument JSON; concurrent tool
13/// uses are accumulated per id, independent of wire order
14/// - [`ProviderChunk::Usage`] may arrive multiple times; callers should accumulate each
15/// field
16#[non_exhaustive]
17#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
18#[serde(tag = "type", rename_all = "snake_case")]
19pub enum ProviderChunk {
20 /// First event of the stream, containing only session-level metadata.
21 MessageStart { id: String, model: String },
22
23 /// Incremental assistant text.
24 TextDelta { text: String },
25
26 /// Thinking-chain text delta (unified abstraction for Anthropic extended thinking,
27 /// DeepSeek `reasoning_content`, o1-style reasoning, etc.).
28 ThinkingDelta { text: String },
29
30 /// Thinking chain signature (Anthropic verification data for preserving thinking
31 /// across multiple turns).
32 /// Not text; should not be merged with [`ProviderChunk::ThinkingDelta`].
33 ThinkingSignature { signature: String },
34
35 /// Tool call start: declares a new `tool_use`; subsequent
36 /// [`ProviderChunk::ToolUseArgsDelta`] and [`ProviderChunk::ToolUseEnd`]
37 /// are linked to this call via `id`.
38 ToolUseStart { id: String, name: String },
39
40 /// A fragment of tool-use arguments.
41 ///
42 /// `fragment` is a raw byte slice; it is **not guaranteed to be a valid JSON
43 /// substring**. Callers must wait until the corresponding
44 /// [`ProviderChunk::ToolUseEnd`] is received before parsing the complete payload as
45 /// JSON.
46 ToolUseArgsDelta { id: String, fragment: String },
47
48 /// Tool call end: all `ArgsDelta` for this `id` have been sent.
49 ToolUseEnd { id: String },
50
51 /// The generation has ended.
52 Stop { reason: StopReason },
53
54 /// Token usage statistics. May arrive multiple times in a single stream; callers
55 /// should accumulate rather than overwrite.
56 Usage(Usage),
57}
58
59/// Semantic category of generation termination.
60#[non_exhaustive]
61#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
62#[serde(rename_all = "snake_case")]
63pub enum StopReason {
64 /// The model naturally ended the current turn.
65 EndTurn,
66 /// Hit the max_tokens limit.
67 MaxTokens,
68 /// Hit stop_sequence.
69 StopSequence,
70 /// Model requested a tool call; the caller should proceed with a subsequent
71 /// `tool_use` turn.
72 ToolUse,
73 /// Refusal due to safety policy.
74 Refusal,
75}
76
77/// Token usage statistics. Each field is `Option` to indicate that the provider does not
78/// report that field.
79///
80/// When the provider sends multiple responses, the caller should sum each field
81/// individually (treating `Option::None` as 0).
82#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
83pub struct Usage {
84 pub input_tokens: Option<u64>,
85 pub output_tokens: Option<u64>,
86 pub cache_read_input_tokens: Option<u64>,
87 pub cache_creation_input_tokens: Option<u64>,
88}