Skip to main content

defect_agent/llm/
chunk.rs

1//! Unified shape for streaming output chunks.
2
3use serde::{Deserialize, Serialize};
4
5/// A single event produced by a provider's streaming generation.
6///
7/// Semantic constraints:
8/// - The first event of a stream is always [`ProviderChunk::MessageStart`]
9/// - [`ProviderChunk::Stop`] is the last semantic event; only [`ProviderChunk::Usage`]
10///   may appear after it
11/// - [`ProviderChunk::ToolUseArgsDelta`] chunks with the same `tool_use_id` are
12///   concatenated in arrival order to form the complete argument JSON; concurrent tool
13///   uses are accumulated per id, independent of wire order
14/// - [`ProviderChunk::Usage`] may arrive multiple times; callers should accumulate each
15///   field
16#[non_exhaustive]
17#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
18#[serde(tag = "type", rename_all = "snake_case")]
19pub enum ProviderChunk {
20    /// First event of the stream, containing only session-level metadata.
21    MessageStart { id: String, model: String },
22
23    /// Incremental assistant text.
24    TextDelta { text: String },
25
26    /// Thinking-chain text delta (unified abstraction for Anthropic extended thinking,
27    /// DeepSeek `reasoning_content`, o1-style reasoning, etc.).
28    ThinkingDelta { text: String },
29
30    /// Thinking chain signature (Anthropic verification data for preserving thinking
31    /// across multiple turns).
32    /// Not text; should not be merged with [`ProviderChunk::ThinkingDelta`].
33    ThinkingSignature { signature: String },
34
35    /// Tool call start: declares a new `tool_use`; subsequent
36    /// [`ProviderChunk::ToolUseArgsDelta`] and [`ProviderChunk::ToolUseEnd`]
37    /// are linked to this call via `id`.
38    ToolUseStart { id: String, name: String },
39
40    /// A fragment of tool-use arguments.
41    ///
42    /// `fragment` is a raw byte slice; it is **not guaranteed to be a valid JSON
43    /// substring**. Callers must wait until the corresponding
44    /// [`ProviderChunk::ToolUseEnd`] is received before parsing the complete payload as
45    /// JSON.
46    ToolUseArgsDelta { id: String, fragment: String },
47
48    /// Tool call end: all `ArgsDelta` for this `id` have been sent.
49    ToolUseEnd { id: String },
50
51    /// The generation has ended.
52    Stop { reason: StopReason },
53
54    /// Token usage statistics. May arrive multiple times in a single stream; callers
55    /// should accumulate rather than overwrite.
56    Usage(Usage),
57}
58
59/// Semantic category of generation termination.
60#[non_exhaustive]
61#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
62#[serde(rename_all = "snake_case")]
63pub enum StopReason {
64    /// The model naturally ended the current turn.
65    EndTurn,
66    /// Hit the max_tokens limit.
67    MaxTokens,
68    /// Hit stop_sequence.
69    StopSequence,
70    /// Model requested a tool call; the caller should proceed with a subsequent
71    /// `tool_use` turn.
72    ToolUse,
73    /// Refusal due to safety policy.
74    Refusal,
75}
76
77/// Token usage statistics. Each field is `Option` to indicate that the provider does not
78/// report that field.
79///
80/// When the provider sends multiple responses, the caller should sum each field
81/// individually (treating `Option::None` as 0).
82#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
83pub struct Usage {
84    pub input_tokens: Option<u64>,
85    pub output_tokens: Option<u64>,
86    pub cache_read_input_tokens: Option<u64>,
87    pub cache_creation_input_tokens: Option<u64>,
88}