Skip to main content

defect_core/llm/
chunk.rs

1//! Unified shape for streaming output chunks.
2
3use serde::{Deserialize, Serialize};
4
5/// A single event produced by a provider's streaming generation.
6///
7/// Semantic constraints:
8/// - The first event of a stream is always [`ProviderChunk::MessageStart`]
9/// - [`ProviderChunk::Stop`] is the last semantic event; only [`ProviderChunk::Usage`]
10///   may appear after it
11/// - [`ProviderChunk::ToolUseArgsDelta`] chunks with the same `id` are concatenated in
12///   arrival order to form the complete argument JSON; concurrent tool uses are
13///   accumulated per `id`, so chunks of different calls may interleave on the wire
14/// - [`ProviderChunk::Usage`] may arrive multiple times; callers should accumulate each
15///   field
16#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
17#[serde(tag = "type", rename_all = "snake_case")] // internally tagged: a `type` field holds the snake_case variant name
18pub enum ProviderChunk {
19    /// First event of the stream, containing only session-level metadata.
20    MessageStart { id: String, model: String },
21
22    /// Incremental assistant text.
23    TextDelta { text: String },
24
25    /// Thinking-chain text delta (unified abstraction for Anthropic extended thinking,
26    /// DeepSeek `reasoning_content`, o1-style reasoning, etc.).
27    ThinkingDelta { text: String },
28
29    /// Thinking-chain signature (Anthropic verification data for preserving thinking
30    /// across multiple turns).
31    /// Not text; should not be merged with [`ProviderChunk::ThinkingDelta`].
32    ThinkingSignature { signature: String },
33
34    /// Tool call start: declares a new `tool_use`; subsequent
35    /// [`ProviderChunk::ToolUseArgsDelta`] and [`ProviderChunk::ToolUseEnd`]
36    /// are linked to this call via `id`.
37    ToolUseStart { id: String, name: String },
38
39    /// A fragment of tool-use arguments.
40    ///
41    /// `fragment` is a raw piece of the argument text (a `String`, not bytes); it is
42    /// **not guaranteed to be valid JSON on its own**. Callers must wait until the
43    /// corresponding [`ProviderChunk::ToolUseEnd`] is received before parsing the
44    /// concatenated payload as JSON.
45    ToolUseArgsDelta { id: String, fragment: String },
46
47    /// Tool call end: every `ToolUseArgsDelta` for this `id` has been sent.
48    ToolUseEnd { id: String },
49
50    /// The generation has ended.
51    Stop { reason: StopReason },
52
53    /// Token usage statistics. May arrive multiple times in a single stream; callers
54    /// should accumulate rather than overwrite.
55    Usage(Usage),
56}
57
58/// Semantic category of why a generation terminated; carried by [`ProviderChunk::Stop`].
59#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
60#[serde(rename_all = "snake_case")] // unit variants serialize as snake_case strings, e.g. `end_turn`
61pub enum StopReason {
62    /// The model naturally ended the current turn.
63    EndTurn,
64    /// Generation hit the `max_tokens` limit.
65    MaxTokens,
66    /// Generation hit a stop sequence (`stop_sequence`).
67    StopSequence,
68    /// The model requested a tool call; the caller should proceed with a subsequent
69    /// `tool_use` turn.
70    ToolUse,
71    /// The model refused due to safety policy.
72    Refusal,
73}
74
75/// Token usage statistics. Each field is an `Option`; `None` means the provider did not
76/// report that field. `Usage::default()` has every field set to `None`.
77///
78/// When a stream carries several [`ProviderChunk::Usage`] events, the caller should sum
79/// each field individually (treating `None` as 0).
80#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
81pub struct Usage {
82    pub input_tokens: Option<u64>,
83    pub output_tokens: Option<u64>,
84    pub cache_read_input_tokens: Option<u64>,
85    pub cache_creation_input_tokens: Option<u64>,
86}