Skip to main content

quantum_sdk/
chat.rs

1use std::collections::HashMap;
2use std::pin::Pin;
3use std::task::{Context, Poll};
4
5use futures_util::Stream;
6use pin_project_lite::pin_project;
7use serde::{Deserialize, Serialize};
8
9use crate::client::Client;
10use crate::error::Result;
11
12/// Deserialize null as empty Vec (Gemini sometimes returns null for array fields).
13fn null_as_empty_vec<'de, D, T>(deserializer: D) -> std::result::Result<Vec<T>, D::Error>
14where
15    D: serde::Deserializer<'de>,
16    T: Deserialize<'de>,
17{
18    Option::<Vec<T>>::deserialize(deserializer).map(|v| v.unwrap_or_default())
19}
20
21/// Deserialize null as None for Option<Vec<T>> fields.
22fn deserialize_opt_vec<'de, D, T>(deserializer: D) -> std::result::Result<Option<Vec<T>>, D::Error>
23where
24    D: serde::Deserializer<'de>,
25    T: Deserialize<'de>,
26{
27    // null → None, [] → Some([]), [...] → Some([...])
28    Ok(Option::<Vec<T>>::deserialize(deserializer).unwrap_or(None))
29}
30
31/// Request body for text generation.
32#[derive(Debug, Clone, Serialize, Default)]
33pub struct ChatRequest {
34    /// Model ID that determines provider routing (e.g. "claude-sonnet-4-6", "grok-4-1-fast-non-reasoning").
35    pub model: String,
36
37    /// Conversation history.
38    pub messages: Vec<ChatMessage>,
39
40    /// Functions the model can call.
41    #[serde(skip_serializing_if = "Option::is_none")]
42    pub tools: Option<Vec<ChatTool>>,
43
44    /// Constrains tool use: "auto" (default), "any" (force tool use), "none", or a specific tool name.
45    #[serde(skip_serializing_if = "Option::is_none")]
46    pub tool_choice: Option<String>,
47
48    /// JSON Schema for structured output constraints.
49    #[serde(skip_serializing_if = "Option::is_none")]
50    pub output_schema: Option<serde_json::Value>,
51
52    /// Enables server-sent event streaming. Set automatically by `chat_stream`.
53    #[serde(skip_serializing_if = "Option::is_none")]
54    pub stream: Option<bool>,
55
56    /// Controls randomness (0.0-2.0).
57    #[serde(skip_serializing_if = "Option::is_none")]
58    pub temperature: Option<f64>,
59
60    /// Limits the response length.
61    #[serde(skip_serializing_if = "Option::is_none")]
62    pub max_tokens: Option<i32>,
63
64    /// Provider-specific settings (e.g. Anthropic thinking, xAI search).
65    #[serde(skip_serializing_if = "Option::is_none")]
66    pub provider_options: Option<HashMap<String, serde_json::Value>>,
67}
68
69/// A single message in a conversation.
70#[derive(Debug, Clone, Serialize, Deserialize, Default)]
71pub struct ChatMessage {
72    /// One of "system", "user", "assistant", or "tool".
73    pub role: String,
74
75    /// Text content of the message.
76    #[serde(skip_serializing_if = "Option::is_none")]
77    pub content: Option<String>,
78
79    /// Structured content for assistant messages with tool calls.
80    /// When present, takes precedence over `content`.
81    #[serde(skip_serializing_if = "Option::is_none", deserialize_with = "deserialize_opt_vec", default)]
82    pub content_blocks: Option<Vec<ContentBlock>>,
83
84    /// Required when role is "tool" — references the tool_use ID.
85    #[serde(skip_serializing_if = "Option::is_none")]
86    pub tool_call_id: Option<String>,
87
88    /// Whether a tool result is an error.
89    #[serde(skip_serializing_if = "Option::is_none")]
90    pub is_error: Option<bool>,
91}
92
93impl ChatMessage {
94    /// Creates a user message.
95    pub fn user(content: impl Into<String>) -> Self {
96        Self {
97            role: "user".to_string(),
98            content: Some(content.into()),
99            ..Default::default()
100        }
101    }
102
103    /// Creates an assistant message.
104    pub fn assistant(content: impl Into<String>) -> Self {
105        Self {
106            role: "assistant".to_string(),
107            content: Some(content.into()),
108            ..Default::default()
109        }
110    }
111
112    /// Creates a system message.
113    pub fn system(content: impl Into<String>) -> Self {
114        Self {
115            role: "system".to_string(),
116            content: Some(content.into()),
117            ..Default::default()
118        }
119    }
120
121    /// Creates a tool result message.
122    pub fn tool_result(tool_call_id: impl Into<String>, content: impl Into<String>) -> Self {
123        Self {
124            role: "tool".to_string(),
125            content: Some(content.into()),
126            tool_call_id: Some(tool_call_id.into()),
127            ..Default::default()
128        }
129    }
130
131    /// Creates a tool error result message.
132    pub fn tool_error(tool_call_id: impl Into<String>, content: impl Into<String>) -> Self {
133        Self {
134            role: "tool".to_string(),
135            content: Some(content.into()),
136            tool_call_id: Some(tool_call_id.into()),
137            is_error: Some(true),
138            ..Default::default()
139        }
140    }
141}
142
143/// A single block in the response content array.
144#[derive(Debug, Clone, Serialize, Deserialize, Default)]
145pub struct ContentBlock {
146    /// One of "text", "thinking", or "tool_use".
147    #[serde(rename = "type")]
148    pub block_type: String,
149
150    /// Content for "text" and "thinking" blocks.
151    #[serde(skip_serializing_if = "Option::is_none")]
152    pub text: Option<String>,
153
154    /// Tool call identifier for "tool_use" blocks.
155    #[serde(skip_serializing_if = "Option::is_none")]
156    pub id: Option<String>,
157
158    /// Function name for "tool_use" blocks.
159    #[serde(skip_serializing_if = "Option::is_none")]
160    pub name: Option<String>,
161
162    /// Function arguments for "tool_use" blocks.
163    #[serde(skip_serializing_if = "Option::is_none")]
164    pub input: Option<HashMap<String, serde_json::Value>>,
165
166    /// Gemini thought signature — must be echoed back with tool results.
167    #[serde(skip_serializing_if = "Option::is_none")]
168    pub thought_signature: Option<String>,
169
170    /// Base64-encoded data for file/image content blocks.
171    #[serde(skip_serializing_if = "Option::is_none")]
172    pub data: Option<String>,
173
174    /// Filename for file content blocks.
175    #[serde(skip_serializing_if = "Option::is_none")]
176    pub file_name: Option<String>,
177
178    /// MIME type for file/image/file_uri content blocks.
179    #[serde(skip_serializing_if = "Option::is_none")]
180    pub mime_type: Option<String>,
181
182    /// Remote-resource URL for `file_uri` content blocks. Gemini
183    /// accepts YouTube URLs verbatim here (with `mime_type: "video/mp4"`)
184    /// — no upload step needed for public videos. Other providers
185    /// may require a pre-uploaded resource URI; unsupported URIs are
186    /// silently skipped server-side rather than erroring the request.
187    #[serde(skip_serializing_if = "Option::is_none")]
188    pub file_uri: Option<String>,
189}
190
191/// Defines a function the model can call.
192#[derive(Debug, Clone, Serialize, Default)]
193pub struct ChatTool {
194    /// Function name.
195    pub name: String,
196
197    /// Explains what the function does.
198    pub description: String,
199
200    /// JSON Schema for the function's arguments.
201    #[serde(skip_serializing_if = "Option::is_none")]
202    pub parameters: Option<serde_json::Value>,
203
204    /// Enable guaranteed schema validation on tool inputs (Anthropic, OpenAI).
205    #[serde(skip_serializing_if = "Option::is_none")]
206    pub strict: Option<bool>,
207}
208
209/// Response from a non-streaming chat request.
210#[derive(Debug, Clone, Deserialize)]
211pub struct ChatResponse {
212    /// Unique request identifier.
213    pub id: String,
214
215    /// Model that generated the response.
216    pub model: String,
217
218    /// List of content blocks (text, thinking, tool_use).
219    #[serde(default, deserialize_with = "null_as_empty_vec")]
220    pub content: Vec<ContentBlock>,
221
222    /// Token counts and cost.
223    pub usage: Option<ChatUsage>,
224
225    /// Why generation stopped ("end_turn", "tool_use", "max_tokens").
226    #[serde(default)]
227    pub stop_reason: String,
228
229    /// Citations from web search (when search is enabled via provider_options).
230    #[serde(default, deserialize_with = "null_as_empty_vec")]
231    pub citations: Vec<Citation>,
232
233    /// Total cost from the X-QAI-Cost-Ticks header.
234    #[serde(skip)]
235    pub cost_ticks: i64,
236
237    /// From the X-QAI-Request-Id header.
238    #[serde(skip)]
239    pub request_id: String,
240}
241
242impl ChatResponse {
243    /// Returns the concatenated text content, ignoring thinking and tool_use blocks.
244    pub fn text(&self) -> String {
245        self.content
246            .iter()
247            .filter(|b| b.block_type == "text")
248            .filter_map(|b| b.text.as_deref())
249            .collect::<Vec<_>>()
250            .join("")
251    }
252
253    /// Returns the concatenated thinking content.
254    pub fn thinking(&self) -> String {
255        self.content
256            .iter()
257            .filter(|b| b.block_type == "thinking")
258            .filter_map(|b| b.text.as_deref())
259            .collect::<Vec<_>>()
260            .join("")
261    }
262
263    /// Returns all tool_use blocks from the response.
264    pub fn tool_calls(&self) -> Vec<&ContentBlock> {
265        self.content
266            .iter()
267            .filter(|b| b.block_type == "tool_use")
268            .collect()
269    }
270}
271
272/// A source reference from web search grounding.
273#[derive(Debug, Clone, Deserialize, Serialize)]
274pub struct Citation {
275    /// Title of the cited source.
276    #[serde(default)]
277    pub title: String,
278
279    /// URL of the cited source.
280    #[serde(default)]
281    pub url: String,
282
283    /// Relevant text snippet from the source.
284    #[serde(default)]
285    pub text: String,
286
287    /// Position in the response.
288    #[serde(default)]
289    pub index: i32,
290}
291
292/// Token counts and cost for a chat response.
293#[derive(Debug, Clone, Deserialize)]
294pub struct ChatUsage {
295    pub input_tokens: i32,
296    pub output_tokens: i32,
297    pub cost_ticks: i64,
298
299    /// Input tokens served from the provider's prompt cache, billed
300    /// at the (lower) cached rate. Omitted on responses with no
301    /// cache hit. Backend wire shape: `cached_tokens` (i32);
302    /// promoted to Option<i64> here for headroom on future
303    /// long-cache scenarios (multi-hour video transcripts, etc.).
304    /// Multi-turn billing audits reconcile by computing
305    /// (non-cached) input_tokens vs (cached) cached_tokens — both
306    /// roll into the provider's billable total.
307    #[serde(default)]
308    pub cached_tokens: Option<i64>,
309
310    /// Sub-component of `output_tokens` that was model reasoning /
311    /// thinking output (Gemini 3.x's `thoughtTokens`, OpenAI o-
312    /// series' reasoning tokens, Anthropic's extended-thinking
313    /// blocks). Omitted on responses from non-reasoning models.
314    /// Total billable output = `output_tokens` (which already
315    /// includes the reasoning component — this field is just
316    /// transparency on how much of that was thinking).
317    #[serde(default)]
318    pub reasoning_tokens: Option<i64>,
319}
320
321/// Response shape from `POST /qai/v1/chat/estimate`. Returned by
322/// `Client::estimate_chat`.
323///
324/// `estimated_cost_ticks` is the upfront reservation the actual
325/// `chat` call would book — it's a worst-case ceiling, not a
326/// prediction of the final settle. For text-only payloads, expected
327/// settle is close to this number; for video / multimodal inputs
328/// the ceiling can over-estimate (Gemini's reasoning budget +
329/// `max_tokens` cap drive the output side) and the post-call
330/// settle refunds the difference. Either way, this is the number
331/// the user must have available to send the request.
332///
333/// `estimated_cost_usd` is the same value pre-divided by
334/// `TicksPerUSD` for convenience — no need to know the per-tick
335/// rate on the client.
336#[derive(Debug, Clone, Deserialize)]
337pub struct EstimateResponse {
338    pub estimated_cost_ticks: i64,
339    pub estimated_cost_usd: f64,
340    /// Echo of the model name the estimate was computed against —
341    /// handy when the caller wants to display "≈ X credits on
342    /// gemini-flash-latest" without re-reading the request.
343    #[serde(default)]
344    pub model: String,
345}
346
347/// A single event from an SSE chat stream.
348///
349/// Tool-use streaming uses a triplet of events since v0.7:
350/// `tool_use_start` carries `tool_use_start`, `tool_use_input_delta`
351/// carries `tool_use_input_delta`, and `tool_use_complete` carries
352/// `tool_use_complete`. The legacy atomic `tool_use` event is still
353/// emitted by backends that haven't shipped the triplet yet — for new
354/// code, prefer the triplet fields.
355#[derive(Debug, Clone)]
356pub struct StreamEvent {
357    /// Event type: "content_delta", "thinking_delta",
358    /// "tool_use_start", "tool_use_input_delta", "tool_use_complete",
359    /// "tool_use" (legacy), "usage", "heartbeat", "error", "done".
360    pub event_type: String,
361
362    /// Incremental text for content_delta and thinking_delta events.
363    pub delta: Option<StreamDelta>,
364
365    /// Populated for legacy atomic tool_use events.
366    pub tool_use: Option<StreamToolUse>,
367
368    /// Populated for tool_use_start events.
369    pub tool_use_start: Option<StreamToolUseStart>,
370
371    /// Populated for tool_use_input_delta events.
372    pub tool_use_input_delta: Option<StreamToolUseInputDelta>,
373
374    /// Populated for tool_use_complete events.
375    pub tool_use_complete: Option<StreamToolUseComplete>,
376
377    /// Populated for usage events.
378    pub usage: Option<ChatUsage>,
379
380    /// Populated for error events.
381    pub error: Option<String>,
382
383    /// True when the stream is complete.
384    pub done: bool,
385}
386
387/// Incremental text in a streaming event.
388#[derive(Debug, Clone, Deserialize)]
389pub struct StreamDelta {
390    pub text: String,
391}
392
393/// A tool call from a legacy (atomic) streaming event.
394#[derive(Debug, Clone, Deserialize)]
395pub struct StreamToolUse {
396    pub id: String,
397    pub name: String,
398    pub input: HashMap<String, serde_json::Value>,
399}
400
401/// Tool-call start event — fires once before any input deltas.
402#[derive(Debug, Clone, Deserialize)]
403pub struct StreamToolUseStart {
404    pub id: String,
405    pub name: String,
406}
407
408/// Tool-call input delta — fires zero or more times with raw JSON fragments.
409#[derive(Debug, Clone, Deserialize)]
410pub struct StreamToolUseInputDelta {
411    pub id: String,
412    /// Raw JSON fragment. May not parse on its own; accumulate until
413    /// the corresponding `tool_use_complete` event arrives with the
414    /// authoritative `input`.
415    pub partial_json: String,
416}
417
418/// Tool-call completion event — fires exactly once per call with the
419/// server-accumulated, fully-parsed arguments.
420#[derive(Debug, Clone, Deserialize)]
421pub struct StreamToolUseComplete {
422    pub id: String,
423    pub name: String,
424    pub input: HashMap<String, serde_json::Value>,
425}
426
427/// Raw JSON from the SSE stream before parsing into typed fields.
428#[derive(Deserialize)]
429struct RawStreamEvent {
430    #[serde(rename = "type")]
431    event_type: String,
432    #[serde(default)]
433    delta: Option<StreamDelta>,
434    #[serde(default)]
435    id: Option<String>,
436    #[serde(default)]
437    name: Option<String>,
438    #[serde(default)]
439    input: Option<HashMap<String, serde_json::Value>>,
440    /// Carried by `tool_use_input_delta` events — a raw JSON fragment.
441    #[serde(default)]
442    partial_json: Option<String>,
443    #[serde(default)]
444    input_tokens: Option<i32>,
445    #[serde(default)]
446    output_tokens: Option<i32>,
447    #[serde(default)]
448    cost_ticks: Option<i64>,
449    #[serde(default)]
450    message: Option<String>,
451}
452
453pin_project! {
454    /// An async stream of [`StreamEvent`]s from an SSE chat response.
455    pub struct ChatStream {
456        #[pin]
457        inner: Pin<Box<dyn Stream<Item = StreamEvent> + Send>>,
458    }
459}
460
461impl Stream for ChatStream {
462    type Item = StreamEvent;
463
464    fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
465        self.project().inner.poll_next(cx)
466    }
467}
468
469impl Client {
470    /// Sends a non-streaming text generation request.
471    pub async fn chat(&self, req: &ChatRequest) -> Result<ChatResponse> {
472        let mut req = req.clone();
473        req.stream = Some(false);
474
475        let (mut resp, meta) = self.post_json::<ChatRequest, ChatResponse>("/qai/v1/chat", &req).await?;
476        resp.cost_ticks = meta.cost_ticks;
477        resp.request_id = meta.request_id;
478        if resp.model.is_empty() {
479            resp.model = meta.model;
480        }
481        Ok(resp)
482    }
483
484    /// Estimates the upfront credit reservation a `chat` call with
485    /// the same `ChatRequest` would book — WITHOUT making the
486    /// provider call or deducting credits. Use this to render a
487    /// "this will cost ≈ X credits" hint in the UI before the user
488    /// commits to a payload (notably long YouTube videos attached
489    /// via `ContentBlock.file_uri`, which bill at ~263 tokens per
490    /// second and can dwarf a text-only chat by 1000×).
491    ///
492    /// Wraps `POST /qai/v1/chat/estimate`. Same auth as `chat()`.
493    ///
494    /// # Example
495    ///
496    /// ```no_run
497    /// # async fn example() -> quantum_sdk::Result<()> {
498    /// let client = quantum_sdk::Client::new("qai_...");
499    /// let req = quantum_sdk::ChatRequest {
500    ///     model: "gemini-flash-latest".into(),
501    ///     messages: vec![quantum_sdk::ChatMessage::user("hi")],
502    ///     ..Default::default()
503    /// };
504    /// let est = client.estimate_chat(&req).await?;
505    /// println!("would cost ~{} ticks (~${})", est.estimated_cost_ticks, est.estimated_cost_usd);
506    /// # Ok(())
507    /// # }
508    /// ```
509    pub async fn estimate_chat(&self, req: &ChatRequest) -> Result<EstimateResponse> {
510        // Drop `stream` from the estimate payload — the backend
511        // doesn't care about streaming for cost purposes (output
512        // ceiling is the same either way) and including it would
513        // make the SDK shape diverge needlessly from the JSON the
514        // server sees on the wire.
515        let mut req = req.clone();
516        req.stream = None;
517        let (resp, _meta) = self
518            .post_json::<ChatRequest, EstimateResponse>("/qai/v1/chat/estimate", &req)
519            .await?;
520        Ok(resp)
521    }
522
523    /// Sends a streaming text generation request and returns an async stream of events.
524    ///
525    /// # Example
526    ///
527    /// ```no_run
528    /// use futures_util::StreamExt;
529    ///
530    /// # async fn example() -> quantum_sdk::Result<()> {
531    /// let client = quantum_sdk::Client::new("key");
532    /// let req = quantum_sdk::ChatRequest {
533    ///     model: "claude-sonnet-4-6".into(),
534    ///     messages: vec![quantum_sdk::ChatMessage::user("Hello!")],
535    ///     ..Default::default()
536    /// };
537    /// let mut stream = client.chat_stream(&req).await?;
538    /// while let Some(ev) = stream.next().await {
539    ///     if let Some(delta) = &ev.delta {
540    ///         print!("{}", delta.text);
541    ///     }
542    /// }
543    /// # Ok(())
544    /// # }
545    /// ```
546    pub async fn chat_stream(&self, req: &ChatRequest) -> Result<ChatStream> {
547        let mut req = req.clone();
548        req.stream = Some(true);
549
550        let (resp, _meta) = self.post_stream_raw("/qai/v1/chat", &req).await?;
551
552        let byte_stream = resp.bytes_stream();
553        let event_stream = sse_to_events(byte_stream);
554
555        Ok(ChatStream {
556            inner: Box::pin(event_stream),
557        })
558    }
559}
560
561/// Converts a byte stream into a stream of parsed [`StreamEvent`]s.
562fn sse_to_events<S>(byte_stream: S) -> impl Stream<Item = StreamEvent> + Send
563where
564    S: Stream<Item = std::result::Result<bytes::Bytes, reqwest::Error>> + Send + 'static,
565{
566    // Pin the byte stream so we can poll it inside unfold.
567    let pinned_stream = Box::pin(byte_stream);
568
569    // Accumulate raw bytes into lines to avoid splitting multi-byte UTF-8 characters.
570    // Only convert to String when we have a complete newline-terminated line.
571    let line_stream = futures_util::stream::unfold(
572        (pinned_stream, Vec::<u8>::new()),
573        |(mut stream, mut buffer)| async move {
574            use futures_util::StreamExt;
575            loop {
576                // Check if we have a complete line in the buffer.
577                if let Some(newline_pos) = buffer.iter().position(|&b| b == b'\n') {
578                    let mut line_bytes = buffer[..newline_pos].to_vec();
579                    buffer = buffer[newline_pos + 1..].to_vec();
580                    // Trim trailing \r
581                    if line_bytes.last() == Some(&b'\r') {
582                        line_bytes.pop();
583                    }
584                    let line = String::from_utf8_lossy(&line_bytes).into_owned();
585                    return Some((line, (stream, buffer)));
586                }
587
588                // Read more data.
589                match stream.next().await {
590                    Some(Ok(chunk)) => {
591                        buffer.extend_from_slice(&chunk);
592                    }
593                    Some(Err(_)) | None => {
594                        // Stream ended. Emit remaining buffer if non-empty.
595                        if !buffer.is_empty() {
596                            let remaining = String::from_utf8_lossy(&buffer).into_owned();
597                            buffer.clear();
598                            return Some((remaining, (stream, buffer)));
599                        }
600                        return None;
601                    }
602                }
603            }
604        },
605    );
606
607    let pinned_lines = Box::pin(line_stream);
608    futures_util::stream::unfold(pinned_lines, |mut lines| async move {
609        use futures_util::StreamExt;
610        loop {
611            let line = lines.next().await?;
612
613            if !line.starts_with("data: ") {
614                continue;
615            }
616            let payload = &line["data: ".len()..];
617
618            if payload == "[DONE]" {
619                let ev = StreamEvent {
620                    event_type: "done".to_string(),
621                    delta: None,
622                    tool_use: None,
623                    tool_use_start: None,
624                    tool_use_input_delta: None,
625                    tool_use_complete: None,
626                    usage: None,
627                    error: None,
628                    done: true,
629                };
630                return Some((ev, lines));
631            }
632
633            let raw: RawStreamEvent = match serde_json::from_str(payload) {
634                Ok(r) => r,
635                Err(e) => {
636                    let ev = StreamEvent {
637                        event_type: "error".to_string(),
638                        delta: None,
639                        tool_use: None,
640                        tool_use_start: None,
641                        tool_use_input_delta: None,
642                        tool_use_complete: None,
643                        usage: None,
644                        error: Some(format!("parse SSE: {e}")),
645                        done: false,
646                    };
647                    return Some((ev, lines));
648                }
649            };
650
651            let mut ev = StreamEvent {
652                event_type: raw.event_type.clone(),
653                delta: None,
654                tool_use: None,
655                tool_use_start: None,
656                tool_use_input_delta: None,
657                tool_use_complete: None,
658                usage: None,
659                error: None,
660                done: false,
661            };
662
663            match raw.event_type.as_str() {
664                "content_delta" | "thinking_delta" => {
665                    ev.delta = raw.delta;
666                }
667                "tool_use" => {
668                    // Legacy atomic event — kept for back-compat with
669                    // backends that haven't shipped the triplet (v0.7+).
670                    ev.tool_use = Some(StreamToolUse {
671                        id: raw.id.unwrap_or_default(),
672                        name: raw.name.unwrap_or_default(),
673                        input: raw.input.unwrap_or_default(),
674                    });
675                }
676                "tool_use_start" => {
677                    ev.tool_use_start = Some(StreamToolUseStart {
678                        id: raw.id.unwrap_or_default(),
679                        name: raw.name.unwrap_or_default(),
680                    });
681                }
682                "tool_use_input_delta" => {
683                    ev.tool_use_input_delta = Some(StreamToolUseInputDelta {
684                        id: raw.id.unwrap_or_default(),
685                        partial_json: raw.partial_json.unwrap_or_default(),
686                    });
687                }
688                "tool_use_complete" => {
689                    ev.tool_use_complete = Some(StreamToolUseComplete {
690                        id: raw.id.unwrap_or_default(),
691                        name: raw.name.unwrap_or_default(),
692                        input: raw.input.unwrap_or_default(),
693                    });
694                }
695                "usage" => {
696                    ev.usage = Some(ChatUsage {
697                        input_tokens: raw.input_tokens.unwrap_or(0),
698                        output_tokens: raw.output_tokens.unwrap_or(0),
699                        cost_ticks: raw.cost_ticks.unwrap_or(0),
700                        // Stream "usage" events carry only the
701                        // running totals — cached/reasoning splits
702                        // arrive on the final non-stream envelope.
703                        // None here is faithful, not a bug.
704                        cached_tokens: None,
705                        reasoning_tokens: None,
706                    });
707                }
708                "error" => {
709                    ev.error = raw.message;
710                }
711                "heartbeat" => {}
712                _ => {}
713            }
714
715            return Some((ev, lines));
716        }
717    })
718}