quantum_sdk/chat.rs
1use std::collections::HashMap;
2use std::pin::Pin;
3use std::task::{Context, Poll};
4
5use futures_util::Stream;
6use pin_project_lite::pin_project;
7use serde::{Deserialize, Serialize};
8
9use crate::client::Client;
10use crate::error::Result;
11
12/// Deserialize null as empty Vec (Gemini sometimes returns null for array fields).
13fn null_as_empty_vec<'de, D, T>(deserializer: D) -> std::result::Result<Vec<T>, D::Error>
14where
15 D: serde::Deserializer<'de>,
16 T: Deserialize<'de>,
17{
18 Option::<Vec<T>>::deserialize(deserializer).map(|v| v.unwrap_or_default())
19}
20
21/// Deserialize null as None for Option<Vec<T>> fields.
22fn deserialize_opt_vec<'de, D, T>(deserializer: D) -> std::result::Result<Option<Vec<T>>, D::Error>
23where
24 D: serde::Deserializer<'de>,
25 T: Deserialize<'de>,
26{
27 // null → None, [] → Some([]), [...] → Some([...])
28 Ok(Option::<Vec<T>>::deserialize(deserializer).unwrap_or(None))
29}
30
31/// Request body for text generation.
32#[derive(Debug, Clone, Serialize, Default)]
33pub struct ChatRequest {
34 /// Model ID that determines provider routing (e.g. "claude-sonnet-4-6", "grok-4-1-fast-non-reasoning").
35 pub model: String,
36
37 /// Conversation history.
38 pub messages: Vec<ChatMessage>,
39
40 /// Functions the model can call.
41 #[serde(skip_serializing_if = "Option::is_none")]
42 pub tools: Option<Vec<ChatTool>>,
43
44 /// Constrains tool use: "auto" (default), "any" (force tool use), "none", or a specific tool name.
45 #[serde(skip_serializing_if = "Option::is_none")]
46 pub tool_choice: Option<String>,
47
48 /// JSON Schema for structured output constraints.
49 #[serde(skip_serializing_if = "Option::is_none")]
50 pub output_schema: Option<serde_json::Value>,
51
52 /// Enables server-sent event streaming. Set automatically by `chat_stream`.
53 #[serde(skip_serializing_if = "Option::is_none")]
54 pub stream: Option<bool>,
55
56 /// Controls randomness (0.0-2.0).
57 #[serde(skip_serializing_if = "Option::is_none")]
58 pub temperature: Option<f64>,
59
60 /// Limits the response length.
61 #[serde(skip_serializing_if = "Option::is_none")]
62 pub max_tokens: Option<i32>,
63
64 /// Provider-specific settings (e.g. Anthropic thinking, xAI search).
65 #[serde(skip_serializing_if = "Option::is_none")]
66 pub provider_options: Option<HashMap<String, serde_json::Value>>,
67}
68
69/// A single message in a conversation.
70#[derive(Debug, Clone, Serialize, Deserialize, Default)]
71pub struct ChatMessage {
72 /// One of "system", "user", "assistant", or "tool".
73 pub role: String,
74
75 /// Text content of the message.
76 #[serde(skip_serializing_if = "Option::is_none")]
77 pub content: Option<String>,
78
79 /// Structured content for assistant messages with tool calls.
80 /// When present, takes precedence over `content`.
81 #[serde(skip_serializing_if = "Option::is_none", deserialize_with = "deserialize_opt_vec", default)]
82 pub content_blocks: Option<Vec<ContentBlock>>,
83
84 /// Required when role is "tool" — references the tool_use ID.
85 #[serde(skip_serializing_if = "Option::is_none")]
86 pub tool_call_id: Option<String>,
87
88 /// Whether a tool result is an error.
89 #[serde(skip_serializing_if = "Option::is_none")]
90 pub is_error: Option<bool>,
91}
92
93impl ChatMessage {
94 /// Creates a user message.
95 pub fn user(content: impl Into<String>) -> Self {
96 Self {
97 role: "user".to_string(),
98 content: Some(content.into()),
99 ..Default::default()
100 }
101 }
102
103 /// Creates an assistant message.
104 pub fn assistant(content: impl Into<String>) -> Self {
105 Self {
106 role: "assistant".to_string(),
107 content: Some(content.into()),
108 ..Default::default()
109 }
110 }
111
112 /// Creates a system message.
113 pub fn system(content: impl Into<String>) -> Self {
114 Self {
115 role: "system".to_string(),
116 content: Some(content.into()),
117 ..Default::default()
118 }
119 }
120
121 /// Creates a tool result message.
122 pub fn tool_result(tool_call_id: impl Into<String>, content: impl Into<String>) -> Self {
123 Self {
124 role: "tool".to_string(),
125 content: Some(content.into()),
126 tool_call_id: Some(tool_call_id.into()),
127 ..Default::default()
128 }
129 }
130
131 /// Creates a tool error result message.
132 pub fn tool_error(tool_call_id: impl Into<String>, content: impl Into<String>) -> Self {
133 Self {
134 role: "tool".to_string(),
135 content: Some(content.into()),
136 tool_call_id: Some(tool_call_id.into()),
137 is_error: Some(true),
138 ..Default::default()
139 }
140 }
141}
142
143/// A single block in the response content array.
144#[derive(Debug, Clone, Serialize, Deserialize, Default)]
145pub struct ContentBlock {
146 /// One of "text", "thinking", or "tool_use".
147 #[serde(rename = "type")]
148 pub block_type: String,
149
150 /// Content for "text" and "thinking" blocks.
151 #[serde(skip_serializing_if = "Option::is_none")]
152 pub text: Option<String>,
153
154 /// Tool call identifier for "tool_use" blocks.
155 #[serde(skip_serializing_if = "Option::is_none")]
156 pub id: Option<String>,
157
158 /// Function name for "tool_use" blocks.
159 #[serde(skip_serializing_if = "Option::is_none")]
160 pub name: Option<String>,
161
162 /// Function arguments for "tool_use" blocks.
163 #[serde(skip_serializing_if = "Option::is_none")]
164 pub input: Option<HashMap<String, serde_json::Value>>,
165
166 /// Gemini thought signature — must be echoed back with tool results.
167 #[serde(skip_serializing_if = "Option::is_none")]
168 pub thought_signature: Option<String>,
169
170 /// Base64-encoded data for file/image content blocks.
171 #[serde(skip_serializing_if = "Option::is_none")]
172 pub data: Option<String>,
173
174 /// Filename for file content blocks.
175 #[serde(skip_serializing_if = "Option::is_none")]
176 pub file_name: Option<String>,
177
178 /// MIME type for file/image/file_uri content blocks.
179 #[serde(skip_serializing_if = "Option::is_none")]
180 pub mime_type: Option<String>,
181
182 /// Remote-resource URL for `file_uri` content blocks. Gemini
183 /// accepts YouTube URLs verbatim here (with `mime_type: "video/mp4"`)
184 /// — no upload step needed for public videos. Other providers
185 /// may require a pre-uploaded resource URI; unsupported URIs are
186 /// silently skipped server-side rather than erroring the request.
187 #[serde(skip_serializing_if = "Option::is_none")]
188 pub file_uri: Option<String>,
189}
190
191/// Defines a function the model can call.
192#[derive(Debug, Clone, Serialize, Default)]
193pub struct ChatTool {
194 /// Function name.
195 pub name: String,
196
197 /// Explains what the function does.
198 pub description: String,
199
200 /// JSON Schema for the function's arguments.
201 #[serde(skip_serializing_if = "Option::is_none")]
202 pub parameters: Option<serde_json::Value>,
203
204 /// Enable guaranteed schema validation on tool inputs (Anthropic, OpenAI).
205 #[serde(skip_serializing_if = "Option::is_none")]
206 pub strict: Option<bool>,
207}
208
209/// Response from a non-streaming chat request.
210#[derive(Debug, Clone, Deserialize)]
211pub struct ChatResponse {
212 /// Unique request identifier.
213 pub id: String,
214
215 /// Model that generated the response.
216 pub model: String,
217
218 /// List of content blocks (text, thinking, tool_use).
219 #[serde(default, deserialize_with = "null_as_empty_vec")]
220 pub content: Vec<ContentBlock>,
221
222 /// Token counts and cost.
223 pub usage: Option<ChatUsage>,
224
225 /// Why generation stopped ("end_turn", "tool_use", "max_tokens").
226 #[serde(default)]
227 pub stop_reason: String,
228
229 /// Citations from web search (when search is enabled via provider_options).
230 #[serde(default, deserialize_with = "null_as_empty_vec")]
231 pub citations: Vec<Citation>,
232
233 /// Total cost from the X-QAI-Cost-Ticks header.
234 #[serde(skip)]
235 pub cost_ticks: i64,
236
237 /// From the X-QAI-Request-Id header.
238 #[serde(skip)]
239 pub request_id: String,
240}
241
242impl ChatResponse {
243 /// Returns the concatenated text content, ignoring thinking and tool_use blocks.
244 pub fn text(&self) -> String {
245 self.content
246 .iter()
247 .filter(|b| b.block_type == "text")
248 .filter_map(|b| b.text.as_deref())
249 .collect::<Vec<_>>()
250 .join("")
251 }
252
253 /// Returns the concatenated thinking content.
254 pub fn thinking(&self) -> String {
255 self.content
256 .iter()
257 .filter(|b| b.block_type == "thinking")
258 .filter_map(|b| b.text.as_deref())
259 .collect::<Vec<_>>()
260 .join("")
261 }
262
263 /// Returns all tool_use blocks from the response.
264 pub fn tool_calls(&self) -> Vec<&ContentBlock> {
265 self.content
266 .iter()
267 .filter(|b| b.block_type == "tool_use")
268 .collect()
269 }
270}
271
272/// A source reference from web search grounding.
273#[derive(Debug, Clone, Deserialize, Serialize)]
274pub struct Citation {
275 /// Title of the cited source.
276 #[serde(default)]
277 pub title: String,
278
279 /// URL of the cited source.
280 #[serde(default)]
281 pub url: String,
282
283 /// Relevant text snippet from the source.
284 #[serde(default)]
285 pub text: String,
286
287 /// Position in the response.
288 #[serde(default)]
289 pub index: i32,
290}
291
292/// Token counts and cost for a chat response.
293#[derive(Debug, Clone, Deserialize)]
294pub struct ChatUsage {
295 pub input_tokens: i32,
296 pub output_tokens: i32,
297 pub cost_ticks: i64,
298
299 /// Input tokens served from the provider's prompt cache, billed
300 /// at the (lower) cached rate. Omitted on responses with no
301 /// cache hit. Backend wire shape: `cached_tokens` (i32);
302 /// promoted to Option<i64> here for headroom on future
303 /// long-cache scenarios (multi-hour video transcripts, etc.).
304 /// Multi-turn billing audits reconcile by computing
305 /// (non-cached) input_tokens vs (cached) cached_tokens — both
306 /// roll into the provider's billable total.
307 #[serde(default)]
308 pub cached_tokens: Option<i64>,
309
310 /// Sub-component of `output_tokens` that was model reasoning /
311 /// thinking output (Gemini 3.x's `thoughtTokens`, OpenAI o-
312 /// series' reasoning tokens, Anthropic's extended-thinking
313 /// blocks). Omitted on responses from non-reasoning models.
314 /// Total billable output = `output_tokens` (which already
315 /// includes the reasoning component — this field is just
316 /// transparency on how much of that was thinking).
317 #[serde(default)]
318 pub reasoning_tokens: Option<i64>,
319}
320
321/// Response shape from `POST /qai/v1/chat/estimate`. Returned by
322/// `Client::estimate_chat`.
323///
324/// `estimated_cost_ticks` is the upfront reservation the actual
325/// `chat` call would book — it's a worst-case ceiling, not a
326/// prediction of the final settle. For text-only payloads, expected
327/// settle is close to this number; for video / multimodal inputs
328/// the ceiling can over-estimate (Gemini's reasoning budget +
329/// `max_tokens` cap drive the output side) and the post-call
330/// settle refunds the difference. Either way, this is the number
331/// the user must have available to send the request.
332///
333/// `estimated_cost_usd` is the same value pre-divided by
334/// `TicksPerUSD` for convenience — no need to know the per-tick
335/// rate on the client.
336#[derive(Debug, Clone, Deserialize)]
337pub struct EstimateResponse {
338 pub estimated_cost_ticks: i64,
339 pub estimated_cost_usd: f64,
340 /// Echo of the model name the estimate was computed against —
341 /// handy when the caller wants to display "≈ X credits on
342 /// gemini-flash-latest" without re-reading the request.
343 #[serde(default)]
344 pub model: String,
345}
346
347/// A single event from an SSE chat stream.
348///
349/// Tool-use streaming uses a triplet of events since v0.7:
350/// `tool_use_start` carries `tool_use_start`, `tool_use_input_delta`
351/// carries `tool_use_input_delta`, and `tool_use_complete` carries
352/// `tool_use_complete`. The legacy atomic `tool_use` event is still
353/// emitted by backends that haven't shipped the triplet yet — for new
354/// code, prefer the triplet fields.
355#[derive(Debug, Clone)]
356pub struct StreamEvent {
357 /// Event type: "content_delta", "thinking_delta",
358 /// "tool_use_start", "tool_use_input_delta", "tool_use_complete",
359 /// "tool_use" (legacy), "usage", "heartbeat", "error", "done".
360 pub event_type: String,
361
362 /// Incremental text for content_delta and thinking_delta events.
363 pub delta: Option<StreamDelta>,
364
365 /// Populated for legacy atomic tool_use events.
366 pub tool_use: Option<StreamToolUse>,
367
368 /// Populated for tool_use_start events.
369 pub tool_use_start: Option<StreamToolUseStart>,
370
371 /// Populated for tool_use_input_delta events.
372 pub tool_use_input_delta: Option<StreamToolUseInputDelta>,
373
374 /// Populated for tool_use_complete events.
375 pub tool_use_complete: Option<StreamToolUseComplete>,
376
377 /// Populated for usage events.
378 pub usage: Option<ChatUsage>,
379
380 /// Populated for error events.
381 pub error: Option<String>,
382
383 /// True when the stream is complete.
384 pub done: bool,
385}
386
387/// Incremental text in a streaming event.
388#[derive(Debug, Clone, Deserialize)]
389pub struct StreamDelta {
390 pub text: String,
391}
392
393/// A tool call from a legacy (atomic) streaming event.
394#[derive(Debug, Clone, Deserialize)]
395pub struct StreamToolUse {
396 pub id: String,
397 pub name: String,
398 pub input: HashMap<String, serde_json::Value>,
399}
400
401/// Tool-call start event — fires once before any input deltas.
402#[derive(Debug, Clone, Deserialize)]
403pub struct StreamToolUseStart {
404 pub id: String,
405 pub name: String,
406}
407
408/// Tool-call input delta — fires zero or more times with raw JSON fragments.
409#[derive(Debug, Clone, Deserialize)]
410pub struct StreamToolUseInputDelta {
411 pub id: String,
412 /// Raw JSON fragment. May not parse on its own; accumulate until
413 /// the corresponding `tool_use_complete` event arrives with the
414 /// authoritative `input`.
415 pub partial_json: String,
416}
417
418/// Tool-call completion event — fires exactly once per call with the
419/// server-accumulated, fully-parsed arguments.
420#[derive(Debug, Clone, Deserialize)]
421pub struct StreamToolUseComplete {
422 pub id: String,
423 pub name: String,
424 pub input: HashMap<String, serde_json::Value>,
425}
426
427/// Raw JSON from the SSE stream before parsing into typed fields.
428#[derive(Deserialize)]
429struct RawStreamEvent {
430 #[serde(rename = "type")]
431 event_type: String,
432 #[serde(default)]
433 delta: Option<StreamDelta>,
434 #[serde(default)]
435 id: Option<String>,
436 #[serde(default)]
437 name: Option<String>,
438 #[serde(default)]
439 input: Option<HashMap<String, serde_json::Value>>,
440 /// Carried by `tool_use_input_delta` events — a raw JSON fragment.
441 #[serde(default)]
442 partial_json: Option<String>,
443 #[serde(default)]
444 input_tokens: Option<i32>,
445 #[serde(default)]
446 output_tokens: Option<i32>,
447 #[serde(default)]
448 cost_ticks: Option<i64>,
449 #[serde(default)]
450 message: Option<String>,
451}
452
453pin_project! {
454 /// An async stream of [`StreamEvent`]s from an SSE chat response.
455 pub struct ChatStream {
456 #[pin]
457 inner: Pin<Box<dyn Stream<Item = StreamEvent> + Send>>,
458 }
459}
460
461impl Stream for ChatStream {
462 type Item = StreamEvent;
463
464 fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
465 self.project().inner.poll_next(cx)
466 }
467}
468
469impl Client {
470 /// Sends a non-streaming text generation request.
471 pub async fn chat(&self, req: &ChatRequest) -> Result<ChatResponse> {
472 let mut req = req.clone();
473 req.stream = Some(false);
474
475 let (mut resp, meta) = self.post_json::<ChatRequest, ChatResponse>("/qai/v1/chat", &req).await?;
476 resp.cost_ticks = meta.cost_ticks;
477 resp.request_id = meta.request_id;
478 if resp.model.is_empty() {
479 resp.model = meta.model;
480 }
481 Ok(resp)
482 }
483
484 /// Estimates the upfront credit reservation a `chat` call with
485 /// the same `ChatRequest` would book — WITHOUT making the
486 /// provider call or deducting credits. Use this to render a
487 /// "this will cost ≈ X credits" hint in the UI before the user
488 /// commits to a payload (notably long YouTube videos attached
489 /// via `ContentBlock.file_uri`, which bill at ~263 tokens per
490 /// second and can dwarf a text-only chat by 1000×).
491 ///
492 /// Wraps `POST /qai/v1/chat/estimate`. Same auth as `chat()`.
493 ///
494 /// # Example
495 ///
496 /// ```no_run
497 /// # async fn example() -> quantum_sdk::Result<()> {
498 /// let client = quantum_sdk::Client::new("qai_...");
499 /// let req = quantum_sdk::ChatRequest {
500 /// model: "gemini-flash-latest".into(),
501 /// messages: vec![quantum_sdk::ChatMessage::user("hi")],
502 /// ..Default::default()
503 /// };
504 /// let est = client.estimate_chat(&req).await?;
505 /// println!("would cost ~{} ticks (~${})", est.estimated_cost_ticks, est.estimated_cost_usd);
506 /// # Ok(())
507 /// # }
508 /// ```
509 pub async fn estimate_chat(&self, req: &ChatRequest) -> Result<EstimateResponse> {
510 // Drop `stream` from the estimate payload — the backend
511 // doesn't care about streaming for cost purposes (output
512 // ceiling is the same either way) and including it would
513 // make the SDK shape diverge needlessly from the JSON the
514 // server sees on the wire.
515 let mut req = req.clone();
516 req.stream = None;
517 let (resp, _meta) = self
518 .post_json::<ChatRequest, EstimateResponse>("/qai/v1/chat/estimate", &req)
519 .await?;
520 Ok(resp)
521 }
522
523 /// Sends a streaming text generation request and returns an async stream of events.
524 ///
525 /// # Example
526 ///
527 /// ```no_run
528 /// use futures_util::StreamExt;
529 ///
530 /// # async fn example() -> quantum_sdk::Result<()> {
531 /// let client = quantum_sdk::Client::new("key");
532 /// let req = quantum_sdk::ChatRequest {
533 /// model: "claude-sonnet-4-6".into(),
534 /// messages: vec![quantum_sdk::ChatMessage::user("Hello!")],
535 /// ..Default::default()
536 /// };
537 /// let mut stream = client.chat_stream(&req).await?;
538 /// while let Some(ev) = stream.next().await {
539 /// if let Some(delta) = &ev.delta {
540 /// print!("{}", delta.text);
541 /// }
542 /// }
543 /// # Ok(())
544 /// # }
545 /// ```
546 pub async fn chat_stream(&self, req: &ChatRequest) -> Result<ChatStream> {
547 let mut req = req.clone();
548 req.stream = Some(true);
549
550 let (resp, _meta) = self.post_stream_raw("/qai/v1/chat", &req).await?;
551
552 let byte_stream = resp.bytes_stream();
553 let event_stream = sse_to_events(byte_stream);
554
555 Ok(ChatStream {
556 inner: Box::pin(event_stream),
557 })
558 }
559}
560
561/// Converts a byte stream into a stream of parsed [`StreamEvent`]s.
562fn sse_to_events<S>(byte_stream: S) -> impl Stream<Item = StreamEvent> + Send
563where
564 S: Stream<Item = std::result::Result<bytes::Bytes, reqwest::Error>> + Send + 'static,
565{
566 // Pin the byte stream so we can poll it inside unfold.
567 let pinned_stream = Box::pin(byte_stream);
568
569 // Accumulate raw bytes into lines to avoid splitting multi-byte UTF-8 characters.
570 // Only convert to String when we have a complete newline-terminated line.
571 let line_stream = futures_util::stream::unfold(
572 (pinned_stream, Vec::<u8>::new()),
573 |(mut stream, mut buffer)| async move {
574 use futures_util::StreamExt;
575 loop {
576 // Check if we have a complete line in the buffer.
577 if let Some(newline_pos) = buffer.iter().position(|&b| b == b'\n') {
578 let mut line_bytes = buffer[..newline_pos].to_vec();
579 buffer = buffer[newline_pos + 1..].to_vec();
580 // Trim trailing \r
581 if line_bytes.last() == Some(&b'\r') {
582 line_bytes.pop();
583 }
584 let line = String::from_utf8_lossy(&line_bytes).into_owned();
585 return Some((line, (stream, buffer)));
586 }
587
588 // Read more data.
589 match stream.next().await {
590 Some(Ok(chunk)) => {
591 buffer.extend_from_slice(&chunk);
592 }
593 Some(Err(_)) | None => {
594 // Stream ended. Emit remaining buffer if non-empty.
595 if !buffer.is_empty() {
596 let remaining = String::from_utf8_lossy(&buffer).into_owned();
597 buffer.clear();
598 return Some((remaining, (stream, buffer)));
599 }
600 return None;
601 }
602 }
603 }
604 },
605 );
606
607 let pinned_lines = Box::pin(line_stream);
608 futures_util::stream::unfold(pinned_lines, |mut lines| async move {
609 use futures_util::StreamExt;
610 loop {
611 let line = lines.next().await?;
612
613 if !line.starts_with("data: ") {
614 continue;
615 }
616 let payload = &line["data: ".len()..];
617
618 if payload == "[DONE]" {
619 let ev = StreamEvent {
620 event_type: "done".to_string(),
621 delta: None,
622 tool_use: None,
623 tool_use_start: None,
624 tool_use_input_delta: None,
625 tool_use_complete: None,
626 usage: None,
627 error: None,
628 done: true,
629 };
630 return Some((ev, lines));
631 }
632
633 let raw: RawStreamEvent = match serde_json::from_str(payload) {
634 Ok(r) => r,
635 Err(e) => {
636 let ev = StreamEvent {
637 event_type: "error".to_string(),
638 delta: None,
639 tool_use: None,
640 tool_use_start: None,
641 tool_use_input_delta: None,
642 tool_use_complete: None,
643 usage: None,
644 error: Some(format!("parse SSE: {e}")),
645 done: false,
646 };
647 return Some((ev, lines));
648 }
649 };
650
651 let mut ev = StreamEvent {
652 event_type: raw.event_type.clone(),
653 delta: None,
654 tool_use: None,
655 tool_use_start: None,
656 tool_use_input_delta: None,
657 tool_use_complete: None,
658 usage: None,
659 error: None,
660 done: false,
661 };
662
663 match raw.event_type.as_str() {
664 "content_delta" | "thinking_delta" => {
665 ev.delta = raw.delta;
666 }
667 "tool_use" => {
668 // Legacy atomic event — kept for back-compat with
669 // backends that haven't shipped the triplet (v0.7+).
670 ev.tool_use = Some(StreamToolUse {
671 id: raw.id.unwrap_or_default(),
672 name: raw.name.unwrap_or_default(),
673 input: raw.input.unwrap_or_default(),
674 });
675 }
676 "tool_use_start" => {
677 ev.tool_use_start = Some(StreamToolUseStart {
678 id: raw.id.unwrap_or_default(),
679 name: raw.name.unwrap_or_default(),
680 });
681 }
682 "tool_use_input_delta" => {
683 ev.tool_use_input_delta = Some(StreamToolUseInputDelta {
684 id: raw.id.unwrap_or_default(),
685 partial_json: raw.partial_json.unwrap_or_default(),
686 });
687 }
688 "tool_use_complete" => {
689 ev.tool_use_complete = Some(StreamToolUseComplete {
690 id: raw.id.unwrap_or_default(),
691 name: raw.name.unwrap_or_default(),
692 input: raw.input.unwrap_or_default(),
693 });
694 }
695 "usage" => {
696 ev.usage = Some(ChatUsage {
697 input_tokens: raw.input_tokens.unwrap_or(0),
698 output_tokens: raw.output_tokens.unwrap_or(0),
699 cost_ticks: raw.cost_ticks.unwrap_or(0),
700 // Stream "usage" events carry only the
701 // running totals — cached/reasoning splits
702 // arrive on the final non-stream envelope.
703 // None here is faithful, not a bug.
704 cached_tokens: None,
705 reasoning_tokens: None,
706 });
707 }
708 "error" => {
709 ev.error = raw.message;
710 }
711 "heartbeat" => {}
712 _ => {}
713 }
714
715 return Some((ev, lines));
716 }
717 })
718}