Skip to main content

quantum_sdk/
chat.rs

1use std::collections::HashMap;
2use std::pin::Pin;
3use std::task::{Context, Poll};
4
5use futures_util::Stream;
6use pin_project_lite::pin_project;
7use serde::{Deserialize, Serialize};
8
9use crate::client::Client;
10use crate::error::Result;
11
12/// Deserialize null as empty Vec (Gemini sometimes returns null for array fields).
13fn null_as_empty_vec<'de, D, T>(deserializer: D) -> std::result::Result<Vec<T>, D::Error>
14where
15    D: serde::Deserializer<'de>,
16    T: Deserialize<'de>,
17{
18    Option::<Vec<T>>::deserialize(deserializer).map(|v| v.unwrap_or_default())
19}
20
21/// Deserialize null as None for Option<Vec<T>> fields.
22fn deserialize_opt_vec<'de, D, T>(deserializer: D) -> std::result::Result<Option<Vec<T>>, D::Error>
23where
24    D: serde::Deserializer<'de>,
25    T: Deserialize<'de>,
26{
27    // null → None, [] → Some([]), [...] → Some([...])
28    Ok(Option::<Vec<T>>::deserialize(deserializer).unwrap_or(None))
29}
30
31/// Request body for text generation.
32#[derive(Debug, Clone, Serialize, Default)]
33pub struct ChatRequest {
34    /// Model ID that determines provider routing (e.g. "claude-sonnet-4-6", "grok-4-1-fast-non-reasoning").
35    pub model: String,
36
37    /// Conversation history.
38    pub messages: Vec<ChatMessage>,
39
40    /// Functions the model can call.
41    #[serde(skip_serializing_if = "Option::is_none")]
42    pub tools: Option<Vec<ChatTool>>,
43
44    /// Constrains tool use: "auto" (default), "any" (force tool use), "none", or a specific tool name.
45    #[serde(skip_serializing_if = "Option::is_none")]
46    pub tool_choice: Option<String>,
47
48    /// JSON Schema for structured output constraints.
49    #[serde(skip_serializing_if = "Option::is_none")]
50    pub output_schema: Option<serde_json::Value>,
51
52    /// Enables server-sent event streaming. Set automatically by `chat_stream`.
53    #[serde(skip_serializing_if = "Option::is_none")]
54    pub stream: Option<bool>,
55
56    /// Controls randomness (0.0-2.0).
57    #[serde(skip_serializing_if = "Option::is_none")]
58    pub temperature: Option<f64>,
59
60    /// Limits the response length.
61    #[serde(skip_serializing_if = "Option::is_none")]
62    pub max_tokens: Option<i32>,
63
64    /// Provider-specific settings (e.g. Anthropic thinking, xAI search).
65    #[serde(skip_serializing_if = "Option::is_none")]
66    pub provider_options: Option<HashMap<String, serde_json::Value>>,
67}
68
69/// A single message in a conversation.
70#[derive(Debug, Clone, Serialize, Deserialize, Default)]
71pub struct ChatMessage {
72    /// One of "system", "user", "assistant", or "tool".
73    pub role: String,
74
75    /// Text content of the message.
76    #[serde(skip_serializing_if = "Option::is_none")]
77    pub content: Option<String>,
78
79    /// Structured content for assistant messages with tool calls.
80    /// When present, takes precedence over `content`.
81    #[serde(skip_serializing_if = "Option::is_none", deserialize_with = "deserialize_opt_vec", default)]
82    pub content_blocks: Option<Vec<ContentBlock>>,
83
84    /// Required when role is "tool" — references the tool_use ID.
85    #[serde(skip_serializing_if = "Option::is_none")]
86    pub tool_call_id: Option<String>,
87
88    /// Whether a tool result is an error.
89    #[serde(skip_serializing_if = "Option::is_none")]
90    pub is_error: Option<bool>,
91}
92
93impl ChatMessage {
94    /// Creates a user message.
95    pub fn user(content: impl Into<String>) -> Self {
96        Self {
97            role: "user".to_string(),
98            content: Some(content.into()),
99            ..Default::default()
100        }
101    }
102
103    /// Creates an assistant message.
104    pub fn assistant(content: impl Into<String>) -> Self {
105        Self {
106            role: "assistant".to_string(),
107            content: Some(content.into()),
108            ..Default::default()
109        }
110    }
111
112    /// Creates a system message.
113    pub fn system(content: impl Into<String>) -> Self {
114        Self {
115            role: "system".to_string(),
116            content: Some(content.into()),
117            ..Default::default()
118        }
119    }
120
121    /// Creates a tool result message.
122    pub fn tool_result(tool_call_id: impl Into<String>, content: impl Into<String>) -> Self {
123        Self {
124            role: "tool".to_string(),
125            content: Some(content.into()),
126            tool_call_id: Some(tool_call_id.into()),
127            ..Default::default()
128        }
129    }
130
131    /// Creates a tool error result message.
132    pub fn tool_error(tool_call_id: impl Into<String>, content: impl Into<String>) -> Self {
133        Self {
134            role: "tool".to_string(),
135            content: Some(content.into()),
136            tool_call_id: Some(tool_call_id.into()),
137            is_error: Some(true),
138            ..Default::default()
139        }
140    }
141}
142
143/// A single block in the response content array.
144#[derive(Debug, Clone, Serialize, Deserialize, Default)]
145pub struct ContentBlock {
146    /// One of "text", "thinking", or "tool_use".
147    #[serde(rename = "type")]
148    pub block_type: String,
149
150    /// Content for "text" and "thinking" blocks.
151    #[serde(skip_serializing_if = "Option::is_none")]
152    pub text: Option<String>,
153
154    /// Tool call identifier for "tool_use" blocks.
155    #[serde(skip_serializing_if = "Option::is_none")]
156    pub id: Option<String>,
157
158    /// Function name for "tool_use" blocks.
159    #[serde(skip_serializing_if = "Option::is_none")]
160    pub name: Option<String>,
161
162    /// Function arguments for "tool_use" blocks.
163    #[serde(skip_serializing_if = "Option::is_none")]
164    pub input: Option<HashMap<String, serde_json::Value>>,
165
166    /// Gemini thought signature — must be echoed back with tool results.
167    #[serde(skip_serializing_if = "Option::is_none")]
168    pub thought_signature: Option<String>,
169
170    /// Base64-encoded data for file/image content blocks.
171    #[serde(skip_serializing_if = "Option::is_none")]
172    pub data: Option<String>,
173
174    /// Filename for file content blocks.
175    #[serde(skip_serializing_if = "Option::is_none")]
176    pub file_name: Option<String>,
177
178    /// MIME type for file/image content blocks.
179    #[serde(skip_serializing_if = "Option::is_none")]
180    pub mime_type: Option<String>,
181}
182
183/// Defines a function the model can call.
184#[derive(Debug, Clone, Serialize, Default)]
185pub struct ChatTool {
186    /// Function name.
187    pub name: String,
188
189    /// Explains what the function does.
190    pub description: String,
191
192    /// JSON Schema for the function's arguments.
193    #[serde(skip_serializing_if = "Option::is_none")]
194    pub parameters: Option<serde_json::Value>,
195
196    /// Enable guaranteed schema validation on tool inputs (Anthropic, OpenAI).
197    #[serde(skip_serializing_if = "Option::is_none")]
198    pub strict: Option<bool>,
199}
200
201/// Response from a non-streaming chat request.
202#[derive(Debug, Clone, Deserialize)]
203pub struct ChatResponse {
204    /// Unique request identifier.
205    pub id: String,
206
207    /// Model that generated the response.
208    pub model: String,
209
210    /// List of content blocks (text, thinking, tool_use).
211    #[serde(default, deserialize_with = "null_as_empty_vec")]
212    pub content: Vec<ContentBlock>,
213
214    /// Token counts and cost.
215    pub usage: Option<ChatUsage>,
216
217    /// Why generation stopped ("end_turn", "tool_use", "max_tokens").
218    #[serde(default)]
219    pub stop_reason: String,
220
221    /// Citations from web search (when search is enabled via provider_options).
222    #[serde(default, deserialize_with = "null_as_empty_vec")]
223    pub citations: Vec<Citation>,
224
225    /// Total cost from the X-QAI-Cost-Ticks header.
226    #[serde(skip)]
227    pub cost_ticks: i64,
228
229    /// From the X-QAI-Request-Id header.
230    #[serde(skip)]
231    pub request_id: String,
232}
233
234impl ChatResponse {
235    /// Returns the concatenated text content, ignoring thinking and tool_use blocks.
236    pub fn text(&self) -> String {
237        self.content
238            .iter()
239            .filter(|b| b.block_type == "text")
240            .filter_map(|b| b.text.as_deref())
241            .collect::<Vec<_>>()
242            .join("")
243    }
244
245    /// Returns the concatenated thinking content.
246    pub fn thinking(&self) -> String {
247        self.content
248            .iter()
249            .filter(|b| b.block_type == "thinking")
250            .filter_map(|b| b.text.as_deref())
251            .collect::<Vec<_>>()
252            .join("")
253    }
254
255    /// Returns all tool_use blocks from the response.
256    pub fn tool_calls(&self) -> Vec<&ContentBlock> {
257        self.content
258            .iter()
259            .filter(|b| b.block_type == "tool_use")
260            .collect()
261    }
262}
263
264/// A source reference from web search grounding.
265#[derive(Debug, Clone, Deserialize, Serialize)]
266pub struct Citation {
267    /// Title of the cited source.
268    #[serde(default)]
269    pub title: String,
270
271    /// URL of the cited source.
272    #[serde(default)]
273    pub url: String,
274
275    /// Relevant text snippet from the source.
276    #[serde(default)]
277    pub text: String,
278
279    /// Position in the response.
280    #[serde(default)]
281    pub index: i32,
282}
283
284/// Token counts and cost for a chat response.
285#[derive(Debug, Clone, Deserialize)]
286pub struct ChatUsage {
287    pub input_tokens: i32,
288    pub output_tokens: i32,
289    pub cost_ticks: i64,
290}
291
292/// A single event from an SSE chat stream.
293#[derive(Debug, Clone)]
294pub struct StreamEvent {
295    /// Event type: "content_delta", "thinking_delta", "tool_use", "usage", "heartbeat", "error", "done".
296    pub event_type: String,
297
298    /// Incremental text for content_delta and thinking_delta events.
299    pub delta: Option<StreamDelta>,
300
301    /// Populated for tool_use events.
302    pub tool_use: Option<StreamToolUse>,
303
304    /// Populated for usage events.
305    pub usage: Option<ChatUsage>,
306
307    /// Populated for error events.
308    pub error: Option<String>,
309
310    /// True when the stream is complete.
311    pub done: bool,
312}
313
314/// Incremental text in a streaming event.
315#[derive(Debug, Clone, Deserialize)]
316pub struct StreamDelta {
317    pub text: String,
318}
319
320/// A tool call from a streaming event.
321#[derive(Debug, Clone, Deserialize)]
322pub struct StreamToolUse {
323    pub id: String,
324    pub name: String,
325    pub input: HashMap<String, serde_json::Value>,
326}
327
328/// Raw JSON from the SSE stream before parsing into typed fields.
329#[derive(Deserialize)]
330struct RawStreamEvent {
331    #[serde(rename = "type")]
332    event_type: String,
333    #[serde(default)]
334    delta: Option<StreamDelta>,
335    #[serde(default)]
336    id: Option<String>,
337    #[serde(default)]
338    name: Option<String>,
339    #[serde(default)]
340    input: Option<HashMap<String, serde_json::Value>>,
341    #[serde(default)]
342    input_tokens: Option<i32>,
343    #[serde(default)]
344    output_tokens: Option<i32>,
345    #[serde(default)]
346    cost_ticks: Option<i64>,
347    #[serde(default)]
348    message: Option<String>,
349}
350
351pin_project! {
352    /// An async stream of [`StreamEvent`]s from an SSE chat response.
353    pub struct ChatStream {
354        #[pin]
355        inner: Pin<Box<dyn Stream<Item = StreamEvent> + Send>>,
356    }
357}
358
359impl Stream for ChatStream {
360    type Item = StreamEvent;
361
362    fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
363        self.project().inner.poll_next(cx)
364    }
365}
366
367impl Client {
368    /// Sends a non-streaming text generation request.
369    pub async fn chat(&self, req: &ChatRequest) -> Result<ChatResponse> {
370        let mut req = req.clone();
371        req.stream = Some(false);
372
373        let (mut resp, meta) = self.post_json::<ChatRequest, ChatResponse>("/qai/v1/chat", &req).await?;
374        resp.cost_ticks = meta.cost_ticks;
375        resp.request_id = meta.request_id;
376        if resp.model.is_empty() {
377            resp.model = meta.model;
378        }
379        Ok(resp)
380    }
381
382    /// Sends a streaming text generation request and returns an async stream of events.
383    ///
384    /// # Example
385    ///
386    /// ```no_run
387    /// use futures_util::StreamExt;
388    ///
389    /// # async fn example() -> quantum_sdk::Result<()> {
390    /// let client = quantum_sdk::Client::new("key");
391    /// let req = quantum_sdk::ChatRequest {
392    ///     model: "claude-sonnet-4-6".into(),
393    ///     messages: vec![quantum_sdk::ChatMessage::user("Hello!")],
394    ///     ..Default::default()
395    /// };
396    /// let mut stream = client.chat_stream(&req).await?;
397    /// while let Some(ev) = stream.next().await {
398    ///     if let Some(delta) = &ev.delta {
399    ///         print!("{}", delta.text);
400    ///     }
401    /// }
402    /// # Ok(())
403    /// # }
404    /// ```
405    pub async fn chat_stream(&self, req: &ChatRequest) -> Result<ChatStream> {
406        let mut req = req.clone();
407        req.stream = Some(true);
408
409        let (resp, _meta) = self.post_stream_raw("/qai/v1/chat", &req).await?;
410
411        let byte_stream = resp.bytes_stream();
412        let event_stream = sse_to_events(byte_stream);
413
414        Ok(ChatStream {
415            inner: Box::pin(event_stream),
416        })
417    }
418}
419
420/// Converts a byte stream into a stream of parsed [`StreamEvent`]s.
421fn sse_to_events<S>(byte_stream: S) -> impl Stream<Item = StreamEvent> + Send
422where
423    S: Stream<Item = std::result::Result<bytes::Bytes, reqwest::Error>> + Send + 'static,
424{
425    // Pin the byte stream so we can poll it inside unfold.
426    let pinned_stream = Box::pin(byte_stream);
427
428    // Accumulate raw bytes into lines to avoid splitting multi-byte UTF-8 characters.
429    // Only convert to String when we have a complete newline-terminated line.
430    let line_stream = futures_util::stream::unfold(
431        (pinned_stream, Vec::<u8>::new()),
432        |(mut stream, mut buffer)| async move {
433            use futures_util::StreamExt;
434            loop {
435                // Check if we have a complete line in the buffer.
436                if let Some(newline_pos) = buffer.iter().position(|&b| b == b'\n') {
437                    let mut line_bytes = buffer[..newline_pos].to_vec();
438                    buffer = buffer[newline_pos + 1..].to_vec();
439                    // Trim trailing \r
440                    if line_bytes.last() == Some(&b'\r') {
441                        line_bytes.pop();
442                    }
443                    let line = String::from_utf8_lossy(&line_bytes).into_owned();
444                    return Some((line, (stream, buffer)));
445                }
446
447                // Read more data.
448                match stream.next().await {
449                    Some(Ok(chunk)) => {
450                        buffer.extend_from_slice(&chunk);
451                    }
452                    Some(Err(_)) | None => {
453                        // Stream ended. Emit remaining buffer if non-empty.
454                        if !buffer.is_empty() {
455                            let remaining = String::from_utf8_lossy(&buffer).into_owned();
456                            buffer.clear();
457                            return Some((remaining, (stream, buffer)));
458                        }
459                        return None;
460                    }
461                }
462            }
463        },
464    );
465
466    let pinned_lines = Box::pin(line_stream);
467    futures_util::stream::unfold(pinned_lines, |mut lines| async move {
468        use futures_util::StreamExt;
469        loop {
470            let line = lines.next().await?;
471
472            if !line.starts_with("data: ") {
473                continue;
474            }
475            let payload = &line["data: ".len()..];
476
477            if payload == "[DONE]" {
478                let ev = StreamEvent {
479                    event_type: "done".to_string(),
480                    delta: None,
481                    tool_use: None,
482                    usage: None,
483                    error: None,
484                    done: true,
485                };
486                return Some((ev, lines));
487            }
488
489            let raw: RawStreamEvent = match serde_json::from_str(payload) {
490                Ok(r) => r,
491                Err(e) => {
492                    let ev = StreamEvent {
493                        event_type: "error".to_string(),
494                        delta: None,
495                        tool_use: None,
496                        usage: None,
497                        error: Some(format!("parse SSE: {e}")),
498                        done: false,
499                    };
500                    return Some((ev, lines));
501                }
502            };
503
504            let mut ev = StreamEvent {
505                event_type: raw.event_type.clone(),
506                delta: None,
507                tool_use: None,
508                usage: None,
509                error: None,
510                done: false,
511            };
512
513            match raw.event_type.as_str() {
514                "content_delta" | "thinking_delta" => {
515                    ev.delta = raw.delta;
516                }
517                "tool_use" => {
518                    ev.tool_use = Some(StreamToolUse {
519                        id: raw.id.unwrap_or_default(),
520                        name: raw.name.unwrap_or_default(),
521                        input: raw.input.unwrap_or_default(),
522                    });
523                }
524                "usage" => {
525                    ev.usage = Some(ChatUsage {
526                        input_tokens: raw.input_tokens.unwrap_or(0),
527                        output_tokens: raw.output_tokens.unwrap_or(0),
528                        cost_ticks: raw.cost_ticks.unwrap_or(0),
529                    });
530                }
531                "error" => {
532                    ev.error = raw.message;
533                }
534                "heartbeat" => {}
535                _ => {}
536            }
537
538            return Some((ev, lines));
539        }
540    })
541}