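//! Document extraction, chunking, and processing: request/response types and
//! the corresponding `Client` methods (`quantum_sdk/documents.rs`).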

1use std::collections::HashMap;
2
3use serde::{Deserialize, Serialize};
4
5use crate::client::Client;
6use crate::error::Result;
7
8/// Request body for document extraction.
9#[derive(Debug, Clone, Serialize, Default)]
10pub struct DocumentRequest {
11    /// Base64-encoded file content.
12    pub file_base64: String,
13
14    /// Original filename (helps determine the file type).
15    pub filename: String,
16
17    /// Desired output format (e.g. "markdown", "text").
18    #[serde(skip_serializing_if = "Option::is_none")]
19    pub output_format: Option<String>,
20}
21
22/// Response from document extraction.
23#[derive(Debug, Clone, Deserialize)]
24pub struct DocumentResponse {
25    /// Extracted text content.
26    pub content: String,
27
28    /// Format of the extracted content (e.g. "markdown").
29    pub format: String,
30
31    /// Provider-specific metadata about the document.
32    #[serde(default)]
33    pub meta: Option<HashMap<String, serde_json::Value>>,
34
35    /// Total cost in ticks.
36    #[serde(default)]
37    pub cost_ticks: i64,
38
39    /// Unique request identifier.
40    #[serde(default)]
41    pub request_id: String,
42}
43
44/// Request body for document chunking.
45#[derive(Debug, Clone, Serialize, Default)]
46pub struct ChunkDocumentRequest {
47    /// Base64-encoded file content.
48    pub file_base64: String,
49
50    /// Original filename.
51    pub filename: String,
52
53    /// Maximum chunk size in tokens.
54    #[serde(skip_serializing_if = "Option::is_none")]
55    pub max_chunk_tokens: Option<i32>,
56
57    /// Overlap between chunks in tokens.
58    #[serde(skip_serializing_if = "Option::is_none")]
59    pub overlap_tokens: Option<i32>,
60}
61
62/// A single document chunk.
63#[derive(Debug, Clone, Deserialize)]
64pub struct DocumentChunk {
65    /// Chunk index.
66    pub index: i32,
67
68    /// Chunk text content.
69    pub text: String,
70
71    /// Estimated token count.
72    #[serde(default)]
73    pub token_count: Option<i32>,
74}
75
/// Backwards-compatible alias for [`ChunkDocumentRequest`].
pub type ChunkRequest = ChunkDocumentRequest;
78
79/// Response from document chunking.
80#[derive(Debug, Clone, Deserialize)]
81pub struct ChunkDocumentResponse {
82    /// Document chunks.
83    pub chunks: Vec<DocumentChunk>,
84
85    /// Total number of chunks.
86    #[serde(default)]
87    pub total_chunks: Option<i32>,
88
89    /// Total cost in ticks.
90    #[serde(default)]
91    pub cost_ticks: i64,
92
93    /// Unique request identifier.
94    #[serde(default)]
95    pub request_id: String,
96}
97
/// Backwards-compatible alias for [`ChunkDocumentResponse`].
pub type ChunkResponse = ChunkDocumentResponse;
100
101/// Request body for document processing (combined extraction + analysis).
102#[derive(Debug, Clone, Serialize, Default)]
103pub struct ProcessDocumentRequest {
104    /// Base64-encoded file content.
105    pub file_base64: String,
106
107    /// Original filename.
108    pub filename: String,
109
110    /// Processing instructions or prompt.
111    #[serde(skip_serializing_if = "Option::is_none")]
112    pub prompt: Option<String>,
113
114    /// Model to use for processing.
115    #[serde(skip_serializing_if = "Option::is_none")]
116    pub model: Option<String>,
117}
118
/// Backwards-compatible alias for [`ProcessDocumentRequest`].
pub type ProcessRequest = ProcessDocumentRequest;
121
122/// Response from document processing.
123#[derive(Debug, Clone, Deserialize)]
124pub struct ProcessDocumentResponse {
125    /// Processed content / analysis result.
126    pub content: String,
127
128    /// Model used for processing.
129    #[serde(default)]
130    pub model: Option<String>,
131
132    /// Total cost in ticks.
133    #[serde(default)]
134    pub cost_ticks: i64,
135
136    /// Unique request identifier.
137    #[serde(default)]
138    pub request_id: String,
139}
140
/// Backwards-compatible alias for [`ProcessDocumentResponse`].
pub type ProcessResponse = ProcessDocumentResponse;
143
144impl Client {
145    /// Extracts text content from a document (PDF, image, etc.).
146    pub async fn extract_document(&self, req: &DocumentRequest) -> Result<DocumentResponse> {
147        let (mut resp, meta) = self
148            .post_json::<DocumentRequest, DocumentResponse>("/qai/v1/documents/extract", req)
149            .await?;
150        if resp.cost_ticks == 0 {
151            resp.cost_ticks = meta.cost_ticks;
152        }
153        if resp.request_id.is_empty() {
154            resp.request_id = meta.request_id;
155        }
156        Ok(resp)
157    }
158
159    /// Splits a document into chunks suitable for embeddings or RAG.
160    pub async fn chunk_document(&self, req: &ChunkDocumentRequest) -> Result<ChunkDocumentResponse> {
161        let (mut resp, meta) = self
162            .post_json::<ChunkDocumentRequest, ChunkDocumentResponse>("/qai/v1/documents/chunk", req)
163            .await?;
164        if resp.cost_ticks == 0 {
165            resp.cost_ticks = meta.cost_ticks;
166        }
167        if resp.request_id.is_empty() {
168            resp.request_id = meta.request_id;
169        }
170        Ok(resp)
171    }
172
173    /// Processes a document with AI (extraction + analysis in one step).
174    pub async fn process_document(&self, req: &ProcessDocumentRequest) -> Result<ProcessDocumentResponse> {
175        let (mut resp, meta) = self
176            .post_json::<ProcessDocumentRequest, ProcessDocumentResponse>("/qai/v1/documents/process", req)
177            .await?;
178        if resp.cost_ticks == 0 {
179            resp.cost_ticks = meta.cost_ticks;
180        }
181        if resp.request_id.is_empty() {
182            resp.request_id = meta.request_id;
183        }
184        Ok(resp)
185    }
186}