Skip to main content

quantum_sdk/
documents.rs

1use std::collections::HashMap;
2
3use serde::{Deserialize, Serialize};
4
5use crate::client::Client;
6use crate::error::Result;
7
8/// Request body for document extraction.
9#[derive(Debug, Clone, Serialize, Default)]
10pub struct DocumentRequest {
11    /// Base64-encoded file content.
12    pub file_base64: String,
13
14    /// Original filename (helps determine the file type).
15    pub filename: String,
16
17    /// Desired output format (e.g. "markdown", "text").
18    #[serde(skip_serializing_if = "Option::is_none")]
19    pub output_format: Option<String>,
20}
21
22/// Response from document extraction.
23#[derive(Debug, Clone, Deserialize)]
24pub struct DocumentResponse {
25    /// Extracted text content.
26    pub content: String,
27
28    /// Format of the extracted content (e.g. "markdown").
29    pub format: String,
30
31    /// Provider-specific metadata about the document.
32    #[serde(default)]
33    pub meta: Option<HashMap<String, serde_json::Value>>,
34
35    /// Total cost in ticks.
36    #[serde(default)]
37    pub cost_ticks: i64,
38
39    /// Unique request identifier.
40    #[serde(default)]
41    pub request_id: String,
42}
43
44/// Request body for document chunking.
45#[derive(Debug, Clone, Serialize, Default)]
46pub struct ChunkRequest {
47    /// Base64-encoded file content.
48    pub file_base64: String,
49
50    /// Original filename.
51    pub filename: String,
52
53    /// Maximum chunk size in tokens.
54    #[serde(skip_serializing_if = "Option::is_none")]
55    pub max_chunk_tokens: Option<i32>,
56
57    /// Overlap between chunks in tokens.
58    #[serde(skip_serializing_if = "Option::is_none")]
59    pub overlap_tokens: Option<i32>,
60}
61
62/// A single document chunk.
63#[derive(Debug, Clone, Deserialize)]
64pub struct DocumentChunk {
65    /// Chunk index.
66    pub index: i32,
67
68    /// Chunk text content.
69    pub text: String,
70
71    /// Estimated token count.
72    #[serde(default)]
73    pub token_count: Option<i32>,
74}
75
76/// Response from document chunking.
77#[derive(Debug, Clone, Deserialize)]
78pub struct ChunkResponse {
79    /// Document chunks.
80    pub chunks: Vec<DocumentChunk>,
81
82    /// Total number of chunks.
83    #[serde(default)]
84    pub total_chunks: Option<i32>,
85
86    /// Total cost in ticks.
87    #[serde(default)]
88    pub cost_ticks: i64,
89
90    /// Unique request identifier.
91    #[serde(default)]
92    pub request_id: String,
93}
94
95/// Request body for document processing (combined extraction + analysis).
96#[derive(Debug, Clone, Serialize, Default)]
97pub struct ProcessRequest {
98    /// Base64-encoded file content.
99    pub file_base64: String,
100
101    /// Original filename.
102    pub filename: String,
103
104    /// Processing instructions or prompt.
105    #[serde(skip_serializing_if = "Option::is_none")]
106    pub prompt: Option<String>,
107
108    /// Model to use for processing.
109    #[serde(skip_serializing_if = "Option::is_none")]
110    pub model: Option<String>,
111}
112
113/// Response from document processing.
114#[derive(Debug, Clone, Deserialize)]
115pub struct ProcessResponse {
116    /// Processed content / analysis result.
117    pub content: String,
118
119    /// Model used for processing.
120    #[serde(default)]
121    pub model: Option<String>,
122
123    /// Total cost in ticks.
124    #[serde(default)]
125    pub cost_ticks: i64,
126
127    /// Unique request identifier.
128    #[serde(default)]
129    pub request_id: String,
130}
131
132impl Client {
133    /// Extracts text content from a document (PDF, image, etc.).
134    pub async fn extract_document(&self, req: &DocumentRequest) -> Result<DocumentResponse> {
135        let (mut resp, meta) = self
136            .post_json::<DocumentRequest, DocumentResponse>("/qai/v1/documents/extract", req)
137            .await?;
138        if resp.cost_ticks == 0 {
139            resp.cost_ticks = meta.cost_ticks;
140        }
141        if resp.request_id.is_empty() {
142            resp.request_id = meta.request_id;
143        }
144        Ok(resp)
145    }
146
147    /// Splits a document into chunks suitable for embeddings or RAG.
148    pub async fn chunk_document(&self, req: &ChunkRequest) -> Result<ChunkResponse> {
149        let (mut resp, meta) = self
150            .post_json::<ChunkRequest, ChunkResponse>("/qai/v1/documents/chunk", req)
151            .await?;
152        if resp.cost_ticks == 0 {
153            resp.cost_ticks = meta.cost_ticks;
154        }
155        if resp.request_id.is_empty() {
156            resp.request_id = meta.request_id;
157        }
158        Ok(resp)
159    }
160
161    /// Processes a document with AI (extraction + analysis in one step).
162    pub async fn process_document(&self, req: &ProcessRequest) -> Result<ProcessResponse> {
163        let (mut resp, meta) = self
164            .post_json::<ProcessRequest, ProcessResponse>("/qai/v1/documents/process", req)
165            .await?;
166        if resp.cost_ticks == 0 {
167            resp.cost_ticks = meta.cost_ticks;
168        }
169        if resp.request_id.is_empty() {
170            resp.request_id = meta.request_id;
171        }
172        Ok(resp)
173    }
174}