Skip to main content

vectorizer_sdk/client/
files.rs

1//! File-operations surface: content/listing/summary/chunks/outline,
2//! related-file discovery, type-filtered search, file upload.
3//!
4//! 10 methods covering everything the file pipeline exposes today.
5//! `upload_file` and `upload_file_content` build a one-off
6//! [`HttpTransport`] for the multipart POST because the generic
7//! `Transport` trait doesn't model multipart yet — every other
8//! method goes through the dispatched transport.
9
10use super::VectorizerClient;
11use crate::error::{Result, VectorizerError};
12use crate::models::*;
13
14impl VectorizerClient {
15    /// Retrieve the complete content of an indexed file.
16    pub async fn get_file_content(
17        &self,
18        collection: &str,
19        file_path: &str,
20        max_size_kb: Option<usize>,
21    ) -> Result<serde_json::Value> {
22        let mut payload = serde_json::Map::new();
23        payload.insert(
24            "collection".to_string(),
25            serde_json::Value::String(collection.to_string()),
26        );
27        payload.insert(
28            "file_path".to_string(),
29            serde_json::Value::String(file_path.to_string()),
30        );
31        if let Some(max) = max_size_kb {
32            payload.insert(
33                "max_size_kb".to_string(),
34                serde_json::Value::Number(max.into()),
35            );
36        }
37        let response = self
38            .make_request(
39                "POST",
40                "/file/content",
41                Some(serde_json::Value::Object(payload)),
42            )
43            .await?;
44        serde_json::from_str(&response).map_err(|e| {
45            VectorizerError::server(format!("Failed to parse file content response: {e}"))
46        })
47    }
48
49    /// List indexed files in a collection, optionally filtered by
50    /// extension and minimum chunk count.
51    pub async fn list_files_in_collection(
52        &self,
53        collection: &str,
54        filter_by_type: Option<Vec<String>>,
55        min_chunks: Option<usize>,
56        max_results: Option<usize>,
57        sort_by: Option<&str>,
58    ) -> Result<serde_json::Value> {
59        let mut payload = serde_json::Map::new();
60        payload.insert(
61            "collection".to_string(),
62            serde_json::Value::String(collection.to_string()),
63        );
64        if let Some(types) = filter_by_type {
65            payload.insert(
66                "filter_by_type".to_string(),
67                serde_json::to_value(types).unwrap(),
68            );
69        }
70        if let Some(min) = min_chunks {
71            payload.insert(
72                "min_chunks".to_string(),
73                serde_json::Value::Number(min.into()),
74            );
75        }
76        if let Some(max) = max_results {
77            payload.insert(
78                "max_results".to_string(),
79                serde_json::Value::Number(max.into()),
80            );
81        }
82        if let Some(sort) = sort_by {
83            payload.insert(
84                "sort_by".to_string(),
85                serde_json::Value::String(sort.to_string()),
86            );
87        }
88        let response = self
89            .make_request(
90                "POST",
91                "/file/list",
92                Some(serde_json::Value::Object(payload)),
93            )
94            .await?;
95        serde_json::from_str(&response).map_err(|e| {
96            VectorizerError::server(format!("Failed to parse list files response: {e}"))
97        })
98    }
99
100    /// Get an extractive or structural summary of one indexed file.
101    pub async fn get_file_summary(
102        &self,
103        collection: &str,
104        file_path: &str,
105        summary_type: Option<&str>,
106        max_sentences: Option<usize>,
107    ) -> Result<serde_json::Value> {
108        let mut payload = serde_json::Map::new();
109        payload.insert(
110            "collection".to_string(),
111            serde_json::Value::String(collection.to_string()),
112        );
113        payload.insert(
114            "file_path".to_string(),
115            serde_json::Value::String(file_path.to_string()),
116        );
117        if let Some(stype) = summary_type {
118            payload.insert(
119                "summary_type".to_string(),
120                serde_json::Value::String(stype.to_string()),
121            );
122        }
123        if let Some(max) = max_sentences {
124            payload.insert(
125                "max_sentences".to_string(),
126                serde_json::Value::Number(max.into()),
127            );
128        }
129        let response = self
130            .make_request(
131                "POST",
132                "/file/summary",
133                Some(serde_json::Value::Object(payload)),
134            )
135            .await?;
136        serde_json::from_str(&response).map_err(|e| {
137            VectorizerError::server(format!("Failed to parse file summary response: {e}"))
138        })
139    }
140
141    /// Retrieve chunks in original file order for progressive
142    /// reading. Pair with `start_chunk` + `limit` to page.
143    pub async fn get_file_chunks_ordered(
144        &self,
145        collection: &str,
146        file_path: &str,
147        start_chunk: Option<usize>,
148        limit: Option<usize>,
149        include_context: Option<bool>,
150    ) -> Result<serde_json::Value> {
151        let mut payload = serde_json::Map::new();
152        payload.insert(
153            "collection".to_string(),
154            serde_json::Value::String(collection.to_string()),
155        );
156        payload.insert(
157            "file_path".to_string(),
158            serde_json::Value::String(file_path.to_string()),
159        );
160        if let Some(start) = start_chunk {
161            payload.insert(
162                "start_chunk".to_string(),
163                serde_json::Value::Number(start.into()),
164            );
165        }
166        if let Some(lim) = limit {
167            payload.insert("limit".to_string(), serde_json::Value::Number(lim.into()));
168        }
169        if let Some(ctx) = include_context {
170            payload.insert("include_context".to_string(), serde_json::Value::Bool(ctx));
171        }
172        let response = self
173            .make_request(
174                "POST",
175                "/file/chunks",
176                Some(serde_json::Value::Object(payload)),
177            )
178            .await?;
179        serde_json::from_str(&response)
180            .map_err(|e| VectorizerError::server(format!("Failed to parse chunks response: {e}")))
181    }
182
183    /// Generate a hierarchical project structure overview.
184    pub async fn get_project_outline(
185        &self,
186        collection: &str,
187        max_depth: Option<usize>,
188        include_summaries: Option<bool>,
189        highlight_key_files: Option<bool>,
190    ) -> Result<serde_json::Value> {
191        let mut payload = serde_json::Map::new();
192        payload.insert(
193            "collection".to_string(),
194            serde_json::Value::String(collection.to_string()),
195        );
196        if let Some(depth) = max_depth {
197            payload.insert(
198                "max_depth".to_string(),
199                serde_json::Value::Number(depth.into()),
200            );
201        }
202        if let Some(summ) = include_summaries {
203            payload.insert(
204                "include_summaries".to_string(),
205                serde_json::Value::Bool(summ),
206            );
207        }
208        if let Some(highlight) = highlight_key_files {
209            payload.insert(
210                "highlight_key_files".to_string(),
211                serde_json::Value::Bool(highlight),
212            );
213        }
214        let response = self
215            .make_request(
216                "POST",
217                "/file/outline",
218                Some(serde_json::Value::Object(payload)),
219            )
220            .await?;
221        serde_json::from_str(&response)
222            .map_err(|e| VectorizerError::server(format!("Failed to parse outline response: {e}")))
223    }
224
225    /// Find semantically-related files by vector similarity.
226    pub async fn get_related_files(
227        &self,
228        collection: &str,
229        file_path: &str,
230        limit: Option<usize>,
231        similarity_threshold: Option<f32>,
232        include_reason: Option<bool>,
233    ) -> Result<serde_json::Value> {
234        let mut payload = serde_json::Map::new();
235        payload.insert(
236            "collection".to_string(),
237            serde_json::Value::String(collection.to_string()),
238        );
239        payload.insert(
240            "file_path".to_string(),
241            serde_json::Value::String(file_path.to_string()),
242        );
243        if let Some(lim) = limit {
244            payload.insert("limit".to_string(), serde_json::Value::Number(lim.into()));
245        }
246        if let Some(thresh) = similarity_threshold {
247            payload.insert(
248                "similarity_threshold".to_string(),
249                serde_json::json!(thresh),
250            );
251        }
252        if let Some(reason) = include_reason {
253            payload.insert(
254                "include_reason".to_string(),
255                serde_json::Value::Bool(reason),
256            );
257        }
258        let response = self
259            .make_request(
260                "POST",
261                "/file/related",
262                Some(serde_json::Value::Object(payload)),
263            )
264            .await?;
265        serde_json::from_str(&response).map_err(|e| {
266            VectorizerError::server(format!("Failed to parse related files response: {e}"))
267        })
268    }
269
270    /// Semantic search filtered by file type. Empty `file_types` is
271    /// rejected — pass at least one extension.
272    pub async fn search_by_file_type(
273        &self,
274        collection: &str,
275        query: &str,
276        file_types: Vec<String>,
277        limit: Option<usize>,
278        return_full_files: Option<bool>,
279    ) -> Result<serde_json::Value> {
280        if file_types.is_empty() {
281            return Err(VectorizerError::validation("file_types cannot be empty"));
282        }
283        let mut payload = serde_json::Map::new();
284        payload.insert(
285            "collection".to_string(),
286            serde_json::Value::String(collection.to_string()),
287        );
288        payload.insert(
289            "query".to_string(),
290            serde_json::Value::String(query.to_string()),
291        );
292        payload.insert(
293            "file_types".to_string(),
294            serde_json::to_value(file_types).unwrap(),
295        );
296        if let Some(lim) = limit {
297            payload.insert("limit".to_string(), serde_json::Value::Number(lim.into()));
298        }
299        if let Some(full) = return_full_files {
300            payload.insert(
301                "return_full_files".to_string(),
302                serde_json::Value::Bool(full),
303            );
304        }
305        let response = self
306            .make_request(
307                "POST",
308                "/file/search_by_type",
309                Some(serde_json::Value::Object(payload)),
310            )
311            .await?;
312        serde_json::from_str(&response).map_err(|e| {
313            VectorizerError::server(format!("Failed to parse search by type response: {e}"))
314        })
315    }
316
317    /// Upload a file for automatic text extraction, chunking, and
318    /// indexing.
319    ///
320    /// # Arguments
321    /// * `file_bytes` - File content as bytes
322    /// * `filename` - Name of the file (used for extension detection)
323    /// * `collection_name` - Target collection name
324    /// * `options` - Upload options (chunk size, overlap, metadata)
325    ///
326    /// # Example
327    /// ```no_run
328    /// use vectorizer_sdk::{VectorizerClient, ClientConfig, UploadFileOptions};
329    ///
330    /// #[tokio::main]
331    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
332    ///     let config = ClientConfig::default();
333    ///     let client = VectorizerClient::new(config)?;
334    ///
335    ///     let file_bytes = std::fs::read("document.pdf")?;
336    ///     let options = UploadFileOptions::default();
337    ///
338    ///     let response = client.upload_file(
339    ///         file_bytes,
340    ///         "document.pdf",
341    ///         "my-docs",
342    ///         options
343    ///     ).await?;
344    ///
345    ///     println!("Uploaded: {} chunks created", response.chunks_created);
346    ///     Ok(())
347    /// }
348    /// ```
349    pub async fn upload_file(
350        &self,
351        file_bytes: Vec<u8>,
352        filename: &str,
353        collection_name: &str,
354        options: UploadFileOptions,
355    ) -> Result<FileUploadResponse> {
356        let mut form_fields = std::collections::HashMap::new();
357        form_fields.insert("collection_name".to_string(), collection_name.to_string());
358        if let Some(chunk_size) = options.chunk_size {
359            form_fields.insert("chunk_size".to_string(), chunk_size.to_string());
360        }
361        if let Some(chunk_overlap) = options.chunk_overlap {
362            form_fields.insert("chunk_overlap".to_string(), chunk_overlap.to_string());
363        }
364        if let Some(metadata) = options.metadata {
365            let metadata_json = serde_json::to_string(&metadata).map_err(|e| {
366                VectorizerError::validation(format!("Failed to serialize metadata: {e}"))
367            })?;
368            form_fields.insert("metadata".to_string(), metadata_json);
369        }
370        if let Some(public_key) = options.public_key {
371            form_fields.insert("public_key".to_string(), public_key);
372        }
373
374        // The generic `Transport` trait doesn't model multipart yet,
375        // so we build a one-off `HttpTransport` here. When the
376        // trait grows a multipart method (or the RPC backend lands
377        // its own file-upload primitive), this branch collapses
378        // back into `self.make_request`.
379        let http_transport = crate::http_transport::HttpTransport::new(
380            self.base_url(),
381            self.config.api_key.as_deref(),
382            self.config.timeout_secs.unwrap_or(30),
383        )?;
384        let response = http_transport
385            .post_multipart("/files/upload", file_bytes, filename, form_fields)
386            .await?;
387        serde_json::from_str(&response)
388            .map_err(|e| VectorizerError::server(format!("Failed to parse upload response: {e}")))
389    }
390
391    /// Upload file content directly as a string. Convenience wrapper
392    /// around [`Self::upload_file`].
393    ///
394    /// # Example
395    /// ```no_run
396    /// use vectorizer_sdk::{VectorizerClient, ClientConfig, UploadFileOptions};
397    ///
398    /// #[tokio::main]
399    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
400    ///     let client = VectorizerClient::new(ClientConfig::default())?;
401    ///     let code = r#"fn main() { println!("Hello!"); }"#;
402    ///     let response = client.upload_file_content(
403    ///         code, "main.rs", "rust-code", UploadFileOptions::default()
404    ///     ).await?;
405    ///     println!("Uploaded: {} vectors created", response.vectors_created);
406    ///     Ok(())
407    /// }
408    /// ```
409    pub async fn upload_file_content(
410        &self,
411        content: &str,
412        filename: &str,
413        collection_name: &str,
414        options: UploadFileOptions,
415    ) -> Result<FileUploadResponse> {
416        let file_bytes = content.as_bytes().to_vec();
417        self.upload_file(file_bytes, filename, collection_name, options)
418            .await
419    }
420
421    /// Get file upload configuration from the server (max file size,
422    /// allowed extensions, default chunk settings).
423    ///
424    /// # Example
425    /// ```no_run
426    /// use vectorizer_sdk::{VectorizerClient, ClientConfig};
427    ///
428    /// #[tokio::main]
429    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
430    ///     let client = VectorizerClient::new(ClientConfig::default())?;
431    ///     let upload_config = client.get_upload_config().await?;
432    ///     println!("Max file size: {}MB", upload_config.max_file_size_mb);
433    ///     Ok(())
434    /// }
435    /// ```
436    pub async fn get_upload_config(&self) -> Result<FileUploadConfig> {
437        let response = self.make_request("GET", "/files/config", None).await?;
438        serde_json::from_str(&response)
439            .map_err(|e| VectorizerError::server(format!("Failed to parse upload config: {e}")))
440    }
441}