zeroentropy_community/resources/
documents.rs

1use crate::client::Client;
2use crate::error::Result;
3use crate::types::{
4    DocumentContent, DocumentInfoListResponse, DocumentInfoResponse, DocumentResponse,
5    IndexStatus, Metadata, PageInfoResponse,
6};
7use serde::Serialize;
8
9/// Documents resource for managing documents in collections
10pub struct Documents<'a> {
11    client: &'a Client,
12}
13
14impl<'a> Documents<'a> {
15    pub(crate) fn new(client: &'a Client) -> Self {
16        Self { client }
17    }
18
19    /// Add a document to a collection
20    ///
21    /// # Arguments
22    /// * `collection_name` - Name of the collection
23    /// * `path` - Document path/identifier
24    /// * `content` - Document content
25    /// * `metadata` - Optional metadata
26    /// * `overwrite` - Whether to overwrite if exists
27    pub async fn add(
28        &self,
29        collection_name: impl Into<String>,
30        path: impl Into<String>,
31        content: DocumentContent,
32        metadata: Option<Metadata>,
33        overwrite: Option<bool>,
34    ) -> Result<DocumentResponse> {
35        #[derive(Serialize)]
36        struct Request {
37            collection_name: String,
38            path: String,
39            content: DocumentContent,
40            #[serde(skip_serializing_if = "Option::is_none")]
41            metadata: Option<Metadata>,
42            #[serde(skip_serializing_if = "Option::is_none")]
43            overwrite: Option<bool>,
44        }
45
46        let body = Request {
47            collection_name: collection_name.into(),
48            path: path.into(),
49            content,
50            metadata,
51            overwrite,
52        };
53
54        self.client.post("/documents/add-document", &body).await
55    }
56
57    /// Add a text document
58    ///
59    /// Convenience method for adding plain text documents
60    pub async fn add_text(
61        &self,
62        collection_name: impl Into<String>,
63        path: impl Into<String>,
64        text: impl Into<String>,
65        metadata: Option<Metadata>,
66    ) -> Result<DocumentResponse> {
67        let content = DocumentContent::Text {
68            text: text.into(),
69        };
70        self.add(collection_name, path, content, metadata, None).await
71    }
72
73    /// Add a PDF document from base64 data
74    ///
75    /// Convenience method for adding PDF documents with OCR
76    pub async fn add_pdf(
77        &self,
78        collection_name: impl Into<String>,
79        path: impl Into<String>,
80        base64_data: impl Into<String>,
81        metadata: Option<Metadata>,
82    ) -> Result<DocumentResponse> {
83        let content = DocumentContent::Auto {
84            base64_data: base64_data.into(),
85        };
86        self.add(collection_name, path, content, metadata, None).await
87    }
88
89    /// Add a PDF document from file path
90    ///
91    /// Reads the file and encodes it as base64
92    pub async fn add_pdf_file(
93        &self,
94        collection_name: impl Into<String>,
95        document_path: impl Into<String>,
96        file_path: impl AsRef<std::path::Path>,
97        metadata: Option<Metadata>,
98    ) -> Result<DocumentResponse> {
99        use base64::{engine::general_purpose, Engine as _};
100        
101        let bytes = tokio::fs::read(file_path).await?;
102        let base64_data = general_purpose::STANDARD.encode(&bytes);
103        
104        self.add_pdf(collection_name, document_path, base64_data, metadata).await
105    }
106
107    /// Update a document's metadata or index status
108    pub async fn update(
109        &self,
110        collection_name: impl Into<String>,
111        path: impl Into<String>,
112        metadata: Option<Metadata>,
113        index_status: Option<IndexStatus>,
114    ) -> Result<DocumentResponse> {
115        #[derive(Serialize)]
116        struct Request {
117            collection_name: String,
118            path: String,
119            #[serde(skip_serializing_if = "Option::is_none")]
120            metadata: Option<Metadata>,
121            #[serde(skip_serializing_if = "Option::is_none")]
122            index_status: Option<IndexStatus>,
123        }
124
125        let body = Request {
126            collection_name: collection_name.into(),
127            path: path.into(),
128            metadata,
129            index_status,
130        };
131
132        self.client.post("/documents/update-document", &body).await
133    }
134
135    /// Delete a document
136    pub async fn delete(
137        &self,
138        collection_name: impl Into<String>,
139        path: impl Into<String>,
140    ) -> Result<DocumentResponse> {
141        #[derive(Serialize)]
142        struct Request {
143            collection_name: String,
144            path: String,
145        }
146
147        let body = Request {
148            collection_name: collection_name.into(),
149            path: path.into(),
150        };
151
152        self.client.post("/documents/delete-document", &body).await
153    }
154
155    /// Get document information
156    pub async fn get_info(
157        &self,
158        collection_name: impl Into<String>,
159        path: impl Into<String>,
160        include_content: Option<bool>,
161    ) -> Result<DocumentInfoResponse> {
162        #[derive(Serialize)]
163        struct Request {
164            collection_name: String,
165            path: String,
166            #[serde(skip_serializing_if = "Option::is_none")]
167            include_content: Option<bool>,
168        }
169
170        let body = Request {
171            collection_name: collection_name.into(),
172            path: path.into(),
173            include_content,
174        };
175
176        self.client.post("/documents/get-document-info", &body).await
177    }
178
179    /// Get list of documents in a collection
180    pub async fn get_info_list(
181        &self,
182        collection_name: impl Into<String>,
183        limit: Option<u32>,
184        path_gt: Option<String>,
185    ) -> Result<DocumentInfoListResponse> {
186        #[derive(Serialize)]
187        struct Request {
188            collection_name: String,
189            #[serde(skip_serializing_if = "Option::is_none")]
190            limit: Option<u32>,
191            #[serde(skip_serializing_if = "Option::is_none")]
192            path_gt: Option<String>,
193        }
194
195        let body = Request {
196            collection_name: collection_name.into(),
197            limit,
198            path_gt,
199        };
200
201        self.client.post("/documents/get-document-info-list", &body).await
202    }
203
204    /// Get information about a specific page
205    pub async fn get_page_info(
206        &self,
207        collection_name: impl Into<String>,
208        path: impl Into<String>,
209        page_number: u32,
210        include_content: Option<bool>,
211    ) -> Result<PageInfoResponse> {
212        #[derive(Serialize)]
213        struct Request {
214            collection_name: String,
215            path: String,
216            page_number: u32,
217            #[serde(skip_serializing_if = "Option::is_none")]
218            include_content: Option<bool>,
219        }
220
221        let body = Request {
222            collection_name: collection_name.into(),
223            path: path.into(),
224            page_number,
225            include_content,
226        };
227
228        self.client.post("/documents/get-page-info", &body).await
229    }
230}