// paperless_api/client.rs

1//! The central client for interacting with Paperless.
2
3use std::{collections::HashMap, fmt::Write, path::Path, str::FromStr, sync::Arc};
4
5use enum_iterator::Sequence;
6use reqwest::{
7    Method, StatusCode,
8    header::{ACCEPT, HeaderMap, HeaderName, InvalidHeaderValue},
9    multipart,
10};
11use serde::{Deserialize, de::DeserializeOwned};
12use tracing::{debug, trace};
13
14use crate::{
15    Error, Group, Result, SavedView, User,
16    document::{Document, DocumentData},
17    document_query::DocumentQueryBuilder,
18    dto::Item,
19    id::{
20        CorrespondentId, CustomFieldId, DocumentId, DocumentTypeId, GroupId, StoragePathId, TagId,
21        TaskId, UserId,
22    },
23    metadata::{
24        correspondent::Correspondent, custom_field::CustomField, document_type::DocumentType,
25        storage_path::StoragePath, tag::Tag,
26    },
27    task::Task,
28    util,
29    workflow::Workflow,
30};
31
/// Name of the query parameter that is sent with the value `true` when full
/// permissions data is requested for items.
const QUERY_PARAM_FULL_PERMISSIONS: &str = "full_perms";
33
34/// Selects which cached metadata to refresh.
35///
36/// Cached data is data which is rarly updated,
37/// refreshing it is normally not necessary on every request.
38#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, Sequence)]
39#[non_exhaustive]
40pub enum RefreshMetaData {
41    Tags,
42    CustomFields,
43    Correspondents,
44    DocumentTypes,
45    Groups,
46    Users,
47    StoragePaths,
48}
49
50/// Client to interact with Paperless.
51#[derive(Debug, Clone)]
52pub struct PaperlessClient {
53    /// Whether to request full permissions data for items.
54    pub request_full_permissions: bool,
55
56    pub(crate) base_url: Arc<str>,
57
58    client: reqwest::Client,
59    cached_data: Arc<CachedData>,
60}
61
62#[derive(Debug, Clone)]
63struct CachedData {
64    correspondents: HashMap<CorrespondentId, Correspondent>,
65    custom_fields: HashMap<CustomFieldId, CustomField>,
66    document_types: HashMap<DocumentTypeId, DocumentType>,
67    groups: HashMap<GroupId, Group>,
68    storage_paths: HashMap<StoragePathId, StoragePath>,
69    tags: HashMap<TagId, Tag>,
70    users: HashMap<UserId, User>,
71}
72
73#[derive(Debug, Deserialize)]
74struct PaginatedResponse<T> {
75    results: Vec<T>,
76    next: Option<String>,
77}
78
79impl PaperlessClient {
80    /// Create a new Paperless client.
81    ///
82    /// # Arguments
83    ///
84    /// * `base_url` - The base URL of the Paperless API.
85    /// * `token` - The authentication token for the Paperless API.
86    /// * `headers` - Optional additional headers to include in requests.
87    pub fn new(
88        base_url: &str,
89        token: &str,
90        headers: Option<&HashMap<String, String>>,
91    ) -> std::result::Result<Self, String> {
92        Self::new_with_client(
93            base_url,
94            token,
95            headers,
96            reqwest::Client::builder().zstd(true),
97        )
98    }
99
100    /// Create a new Paperless client.
101    ///
102    /// Provide a [`reqwest::ClientBuilder`] to customize the HTTP client,
103    /// such as adding custom headers or disabling compression.
104    ///
105    /// # Arguments
106    ///
107    /// * `base_url` - The base URL of the Paperless API.
108    /// * `token` - The authentication token for the Paperless API.
109    /// * `headers` - Optional additional headers to include in requests.
110    /// * `client_builder` - [`reqwest::ClientBuilder`] to use for creating the HTTP client.
111    pub fn new_with_client(
112        base_url: &str,
113        token: &str,
114        headers: Option<&HashMap<String, String>>,
115        client_builder: reqwest::ClientBuilder,
116    ) -> std::result::Result<Self, String> {
117        let mut headers_map = HeaderMap::new();
118
119        // Add additional headers if provided
120        if let Some(headers) = headers {
121            for (key, value) in headers {
122                headers_map.insert(
123                    HeaderName::from_str(key).map_err(|err| err.to_string())?,
124                    value
125                        .parse()
126                        .map_err(|err: InvalidHeaderValue| err.to_string())?,
127                );
128            }
129        }
130
131        // Add the Paperless token header
132        headers_map.insert(
133            HeaderName::from_str("Authorization").map_err(|err| err.to_string())?,
134            format!("Token {token}")
135                .parse()
136                .map_err(|err: InvalidHeaderValue| err.to_string())?,
137        );
138
139        Ok(Self {
140            request_full_permissions: false,
141            base_url: base_url.into(),
142            client: client_builder
143                .default_headers(headers_map)
144                .build()
145                .map_err(|err| err.to_string())?,
146            cached_data: Arc::new(CachedData {
147                custom_fields: HashMap::new(),
148                correspondents: HashMap::new(),
149                document_types: HashMap::new(),
150                groups: HashMap::new(),
151                storage_paths: HashMap::new(),
152                tags: HashMap::new(),
153                users: HashMap::new(),
154            }),
155        })
156    }
157
158    /// Sets whether to request full permissions data for items during refresh.
159    ///
160    /// If not enabled only simple permission data is loaded.
161    /// See [`ItemPermissions`](crate::metadata::permission::ItemPermissions) for more details.
162    #[must_use]
163    pub fn with_full_permissions(mut self, req: bool) -> Self {
164        self.request_full_permissions = req;
165        self
166    }
167
168    /// Loads all items of the given type from the API.
169    async fn load_items<T: Item + DeserializeOwned>(&self) -> Result<HashMap<T::Id, T>> {
170        debug!("Loading {}", T::endpoint());
171        let endpoint = format!("/api/{}/", T::endpoint());
172
173        let items: Vec<T> = self
174            .fetch_all_pages(&endpoint, self.permissions_query_param())
175            .await?;
176        Ok(items.into_iter().map(|item| (item.id(), item)).collect())
177    }
178
179    fn permissions_query_param(&self) -> Option<&'static [(&'static str, &'static str)]> {
180        if self.request_full_permissions {
181            Some(&[(QUERY_PARAM_FULL_PERMISSIONS, "true")])
182        } else {
183            None
184        }
185    }
186
187    /// Refresh and cache all metadata.
188    ///
189    /// Only updates the cache for this instance, cloned instances will not see the changes.
190    ///
191    /// # Arguments
192    ///
193    /// * `full_permissions` - Whether to use request full permissions data for the items.
194    pub async fn refresh_all(&mut self) -> Result<()> {
195        self.refresh(enum_iterator::all::<RefreshMetaData>()).await
196    }
197
198    /// Refresh and cache the selected metadata.
199    ///
200    /// Only updates the cache for this instance, cloned instances will not see the changes.
201    ///
202    /// # Arguments
203    ///
204    /// * `data` - The metadata to refresh.
205    /// * `full_permissions` - Whether to use request full permissions data for the items being refreshed.
206    pub async fn refresh(&mut self, data: impl IntoIterator<Item = RefreshMetaData>) -> Result<()> {
207        #[rustfmt::skip]
208        async fn inner(
209            client: &mut PaperlessClient,
210            data: &mut dyn Iterator<Item = RefreshMetaData>,
211        ) -> Result<()> {
212            let selected: std::collections::HashSet<_> = data.into_iter().collect();
213
214            if selected.is_empty() {
215                return Ok(());
216            }
217
218            let (tags, custom_fields, correspondents, document_types, groups, users, storage_paths) =
219                futures_util::try_join!(
220                    async {
221                        if selected.contains(&RefreshMetaData::Tags) {
222                            Ok(Some(client.load_items::<Tag>().await?))
223                        } else {
224                            Ok::<Option<_>, Error>(None)
225                        }
226                    },
227                    async {
228                        if selected.contains(&RefreshMetaData::CustomFields) {
229                            Ok(Some(client.load_items::<CustomField>().await?))
230                        } else {
231                            Ok(None)
232                        }
233                    },
234                    async {
235                        if selected.contains(&RefreshMetaData::Correspondents) {
236                            Ok(Some(client.load_items::<Correspondent>().await?))
237                        } else {
238                            Ok(None)
239                        }
240                    },
241                    async {
242                        if selected.contains(&RefreshMetaData::DocumentTypes) {
243                            Ok(Some(client.load_items::<DocumentType>().await?))
244                        } else {
245                            Ok(None)
246                        }
247                    },
248                    async {
249                        if selected.contains(&RefreshMetaData::Groups) {
250                            Ok(Some(client.load_items::<Group>().await?))
251                        } else {
252                            Ok(None)
253                        }
254                    },
255                    async {
256                        if selected.contains(&RefreshMetaData::Users) {
257                            Ok(Some(client.load_items::<User>().await?))
258                        } else {
259                            Ok(None)
260                        }
261                    },
262                    async {
263                        if selected.contains(&RefreshMetaData::StoragePaths) {
264                            Ok(Some(client.load_items::<StoragePath>().await?))
265                        } else {
266                            Ok(None)
267                        }
268                    },
269                )?;
270
271            let cached_data = Arc::make_mut(&mut client.cached_data);
272
273            if let Some(value) = custom_fields { cached_data.custom_fields = value; }
274            if let Some(value) = correspondents { cached_data.correspondents = value; }
275            if let Some(value) = document_types { cached_data.document_types = value; }
276            if let Some(value) = groups { cached_data.groups = value; }
277            if let Some(value) = storage_paths { cached_data.storage_paths = value; }
278            if let Some(value) = tags { cached_data.tags = value; }
279            if let Some(value) = users { cached_data.users = value; }
280
281            Ok(())
282        }
283
284        inner(self, &mut data.into_iter()).await
285    }
286
287    pub async fn query_documents(&self, query: DocumentQueryBuilder) -> Result<Vec<Document>> {
288        let full_content = query.full_content;
289
290        let query_params = query.build();
291        let query_vec: Vec<_> = query_params
292            .query
293            .iter()
294            .map(|(k, v)| (*k, v.as_str()))
295            .collect();
296        let query_slice = query_vec.as_slice();
297
298        let documents: Vec<_> = self
299            .fetch_all_pages::<DocumentData>("/api/documents/", Some(query_slice))
300            .await?
301            .into_iter()
302            .map(|data| Document::new(data, Arc::new(self.clone()), !full_content))
303            .collect();
304
305        Ok(documents)
306    }
307
308    /// Get all documents with any of the given tags.
309    pub fn get_documents_by_tags(
310        &self,
311        tag_ids: &[TagId],
312        truncate_content: bool,
313    ) -> impl Future<Output = Result<Vec<Document>>> {
314        let query = DocumentQueryBuilder::default()
315            .full_content(!truncate_content)
316            .tags_id_in(tag_ids.to_vec());
317
318        self.query_documents(query)
319    }
320
321    pub(crate) async fn get_document_data_by_id(&self, id: DocumentId) -> Result<DocumentData> {
322        let resp = self
323            .request(Method::GET, &format!("/api/documents/{}/", id.0), None)
324            .await?;
325
326        let document_data: DocumentData = resp
327            .json()
328            .await
329            .map_err(|e| Error::Other(format!("Failed to parse document: {e:?}")))?;
330
331        Ok(document_data)
332    }
333
334    /// Get a document by its ID.
335    pub async fn get_document_by_id(&self, id: DocumentId) -> Result<Document> {
336        Ok(Document::new(
337            self.get_document_data_by_id(id).await?,
338            Arc::new(self.clone()),
339            false,
340        ))
341    }
342
343    pub(crate) async fn request(
344        &self,
345        method: Method,
346        endpoint: &str,
347        body: Option<&serde_json::Value>,
348    ) -> Result<reqwest::Response> {
349        let mut req = self
350            .client
351            .request(method, format!("{}{endpoint}", self.base_url))
352            .header(ACCEPT, "application/json");
353
354        // Set payload body if provided
355        if let Some(json_body) = body {
356            req = req.json(json_body);
357        }
358
359        let resp = req
360            .send()
361            .await
362            .map_err(|e| Error::Other(format!("Failed to send request: {e}")))?;
363
364        if resp.status() == StatusCode::NOT_FOUND {
365            return Err(Error::NotFound);
366        }
367
368        if !resp.status().is_success() {
369            return Err(Error::Response {
370                status_code: resp.status().as_u16(),
371                body: resp.text().await.unwrap_or_default(),
372            });
373        }
374
375        Ok(resp)
376    }
377
378    pub(crate) async fn request_with_body(
379        &self,
380        method: Method,
381        endpoint: &str,
382        body: &impl serde::Serialize,
383    ) -> Result<reqwest::Response> {
384        let body = serde_json::to_value(body).map_err(|e| Error::Other(e.to_string()))?;
385        self.request(method, endpoint, Some(&body)).await
386    }
387
388    pub(crate) async fn fetch_all_pages<T: for<'de> Deserialize<'de>>(
389        &self,
390        endpoint: &str,
391        query_params: Option<&[(&str, &str)]>,
392    ) -> Result<Vec<T>> {
393        let mut results = Vec::new();
394        let mut current_url = endpoint.to_string();
395        let mut first_param = true;
396
397        if let Some(params) = query_params {
398            for param in params {
399                if first_param {
400                    current_url.push('?');
401                    first_param = false;
402                } else {
403                    current_url.push('&');
404                }
405                let _ = write!(current_url, "{}={}", param.0, param.1);
406            }
407        }
408
409        let mut current_url = Some(current_url);
410
411        while let Some(url) = current_url {
412            let resp = self.request(Method::GET, &url, None).await?;
413
414            let page: PaginatedResponse<T> = resp.json().await.map_err(|e| {
415                Error::InvalidJson(format!(
416                    "Failed to parse paginated response for {endpoint}: {e:?}"
417                ))
418            })?;
419
420            results.extend(page.results);
421
422            current_url = page.next.and_then(|next_url| {
423                // Extract just the path from the full URL
424                next_url
425                    .trim_start_matches(&*self.base_url)
426                    .to_string()
427                    .into()
428            });
429        }
430
431        Ok(results)
432    }
433
434    /// Get all tasks with optional filtering by ID, name, or acknowledged status.
435    pub async fn get_task_status(
436        &self,
437        task_id: Option<&TaskId>,
438        task_name: Option<&str>,
439        acknowledged: Option<bool>,
440    ) -> Result<Vec<Task>> {
441        let mut query = Vec::new();
442
443        if let Some(id) = task_id {
444            query.push(("task_id", id.to_string()));
445        }
446
447        if let Some(name) = task_name {
448            query.push(("task_name", name.to_string()));
449        }
450
451        if let Some(ack) = acknowledged {
452            query.push(("acknowledged", ack.to_string()));
453        }
454
455        let resp = self
456            .request(
457                Method::GET,
458                &format!(
459                    "/api/tasks/?{}",
460                    serde_urlencoded::to_string(&query)
461                        .map_err(|e| Error::Other(format!("Failed to serialize query: {e}")))?
462                ),
463                None::<&serde_json::Value>,
464            )
465            .await?;
466
467        trace!("get_task_status response: {:?}", resp);
468
469        let body = resp
470            .text()
471            .await
472            .map_err(|e| Error::Other(format!("Failed to read response body: {e:?}")))?;
473
474        let tasks: Vec<Task> = match serde_json::from_str(&body) {
475            Ok(t) => t,
476            Err(e) => {
477                return Err(Error::InvalidJson(format!(
478                    "Failed to parse response body: {e:?}"
479                )));
480            }
481        };
482
483        if tasks.is_empty() {
484            return Err(Error::NotFound);
485        }
486
487        Ok(tasks)
488    }
489
490    pub fn get_workflows(&self) -> impl Future<Output = Result<Vec<Workflow>>> {
491        self.fetch_all_pages("/api/workflows/", None)
492    }
493
494    pub fn get_saved_views(&self) -> impl Future<Output = Result<Vec<SavedView>>> {
495        self.fetch_all_pages("/api/saved_views/", None)
496    }
497
498    pub async fn get_statistics(&self) -> Result<util::Statistics> {
499        self.request(Method::GET, "/api/statistics/", None)
500            .await
501            .map_err(|e| Error::Other(format!("Failed to send request: {e}")))?
502            .json()
503            .await
504            .map_err(|e| Error::Other(format!("Failed to parse response body: {e:?}")))
505    }
506
507    pub async fn get_status(&self) -> Result<util::ServerStatus> {
508        self.request(Method::GET, "/api/status/", None)
509            .await
510            .map_err(|e| Error::Other(format!("Failed to send request: {e}")))?
511            .json()
512            .await
513            .map_err(|e| Error::Other(format!("Failed to parse response body: {e:?}")))
514    }
515
516    /// Create a new item in Paperless.
517    ///
518    /// All structs which implement [`CreateDtoObject`](crate::dto::CreateDtoObject) can be used as `new_item`.
519    ///
520    /// Returns the created item
521    pub async fn create<T: Item>(&self, new_item: T::CreateDto) -> Result<T::BaseType> {
522        let url = format!("/api/{}/", T::endpoint());
523        let resp = self
524            .request_with_body(Method::POST, &url, &new_item)
525            .await?;
526
527        resp.json::<T::BaseType>()
528            .await
529            .map_err(|e| Error::Other(format!("Failed to parse response body: {e:?}")))
530    }
531
532    /// Updates an existing item in Paperless.
533    ///
534    /// All structs which implement [`UpdateDtoObject`](crate::dto::UpdateDtoObject) can be used as `item`.
535    pub async fn update<T: Item>(&self, id: T::Id, item: T::UpdateDto) -> Result<()> {
536        let url = format!("/api/{}/{}/", T::endpoint(), id);
537        self.request_with_body(Method::PATCH, &url, &item).await?;
538        Ok(())
539    }
540
541    /// Deletes an existing item in Paperless.
542    pub async fn delete<T: Item>(&self, id: T::Id) -> Result<()> {
543        let url = format!("/api/{}/{}/", T::endpoint(), id);
544        self.request(Method::DELETE, &url, None).await?;
545        Ok(())
546    }
547
548    /// Upload a document to Paperless.
549    ///
550    /// Returns the task ID on success.
551    pub async fn upload_document(&self, file_path: &Path, filename: &str) -> Result<TaskId> {
552        let file_bytes = std::fs::read(file_path)
553            .map_err(|e| Error::Other(format!("Failed to read file: {e}")))?;
554
555        let form = multipart::Form::new().part(
556            "document",
557            multipart::Part::bytes(file_bytes).file_name(filename.to_string()),
558        );
559
560        let url = format!("{}/api/documents/post_document/", self.base_url);
561
562        let resp = self
563            .client
564            .post(&url)
565            .multipart(form)
566            .send()
567            .await
568            .map_err(|e| Error::Other(format!("Failed to send request: {e}")))?;
569
570        let status = resp.status();
571        if !resp.status().is_success() {
572            return Err(Error::Response {
573                status_code: status.as_u16(),
574                body: resp.text().await.unwrap_or_default(),
575            });
576        }
577
578        let task_id: String = resp
579            .json()
580            .await
581            .map_err(|e| Error::Other(format!("Failed to parse task ID: {e:?}")))?;
582        Ok(TaskId(task_id))
583    }
584
585    #[inline]
586    #[must_use]
587    pub fn tags(&self) -> &HashMap<TagId, Tag> {
588        &self.cached_data.tags
589    }
590
591    #[inline]
592    #[must_use]
593    pub fn storage_paths(&self) -> &HashMap<StoragePathId, StoragePath> {
594        &self.cached_data.storage_paths
595    }
596
597    #[must_use]
598    pub fn find_tag_by_name(&self, name: &str) -> Option<&Tag> {
599        self.cached_data.tags.values().find(|tag| tag.name == name)
600    }
601
602    #[inline]
603    #[must_use]
604    pub fn document_types(&self) -> &HashMap<DocumentTypeId, DocumentType> {
605        &self.cached_data.document_types
606    }
607
608    #[must_use]
609    pub fn find_document_type_by_name(&self, name: &str) -> Option<&DocumentType> {
610        self.cached_data
611            .document_types
612            .values()
613            .find(|dt| dt.name == name)
614    }
615
616    #[inline]
617    #[must_use]
618    pub fn correspondents(&self) -> &HashMap<CorrespondentId, Correspondent> {
619        &self.cached_data.correspondents
620    }
621
622    #[inline]
623    #[must_use]
624    pub fn custom_fields(&self) -> &HashMap<CustomFieldId, CustomField> {
625        &self.cached_data.custom_fields
626    }
627
628    #[must_use]
629    pub fn find_custom_field_by_name(&self, name: &str) -> Option<&CustomField> {
630        self.cached_data
631            .custom_fields
632            .values()
633            .find(|field| field.name == name)
634    }
635
636    #[inline]
637    #[must_use]
638    pub fn users(&self) -> &HashMap<UserId, User> {
639        &self.cached_data.users
640    }
641
642    #[inline]
643    #[must_use]
644    pub fn groups(&self) -> &HashMap<GroupId, Group> {
645        &self.cached_data.groups
646    }
647}