Skip to main content

paperless_api/
client.rs

1//! The central client for interacting with Paperless.
2
3use std::{collections::HashMap, fmt::Write, path::Path, str::FromStr, sync::Arc};
4
5use enum_iterator::Sequence;
6use reqwest::{
7    Method, StatusCode,
8    header::{ACCEPT, HeaderMap, HeaderName, InvalidHeaderValue},
9    multipart,
10};
11use serde::{Deserialize, de::DeserializeOwned};
12use tracing::{debug, trace};
13
14use crate::{
15    Error, Group, Result, SavedView, User,
16    document::{Document, DocumentData},
17    document_query::DocumentQueryBuilder,
18    dto::Item,
19    id::{
20        CorrespondentId, CustomFieldId, DocumentId, DocumentTypeId, GroupId, StoragePathId, TagId,
21        TaskId, UserId,
22    },
23    metadata::{
24        correspondent::Correspondent, custom_field::CustomField, document_type::DocumentType,
25        storage_path::StoragePath, tag::Tag,
26    },
27    task::Task,
28    util,
29    workflow::Workflow,
30};
31
/// Query parameter sent with the value `true` when full permission data is requested.
const QUERY_PARAM_FULL_PERMISSIONS: &str = "full_perms";
/// Query parameter name for content truncation.
// NOTE(review): not referenced in the code visible here — confirm it is still used.
const QUERY_PARAM_TRUNCATE_CONTENT: &str = "truncate_content";
/// Query parameter name for filtering documents by tag ids.
// NOTE(review): not referenced in the code visible here — confirm it is still used.
const QUERY_PARAM_TAGS_ID_IN: &str = "tags__id__in";
35
/// Selects which cached metadata to refresh.
///
/// Cached data is data which is rarely updated,
/// refreshing it is normally not necessary on every request.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, Sequence)]
#[non_exhaustive]
pub enum RefreshMetaData {
    /// Reload the cached tags.
    Tags,
    /// Reload the cached custom fields.
    CustomFields,
    /// Reload the cached correspondents.
    Correspondents,
    /// Reload the cached document types.
    DocumentTypes,
    /// Reload the cached groups.
    Groups,
    /// Reload the cached users.
    Users,
    /// Reload the cached storage paths.
    StoragePaths,
}
51
/// Client to interact with Paperless.
///
/// The client can be cloned. The metadata cache sits behind an [`Arc`], and a
/// refresh only updates the instance it is called on.
#[derive(Debug, Clone)]
pub struct PaperlessClient {
    /// Whether to request full permissions data for items.
    pub request_full_permissions: bool,

    /// Base URL of the Paperless server; endpoints are appended to it verbatim.
    pub(crate) base_url: Arc<str>,

    /// HTTP client carrying the default headers, including the auth token.
    client: reqwest::Client,
    /// Metadata cache filled by `refresh`; empty right after construction.
    cached_data: Arc<CachedData>,
}
63
/// Metadata cached by a [`PaperlessClient`], each collection keyed by item id.
///
/// All maps start out empty and are replaced wholesale when the matching
/// kind of metadata is refreshed.
#[derive(Debug, Clone)]
struct CachedData {
    /// Correspondents by id.
    correspondents: HashMap<CorrespondentId, Correspondent>,
    /// Custom fields by id.
    custom_fields: HashMap<CustomFieldId, CustomField>,
    /// Document types by id.
    document_types: HashMap<DocumentTypeId, DocumentType>,
    /// Groups by id.
    groups: HashMap<GroupId, Group>,
    /// Storage paths by id.
    storage_paths: HashMap<StoragePathId, StoragePath>,
    /// Tags by id.
    tags: HashMap<TagId, Tag>,
    /// Users by id.
    users: HashMap<UserId, User>,
}
74
/// One page of a paginated API listing.
#[derive(Debug, Deserialize)]
struct PaginatedResponse<T> {
    /// Items contained in this page.
    results: Vec<T>,
    /// URL of the following page; `None` ends the pagination loop.
    next: Option<String>,
}
80
81impl PaperlessClient {
82    /// Create a new Paperless client.
83    ///
84    /// # Arguments
85    ///
86    /// * `base_url` - The base URL of the Paperless API.
87    /// * `token` - The authentication token for the Paperless API.
88    /// * `headers` - Optional additional headers to include in requests.
89    pub fn new(
90        base_url: &str,
91        token: &str,
92        headers: Option<&HashMap<String, String>>,
93    ) -> std::result::Result<Self, String> {
94        Self::new_with_client(
95            base_url,
96            token,
97            headers,
98            reqwest::Client::builder().zstd(true),
99        )
100    }
101
102    /// Create a new Paperless client.
103    ///
104    /// Provide a [`reqwest::ClientBuilder`] to customize the HTTP client,
105    /// such as adding custom headers or disabling compression.
106    ///
107    /// # Arguments
108    ///
109    /// * `base_url` - The base URL of the Paperless API.
110    /// * `token` - The authentication token for the Paperless API.
111    /// * `headers` - Optional additional headers to include in requests.
112    /// * `client_builder` - [`reqwest::ClientBuilder`] to use for creating the HTTP client.
113    pub fn new_with_client(
114        base_url: &str,
115        token: &str,
116        headers: Option<&HashMap<String, String>>,
117        client_builder: reqwest::ClientBuilder,
118    ) -> std::result::Result<Self, String> {
119        let mut headers_map = HeaderMap::new();
120
121        // Add additional headers if provided
122        if let Some(headers) = headers {
123            for (key, value) in headers {
124                headers_map.insert(
125                    HeaderName::from_str(key).map_err(|err| err.to_string())?,
126                    value
127                        .parse()
128                        .map_err(|err: InvalidHeaderValue| err.to_string())?,
129                );
130            }
131        }
132
133        // Add the Paperless token header
134        headers_map.insert(
135            HeaderName::from_str("Authorization").map_err(|err| err.to_string())?,
136            format!("Token {token}")
137                .parse()
138                .map_err(|err: InvalidHeaderValue| err.to_string())?,
139        );
140
141        Ok(Self {
142            request_full_permissions: false,
143            base_url: base_url.into(),
144            client: client_builder
145                .default_headers(headers_map)
146                .build()
147                .map_err(|err| err.to_string())?,
148            cached_data: Arc::new(CachedData {
149                custom_fields: HashMap::new(),
150                correspondents: HashMap::new(),
151                document_types: HashMap::new(),
152                groups: HashMap::new(),
153                storage_paths: HashMap::new(),
154                tags: HashMap::new(),
155                users: HashMap::new(),
156            }),
157        })
158    }
159
160    /// Sets whether to request full permissions data for items during refresh.
161    ///
162    /// If not enabled only simple permission data is loaded.
163    /// See [`ItemPermissions`](crate::metadata::permission::ItemPermissions) for more details.
164    #[must_use]
165    pub fn with_full_permissions(mut self, req: bool) -> Self {
166        self.request_full_permissions = req;
167        self
168    }
169
170    /// Loads all items of the given type from the API.
171    async fn load_items<T: Item + DeserializeOwned>(&self) -> Result<HashMap<T::Id, T>> {
172        debug!("Loading {}", T::endpoint());
173        let endpoint = format!("/api/{}/", T::endpoint());
174
175        let items: Vec<T> = self
176            .fetch_all_pages(&endpoint, self.permissions_query_param())
177            .await?;
178        Ok(items.into_iter().map(|item| (item.id(), item)).collect())
179    }
180
181    fn permissions_query_param(&self) -> Option<&'static [(&'static str, &'static str)]> {
182        if self.request_full_permissions {
183            Some(&[(QUERY_PARAM_FULL_PERMISSIONS, "true")])
184        } else {
185            None
186        }
187    }
188
189    /// Refresh and cache all metadata.
190    ///
191    /// Only updates the cache for this instance, cloned instances will not see the changes.
192    ///
193    /// # Arguments
194    ///
195    /// * `full_permissions` - Whether to use request full permissions data for the items.
196    pub async fn refresh_all(&mut self) -> Result<()> {
197        self.refresh(enum_iterator::all::<RefreshMetaData>()).await
198    }
199
200    /// Refresh and cache the selected metadata.
201    ///
202    /// Only updates the cache for this instance, cloned instances will not see the changes.
203    ///
204    /// # Arguments
205    ///
206    /// * `data` - The metadata to refresh.
207    /// * `full_permissions` - Whether to use request full permissions data for the items being refreshed.
208    pub async fn refresh(&mut self, data: impl IntoIterator<Item = RefreshMetaData>) -> Result<()> {
209        #[rustfmt::skip]
210        async fn inner(
211            client: &mut PaperlessClient,
212            data: &mut dyn Iterator<Item = RefreshMetaData>,
213        ) -> Result<()> {
214            let selected: std::collections::HashSet<_> = data.into_iter().collect();
215
216            if selected.is_empty() {
217                return Ok(());
218            }
219
220            let (tags, custom_fields, correspondents, document_types, groups, users, storage_paths) =
221                futures_util::try_join!(
222                    async {
223                        if selected.contains(&RefreshMetaData::Tags) {
224                            Ok(Some(client.load_items::<Tag>().await?))
225                        } else {
226                            Ok::<Option<_>, Error>(None)
227                        }
228                    },
229                    async {
230                        if selected.contains(&RefreshMetaData::CustomFields) {
231                            Ok(Some(client.load_items::<CustomField>().await?))
232                        } else {
233                            Ok(None)
234                        }
235                    },
236                    async {
237                        if selected.contains(&RefreshMetaData::Correspondents) {
238                            Ok(Some(client.load_items::<Correspondent>().await?))
239                        } else {
240                            Ok(None)
241                        }
242                    },
243                    async {
244                        if selected.contains(&RefreshMetaData::DocumentTypes) {
245                            Ok(Some(client.load_items::<DocumentType>().await?))
246                        } else {
247                            Ok(None)
248                        }
249                    },
250                    async {
251                        if selected.contains(&RefreshMetaData::Groups) {
252                            Ok(Some(client.load_items::<Group>().await?))
253                        } else {
254                            Ok(None)
255                        }
256                    },
257                    async {
258                        if selected.contains(&RefreshMetaData::Users) {
259                            Ok(Some(client.load_items::<User>().await?))
260                        } else {
261                            Ok(None)
262                        }
263                    },
264                    async {
265                        if selected.contains(&RefreshMetaData::StoragePaths) {
266                            Ok(Some(client.load_items::<StoragePath>().await?))
267                        } else {
268                            Ok(None)
269                        }
270                    },
271                )?;
272
273            let cached_data = Arc::make_mut(&mut client.cached_data);
274
275            if let Some(value) = custom_fields { cached_data.custom_fields = value; }
276            if let Some(value) = correspondents { cached_data.correspondents = value; }
277            if let Some(value) = document_types { cached_data.document_types = value; }
278            if let Some(value) = groups { cached_data.groups = value; }
279            if let Some(value) = storage_paths { cached_data.storage_paths = value; }
280            if let Some(value) = tags { cached_data.tags = value; }
281            if let Some(value) = users { cached_data.users = value; }
282
283            Ok(())
284        }
285
286        inner(self, &mut data.into_iter()).await
287    }
288
289    pub async fn query_documents(&self, query: DocumentQueryBuilder) -> Result<Vec<Document>> {
290        let full_content = query.full_content;
291
292        let query_params = query.build();
293        let query_vec: Vec<_> = query_params
294            .query
295            .iter()
296            .map(|(k, v)| (*k, v.as_str()))
297            .collect();
298        let query_slice = query_vec.as_slice();
299
300        let documents: Vec<_> = self
301            .fetch_all_pages::<DocumentData>("/api/documents/", Some(query_slice))
302            .await?
303            .into_iter()
304            .map(|data| Document::new(data, Arc::new(self.clone()), !full_content))
305            .collect();
306
307        Ok(documents)
308    }
309
310    /// Get all documents with any of the given tags.
311    pub fn get_documents_by_tags(
312        &self,
313        tag_ids: &[TagId],
314        truncate_content: bool,
315    ) -> impl Future<Output = Result<Vec<Document>>> {
316        let query = DocumentQueryBuilder::default()
317            .full_content(!truncate_content)
318            .tags_id_in(tag_ids.iter().copied().collect());
319
320        self.query_documents(query)
321    }
322
323    pub(crate) async fn get_document_data_by_id(&self, id: DocumentId) -> Result<DocumentData> {
324        let resp = self
325            .request(Method::GET, &format!("/api/documents/{}/", id.0), None)
326            .await?;
327
328        let document_data: DocumentData = resp
329            .json()
330            .await
331            .map_err(|e| Error::Other(format!("Failed to parse document: {e}")))?;
332
333        Ok(document_data)
334    }
335
336    /// Get a document by its ID.
337    pub async fn get_document_by_id(&self, id: DocumentId) -> Result<Document> {
338        Ok(Document::new(
339            self.get_document_data_by_id(id).await?,
340            Arc::new(self.clone()),
341            false,
342        ))
343    }
344
345    pub(crate) async fn request(
346        &self,
347        method: Method,
348        endpoint: &str,
349        body: Option<&serde_json::Value>,
350    ) -> Result<reqwest::Response> {
351        let mut req = self
352            .client
353            .request(method, format!("{}{endpoint}", self.base_url))
354            .header(ACCEPT, "application/json");
355
356        // Set payload body if provided
357        if let Some(json_body) = body {
358            req = req.json(json_body);
359        }
360
361        let resp = req
362            .send()
363            .await
364            .map_err(|e| Error::Other(format!("Failed to send request: {e}")))?;
365
366        if resp.status() == StatusCode::NOT_FOUND {
367            return Err(Error::NotFound);
368        }
369
370        if !resp.status().is_success() {
371            return Err(Error::Response {
372                status_code: resp.status().as_u16(),
373                body: resp.text().await.unwrap_or_default(),
374            });
375        }
376
377        Ok(resp)
378    }
379
380    pub(crate) async fn request_with_body(
381        &self,
382        method: Method,
383        endpoint: &str,
384        body: &impl serde::Serialize,
385    ) -> Result<reqwest::Response> {
386        let body = serde_json::to_value(body).map_err(|e| Error::Other(e.to_string()))?;
387        self.request(method, endpoint, Some(&body)).await
388    }
389
390    pub(crate) async fn fetch_all_pages<T: for<'de> Deserialize<'de>>(
391        &self,
392        endpoint: &str,
393        query_params: Option<&[(&str, &str)]>,
394    ) -> Result<Vec<T>> {
395        let mut results = Vec::new();
396        let mut current_url = endpoint.to_string();
397        let mut first_param = true;
398
399        if let Some(params) = query_params {
400            for param in params {
401                if first_param {
402                    current_url.push('?');
403                    first_param = false;
404                } else {
405                    current_url.push('&');
406                }
407                let _ = write!(current_url, "{}={}", param.0, param.1);
408            }
409        }
410
411        let mut current_url = Some(current_url);
412
413        while let Some(url) = current_url {
414            let resp = self.request(Method::GET, &url, None).await?;
415
416            let page: PaginatedResponse<T> = resp.json().await.map_err(|e| {
417                Error::InvalidJson(format!(
418                    "Failed to parse paginated response for {endpoint}: {e:?}"
419                ))
420            })?;
421
422            results.extend(page.results);
423
424            current_url = page.next.and_then(|next_url| {
425                // Extract just the path from the full URL
426                next_url
427                    .trim_start_matches(&*self.base_url)
428                    .to_string()
429                    .into()
430            });
431        }
432
433        Ok(results)
434    }
435
436    /// Get all tasks with optional filtering by ID, name, or acknowledged status.
437    pub async fn get_task_status(
438        &self,
439        task_id: Option<&TaskId>,
440        task_name: Option<&str>,
441        acknowledged: Option<bool>,
442    ) -> Result<Vec<Task>> {
443        let mut query = Vec::new();
444
445        if let Some(id) = task_id {
446            query.push(("task_id", id.to_string()));
447        }
448
449        if let Some(name) = task_name {
450            query.push(("task_name", name.to_string()));
451        }
452
453        if let Some(ack) = acknowledged {
454            query.push(("acknowledged", ack.to_string()));
455        }
456
457        let resp = self
458            .request(
459                Method::GET,
460                &format!(
461                    "/api/tasks/?{}",
462                    serde_urlencoded::to_string(&query)
463                        .map_err(|e| Error::Other(format!("Failed to serialize query: {e}")))?
464                ),
465                None::<&serde_json::Value>,
466            )
467            .await?;
468
469        trace!("get_task_status response: {:?}", resp);
470
471        let body = resp
472            .text()
473            .await
474            .map_err(|e| Error::Other(format!("Failed to read response body: {e}")))?;
475
476        let tasks: Vec<Task> = match serde_json::from_str(&body) {
477            Ok(t) => t,
478            Err(e) => {
479                return Err(Error::InvalidJson(format!(
480                    "Failed to parse response body: {e}"
481                )));
482            }
483        };
484
485        if tasks.is_empty() {
486            return Err(Error::NotFound);
487        }
488
489        Ok(tasks)
490    }
491
492    pub fn get_workflows(&self) -> impl Future<Output = Result<Vec<Workflow>>> {
493        self.fetch_all_pages("/api/workflows/", None)
494    }
495
496    pub fn get_saved_views(&self) -> impl Future<Output = Result<Vec<SavedView>>> {
497        self.fetch_all_pages("/api/saved_views/", None)
498    }
499
500    pub async fn get_statistics(&self) -> Result<util::Statistics> {
501        self.request(Method::GET, "/api/statistics/", None)
502            .await
503            .map_err(|e| Error::Other(format!("Failed to send request: {e}")))?
504            .json()
505            .await
506            .map_err(|e| Error::Other(format!("Failed to parse response body: {e:?}")))
507    }
508
509    pub async fn get_status(&self) -> Result<util::ServerStatus> {
510        self.request(Method::GET, "/api/status/", None)
511            .await
512            .map_err(|e| Error::Other(format!("Failed to send request: {e}")))?
513            .json()
514            .await
515            .map_err(|e| Error::Other(format!("Failed to parse response body: {e:?}")))
516    }
517
518    /// Create a new item in Paperless.
519    ///
520    /// All structs which implement [`CreateDtoObject`] can be used as `new_item`.
521    ///
522    /// Returns the created item
523    pub async fn create<T: Item>(&self, new_item: T::CreateDto) -> Result<T::BaseType> {
524        let url = format!("/api/{}/", T::endpoint());
525        let resp = self
526            .request_with_body(Method::POST, &url, &new_item)
527            .await?;
528
529        resp.json::<T::BaseType>()
530            .await
531            .map_err(|e| Error::Other(format!("Failed to parse response body: {e}")))
532    }
533
534    /// Updates an existing item in Paperless.
535    ///
536    /// All structs which implement [`UpdateDtoObject`] can be used as `item`.
537    pub async fn update<T: Item>(&self, id: T::Id, item: T::UpdateDto) -> Result<()> {
538        let url = format!("/api/{}/{}/", T::endpoint(), id);
539        self.request_with_body(Method::PATCH, &url, &item).await?;
540        Ok(())
541    }
542
543    /// Deletes an existing item in Paperless.
544    pub async fn delete<T: Item>(&self, id: T::Id) -> Result<()> {
545        let url = format!("/api/{}/{}/", T::endpoint(), id);
546        self.request(Method::DELETE, &url, None).await?;
547        Ok(())
548    }
549
550    /// Upload a document to Paperless.
551    ///
552    /// Returns the task ID on success.
553    pub async fn upload_document(&self, file_path: &Path, filename: &str) -> Result<TaskId> {
554        let file_bytes = std::fs::read(file_path)
555            .map_err(|e| Error::Other(format!("Failed to read file: {e}")))?;
556
557        let form = multipart::Form::new().part(
558            "document",
559            multipart::Part::bytes(file_bytes).file_name(filename.to_string()),
560        );
561
562        let url = format!("{}/api/documents/post_document/", self.base_url);
563
564        let resp = self
565            .client
566            .post(&url)
567            .multipart(form)
568            .send()
569            .await
570            .map_err(|e| Error::Other(format!("Failed to send request: {e}")))?;
571
572        let status = resp.status();
573        if !resp.status().is_success() {
574            return Err(Error::Response {
575                status_code: status.as_u16(),
576                body: resp.text().await.unwrap_or_default(),
577            });
578        }
579
580        let task_id: String = resp
581            .json()
582            .await
583            .map_err(|e| Error::Other(format!("Failed to parse task ID: {e}")))?;
584        Ok(TaskId(task_id))
585    }
586
587    #[inline]
588    #[must_use]
589    pub fn tags(&self) -> &HashMap<TagId, Tag> {
590        &self.cached_data.tags
591    }
592
593    #[inline]
594    #[must_use]
595    pub fn storage_paths(&self) -> &HashMap<StoragePathId, StoragePath> {
596        &self.cached_data.storage_paths
597    }
598
599    #[must_use]
600    pub fn find_tag_by_name(&self, name: &str) -> Option<&Tag> {
601        self.cached_data.tags.values().find(|tag| tag.name == name)
602    }
603
604    #[inline]
605    #[must_use]
606    pub fn document_types(&self) -> &HashMap<DocumentTypeId, DocumentType> {
607        &self.cached_data.document_types
608    }
609
610    #[must_use]
611    pub fn find_document_type_by_name(&self, name: &str) -> Option<&DocumentType> {
612        self.cached_data
613            .document_types
614            .values()
615            .find(|dt| dt.name == name)
616    }
617
618    #[inline]
619    #[must_use]
620    pub fn correspondents(&self) -> &HashMap<CorrespondentId, Correspondent> {
621        &self.cached_data.correspondents
622    }
623
624    #[inline]
625    #[must_use]
626    pub fn custom_fields(&self) -> &HashMap<CustomFieldId, CustomField> {
627        &self.cached_data.custom_fields
628    }
629
630    #[must_use]
631    pub fn find_custom_field_by_name(&self, name: &str) -> Option<&CustomField> {
632        self.cached_data
633            .custom_fields
634            .values()
635            .find(|field| field.name == name)
636    }
637
638    #[inline]
639    #[must_use]
640    pub fn users(&self) -> &HashMap<UserId, User> {
641        &self.cached_data.users
642    }
643
644    #[inline]
645    #[must_use]
646    pub fn groups(&self) -> &HashMap<GroupId, Group> {
647        &self.cached_data.groups
648    }
649}