Skip to main content

paperless_api/
document.rs

1//! Types for working with Paperless documents.
2//!
3//! Document mutations are applied locally first.
4//! Methods such as [`set_title`](Document::set_title),
5//! [`set_content`](Document::set_content),
6//! [`add_tag`](Document::add_tag), etc.,
7//! only update the in-memory [`Document`] value and mark it as changed.
8//! The changes are only sent to the Paperless server when
9//! [`patch`](Document::patch) is called.
10
11use std::{fmt::Display, sync::Arc, time::Duration};
12
13use chrono::{DateTime, NaiveDate, Utc};
14use derive_more::Display;
15use enumflags2::{BitFlags, bitflags};
16use futures_util::TryStreamExt;
17use reqwest::Method;
18use serde::{Deserialize, Serialize};
19use tokio::io::AsyncWriteExt;
20
21use paperless_api_macros::UpdateDto;
22
23use crate::{
24    Error, Result,
25    client::PaperlessClient,
26    id::{
27        CorrespondentId, CustomFieldId, DocumentId, DocumentTypeId, StoragePathId, TagId, UserId,
28    },
29    metadata::{custom_field::DocumentCustomField, permission::ItemPermissions},
30    note::Note,
31    share_link::{CreateShareLink, ShareLink, ShareLinkFileVersion},
32};
33
34/// Represents a document.
35///
36/// Changes made through mutating methods such as
37/// [`set_title`](Document::set_title),
38/// [`set_content`](Document::set_content),
39/// [`add_tag`](Document::add_tag), and
40/// [`set_custom_field`](Document::set_custom_field)
41/// are only tracked locally at first.
42///
43/// They are not sent to the Paperless server until
44/// [`patch`](Document::patch) is called.
45#[derive(Debug, Clone)]
46pub struct Document {
    // The document's field values, including edits made through the setters.
47    data: DocumentData,
48
    // Shared client used for every request this document issues.
49    client: Arc<PaperlessClient>,
    // Whether `data.content` is only a truncated excerpt; selects the
    // `Content` variant returned by `content()`.
50    content_is_truncated: bool,
    // Attributes modified locally since the last successful `patch` or
    // `refresh`; replaced by the lone `Deleted` flag once `delete` succeeds.
51    changed_values: BitFlags<ChangedAttributes>,
52}
53
// Raw document record as deserialized from the Paperless API.
//
// `Document::patch` builds an `UpdateDocumentData` value from exactly the
// fields below that are NOT marked `#[dto(skip)]`; that type is presumably
// generated by the `UpdateDto` derive (TODO confirm in `paperless_api_macros`).
54#[derive(Debug, Clone, Deserialize, UpdateDto)]
55#[api_info(endpoint = "documents", id = DocumentId)]
56pub(crate) struct DocumentData {
57    #[dto(skip)]
58    id: DocumentId,
59
60    archive_serial_number: Option<ArchiveSerialNumber>,
61
62    #[dto(skip)]
63    original_file_name: String,
64
65    #[dto(skip)]
66    added: DateTime<Utc>,
67
    // A calendar date without a time component, unlike `added` and `modified`.
68    created: Option<NaiveDate>,
69
70    #[dto(skip)]
71    modified: DateTime<Utc>,
72
73    #[dto(skip)]
74    page_count: Option<u32>,
75
76    title: String,
    // May hold only a truncated excerpt; `Document::content_is_truncated`
    // tracks which.
77    content: String,
78    tags: Vec<TagId>,
79    owner: Option<UserId>,
80    correspondent: Option<CorrespondentId>,
81    custom_fields: Vec<DocumentCustomField>,
82    document_type: Option<DocumentTypeId>,
83    storage_path: Option<StoragePathId>,
84
85    #[dto(skip)]
86    notes: Vec<Note>,
87
    // Flattened: the permission fields are read from the same JSON object as
    // the document itself rather than from a nested `permissions` key.
88    #[serde(flatten)]
89    #[dto(skip)]
90    permissions: ItemPermissions,
91
92    #[dto(skip)]
93    mime_type: Option<String>,
94}
95
/// The archive serial number of a document, a thin wrapper around a `u32`.
96#[derive(Debug, Display, Clone, Copy, PartialEq, Eq, Hash, Deserialize, Serialize)]
97#[repr(transparent)]
98pub struct ArchiveSerialNumber(pub u32);
99
// Flags recording which attributes of a `Document` were modified locally.
//
// Each setter on `Document` raises the matching flag, and `Document::patch`
// only includes flagged attributes in the update it sends.
100#[bitflags]
101#[repr(u16)]
102#[derive(Copy, Clone, Debug, PartialEq)]
103enum ChangedAttributes {
104    ArchiveSerialNumber,
105    Title,
106    Content,
107    Tags,
108    CustomFields,
109    Correspondent,
110    DocumentType,
111    Created,
112    Owner,
113    StoragePath,
114
    // Not an editable attribute: set by `Document::delete` after the request
    // succeeded, and checked by `is_deleted` / `fail_if_deleted`.
115    Deleted,
116}
117
118/// The content (OCR) of a document, either full or truncated.
///
/// Both variants borrow the text; [`Document::content`] chooses the variant
/// depending on whether the document currently holds truncated content.
119#[derive(Debug, Clone)]
120pub enum Content<'a> {
121    /// Full content of the document.
122    Full(&'a str),
123
124    /// Truncated content of the document.
125    Truncated(&'a str),
126}
127
128impl Document {
129    pub(crate) fn new(
130        data: DocumentData,
131        client: Arc<PaperlessClient>,
132        content_is_truncated: bool,
133    ) -> Self {
134        Self {
135            data,
136            client,
137            content_is_truncated,
138            changed_values: BitFlags::default(),
139        }
140    }
141
142    /// Get the unique identifier of the document.
143    #[inline]
144    #[must_use]
145    pub fn id(&self) -> DocumentId {
146        self.data.id
147    }
148
149    /// Get the archive serial number of the document.
150    #[inline]
151    #[must_use]
152    pub fn archive_serial_number(&self) -> Option<ArchiveSerialNumber> {
153        self.data.archive_serial_number
154    }
155
156    /// Get the timestamp when the document was added.
157    #[inline]
158    #[must_use]
159    pub fn added(&self) -> &DateTime<Utc> {
160        &self.data.added
161    }
162
163    /// Get the created timestamp of the document.
164    #[inline]
165    #[must_use]
166    pub fn created(&self) -> Option<&NaiveDate> {
167        self.data.created.as_ref()
168    }
169
170    /// Get the modified timestamp of the document.
171    #[inline]
172    #[must_use]
173    pub fn modified(&self) -> &DateTime<Utc> {
174        &self.data.modified
175    }
176
177    /// Get the title of the document.
178    #[inline]
179    #[must_use]
180    pub fn title(&self) -> &str {
181        &self.data.title
182    }
183
184    /// Get the original file name of the document.
185    #[inline]
186    #[must_use]
187    pub fn original_file_name(&self) -> &str {
188        &self.data.original_file_name
189    }
190
191    /// Get the MIME type
192    #[inline]
193    #[must_use]
194    pub fn mime_type(&self) -> Option<&str> {
195        self.data.mime_type.as_deref()
196    }
197
198    /// Get the correspondent id of the document.
199    #[inline]
200    #[must_use]
201    pub fn correspondent(&self) -> Option<CorrespondentId> {
202        self.data.correspondent
203    }
204
205    /// Get the owner id of the document.
206    #[inline]
207    #[must_use]
208    pub fn owner(&self) -> Option<UserId> {
209        self.data.owner
210    }
211
212    /// Get the document type id of the document.
213    #[inline]
214    #[must_use]
215    pub fn document_type(&self) -> Option<DocumentTypeId> {
216        self.data.document_type
217    }
218
219    /// Get the number of pages in the document.
220    #[inline]
221    #[must_use]
222    pub fn page_count(&self) -> Option<u32> {
223        self.data.page_count
224    }
225
226    /// Get all tag-ids for the document.
227    #[inline]
228    #[must_use]
229    pub fn tags(&self) -> &[TagId] {
230        &self.data.tags
231    }
232
233    /// Get all custom fields for the document.
234    #[inline]
235    #[must_use]
236    pub fn custom_fields(&self) -> &[DocumentCustomField] {
237        &self.data.custom_fields
238    }
239
240    /// Get the content of the document.
241    #[inline]
242    #[must_use]
243    pub fn content(&self) -> Content<'_> {
244        if self.content_is_truncated {
245            Content::Truncated(&self.data.content)
246        } else {
247            Content::Full(&self.data.content)
248        }
249    }
250
251    /// Get the storage path of the document.
252    #[inline]
253    #[must_use]
254    pub fn storage_path(&self) -> Option<StoragePathId> {
255        self.data.storage_path
256    }
257
258    /// Get the notes for the document.
259    #[inline]
260    #[must_use]
261    pub fn notes(&self) -> &[Note] {
262        &self.data.notes
263    }
264
265    /// Get the permissions for the document.
266    #[inline]
267    #[must_use]
268    pub fn permissions(&self) -> &ItemPermissions {
269        &self.data.permissions
270    }
271
272    /// Set the archive serial number of the document.
273    #[inline]
274    pub fn set_archive_serial_number(
275        &mut self,
276        archive_serial_number: Option<ArchiveSerialNumber>,
277    ) {
278        self.data.archive_serial_number = archive_serial_number;
279        self.changed_values |= ChangedAttributes::ArchiveSerialNumber;
280    }
281
282    /// Add a tag to the document.
283    pub fn add_tag(&mut self, tag_id: TagId) {
284        if !self.data.tags.contains(&tag_id) {
285            self.data.tags.push(tag_id);
286            self.changed_values |= ChangedAttributes::Tags;
287        }
288    }
289
290    /// Remove a tag from the document.
291    pub fn remove_tag(&mut self, tag_id: TagId) {
292        if let Some(index) = self.data.tags.iter().position(|id| *id == tag_id) {
293            self.data.tags.remove(index);
294            self.changed_values |= ChangedAttributes::Tags;
295        }
296    }
297
298    /// Set the title of the document.
299    pub fn set_title(&mut self, title: impl Into<String>) {
300        self.data.title = title.into();
301        self.changed_values |= ChangedAttributes::Title;
302    }
303
304    /// Set the content of the document.
305    pub fn set_content(&mut self, content: impl Into<String>) {
306        self.data.content = content.into();
307        self.content_is_truncated = false;
308        self.changed_values |= ChangedAttributes::Content;
309    }
310
311    /// Set a custom field for the document.
312    pub fn set_custom_field(&mut self, field: CustomFieldId, value: impl Into<String>) {
313        for custom_field in &mut self.data.custom_fields {
314            if custom_field.field == field {
315                custom_field.value = value.into();
316                self.changed_values |= ChangedAttributes::CustomFields;
317                return;
318            }
319        }
320
321        self.data.custom_fields.push(DocumentCustomField {
322            field,
323            value: value.into(),
324        });
325        self.changed_values |= ChangedAttributes::CustomFields;
326    }
327
328    /// Remove a custom field from the document.
329    pub fn remove_custom_field(&mut self, field: CustomFieldId) {
330        if let Some(index) = self
331            .data
332            .custom_fields
333            .iter()
334            .position(|custom_field| custom_field.field == field)
335        {
336            self.data.custom_fields.remove(index);
337            self.changed_values |= ChangedAttributes::CustomFields;
338        }
339    }
340
341    /// Set the created date of the document.
342    pub fn set_created(&mut self, created: NaiveDate) {
343        self.data.created = Some(created);
344        self.changed_values |= ChangedAttributes::Created;
345    }
346
347    /// Set the owner of the document.
348    pub fn set_owner(&mut self, owner: UserId) {
349        self.data.owner = Some(owner);
350        self.changed_values |= ChangedAttributes::Owner;
351    }
352
353    /// Set the correspondent of the document.
354    pub fn set_correspondent(&mut self, correspondent: CorrespondentId) {
355        self.data.correspondent = Some(correspondent);
356        self.changed_values |= ChangedAttributes::Correspondent;
357    }
358
359    /// Set the document type of the document.
360    pub fn set_document_type(&mut self, document_type: DocumentTypeId) {
361        self.data.document_type = Some(document_type);
362        self.changed_values |= ChangedAttributes::DocumentType;
363    }
364
365    /// Set the storage path of the document.
366    pub fn set_storage_path(&mut self, storage_path: StoragePathId) {
367        self.data.storage_path = Some(storage_path);
368        self.changed_values |= ChangedAttributes::StoragePath;
369    }
370
371    /// Returns `true` if the document has unsaved changes.
372    #[inline]
373    #[must_use]
374    pub fn is_dirty(&self) -> bool {
375        !self.changed_values.is_empty() && !self.changed_values.contains(ChangedAttributes::Deleted)
376    }
377
378    /// Returns `true` if the document was deleted.
379    #[inline]
380    #[must_use]
381    pub fn is_deleted(&self) -> bool {
382        self.changed_values.contains(ChangedAttributes::Deleted)
383    }
384
385    fn fail_if_deleted(&self) -> Result<()> {
386        if self.is_deleted() {
387            Err(Error::AlreadyDeleted)
388        } else {
389            Ok(())
390        }
391    }
392
393    /// Refresh the document from the server.
394    ///
395    /// This will discard any local changes and replace them with the server's state.
396    pub async fn refresh(&mut self) -> Result<()> {
397        let document_data = self
398            .client
399            .as_ref()
400            .get_document_data_by_id(self.data.id)
401            .await?;
402
403        self.data = document_data;
404
405        self.changed_values = BitFlags::empty();
406        self.content_is_truncated = false;
407        Ok(())
408    }
409
410    /// Get the document thumbnail.
411    ///
412    /// Returns the raw thumbnail image data.
413    pub async fn thumbnail(&self) -> Result<Vec<u8>> {
414        let resp = self
415            .client
416            .request(
417                Method::GET,
418                &format!("/api/documents/{}/thumb/", self.data.id),
419                None,
420                None,
421            )
422            .await?;
423
424        Ok(resp
425            .bytes()
426            .await
427            .map_err(|e| Error::Other(format!("Failed to read response body: {e}")))?
428            .to_vec())
429    }
430
431    /// Update the document on the server.
432    ///
433    /// This applies the currently tracked local changes to the remote Paperless document.
434    pub async fn patch(&mut self) -> Result<()> {
435        if !self.is_dirty() {
436            return Ok(());
437        }
438
439        self.fail_if_deleted()?;
440
441        let patch = UpdateDocumentData {
442            title: self
443                .changed_values
444                .contains(ChangedAttributes::Title)
445                .then_some(self.data.title.clone()),
446
447            archive_serial_number: self
448                .changed_values
449                .contains(ChangedAttributes::ArchiveSerialNumber)
450                .then_some(self.data.archive_serial_number),
451
452            content: self
453                .changed_values
454                .contains(ChangedAttributes::Content)
455                .then_some(self.data.content.clone()),
456
457            tags: self
458                .changed_values
459                .contains(ChangedAttributes::Tags)
460                .then_some(self.data.tags.clone()),
461
462            custom_fields: self
463                .changed_values
464                .contains(ChangedAttributes::CustomFields)
465                .then_some(self.data.custom_fields.clone()),
466
467            correspondent: self
468                .changed_values
469                .contains(ChangedAttributes::Correspondent)
470                .then_some(self.data.correspondent),
471
472            document_type: self
473                .changed_values
474                .contains(ChangedAttributes::DocumentType)
475                .then_some(self.data.document_type),
476
477            created: self
478                .changed_values
479                .contains(ChangedAttributes::Created)
480                .then_some(self.data.created),
481
482            owner: self
483                .changed_values
484                .contains(ChangedAttributes::Owner)
485                .then_some(self.data.owner),
486
487            storage_path: self
488                .changed_values
489                .contains(ChangedAttributes::StoragePath)
490                .then_some(self.data.storage_path),
491        };
492
493        self.client
494            .request(
495                Method::PATCH,
496                &format!("/api/documents/{}/", self.data.id),
497                Some(&serde_json::to_value(&patch).map_err(|e| Error::Other(e.to_string()))?),
498                None,
499            )
500            .await?;
501
502        self.changed_values = BitFlags::empty();
503        Ok(())
504    }
505
506    /// Delete the document
507    pub async fn delete(&mut self) -> Result<()> {
508        self.client
509            .request(
510                Method::DELETE,
511                &format!("/api/documents/{}/", self.data.id),
512                None,
513                None,
514            )
515            .await?;
516
517        self.changed_values = BitFlags::from(ChangedAttributes::Deleted);
518        Ok(())
519    }
520
521    /// Get the full content of the document, replacing any truncated content.
522    pub async fn get_full_content(&mut self) -> Result<()> {
523        self.fail_if_deleted()?;
524
525        if !self.content_is_truncated {
526            return Ok(());
527        }
528
529        let doc = self.client.get_document_data_by_id(self.data.id).await?;
530        self.data.content = doc.content;
531        self.content_is_truncated = false;
532        Ok(())
533    }
534
535    /// Download the document to a buffer.
536    pub async fn download_to_buffer(&self) -> Result<Vec<u8>> {
537        self.fail_if_deleted()?;
538
539        let resp = self
540            .client
541            .request(
542                Method::GET,
543                &format!("/api/documents/{}/download/", self.data.id),
544                None,
545                None,
546            )
547            .await?;
548
549        if resp.status().is_success() {
550            let bytes = resp
551                .bytes()
552                .await
553                .map_err(|e| Error::Other(format!("Failed to read response body: {e}")))?;
554            Ok(bytes.to_vec())
555        } else {
556            Err(Error::Other(format!(
557                "Failed to download document: {}",
558                resp.status()
559            )))
560        }
561    }
562
563    /// Download the document to a file, requires the `tokio-fs` feature.
564    pub async fn download_to_file(&self, path: &std::path::Path) -> Result<()> {
565        self.fail_if_deleted()?;
566
567        let resp = self
568            .client
569            .request(
570                Method::GET,
571                &format!("/api/documents/{}/download/", self.data.id),
572                None,
573                None,
574            )
575            .await?;
576
577        if !resp.status().is_success() {
578            return Err(Error::Other(format!(
579                "Failed to download document: {}",
580                resp.status()
581            )));
582        }
583
584        let mut file = tokio::fs::File::create(path)
585            .await
586            .map_err(|e| Error::Other(format!("Failed to create file: {e}")))?;
587
588        resp.bytes_stream()
589            .map_err(|e| Error::Other(format!("Failed to read document chunk: {e}")))
590            .try_fold(&mut file, |file, chunk| async move {
591                file.write_all(&chunk).await.map_err(|e| {
592                    Error::Other(format!("Failed to save document chunk to file: {e}"))
593                })?;
594                Ok(file)
595            })
596            .await?;
597
598        Ok(())
599    }
600
601    /// Generates a share link for the document that expires after the specified duration.
602    pub fn generate_share_link_duration(
603        &self,
604        valid_for: Duration,
605        version: ShareLinkFileVersion,
606    ) -> impl Future<Output = Result<ShareLink>> {
607        let expires = Utc::now() + valid_for;
608        self.generate_share_link_expires(expires, version)
609    }
610
611    /// Generates a share link for the document that expires at the specified time.
612    pub async fn generate_share_link_expires(
613        &self,
614        expires: DateTime<Utc>,
615        version: ShareLinkFileVersion,
616    ) -> Result<ShareLink> {
617        self.fail_if_deleted()?;
618
619        let mut share_link = self
620            .client
621            .request_json::<ShareLink>(
622                Method::POST,
623                "/api/share_links/",
624                Some(
625                    &serde_json::to_value(&CreateShareLink {
626                        document: self.id(),
627                        expiration: expires,
628                        file_version: version,
629                    })
630                    .map_err(|e| Error::Other(e.to_string()))?,
631                ),
632                None,
633            )
634            .await?;
635
636        share_link.base_url = self.client.base_url.clone();
637        Ok(share_link)
638    }
639}
640
641impl Display for Content<'_> {
642    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
643        match self {
644            Content::Full(text) => write!(f, "{text}"),
645            Content::Truncated(text) => write!(f, "{text}..."),
646        }
647    }
648}
649
650impl AsRef<str> for Content<'_> {
651    fn as_ref(&self) -> &str {
652        match self {
653            Content::Full(text) | Content::Truncated(text) => text,
654        }
655    }
656}