Skip to main content

paperless_api/
document.rs

1//! Types for working with Paperless documents.
2//!
3//! Document mutations are applied locally first.
4//! Methods such as [`set_title`](Document::set_title),
5//! [`set_content`](Document::set_content),
//! [`add_tag`](Document::add_tag), etc.
7//! only update the in-memory [`Document`] value and mark it as changed.
8//! The changes are only sent to the Paperless server when
9//! [`patch`](Document::patch) is called.
10
11use std::{fmt::Display, sync::Arc, time::Duration};
12
13use chrono::{DateTime, NaiveDate, Utc};
14use derive_more::Display;
15use enumflags2::{BitFlags, bitflags};
16use futures_util::TryStreamExt;
17use reqwest::Method;
18use serde::{Deserialize, Serialize};
19use tokio::io::AsyncWriteExt;
20
21use paperless_api_macros::UpdateDto;
22
23use crate::{
24    Error, Result,
25    client::PaperlessClient,
26    id::{
27        CorrespondentId, CustomFieldId, DocumentId, DocumentTypeId, StoragePathId, TagId, UserId,
28    },
29    metadata::{custom_field::DocumentCustomField, permission::ItemPermissions},
30    note::Note,
31    share_link::{CreateShareLink, ShareLink, ShareLinkFileVersion},
32};
33
34/// Represents a document.
35///
36/// Changes made through mutating methods such as
37/// [`set_title`](Document::set_title),
38/// [`set_content`](Document::set_content),
39/// [`add_tag`](Document::add_tag), and
40/// [`set_custom_field`](Document::set_custom_field)
41/// are only tracked locally at first.
42///
43/// They are not sent to the Paperless server until
44/// [`patch`](Document::patch) is called.
#[derive(Debug, Clone)]
pub struct Document {
    /// Attribute values of the document, including local modifications that
    /// have not been patched yet.
    data: DocumentData,

    /// Client used for every request issued by this document.
    client: Arc<PaperlessClient>,
    /// `true` while `data.content` holds only truncated content; selects the
    /// [`Content`] variant returned by [`content`](Document::content).
    content_is_truncated: bool,
    /// Attributes modified locally since the tracking was last reset, plus the
    /// `Deleted` marker set by [`delete`](Document::delete).
    changed_values: BitFlags<ChangedAttributes>,
}
53
/// Deserialized attribute set of a document.
///
/// Fields marked `#[dto(skip)]` are exactly those that do not appear in the
/// `UpdateDocumentData` value built by [`Document::patch`]; presumably the
/// `UpdateDto` derive generates that type from the remaining fields
/// (NOTE(review): confirm against `paperless_api_macros`).
#[derive(Debug, Clone, Deserialize, UpdateDto)]
#[api_info(id = DocumentId)]
pub(crate) struct DocumentData {
    /// Identifier of the document; used to build the `/api/documents/{id}/` URLs.
    #[dto(skip)]
    id: DocumentId,

    /// Archive serial number, if one is assigned.
    archive_serial_number: Option<ArchiveSerialNumber>,

    /// Original file name of the document.
    #[dto(skip)]
    original_file_name: String,

    /// Timestamp (UTC) at which the document was added.
    #[dto(skip)]
    added: DateTime<Utc>,

    /// Creation date of the document (a calendar date without time), if any.
    created: Option<NaiveDate>,

    /// Timestamp (UTC) of the last modification.
    #[dto(skip)]
    modified: DateTime<Utc>,

    /// Number of pages, if known.
    #[dto(skip)]
    page_count: Option<u32>,

    /// Title of the document.
    title: String,
    /// Text content of the document; may be truncated, which is tracked by
    /// `Document::content_is_truncated` rather than here.
    content: String,
    /// Ids of the tags attached to the document.
    tags: Vec<TagId>,
    /// Id of the owning user, if any.
    owner: Option<UserId>,
    /// Id of the correspondent, if any.
    correspondent: Option<CorrespondentId>,
    /// Custom field values attached to the document.
    custom_fields: Vec<DocumentCustomField>,
    /// Id of the document type, if any.
    document_type: Option<DocumentTypeId>,
    /// Id of the storage path, if any.
    storage_path: Option<StoragePathId>,

    /// Notes attached to the document.
    #[dto(skip)]
    notes: Vec<Note>,

    /// Permission information; its fields are read from the same JSON object
    /// as the document itself (`#[serde(flatten)]`).
    #[serde(flatten)]
    #[dto(skip)]
    permissions: ItemPermissions,

    /// MIME type of the document, if provided.
    #[dto(skip)]
    mime_type: Option<String>,
}
95
/// Archive serial number of a document.
///
/// Transparent newtype around the raw `u32` value, which is publicly
/// accessible as `.0`.
#[derive(Debug, Display, Clone, Copy, PartialEq, Eq, Hash, Deserialize, Serialize)]
#[repr(transparent)]
pub struct ArchiveSerialNumber(pub u32);
99
/// Flags recording which attributes of a [`Document`] were modified locally.
///
/// Each variant except `Deleted` corresponds to one field that
/// [`Document::patch`] may include in the update request.
// Eleven flags are declared, which is more than a `u8` can hold.
#[bitflags]
#[repr(u16)]
#[derive(Copy, Clone, Debug, PartialEq)]
enum ChangedAttributes {
    ArchiveSerialNumber,
    Title,
    Content,
    Tags,
    CustomFields,
    Correspondent,
    DocumentType,
    Created,
    Owner,
    StoragePath,

    /// Not an attribute change: set by [`Document::delete`] after the delete
    /// request succeeded, replacing all other flags.
    Deleted,
}
117
/// The content (OCR) of a document, either full or truncated.
///
/// Both variants borrow the text from the [`Document`]. The `Display`
/// implementation appends `...` to truncated content, whereas `AsRef<str>`
/// yields the text unchanged for both variants.
#[derive(Debug, Clone)]
pub enum Content<'a> {
    /// Full content of the document.
    Full(&'a str),

    /// Truncated content of the document.
    Truncated(&'a str),
}
127
128impl Document {
129    pub(crate) fn new(
130        data: DocumentData,
131        client: Arc<PaperlessClient>,
132        content_is_truncated: bool,
133    ) -> Self {
134        Self {
135            data,
136            client,
137            content_is_truncated,
138            changed_values: BitFlags::default(),
139        }
140    }
141
    /// Returns the unique identifier of the document.
    #[inline]
    #[must_use]
    pub fn id(&self) -> DocumentId {
        self.data.id
    }
148
    /// Returns the archive serial number of the document, or `None` if it has none.
    ///
    /// Reflects a value set locally through
    /// [`set_archive_serial_number`](Document::set_archive_serial_number)
    /// even before it has been patched.
    #[inline]
    #[must_use]
    pub fn archive_serial_number(&self) -> Option<ArchiveSerialNumber> {
        self.data.archive_serial_number
    }
155
    /// Returns the timestamp (UTC) at which the document was added.
    #[inline]
    #[must_use]
    pub fn added(&self) -> &DateTime<Utc> {
        &self.data.added
    }
162
    /// Returns the creation date of the document, or `None` if it has none.
    ///
    /// This is a calendar date without a time component, not a timestamp.
    #[inline]
    #[must_use]
    pub fn created(&self) -> Option<&NaiveDate> {
        self.data.created.as_ref()
    }
169
    /// Returns the timestamp (UTC) of the last modification of the document.
    ///
    /// Local setters do not touch this value; it only changes when the
    /// document data is replaced by [`refresh`](Document::refresh).
    #[inline]
    #[must_use]
    pub fn modified(&self) -> &DateTime<Utc> {
        &self.data.modified
    }
176
    /// Returns the title of the document, including an unpatched local change.
    #[inline]
    #[must_use]
    pub fn title(&self) -> &str {
        &self.data.title
    }
183
    /// Returns the original file name of the document.
    #[inline]
    #[must_use]
    pub fn original_file_name(&self) -> &str {
        &self.data.original_file_name
    }
190
    /// Returns the MIME type of the document, or `None` if it is not available.
    #[inline]
    #[must_use]
    pub fn mime_type(&self) -> Option<&str> {
        self.data.mime_type.as_deref()
    }
197
    /// Returns the id of the document's correspondent, or `None` if it has none.
    #[inline]
    #[must_use]
    pub fn correspondent(&self) -> Option<CorrespondentId> {
        self.data.correspondent
    }
204
    /// Returns the id of the user owning the document, or `None` if it has no owner.
    #[inline]
    #[must_use]
    pub fn owner(&self) -> Option<UserId> {
        self.data.owner
    }
211
    /// Returns the id of the document's type, or `None` if it has none.
    #[inline]
    #[must_use]
    pub fn document_type(&self) -> Option<DocumentTypeId> {
        self.data.document_type
    }
218
    /// Returns the number of pages in the document, or `None` if it is not known.
    #[inline]
    #[must_use]
    pub fn page_count(&self) -> Option<u32> {
        self.data.page_count
    }
225
    /// Returns the ids of all tags attached to the document.
    ///
    /// Includes local changes made through [`add_tag`](Document::add_tag) and
    /// [`remove_tag`](Document::remove_tag) that have not been patched yet.
    #[inline]
    #[must_use]
    pub fn tags(&self) -> &[TagId] {
        &self.data.tags
    }
232
    /// Returns all custom field values attached to the document.
    ///
    /// Includes local changes made through
    /// [`set_custom_field`](Document::set_custom_field) and
    /// [`remove_custom_field`](Document::remove_custom_field) that have not
    /// been patched yet.
    #[inline]
    #[must_use]
    pub fn custom_fields(&self) -> &[DocumentCustomField] {
        &self.data.custom_fields
    }
239
240    /// Get the content of the document.
241    #[inline]
242    #[must_use]
243    pub fn content(&self) -> Content<'_> {
244        if self.content_is_truncated {
245            Content::Truncated(&self.data.content)
246        } else {
247            Content::Full(&self.data.content)
248        }
249    }
250
    /// Returns the id of the document's storage path, or `None` if it has none.
    #[inline]
    #[must_use]
    pub fn storage_path(&self) -> Option<StoragePathId> {
        self.data.storage_path
    }
257
    /// Returns the notes attached to the document.
    #[inline]
    #[must_use]
    pub fn notes(&self) -> &[Note] {
        &self.data.notes
    }
264
    /// Returns the permission information of the document.
    #[inline]
    #[must_use]
    pub fn permissions(&self) -> &ItemPermissions {
        &self.data.permissions
    }
271
272    /// Set the archive serial number of the document.
273    #[inline]
274    pub fn set_archive_serial_number(
275        &mut self,
276        archive_serial_number: Option<ArchiveSerialNumber>,
277    ) {
278        self.data.archive_serial_number = archive_serial_number;
279        self.changed_values |= ChangedAttributes::ArchiveSerialNumber;
280    }
281
282    /// Add a tag to the document.
283    pub fn add_tag(&mut self, tag_id: TagId) {
284        if !self.data.tags.contains(&tag_id) {
285            self.data.tags.push(tag_id);
286            self.changed_values |= ChangedAttributes::Tags;
287        }
288    }
289
290    /// Remove a tag from the document.
291    pub fn remove_tag(&mut self, tag_id: TagId) {
292        if let Some(index) = self.data.tags.iter().position(|id| *id == tag_id) {
293            self.data.tags.remove(index);
294            self.changed_values |= ChangedAttributes::Tags;
295        }
296    }
297
298    /// Set the title of the document.
299    pub fn set_title(&mut self, title: impl Into<String>) {
300        self.data.title = title.into();
301        self.changed_values |= ChangedAttributes::Title;
302    }
303
304    /// Set the content of the document.
305    pub fn set_content(&mut self, content: impl Into<String>) {
306        self.data.content = content.into();
307        self.content_is_truncated = false;
308        self.changed_values |= ChangedAttributes::Content;
309    }
310
311    /// Set a custom field for the document.
312    pub fn set_custom_field(&mut self, field: CustomFieldId, value: impl Into<String>) {
313        for custom_field in &mut self.data.custom_fields {
314            if custom_field.field == field {
315                custom_field.value = value.into();
316                self.changed_values |= ChangedAttributes::CustomFields;
317                return;
318            }
319        }
320
321        self.data.custom_fields.push(DocumentCustomField {
322            field,
323            value: value.into(),
324        });
325        self.changed_values |= ChangedAttributes::CustomFields;
326    }
327
328    /// Remove a custom field from the document.
329    pub fn remove_custom_field(&mut self, field: CustomFieldId) {
330        if let Some(index) = self
331            .data
332            .custom_fields
333            .iter()
334            .position(|custom_field| custom_field.field == field)
335        {
336            self.data.custom_fields.remove(index);
337            self.changed_values |= ChangedAttributes::CustomFields;
338        }
339    }
340
341    /// Set the created date of the document.
342    pub fn set_created(&mut self, created: NaiveDate) {
343        self.data.created = Some(created);
344        self.changed_values |= ChangedAttributes::Created;
345    }
346
347    /// Set the owner of the document.
348    pub fn set_owner(&mut self, owner: UserId) {
349        self.data.owner = Some(owner);
350        self.changed_values |= ChangedAttributes::Owner;
351    }
352
353    /// Set the correspondent of the document.
354    pub fn set_correspondent(&mut self, correspondent: CorrespondentId) {
355        self.data.correspondent = Some(correspondent);
356        self.changed_values |= ChangedAttributes::Correspondent;
357    }
358
359    /// Set the document type of the document.
360    pub fn set_document_type(&mut self, document_type: DocumentTypeId) {
361        self.data.document_type = Some(document_type);
362        self.changed_values |= ChangedAttributes::DocumentType;
363    }
364
365    /// Set the storage path of the document.
366    pub fn set_storage_path(&mut self, storage_path: StoragePathId) {
367        self.data.storage_path = Some(storage_path);
368        self.changed_values |= ChangedAttributes::StoragePath;
369    }
370
371    /// Returns `true` if the document has unsaved changes.
372    #[inline]
373    #[must_use]
374    pub fn is_dirty(&self) -> bool {
375        !self.changed_values.is_empty() && !self.changed_values.contains(ChangedAttributes::Deleted)
376    }
377
    /// Returns `true` if the document was deleted.
    ///
    /// This is the case once [`delete`](Document::delete) has succeeded on
    /// this value; it is tracked locally and not queried from the server.
    #[inline]
    #[must_use]
    pub fn is_deleted(&self) -> bool {
        self.changed_values.contains(ChangedAttributes::Deleted)
    }
384
385    fn fail_if_deleted(&self) -> Result<()> {
386        if self.is_deleted() {
387            Err(Error::AlreadyDeleted)
388        } else {
389            Ok(())
390        }
391    }
392
393    /// Refresh the document from the server.
394    ///
395    /// This will discard any local changes and replace them with the server's state.
396    pub async fn refresh(&mut self) -> Result<()> {
397        let document_data = self
398            .client
399            .as_ref()
400            .get_document_data_by_id(self.data.id, Some(!self.content_is_truncated), None)
401            .await?;
402
403        self.data = document_data;
404        self.changed_values = BitFlags::empty();
405        Ok(())
406    }
407
408    /// Get the document thumbnail.
409    ///
410    /// Returns the raw thumbnail image data.
411    pub async fn thumbnail(&self) -> Result<Vec<u8>> {
412        let resp = self
413            .client
414            .request_no_body(
415                Method::GET,
416                &format!("/api/documents/{}/thumb/", self.data.id),
417                None,
418            )
419            .await?;
420
421        Ok(resp
422            .bytes()
423            .await
424            .map_err(|e| Error::Other(format!("Failed to read response body: {e}")))?
425            .to_vec())
426    }
427
428    /// Update the document on the server.
429    ///
430    /// This applies the currently tracked local changes to the remote Paperless document.
431    pub async fn patch(&mut self) -> Result<()> {
432        if !self.is_dirty() {
433            return Ok(());
434        }
435
436        self.fail_if_deleted()?;
437
438        let patch = UpdateDocumentData {
439            title: self
440                .changed_values
441                .contains(ChangedAttributes::Title)
442                .then_some(self.data.title.clone()),
443
444            archive_serial_number: self
445                .changed_values
446                .contains(ChangedAttributes::ArchiveSerialNumber)
447                .then_some(self.data.archive_serial_number),
448
449            content: self
450                .changed_values
451                .contains(ChangedAttributes::Content)
452                .then_some(self.data.content.clone()),
453
454            tags: self
455                .changed_values
456                .contains(ChangedAttributes::Tags)
457                .then_some(self.data.tags.clone()),
458
459            custom_fields: self
460                .changed_values
461                .contains(ChangedAttributes::CustomFields)
462                .then_some(self.data.custom_fields.clone()),
463
464            correspondent: self
465                .changed_values
466                .contains(ChangedAttributes::Correspondent)
467                .then_some(self.data.correspondent),
468
469            document_type: self
470                .changed_values
471                .contains(ChangedAttributes::DocumentType)
472                .then_some(self.data.document_type),
473
474            created: self
475                .changed_values
476                .contains(ChangedAttributes::Created)
477                .then_some(self.data.created),
478
479            owner: self
480                .changed_values
481                .contains(ChangedAttributes::Owner)
482                .then_some(self.data.owner),
483
484            storage_path: self
485                .changed_values
486                .contains(ChangedAttributes::StoragePath)
487                .then_some(self.data.storage_path),
488        };
489
490        self.client
491            .request(
492                Method::PATCH,
493                &format!("/api/documents/{}/", self.data.id),
494                Some(&patch),
495                None,
496            )
497            .await?;
498
499        self.changed_values = BitFlags::empty();
500        Ok(())
501    }
502
503    /// Delete the document
504    pub async fn delete(&mut self) -> Result<()> {
505        self.client
506            .request_no_body(
507                Method::DELETE,
508                &format!("/api/documents/{}/", self.data.id),
509                None,
510            )
511            .await?;
512
513        self.changed_values = BitFlags::from(ChangedAttributes::Deleted);
514        Ok(())
515    }
516
517    /// Get the full content of the document, replacing any truncated content.
518    pub async fn get_full_content(&mut self) -> Result<()> {
519        self.fail_if_deleted()?;
520
521        if !self.content_is_truncated {
522            return Ok(());
523        }
524
525        let doc = self
526            .client
527            .get_document_data_by_id(self.data.id, Some(true), None)
528            .await?;
529        self.data.content = doc.content;
530        self.content_is_truncated = false;
531        Ok(())
532    }
533
534    /// Download the document to a buffer.
535    pub async fn download_to_buffer(&self) -> Result<Vec<u8>> {
536        self.fail_if_deleted()?;
537
538        let resp = self
539            .client
540            .request_no_body(
541                Method::GET,
542                &format!("/api/documents/{}/download/", self.data.id),
543                None,
544            )
545            .await?;
546
547        if resp.status().is_success() {
548            let bytes = resp
549                .bytes()
550                .await
551                .map_err(|e| Error::Other(format!("Failed to read response body: {e}")))?;
552            Ok(bytes.to_vec())
553        } else {
554            Err(Error::Other(format!(
555                "Failed to download document: {}",
556                resp.status()
557            )))
558        }
559    }
560
561    /// Download the document to a file, requires the `tokio-fs` feature.
562    pub async fn download_to_file(&self, path: &std::path::Path) -> Result<()> {
563        self.fail_if_deleted()?;
564
565        let resp = self
566            .client
567            .request_no_body(
568                Method::GET,
569                &format!("/api/documents/{}/download/", self.data.id),
570                None,
571            )
572            .await?;
573
574        if !resp.status().is_success() {
575            return Err(Error::Other(format!(
576                "Failed to download document: {}",
577                resp.status()
578            )));
579        }
580
581        let mut file = tokio::fs::File::create(path)
582            .await
583            .map_err(|e| Error::Other(format!("Failed to create file: {e}")))?;
584
585        resp.bytes_stream()
586            .map_err(|e| Error::Other(format!("Failed to read document chunk: {e}")))
587            .try_fold(&mut file, |file, chunk| async move {
588                file.write_all(&chunk).await.map_err(|e| {
589                    Error::Other(format!("Failed to save document chunk to file: {e}"))
590                })?;
591                Ok(file)
592            })
593            .await?;
594
595        Ok(())
596    }
597
598    /// Generates a share link for the document that expires after the specified duration.
599    pub fn generate_share_link_duration(
600        &self,
601        valid_for: Duration,
602        version: ShareLinkFileVersion,
603    ) -> impl Future<Output = Result<ShareLink>> {
604        let expires = Utc::now() + valid_for;
605        self.generate_share_link_expires(expires, version)
606    }
607
608    /// Generates a share link for the document that expires at the specified time.
609    pub async fn generate_share_link_expires(
610        &self,
611        expires: DateTime<Utc>,
612        version: ShareLinkFileVersion,
613    ) -> Result<ShareLink> {
614        self.fail_if_deleted()?;
615
616        self.client
617            .create(&CreateShareLink {
618                document: self.id(),
619                expiration: expires,
620                file_version: version,
621            })
622            .await
623    }
624}
625
626impl Display for Content<'_> {
627    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
628        match self {
629            Content::Full(text) => write!(f, "{text}"),
630            Content::Truncated(text) => write!(f, "{text}..."),
631        }
632    }
633}
634
635impl AsRef<str> for Content<'_> {
636    fn as_ref(&self) -> &str {
637        match self {
638            Content::Full(text) | Content::Truncated(text) => text,
639        }
640    }
641}