docbox_core/processing/office/
mod.rs

1use crate::{
2    files::generated::QueuedUpload,
3    processing::{
4        ProcessingError, ProcessingOutput,
5        office::convert_server::is_known_pdf_convertable,
6        pdf::{is_pdf_file, process_pdf},
7    },
8};
9use bytes::Bytes;
10use convert_server::OfficeConverterServer;
11use docbox_database::models::generated_file::GeneratedFileType;
12use mime::Mime;
13use office_convert_client::RequestError;
14use thiserror::Error;
15
16pub mod convert_server;
17
/// Controls how [process_office] treats documents that fail conversion
/// as malformed: when `true` processing fails with a malformed file error,
/// when `false` an empty default output is returned instead
const DISALLOW_MALFORMED_OFFICE: bool = true;
19
20#[derive(Debug, Error)]
21pub enum PdfConvertError {
22    /// Failed to convert the file to a pdf
23    #[error(transparent)]
24    ConversionFailed(#[from] RequestError),
25
26    #[error("office document is malformed")]
27    MalformedDocument,
28
29    #[error("office document is password protected")]
30    EncryptedDocument,
31}
32
33#[derive(Clone)]
34pub enum OfficeConverter {
35    ConverterServer(OfficeConverterServer),
36}
37
38#[derive(Clone)]
39pub struct OfficeProcessingLayer {
40    pub converter: OfficeConverter,
41}
42
43impl OfficeConverter {
44    pub async fn convert_to_pdf(&self, bytes: Bytes) -> Result<Bytes, PdfConvertError> {
45        match self {
46            OfficeConverter::ConverterServer(inner) => inner.convert_to_pdf(bytes).await,
47        }
48    }
49
50    pub fn is_convertable(&self, mime: &Mime) -> bool {
51        match self {
52            OfficeConverter::ConverterServer(inner) => inner.is_convertable(mime),
53        }
54    }
55}
56
57/// Trait for converting some file input bytes into some output bytes
58/// for a converted PDF file
59pub(crate) trait ConvertToPdf {
60    async fn convert_to_pdf(&self, bytes: Bytes) -> Result<Bytes, PdfConvertError>;
61
62    fn is_convertable(&self, mime: &Mime) -> bool;
63}
64
65/// Checks if the provided mime type either is a PDF
66/// or can be converted to a PDF
67pub fn is_pdf_compatible(mime: &Mime) -> bool {
68    is_pdf_file(mime) || is_known_pdf_convertable(mime)
69}
70
71/// Processes a PDF compatible office/other supported file format. Converts to
72/// PDF then processes as a PDF with [process_pdf]
73pub async fn process_office(
74    layer: &OfficeProcessingLayer,
75    file_bytes: Bytes,
76) -> Result<ProcessingOutput, ProcessingError> {
77    // Convert file to a pdf
78    let file_bytes = match layer.converter.convert_to_pdf(file_bytes).await {
79        Ok(value) => value,
80
81        // Encrypted document
82        Err(PdfConvertError::EncryptedDocument) => {
83            return Ok(ProcessingOutput {
84                encrypted: true,
85                ..Default::default()
86            });
87        }
88
89        // Malformed document
90        Err(PdfConvertError::MalformedDocument) => {
91            if DISALLOW_MALFORMED_OFFICE {
92                return Err(ProcessingError::MalformedFile(
93                    "office file appears to be malformed failed conversion".to_string(),
94                ));
95            }
96
97            return Ok(ProcessingOutput::default());
98        }
99
100        // Other error
101        Err(cause) => {
102            tracing::error!(?cause, "failed to convert document to pdf");
103            return Err(ProcessingError::ConvertFile(cause));
104        }
105    };
106
107    let mut output = process_pdf(&file_bytes).await?;
108
109    // Store the converted pdf file
110    output.upload_queue.push(QueuedUpload::new(
111        mime::APPLICATION_PDF,
112        GeneratedFileType::Pdf,
113        file_bytes,
114    ));
115
116    Ok(output)
117}