docbox_processing/office/
mod.rs

1use crate::{
2    ProcessingError, ProcessingOutput, QueuedUpload,
3    office::convert_server::{
4        OfficeConvertServerConfig, OfficeConvertServerError, is_known_pdf_convertable,
5    },
6    pdf::{is_pdf_file, process_pdf},
7};
8use bytes::Bytes;
9use convert_server::OfficeConverterServer;
10use docbox_database::models::generated_file::GeneratedFileType;
11use mime::Mime;
12use office_convert_client::RequestError;
13use serde::{Deserialize, Serialize};
14use thiserror::Error;
15
16pub mod convert_server;
17
/// Controls how [process_office] treats an office document that the converter
/// reports as malformed: when `true` processing fails with
/// `ProcessingError::MalformedFile`, when `false` an empty default
/// `ProcessingOutput` is returned instead.
const DISALLOW_MALFORMED_OFFICE: bool = true;
19
/// Errors that can occur when converting a file to a PDF
#[derive(Debug, Error)]
pub enum PdfConvertError {
    /// Failed to convert the file to a pdf, wraps the underlying
    /// [RequestError] and forwards its message and source transparently
    #[error(transparent)]
    ConversionFailed(#[from] RequestError),

    /// The office document is malformed
    #[error("office document is malformed")]
    MalformedDocument,

    /// The office document is password protected
    #[error("office document is password protected")]
    EncryptedDocument,
}
32
33#[derive(Debug, Clone, Serialize, Deserialize)]
34pub enum OfficeConverterConfig {
35    ConverterServer(OfficeConvertServerConfig),
36}
37
38impl OfficeConverterConfig {
39    pub fn from_env() -> OfficeConverterConfig {
40        let config = OfficeConvertServerConfig::from_env();
41        OfficeConverterConfig::ConverterServer(config)
42    }
43}
44
/// Converter for turning office files into PDF files, one variant per
/// supported converter implementation
#[derive(Clone)]
pub enum OfficeConverter {
    /// Conversion is performed through an [OfficeConverterServer]
    ConverterServer(OfficeConverterServer),
}
49
/// Errors that can occur when creating an [OfficeConverter] from
/// its configuration
#[derive(Debug, Error)]
pub enum OfficeConverterError {
    /// Error from the converter server, forwarded transparently
    #[error(transparent)]
    ConverterServer(#[from] OfficeConvertServerError),
}
55
/// Processing layer for office files, holds the converter that
/// [process_office] uses to turn files into PDFs
#[derive(Clone)]
pub struct OfficeProcessingLayer {
    /// Converter used to convert office files to PDF
    pub converter: OfficeConverter,
}
60
61impl OfficeConverter {
62    pub fn from_config(
63        config: OfficeConverterConfig,
64    ) -> Result<OfficeConverter, OfficeConverterError> {
65        match config {
66            OfficeConverterConfig::ConverterServer(config) => {
67                let converter_server = OfficeConverterServer::from_config(config)?;
68                Ok(OfficeConverter::ConverterServer(converter_server))
69            }
70        }
71    }
72
73    pub async fn convert_to_pdf(&self, bytes: Bytes) -> Result<Bytes, PdfConvertError> {
74        match self {
75            OfficeConverter::ConverterServer(inner) => inner.convert_to_pdf(bytes).await,
76        }
77    }
78
79    pub fn is_convertable(&self, mime: &Mime) -> bool {
80        match self {
81            OfficeConverter::ConverterServer(inner) => inner.is_convertable(mime),
82        }
83    }
84}
85
/// Trait for converting some file input bytes into some output bytes
/// for a converted PDF file
pub(crate) trait ConvertToPdf {
    /// Converts the provided input file `bytes` into the bytes of a
    /// PDF file, failing with a [PdfConvertError] when conversion
    /// is not possible
    async fn convert_to_pdf(&self, bytes: Bytes) -> Result<Bytes, PdfConvertError>;

    /// Reports whether the implementation treats the provided `mime`
    /// type as convertable (the exact criteria are up to the implementation)
    fn is_convertable(&self, mime: &Mime) -> bool;
}
93
94/// Checks if the provided mime type either is a PDF
95/// or can be converted to a PDF
96pub fn is_pdf_compatible(mime: &Mime) -> bool {
97    is_pdf_file(mime) || is_known_pdf_convertable(mime)
98}
99
100/// Processes a PDF compatible office/other supported file format. Converts to
101/// PDF then processes as a PDF with [process_pdf]
102pub async fn process_office(
103    layer: &OfficeProcessingLayer,
104    file_bytes: Bytes,
105) -> Result<ProcessingOutput, ProcessingError> {
106    // Convert file to a pdf
107    let file_bytes = match layer.converter.convert_to_pdf(file_bytes).await {
108        Ok(value) => value,
109
110        // Encrypted document
111        Err(PdfConvertError::EncryptedDocument) => {
112            return Ok(ProcessingOutput {
113                encrypted: true,
114                ..Default::default()
115            });
116        }
117
118        // Malformed document
119        Err(PdfConvertError::MalformedDocument) => {
120            if DISALLOW_MALFORMED_OFFICE {
121                return Err(ProcessingError::MalformedFile(
122                    "office file appears to be malformed failed conversion".to_string(),
123                ));
124            }
125
126            return Ok(ProcessingOutput::default());
127        }
128
129        // Other error
130        Err(cause) => {
131            tracing::error!(?cause, "failed to convert document to pdf");
132            return Err(ProcessingError::ConvertFile(cause));
133        }
134    };
135
136    let mut output = process_pdf(&file_bytes).await?;
137
138    // Store the converted pdf file
139    output.upload_queue.push(QueuedUpload::new(
140        mime::APPLICATION_PDF,
141        GeneratedFileType::Pdf,
142        file_bytes,
143    ));
144
145    Ok(output)
146}