docbox_core/processing/office/
mod.rs

1use crate::{
2    files::generated::QueuedUpload,
3    processing::{
4        ProcessingError, ProcessingOutput,
5        office::convert_server::{OfficeConvertServerConfig, is_known_pdf_convertable},
6        pdf::{is_pdf_file, process_pdf},
7    },
8};
9use bytes::Bytes;
10use convert_server::OfficeConverterServer;
11use docbox_database::models::generated_file::GeneratedFileType;
12use mime::Mime;
13use office_convert_client::RequestError;
14use serde::{Deserialize, Serialize};
15use thiserror::Error;
16
17pub mod convert_server;
18
/// Controls how [process_office] reacts when the converter reports
/// [PdfConvertError::MalformedDocument]: when `true` processing fails with
/// `ProcessingError::MalformedFile`, when `false` an empty (default)
/// `ProcessingOutput` is returned instead.
19const DISALLOW_MALFORMED_OFFICE: bool = true;
20
21#[derive(Debug, Error)]
22pub enum PdfConvertError {
23    /// Failed to convert the file to a pdf
24    #[error(transparent)]
25    ConversionFailed(#[from] RequestError),
26
27    #[error("office document is malformed")]
28    MalformedDocument,
29
30    #[error("office document is password protected")]
31    EncryptedDocument,
32}
33
34#[derive(Debug, Clone, Serialize, Deserialize)]
35pub enum OfficeConverterConfig {
36    ConverterServer(OfficeConvertServerConfig),
37}
38
39impl OfficeConverterConfig {
40    pub fn from_env() -> anyhow::Result<OfficeConverterConfig> {
41        OfficeConvertServerConfig::from_env().map(OfficeConverterConfig::ConverterServer)
42    }
43}
44
45#[derive(Clone)]
46pub enum OfficeConverter {
47    ConverterServer(OfficeConverterServer),
48}
49
50#[derive(Clone)]
51pub struct OfficeProcessingLayer {
52    pub converter: OfficeConverter,
53}
54
55impl OfficeConverter {
56    pub fn from_config(config: OfficeConverterConfig) -> anyhow::Result<OfficeConverter> {
57        match config {
58            OfficeConverterConfig::ConverterServer(config) => {
59                let converter_server = OfficeConverterServer::from_config(config)?;
60                Ok(OfficeConverter::ConverterServer(converter_server))
61            }
62        }
63    }
64
65    pub async fn convert_to_pdf(&self, bytes: Bytes) -> Result<Bytes, PdfConvertError> {
66        match self {
67            OfficeConverter::ConverterServer(inner) => inner.convert_to_pdf(bytes).await,
68        }
69    }
70
71    pub fn is_convertable(&self, mime: &Mime) -> bool {
72        match self {
73            OfficeConverter::ConverterServer(inner) => inner.is_convertable(mime),
74        }
75    }
76}
77
78/// Trait for converting some file input bytes into some output bytes
79/// for a converted PDF file
80pub(crate) trait ConvertToPdf {
81    async fn convert_to_pdf(&self, bytes: Bytes) -> Result<Bytes, PdfConvertError>;
82
83    fn is_convertable(&self, mime: &Mime) -> bool;
84}
85
86/// Checks if the provided mime type either is a PDF
87/// or can be converted to a PDF
88pub fn is_pdf_compatible(mime: &Mime) -> bool {
89    is_pdf_file(mime) || is_known_pdf_convertable(mime)
90}
91
92/// Processes a PDF compatible office/other supported file format. Converts to
93/// PDF then processes as a PDF with [process_pdf]
94pub async fn process_office(
95    layer: &OfficeProcessingLayer,
96    file_bytes: Bytes,
97) -> Result<ProcessingOutput, ProcessingError> {
98    // Convert file to a pdf
99    let file_bytes = match layer.converter.convert_to_pdf(file_bytes).await {
100        Ok(value) => value,
101
102        // Encrypted document
103        Err(PdfConvertError::EncryptedDocument) => {
104            return Ok(ProcessingOutput {
105                encrypted: true,
106                ..Default::default()
107            });
108        }
109
110        // Malformed document
111        Err(PdfConvertError::MalformedDocument) => {
112            if DISALLOW_MALFORMED_OFFICE {
113                return Err(ProcessingError::MalformedFile(
114                    "office file appears to be malformed failed conversion".to_string(),
115                ));
116            }
117
118            return Ok(ProcessingOutput::default());
119        }
120
121        // Other error
122        Err(cause) => {
123            tracing::error!(?cause, "failed to convert document to pdf");
124            return Err(ProcessingError::ConvertFile(cause));
125        }
126    };
127
128    let mut output = process_pdf(&file_bytes).await?;
129
130    // Store the converted pdf file
131    output.upload_queue.push(QueuedUpload::new(
132        mime::APPLICATION_PDF,
133        GeneratedFileType::Pdf,
134        file_bytes,
135    ));
136
137    Ok(output)
138}