Skip to main content

docbox_processing/office/
mod.rs

1//! # Office Conversion
2//!
3//! Various backends for converting files from office formats to PDF formats
4//!
5//! ## Environment Variables
6//!
7//! * `DOCBOX_OFFICE_CONVERTER` - "server" to use the [convert_server] backend or "lambda" to use the [convert_lambda] backend (defaults to "server" when unset)
8//!
9//! See individual modules for service specific environment variables
10
11use crate::{
12    ProcessingError, ProcessingOutput, QueuedUpload,
13    office::{
14        convert_lambda::{
15            OfficeConvertLambdaConfig, OfficeConvertLambdaConfigError, OfficeConvertLambdaError,
16            OfficeConverterLambda,
17        },
18        convert_server::{OfficeConvertServerConfig, OfficeConvertServerError},
19        libreoffice::is_known_libreoffice_pdf_convertable,
20    },
21    pdf::{is_pdf_file, process_pdf},
22};
23use aws_config::SdkConfig;
24use bytes::Bytes;
25use convert_server::OfficeConverterServer;
26use docbox_database::models::generated_file::GeneratedFileType;
27use docbox_storage::StorageLayerFactory;
28use mime::Mime;
29use office_convert_client::RequestError;
30use serde::{Deserialize, Serialize};
31use thiserror::Error;
32
33pub mod convert_lambda;
34pub mod convert_server;
35pub mod libreoffice;
36
/// Controls how [process_office] treats a document that the converter
/// reports as malformed: when `true` processing fails with an error,
/// when `false` an empty processing output is returned instead
const DISALLOW_MALFORMED_OFFICE: bool = true;
38
39#[derive(Debug, Error)]
40pub enum PdfConvertError {
41    /// Failed to convert the file to a pdf
42    #[error(transparent)]
43    ConversionFailed(#[from] RequestError),
44
45    /// Failed to convert the file to a pdf
46    #[error(transparent)]
47    ConversionFailedLambda(#[from] OfficeConvertLambdaError),
48
49    #[error("office document is malformed")]
50    MalformedDocument,
51
52    #[error("office document is password protected")]
53    EncryptedDocument,
54}
55
56#[derive(Debug, Clone, Serialize, Deserialize)]
57pub enum OfficeConverterConfig {
58    ConverterServer(OfficeConvertServerConfig),
59    ConverterLambda(OfficeConvertLambdaConfig),
60}
61
62#[derive(Debug, Error)]
63pub enum OfficeConverterConfigError {
64    #[error(transparent)]
65    ConverterLambda(#[from] OfficeConvertLambdaConfigError),
66}
67
68impl OfficeConverterConfig {
69    pub fn from_env() -> Result<OfficeConverterConfig, OfficeConverterConfigError> {
70        let variant =
71            std::env::var("DOCBOX_OFFICE_CONVERTER").unwrap_or_else(|_| "server".to_string());
72
73        match variant.as_str() {
74            "lambda" => {
75                let config = OfficeConvertLambdaConfig::from_env()?;
76                Ok(OfficeConverterConfig::ConverterLambda(config))
77            }
78
79            _ => {
80                let config = OfficeConvertServerConfig::from_env();
81                Ok(OfficeConverterConfig::ConverterServer(config))
82            }
83        }
84    }
85}
86
87#[derive(Clone)]
88pub enum OfficeConverter {
89    ConverterServer(OfficeConverterServer),
90    ConverterLambda(OfficeConverterLambda),
91}
92
93#[derive(Debug, Error)]
94pub enum OfficeConverterError {
95    #[error(transparent)]
96    ConverterServer(#[from] OfficeConvertServerError),
97    #[error(transparent)]
98    ConverterLambda(#[from] OfficeConvertLambdaError),
99}
100
101#[derive(Clone)]
102pub struct OfficeProcessingLayer {
103    pub converter: OfficeConverter,
104}
105
106impl OfficeConverter {
107    /// Create a [OfficeConverter] from the provided `config`
108    ///
109    /// `storage` is required when using the lambda option as a temporary
110    /// bucket is required for the lambda to perform processing
111    pub fn from_config(
112        aws_config: &SdkConfig,
113        storage: &StorageLayerFactory,
114        config: OfficeConverterConfig,
115    ) -> Result<OfficeConverter, OfficeConverterError> {
116        match config {
117            OfficeConverterConfig::ConverterServer(config) => {
118                let converter_server = OfficeConverterServer::from_config(config)?;
119                Ok(OfficeConverter::ConverterServer(converter_server))
120            }
121
122            OfficeConverterConfig::ConverterLambda(config) => {
123                let converter_server =
124                    OfficeConverterLambda::from_config(aws_config, storage, config)?;
125                Ok(OfficeConverter::ConverterLambda(converter_server))
126            }
127        }
128    }
129
130    pub async fn convert_to_pdf(&self, bytes: Bytes) -> Result<Bytes, PdfConvertError> {
131        match self {
132            OfficeConverter::ConverterServer(inner) => inner.convert_to_pdf(bytes).await,
133            OfficeConverter::ConverterLambda(inner) => inner.convert_to_pdf(bytes).await,
134        }
135    }
136
137    pub fn is_convertable(&self, mime: &Mime) -> bool {
138        match self {
139            OfficeConverter::ConverterServer(inner) => inner.is_convertable(mime),
140            OfficeConverter::ConverterLambda(inner) => inner.is_convertable(mime),
141        }
142    }
143}
144
145/// Trait for converting some file input bytes into some output bytes
146/// for a converted PDF file
147pub(crate) trait ConvertToPdf {
148    async fn convert_to_pdf(&self, bytes: Bytes) -> Result<Bytes, PdfConvertError>;
149
150    fn is_convertable(&self, mime: &Mime) -> bool;
151}
152
153/// Checks if the provided mime type either is a PDF
154/// or can be converted to a PDF
155pub fn is_pdf_compatible(mime: &Mime) -> bool {
156    is_pdf_file(mime) || is_known_libreoffice_pdf_convertable(mime)
157}
158
159/// Processes a PDF compatible office/other supported file format. Converts to
160/// PDF then processes as a PDF with [process_pdf]
161pub async fn process_office(
162    layer: &OfficeProcessingLayer,
163    file_bytes: Bytes,
164) -> Result<ProcessingOutput, ProcessingError> {
165    // Convert file to a pdf
166    let file_bytes = match layer.converter.convert_to_pdf(file_bytes).await {
167        Ok(value) => value,
168
169        // Encrypted document
170        Err(PdfConvertError::EncryptedDocument) => {
171            return Ok(ProcessingOutput {
172                encrypted: true,
173                ..Default::default()
174            });
175        }
176
177        // Malformed document
178        Err(PdfConvertError::MalformedDocument) => {
179            if DISALLOW_MALFORMED_OFFICE {
180                return Err(ProcessingError::MalformedFile(
181                    "office file appears to be malformed failed conversion".to_string(),
182                ));
183            }
184
185            return Ok(ProcessingOutput::default());
186        }
187
188        // Other error
189        Err(error) => {
190            tracing::error!(?error, "failed to convert document to pdf");
191            return Err(ProcessingError::ConvertFile(error));
192        }
193    };
194
195    let mut output = process_pdf(&file_bytes).await?;
196
197    // Store the converted pdf file
198    output.upload_queue.push(QueuedUpload::new(
199        mime::APPLICATION_PDF,
200        GeneratedFileType::Pdf,
201        file_bytes,
202    ));
203
204    Ok(output)
205}