docbox_processing/office/
mod.rs1use crate::{
12 ProcessingError, ProcessingOutput, QueuedUpload,
13 office::{
14 convert_lambda::{
15 OfficeConvertLambdaConfig, OfficeConvertLambdaConfigError, OfficeConvertLambdaError,
16 OfficeConverterLambda,
17 },
18 convert_server::{OfficeConvertServerConfig, OfficeConvertServerError},
19 libreoffice::is_known_libreoffice_pdf_convertable,
20 },
21 pdf::{is_pdf_file, process_pdf},
22};
23use aws_config::SdkConfig;
24use bytes::Bytes;
25use convert_server::OfficeConverterServer;
26use docbox_database::models::generated_file::GeneratedFileType;
27use docbox_storage::StorageLayerFactory;
28use mime::Mime;
29use office_convert_client::RequestError;
30use serde::{Deserialize, Serialize};
31use thiserror::Error;
32
33pub mod convert_lambda;
34pub mod convert_server;
35pub mod libreoffice;
36
/// Controls how [`process_office`] treats a document the converter reports
/// as malformed: when `true` processing fails with a malformed-file error,
/// when `false` an empty default processing output is returned instead.
const DISALLOW_MALFORMED_OFFICE: bool = true;
38
39#[derive(Debug, Error)]
40pub enum PdfConvertError {
41 #[error(transparent)]
43 ConversionFailed(#[from] RequestError),
44
45 #[error(transparent)]
47 ConversionFailedLambda(#[from] OfficeConvertLambdaError),
48
49 #[error("office document is malformed")]
50 MalformedDocument,
51
52 #[error("office document is password protected")]
53 EncryptedDocument,
54}
55
56#[derive(Debug, Clone, Serialize, Deserialize)]
57pub enum OfficeConverterConfig {
58 ConverterServer(OfficeConvertServerConfig),
59 ConverterLambda(OfficeConvertLambdaConfig),
60}
61
62#[derive(Debug, Error)]
63pub enum OfficeConverterConfigError {
64 #[error(transparent)]
65 ConverterLambda(#[from] OfficeConvertLambdaConfigError),
66}
67
68impl OfficeConverterConfig {
69 pub fn from_env() -> Result<OfficeConverterConfig, OfficeConverterConfigError> {
70 let variant =
71 std::env::var("DOCBOX_OFFICE_CONVERTER").unwrap_or_else(|_| "server".to_string());
72
73 match variant.as_str() {
74 "lambda" => {
75 let config = OfficeConvertLambdaConfig::from_env()?;
76 Ok(OfficeConverterConfig::ConverterLambda(config))
77 }
78
79 _ => {
80 let config = OfficeConvertServerConfig::from_env();
81 Ok(OfficeConverterConfig::ConverterServer(config))
82 }
83 }
84 }
85}
86
87#[derive(Clone)]
88pub enum OfficeConverter {
89 ConverterServer(OfficeConverterServer),
90 ConverterLambda(OfficeConverterLambda),
91}
92
93#[derive(Debug, Error)]
94pub enum OfficeConverterError {
95 #[error(transparent)]
96 ConverterServer(#[from] OfficeConvertServerError),
97 #[error(transparent)]
98 ConverterLambda(#[from] OfficeConvertLambdaError),
99}
100
101#[derive(Clone)]
102pub struct OfficeProcessingLayer {
103 pub converter: OfficeConverter,
104}
105
106impl OfficeConverter {
107 pub fn from_config(
112 aws_config: &SdkConfig,
113 storage: &StorageLayerFactory,
114 config: OfficeConverterConfig,
115 ) -> Result<OfficeConverter, OfficeConverterError> {
116 match config {
117 OfficeConverterConfig::ConverterServer(config) => {
118 let converter_server = OfficeConverterServer::from_config(config)?;
119 Ok(OfficeConverter::ConverterServer(converter_server))
120 }
121
122 OfficeConverterConfig::ConverterLambda(config) => {
123 let converter_server =
124 OfficeConverterLambda::from_config(aws_config, storage, config)?;
125 Ok(OfficeConverter::ConverterLambda(converter_server))
126 }
127 }
128 }
129
130 pub async fn convert_to_pdf(&self, bytes: Bytes) -> Result<Bytes, PdfConvertError> {
131 match self {
132 OfficeConverter::ConverterServer(inner) => inner.convert_to_pdf(bytes).await,
133 OfficeConverter::ConverterLambda(inner) => inner.convert_to_pdf(bytes).await,
134 }
135 }
136
137 pub fn is_convertable(&self, mime: &Mime) -> bool {
138 match self {
139 OfficeConverter::ConverterServer(inner) => inner.is_convertable(mime),
140 OfficeConverter::ConverterLambda(inner) => inner.is_convertable(mime),
141 }
142 }
143}
144
145pub(crate) trait ConvertToPdf {
148 async fn convert_to_pdf(&self, bytes: Bytes) -> Result<Bytes, PdfConvertError>;
149
150 fn is_convertable(&self, mime: &Mime) -> bool;
151}
152
153pub fn is_pdf_compatible(mime: &Mime) -> bool {
156 is_pdf_file(mime) || is_known_libreoffice_pdf_convertable(mime)
157}
158
159pub async fn process_office(
162 layer: &OfficeProcessingLayer,
163 file_bytes: Bytes,
164) -> Result<ProcessingOutput, ProcessingError> {
165 let file_bytes = match layer.converter.convert_to_pdf(file_bytes).await {
167 Ok(value) => value,
168
169 Err(PdfConvertError::EncryptedDocument) => {
171 return Ok(ProcessingOutput {
172 encrypted: true,
173 ..Default::default()
174 });
175 }
176
177 Err(PdfConvertError::MalformedDocument) => {
179 if DISALLOW_MALFORMED_OFFICE {
180 return Err(ProcessingError::MalformedFile(
181 "office file appears to be malformed failed conversion".to_string(),
182 ));
183 }
184
185 return Ok(ProcessingOutput::default());
186 }
187
188 Err(error) => {
190 tracing::error!(?error, "failed to convert document to pdf");
191 return Err(ProcessingError::ConvertFile(error));
192 }
193 };
194
195 let mut output = process_pdf(&file_bytes).await?;
196
197 output.upload_queue.push(QueuedUpload::new(
199 mime::APPLICATION_PDF,
200 GeneratedFileType::Pdf,
201 file_bytes,
202 ));
203
204 Ok(output)
205}