docbox_processing/office/
mod.rs1use crate::{
2 ProcessingError, ProcessingOutput, QueuedUpload,
3 office::convert_server::{
4 OfficeConvertServerConfig, OfficeConvertServerError, is_known_pdf_convertable,
5 },
6 pdf::{is_pdf_file, process_pdf},
7};
8use bytes::Bytes;
9use convert_server::OfficeConverterServer;
10use docbox_database::models::generated_file::GeneratedFileType;
11use mime::Mime;
12use office_convert_client::RequestError;
13use serde::{Deserialize, Serialize};
14use thiserror::Error;
15
16pub mod convert_server;
17
/// Policy switch for office documents the converter reports as malformed.
///
/// While this is `true`, [`process_office`] fails with a malformed-file error
/// for such documents; when `false` it returns a default (empty) processing
/// output instead.
const DISALLOW_MALFORMED_OFFICE: bool = true;
19
20#[derive(Debug, Error)]
21pub enum PdfConvertError {
22 #[error(transparent)]
24 ConversionFailed(#[from] RequestError),
25
26 #[error("office document is malformed")]
27 MalformedDocument,
28
29 #[error("office document is password protected")]
30 EncryptedDocument,
31}
32
33#[derive(Debug, Clone, Serialize, Deserialize)]
34pub enum OfficeConverterConfig {
35 ConverterServer(OfficeConvertServerConfig),
36}
37
38impl OfficeConverterConfig {
39 pub fn from_env() -> OfficeConverterConfig {
40 let config = OfficeConvertServerConfig::from_env();
41 OfficeConverterConfig::ConverterServer(config)
42 }
43}
44
45#[derive(Clone)]
46pub enum OfficeConverter {
47 ConverterServer(OfficeConverterServer),
48}
49
50#[derive(Debug, Error)]
51pub enum OfficeConverterError {
52 #[error(transparent)]
53 ConverterServer(#[from] OfficeConvertServerError),
54}
55
56#[derive(Clone)]
57pub struct OfficeProcessingLayer {
58 pub converter: OfficeConverter,
59}
60
61impl OfficeConverter {
62 pub fn from_config(
63 config: OfficeConverterConfig,
64 ) -> Result<OfficeConverter, OfficeConverterError> {
65 match config {
66 OfficeConverterConfig::ConverterServer(config) => {
67 let converter_server = OfficeConverterServer::from_config(config)?;
68 Ok(OfficeConverter::ConverterServer(converter_server))
69 }
70 }
71 }
72
73 pub async fn convert_to_pdf(&self, bytes: Bytes) -> Result<Bytes, PdfConvertError> {
74 match self {
75 OfficeConverter::ConverterServer(inner) => inner.convert_to_pdf(bytes).await,
76 }
77 }
78
79 pub fn is_convertable(&self, mime: &Mime) -> bool {
80 match self {
81 OfficeConverter::ConverterServer(inner) => inner.is_convertable(mime),
82 }
83 }
84}
85
86pub(crate) trait ConvertToPdf {
89 async fn convert_to_pdf(&self, bytes: Bytes) -> Result<Bytes, PdfConvertError>;
90
91 fn is_convertable(&self, mime: &Mime) -> bool;
92}
93
94pub fn is_pdf_compatible(mime: &Mime) -> bool {
97 is_pdf_file(mime) || is_known_pdf_convertable(mime)
98}
99
100pub async fn process_office(
103 layer: &OfficeProcessingLayer,
104 file_bytes: Bytes,
105) -> Result<ProcessingOutput, ProcessingError> {
106 let file_bytes = match layer.converter.convert_to_pdf(file_bytes).await {
108 Ok(value) => value,
109
110 Err(PdfConvertError::EncryptedDocument) => {
112 return Ok(ProcessingOutput {
113 encrypted: true,
114 ..Default::default()
115 });
116 }
117
118 Err(PdfConvertError::MalformedDocument) => {
120 if DISALLOW_MALFORMED_OFFICE {
121 return Err(ProcessingError::MalformedFile(
122 "office file appears to be malformed failed conversion".to_string(),
123 ));
124 }
125
126 return Ok(ProcessingOutput::default());
127 }
128
129 Err(cause) => {
131 tracing::error!(?cause, "failed to convert document to pdf");
132 return Err(ProcessingError::ConvertFile(cause));
133 }
134 };
135
136 let mut output = process_pdf(&file_bytes).await?;
137
138 output.upload_queue.push(QueuedUpload::new(
140 mime::APPLICATION_PDF,
141 GeneratedFileType::Pdf,
142 file_bytes,
143 ));
144
145 Ok(output)
146}