docbox_core/processing/office/
mod.rs1use crate::{
2 files::generated::QueuedUpload,
3 processing::{
4 ProcessingError, ProcessingOutput,
5 office::convert_server::{OfficeConvertServerConfig, is_known_pdf_convertable},
6 pdf::{is_pdf_file, process_pdf},
7 },
8};
9use bytes::Bytes;
10use convert_server::OfficeConverterServer;
11use docbox_database::models::generated_file::GeneratedFileType;
12use mime::Mime;
13use office_convert_client::RequestError;
14use serde::{Deserialize, Serialize};
15use thiserror::Error;
16
17pub mod convert_server;
18
/// Controls how `process_office` reacts when the converter reports
/// [`PdfConvertError::MalformedDocument`].
///
/// When `true`, processing fails with `ProcessingError::MalformedFile`;
/// when `false`, a default `ProcessingOutput` is returned instead.
const DISALLOW_MALFORMED_OFFICE: bool = true;
20
21#[derive(Debug, Error)]
22pub enum PdfConvertError {
23 #[error(transparent)]
25 ConversionFailed(#[from] RequestError),
26
27 #[error("office document is malformed")]
28 MalformedDocument,
29
30 #[error("office document is password protected")]
31 EncryptedDocument,
32}
33
34#[derive(Debug, Clone, Serialize, Deserialize)]
35pub enum OfficeConverterConfig {
36 ConverterServer(OfficeConvertServerConfig),
37}
38
39impl OfficeConverterConfig {
40 pub fn from_env() -> anyhow::Result<OfficeConverterConfig> {
41 OfficeConvertServerConfig::from_env().map(OfficeConverterConfig::ConverterServer)
42 }
43}
44
/// Office-to-PDF converter, dispatching to the selected backend.
#[derive(Clone)]
pub enum OfficeConverter {
    /// Conversion is delegated to an [`OfficeConverterServer`].
    ConverterServer(OfficeConverterServer),
}
49
/// Processing layer for office documents, handed to `process_office`.
#[derive(Clone)]
pub struct OfficeProcessingLayer {
    /// Converter used to turn office documents into PDF bytes.
    pub converter: OfficeConverter,
}
54
55impl OfficeConverter {
56 pub fn from_config(config: OfficeConverterConfig) -> anyhow::Result<OfficeConverter> {
57 match config {
58 OfficeConverterConfig::ConverterServer(config) => {
59 let converter_server = OfficeConverterServer::from_config(config)?;
60 Ok(OfficeConverter::ConverterServer(converter_server))
61 }
62 }
63 }
64
65 pub async fn convert_to_pdf(&self, bytes: Bytes) -> Result<Bytes, PdfConvertError> {
66 match self {
67 OfficeConverter::ConverterServer(inner) => inner.convert_to_pdf(bytes).await,
68 }
69 }
70
71 pub fn is_convertable(&self, mime: &Mime) -> bool {
72 match self {
73 OfficeConverter::ConverterServer(inner) => inner.is_convertable(mime),
74 }
75 }
76}
77
/// Interface for a backend that can convert documents to PDF.
pub(crate) trait ConvertToPdf {
    /// Converts the provided document `bytes` into PDF bytes, or reports a
    /// [`PdfConvertError`] on failure.
    async fn convert_to_pdf(&self, bytes: Bytes) -> Result<Bytes, PdfConvertError>;

    /// Reports whether this backend considers `mime` convertable.
    fn is_convertable(&self, mime: &Mime) -> bool;
}
85
86pub fn is_pdf_compatible(mime: &Mime) -> bool {
89 is_pdf_file(mime) || is_known_pdf_convertable(mime)
90}
91
92pub async fn process_office(
95 layer: &OfficeProcessingLayer,
96 file_bytes: Bytes,
97) -> Result<ProcessingOutput, ProcessingError> {
98 let file_bytes = match layer.converter.convert_to_pdf(file_bytes).await {
100 Ok(value) => value,
101
102 Err(PdfConvertError::EncryptedDocument) => {
104 return Ok(ProcessingOutput {
105 encrypted: true,
106 ..Default::default()
107 });
108 }
109
110 Err(PdfConvertError::MalformedDocument) => {
112 if DISALLOW_MALFORMED_OFFICE {
113 return Err(ProcessingError::MalformedFile(
114 "office file appears to be malformed failed conversion".to_string(),
115 ));
116 }
117
118 return Ok(ProcessingOutput::default());
119 }
120
121 Err(cause) => {
123 tracing::error!(?cause, "failed to convert document to pdf");
124 return Err(ProcessingError::ConvertFile(cause));
125 }
126 };
127
128 let mut output = process_pdf(&file_bytes).await?;
129
130 output.upload_queue.push(QueuedUpload::new(
132 mime::APPLICATION_PDF,
133 GeneratedFileType::Pdf,
134 file_bytes,
135 ));
136
137 Ok(output)
138}