1#![forbid(unsafe_code)]
2
3use std::num::ParseIntError;
4
5use crate::{
6 email::{EmailProcessingError, is_mail_mime, process_email},
7 image::process_image_async,
8 office::{PdfConvertError, process_office},
9 pdf::{GeneratePdfImagesError, process_pdf},
10};
11use ::image::{ImageError, ImageFormat};
12use bytes::Bytes;
13use docbox_database::models::{file::FileId, generated_file::GeneratedFileType};
14use docbox_search::models::DocumentPage;
15use mime::Mime;
16use office::OfficeProcessingLayer;
17use pdf::is_pdf_file;
18use pdf_process::{PdfInfoError, PdfTextError};
19use serde::{Deserialize, Serialize};
20use thiserror::Error;
21use tokio::task::JoinError;
22use utoipa::ToSchema;
23
// Submodules of the processing layer. The notes below list only what this
// file itself imports from each one.

// Supplies `process_email`, `is_mail_mime` and `EmailProcessingError`.
pub mod email;
// NOTE(review): nothing from this module is referenced in this file.
pub mod html_to_text;
// Supplies `process_image_async`.
pub mod image;
// Supplies `process_office`, `OfficeProcessingLayer` and `PdfConvertError`.
pub mod office;
// Supplies `process_pdf`, `is_pdf_file` and `GeneratePdfImagesError`.
pub mod pdf;
29
30#[derive(Debug, Error)]
31pub enum ProcessingError {
32 #[error("file is invalid or malformed: {0}")]
34 MalformedFile(String),
35
36 #[error("failed to convert file")]
38 ConvertFile(#[from] PdfConvertError),
39
40 #[error("failed to read pdf info")]
42 ReadPdfInfo(PdfInfoError),
43
44 #[error("failed to extract pdf file text: {0}")]
46 ExtractFileText(PdfTextError),
47
48 #[error("failed to decode image file: {0}")]
50 DecodeImage(ImageError),
51
52 #[error("failed to generate file thumbnail: {0}")]
54 GenerateThumbnail(ImageError),
55
56 #[error("failed to generate pdf file thumbnail: {0}")]
58 GeneratePdfThumbnail(GeneratePdfImagesError),
59
60 #[error("failed to process email file: {0}")]
62 Email(#[from] EmailProcessingError),
63
64 #[error("error waiting for image processing")]
66 Threading(#[from] JoinError),
67}
68
/// Optional settings that adjust how a file is processed.
///
/// Every field is optional and defaults to `None`.
#[derive(Debug, Default, Clone, Deserialize, Serialize, ToSchema)]
#[serde(default)]
pub struct ProcessingConfig {
    /// Email-specific processing settings.
    pub email: Option<EmailProcessingConfig>,

    // NOTE(review): this field is not read anywhere in this file; what counts
    // as an "unpack iteration" and the default used when it is `None` are
    // decided elsewhere. Confirm against the consumer.
    /// Upper bound on unpack iterations, when one is set.
    pub max_unpack_iterations: Option<usize>,
}
82
/// Settings specific to email processing.
///
/// Every field is optional and defaults to `None`.
#[derive(Debug, Default, Clone, Deserialize, Serialize, ToSchema)]
#[serde(default)]
pub struct EmailProcessingConfig {
    // NOTE(review): presumably `Some(true)` makes the email processor ignore
    // attachments; the behaviour for `None` is not visible in this file.
    // Confirm in the `email` module.
    /// Whether attachments should be skipped.
    pub skip_attachments: Option<bool>,
}
89
/// A generated file waiting to be uploaded.
// NOTE(review): where and when the upload actually happens is not visible in
// this file.
#[derive(Debug)]
pub struct QueuedUpload {
    /// MIME type of `bytes`.
    pub mime: Mime,
    /// Kind of generated file this upload represents.
    pub ty: GeneratedFileType,
    /// Content of the file.
    pub bytes: Bytes,
}
96
97impl QueuedUpload {
98 pub fn new(mime: Mime, ty: GeneratedFileType, bytes: Bytes) -> Self {
99 Self { mime, ty, bytes }
100 }
101}
102
/// An extra file produced while processing another file.
// NOTE(review): presumably things such as email attachments that must be
// processed in turn; confirm against the producers of this type.
#[derive(Debug)]
pub struct AdditionalProcessingFile {
    /// Identifier to use for the file, when one has already been chosen.
    // NOTE(review): presumably a new ID is generated when this is `None`;
    // confirm with the consumer.
    pub fixed_id: Option<FileId>,
    /// Name of the file.
    pub name: String,
    /// MIME type of `bytes`.
    pub mime: Mime,
    /// Content of the file.
    pub bytes: Bytes,
}
116
/// Result of processing a single file.
///
/// The `Default` value has empty lists, no index metadata and
/// `encrypted == false`.
#[derive(Debug, Default)]
pub struct ProcessingOutput {
    /// Generated files queued for upload.
    pub upload_queue: Vec<QueuedUpload>,

    /// Extra files produced during processing.
    // NOTE(review): presumably these are fed back through processing; confirm
    // with the caller.
    pub additional_files: Vec<AdditionalProcessingFile>,

    /// Metadata for the search index, when any was produced.
    pub index_metadata: Option<ProcessingIndexMetadata>,

    /// Flag marking the processed file as encrypted.
    // NOTE(review): which processors set this, and how callers react, is not
    // visible in this file.
    pub encrypted: bool,
}
132
/// Search index metadata produced while processing a file.
#[derive(Debug, Default)]
pub struct ProcessingIndexMetadata {
    /// Document pages for the search index, when any were extracted.
    pub pages: Option<Vec<DocumentPage>>,
}
138
/// Shared state handed to the processing functions.
#[derive(Clone)]
pub struct ProcessingLayer {
    /// Office processing layer; `process_file` uses its `converter` to decide
    /// whether a MIME type is convertible and passes it to `process_office`.
    pub office: OfficeProcessingLayer,
    /// Layer-wide configuration.
    pub config: ProcessingLayerConfig,
}
144
/// Configuration for the processing layer.
#[derive(Debug, Default, Deserialize, Serialize, Clone)]
pub struct ProcessingLayerConfig {
    /// Upper bound on unpack iterations, when one is set.
    ///
    /// `from_env` fills this from the `DOCBOX_MAX_FILE_UNPACK_ITERATIONS`
    /// environment variable.
    // NOTE(review): how this interacts with
    // `ProcessingConfig::max_unpack_iterations` and which default applies for
    // `None` is not visible in this file.
    pub max_unpack_iterations: Option<usize>,
}
158
/// Errors produced while loading a [`ProcessingLayerConfig`] from the
/// environment.
#[derive(Debug, Error)]
pub enum ProcessingLayerConfigError {
    /// `DOCBOX_MAX_FILE_UNPACK_ITERATIONS` was set but failed to parse as a
    /// number; the payload is the parse error.
    // NOTE(review): the wrapped `ParseIntError` is neither shown in the
    // message nor marked as `#[source]`, so it is only reachable by matching
    // on the variant.
    #[error("invalid DOCBOX_MAX_FILE_UNPACK_ITERATIONS value must be a number")]
    InvalidMaxIterations(ParseIntError),
}
165
166impl ProcessingLayerConfig {
167 pub fn from_env() -> Result<ProcessingLayerConfig, ProcessingLayerConfigError> {
168 let max_unpack_iterations = std::env::var("DOCBOX_MAX_FILE_UNPACK_ITERATIONS")
169 .ok()
170 .map(|value| {
171 value
172 .parse::<usize>()
173 .map_err(ProcessingLayerConfigError::InvalidMaxIterations)
174 })
175 .transpose()?;
176
177 Ok(ProcessingLayerConfig {
178 max_unpack_iterations,
179 })
180 }
181}
182
183pub async fn process_file(
191 config: &Option<ProcessingConfig>,
192 layer: &ProcessingLayer,
193 bytes: Bytes,
194 mime: &Mime,
195) -> Result<Option<ProcessingOutput>, ProcessingError> {
196 if is_pdf_file(mime) {
198 tracing::debug!("processing pdf file");
199
200 let output = process_pdf(&bytes).await?;
201 Ok(Some(output))
202 }
203 else if layer.office.converter.is_convertable(mime) {
205 tracing::debug!("processing office compatible file");
206
207 let output = process_office(&layer.office, bytes).await?;
208 Ok(Some(output))
209 }
210 else if is_mail_mime(mime) {
212 tracing::debug!("processing email file");
213
214 let output = process_email(config, &bytes)?;
215 Ok(Some(output))
216 }
217 else if let Some(image_format) = ImageFormat::from_mime_type(mime) {
219 tracing::debug!("processing image file");
220
221 let output = process_image_async(bytes, image_format).await?;
222 Ok(Some(output))
223 }
224 else {
226 Ok(None)
227 }
228}