docbox_core/processing/
mod.rs1use crate::{
2 files::{generated::QueuedUpload, upload_file::ProcessingConfig},
3 processing::{
4 email::{is_mail_mime, process_email},
5 image::process_image_async,
6 office::{PdfConvertError, process_office},
7 pdf::process_pdf,
8 },
9};
10use ::image::{ImageError, ImageFormat};
11use bytes::Bytes;
12use docbox_database::models::file::FileId;
13use docbox_search::models::DocumentPage;
14use mime::Mime;
15use office::OfficeProcessingLayer;
16use pdf::is_pdf_file;
17use pdf_process::{PdfInfoError, PdfTextError};
18use thiserror::Error;
19use tokio::task::JoinError;
20
21pub mod email;
22pub mod html_to_text;
23pub mod image;
24pub mod office;
25pub mod pdf;
26
27#[derive(Debug, Error)]
28pub enum ProcessingError {
29 #[error("file is invalid or malformed: {0}")]
31 MalformedFile(String),
32
33 #[error("internal server error")]
35 InternalServerError,
36
37 #[error("failed to convert file: {0}")]
39 ConvertFile(#[from] PdfConvertError),
40
41 #[error("failed to read pdf info: {0}")]
43 ReadPdfInfo(PdfInfoError),
44
45 #[error("failed to extract pdf file text: {0}")]
47 ExtractFileText(PdfTextError),
48
49 #[error("failed to decode image file: {0}")]
51 DecodeImage(ImageError),
52
53 #[error("failed to generate file thumbnail: {0}")]
55 GenerateThumbnail(anyhow::Error),
56
57 #[error("error waiting for image processing")]
59 Threading(#[from] JoinError),
60}
61
62#[derive(Debug)]
65pub struct AdditionalProcessingFile {
66 pub fixed_id: Option<FileId>,
68 pub name: String,
70 pub mime: Mime,
72 pub bytes: Bytes,
74}
75
76#[derive(Debug, Default)]
77pub struct ProcessingOutput {
78 pub upload_queue: Vec<QueuedUpload>,
80
81 pub additional_files: Vec<AdditionalProcessingFile>,
84
85 pub index_metadata: Option<ProcessingIndexMetadata>,
87
88 pub encrypted: bool,
90}
91
92#[derive(Debug, Default)]
93pub struct ProcessingIndexMetadata {
94 pub pages: Option<Vec<DocumentPage>>,
96}
97
98#[derive(Clone)]
99pub struct ProcessingLayer {
100 pub office: OfficeProcessingLayer,
101}
102
103pub async fn process_file(
111 config: &Option<ProcessingConfig>,
112 layer: &ProcessingLayer,
113 bytes: Bytes,
114 mime: &Mime,
115) -> Result<Option<ProcessingOutput>, ProcessingError> {
116 if is_pdf_file(mime) {
118 tracing::debug!("processing pdf file");
119
120 let output = process_pdf(&bytes).await?;
121 Ok(Some(output))
122 }
123 else if layer.office.converter.is_convertable(mime) {
125 tracing::debug!("processing office compatible file");
126
127 let output = process_office(&layer.office, bytes).await?;
128 Ok(Some(output))
129 }
130 else if is_mail_mime(mime) {
132 tracing::debug!("processing email file");
133
134 let output = process_email(config, &bytes)?;
135 Ok(Some(output))
136 }
137 else if let Some(image_format) = ImageFormat::from_mime_type(mime) {
139 tracing::debug!("processing image file");
140
141 let output = process_image_async(bytes, image_format).await?;
142 Ok(Some(output))
143 }
144 else {
146 return Ok(None);
147 }
148}