docbox_processing/office/
convert_server.rs1use super::{ConvertToPdf, PdfConvertError};
2use bytes::Bytes;
3use office_convert_client::{
4 OfficeConvertClient, OfficeConvertLoadBalancer, OfficeConverter, RequestError,
5};
6use reqwest::Client;
7use serde::{Deserialize, Serialize};
8use thiserror::Error;
9
/// MIME types (essence strings: `type/subtype`, no parameters) that are
/// treated as candidates for conversion to PDF.
///
/// Every entry is unique. Image types are listed here too, but
/// [`is_known_pdf_convertable`] rejects any MIME whose top-level type is
/// `image` before this list is consulted.
pub const CONVERTABLE_FORMATS: &[&str] = &[
    "application/vnd.ms-word.template.macroenabled.12",
    "application/vnd.ms-excel.sheet.binary.macroenabled.12",
    "application/vnd.ms-excel.sheet.macroenabled.12",
    "application/vnd.ms-excel.template.macroenabled.12",
    "application/vnd.oasis.opendocument.spreadsheet",
    "text/html",
    "application/msword",
    "application/vnd.oasis.opendocument.text-flat-xml",
    "application/rtf",
    "application/vnd.sun.xml.writer",
    "application/vnd.wordperfect",
    "application/vnd.ms-works",
    "application/x-mswrite",
    "application/clarisworks",
    "application/macwriteii",
    "application/x-abiword",
    "application/x-t602",
    "application/vnd.lotus-wordpro",
    "application/x-hwp",
    "application/vnd.sun.xml.writer.template",
    "application/pdf",
    "application/vnd.oasis.opendocument.text",
    "application/vnd.oasis.opendocument.text-template",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.template",
    // Historical entry kept so existing lookups keep working; the registered
    // slideshow type lives under `presentationml` (next entry).
    "application/vnd.openxmlformats-officedocument.wordprocessingml.slideshow",
    "application/vnd.openxmlformats-officedocument.presentationml.slideshow",
    "application/vnd.openxmlformats-officedocument.presentationml.presentation",
    "application/vnd.oasis.opendocument.presentation",
    "application/x-fictionbook+xml",
    "application/x-aportisdoc",
    "application/prs.plucker",
    "application/x-iwork-pages-sffpages",
    "application/vnd.palm",
    "application/epub+zip",
    "application/x-pocket-word",
    "application/vnd.oasis.opendocument.spreadsheet-flat-xml",
    "application/vnd.lotus-1-2-3",
    "application/vnd.ms-excel",
    "text/spreadsheet",
    "application/vnd.sun.xml.calc",
    "application/vnd.sun.xml.calc.template",
    "application/x-gnumeric",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    // NOTE(review): mixed-case spelling of an entry already listed above in
    // lower case. Presumably parsed `Mime` values only ever yield the
    // lower-case form; kept for callers comparing raw strings. Confirm
    // whether it is still needed.
    "application/vnd.ms-excel.sheet.macroEnabled.12",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.template",
    "application/x-iwork-numbers-sffnumbers",
    "application/mathml+xml",
    "application/vnd.sun.xml.math",
    "application/vnd.oasis.opendocument.formula",
    "application/vnd.sun.xml.base",
    "image/jpeg",
    "image/png",
    "image/svg+xml",
    "image/webp",
    "application/docbook+xml",
    "application/xhtml+xml",
];
75
76#[derive(Debug, Clone, Serialize, Deserialize)]
77pub struct OfficeConvertServerConfig {
78 pub addresses: Vec<String>,
79 pub use_proxy: bool,
80}
81
82impl OfficeConvertServerConfig {
83 pub fn from_env() -> OfficeConvertServerConfig {
84 let addresses =
85 std::env::var("CONVERT_SERVER_ADDRESS").unwrap_or("http://127.0.0.1:8081".to_string());
86 let addresses = addresses
87 .split(',')
88 .map(|value| value.to_string())
89 .collect();
90
91 let use_proxy = match std::env::var("CONVERT_SERVER_USE_PROXY") {
97 Ok(value) => match value.parse::<bool>() {
98 Ok(value) => value,
99 Err(error) => {
100 tracing::error!(
101 ?error,
102 "invalid CONVERT_SERVER_USE_PROXY environment variable, defaulting to false"
103 );
104 false
105 }
106 },
107 Err(_) => false,
108 };
109
110 OfficeConvertServerConfig {
111 addresses,
112 use_proxy,
113 }
114 }
115}
116
117#[derive(Clone)]
120pub struct OfficeConverterServer {
121 client: OfficeConverter,
122}
123
124#[derive(Debug, Error)]
125pub enum OfficeConvertServerError {
126 #[error("failed to build http client")]
127 BuildHttpClient(reqwest::Error),
128 #[error("no office convert server addresses provided")]
129 NoAddresses,
130}
131
132impl OfficeConverterServer {
133 pub fn new(client: OfficeConverter) -> Self {
134 Self { client }
135 }
136
137 pub fn from_config(
138 config: OfficeConvertServerConfig,
139 ) -> Result<Self, OfficeConvertServerError> {
140 Self::from_addresses(
141 config.addresses.iter().map(|value| value.as_str()),
142 config.use_proxy,
143 )
144 }
145
146 pub fn from_addresses<'a, I>(
147 addresses: I,
148 use_proxy: bool,
149 ) -> Result<Self, OfficeConvertServerError>
150 where
151 I: IntoIterator<Item = &'a str>,
152 {
153 let mut convert_clients: Vec<OfficeConvertClient> = Vec::new();
154 let mut http_client = Client::builder();
155
156 if !use_proxy {
157 http_client = http_client.no_proxy();
158 }
159
160 let http_client = http_client
161 .build()
162 .map_err(OfficeConvertServerError::BuildHttpClient)?;
163
164 for convert_server_address in addresses {
165 tracing::debug!(address = ?convert_server_address, "added convert server");
166
167 let convert_client =
168 OfficeConvertClient::from_client(convert_server_address, http_client.clone());
169
170 convert_clients.push(convert_client);
171 }
172
173 if convert_clients.is_empty() {
174 return Err(OfficeConvertServerError::NoAddresses);
175 }
176
177 let load_balancer = OfficeConvertLoadBalancer::new(convert_clients);
179 Ok(Self::new(OfficeConverter::from_load_balancer(
180 load_balancer,
181 )))
182 }
183}
184
185impl ConvertToPdf for OfficeConverterServer {
186 async fn convert_to_pdf(&self, file_bytes: Bytes) -> Result<Bytes, PdfConvertError> {
187 self.client
188 .convert(file_bytes)
189 .await
190 .map_err(|err| match err {
191 RequestError::ErrorResponse { reason, .. } if reason == "file is encrypted" => {
193 PdfConvertError::EncryptedDocument
194 }
195 RequestError::ErrorResponse { reason, .. } if reason == "file is corrupted" => {
197 PdfConvertError::MalformedDocument
198 }
199 err => PdfConvertError::ConversionFailed(err),
201 })
202 }
203
204 fn is_convertable(&self, mime: &mime::Mime) -> bool {
205 is_known_pdf_convertable(mime)
206 }
207}
208
209pub fn is_known_pdf_convertable(mime: &mime::Mime) -> bool {
211 mime.type_() != mime::IMAGE &&
213 CONVERTABLE_FORMATS.contains(&mime.essence_str())
215}