docbox_core/processing/office/
convert_server.rs1use anyhow::Context;
2use bytes::Bytes;
3use office_convert_client::{
4 OfficeConvertClient, OfficeConvertLoadBalancer, OfficeConverter, RequestError,
5};
6use reqwest::Client;
7use serde::{Deserialize, Serialize};
8
9use super::{ConvertToPdf, PdfConvertError};
10
/// MIME types that are accepted for conversion by the office convert server.
///
/// Entries are matched against [`mime::Mime::essence_str`] (see
/// `is_known_pdf_convertable`), so every entry must be written in lowercase:
/// the `mime` crate normalises parsed types to lowercase and a mixed-case
/// entry would never match.
///
/// Image types are listed here as well, but `is_known_pdf_convertable`
/// rejects anything whose top-level type is `image` before consulting this
/// list.
///
/// The list must not contain duplicates.
pub const CONVERTABLE_FORMATS: &[&str] = &[
    // Macro-enabled Microsoft Office formats
    "application/vnd.ms-word.template.macroenabled.12",
    "application/vnd.ms-excel.sheet.binary.macroenabled.12",
    "application/vnd.ms-excel.sheet.macroenabled.12",
    "application/vnd.ms-excel.template.macroenabled.12",
    "application/vnd.oasis.opendocument.spreadsheet",
    // Text / word processing documents
    "text/html",
    "application/msword",
    "application/vnd.oasis.opendocument.text-flat-xml",
    "application/rtf",
    "application/vnd.sun.xml.writer",
    "application/vnd.wordperfect",
    "application/vnd.ms-works",
    "application/x-mswrite",
    "application/clarisworks",
    "application/macwriteii",
    "application/x-abiword",
    "application/x-t602",
    "application/vnd.lotus-wordpro",
    "application/x-hwp",
    "application/vnd.sun.xml.writer.template",
    "application/pdf",
    "application/vnd.oasis.opendocument.text",
    "application/vnd.oasis.opendocument.text-template",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.template",
    // Presentations.
    // NOTE: this entry used to read `...wordprocessingml.slideshow`, which is
    // not an existing media type; the Office Open XML slideshow type lives
    // under `presentationml`.
    "application/vnd.openxmlformats-officedocument.presentationml.slideshow",
    "application/vnd.openxmlformats-officedocument.presentationml.presentation",
    "application/vnd.oasis.opendocument.presentation",
    // E-book style formats
    "application/x-fictionbook+xml",
    "application/x-aportisdoc",
    "application/prs.plucker",
    "application/x-iwork-pages-sffpages",
    "application/vnd.palm",
    "application/epub+zip",
    "application/x-pocket-word",
    // Spreadsheets
    "application/vnd.oasis.opendocument.spreadsheet-flat-xml",
    "application/vnd.lotus-1-2-3",
    "application/vnd.ms-excel",
    "text/spreadsheet",
    "application/vnd.sun.xml.calc",
    "application/vnd.sun.xml.calc.template",
    "application/x-gnumeric",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.template",
    "application/x-iwork-numbers-sffnumbers",
    // Formulas / databases
    "application/mathml+xml",
    "application/vnd.sun.xml.math",
    "application/vnd.oasis.opendocument.formula",
    "application/vnd.sun.xml.base",
    // Images (filtered out by `is_known_pdf_convertable`)
    "image/jpeg",
    "image/png",
    "image/svg+xml",
    "image/webp",
    // XML based documents
    "application/docbook+xml",
    "application/xhtml+xml",
];
76
77#[derive(Debug, Clone, Serialize, Deserialize)]
78pub struct OfficeConvertServerConfig {
79 pub addresses: Vec<String>,
80}
81
82impl OfficeConvertServerConfig {
83 pub fn from_env() -> anyhow::Result<OfficeConvertServerConfig> {
84 let addresses =
85 std::env::var("CONVERT_SERVER_ADDRESS").unwrap_or("http://127.0.0.1:8081".to_string());
86 let addresses = addresses
87 .split(',')
88 .map(|value| value.to_string())
89 .collect();
90
91 Ok(OfficeConvertServerConfig { addresses })
92 }
93}
94
95#[derive(Clone)]
98pub struct OfficeConverterServer {
99 client: OfficeConverter,
100}
101
102impl OfficeConverterServer {
103 pub fn new(client: OfficeConverter) -> Self {
104 Self { client }
105 }
106
107 pub fn from_config(config: OfficeConvertServerConfig) -> anyhow::Result<Self> {
108 Self::from_addresses(config.addresses.iter().map(|value| value.as_str()))
109 }
110
111 pub fn from_addresses<'a, I>(addresses: I) -> anyhow::Result<Self>
112 where
113 I: IntoIterator<Item = &'a str>,
114 {
115 let mut convert_clients: Vec<OfficeConvertClient> = Vec::new();
116
117 let http_client = Client::builder()
121 .no_proxy()
122 .build()
123 .context("failed to build convert http client")?;
124
125 for convert_server_address in addresses {
126 tracing::debug!(address = ?convert_server_address, "added convert server");
127
128 let convert_client =
129 OfficeConvertClient::from_client(convert_server_address, http_client.clone());
130
131 convert_clients.push(convert_client);
132 }
133
134 if convert_clients.is_empty() {
135 return Err(anyhow::anyhow!(
136 "no office convert server addresses provided"
137 ));
138 }
139
140 let load_balancer = OfficeConvertLoadBalancer::new(convert_clients);
142 Ok(Self::new(OfficeConverter::from_load_balancer(
143 load_balancer,
144 )))
145 }
146}
147
148impl ConvertToPdf for OfficeConverterServer {
149 async fn convert_to_pdf(&self, file_bytes: Bytes) -> Result<Bytes, PdfConvertError> {
150 self.client
151 .convert(file_bytes)
152 .await
153 .map_err(|err| match err {
154 RequestError::ErrorResponse { reason, .. } if reason == "file is encrypted" => {
156 PdfConvertError::EncryptedDocument
157 }
158 RequestError::ErrorResponse { reason, .. } if reason == "file is corrupted" => {
160 PdfConvertError::MalformedDocument
161 }
162 err => PdfConvertError::ConversionFailed(err),
164 })
165 }
166
167 fn is_convertable(&self, mime: &mime::Mime) -> bool {
168 is_known_pdf_convertable(mime)
169 }
170}
171
172pub fn is_known_pdf_convertable(mime: &mime::Mime) -> bool {
174 mime.type_() != mime::IMAGE &&
176 CONVERTABLE_FORMATS.contains(&mime.essence_str())
178}