docbox_core/processing/office/
convert_server.rs

1use anyhow::Context;
2use bytes::Bytes;
3use office_convert_client::{
4    OfficeConvertClient, OfficeConvertLoadBalancer, OfficeConverter, RequestError,
5};
6use reqwest::Client;
7use serde::{Deserialize, Serialize};
8
9use super::{ConvertToPdf, PdfConvertError};
10
/// Mime type essences (`type/subtype`, no parameters) that are treated as
/// convertable formats.
///
/// Every entry is listed once. The list also contains `image/*` entries;
/// [is_known_pdf_convertable] rejects images before consulting this list.
pub const CONVERTABLE_FORMATS: &[&str] = &[
    // .dotm
    "application/vnd.ms-word.template.macroenabled.12",
    // .xlsb
    "application/vnd.ms-excel.sheet.binary.macroenabled.12",
    // .xlsm
    "application/vnd.ms-excel.sheet.macroenabled.12",
    // .xltm
    "application/vnd.ms-excel.template.macroenabled.12",
    // .ods
    "application/vnd.oasis.opendocument.spreadsheet",
    "text/html",
    "application/msword",
    "application/vnd.oasis.opendocument.text-flat-xml",
    "application/rtf",
    "application/vnd.sun.xml.writer",
    "application/vnd.wordperfect",
    "application/vnd.ms-works",
    "application/x-mswrite",
    "application/clarisworks",
    "application/macwriteii",
    "application/x-abiword",
    "application/x-t602",
    "application/vnd.lotus-wordpro",
    "application/x-hwp",
    "application/vnd.sun.xml.writer.template",
    "application/pdf",
    "application/vnd.oasis.opendocument.text",
    "application/vnd.oasis.opendocument.text-template",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.template",
    // NOTE(review): this looks like a typo for
    // "application/vnd.openxmlformats-officedocument.presentationml.slideshow" (.ppsx);
    // kept unchanged until that is confirmed
    "application/vnd.openxmlformats-officedocument.wordprocessingml.slideshow",
    "application/vnd.openxmlformats-officedocument.presentationml.presentation",
    "application/vnd.oasis.opendocument.presentation",
    "application/x-fictionbook+xml",
    "application/x-aportisdoc",
    "application/prs.plucker",
    "application/x-iwork-pages-sffpages",
    "application/vnd.palm",
    "application/epub+zip",
    "application/x-pocket-word",
    "application/vnd.oasis.opendocument.spreadsheet-flat-xml",
    "application/vnd.lotus-1-2-3",
    "application/vnd.ms-excel",
    "text/spreadsheet",
    "application/vnd.sun.xml.calc",
    "application/vnd.sun.xml.calc.template",
    "application/x-gnumeric",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    // Mixed-case spelling of the .xlsm entry above. NOTE(review): parsed
    // `mime::Mime` values appear to be lowercased, so lookups through
    // [is_known_pdf_convertable] would match the lowercase entry instead;
    // this spelling is kept in case other callers compare raw strings
    "application/vnd.ms-excel.sheet.macroEnabled.12",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.template",
    "application/x-iwork-numbers-sffnumbers",
    "application/mathml+xml",
    "application/vnd.sun.xml.math",
    "application/vnd.oasis.opendocument.formula",
    "application/vnd.sun.xml.base",
    "image/jpeg",
    "image/png",
    "image/svg+xml",
    "image/webp",
    "application/docbook+xml",
    "application/xhtml+xml",
];
76
77#[derive(Debug, Clone, Serialize, Deserialize)]
78pub struct OfficeConvertServerConfig {
79    pub addresses: Vec<String>,
80}
81
82impl OfficeConvertServerConfig {
83    pub fn from_env() -> anyhow::Result<OfficeConvertServerConfig> {
84        let addresses =
85            std::env::var("CONVERT_SERVER_ADDRESS").unwrap_or("http://127.0.0.1:8081".to_string());
86        let addresses = addresses
87            .split(',')
88            .map(|value| value.to_string())
89            .collect();
90
91        Ok(OfficeConvertServerConfig { addresses })
92    }
93}
94
95/// Variant of [ConvertToPdf] that uses LibreOffice through a
96/// office-converter server for the conversion
97#[derive(Clone)]
98pub struct OfficeConverterServer {
99    client: OfficeConverter,
100}
101
102impl OfficeConverterServer {
103    pub fn new(client: OfficeConverter) -> Self {
104        Self { client }
105    }
106
107    pub fn from_config(config: OfficeConvertServerConfig) -> anyhow::Result<Self> {
108        Self::from_addresses(config.addresses.iter().map(|value| value.as_str()))
109    }
110
111    pub fn from_addresses<'a, I>(addresses: I) -> anyhow::Result<Self>
112    where
113        I: IntoIterator<Item = &'a str>,
114    {
115        let mut convert_clients: Vec<OfficeConvertClient> = Vec::new();
116
117        // Create an HTTP client with no_proxy to disable the system proxy
118        // so that it will only be request over localhost
119        // (Otherwise we will attempt to access the convert server through the proxy which is not able to access it)
120        let http_client = Client::builder()
121            .no_proxy()
122            .build()
123            .context("failed to build convert http client")?;
124
125        for convert_server_address in addresses {
126            tracing::debug!(address = ?convert_server_address, "added convert server");
127
128            let convert_client =
129                OfficeConvertClient::from_client(convert_server_address, http_client.clone());
130
131            convert_clients.push(convert_client);
132        }
133
134        if convert_clients.is_empty() {
135            return Err(anyhow::anyhow!(
136                "no office convert server addresses provided"
137            ));
138        }
139
140        // Create a convert load balancer
141        let load_balancer = OfficeConvertLoadBalancer::new(convert_clients);
142        Ok(Self::new(OfficeConverter::from_load_balancer(
143            load_balancer,
144        )))
145    }
146}
147
148impl ConvertToPdf for OfficeConverterServer {
149    async fn convert_to_pdf(&self, file_bytes: Bytes) -> Result<Bytes, PdfConvertError> {
150        self.client
151            .convert(file_bytes)
152            .await
153            .map_err(|err| match err {
154                // File was encrypted
155                RequestError::ErrorResponse { reason, .. } if reason == "file is encrypted" => {
156                    PdfConvertError::EncryptedDocument
157                }
158                // File was corrupted or unreadable
159                RequestError::ErrorResponse { reason, .. } if reason == "file is corrupted" => {
160                    PdfConvertError::MalformedDocument
161                }
162                // Other unknown error
163                err => PdfConvertError::ConversionFailed(err),
164            })
165    }
166
167    fn is_convertable(&self, mime: &mime::Mime) -> bool {
168        is_known_pdf_convertable(mime)
169    }
170}
171
172/// Checks if the provided mime is included in the known convertable mime types
173pub fn is_known_pdf_convertable(mime: &mime::Mime) -> bool {
174    // We don't want to send images through the office converter
175    mime.type_() != mime::IMAGE &&
176    // Must be in the convertable formats list
177    CONVERTABLE_FORMATS.contains(&mime.essence_str())
178}