docbox_core/processing/office/
convert_server.rs

1use anyhow::Context;
2use bytes::Bytes;
3use office_convert_client::{
4    OfficeConvertClient, OfficeConvertLoadBalancer, OfficeConverter, RequestError,
5};
6use reqwest::Client;
7
8use super::{ConvertToPdf, PdfConvertError};
9
/// MIME types (essence only, no parameters) that the office converter is
/// known to accept as input for PDF conversion.
///
/// Every entry must be lowercase and appear exactly once: lookups compare
/// against [`mime::Mime::essence_str`], and the `mime` crate is understood to
/// normalise parsed types to lowercase, so a mixed-case entry could never match.
///
/// The `image/*` entries are kept in the list, but [`is_known_pdf_convertable`]
/// rejects every image type before consulting it.
pub const CONVERTABLE_FORMATS: &[&str] = &[
    // .dotm
    "application/vnd.ms-word.template.macroenabled.12",
    // .xlsb
    "application/vnd.ms-excel.sheet.binary.macroenabled.12",
    // .xlsm
    "application/vnd.ms-excel.sheet.macroenabled.12",
    // .xltm
    "application/vnd.ms-excel.template.macroenabled.12",
    // .ods
    "application/vnd.oasis.opendocument.spreadsheet",
    "text/html",
    "application/msword",
    "application/vnd.oasis.opendocument.text-flat-xml",
    "application/rtf",
    "application/vnd.sun.xml.writer",
    "application/vnd.wordperfect",
    "application/vnd.ms-works",
    "application/x-mswrite",
    "application/clarisworks",
    "application/macwriteii",
    "application/x-abiword",
    "application/x-t602",
    "application/vnd.lotus-wordpro",
    "application/x-hwp",
    "application/vnd.sun.xml.writer.template",
    "application/pdf",
    "application/vnd.oasis.opendocument.text",
    "application/vnd.oasis.opendocument.text-template",
    // .docx
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    // .dotx
    "application/vnd.openxmlformats-officedocument.wordprocessingml.template",
    // .ppsx (slideshows belong to the presentationml family, not wordprocessingml)
    "application/vnd.openxmlformats-officedocument.presentationml.slideshow",
    // .pptx
    "application/vnd.openxmlformats-officedocument.presentationml.presentation",
    "application/vnd.oasis.opendocument.presentation",
    "application/x-fictionbook+xml",
    "application/x-aportisdoc",
    "application/prs.plucker",
    "application/x-iwork-pages-sffpages",
    "application/vnd.palm",
    "application/epub+zip",
    "application/x-pocket-word",
    "application/vnd.oasis.opendocument.spreadsheet-flat-xml",
    "application/vnd.lotus-1-2-3",
    "application/vnd.ms-excel",
    "text/spreadsheet",
    "application/vnd.sun.xml.calc",
    "application/vnd.sun.xml.calc.template",
    "application/x-gnumeric",
    // .xlsx
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    // .xltx
    "application/vnd.openxmlformats-officedocument.spreadsheetml.template",
    "application/x-iwork-numbers-sffnumbers",
    "application/mathml+xml",
    "application/vnd.sun.xml.math",
    "application/vnd.oasis.opendocument.formula",
    "application/vnd.sun.xml.base",
    "image/jpeg",
    "image/png",
    "image/svg+xml",
    "image/webp",
    "application/docbook+xml",
    "application/xhtml+xml",
];
75
76/// Variant of [ConvertToPdf] that uses LibreOffice through a
77/// office-converter server for the conversion
78#[derive(Clone)]
79pub struct OfficeConverterServer {
80    client: OfficeConverter,
81}
82
83impl OfficeConverterServer {
84    pub fn new(client: OfficeConverter) -> Self {
85        Self { client }
86    }
87
88    pub fn from_addresses<'a, I>(addresses: I) -> anyhow::Result<Self>
89    where
90        I: IntoIterator<Item = &'a str>,
91    {
92        let mut convert_clients: Vec<OfficeConvertClient> = Vec::new();
93
94        // Create an HTTP client with no_proxy to disable the system proxy
95        // so that it will only be request over localhost
96        // (Otherwise we will attempt to access the convert server through the proxy which is not able to access it)
97        let http_client = Client::builder()
98            .no_proxy()
99            .build()
100            .context("failed to build convert http client")?;
101
102        for convert_server_address in addresses {
103            tracing::debug!(address = ?convert_server_address, "added convert server");
104
105            let convert_client =
106                OfficeConvertClient::from_client(convert_server_address, http_client.clone());
107
108            convert_clients.push(convert_client);
109        }
110
111        if convert_clients.is_empty() {
112            return Err(anyhow::anyhow!(
113                "no office convert server addresses provided"
114            ));
115        }
116
117        // Create a convert load balancer
118        let load_balancer = OfficeConvertLoadBalancer::new(convert_clients);
119        Ok(Self::new(OfficeConverter::from_load_balancer(
120            load_balancer,
121        )))
122    }
123}
124
125impl ConvertToPdf for OfficeConverterServer {
126    async fn convert_to_pdf(&self, file_bytes: Bytes) -> Result<Bytes, PdfConvertError> {
127        self.client
128            .convert(file_bytes)
129            .await
130            .map_err(|err| match err {
131                // File was encrypted
132                RequestError::ErrorResponse { reason, .. } if reason == "file is encrypted" => {
133                    PdfConvertError::EncryptedDocument
134                }
135                // File was corrupted or unreadable
136                RequestError::ErrorResponse { reason, .. } if reason == "file is corrupted" => {
137                    PdfConvertError::MalformedDocument
138                }
139                // Other unknown error
140                err => PdfConvertError::ConversionFailed(err),
141            })
142    }
143
144    fn is_convertable(&self, mime: &mime::Mime) -> bool {
145        is_known_pdf_convertable(mime)
146    }
147}
148
149/// Checks if the provided mime is included in the known convertable mime types
150pub fn is_known_pdf_convertable(mime: &mime::Mime) -> bool {
151    // We don't want to send images through the office converter
152    mime.type_() != mime::IMAGE &&
153    // Must be in the convertable formats list
154    CONVERTABLE_FORMATS.contains(&mime.essence_str())
155}