docbox_processing/office/
convert_server.rs1use crate::office::libreoffice::is_known_libreoffice_pdf_convertable;
12
13use super::{ConvertToPdf, PdfConvertError};
14use bytes::Bytes;
15use office_convert_client::{
16 OfficeConvertClient, OfficeConvertLoadBalancer, OfficeConverter, RequestError,
17};
18use reqwest::Client;
19use serde::{Deserialize, Serialize};
20use thiserror::Error;
21
22#[derive(Debug, Clone, Serialize, Deserialize)]
23pub struct OfficeConvertServerConfig {
24 pub addresses: Vec<String>,
25 pub use_proxy: bool,
26}
27
28impl OfficeConvertServerConfig {
29 pub fn from_env() -> OfficeConvertServerConfig {
30 let addresses = std::env::var("DOCBOX_CONVERT_SERVER_ADDRESS")
31 .or(std::env::var("CONVERT_SERVER_ADDRESS"))
32 .unwrap_or("http://127.0.0.1:8081".to_string());
33 let addresses = addresses
34 .split(',')
35 .map(|value| value.to_string())
36 .collect();
37
38 let use_proxy = match std::env::var("DOCBOX_CONVERT_SERVER_USE_PROXY")
44 .or(std::env::var("CONVERT_SERVER_USE_PROXY"))
45 {
46 Ok(value) => match value.parse::<bool>() {
47 Ok(value) => value,
48 Err(error) => {
49 tracing::error!(
50 ?error,
51 "invalid CONVERT_SERVER_USE_PROXY environment variable, defaulting to false"
52 );
53 false
54 }
55 },
56 Err(_) => false,
57 };
58
59 OfficeConvertServerConfig {
60 addresses,
61 use_proxy,
62 }
63 }
64}
65
66#[derive(Clone)]
69pub struct OfficeConverterServer {
70 client: OfficeConverter,
71}
72
73#[derive(Debug, Error)]
74pub enum OfficeConvertServerError {
75 #[error("failed to build http client")]
76 BuildHttpClient(reqwest::Error),
77 #[error("no office convert server addresses provided")]
78 NoAddresses,
79}
80
81impl OfficeConverterServer {
82 pub fn new(client: OfficeConverter) -> Self {
83 Self { client }
84 }
85
86 pub fn from_config(
87 config: OfficeConvertServerConfig,
88 ) -> Result<Self, OfficeConvertServerError> {
89 Self::from_addresses(
90 config.addresses.iter().map(|value| value.as_str()),
91 config.use_proxy,
92 )
93 }
94
95 pub fn from_addresses<'a, I>(
96 addresses: I,
97 use_proxy: bool,
98 ) -> Result<Self, OfficeConvertServerError>
99 where
100 I: IntoIterator<Item = &'a str>,
101 {
102 let mut convert_clients: Vec<OfficeConvertClient> = Vec::new();
103 let mut http_client = Client::builder();
104
105 if !use_proxy {
106 http_client = http_client.no_proxy();
107 }
108
109 let http_client = http_client
110 .build()
111 .map_err(OfficeConvertServerError::BuildHttpClient)?;
112
113 for convert_server_address in addresses {
114 tracing::debug!(address = ?convert_server_address, "added convert server");
115
116 let convert_client =
117 OfficeConvertClient::from_client(convert_server_address, http_client.clone());
118
119 convert_clients.push(convert_client);
120 }
121
122 if convert_clients.is_empty() {
123 return Err(OfficeConvertServerError::NoAddresses);
124 }
125
126 let load_balancer = OfficeConvertLoadBalancer::new(convert_clients);
128 Ok(Self::new(OfficeConverter::from_load_balancer(
129 load_balancer,
130 )))
131 }
132}
133
134impl ConvertToPdf for OfficeConverterServer {
135 async fn convert_to_pdf(&self, file_bytes: Bytes) -> Result<Bytes, PdfConvertError> {
136 self.client
137 .convert(file_bytes)
138 .await
139 .map_err(|err| match err {
140 RequestError::ErrorResponse { reason, .. } if reason == "file is encrypted" => {
142 PdfConvertError::EncryptedDocument
143 }
144 RequestError::ErrorResponse { reason, .. } if reason == "file is corrupted" => {
146 PdfConvertError::MalformedDocument
147 }
148 err => PdfConvertError::ConversionFailed(err),
150 })
151 }
152
153 fn is_convertable(&self, mime: &mime::Mime) -> bool {
154 is_known_libreoffice_pdf_convertable(mime)
155 }
156}