docbox_processing/office/
convert_lambda.rs1use std::{str::FromStr, time::Duration};
16
17use crate::office::libreoffice::is_known_libreoffice_pdf_convertable;
18
19use super::{ConvertToPdf, PdfConvertError};
20use aws_config::SdkConfig;
21use bytes::Bytes;
22use docbox_database::sqlx::types::Uuid;
23use docbox_storage::{StorageLayerError, StorageLayerFactory, TenantStorageLayer};
24use office_convert_lambda_client::{ConvertError, OfficeConvertLambda, OfficeConvertLambdaOptions};
25use serde::{Deserialize, Serialize};
26use thiserror::Error;
27
28#[derive(Debug, Clone, Serialize, Deserialize)]
29pub struct OfficeConvertLambdaConfig {
30 pub function_name: String,
32 pub qualifier: Option<String>,
34 pub tenant_id: Option<String>,
36 pub retry_attempts: usize,
38 pub retry_wait: Duration,
40 pub tmp_bucket: String,
42}
43
44#[derive(Debug, Error)]
45pub enum OfficeConvertLambdaConfigError {
46 #[error("missing DOCBOX_CONVERT_LAMBDA_TMP_BUCKET environment variable")]
47 MissingTempBucket,
48 #[error("missing DOCBOX_CONVERT_LAMBDA_FUNCTION_NAME environment variable")]
49 MissingFunctionName,
50 #[error("DOCBOX_CONVERT_LAMBDA_RETRY_ATTEMPTS must be a number")]
51 InvalidRetryAttempts(<usize as FromStr>::Err),
52 #[error("DOCBOX_CONVERT_LAMBDA_RETRY_WAIT must be a number in seconds: {0}")]
53 InvalidRetryWait(<u64 as FromStr>::Err),
54}
55
56impl OfficeConvertLambdaConfig {
57 pub fn from_env() -> Result<OfficeConvertLambdaConfig, OfficeConvertLambdaConfigError> {
58 let tmp_bucket = std::env::var("DOCBOX_CONVERT_LAMBDA_TMP_BUCKET")
59 .map_err(|_| OfficeConvertLambdaConfigError::MissingTempBucket)?;
60
61 let function_name = std::env::var("DOCBOX_CONVERT_LAMBDA_FUNCTION_NAME")
62 .map_err(|_| OfficeConvertLambdaConfigError::MissingFunctionName)?;
63
64 let qualifier = std::env::var("DOCBOX_CONVERT_LAMBDA_QUALIFIER").ok();
65 let tenant_id = std::env::var("DOCBOX_CONVERT_LAMBDA_TENANT_ID").ok();
66
67 let retry_attempts = match std::env::var("DOCBOX_CONVERT_LAMBDA_RETRY_ATTEMPTS") {
68 Ok(retry_attempts) => retry_attempts
69 .parse::<usize>()
70 .map_err(OfficeConvertLambdaConfigError::InvalidRetryAttempts)?,
71 Err(_) => 3,
72 };
73
74 let retry_wait = match std::env::var("DOCBOX_CONVERT_LAMBDA_RETRY_WAIT") {
75 Ok(retry_wait) => retry_wait
76 .parse::<u64>()
77 .map_err(OfficeConvertLambdaConfigError::InvalidRetryWait)
78 .map(Duration::from_secs)?,
79 Err(_) => Duration::from_secs(1),
80 };
81
82 Ok(OfficeConvertLambdaConfig {
83 tmp_bucket,
84 function_name,
85 qualifier,
86 tenant_id,
87 retry_attempts,
88 retry_wait,
89 })
90 }
91}
92
93#[derive(Clone)]
96pub struct OfficeConverterLambda {
97 client: OfficeConvertLambda,
98 storage: TenantStorageLayer,
99}
100
101#[derive(Debug, Error)]
102pub enum OfficeConvertLambdaError {
103 #[error(transparent)]
105 Storage(#[from] StorageLayerError),
106
107 #[error(transparent)]
109 Convert(#[from] Box<ConvertError>),
110}
111
112impl OfficeConverterLambda {
113 pub fn new(client: OfficeConvertLambda, storage: TenantStorageLayer) -> Self {
114 Self { client, storage }
115 }
116
117 pub fn from_config(
118 aws_config: &SdkConfig,
119 storage: &StorageLayerFactory,
120 config: OfficeConvertLambdaConfig,
121 ) -> Result<Self, OfficeConvertLambdaError> {
122 let client = aws_sdk_lambda::Client::new(aws_config);
123 let storage = storage.create_storage_layer_bucket(config.tmp_bucket);
124
125 Ok(Self {
126 client: OfficeConvertLambda::new(
127 client,
128 OfficeConvertLambdaOptions {
129 function_name: config.function_name,
130 qualifier: config.qualifier,
131 tenant_id: config.tenant_id,
132 retry_attempts: config.retry_attempts,
133 retry_wait: config.retry_wait,
134 },
135 ),
136 storage,
137 })
138 }
139}
140
141impl ConvertToPdf for OfficeConverterLambda {
142 async fn convert_to_pdf(&self, file_bytes: Bytes) -> Result<Bytes, PdfConvertError> {
143 let bucket_name = self.storage.bucket_name();
144 let input_key = Uuid::new_v4().simple().to_string();
145 let output_key = Uuid::new_v4().simple().to_string();
146
147 tracing::debug!("uploading file for conversion");
148
149 self.storage
151 .upload_file(
152 &input_key,
153 "application/octet-stream".to_string(),
154 file_bytes,
155 )
156 .await
157 .map_err(OfficeConvertLambdaError::Storage)?;
158
159 tracing::debug!("calling conversion lambda");
160
161 let result = self
162 .client
163 .convert(office_convert_lambda_client::ConvertRequest {
164 source_bucket: bucket_name.clone(),
165 source_key: input_key.clone(),
166 dest_bucket: bucket_name.clone(),
167 dest_key: output_key.clone(),
168 })
169 .await;
170
171 tracing::debug!("conversion complete");
172
173 self.storage
175 .delete_file(&input_key)
176 .await
177 .map_err(OfficeConvertLambdaError::Storage)?;
178
179 match result {
180 Ok(_) => {
181 tracing::debug!("reading converted file");
182
183 let output_bytes = self
185 .storage
186 .get_file(&output_key)
187 .await
188 .map_err(OfficeConvertLambdaError::Storage)?
189 .collect_bytes()
190 .await
191 .map_err(OfficeConvertLambdaError::Storage)?;
192
193 self.storage
195 .delete_file(&output_key)
196 .await
197 .map_err(OfficeConvertLambdaError::Storage)?;
198
199 Ok(output_bytes)
200 }
201 Err(error) => {
202 tracing::error!(?error, "failed to convert file");
203 Err(match error {
204 ConvertError::Lambda(err) if err.reason.as_str() == "FILE_LIKELY_ENCRYPTED" => {
205 PdfConvertError::EncryptedDocument
206 }
207 ConvertError::Lambda(err) if err.reason.as_str() == "FILE_LIKELY_CORRUPTED" => {
208 PdfConvertError::MalformedDocument
209 }
210 err => PdfConvertError::ConversionFailedLambda(
211 OfficeConvertLambdaError::Convert(Box::new(err)),
212 ),
213 })
214 }
215 }
216 }
217
218 fn is_convertable(&self, mime: &mime::Mime) -> bool {
219 is_known_libreoffice_pdf_convertable(mime)
220 }
221}