use std::sync::{Arc, OnceLock};
use async_trait::async_trait;
use modkit::api::OpenApiRegistry;
use modkit::{Module, ModuleCtx, RestApiCapability};
use tracing::{debug, info};
use crate::config::FileParserConfig;
use crate::domain::service::{FileParserService, ServiceConfig};
use crate::infra::parsers::{
DocxParser, HtmlParser, ImageParser, PdfParser, PlainTextParser, PptxParser, StubParser,
XlsxParser,
};
#[modkit::module(
name = "file-parser",
capabilities = [rest]
)]
pub struct FileParserModule {
service: OnceLock<Arc<FileParserService>>,
}
impl Default for FileParserModule {
fn default() -> Self {
Self {
service: OnceLock::new(),
}
}
}
#[async_trait]
impl Module for FileParserModule {
#[allow(clippy::cast_possible_truncation)]
async fn init(&self, ctx: &ModuleCtx) -> anyhow::Result<()> {
const BYTES_IN_MB: u64 = 1024_u64 * 1024;
let cfg: FileParserConfig = ctx.config()?;
debug!(
"Loaded file-parser config: max_file_size_mb={}",
cfg.max_file_size_mb
);
let parsers: Vec<Arc<dyn crate::domain::parser::FileParserBackend>> = vec![
Arc::new(PlainTextParser::new()),
Arc::new(HtmlParser::new()),
Arc::new(PdfParser::new()),
Arc::new(DocxParser::new()),
Arc::new(XlsxParser::new()),
Arc::new(PptxParser::new()),
Arc::new(ImageParser::new()),
Arc::new(StubParser::new()),
];
info!("Registered {} parser backends", parsers.len());
let allowed_local_base_dir = cfg.allowed_local_base_dir.canonicalize().map_err(|e| {
anyhow::anyhow!(
"allowed_local_base_dir '{}' cannot be resolved: {e}",
cfg.allowed_local_base_dir.display()
)
})?;
if !allowed_local_base_dir.is_dir() {
return Err(anyhow::anyhow!(
"allowed_local_base_dir '{}' is not a directory",
allowed_local_base_dir.display()
));
}
info!(
allowed_local_base_dir = %allowed_local_base_dir.display(),
"Local file parsing restricted to base directory"
);
let service_config = ServiceConfig {
max_file_size_bytes: usize::try_from(cfg.max_file_size_mb * BYTES_IN_MB)
.unwrap_or(usize::MAX),
allowed_local_base_dir,
};
let file_parser_service = Arc::new(FileParserService::new(parsers, service_config));
self.service
.set(file_parser_service)
.map_err(|_| anyhow::anyhow!("{} module already initialized", Self::MODULE_NAME))?;
Ok(())
}
}
impl RestApiCapability for FileParserModule {
fn register_rest(
&self,
_ctx: &ModuleCtx,
router: axum::Router,
openapi: &dyn OpenApiRegistry,
) -> anyhow::Result<axum::Router> {
info!("Registering file-parser REST routes");
let service = self
.service
.get()
.ok_or_else(|| anyhow::anyhow!("Service not initialized"))?
.clone();
let router = crate::api::rest::routes::register_routes(router, openapi, service);
info!("File parser REST routes registered successfully");
Ok(router)
}
}