1use std::sync::{Arc, OnceLock};
2
3use async_trait::async_trait;
4use modkit::api::OpenApiRegistry;
5use modkit::{Module, ModuleCtx, RestApiCapability};
6use tracing::{debug, info};
7
8use crate::config::FileParserConfig;
9use crate::domain::service::{FileParserService, ServiceConfig};
10use crate::infra::parsers::{
11 DocxParser, HtmlParser, ImageParser, PdfParser, PlainTextParser, PptxParser, StubParser,
12 XlsxParser,
13};
14
15#[modkit::module(
17 name = "file-parser",
18 capabilities = [rest]
19)]
20pub struct FileParserModule {
21 service: OnceLock<Arc<FileParserService>>,
22}
23
24impl Default for FileParserModule {
25 fn default() -> Self {
26 Self {
27 service: OnceLock::new(),
28 }
29 }
30}
31
32#[async_trait]
33impl Module for FileParserModule {
34 #[allow(clippy::cast_possible_truncation)]
35 async fn init(&self, ctx: &ModuleCtx) -> anyhow::Result<()> {
36 const BYTES_IN_MB: u64 = 1024_u64 * 1024;
37
38 let cfg: FileParserConfig = ctx.config()?;
40 debug!(
41 "Loaded file-parser config: max_file_size_mb={}",
42 cfg.max_file_size_mb
43 );
44
45 let parsers: Vec<Arc<dyn crate::domain::parser::FileParserBackend>> = vec![
47 Arc::new(PlainTextParser::new()),
48 Arc::new(HtmlParser::new()),
49 Arc::new(PdfParser::new()),
50 Arc::new(DocxParser::new()),
51 Arc::new(XlsxParser::new()),
52 Arc::new(PptxParser::new()),
53 Arc::new(ImageParser::new()),
54 Arc::new(StubParser::new()),
55 ];
56
57 info!("Registered {} parser backends", parsers.len());
58
59 let allowed_local_base_dir = cfg.allowed_local_base_dir.canonicalize().map_err(|e| {
61 anyhow::anyhow!(
62 "allowed_local_base_dir '{}' cannot be resolved: {e}",
63 cfg.allowed_local_base_dir.display()
64 )
65 })?;
66 if !allowed_local_base_dir.is_dir() {
67 return Err(anyhow::anyhow!(
68 "allowed_local_base_dir '{}' is not a directory",
69 allowed_local_base_dir.display()
70 ));
71 }
72 info!(
73 allowed_local_base_dir = %allowed_local_base_dir.display(),
74 "Local file parsing restricted to base directory"
75 );
76
77 let service_config = ServiceConfig {
79 max_file_size_bytes: usize::try_from(cfg.max_file_size_mb * BYTES_IN_MB)
80 .unwrap_or(usize::MAX),
81 allowed_local_base_dir,
82 };
83
84 let file_parser_service = Arc::new(FileParserService::new(parsers, service_config));
86
87 self.service
89 .set(file_parser_service)
90 .map_err(|_| anyhow::anyhow!("{} module already initialized", Self::MODULE_NAME))?;
91
92 Ok(())
93 }
94}
95
96impl RestApiCapability for FileParserModule {
97 fn register_rest(
98 &self,
99 _ctx: &ModuleCtx,
100 router: axum::Router,
101 openapi: &dyn OpenApiRegistry,
102 ) -> anyhow::Result<axum::Router> {
103 info!("Registering file-parser REST routes");
104
105 let service = self
106 .service
107 .get()
108 .ok_or_else(|| anyhow::anyhow!("Service not initialized"))?
109 .clone();
110
111 let router = crate::api::rest::routes::register_routes(router, openapi, service);
112
113 info!("File parser REST routes registered successfully");
114 Ok(router)
115 }
116}