1use std::sync::{Arc, OnceLock};
2
3use async_trait::async_trait;
4use modkit::api::OpenApiRegistry;
5use modkit::{Module, ModuleCtx, RestApiCapability};
6use tracing::{debug, info};
7
8use crate::config::FileParserConfig;
9use crate::domain::service::{FileParserService, ServiceConfig};
10use crate::infra::parsers::{
11 DocxParser, HtmlParser, ImageParser, PdfParser, PlainTextParser, PptxParser, StubParser,
12 XlsxParser,
13};
14
15#[modkit::module(
17 name = "file-parser",
18 capabilities = [rest]
19)]
20pub struct FileParserModule {
21 service: OnceLock<Arc<FileParserService>>,
22}
23
24impl Default for FileParserModule {
25 fn default() -> Self {
26 Self {
27 service: OnceLock::new(),
28 }
29 }
30}
31
32#[async_trait]
33impl Module for FileParserModule {
34 #[allow(clippy::cast_possible_truncation)]
35 async fn init(&self, ctx: &ModuleCtx) -> anyhow::Result<()> {
36 const BYTES_IN_MB: u64 = 1024_u64 * 1024;
37
38 let cfg: FileParserConfig = ctx.config()?;
40 debug!(
41 "Loaded file-parser config: max_file_size_mb={}",
42 cfg.max_file_size_mb
43 );
44
45 let parsers: Vec<Arc<dyn crate::domain::parser::FileParserBackend>> = vec![
47 Arc::new(PlainTextParser::new()),
48 Arc::new(HtmlParser::new()),
49 Arc::new(PdfParser::new()),
50 Arc::new(DocxParser::new()),
51 Arc::new(XlsxParser::new()),
52 Arc::new(PptxParser::new()),
53 Arc::new(ImageParser::new()),
54 Arc::new(StubParser::new()),
55 ];
56
57 info!("Registered {} parser backends", parsers.len());
58
59 let raw_base = cfg.allowed_local_base_dir.ok_or_else(|| {
61 anyhow::anyhow!(
62 "file-parser: 'allowed_local_base_dir' is required but not set. \
63 Add it to your config under modules.file-parser.config."
64 )
65 })?;
66
67 let allowed_local_base_dir = raw_base.canonicalize().map_err(|e| {
69 anyhow::anyhow!(
70 "allowed_local_base_dir '{}' cannot be resolved: {e}",
71 raw_base.display()
72 )
73 })?;
74 if !allowed_local_base_dir.is_dir() {
75 return Err(anyhow::anyhow!(
76 "allowed_local_base_dir '{}' is not a directory",
77 allowed_local_base_dir.display()
78 ));
79 }
80 info!(
81 allowed_local_base_dir = %allowed_local_base_dir.display(),
82 "Local file parsing restricted to base directory"
83 );
84
85 let service_config = ServiceConfig {
87 max_file_size_bytes: usize::try_from(cfg.max_file_size_mb * BYTES_IN_MB)
88 .unwrap_or(usize::MAX),
89 allowed_local_base_dir,
90 };
91
92 let file_parser_service = Arc::new(FileParserService::new(parsers, service_config));
94
95 self.service
97 .set(file_parser_service)
98 .map_err(|_| anyhow::anyhow!("{} module already initialized", Self::MODULE_NAME))?;
99
100 Ok(())
101 }
102}
103
104impl RestApiCapability for FileParserModule {
105 fn register_rest(
106 &self,
107 _ctx: &ModuleCtx,
108 router: axum::Router,
109 openapi: &dyn OpenApiRegistry,
110 ) -> anyhow::Result<axum::Router> {
111 info!("Registering file-parser REST routes");
112
113 let service = self
114 .service
115 .get()
116 .ok_or_else(|| anyhow::anyhow!("Service not initialized"))?
117 .clone();
118
119 let router = crate::api::rest::routes::register_routes(router, openapi, service);
120
121 info!("File parser REST routes registered successfully");
122 Ok(router)
123 }
124}