1use std::sync::{Arc, OnceLock};
2
3use async_trait::async_trait;
4use modkit::api::OpenApiRegistry;
5use modkit::{Module, ModuleCtx, RestApiCapability};
6use tracing::{debug, info};
7
8use crate::config::FileParserConfig;
9use crate::domain::service::{FileParserService, ServiceConfig};
10use crate::infra::parsers::{
11 DocxParser, HtmlParser, ImageParser, PdfParser, PlainTextParser, PptxParser, StubParser,
12 XlsxParser,
13};
14
15#[modkit::module(
17 name = "file-parser",
18 capabilities = [rest]
19)]
20pub struct FileParserModule {
21 service: OnceLock<Arc<FileParserService>>,
22}
23
24impl Default for FileParserModule {
25 fn default() -> Self {
26 Self {
27 service: OnceLock::new(),
28 }
29 }
30}
31
32#[async_trait]
33impl Module for FileParserModule {
34 #[allow(clippy::cast_possible_truncation)]
35 async fn init(&self, ctx: &ModuleCtx) -> anyhow::Result<()> {
36 const BYTES_IN_MB: u64 = 1024_u64 * 1024;
37
38 info!("Initializing {} module", Self::MODULE_NAME);
39
40 let cfg: FileParserConfig = ctx.config()?;
42 debug!(
43 "Loaded file-parser config: max_file_size_mb={}",
44 cfg.max_file_size_mb
45 );
46
47 let parsers: Vec<Arc<dyn crate::domain::parser::FileParserBackend>> = vec![
49 Arc::new(PlainTextParser::new()),
50 Arc::new(HtmlParser::new()),
51 Arc::new(PdfParser::new()),
52 Arc::new(DocxParser::new()),
53 Arc::new(XlsxParser::new()),
54 Arc::new(PptxParser::new()),
55 Arc::new(ImageParser::new()),
56 Arc::new(StubParser::new()),
57 ];
58
59 info!("Registered {} parser backends", parsers.len());
60
61 let raw_base = cfg.allowed_local_base_dir.ok_or_else(|| {
63 anyhow::anyhow!(
64 "file-parser: 'allowed_local_base_dir' is required but not set. \
65 Add it to your config under modules.file-parser.config."
66 )
67 })?;
68
69 let allowed_local_base_dir = raw_base.canonicalize().map_err(|e| {
71 anyhow::anyhow!(
72 "allowed_local_base_dir '{}' cannot be resolved: {e}",
73 raw_base.display()
74 )
75 })?;
76 if !allowed_local_base_dir.is_dir() {
77 return Err(anyhow::anyhow!(
78 "allowed_local_base_dir '{}' is not a directory",
79 allowed_local_base_dir.display()
80 ));
81 }
82 info!(
83 allowed_local_base_dir = %allowed_local_base_dir.display(),
84 "Local file parsing restricted to base directory"
85 );
86
87 let service_config = ServiceConfig {
89 max_file_size_bytes: usize::try_from(cfg.max_file_size_mb * BYTES_IN_MB)
90 .unwrap_or(usize::MAX),
91 allowed_local_base_dir,
92 };
93
94 let file_parser_service = Arc::new(FileParserService::new(parsers, service_config));
96
97 self.service
99 .set(file_parser_service)
100 .map_err(|_| anyhow::anyhow!("{} module already initialized", Self::MODULE_NAME))?;
101
102 info!("{} module initialized successfully", Self::MODULE_NAME);
103 Ok(())
104 }
105}
106
107impl RestApiCapability for FileParserModule {
108 fn register_rest(
109 &self,
110 _ctx: &ModuleCtx,
111 router: axum::Router,
112 openapi: &dyn OpenApiRegistry,
113 ) -> anyhow::Result<axum::Router> {
114 info!("Registering file-parser REST routes");
115
116 let service = self
117 .service
118 .get()
119 .ok_or_else(|| anyhow::anyhow!("Service not initialized"))?
120 .clone();
121
122 let router = crate::api::rest::routes::register_routes(router, openapi, service);
123
124 info!("File parser REST routes registered successfully");
125 Ok(router)
126 }
127}