Skip to main content

file_parser/
module.rs

1use std::sync::{Arc, OnceLock};
2
3use async_trait::async_trait;
4use modkit::api::OpenApiRegistry;
5use modkit::{Module, ModuleCtx, RestApiCapability};
6use tracing::{debug, info};
7
8use crate::config::FileParserConfig;
9use crate::domain::service::{FileParserService, ServiceConfig};
10use crate::infra::parsers::{
11    DocxParser, HtmlParser, ImageParser, PdfParser, PlainTextParser, PptxParser, StubParser,
12    XlsxParser,
13};
14
15/// Main module struct for file parsing
16#[modkit::module(
17    name = "file-parser",
18    capabilities = [rest]
19)]
20pub struct FileParserModule {
21    service: OnceLock<Arc<FileParserService>>,
22}
23
24impl Default for FileParserModule {
25    fn default() -> Self {
26        Self {
27            service: OnceLock::new(),
28        }
29    }
30}
31
32#[async_trait]
33impl Module for FileParserModule {
34    #[allow(clippy::cast_possible_truncation)]
35    async fn init(&self, ctx: &ModuleCtx) -> anyhow::Result<()> {
36        const BYTES_IN_MB: u64 = 1024_u64 * 1024;
37
38        info!("Initializing {} module", Self::MODULE_NAME);
39
40        // Load module configuration
41        let cfg: FileParserConfig = ctx.config()?;
42        debug!(
43            "Loaded file-parser config: max_file_size_mb={}",
44            cfg.max_file_size_mb
45        );
46
47        // Build parser backends
48        let parsers: Vec<Arc<dyn crate::domain::parser::FileParserBackend>> = vec![
49            Arc::new(PlainTextParser::new()),
50            Arc::new(HtmlParser::new()),
51            Arc::new(PdfParser::new()),
52            Arc::new(DocxParser::new()),
53            Arc::new(XlsxParser::new()),
54            Arc::new(PptxParser::new()),
55            Arc::new(ImageParser::new()),
56            Arc::new(StubParser::new()),
57        ];
58
59        info!("Registered {} parser backends", parsers.len());
60
61        // allowed_local_base_dir is mandatory — fail fast if missing.
62        let raw_base = cfg.allowed_local_base_dir.ok_or_else(|| {
63            anyhow::anyhow!(
64                "file-parser: 'allowed_local_base_dir' is required but not set. \
65                 Add it to your config under modules.file-parser.config."
66            )
67        })?;
68
69        // Canonicalize at startup so we only do it once.
70        let allowed_local_base_dir = raw_base.canonicalize().map_err(|e| {
71            anyhow::anyhow!(
72                "allowed_local_base_dir '{}' cannot be resolved: {e}",
73                raw_base.display()
74            )
75        })?;
76        if !allowed_local_base_dir.is_dir() {
77            return Err(anyhow::anyhow!(
78                "allowed_local_base_dir '{}' is not a directory",
79                allowed_local_base_dir.display()
80            ));
81        }
82        info!(
83            allowed_local_base_dir = %allowed_local_base_dir.display(),
84            "Local file parsing restricted to base directory"
85        );
86
87        // Create service config from module config
88        let service_config = ServiceConfig {
89            max_file_size_bytes: usize::try_from(cfg.max_file_size_mb * BYTES_IN_MB)
90                .unwrap_or(usize::MAX),
91            allowed_local_base_dir,
92        };
93
94        // Create file parser service
95        let file_parser_service = Arc::new(FileParserService::new(parsers, service_config));
96
97        // Store service for REST usage
98        self.service
99            .set(file_parser_service)
100            .map_err(|_| anyhow::anyhow!("{} module already initialized", Self::MODULE_NAME))?;
101
102        info!("{} module initialized successfully", Self::MODULE_NAME);
103        Ok(())
104    }
105}
106
107impl RestApiCapability for FileParserModule {
108    fn register_rest(
109        &self,
110        _ctx: &ModuleCtx,
111        router: axum::Router,
112        openapi: &dyn OpenApiRegistry,
113    ) -> anyhow::Result<axum::Router> {
114        info!("Registering file-parser REST routes");
115
116        let service = self
117            .service
118            .get()
119            .ok_or_else(|| anyhow::anyhow!("Service not initialized"))?
120            .clone();
121
122        let router = crate::api::rest::routes::register_routes(router, openapi, service);
123
124        info!("File parser REST routes registered successfully");
125        Ok(router)
126    }
127}