Skip to main content

file_parser/
module.rs

1use std::sync::{Arc, OnceLock};
2
3use async_trait::async_trait;
4use modkit::api::OpenApiRegistry;
5use modkit::{Module, ModuleCtx, RestApiCapability};
6use tracing::{debug, info};
7
8use crate::config::FileParserConfig;
9use crate::domain::service::{FileParserService, ServiceConfig};
10use crate::infra::parsers::{
11    DocxParser, HtmlParser, ImageParser, PdfParser, PlainTextParser, PptxParser, StubParser,
12    XlsxParser,
13};
14
15/// Main module struct for file parsing
16#[modkit::module(
17    name = "file-parser",
18    capabilities = [rest]
19)]
20pub struct FileParserModule {
21    service: OnceLock<Arc<FileParserService>>,
22}
23
24impl Default for FileParserModule {
25    fn default() -> Self {
26        Self {
27            service: OnceLock::new(),
28        }
29    }
30}
31
32#[async_trait]
33impl Module for FileParserModule {
34    #[allow(clippy::cast_possible_truncation)]
35    async fn init(&self, ctx: &ModuleCtx) -> anyhow::Result<()> {
36        const BYTES_IN_MB: u64 = 1024_u64 * 1024;
37
38        // Load module configuration
39        let cfg: FileParserConfig = ctx.config()?;
40        debug!(
41            "Loaded file-parser config: max_file_size_mb={}",
42            cfg.max_file_size_mb
43        );
44
45        // Build parser backends
46        let parsers: Vec<Arc<dyn crate::domain::parser::FileParserBackend>> = vec![
47            Arc::new(PlainTextParser::new()),
48            Arc::new(HtmlParser::new()),
49            Arc::new(PdfParser::new()),
50            Arc::new(DocxParser::new()),
51            Arc::new(XlsxParser::new()),
52            Arc::new(PptxParser::new()),
53            Arc::new(ImageParser::new()),
54            Arc::new(StubParser::new()),
55        ];
56
57        info!("Registered {} parser backends", parsers.len());
58
59        // allowed_local_base_dir is mandatory — fail fast if missing.
60        let raw_base = cfg.allowed_local_base_dir.ok_or_else(|| {
61            anyhow::anyhow!(
62                "file-parser: 'allowed_local_base_dir' is required but not set. \
63                 Add it to your config under modules.file-parser.config."
64            )
65        })?;
66
67        // Canonicalize at startup so we only do it once.
68        let allowed_local_base_dir = raw_base.canonicalize().map_err(|e| {
69            anyhow::anyhow!(
70                "allowed_local_base_dir '{}' cannot be resolved: {e}",
71                raw_base.display()
72            )
73        })?;
74        if !allowed_local_base_dir.is_dir() {
75            return Err(anyhow::anyhow!(
76                "allowed_local_base_dir '{}' is not a directory",
77                allowed_local_base_dir.display()
78            ));
79        }
80        info!(
81            allowed_local_base_dir = %allowed_local_base_dir.display(),
82            "Local file parsing restricted to base directory"
83        );
84
85        // Create service config from module config
86        let service_config = ServiceConfig {
87            max_file_size_bytes: usize::try_from(cfg.max_file_size_mb * BYTES_IN_MB)
88                .unwrap_or(usize::MAX),
89            allowed_local_base_dir,
90        };
91
92        // Create file parser service
93        let file_parser_service = Arc::new(FileParserService::new(parsers, service_config));
94
95        // Store service for REST usage
96        self.service
97            .set(file_parser_service)
98            .map_err(|_| anyhow::anyhow!("{} module already initialized", Self::MODULE_NAME))?;
99
100        Ok(())
101    }
102}
103
104impl RestApiCapability for FileParserModule {
105    fn register_rest(
106        &self,
107        _ctx: &ModuleCtx,
108        router: axum::Router,
109        openapi: &dyn OpenApiRegistry,
110    ) -> anyhow::Result<axum::Router> {
111        info!("Registering file-parser REST routes");
112
113        let service = self
114            .service
115            .get()
116            .ok_or_else(|| anyhow::anyhow!("Service not initialized"))?
117            .clone();
118
119        let router = crate::api::rest::routes::register_routes(router, openapi, service);
120
121        info!("File parser REST routes registered successfully");
122        Ok(router)
123    }
124}