Skip to main content

file_parser/
module.rs

1use std::sync::Arc;
2
3use async_trait::async_trait;
4use modkit::api::OpenApiRegistry;
5use modkit::{Module, ModuleCtx, RestApiCapability};
6use tracing::{debug, info};
7
8use crate::config::FileParserConfig;
9use crate::domain::service::{FileParserService, ServiceConfig};
10use crate::infra::parsers::{
11    DocxParser, HtmlParser, ImageParser, PdfParser, PlainTextParser, PptxParser, StubParser,
12    XlsxParser,
13};
14
15/// Main module struct for file parsing
16#[modkit::module(
17    name = "file-parser",
18    capabilities = [rest]
19)]
20pub struct FileParserModule {
21    // Keep the service behind ArcSwap for cheap read-mostly access.
22    service: arc_swap::ArcSwapOption<FileParserService>,
23}
24
25impl Default for FileParserModule {
26    fn default() -> Self {
27        Self {
28            service: arc_swap::ArcSwapOption::from(None),
29        }
30    }
31}
32
33impl Clone for FileParserModule {
34    fn clone(&self) -> Self {
35        Self {
36            service: arc_swap::ArcSwapOption::new(self.service.load().as_ref().map(Clone::clone)),
37        }
38    }
39}
40
41#[async_trait]
42impl Module for FileParserModule {
43    #[allow(clippy::cast_possible_truncation)]
44    async fn init(&self, ctx: &ModuleCtx) -> anyhow::Result<()> {
45        const BYTES_IN_MB: u64 = 1024_u64 * 1024;
46
47        info!("Initializing file-parser module");
48
49        // Load module configuration
50        let cfg: FileParserConfig = ctx.config()?;
51        debug!(
52            "Loaded file-parser config: max_file_size_mb={}, download_timeout_secs={}",
53            cfg.max_file_size_mb, cfg.download_timeout_secs
54        );
55
56        // Build parser backends
57        let parsers: Vec<Arc<dyn crate::domain::parser::FileParserBackend>> = vec![
58            Arc::new(PlainTextParser::new()),
59            Arc::new(HtmlParser::new()),
60            Arc::new(PdfParser::new()),
61            Arc::new(DocxParser::new()),
62            Arc::new(XlsxParser::new()),
63            Arc::new(PptxParser::new()),
64            Arc::new(ImageParser::new()),
65            Arc::new(StubParser::new()),
66        ];
67
68        info!("Registered {} parser backends", parsers.len());
69
70        // Create service config from module config
71        let service_config = ServiceConfig {
72            max_file_size_bytes: usize::try_from(cfg.max_file_size_mb * BYTES_IN_MB)
73                .unwrap_or(usize::MAX),
74            download_timeout_secs: cfg.download_timeout_secs,
75        };
76
77        // Create file parser service
78        let file_parser_service = Arc::new(
79            FileParserService::new(parsers, service_config)
80                .map_err(|e| anyhow::anyhow!("failed to create FileParserService: {e}"))?,
81        );
82
83        // Store service for REST usage
84        self.service.store(Some(file_parser_service));
85
86        info!("FileParserService initialized successfully");
87        Ok(())
88    }
89}
90
91impl RestApiCapability for FileParserModule {
92    fn register_rest(
93        &self,
94        _ctx: &ModuleCtx,
95        router: axum::Router,
96        openapi: &dyn OpenApiRegistry,
97    ) -> anyhow::Result<axum::Router> {
98        info!("Registering file-parser REST routes");
99
100        let service = self
101            .service
102            .load()
103            .as_ref()
104            .ok_or_else(|| anyhow::anyhow!("Service not initialized"))?
105            .clone();
106
107        let router = crate::api::rest::routes::register_routes(router, openapi, service);
108
109        info!("File parser REST routes registered successfully");
110        Ok(router)
111    }
112}