Skip to main content

drft/parsers/
mod.rs

1pub mod custom;
2pub mod frontmatter;
3pub mod markdown;
4
5use crate::config::ParserConfig;
6use std::collections::HashMap;
7
8/// Combined output from parsing a single file: links + optional metadata.
9/// Links are raw strings as they appear in the source — the graph builder handles
10/// normalization (fragment stripping, anchor filtering, URI detection).
11///
12/// See [`docs/parsers`](../../docs/parsers/README.md) for details.
13#[derive(Debug, Clone, Default)]
14pub struct ParseResult {
15    pub links: Vec<String>,
16    /// Structured metadata extracted from the file, namespaced by parser on the node.
17    pub metadata: Option<serde_json::Value>,
18}
19
20/// Trait implemented by all parsers (built-in and custom).
21pub trait Parser {
22    /// Parser name — used as provenance on edges.
23    fn name(&self) -> &str;
24    /// Check if this parser should run on a given file path.
25    fn matches(&self, path: &str) -> bool;
26    /// Parse a file's content and return discovered links + optional metadata.
27    fn parse(&self, path: &str, content: &str) -> ParseResult;
28    /// Parse multiple files in one call. Default falls back to per-file parsing.
29    /// Custom parsers override this to spawn one process for all files.
30    fn parse_batch(&self, files: &[(&str, &str)]) -> HashMap<String, ParseResult> {
31        files
32            .iter()
33            .map(|(path, content)| (path.to_string(), self.parse(path, content)))
34            .collect()
35    }
36}
37
38/// Build a GlobSet from file patterns (for parser routing).
39/// Returns None if no patterns → parser receives all File nodes.
40fn build_file_filter(patterns: &Option<Vec<String>>, name: &str) -> Option<globset::GlobSet> {
41    let patterns = patterns.as_ref()?;
42    match crate::config::compile_globs(patterns) {
43        Ok(set) => set,
44        Err(e) => {
45            eprintln!("warn: invalid glob in parser {name}.files: {e}");
46            None
47        }
48    }
49}
50
51/// Build the parser registry from config.
52/// Returns a list of boxed parsers ready to run.
53pub fn build_parsers(
54    parsers_config: &HashMap<String, ParserConfig>,
55    config_dir: Option<&std::path::Path>,
56    root: &std::path::Path,
57) -> Vec<Box<dyn Parser>> {
58    let mut parsers: Vec<Box<dyn Parser>> = Vec::new();
59
60    for (name, config) in parsers_config {
61        let file_filter = build_file_filter(&config.files, name);
62
63        if let Some(ref command) = config.command {
64            // Custom parser
65            let resolved_command = if let Some(dir) = config_dir {
66                let cmd_path = dir.join(command);
67                if cmd_path.exists() {
68                    cmd_path.to_string_lossy().to_string()
69                } else {
70                    command.clone()
71                }
72            } else {
73                command.clone()
74            };
75
76            parsers.push(Box::new(custom::CustomParser {
77                parser_name: name.clone(),
78                file_filter,
79                command: resolved_command,
80                timeout_ms: config.timeout.unwrap_or(5000),
81                scope_dir: root.to_path_buf(),
82                options: config.options.clone(),
83            }));
84        } else {
85            // Built-in parser
86            match name.as_str() {
87                "markdown" => {
88                    parsers.push(Box::new(markdown::MarkdownParser { file_filter }));
89                }
90                "frontmatter" => {
91                    parsers.push(Box::new(frontmatter::FrontmatterParser { file_filter }));
92                }
93                _ => {
94                    eprintln!(
95                        "warn: unknown built-in parser \"{name}\" (use 'command' field for custom parsers)"
96                    );
97                }
98            }
99        }
100    }
101
102    parsers
103}