pub mod custom;
pub mod frontmatter;
pub mod markdown;
use crate::config::ParserConfig;
use std::collections::HashMap;
/// Output produced by a [`Parser`] for a single file.
///
/// The `Default` value has no links and no metadata.
#[derive(Clone, Debug, Default)]
pub struct ParseResult {
    /// Link strings reported by the parser (presumably link targets found in
    /// the file's content; the exact format is up to each parser).
    pub links: Vec<String>,
    /// Optional JSON metadata for the file; `None` when the parser supplies none.
    pub metadata: Option<serde_json::Value>,
}
/// Common interface implemented by every parser in this module.
pub trait Parser {
    /// Returns the name identifying this parser.
    fn name(&self) -> &str;

    /// Reports whether this parser should be applied to the file at `path`.
    ///
    /// The criteria are left to each implementation.
    fn matches(&self, path: &str) -> bool;

    /// Parses the `content` of the file at `path` and returns what was found.
    fn parse(&self, path: &str, content: &str) -> ParseResult;

    /// Parses several `(path, content)` pairs and returns the results keyed by path.
    ///
    /// The default implementation simply calls [`Parser::parse`] once per
    /// entry, in slice order. If the same path occurs more than once, the
    /// result of its last occurrence is the one kept in the map.
    fn parse_batch(&self, files: &[(&str, &str)]) -> HashMap<String, ParseResult> {
        let mut results = HashMap::with_capacity(files.len());
        for &(path, content) in files {
            results.insert(path.to_owned(), self.parse(path, content));
        }
        results
    }
}
/// Compiles the optional `files` glob patterns of the parser called `name`.
///
/// Returns `None` when no patterns are configured. When compilation fails, a
/// warning mentioning `name` is written to stderr and `None` is returned, so
/// an invalid pattern list behaves like a missing one. Otherwise the value
/// produced by `crate::config::compile_globs` is passed through unchanged
/// (which may itself be `None`).
fn build_file_filter(patterns: &Option<Vec<String>>, name: &str) -> Option<globset::GlobSet> {
    patterns.as_ref().and_then(|globs| {
        crate::config::compile_globs(globs).unwrap_or_else(|e| {
            // Best effort: report the bad pattern and carry on without a filter.
            eprintln!("warn: invalid glob in parser {name}.files: {e}");
            None
        })
    })
}
/// Builds the list of parsers described by `parsers_config`.
///
/// Each map entry is keyed by the parser name:
///
/// * If the entry has a `command`, a [`custom::CustomParser`] is created. When
///   `config_dir` is given and `config_dir.join(command)` exists on disk, that
///   joined path is used as the command; otherwise the command string is used
///   as written. The timeout defaults to 5000 ms when not configured, and
///   `root` becomes the parser's `scope_dir`.
/// * Without a `command`, the name must be one of the built-in parsers
///   (`"markdown"` or `"frontmatter"`). Any other name produces a warning on
///   stderr and is skipped.
///
/// Parsers are returned sorted by name. `HashMap` iteration order is
/// unspecified and differs between runs, so iterating the map directly would
/// make both the parser order and the order of emitted warnings
/// non-deterministic.
pub fn build_parsers(
    parsers_config: &HashMap<String, ParserConfig>,
    config_dir: Option<&std::path::Path>,
    root: &std::path::Path,
) -> Vec<Box<dyn Parser>> {
    // Sort by name for a stable, reproducible order (keys are unique, so an
    // unstable sort is sufficient).
    let mut entries: Vec<(&String, &ParserConfig)> = parsers_config.iter().collect();
    entries.sort_unstable_by(|a, b| a.0.cmp(b.0));

    let mut parsers: Vec<Box<dyn Parser>> = Vec::with_capacity(entries.len());
    for (name, config) in entries {
        let file_filter = build_file_filter(&config.files, name);

        if let Some(command) = &config.command {
            // Prefer the command resolved against the config directory, but
            // only if that path actually exists; otherwise keep it verbatim.
            let resolved_command = config_dir
                .map(|dir| dir.join(command))
                .filter(|candidate| candidate.exists())
                .map(|candidate| candidate.to_string_lossy().into_owned())
                .unwrap_or_else(|| command.clone());

            parsers.push(Box::new(custom::CustomParser {
                parser_name: name.clone(),
                file_filter,
                command: resolved_command,
                // Default timeout in milliseconds when none is configured.
                timeout_ms: config.timeout.unwrap_or(5000),
                scope_dir: root.to_path_buf(),
                options: config.options.clone(),
            }));
            continue;
        }

        // No command: the name has to refer to a built-in parser.
        match name.as_str() {
            "markdown" => {
                parsers.push(Box::new(markdown::MarkdownParser { file_filter }));
            }
            "frontmatter" => {
                parsers.push(Box::new(frontmatter::FrontmatterParser { file_filter }));
            }
            _ => {
                eprintln!(
                    "warn: unknown built-in parser \"{name}\" (use 'command' field for custom parsers)"
                );
            }
        }
    }
    parsers
}