1pub mod custom;
2pub mod frontmatter;
3pub mod markdown;
4
5use crate::config::ParserConfig;
6use std::collections::HashMap;
7
/// Output of running a [`Parser`] over one file.
///
/// The derived `Default` yields an empty `links` list and no `metadata`.
#[derive(Debug, Clone, Default)]
pub struct ParseResult {
    /// Links reported by the parser, as strings.
    // NOTE(review): whether these are raw link targets or normalized paths is
    // not visible in this file — confirm against the parser implementations.
    pub links: Vec<String>,
    /// Optional JSON metadata attached by the parser; `None` when it supplies none.
    pub metadata: Option<serde_json::Value>,
}
19
20pub trait Parser {
22 fn name(&self) -> &str;
24 fn matches(&self, path: &str) -> bool;
26 fn parse(&self, path: &str, content: &str) -> ParseResult;
28 fn parse_batch(&self, files: &[(&str, &str)]) -> HashMap<String, ParseResult> {
31 files
32 .iter()
33 .map(|(path, content)| (path.to_string(), self.parse(path, content)))
34 .collect()
35 }
36}
37
38fn build_file_filter(patterns: &Option<Vec<String>>, name: &str) -> Option<globset::GlobSet> {
41 let patterns = patterns.as_ref()?;
42 match crate::config::compile_globs(patterns) {
43 Ok(set) => set,
44 Err(e) => {
45 eprintln!("warn: invalid glob in parser {name}.files: {e}");
46 None
47 }
48 }
49}
50
/// Returns `true` when the last `/`-separated component of `path` contains a `.`.
///
/// Only the basename is inspected, so dots in directory names do not count
/// (`"docs.v2/readme"` is `false`). A leading dot counts (`".gitignore"` is
/// `true`), and a path ending in `/` has an empty basename and is `false`.
pub(crate) fn has_file_extension(path: &str) -> bool {
    // Without a `/` the whole path is the basename; `rsplit_once` makes that
    // fallback explicit instead of hiding it behind an unreachable branch.
    path.rsplit_once('/')
        .map_or(path, |(_, basename)| basename)
        .contains('.')
}
59
/// Blanks out Markdown code so that later scanning never sees text inside it.
///
/// Two passes are applied:
///
/// 1. Fenced code blocks (``` or ~~~ fences, including the fence lines
///    themselves) are replaced line by line with as many spaces as the line
///    has bytes. An unclosed fence runs to the end of the input.
/// 2. Inline code spans (a run of backticks up to the next run of the same
///    number of backticks) are replaced with one space per character,
///    delimiters included.
///
/// Every input line is terminated with `\n` in the output, so the result ends
/// with a newline even when `content` does not.
pub(crate) fn strip_code(content: &str) -> String {
    let without_fences = blank_fenced_blocks(content);
    blank_inline_code(&without_fences)
}

/// Recognises a fence-opening line and returns its fence character and length.
fn fence_opener(line: &str) -> Option<(char, usize)> {
    let trimmed = line.trim_start();
    let marker = trimmed
        .chars()
        .next()
        .filter(|&c| c == '`' || c == '~')?;
    let run_len = trimmed.chars().take_while(|&c| c == marker).count();
    if run_len < 3 {
        return None;
    }
    // The info string of a backtick fence may not contain backticks; a line
    // such as "```x``` text" is inline code, not the start of a block.
    // `run_len` counts ASCII characters, so it is also a valid byte offset.
    if marker == '`' && trimmed[run_len..].contains('`') {
        return None;
    }
    Some((marker, run_len))
}

/// Tells whether `line` closes a fence opened with `open_len` copies of `marker`.
fn closes_fence(line: &str, marker: char, open_len: usize) -> bool {
    // A closing fence consists solely of the opening character and is at
    // least as long as the opener, so a shorter nested fence stays inside
    // the block and a longer one still closes it.
    let trimmed = line.trim();
    trimmed.len() >= open_len && trimmed.chars().all(|c| c == marker)
}

/// Replaces every line belonging to a fenced code block with spaces.
fn blank_fenced_blocks(content: &str) -> String {
    let mut out = String::with_capacity(content.len() + 1);
    let mut open_fence: Option<(char, usize)> = None;

    for line in content.lines() {
        let inside_code = match open_fence {
            Some((marker, open_len)) => {
                if closes_fence(line, marker, open_len) {
                    open_fence = None;
                }
                // The closing fence line itself is still part of the block.
                true
            }
            None => {
                open_fence = fence_opener(line);
                open_fence.is_some()
            }
        };

        if inside_code {
            out.extend(std::iter::repeat(' ').take(line.len()));
        } else {
            out.push_str(line);
        }
        out.push('\n');
    }

    out
}

/// Replaces inline code spans, delimiters included, with one space per character.
fn blank_inline_code(text: &str) -> String {
    let mut out = String::with_capacity(text.len());
    let mut rest = text;

    while let Some(start) = rest.find('`') {
        out.push_str(&rest[..start]);
        let from_ticks = &rest[start..];
        let ticks = from_ticks.bytes().take_while(|b| *b == b'`').count();
        let body = &from_ticks[ticks..];
        let delimiter = "`".repeat(ticks);

        match body.find(delimiter.as_str()) {
            Some(close) => {
                let blanked = ticks + body[..close].chars().count() + ticks;
                out.extend(std::iter::repeat(' ').take(blanked));
                rest = &body[close + ticks..];
            }
            None => {
                // No closing run: keep one backtick literally and retry from
                // the next character, which shortens the opening run by one.
                out.push('`');
                rest = &from_ticks[1..];
            }
        }
    }

    out.push_str(rest);
    out
}
133
134pub fn build_parsers(
137 parsers_config: &HashMap<String, ParserConfig>,
138 config_dir: Option<&std::path::Path>,
139 root: &std::path::Path,
140) -> Vec<Box<dyn Parser>> {
141 let mut parsers: Vec<Box<dyn Parser>> = Vec::new();
142
143 for (name, config) in parsers_config {
144 let file_filter = build_file_filter(&config.files, name);
145
146 if let Some(ref command) = config.command {
147 let resolved_command = if let Some(dir) = config_dir {
149 let cmd_path = dir.join(command);
150 if cmd_path.exists() {
151 cmd_path.to_string_lossy().to_string()
152 } else {
153 command.clone()
154 }
155 } else {
156 command.clone()
157 };
158
159 parsers.push(Box::new(custom::CustomParser {
160 parser_name: name.clone(),
161 file_filter,
162 command: resolved_command,
163 timeout_ms: config.timeout.unwrap_or(5000),
164 scope_dir: root.to_path_buf(),
165 options: config.options.clone(),
166 }));
167 } else {
168 match name.as_str() {
170 "markdown" => {
171 parsers.push(Box::new(markdown::MarkdownParser { file_filter }));
172 }
173 "frontmatter" => {
174 parsers.push(Box::new(frontmatter::FrontmatterParser { file_filter }));
175 }
176 _ => {
177 eprintln!(
178 "warn: unknown built-in parser \"{name}\" (use 'command' field for custom parsers)"
179 );
180 }
181 }
182 }
183 }
184
185 parsers
186}