drft/parsers/
frontmatter.rs1use super::{ParseResult, Parser};
2
3fn is_link_candidate(value: &str) -> bool {
5 if crate::graph::is_uri(value) {
7 return true;
8 }
9 if value.starts_with("./") || value.starts_with("../") || value.starts_with('/') {
12 return true;
13 }
14 if value.contains(' ') {
16 return false;
17 }
18 let basename = value.rsplit('/').next().unwrap_or(value);
21 if let Some(dot_pos) = basename.rfind('.') {
22 let ext = &basename[dot_pos + 1..];
23 !ext.is_empty()
24 && ext.len() <= 4
25 && ext.chars().all(|c| c.is_ascii_alphanumeric())
26 && !ext.chars().all(|c| c.is_ascii_digit())
27 } else {
28 false
29 }
30}
31
/// If `trimmed` (a line with leading whitespace removed) opens a fenced code
/// block, return the fence character and the length of the fence run.
///
/// A fence is a run of at least three backticks or tildes. For a backtick
/// fence the remainder of the line (the info string) must not contain a
/// backtick; otherwise the line is inline code such as "```x```" and not a
/// fence opener.
fn fence_opening(trimmed: &str) -> Option<(char, usize)> {
    let fence_char = trimmed.chars().next().filter(|c| matches!(c, '`' | '~'))?;
    let run = trimmed.chars().take_while(|&c| c == fence_char).count();
    if run < 3 {
        return None;
    }
    // Fence characters are ASCII, so the char count is also a byte offset.
    if fence_char == '`' && trimmed[run..].contains('`') {
        return None;
    }
    Some((fence_char, run))
}

/// Whether `trimmed` closes a fence opened with `min_len` copies of
/// `fence_char`: the line must consist solely of that character (ignoring
/// trailing whitespace) and be at least as long as the opening run.
fn closes_fence(trimmed: &str, fence_char: char, min_len: usize) -> bool {
    let body = trimmed.trim_end();
    body.len() >= min_len && body.chars().all(|c| c == fence_char)
}

/// Replace Markdown code with spaces so nothing inside it is parsed as content.
///
/// Pass 1 blanks fenced code blocks line by line (the fence lines included).
/// The opening fence's character and length are remembered, and the block is
/// only closed by a fence of the same character that is at least as long, so
/// a four-backtick block may contain three-backtick fences.
///
/// Pass 2 blanks inline code spans: a run of N backticks up to and including
/// the next run of N backticks. A span may cross line breaks, in which case
/// the newlines inside it become spaces too. A backtick run with no closing
/// partner is left in place.
///
/// Input is split with `str::lines` and every line is re-terminated with
/// `\n`, so non-empty output always ends with a newline and `\r\n` endings
/// are normalised.
fn strip_code(content: &str) -> String {
    // Pass 1: fenced blocks.
    let mut result = String::with_capacity(content.len() + 1);
    // `Some((fence_char, fence_len))` while inside a fenced block.
    let mut open_fence: Option<(char, usize)> = None;

    for line in content.lines() {
        let trimmed = line.trim_start();
        let keep_line = match open_fence {
            None => {
                open_fence = fence_opening(trimmed);
                open_fence.is_none()
            }
            Some((fence_char, fence_len)) => {
                if closes_fence(trimmed, fence_char, fence_len) {
                    open_fence = None;
                }
                false
            }
        };

        if keep_line {
            result.push_str(line);
        } else {
            // One space per byte keeps the line's byte length unchanged.
            result.extend(std::iter::repeat(' ').take(line.len()));
        }
        result.push('\n');
    }

    // Pass 2: inline spans.
    let chars: Vec<char> = result.chars().collect();
    let mut cleaned = String::with_capacity(result.len());
    let mut i = 0;
    while i < chars.len() {
        if chars[i] != '`' {
            cleaned.push(chars[i]);
            i += 1;
            continue;
        }

        // Length of the opening backtick run (always >= 1 here).
        let ticks = chars[i..].iter().take_while(|&&c| c == '`').count();
        let after = i + ticks;
        // First position after the opener where `ticks` backticks occur.
        let closing = chars[after..]
            .windows(ticks)
            .position(|window| window.iter().all(|&c| c == '`'));

        match closing {
            Some(offset) => {
                // Opener + content + closer, one space per character.
                let total = after + offset + ticks - i;
                cleaned.extend(std::iter::repeat(' ').take(total));
                i += total;
            }
            None => {
                // Unmatched: keep this backtick and retry from the next one.
                cleaned.push('`');
                i += 1;
            }
        }
    }

    cleaned
}
105
106pub struct FrontmatterParser {
108 pub file_filter: Option<globset::GlobSet>,
110}
111
112impl Parser for FrontmatterParser {
113 fn name(&self) -> &str {
114 "frontmatter"
115 }
116
117 fn matches(&self, path: &str) -> bool {
118 match &self.file_filter {
119 Some(set) => set.is_match(path),
120 None => true,
121 }
122 }
123
124 fn parse(&self, _path: &str, content: &str) -> ParseResult {
125 let links = extract_frontmatter_links(content);
126 let metadata = extract_frontmatter_metadata(content);
127
128 ParseResult { links, metadata }
129 }
130}
131
132fn extract_frontmatter_links(content: &str) -> Vec<String> {
136 let content = &strip_code(content);
137 let mut links = Vec::new();
138
139 if !content.starts_with("---") {
140 return links;
141 }
142
143 let rest = &content[3..];
144 let end = match rest.find("\n---") {
145 Some(idx) => idx,
146 None => return links,
147 };
148
149 let frontmatter = &rest[..end];
150
151 for line in frontmatter.lines() {
152 let line = line.trim();
153
154 let value = if let Some(stripped) = line.strip_prefix("- ") {
155 stripped.trim()
156 } else if let Some((_key, val)) = line.split_once(':') {
157 val.trim()
158 } else {
159 continue;
160 };
161
162 if value.is_empty() {
163 continue;
164 }
165
166 if value.starts_with('{')
167 || value.starts_with('[')
168 || value.starts_with('"')
169 || value.starts_with('\'')
170 {
171 continue;
172 }
173
174 if value.parse::<f64>().is_ok() {
176 continue;
177 }
178
179 if !is_link_candidate(value) {
180 continue;
181 }
182
183 links.push(value.to_string());
184 }
185
186 links
187}
188
189fn extract_frontmatter_metadata(content: &str) -> Option<serde_json::Value> {
192 let content = &strip_code(content);
193
194 if !content.starts_with("---") {
195 return None;
196 }
197
198 let rest = &content[3..];
199 let end = rest.find("\n---")?;
200 let yaml_str = &rest[..end];
201
202 if yaml_str.trim().is_empty() {
203 return None;
204 }
205
206 match serde_yml::from_str::<serde_yml::Value>(yaml_str) {
207 Ok(yaml_val) => Some(yaml_to_json(yaml_val)),
208 Err(e) => {
209 eprintln!("warn: frontmatter parser: invalid YAML: {e}");
210 None
211 }
212 }
213}
214
215fn yaml_to_json(yaml: serde_yml::Value) -> serde_json::Value {
217 match yaml {
218 serde_yml::Value::Null => serde_json::Value::Null,
219 serde_yml::Value::Bool(b) => serde_json::Value::Bool(b),
220 serde_yml::Value::Number(n) => {
221 if let Some(i) = n.as_i64() {
222 serde_json::Value::Number(i.into())
223 } else if let Some(f) = n.as_f64() {
224 serde_json::Number::from_f64(f)
225 .map(serde_json::Value::Number)
226 .unwrap_or(serde_json::Value::Null)
227 } else {
228 serde_json::Value::Null
229 }
230 }
231 serde_yml::Value::String(s) => serde_json::Value::String(s),
232 serde_yml::Value::Sequence(seq) => {
233 serde_json::Value::Array(seq.into_iter().map(yaml_to_json).collect())
234 }
235 serde_yml::Value::Mapping(map) => {
236 let obj: serde_json::Map<String, serde_json::Value> = map
237 .into_iter()
238 .filter_map(|(k, v)| {
239 let key = match k {
240 serde_yml::Value::String(s) => s,
241 other => serde_json::to_string(&yaml_to_json(other)).ok()?,
242 };
243 Some((key, yaml_to_json(v)))
244 })
245 .collect();
246 serde_json::Value::Object(obj)
247 }
248 serde_yml::Value::Tagged(tagged) => yaml_to_json(tagged.value),
249 }
250}
251
#[cfg(test)]
mod tests {
    use super::*;

    /// A parser with no file filter, as used by most tests.
    fn unfiltered() -> FrontmatterParser {
        FrontmatterParser { file_filter: None }
    }

    /// Run the unfiltered parser over `content` under a fixed path.
    fn parse(content: &str) -> ParseResult {
        unfiltered().parse("test.md", content)
    }

    #[test]
    fn parser_name() {
        assert_eq!(unfiltered().name(), "frontmatter");
    }

    #[test]
    fn extracts_frontmatter_links() {
        let parsed = parse(
            "---\nsources:\n - ../shared/glossary.md\n - ./prior-art.md\n---\n\n# Hello\n",
        );
        assert_eq!(parsed.links, ["../shared/glossary.md", "./prior-art.md"]);
    }

    #[test]
    fn extracts_same_directory_links() {
        let parsed = parse("---\nsources:\n - setup.md\n - config.rs\n---\n");
        assert_eq!(parsed.links, ["setup.md", "config.rs"]);
    }

    #[test]
    fn frontmatter_skips_non_paths() {
        let parsed =
            parse("---\ntitle: My Document\nversion: 1.0\ntags:\n - rust\n - cli\n---\n");
        assert!(parsed.links.is_empty());
    }

    #[test]
    fn frontmatter_skips_code_block_examples() {
        let parsed = parse("# Doc\n\n```markdown\n---\nsources:\n - ./fake.md\n---\n```\n");
        assert!(
            parsed.links.is_empty(),
            "frontmatter inside code block should be ignored"
        );
        assert!(parsed.metadata.is_none());
    }

    #[test]
    fn extracts_metadata() {
        let meta = parse(
            "---\ntitle: My Doc\nstatus: draft\ntags:\n - rust\n - cli\n---\n\n# Hello\n",
        )
        .metadata
        .unwrap();
        assert_eq!(meta["title"], "My Doc");
        assert_eq!(meta["status"], "draft");
        assert_eq!(meta["tags"], serde_json::json!(["rust", "cli"]));
    }

    #[test]
    fn no_metadata_without_frontmatter() {
        assert!(parse("# Just a heading\n").metadata.is_none());
    }

    #[test]
    fn metadata_handles_nested_yaml() {
        let meta = parse("---\ntitle: Test\nauthor:\n name: Alice\n role: dev\n---\n")
            .metadata
            .unwrap();
        assert_eq!(meta["author"]["name"], "Alice");
        assert_eq!(meta["author"]["role"], "dev");
    }

    #[test]
    fn no_filter_matches_everything() {
        let parser = unfiltered();
        for path in ["index.md", "main.rs"] {
            assert!(parser.matches(path));
        }
    }

    #[test]
    fn file_filter_restricts_matching() {
        let mut globs = globset::GlobSetBuilder::new();
        globs.add(globset::Glob::new("*.md").unwrap());
        let parser = FrontmatterParser {
            file_filter: Some(globs.build().unwrap()),
        };
        assert!(parser.matches("index.md"));
        assert!(!parser.matches("main.rs"));
    }

    #[test]
    fn extracts_uris() {
        let parsed = parse("---\nsources:\n - https://example.com\n - ./local.md\n---\n");
        assert_eq!(parsed.links, ["https://example.com", "./local.md"]);
    }

    #[test]
    fn skips_prose_with_spaces() {
        let parsed = parse("---\npurpose: configuration reference\nstatus: needs review\n---\n");
        assert!(parsed.links.is_empty());
    }

    #[test]
    fn skips_abbreviations_and_versions() {
        let parsed = parse("---\nnote: e.g.\nversion: v2.0\nauthor: Dr.\n---\n");
        assert!(parsed.links.is_empty());
    }

    #[test]
    fn accepts_paths_without_prefix() {
        let parsed = parse("---\nsources:\n - config.rs\n - docs/setup.md\n---\n");
        assert_eq!(parsed.links, ["config.rs", "docs/setup.md"]);
    }

    #[test]
    fn emits_absolute_paths() {
        let parsed = parse("---\nsource: /usr/local/config.toml\n---\n");
        assert_eq!(parsed.links, ["/usr/local/config.toml"]);
    }
}