drft/parsers/
frontmatter.rs1use super::{ParseResult, Parser};
2
3fn is_link_candidate(value: &str) -> bool {
5 if crate::graph::is_uri(value) {
7 return true;
8 }
9 if value.starts_with("./") || value.starts_with("../") || value.starts_with('/') {
12 return true;
13 }
14 if value.contains(' ') {
16 return false;
17 }
18 let basename = value.rsplit('/').next().unwrap_or(value);
21 if let Some(dot_pos) = basename.rfind('.') {
22 let ext = &basename[dot_pos + 1..];
23 !ext.is_empty()
24 && ext.len() <= 6
25 && ext.chars().all(|c| c.is_ascii_alphanumeric())
26 && !ext.chars().all(|c| c.is_ascii_digit())
27 } else {
28 false
29 }
30}
31
/// Blanks out Markdown code so later scanning never sees text inside it.
///
/// Two passes are made over `content`:
///
/// 1. **Fenced code blocks** are replaced line by line with spaces (one space
///    per byte of the original line, fence lines included). Every input line
///    is re-emitted followed by `\n`, so `\r\n` endings are normalised and a
///    non-empty result always ends with a newline.
/// 2. **Inline code spans** (a run of backticks up to the next run of the same
///    length) are replaced with one space per character, delimiters included.
///    A span may cross line boundaries, in which case the newlines inside it
///    are blanked as well.
///
/// Fence recognition follows the CommonMark rules: a fence opens with at least
/// three backticks or tildes, a backtick fence's info string may not contain a
/// backtick, and the closing fence must use the same character and be at least
/// as long as the opening one. An unclosed fence blanks everything up to the
/// end of the input.
fn strip_code(content: &str) -> String {
    // Pass 1: blank fenced code blocks line by line.
    let mut result = String::with_capacity(content.len());
    // Marker character and run length of the currently open fence, if any.
    let mut open_fence: Option<(char, usize)> = None;

    for line in content.lines() {
        let trimmed = line.trim_start();
        match open_fence {
            None => match opening_fence(trimmed) {
                Some(fence) => {
                    open_fence = Some(fence);
                    result.push_str(&" ".repeat(line.len()));
                }
                None => result.push_str(line),
            },
            Some((marker, len)) => {
                if closes_fence(trimmed, marker, len) {
                    open_fence = None;
                }
                // Both the block's content and its closing fence are blanked.
                result.push_str(&" ".repeat(line.len()));
            }
        }
        result.push('\n');
    }

    // Pass 2: blank inline code spans in what is left.
    let chars: Vec<char> = result.chars().collect();
    let mut cleaned = String::with_capacity(result.len());
    let mut i = 0;
    while i < chars.len() {
        if chars[i] != '`' {
            cleaned.push(chars[i]);
            i += 1;
            continue;
        }

        // Length of the opening backtick run starting at `i` (always >= 1 here).
        let ticks = chars[i..].iter().take_while(|&&c| c == '`').count();
        let after = i + ticks;
        // Offset (relative to `after`) of the first run of `ticks` backticks.
        let closing = chars[after..]
            .windows(ticks)
            .position(|run| run.iter().all(|&c| c == '`'));

        match closing {
            Some(offset) => {
                // Opening run + span content + closing run.
                let total = ticks + offset + ticks;
                cleaned.push_str(&" ".repeat(total));
                i += total;
            }
            None => {
                // No matching closer: keep this backtick literally and rescan
                // from the next character (possibly as a shorter run).
                cleaned.push('`');
                i += 1;
            }
        }
    }

    cleaned
}

/// Returns the fence marker and run length if `trimmed` (a line with leading
/// whitespace removed) opens a fenced code block.
fn opening_fence(trimmed: &str) -> Option<(char, usize)> {
    let marker = trimmed
        .chars()
        .next()
        .filter(|&c| matches!(c, '`' | '~'))?;
    let len = trimmed.chars().take_while(|&c| c == marker).count();
    if len < 3 {
        return None;
    }
    // The marker is ASCII, so the run length is also a valid byte offset.
    let info = &trimmed[len..];
    // A backtick in the info string means this is inline code, not a fence.
    if marker == '`' && info.contains('`') {
        return None;
    }
    Some((marker, len))
}

/// Reports whether `trimmed` closes a fence opened with `open_len` copies of `marker`.
fn closes_fence(trimmed: &str, marker: char, open_len: usize) -> bool {
    let candidate = trimmed.trim_end();
    candidate.len() >= open_len && candidate.chars().all(|c| c == marker)
}
105
106pub struct FrontmatterParser {
108 pub file_filter: Option<globset::GlobSet>,
110}
111
112impl Parser for FrontmatterParser {
113 fn name(&self) -> &str {
114 "frontmatter"
115 }
116
117 fn matches(&self, path: &str) -> bool {
118 match &self.file_filter {
119 Some(set) => set.is_match(path),
120 None => true,
121 }
122 }
123
124 fn parse(&self, _path: &str, content: &str) -> ParseResult {
125 let links = extract_frontmatter_links(content);
126 let metadata = extract_frontmatter_metadata(content);
127
128 ParseResult { links, metadata }
129 }
130}
131
132fn extract_frontmatter_links(content: &str) -> Vec<String> {
136 let content = &strip_code(content);
137
138 if !content.starts_with("---") {
139 return Vec::new();
140 }
141
142 let rest = &content[3..];
143 let end = match rest.find("\n---") {
144 Some(idx) => idx,
145 None => return Vec::new(),
146 };
147
148 let yaml_str = &rest[..end];
149 if yaml_str.trim().is_empty() {
150 return Vec::new();
151 }
152
153 let yaml: serde_yml::Value = match serde_yml::from_str(yaml_str) {
154 Ok(v) => v,
155 Err(e) => {
156 eprintln!("warn: frontmatter parser: invalid YAML: {e}");
157 return Vec::new();
158 }
159 };
160
161 let mut links = Vec::new();
162 collect_string_leaves(&yaml, &mut links);
163 links.retain(|v| is_link_candidate(v));
164 links
165}
166
167fn collect_string_leaves(value: &serde_yml::Value, out: &mut Vec<String>) {
170 match value {
171 serde_yml::Value::String(s) => out.push(s.clone()),
172 serde_yml::Value::Sequence(seq) => {
173 for item in seq {
174 collect_string_leaves(item, out);
175 }
176 }
177 serde_yml::Value::Mapping(map) => {
178 for (_key, val) in map {
179 collect_string_leaves(val, out);
180 }
181 }
182 serde_yml::Value::Tagged(tagged) => collect_string_leaves(&tagged.value, out),
183 _ => {}
184 }
185}
186
187fn extract_frontmatter_metadata(content: &str) -> Option<serde_json::Value> {
190 let content = &strip_code(content);
191
192 if !content.starts_with("---") {
193 return None;
194 }
195
196 let rest = &content[3..];
197 let end = rest.find("\n---")?;
198 let yaml_str = &rest[..end];
199
200 if yaml_str.trim().is_empty() {
201 return None;
202 }
203
204 match serde_yml::from_str::<serde_yml::Value>(yaml_str) {
205 Ok(yaml_val) => Some(yaml_to_json(yaml_val)),
206 Err(e) => {
207 eprintln!("warn: frontmatter parser: invalid YAML: {e}");
208 None
209 }
210 }
211}
212
213fn yaml_to_json(yaml: serde_yml::Value) -> serde_json::Value {
215 match yaml {
216 serde_yml::Value::Null => serde_json::Value::Null,
217 serde_yml::Value::Bool(b) => serde_json::Value::Bool(b),
218 serde_yml::Value::Number(n) => {
219 if let Some(i) = n.as_i64() {
220 serde_json::Value::Number(i.into())
221 } else if let Some(f) = n.as_f64() {
222 serde_json::Number::from_f64(f)
223 .map(serde_json::Value::Number)
224 .unwrap_or(serde_json::Value::Null)
225 } else {
226 serde_json::Value::Null
227 }
228 }
229 serde_yml::Value::String(s) => serde_json::Value::String(s),
230 serde_yml::Value::Sequence(seq) => {
231 serde_json::Value::Array(seq.into_iter().map(yaml_to_json).collect())
232 }
233 serde_yml::Value::Mapping(map) => {
234 let obj: serde_json::Map<String, serde_json::Value> = map
235 .into_iter()
236 .filter_map(|(k, v)| {
237 let key = match k {
238 serde_yml::Value::String(s) => s,
239 other => serde_json::to_string(&yaml_to_json(other)).ok()?,
240 };
241 Some((key, yaml_to_json(v)))
242 })
243 .collect();
244 serde_json::Value::Object(obj)
245 }
246 serde_yml::Value::Tagged(tagged) => yaml_to_json(tagged.value),
247 }
248}
249
#[cfg(test)]
mod tests {
    use super::*;

    /// Parser with no file filter, as used by most tests.
    fn unfiltered() -> FrontmatterParser {
        FrontmatterParser { file_filter: None }
    }

    /// Runs the parser over `content` under a fixed dummy path.
    fn parse(content: &str) -> ParseResult {
        unfiltered().parse("test.md", content)
    }

    #[test]
    fn parser_name() {
        assert_eq!(unfiltered().name(), "frontmatter");
    }

    #[test]
    fn extracts_frontmatter_links() {
        let result = parse(
            "---\nsources:\n - ../shared/glossary.md\n - ./prior-art.md\n---\n\n# Hello\n",
        );
        assert_eq!(result.links, ["../shared/glossary.md", "./prior-art.md"]);
    }

    #[test]
    fn extracts_same_directory_links() {
        let result = parse("---\nsources:\n - setup.md\n - config.rs\n---\n");
        assert_eq!(result.links, ["setup.md", "config.rs"]);
    }

    #[test]
    fn frontmatter_skips_non_paths() {
        let result =
            parse("---\ntitle: My Document\nversion: 1.0\ntags:\n - rust\n - cli\n---\n");
        assert!(result.links.is_empty());
    }

    #[test]
    fn frontmatter_skips_code_block_examples() {
        let result = parse("# Doc\n\n```markdown\n---\nsources:\n - ./fake.md\n---\n```\n");
        assert!(
            result.links.is_empty(),
            "frontmatter inside code block should be ignored"
        );
        assert!(result.metadata.is_none());
    }

    #[test]
    fn extracts_metadata() {
        let result = parse(
            "---\ntitle: My Doc\nstatus: draft\ntags:\n - rust\n - cli\n---\n\n# Hello\n",
        );
        let meta = result.metadata.unwrap();
        assert_eq!(meta["title"], "My Doc");
        assert_eq!(meta["status"], "draft");
        assert_eq!(meta["tags"], serde_json::json!(["rust", "cli"]));
    }

    #[test]
    fn no_metadata_without_frontmatter() {
        assert!(parse("# Just a heading\n").metadata.is_none());
    }

    #[test]
    fn metadata_handles_nested_yaml() {
        let result = parse("---\ntitle: Test\nauthor:\n name: Alice\n role: dev\n---\n");
        let meta = result.metadata.unwrap();
        assert_eq!(meta["author"]["name"], "Alice");
        assert_eq!(meta["author"]["role"], "dev");
    }

    #[test]
    fn no_filter_matches_everything() {
        let parser = unfiltered();
        for path in ["index.md", "main.rs"] {
            assert!(parser.matches(path));
        }
    }

    #[test]
    fn file_filter_restricts_matching() {
        let mut builder = globset::GlobSetBuilder::new();
        builder.add(globset::Glob::new("*.md").unwrap());
        let parser = FrontmatterParser {
            file_filter: Some(builder.build().unwrap()),
        };
        assert!(parser.matches("index.md"));
        assert!(!parser.matches("main.rs"));
    }

    #[test]
    fn extracts_uris() {
        let result = parse("---\nsources:\n - https://example.com\n - ./local.md\n---\n");
        assert_eq!(result.links, ["https://example.com", "./local.md"]);
    }

    #[test]
    fn skips_prose_with_spaces() {
        let result = parse("---\npurpose: configuration reference\nstatus: needs review\n---\n");
        assert!(result.links.is_empty());
    }

    #[test]
    fn skips_abbreviations_and_versions() {
        let result = parse("---\nnote: e.g.\nversion: v2.0\nauthor: Dr.\n---\n");
        assert!(result.links.is_empty());
    }

    #[test]
    fn accepts_paths_without_prefix() {
        let result = parse("---\nsources:\n - config.rs\n - docs/setup.md\n---\n");
        assert_eq!(result.links, ["config.rs", "docs/setup.md"]);
    }

    #[test]
    fn emits_absolute_paths() {
        let result = parse("---\nsource: /usr/local/config.toml\n---\n");
        assert_eq!(result.links, ["/usr/local/config.toml"]);
    }

    #[test]
    fn yaml_list_values_not_parsed_as_uris() {
        let result = parse("---\ntags:\n - name: foo bar bazz\n - status: draft\n---\n");
        assert!(result.links.is_empty());
    }
}