rumdl_lib/rules/
front_matter_utils.rs1use regex::Regex;
2use std::collections::HashMap;
3use std::sync::LazyLock;
4
5static STANDARD_FRONT_MATTER_START: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^---\s*$").unwrap());
7static STANDARD_FRONT_MATTER_END: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^---\s*$").unwrap());
8
9static TOML_FRONT_MATTER_START: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\+\+\+\s*$").unwrap());
11static TOML_FRONT_MATTER_END: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\+\+\+\s*$").unwrap());
12
13static JSON_FRONT_MATTER_START: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\{\s*$").unwrap());
15static JSON_FRONT_MATTER_END: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^\}\s*$").unwrap());
16
17static MALFORMED_FRONT_MATTER_START1: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^- --\s*$").unwrap());
19static MALFORMED_FRONT_MATTER_END1: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^- --\s*$").unwrap());
20
21static MALFORMED_FRONT_MATTER_START2: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^-- -\s*$").unwrap());
23static MALFORMED_FRONT_MATTER_END2: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^-- -\s*$").unwrap());
24
25static FRONT_MATTER_FIELD: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^([^:]+):\s*(.*)$").unwrap());
27
28static TOML_FIELD_PATTERN: LazyLock<Regex> = LazyLock::new(|| Regex::new(r#"^([^=]+)\s*=\s*"?([^"]*)"?$"#).unwrap());
30
/// The flavour of front matter found at the top of a document.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum FrontMatterType {
    /// Delimited by `---` lines.
    Yaml,
    /// Delimited by `+++` lines.
    Toml,
    /// Opened by a `{` line and closed by a `}` line.
    Json,
    /// Delimited by `- --` or `-- -` lines.
    Malformed,
    /// No front matter was recognised.
    None,
}
45
/// Namespace for front matter helpers; every operation is an associated
/// function, so the type carries no state.
pub struct FrontMatterUtils;
48
49impl FrontMatterUtils {
50 pub fn has_front_matter_field(content: &str, field_prefix: &str) -> bool {
52 let field_name = field_prefix.trim_end_matches(':');
53 Self::get_front_matter_field_value(content, field_name).is_some()
54 }
55
56 pub fn get_front_matter_field_value<'a>(content: &'a str, field_name: &str) -> Option<&'a str> {
58 let lines: Vec<&'a str> = content.lines().collect();
59 if lines.len() < 3 {
60 return None;
61 }
62
63 let front_matter_type = Self::detect_front_matter_type(content);
64 if front_matter_type == FrontMatterType::None {
65 return None;
66 }
67
68 let front_matter = Self::extract_front_matter(content);
69 for line in front_matter {
70 let line = line.trim();
71 match front_matter_type {
72 FrontMatterType::Toml => {
73 if let Some(captures) = TOML_FIELD_PATTERN.captures(line) {
75 let key = captures.get(1).unwrap().as_str().trim();
76 if key == field_name {
77 let value = captures.get(2).unwrap().as_str();
78 return Some(value);
79 }
80 }
81 }
82 _ => {
83 if let Some(captures) = FRONT_MATTER_FIELD.captures(line) {
85 let mut key = captures.get(1).unwrap().as_str().trim();
86
87 if key.starts_with('"') && key.ends_with('"') && key.len() >= 2 {
89 key = &key[1..key.len() - 1];
90 }
91
92 if key == field_name {
93 let value = captures.get(2).unwrap().as_str().trim();
94 if value.starts_with('"') && value.ends_with('"') && value.len() >= 2 {
96 return Some(&value[1..value.len() - 1]);
97 }
98 return Some(value);
99 }
100 }
101 }
102 }
103 }
104
105 None
106 }
107
108 pub fn extract_front_matter_fields(content: &str) -> HashMap<String, String> {
110 let mut fields = HashMap::new();
111
112 let front_matter_type = Self::detect_front_matter_type(content);
113 if front_matter_type == FrontMatterType::None {
114 return fields;
115 }
116
117 let front_matter = Self::extract_front_matter(content);
118 let mut current_prefix = String::new();
119 let mut indent_level = 0;
120
121 for line in front_matter {
122 let line_indent = line.chars().take_while(|c| c.is_whitespace()).count();
123 let line = line.trim();
124
125 match line_indent.cmp(&indent_level) {
127 std::cmp::Ordering::Greater => {
128 indent_level = line_indent;
130 }
131 std::cmp::Ordering::Less => {
132 indent_level = line_indent;
134 if let Some(last_dot) = current_prefix.rfind('.') {
136 current_prefix.truncate(last_dot);
137 } else {
138 current_prefix.clear();
139 }
140 }
141 std::cmp::Ordering::Equal => {}
142 }
143
144 match front_matter_type {
145 FrontMatterType::Toml => {
146 if let Some(captures) = TOML_FIELD_PATTERN.captures(line) {
148 let key = captures.get(1).unwrap().as_str().trim();
149 let value = captures.get(2).unwrap().as_str();
150 let full_key = if current_prefix.is_empty() {
151 key.to_string()
152 } else {
153 format!("{current_prefix}.{key}")
154 };
155 fields.insert(full_key, value.to_string());
156 }
157 }
158 _ => {
159 if let Some(captures) = FRONT_MATTER_FIELD.captures(line) {
161 let mut key = captures.get(1).unwrap().as_str().trim();
162 let value = captures.get(2).unwrap().as_str().trim();
163
164 if key.starts_with('"') && key.ends_with('"') && key.len() >= 2 {
166 key = &key[1..key.len() - 1];
167 }
168
169 if let Some(stripped) = key.strip_suffix(':') {
170 if current_prefix.is_empty() {
172 current_prefix = stripped.to_string();
173 } else {
174 current_prefix = format!("{current_prefix}.{stripped}");
175 }
176 } else {
177 let full_key = if current_prefix.is_empty() {
179 key.to_string()
180 } else {
181 format!("{current_prefix}.{key}")
182 };
183 let value = value
185 .strip_prefix('"')
186 .and_then(|v| v.strip_suffix('"'))
187 .unwrap_or(value);
188 fields.insert(full_key, value.to_string());
189 }
190 }
191 }
192 }
193 }
194
195 fields
196 }
197
198 pub fn extract_front_matter<'a>(content: &'a str) -> Vec<&'a str> {
200 let lines: Vec<&'a str> = content.lines().collect();
201 if lines.len() < 3 {
202 return Vec::new();
203 }
204
205 let front_matter_type = Self::detect_front_matter_type(content);
206 if front_matter_type == FrontMatterType::None {
207 return Vec::new();
208 }
209
210 let mut front_matter = Vec::new();
211 let mut in_front_matter = false;
212
213 for (i, line) in lines.iter().enumerate() {
214 match front_matter_type {
215 FrontMatterType::Yaml => {
216 if i == 0 && STANDARD_FRONT_MATTER_START.is_match(line) {
217 in_front_matter = true;
218 continue;
219 } else if STANDARD_FRONT_MATTER_END.is_match(line) && in_front_matter && i > 0 {
220 break;
221 }
222 }
223 FrontMatterType::Toml => {
224 if i == 0 && TOML_FRONT_MATTER_START.is_match(line) {
225 in_front_matter = true;
226 continue;
227 } else if TOML_FRONT_MATTER_END.is_match(line) && in_front_matter && i > 0 {
228 break;
229 }
230 }
231 FrontMatterType::Json => {
232 if i == 0 && JSON_FRONT_MATTER_START.is_match(line) {
233 in_front_matter = true;
234 continue;
235 } else if JSON_FRONT_MATTER_END.is_match(line) && in_front_matter && i > 0 {
236 break;
237 }
238 }
239 FrontMatterType::Malformed => {
240 if i == 0
241 && (MALFORMED_FRONT_MATTER_START1.is_match(line)
242 || MALFORMED_FRONT_MATTER_START2.is_match(line))
243 {
244 in_front_matter = true;
245 continue;
246 } else if (MALFORMED_FRONT_MATTER_END1.is_match(line) || MALFORMED_FRONT_MATTER_END2.is_match(line))
247 && in_front_matter
248 && i > 0
249 {
250 break;
251 }
252 }
253 FrontMatterType::None => break,
254 }
255
256 if in_front_matter {
257 front_matter.push(*line);
258 }
259 }
260
261 front_matter
262 }
263
264 pub fn detect_front_matter_type(content: &str) -> FrontMatterType {
266 let lines: Vec<&str> = content.lines().collect();
267 if lines.is_empty() {
268 return FrontMatterType::None;
269 }
270
271 let first_line = lines[0];
272
273 if STANDARD_FRONT_MATTER_START.is_match(first_line) {
274 for line in lines.iter().skip(1) {
276 if STANDARD_FRONT_MATTER_END.is_match(line) {
277 return FrontMatterType::Yaml;
278 }
279 }
280 } else if TOML_FRONT_MATTER_START.is_match(first_line) {
281 for line in lines.iter().skip(1) {
283 if TOML_FRONT_MATTER_END.is_match(line) {
284 return FrontMatterType::Toml;
285 }
286 }
287 } else if JSON_FRONT_MATTER_START.is_match(first_line) {
288 for line in lines.iter().skip(1) {
290 if JSON_FRONT_MATTER_END.is_match(line) {
291 return FrontMatterType::Json;
292 }
293 }
294 } else if MALFORMED_FRONT_MATTER_START1.is_match(first_line)
295 || MALFORMED_FRONT_MATTER_START2.is_match(first_line)
296 {
297 for line in lines.iter().skip(1) {
299 if MALFORMED_FRONT_MATTER_END1.is_match(line) || MALFORMED_FRONT_MATTER_END2.is_match(line) {
300 return FrontMatterType::Malformed;
301 }
302 }
303 }
304
305 FrontMatterType::None
306 }
307
308 pub fn get_front_matter_end_line(content: &str) -> usize {
310 let lines: Vec<&str> = content.lines().collect();
311 if lines.len() < 3 {
312 return 0;
313 }
314
315 let front_matter_type = Self::detect_front_matter_type(content);
316 if front_matter_type == FrontMatterType::None {
317 return 0;
318 }
319
320 let mut in_front_matter = false;
321
322 for (i, line) in lines.iter().enumerate() {
323 match front_matter_type {
324 FrontMatterType::Yaml => {
325 if i == 0 && STANDARD_FRONT_MATTER_START.is_match(line) {
326 in_front_matter = true;
327 } else if STANDARD_FRONT_MATTER_END.is_match(line) && in_front_matter && i > 0 {
328 return i + 1;
329 }
330 }
331 FrontMatterType::Toml => {
332 if i == 0 && TOML_FRONT_MATTER_START.is_match(line) {
333 in_front_matter = true;
334 } else if TOML_FRONT_MATTER_END.is_match(line) && in_front_matter && i > 0 {
335 return i + 1;
336 }
337 }
338 FrontMatterType::Json => {
339 if i == 0 && JSON_FRONT_MATTER_START.is_match(line) {
340 in_front_matter = true;
341 } else if JSON_FRONT_MATTER_END.is_match(line) && in_front_matter && i > 0 {
342 return i + 1;
343 }
344 }
345 FrontMatterType::Malformed => {
346 if i == 0
347 && (MALFORMED_FRONT_MATTER_START1.is_match(line)
348 || MALFORMED_FRONT_MATTER_START2.is_match(line))
349 {
350 in_front_matter = true;
351 } else if (MALFORMED_FRONT_MATTER_END1.is_match(line) || MALFORMED_FRONT_MATTER_END2.is_match(line))
352 && in_front_matter
353 && i > 0
354 {
355 return i + 1;
356 }
357 }
358 FrontMatterType::None => return 0,
359 }
360 }
361
362 0
363 }
364}
365
#[cfg(test)]
mod tests {
    use super::*;

    const YAML_DOC: &str = "---\ntitle: Test\nauthor: Me\n---\nContent";

    /// The enum supports equality comparison between variants.
    #[test]
    fn test_front_matter_type_enum() {
        let variants = [
            FrontMatterType::Yaml,
            FrontMatterType::Toml,
            FrontMatterType::Json,
            FrontMatterType::Malformed,
            FrontMatterType::None,
        ];
        for variant in variants {
            let copy = variant;
            assert_eq!(variant, copy);
        }
        assert_ne!(FrontMatterType::Yaml, FrontMatterType::Toml);
    }

    /// Every delimiter style is recognised; unterminated or absent front
    /// matter is reported as `None`.
    #[test]
    fn test_detect_front_matter_type() {
        let cases = [
            ("---\ntitle: Test\n---\nContent", FrontMatterType::Yaml),
            ("+++\ntitle = \"Test\"\n+++\nContent", FrontMatterType::Toml),
            ("{\n\"title\": \"Test\"\n}\nContent", FrontMatterType::Json),
            ("- --\ntitle: Test\n- --\nContent", FrontMatterType::Malformed),
            ("-- -\ntitle: Test\n-- -\nContent", FrontMatterType::Malformed),
            ("# Regular content", FrontMatterType::None),
            ("", FrontMatterType::None),
            ("---\ntitle: Test", FrontMatterType::None),
        ];

        for (content, expected) in cases {
            assert_eq!(FrontMatterUtils::detect_front_matter_type(content), expected);
        }
    }

    /// Only the lines between the delimiters are returned.
    #[test]
    fn test_extract_front_matter() {
        let body = FrontMatterUtils::extract_front_matter(YAML_DOC);
        assert_eq!(body, vec!["title: Test", "author: Me"]);

        assert!(FrontMatterUtils::extract_front_matter("Regular content").is_empty());
        assert!(FrontMatterUtils::extract_front_matter("---\n---").is_empty());
    }

    /// Field presence is reported only for keys inside real front matter.
    #[test]
    fn test_has_front_matter_field() {
        for present in ["title", "author"] {
            assert!(FrontMatterUtils::has_front_matter_field(YAML_DOC, present));
        }
        assert!(!FrontMatterUtils::has_front_matter_field(YAML_DOC, "date"));

        for not_front_matter in ["Regular content", "--"] {
            assert!(!FrontMatterUtils::has_front_matter_field(not_front_matter, "title"));
        }
    }

    /// Values are returned without surrounding quotes for every format.
    #[test]
    fn test_get_front_matter_field_value() {
        let yaml = "---\ntitle: Test Title\nauthor: \"John Doe\"\n---\nContent";
        let toml = "+++\ntitle = \"Test Title\"\nauthor = \"John Doe\"\n+++\nContent";
        let quoted_keys = "---\n\"title\": \"Test Title\"\n---\nContent";
        let json = "{\n\"title\": \"Test Title\"\n}\nContent";

        let cases = [
            (yaml, "title", Some("Test Title")),
            (yaml, "author", Some("John Doe")),
            (yaml, "nonexistent", None),
            (toml, "title", Some("Test Title")),
            (toml, "author", Some("John Doe")),
            (quoted_keys, "title", Some("Test Title")),
            (json, "title", Some("Test Title")),
            ("Regular content", "title", None),
            ("--", "title", None),
        ];

        for (content, field, expected) in cases {
            assert_eq!(FrontMatterUtils::get_front_matter_field_value(content, field), expected);
        }
    }

    /// YAML and TOML documents produce the same key/value map.
    #[test]
    fn test_extract_front_matter_fields() {
        let toml = "+++\ntitle = \"Test\"\nauthor = \"Me\"\n+++\nContent";

        for content in [YAML_DOC, toml] {
            let fields = FrontMatterUtils::extract_front_matter_fields(content);
            assert_eq!(fields.get("title"), Some(&"Test".to_string()));
            assert_eq!(fields.get("author"), Some(&"Me".to_string()));
        }

        assert!(FrontMatterUtils::extract_front_matter_fields("Regular content").is_empty());
    }

    /// The end line is the 1-based line of the closing delimiter, or 0.
    #[test]
    fn test_get_front_matter_end_line() {
        let cases = [
            ("---\ntitle: Test\n---\nContent", 3),
            ("+++\ntitle = \"Test\"\n+++\nContent", 3),
            ("Regular content", 0),
            ("--", 0),
        ];

        for (content, expected) in cases {
            assert_eq!(FrontMatterUtils::get_front_matter_end_line(content), expected);
        }
    }

    /// Top-level keys are still found when nested mappings are present.
    #[test]
    fn test_nested_yaml_fields() {
        let content = "---\ntitle: Test\nauthor:\n name: John Doe\n email: john@example.com\n---\nContent";

        let fields = FrontMatterUtils::extract_front_matter_fields(content);

        assert!(fields.contains_key("title"));
    }

    /// Empty input, delimiter-only input and repeated blocks.
    #[test]
    fn test_edge_cases() {
        assert_eq!(FrontMatterUtils::detect_front_matter_type(""), FrontMatterType::None);
        assert!(FrontMatterUtils::extract_front_matter("").is_empty());
        assert_eq!(FrontMatterUtils::get_front_matter_end_line(""), 0);

        assert!(FrontMatterUtils::extract_front_matter("---\n---").is_empty());

        // Only the first block counts as front matter.
        let repeated = "---\ntitle: First\n---\n---\ntitle: Second\n---";
        assert_eq!(
            FrontMatterUtils::detect_front_matter_type(repeated),
            FrontMatterType::Yaml
        );
        let fields = FrontMatterUtils::extract_front_matter_fields(repeated);
        assert_eq!(fields.get("title"), Some(&"First".to_string()));
    }

    /// Non-ASCII values are returned intact.
    #[test]
    fn test_unicode_content() {
        let content = "---\ntitle: 你好世界\nauthor: José\n---\nContent";

        assert_eq!(
            FrontMatterUtils::detect_front_matter_type(content),
            FrontMatterType::Yaml
        );
        for (field, expected) in [("title", "你好世界"), ("author", "José")] {
            assert_eq!(
                FrontMatterUtils::get_front_matter_field_value(content, field),
                Some(expected)
            );
        }
    }
}