use once_cell::sync::Lazy;
use regex::Regex;
/// Syntax of a frontmatter block, identified by its fence line.
///
/// The type is a plain tag with no payload, so it is `Copy` and can be used
/// as a map/set key (`Eq` + `Hash`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FrontmatterFormat {
    /// Block fenced by `---` lines.
    Yaml,
    /// Block fenced by `+++` lines.
    Toml,
}
/// A frontmatter block that was split off the start of a document.
#[derive(Debug, Clone)]
pub struct Frontmatter {
/// Which fence syntax delimited the block.
pub format: FrontmatterFormat,
/// Raw text captured between the opening and closing fences; it is stored
/// as-is and not parsed here.
pub content: String,
}
/// Matches a YAML frontmatter block fenced by `---` lines at the very start
/// of the input.
///
/// Capture group 1 is the text between the two fences. Trailing whitespace on
/// a fence line is tolerated. The closing fence may be followed either by a
/// newline or by the end of the input, so a document that stops right after
/// the fence (no final newline) is still recognised.
static YAML_FRONTMATTER: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"^---\s*\n([\s\S]*?)\n---\s*(?:\n|\z)")
        .expect("YAML frontmatter pattern is a valid regex")
});
/// Matches a TOML frontmatter block fenced by `+++` lines at the very start
/// of the input.
///
/// Capture group 1 is the text between the two fences. Trailing whitespace on
/// a fence line is tolerated. The closing fence may be followed either by a
/// newline or by the end of the input, so a document that stops right after
/// the fence (no final newline) is still recognised.
static TOML_FRONTMATTER: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"^\+\+\+\s*\n([\s\S]*?)\n\+\+\+\s*(?:\n|\z)")
        .expect("TOML frontmatter pattern is a valid regex")
});
/// Splits a leading frontmatter block off `input`.
///
/// The YAML pattern (`---` fences) is tried first and the TOML pattern
/// (`+++` fences) second; the first one that matches wins.
///
/// # Returns
///
/// * `(Some(frontmatter), rest)` when a block is found: `frontmatter.content`
///   is the text captured between the fences and `rest` is `input` with the
///   whole matched block removed.
/// * `(None, input.to_string())` when neither pattern matches.
pub fn extract_frontmatter(input: &str) -> (Option<Frontmatter>, String) {
    // The TOML pattern is only evaluated when the YAML one does not match.
    let detected = YAML_FRONTMATTER
        .captures(input)
        .map(|caps| (FrontmatterFormat::Yaml, caps))
        .or_else(|| {
            TOML_FRONTMATTER
                .captures(input)
                .map(|caps| (FrontmatterFormat::Toml, caps))
        });

    match detected {
        Some((format, caps)) => {
            let content = caps.get(1).map_or("", |m| m.as_str()).to_string();
            // Group 0 spans both fences and everything between them. Cutting
            // that span out of `input` gives the same result as replacing the
            // first match with "", without running the regex a second time.
            let remaining = match caps.get(0) {
                Some(block) => [&input[..block.start()], &input[block.end()..]].concat(),
                None => input.to_string(),
            };
            (Some(Frontmatter { format, content }), remaining)
        }
        None => (None, input.to_string()),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the extractor and unwraps the frontmatter, failing the calling
    /// test when no frontmatter was detected.
    fn split_expecting_frontmatter(source: &str) -> (Frontmatter, String) {
        let (found, body) = extract_frontmatter(source);
        (found.expect("frontmatter should be detected"), body)
    }

    /// A `---` fenced block is reported as YAML and removed from the body.
    #[test]
    fn test_yaml_frontmatter() {
        let (meta, body) =
            split_expecting_frontmatter("---\ntitle: Test\nauthor: John\n---\n\n# Content");

        assert_eq!(meta.format, FrontmatterFormat::Yaml);
        assert!(meta.content.contains("title: Test"));
        assert!(body.contains("# Content"));
        assert!(!body.contains("---"));
    }

    /// A `+++` fenced block is reported as TOML and removed from the body.
    #[test]
    fn test_toml_frontmatter() {
        let (meta, body) = split_expecting_frontmatter(
            "+++\ntitle = \"Test\"\nauthor = \"John\"\n+++\n\n# Content",
        );

        assert_eq!(meta.format, FrontmatterFormat::Toml);
        assert!(meta.content.contains("title = \"Test\""));
        assert!(body.contains("# Content"));
        assert!(!body.contains("+++"));
    }

    /// Input without any fence comes back untouched and without frontmatter.
    #[test]
    fn test_no_frontmatter() {
        let source = "# Just a heading\n\nSome content";
        let (found, body) = extract_frontmatter(source);

        assert!(found.is_none());
        assert_eq!(body, source);
    }

    /// Multi-line YAML (nested list, several keys) is captured as one block.
    #[test]
    fn test_yaml_with_complex_content() {
        let (meta, body) = split_expecting_frontmatter(
            "---\ntitle: Complex\ntags:\n - rust\n - wiki\ndate: 2024-01-01\n---\n\n**Bold** text",
        );

        assert!(meta.content.contains("tags:"));
        assert!(body.contains("**Bold**"));
    }

    /// Fences that are not on the first line are not treated as frontmatter.
    #[test]
    fn test_frontmatter_must_be_at_start() {
        let source = "Some text\n---\ntitle: Test\n---\n\nMore content";
        let (found, body) = extract_frontmatter(source);

        assert!(found.is_none());
        assert_eq!(body, source);
    }
}