use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use walkdir::WalkDir;
use crate::config::{ContentDirConfig, SortBy, default_toml_table};
/// Builds the URL path of a page from its parent directory and slug.
///
/// A page at the content root (empty `parent_dir`) maps to `/{slug}/`; any other page maps to
/// `/{parent_dir}/{slug}/`. The result always starts and ends with a slash.
pub(crate) fn page_url_path(parent_dir: &str, slug: &str) -> String {
    match parent_dir {
        "" => format!("/{slug}/"),
        dir => format!("/{dir}/{slug}/"),
    }
}
/// Builds the URL path of a section from its directory.
///
/// The root section (empty `dir`) maps to `/`; every other section maps to `/{dir}/`.
pub(crate) fn section_url_path(dir: &str) -> String {
    match dir {
        "" => String::from("/"),
        nested => format!("/{nested}/"),
    }
}
/// Returns the directory part of `relative_path` as a string.
///
/// Yields an empty string when the path has no parent component (for example a bare file name
/// or an empty path).
pub(crate) fn parent_dir(relative_path: &str) -> String {
    match Path::new(relative_path).parent() {
        Some(dir) => dir.to_string_lossy().into_owned(),
        None => String::new(),
    }
}
/// Computes the key (relative path of the `_index.md`) of the section a page belongs to.
///
/// A co-located page (`dir/index.md`) belongs to the section one level above its own
/// directory; any other page belongs to the section of the directory that contains it.
/// Pages at the content root map to `_index.md`.
pub(crate) fn section_key_for(relative_path: &str) -> String {
    let source = Path::new(relative_path);
    let section_dir = match source.file_name() {
        // `posts/my-post/index.md` -> `posts`
        Some(name) if name == "index.md" => source
            .parent()
            .and_then(|page_dir| page_dir.parent())
            .map(|dir| dir.to_string_lossy().into_owned())
            .unwrap_or_default(),
        // `posts/hello.md` -> `posts`
        _ => parent_dir(relative_path),
    };
    match section_dir.as_str() {
        "" => String::from("_index.md"),
        dir => format!("{dir}/_index.md"),
    }
}
/// Metadata deserialized from the TOML frontmatter block of a content file.
///
/// Known keys map onto the named fields; every other top-level key is collected into `rest`.
#[derive(Debug, Deserialize)]
pub struct Frontmatter {
/// Title of the page or section; `build_page`/`build_section` fall back to an empty string.
pub title: Option<String>,
/// Date as an arbitrary TOML value; `build_page` turns it into a string
/// (datetimes, strings and integers are handled explicitly).
pub date: Option<toml::Value>,
/// Author, copied onto the page unchanged.
pub author: Option<String>,
/// Description, copied onto the page or section unchanged.
pub description: Option<String>,
/// Draft flag; `false` when the key is absent.
#[serde(default)]
pub draft: bool,
/// Explicit slug; when absent `build_page` derives one from the file or directory name.
pub slug: Option<String>,
/// Template name, copied onto the page or section unchanged.
pub template: Option<String>,
/// Aliases, copied onto the page; empty when the key is absent.
#[serde(default)]
pub aliases: Vec<String>,
/// Sort order for a section's pages (only read by `build_section`).
pub sort_by: Option<SortBy>,
/// Pagination size for a section (only read by `build_section`).
pub paginate_by: Option<usize>,
/// Free-form `[extra]` table; defaults to `default_toml_table()` and is converted to JSON
/// when a page or section is built.
#[serde(default = "default_toml_table")]
pub extra: toml::Value,
/// All remaining top-level keys. `build_page` treats every entry whose value is an array
/// containing at least one string as a taxonomy (e.g. `tags = ["a", "b"]`).
#[serde(flatten)]
pub rest: HashMap<String, toml::Value>,
}
/// Frontmatter for a file that declares none: every optional key is absent, the page is not a
/// draft, and `extra` holds the same default table that deserialization uses.
impl Default for Frontmatter {
    fn default() -> Self {
        Self {
            // Flags and containers start out empty.
            draft: false,
            aliases: Vec::new(),
            rest: HashMap::new(),
            extra: default_toml_table(),
            // Descriptive metadata is absent.
            title: None,
            date: None,
            author: None,
            description: None,
            // Routing and rendering overrides are absent.
            slug: None,
            template: None,
            sort_by: None,
            paginate_by: None,
        }
    }
}
/// A single content page as produced by `build_page`.
#[derive(Debug, Clone, Serialize)]
pub struct Page {
/// Title from the frontmatter, or an empty string when none was given.
pub title: String,
/// Frontmatter date rendered as a string, if a date was given.
pub date: Option<String>,
/// Author from the frontmatter.
pub author: Option<String>,
/// Description from the frontmatter.
pub description: Option<String>,
/// Draft flag from the frontmatter.
pub draft: bool,
/// Frontmatter slug, or the slugified file stem (directory name for `index.md`).
pub slug: String,
/// Template name from the frontmatter.
pub template: Option<String>,
/// URL path with leading and trailing slash, e.g. `/posts/hello/`.
pub path: String,
/// `base_url` followed by `path`.
pub permalink: String,
/// Left empty by `build_page`; presumably filled with rendered output later — TODO confirm.
pub content: String,
/// Left as `None` by `build_page`; presumably set during rendering — TODO confirm.
pub summary: Option<String>,
/// Body of the source file as passed to `build_page`.
pub raw_content: String,
/// Taxonomy name -> terms, taken from frontmatter keys whose value is an array of strings.
pub taxonomies: HashMap<String, Vec<String>>,
/// Frontmatter `extra` table converted to JSON.
pub extra: serde_json::Value,
/// Aliases from the frontmatter.
pub aliases: Vec<String>,
/// Number of whitespace-separated words in `raw_content`.
pub word_count: usize,
/// `word_count / 200` (integer division), but at least 1.
pub reading_time: usize,
/// Relative path of the source file as passed to `build_page`.
pub relative_path: String,
}
/// A content section (an `_index.md`) as produced by `build_section`.
#[derive(Debug, Clone, Serialize)]
pub struct Section {
/// Title from the frontmatter, or an empty string when none was given.
pub title: String,
/// Description from the frontmatter.
pub description: Option<String>,
/// URL path of the section: `/` for the root, otherwise `/{dir}/`.
pub path: String,
/// `base_url` followed by `path`.
pub permalink: String,
/// Left empty by `build_section`; presumably filled with rendered output later — TODO confirm.
pub content: String,
/// Body of the source file as passed to `build_section`.
pub raw_content: String,
/// Pages belonging to this section; empty until `assign_pages_to_sections` fills it.
pub pages: Vec<Page>,
/// Sort order from the frontmatter.
pub sort_by: Option<SortBy>,
/// Pagination size from the frontmatter.
pub paginate_by: Option<usize>,
/// Template name from the frontmatter.
pub template: Option<String>,
/// Frontmatter `extra` table converted to JSON.
pub extra: serde_json::Value,
/// Relative path of the source file as passed to `build_section`.
pub relative_path: String,
}
/// Splits a content file into its TOML frontmatter and its body.
///
/// The frontmatter is delimited by a leading `+++` and the next line that starts with `+++`.
/// Leading byte-order marks are ignored. A file that does not start with `+++` has no
/// frontmatter: it yields `Frontmatter::default()` and the whole content as body.
///
/// Both LF and CRLF line endings are accepted: the carriage return that precedes the closing
/// delimiter is not passed to the TOML parser, and the single line break that follows the
/// closing delimiter is not part of the returned body.
///
/// # Errors
///
/// Fails when the opening `+++` has no closing counterpart or when the frontmatter is not
/// valid TOML for [`Frontmatter`].
pub fn parse_frontmatter(content: &str) -> anyhow::Result<(Frontmatter, String)> {
    const CLOSING: &str = "\n+++";

    let content = content.trim_start_matches('\u{feff}');
    let Some(rest) = content.strip_prefix("+++") else {
        return Ok((Frontmatter::default(), content.to_string()));
    };
    let end = rest
        .find(CLOSING)
        .ok_or_else(|| anyhow::anyhow!("Unclosed frontmatter"))?;

    // With CRLF line endings the slice ends in a lone `\r`, which is not a valid TOML newline.
    let frontmatter_str = &rest[..end];
    let frontmatter_str = frontmatter_str.strip_suffix('\r').unwrap_or(frontmatter_str);

    // Drop exactly one line break (CRLF or LF) after the closing delimiter.
    let body = &rest[end + CLOSING.len()..];
    let body = body
        .strip_prefix("\r\n")
        .or_else(|| body.strip_prefix('\n'))
        .unwrap_or(body);

    let fm: Frontmatter = toml::from_str(frontmatter_str)?;
    Ok((fm, body.to_string()))
}
/// Renders a frontmatter `date` value as a string.
///
/// Datetimes and integers use their `to_string` form and strings are copied verbatim; any
/// other TOML value falls back to the value's own `to_string` output.
fn value_to_date_string(v: &toml::Value) -> String {
    if let toml::Value::Datetime(stamp) = v {
        return stamp.to_string();
    }
    if let toml::Value::String(text) = v {
        return text.clone();
    }
    if let toml::Value::Integer(number) = v {
        return number.to_string();
    }
    v.to_string()
}
/// Builds a [`Page`] from parsed frontmatter and the body of a content file.
///
/// * `fm` - frontmatter of the file; consumed.
/// * `raw_content` - body of the file, stored as `raw_content` and used for the word count.
/// * `relative_path` - path of the file relative to the content root.
/// * `base_url` - prefix prepended verbatim to the URL path to form the permalink.
///
/// The slug is the frontmatter `slug` when present, otherwise the slugified file stem. For a
/// co-located page (`dir/index.md`) the directory name is used instead of the file stem and
/// the page is placed in the directory above `dir`. `content` is left empty and `summary`
/// is left as `None`.
pub fn build_page(
    fm: Frontmatter,
    raw_content: String,
    relative_path: &str,
    base_url: &str,
) -> Page {
    let source = Path::new(relative_path);
    let is_colocated = source.file_name().is_some_and(|name| name == "index.md");

    // Work out which path component names the page and which directory contains it.
    let (name_component, parent) = if is_colocated {
        let page_dir = source.parent();
        (
            page_dir.and_then(|dir| dir.file_name()).unwrap_or_default(),
            page_dir
                .and_then(|dir| dir.parent())
                .unwrap_or(Path::new(""))
                .to_string_lossy()
                .to_string(),
        )
    } else {
        (
            source.file_stem().unwrap_or_default(),
            parent_dir(relative_path),
        )
    };

    let slug = match fm.slug {
        Some(explicit) => explicit,
        None => {
            let name = name_component.to_string_lossy().to_string();
            slug::slugify(&name)
        }
    };

    let path = page_url_path(&parent, &slug);
    let permalink = format!("{base_url}{path}");
    let date = fm.date.as_ref().map(value_to_date_string);

    // Any extra frontmatter key holding an array with at least one string is a taxonomy;
    // non-string array items are ignored.
    let taxonomies: HashMap<String, Vec<String>> = fm
        .rest
        .iter()
        .filter_map(|(name, value)| {
            let toml::Value::Array(items) = value else {
                return None;
            };
            let terms: Vec<String> = items
                .iter()
                .filter_map(|item| item.as_str())
                .map(str::to_owned)
                .collect();
            if terms.is_empty() {
                None
            } else {
                Some((name.clone(), terms))
            }
        })
        .collect();

    let word_count = raw_content.split_whitespace().count();
    // 200 words per minute, rounded down, but never less than one minute.
    let reading_time = std::cmp::max(word_count / 200, 1);

    Page {
        title: fm.title.unwrap_or_default(),
        date,
        author: fm.author,
        description: fm.description,
        draft: fm.draft,
        slug,
        template: fm.template,
        path,
        permalink,
        content: String::new(),
        summary: None,
        raw_content,
        taxonomies,
        extra: toml_to_json(&fm.extra),
        aliases: fm.aliases,
        word_count,
        reading_time,
        relative_path: relative_path.to_owned(),
    }
}
pub fn build_section(
fm: Frontmatter,
raw_content: String,
relative_path: &str,
base_url: &str,
) -> Section {
let title = fm.title.unwrap_or_default();
let path = section_url_path(&parent_dir(relative_path));
let permalink = format!("{base_url}{path}");
let extra = toml_to_json(&fm.extra);
Section {
title,
description: fm.description,
path,
permalink,
content: String::new(),
raw_content,
pages: vec![],
sort_by: fm.sort_by,
paginate_by: fm.paginate_by,
template: fm.template,
extra,
relative_path: relative_path.to_string(),
}
}
/// Result of walking a content directory with `load_content` or `load_content_dir`.
pub struct LoadedContent {
/// Sections keyed by the relative path of their `_index.md`.
pub sections: HashMap<String, Section>,
/// Pages keyed by their relative path.
pub pages: HashMap<String, Page>,
/// Paths of non-Markdown files found by `load_content`; always empty for `load_content_dir`.
pub assets: Vec<PathBuf>,
}
/// Walks `content_dir` and loads every section, page and asset found in it.
///
/// * `_index.md` files become sections, all other `*.md` files become pages; both are keyed
///   by their path relative to `content_dir`.
/// * Every other non-directory entry is recorded as an asset path.
/// * `base_url` is forwarded to `build_section` / `build_page` for permalinks.
///
/// # Errors
///
/// Fails when the directory cannot be walked, a Markdown file cannot be read, or its
/// frontmatter cannot be parsed. Read and parse errors name the offending file.
pub fn load_content(content_dir: &Path, base_url: &str) -> anyhow::Result<LoadedContent> {
    let mut sections = HashMap::new();
    let mut pages = HashMap::new();
    let mut assets = Vec::new();

    for entry in WalkDir::new(content_dir) {
        let entry =
            entry.map_err(|e| anyhow::anyhow!("failed to walk content directory: {e}"))?;
        let path = entry.path();
        if path.is_dir() {
            continue;
        }
        let filename = path
            .file_name()
            .expect("non-directory entry has a filename")
            .to_string_lossy();
        if !filename.ends_with(".md") {
            assets.push(path.to_path_buf());
            continue;
        }

        let relative = path
            .strip_prefix(content_dir)
            .expect("walkdir entry is under content_dir")
            .to_string_lossy()
            .to_string();
        let content = std::fs::read_to_string(path)
            .map_err(|e| anyhow::anyhow!("cannot read {}: {e}", path.display()))?;
        // Attach the file name: a bare "Unclosed frontmatter" or TOML error is hard to locate
        // in a large content tree.
        let (fm, body) = parse_frontmatter(&content).map_err(|e| {
            anyhow::anyhow!("invalid frontmatter in {}: {e:#}", path.display())
        })?;

        if filename == "_index.md" {
            let section = build_section(fm, body, &relative, base_url);
            sections.insert(relative, section);
        } else {
            let page = build_page(fm, body, &relative, base_url);
            pages.insert(relative, page);
        }
    }

    Ok(LoadedContent {
        sections,
        pages,
        assets,
    })
}
/// Loads plain Markdown files (no frontmatter) from an extra content directory.
///
/// Every `*.md` file under `dir` that is not listed in `config.exclude` is loaded:
///
/// * `README.md` becomes a section using `config.section_template` and `config.sort_by`.
/// * Any other file becomes a page using `config.template`.
///
/// The title comes from the first `# ` heading (falling back to the file stem), the
/// description from the first paragraph, and the heading line is removed from the body.
/// When `config.rewrite_links` is set, the body is passed through
/// `crate::shortcodes::rewrite_md_links`.
///
/// Keys are built from `config.url_prefix`, the file's directory inside `dir`, and the file
/// name, skipping empty parts. An empty `url_prefix` therefore yields `guide.md` /
/// `sub/_index.md` rather than a path with a leading slash or a single shared `_index.md`.
///
/// Returns empty content when `dir` does not exist. `assets` is always empty.
///
/// # Errors
///
/// Fails when the directory cannot be walked or a Markdown file cannot be read.
pub fn load_content_dir(
    dir: &Path,
    config: &ContentDirConfig,
    base_url: &str,
) -> anyhow::Result<LoadedContent> {
    // Joins the non-empty segments with `/`, so that a missing prefix or parent directory
    // never produces a leading or doubled slash.
    fn join_rel_path(segments: &[&str]) -> String {
        segments
            .iter()
            .copied()
            .filter(|segment| !segment.is_empty())
            .collect::<Vec<_>>()
            .join("/")
    }

    let mut sections = HashMap::new();
    let mut pages = HashMap::new();
    if !dir.exists() {
        return Ok(LoadedContent {
            sections,
            pages,
            assets: vec![],
        });
    }

    for entry in WalkDir::new(dir) {
        let entry = entry
            .map_err(|e| anyhow::anyhow!("failed to walk content dir {}: {e}", dir.display()))?;
        let path = entry.path();
        if path.is_dir() {
            continue;
        }
        let filename = path.file_name().unwrap_or_default().to_string_lossy();
        if !filename.ends_with(".md") {
            continue;
        }
        let rel_in_dir = path
            .strip_prefix(dir)
            .expect("walkdir entry is under dir")
            .to_string_lossy()
            .to_string();
        if config.exclude.contains(&rel_in_dir) {
            continue;
        }
        let raw = std::fs::read_to_string(path)
            .map_err(|e| anyhow::anyhow!("cannot read {}: {e}", path.display()))?;

        let rel = Path::new(&rel_in_dir);
        let stem = rel
            .file_stem()
            .unwrap_or_default()
            .to_string_lossy()
            .to_string();
        let parent = rel
            .parent()
            .unwrap_or(Path::new(""))
            .to_string_lossy()
            .to_string();

        let (extracted_title, description) = extract_title_description(&raw);
        let title = extracted_title.unwrap_or_else(|| title_from_filename(&stem));
        let body = strip_title_heading(&raw);
        let body = if config.rewrite_links {
            let include_path = format!("../{}/{}", config.path, rel_in_dir);
            crate::shortcodes::rewrite_md_links(&body, &include_path)
        } else {
            body
        };

        if filename == "README.md" {
            let rel_path =
                join_rel_path(&[config.url_prefix.as_str(), parent.as_str(), "_index.md"]);
            let fm = Frontmatter {
                title: Some(title),
                description,
                template: Some(config.section_template.clone()),
                sort_by: config.sort_by,
                ..Default::default()
            };
            let section = build_section(fm, body, &rel_path, base_url);
            sections.insert(rel_path, section);
        } else {
            let file = format!("{stem}.md");
            let rel_path =
                join_rel_path(&[config.url_prefix.as_str(), parent.as_str(), file.as_str()]);
            let fm = Frontmatter {
                title: Some(title),
                description,
                template: Some(config.template.clone()),
                ..Default::default()
            };
            let page = build_page(fm, body, &rel_path, base_url);
            pages.insert(rel_path, page);
        }
    }

    Ok(LoadedContent {
        sections,
        pages,
        assets: vec![],
    })
}
/// Extracts a title and a short description from plain Markdown.
///
/// * The title is the text of the first line that starts with `# `.
/// * The description is the first paragraph of plain text, with its lines joined by single
///   spaces. Blank lines, headings, list items (`-`), table rows (`|`), HTML (`<`) and fenced
///   code blocks that come before that paragraph are skipped; the paragraph ends at the
///   first blank line or at the first line of one of those kinds.
///
/// Lines inside a fenced code block are never used as description text.
///
/// Returns `(title, description)`; either is `None` when nothing suitable was found.
pub fn extract_title_description(content: &str) -> (Option<String>, Option<String>) {
    let mut title = None;
    let mut desc_lines: Vec<&str> = Vec::new();
    let mut in_fence = false;

    for line in content.lines() {
        // Title detection deliberately ignores fences: the first `# ` line is the title,
        // wherever it appears.
        if title.is_none() {
            if let Some(h1) = line.strip_prefix("# ") {
                title = Some(h1.trim().to_string());
                continue;
            }
        }

        let trimmed = line.trim();
        let collecting = !desc_lines.is_empty();

        if trimmed.starts_with("```") {
            if collecting {
                break;
            }
            // Opening or closing fence before the description: track it so the code inside
            // is not mistaken for prose.
            in_fence = !in_fence;
            continue;
        }
        if in_fence {
            continue;
        }

        let is_structural =
            trimmed.is_empty() || matches!(trimmed.chars().next(), Some('#' | '-' | '|' | '<'));
        if is_structural {
            if collecting {
                break;
            }
            continue;
        }

        desc_lines.push(trimmed);
    }

    let desc = (!desc_lines.is_empty()).then(|| desc_lines.join(" "));
    (title, desc)
}
/// Derives a human-readable title from a file stem.
///
/// Hyphens become spaces and the first character is upper-cased when it is ASCII; a
/// non-ASCII first character is left untouched.
fn title_from_filename(stem: &str) -> String {
    let spaced = stem.replace('-', " ");
    let mut rest = spaced.chars();
    match rest.next() {
        Some(first) if first.is_ascii() => {
            let mut title = String::with_capacity(spaced.len());
            title.push(first.to_ascii_uppercase());
            title.push_str(rest.as_str());
            title
        }
        _ => spaced,
    }
}
/// Removes the first `# ` heading line from Markdown and trims leading blank lines.
///
/// Only the first line starting with `# ` is dropped; later ones are kept. Lines are
/// re-joined with `\n`, so a trailing newline of the input is not preserved. When every
/// remaining line is blank, nothing is trimmed.
fn strip_title_heading(content: &str) -> String {
    let mut heading_dropped = false;
    let kept: Vec<&str> = content
        .lines()
        .filter(|line| {
            let is_first_heading = !heading_dropped && line.starts_with("# ");
            if is_first_heading {
                heading_dropped = true;
            }
            !is_first_heading
        })
        .collect();

    let first_content = kept
        .iter()
        .position(|line| !line.trim().is_empty())
        .unwrap_or(0);
    kept[first_content..].join("\n")
}
/// Sort key for a page's date: the date string, or an empty string for undated pages.
fn page_date_key(p: &Page) -> &str {
    match &p.date {
        Some(date) => date.as_str(),
        None => "",
    }
}
/// Sorts pages in place by descending date string.
///
/// Dates are compared as plain strings; pages without a date compare as the empty string and
/// therefore end up last. The sort is stable.
pub fn sort_pages_by_date(pages: &mut [Page]) {
    pages.sort_by(|left, right| page_date_key(left).cmp(page_date_key(right)).reverse());
}
/// Sorts a slice of page references in place by descending date string.
///
/// Same ordering as `sort_pages_by_date`: string comparison, undated pages last, stable.
pub fn sort_pages_by_date_ref(pages: &mut [&Page]) {
    pages.sort_by(|left, right| page_date_key(left).cmp(page_date_key(right)).reverse());
}
/// Copies every page into the section it belongs to and sorts each section's pages.
///
/// A page's section is looked up with `section_key_for`; pages whose section does not exist
/// are left unassigned. Each section is then sorted according to its `sort_by` setting
/// (falling back to `SortBy::default()`).
///
/// Pages are inserted in order of their relative path before the stable sort runs, so pages
/// with equal sort keys (same date, no date, same title) end up in the same order on every
/// run instead of following the map's iteration order.
pub fn assign_pages_to_sections(
    sections: &mut HashMap<String, Section>,
    pages: &HashMap<String, Page>,
) {
    // Keys are unique, so an unstable sort is sufficient here.
    let mut ordered: Vec<(&String, &Page)> = pages.iter().collect();
    ordered.sort_unstable_by(|left, right| left.0.cmp(right.0));

    for (rel_path, page) in ordered {
        let key = section_key_for(rel_path);
        if let Some(section) = sections.get_mut(&key) {
            section.pages.push(page.clone());
        }
    }

    for section in sections.values_mut() {
        match section.sort_by.unwrap_or_default() {
            SortBy::Date => sort_pages_by_date(&mut section.pages),
            SortBy::Title => section.pages.sort_by(|a, b| a.title.cmp(&b.title)),
        }
    }
}
/// Escapes the five characters that have predefined entities in XML.
///
/// `&`, `<`, `>`, `"` and `'` are replaced by `&amp;`, `&lt;`, `&gt;`, `&quot;` and `&apos;`;
/// every other character is copied unchanged. The input is processed in a single pass, so an
/// ampersand introduced by one replacement is never escaped a second time.
pub(crate) fn escape_xml(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&apos;"),
            other => escaped.push(other),
        }
    }
    escaped
}
pub(crate) fn toml_to_json(v: &toml::Value) -> serde_json::Value {
match v {
toml::Value::String(s) => serde_json::Value::String(s.clone()),
toml::Value::Integer(i) => serde_json::json!(*i),
toml::Value::Float(f) => serde_json::json!(*f),
toml::Value::Boolean(b) => serde_json::Value::Bool(*b),
toml::Value::Datetime(d) => serde_json::Value::String(d.to_string()),
toml::Value::Array(a) => serde_json::Value::Array(a.iter().map(toml_to_json).collect()),
toml::Value::Table(t) => {
let map: serde_json::Map<String, serde_json::Value> = t
.iter()
.map(|(k, v)| (k.clone(), toml_to_json(v)))
.collect();
serde_json::Value::Object(map)
}
}
}
// Unit tests for frontmatter parsing, page/section construction and TOML -> JSON conversion.
#[cfg(test)]
mod tests {
use super::*;
// A minimal frontmatter block is parsed and removed from the body.
#[test]
fn test_parse_frontmatter_basic() {
let input = "+++\ntitle = \"Hello\"\n+++\nBody text here";
let (fm, body) = parse_frontmatter(input).unwrap();
assert_eq!(fm.title.as_deref(), Some("Hello"));
assert_eq!(body, "Body text here");
}
// Content without a leading `+++` yields default frontmatter and the untouched body.
#[test]
fn test_parse_frontmatter_no_frontmatter() {
let input = "Just plain markdown content";
let (fm, body) = parse_frontmatter(input).unwrap();
assert!(fm.title.is_none());
assert!(!fm.draft);
assert_eq!(body, "Just plain markdown content");
}
// Every named field is populated and unknown keys (`tags`) land in `rest`.
#[test]
fn test_parse_frontmatter_all_fields() {
let input = r#"+++
title = "Full Post"
date = "2025-01-15"
author = "Cody"
description = "A test post"
draft = true
slug = "custom-slug"
aliases = ["/old-url/"]
tags = ["rust", "test"]
sort_by = "date"
paginate_by = 5
[extra]
foo = "bar"
+++
Content goes here"#;
let (fm, body) = parse_frontmatter(input).unwrap();
assert_eq!(fm.title.as_deref(), Some("Full Post"));
assert_eq!(fm.author.as_deref(), Some("Cody"));
assert_eq!(fm.description.as_deref(), Some("A test post"));
assert!(fm.draft);
assert_eq!(fm.slug.as_deref(), Some("custom-slug"));
assert_eq!(fm.aliases, vec!["/old-url/"]);
let tags = fm.rest.get("tags").unwrap();
assert_eq!(
tags,
&toml::Value::Array(vec![
toml::Value::String("rust".into()),
toml::Value::String("test".into()),
])
);
assert_eq!(fm.sort_by, Some(SortBy::Date));
assert_eq!(fm.paginate_by, Some(5));
assert_eq!(body, "Content goes here");
}
// An unquoted TOML datetime is kept as a `Datetime` value.
#[test]
fn test_parse_frontmatter_date_datetime() {
let input = "+++\ndate = 2025-06-15T10:30:00\n+++\n";
let (fm, _) = parse_frontmatter(input).unwrap();
let date_val = fm.date.unwrap();
match date_val {
toml::Value::Datetime(_) => {} other => panic!("Expected Datetime, got {other:?}"),
}
}
// A quoted date is kept as a `String` value.
#[test]
fn test_parse_frontmatter_date_string() {
let input = "+++\ndate = \"2025-06-15\"\n+++\n";
let (fm, _) = parse_frontmatter(input).unwrap();
let date_val = fm.date.unwrap();
match date_val {
toml::Value::String(s) => assert_eq!(s, "2025-06-15"),
other => panic!("Expected String, got {other:?}"),
}
}
// Without a frontmatter slug, the slug comes from the file stem.
#[test]
fn test_build_page_slug_from_filename() {
let fm = Frontmatter::default();
let page = build_page(fm, "body".into(), "hello-world.md", "https://example.com");
assert_eq!(page.slug, "hello-world");
}
// A frontmatter slug overrides the file stem.
#[test]
fn test_build_page_slug_from_frontmatter() {
let fm = Frontmatter {
slug: Some("custom".into()),
..Default::default()
};
let page = build_page(fm, "body".into(), "hello-world.md", "https://example.com");
assert_eq!(page.slug, "custom");
}
// A page in a subdirectory gets `/{dir}/{slug}/`.
#[test]
fn test_build_page_path_nested() {
let fm = Frontmatter::default();
let page = build_page(fm, "body".into(), "posts/hello.md", "https://example.com");
assert_eq!(page.path, "/posts/hello/");
}
// A page at the content root gets `/{slug}/`.
#[test]
fn test_build_page_path_root() {
let fm = Frontmatter::default();
let page = build_page(fm, "body".into(), "hello.md", "https://example.com");
assert_eq!(page.path, "/hello/");
}
// The permalink is the base URL followed by the page path.
#[test]
fn test_build_page_permalink() {
let fm = Frontmatter::default();
let page = build_page(fm, "body".into(), "posts/hello.md", "https://example.com");
assert_eq!(page.permalink, "https://example.com/posts/hello/");
}
// A co-located `index.md` is named after its directory and placed in the directory above.
#[test]
fn test_build_page_colocated_index() {
let fm = Frontmatter::default();
let page = build_page(
fm,
"body".into(),
"posts/my-post/index.md",
"https://example.com",
);
assert_eq!(page.slug, "my-post");
assert_eq!(page.path, "/posts/my-post/");
assert_eq!(page.permalink, "https://example.com/posts/my-post/");
}
// A frontmatter slug also replaces the directory name of a co-located page.
#[test]
fn test_build_page_colocated_with_custom_slug() {
let fm = Frontmatter {
slug: Some("custom".into()),
..Default::default()
};
let page = build_page(
fm,
"body".into(),
"posts/my-post/index.md",
"https://example.com",
);
assert_eq!(page.slug, "custom");
assert_eq!(page.path, "/posts/custom/");
}
// Words are counted by whitespace; reading time never drops below one.
#[test]
fn test_build_page_word_count() {
let fm = Frontmatter::default();
let body = "one two three four five six seven eight nine ten";
let page = build_page(fm, body.into(), "test.md", "https://example.com");
assert_eq!(page.word_count, 10);
assert_eq!(page.reading_time, 1); }
// A string array under `tags` in `rest` becomes the `tags` taxonomy.
#[test]
fn test_build_page_tags() {
let mut rest = HashMap::new();
rest.insert(
"tags".to_string(),
toml::Value::Array(vec![
toml::Value::String("rust".into()),
toml::Value::String("test".into()),
]),
);
let fm = Frontmatter {
rest,
..Default::default()
};
let page = build_page(fm, "body".into(), "test.md", "https://example.com");
assert_eq!(
page.taxonomies.get("tags").unwrap(),
&vec!["rust".to_string(), "test".to_string()]
);
}
// Any other string-array key in `rest` becomes a taxonomy of that name.
#[test]
fn test_build_page_custom_taxonomy() {
let mut rest = HashMap::new();
rest.insert(
"categories".to_string(),
toml::Value::Array(vec![toml::Value::String("tutorial".into())]),
);
let fm = Frontmatter {
rest,
..Default::default()
};
let page = build_page(fm, "body".into(), "test.md", "https://example.com");
assert_eq!(
page.taxonomies.get("categories").unwrap(),
&vec!["tutorial".to_string()]
);
}
// The root `_index.md` maps to `/`.
#[test]
fn test_build_section_root() {
let fm = Frontmatter {
title: Some("Home".into()),
..Default::default()
};
let section = build_section(fm, "body".into(), "_index.md", "https://example.com");
assert_eq!(section.path, "/");
assert_eq!(section.permalink, "https://example.com/");
assert_eq!(section.title, "Home");
}
// A nested `_index.md` maps to `/{dir}/`.
#[test]
fn test_build_section_nested() {
let fm = Frontmatter {
title: Some("Blog".into()),
..Default::default()
};
let section = build_section(fm, "body".into(), "posts/_index.md", "https://example.com");
assert_eq!(section.path, "/posts/");
assert_eq!(section.permalink, "https://example.com/posts/");
}
// Scalar TOML values convert to the matching JSON scalars.
#[test]
fn test_toml_to_json_primitives() {
assert_eq!(
toml_to_json(&toml::Value::String("hello".into())),
serde_json::json!("hello")
);
assert_eq!(
toml_to_json(&toml::Value::Integer(42)),
serde_json::json!(42)
);
assert_eq!(
toml_to_json(&toml::Value::Boolean(true)),
serde_json::json!(true)
);
assert_eq!(
toml_to_json(&toml::Value::Float(1.23)),
serde_json::json!(1.23)
);
}
// Tables and arrays are converted recursively.
#[test]
fn test_toml_to_json_nested() {
let mut table = toml::map::Map::new();
table.insert("key".into(), toml::Value::String("value".into()));
table.insert(
"nums".into(),
toml::Value::Array(vec![toml::Value::Integer(1), toml::Value::Integer(2)]),
);
let result = toml_to_json(&toml::Value::Table(table));
assert_eq!(result["key"], serde_json::json!("value"));
assert_eq!(result["nums"], serde_json::json!([1, 2]));
}
}