use std::{
collections::{HashMap, HashSet},
fs,
io::{self, Write},
path::{Path, PathBuf},
sync::atomic::{AtomicUsize, Ordering},
time::{Duration, Instant},
};
use percent_encoding::percent_decode_str;
use walkdir::WalkDir;
use scraper::{Html, Selector};
use std::sync::Arc;
use papaya::HashMap as ConcurrentHashMap;
use crate::{
config::Config,
embedded_pico,
errors::BuildError,
link_index::{InboundLink, OutboundLink, PageLinks},
link_transform::{LinkTransformConfig, make_relative_url},
markdown,
oembed_cache::OembedCache,
repo::{MarkdownInfo, Repo},
server::{
DEFAULT_FILES, MediaViewerType, generate_breadcrumbs, get_current_dir_name,
get_parent_path, markdown_file_to_json,
},
sorting::sort_files,
templates::Templates,
};
/// Upper bound applied to the automatically derived build concurrency
/// (twice the available parallelism); an explicit `build_concurrency`
/// setting is not capped by it.
const MAX_BUILD_CONCURRENCY: usize = 32;
/// Concurrency used when the available parallelism cannot be queried.
const FALLBACK_BUILD_CONCURRENCY: usize = 4;
/// Counts the non-empty `/`-separated segments of a URL path.
///
/// Leading, trailing and repeated slashes contribute nothing, so both `""`
/// and `"/"` have depth 0 while `"/docs/guide/"` has depth 2.
fn url_depth(url_path: &str) -> usize {
    let mut depth = 0;
    for segment in url_path.split('/') {
        if !segment.is_empty() {
            depth += 1;
        }
    }
    depth
}
/// Builds the relative path from a page `depth` levels below the site root
/// to the `.mbr/` folder: one `../` per level followed by `.mbr/`.
fn relative_base(depth: usize) -> String {
    let mut prefix = "../".repeat(depth);
    prefix.push_str(".mbr/");
    prefix
}
/// Builds the relative path from a page `depth` levels below the site root
/// back to the root: one `../` per level, or an empty string at depth 0.
fn relative_root(depth: usize) -> String {
    (0..depth).map(|_| "../").collect()
}
/// Resolves `relative_url` against the page at `base_url` and returns an
/// absolute URL path that always starts and ends with `/`.
///
/// * A target starting with `/` ignores the base; only its trailing slashes
///   are normalised to exactly one (`.`/`..` inside it are left untouched).
/// * Otherwise resolution starts from the base with its last segment removed;
///   empty and `.` segments are skipped and `..` removes one segment
///   (extra `..` at the top are ignored).
fn resolve_relative_url(base_url: &str, relative_url: &str) -> String {
    if relative_url.starts_with('/') {
        return match relative_url.trim_end_matches('/') {
            "" => String::from("/"),
            path => format!("{}/", path),
        };
    }

    let mut stack: Vec<&str> = base_url.split('/').filter(|s| !s.is_empty()).collect();
    // Drop the page's own segment so resolution starts one level above it.
    stack.pop();

    for piece in relative_url.split('/') {
        if piece.is_empty() || piece == "." {
            continue;
        }
        if piece == ".." {
            stack.pop();
        } else {
            stack.push(piece);
        }
    }

    if stack.is_empty() {
        return String::from("/");
    }
    format!("/{}/", stack.join("/"))
}
/// Overwrites the current terminal line with `stage` (no trailing newline)
/// and flushes stdout so the text shows up immediately.
fn print_stage(stage: &str) {
    // Carriage return + ANSI "erase to end of line".
    const CLEAR_LINE: &str = "\r\x1b[K";
    print!("{CLEAR_LINE}{stage}");
    // A failed flush only delays the progress text; ignore it.
    let _ = io::stdout().flush();
}
/// Overwrites the current terminal line with `stage (current/total)`
/// (no trailing newline) and flushes stdout.
fn print_progress(stage: &str, current: usize, total: usize) {
    // Carriage return + ANSI "erase to end of line".
    const CLEAR_LINE: &str = "\r\x1b[K";
    print!("{CLEAR_LINE}{stage} ({current}/{total})");
    // A failed flush only delays the progress text; ignore it.
    let _ = io::stdout().flush();
}
/// Formats a duration for progress output.
///
/// Durations under one minute are shown as seconds with two decimals
/// (`"1.50s"`); longer ones as whole minutes plus seconds with one decimal
/// (`"1m 30.0s"`).
fn format_duration(d: Duration) -> String {
    let secs = d.as_secs_f64();
    if secs >= 60.0 {
        // Round to tenths of a second *before* splitting into minutes and
        // seconds. Rounding the seconds part on its own can print "60.0s"
        // (e.g. 119.97s -> "1m 60.0s") because the carry never reaches the
        // minutes.
        let tenths = (secs * 10.0).round() as u64;
        let minutes = tenths / 600;
        let remainder = tenths % 600;
        format!("{}m {}.{}s", minutes, remainder / 10, remainder % 10)
    } else {
        format!("{:.2}s", secs)
    }
}
/// Replaces the current terminal line with `"<stage> ... <count> done"`,
/// appending the formatted duration in parentheses when one is given, and
/// ends the line.
fn print_stage_done(stage: &str, count: usize, duration: Option<Duration>) {
    let timing = match duration {
        Some(elapsed) => format!(" ({})", format_duration(elapsed)),
        None => String::new(),
    };
    println!("\r\x1b[K{} ... {} done{}", stage, count, timing);
}
/// Replaces the current terminal line with `"<stage> ... done"`, appending
/// the formatted duration in parentheses when one is given, and ends the line.
fn print_done(stage: &str, duration: Option<Duration>) {
    let timing = match duration {
        Some(elapsed) => format!(" ({})", format_duration(elapsed)),
        None => String::new(),
    };
    println!("\r\x1b[K{} ... done{}", stage, timing);
}
/// Lexically normalises `path` without touching the filesystem.
///
/// * `.` components are dropped.
/// * `..` removes the preceding named component. When there is nothing to
///   remove (start of a relative path) or the preceding component is the
///   root / a drive prefix, the `..` is simply discarded, so the result
///   never climbs above its starting point.
/// * Everything else is kept in order.
fn normalize_path(path: &Path) -> PathBuf {
    use std::path::Component;

    let mut kept: Vec<Component<'_>> = Vec::new();
    for component in path.components() {
        match component {
            Component::CurDir => {}
            Component::ParentDir => {
                // Only a named segment can be cancelled by `..`. Popping the
                // root or a prefix would silently turn an absolute path into
                // a relative one (`/../a` -> `a`).
                if matches!(kept.last(), Some(Component::Normal(_))) {
                    kept.pop();
                }
            }
            Component::Prefix(_) | Component::RootDir | Component::Normal(_) => {
                kept.push(component);
            }
        }
    }
    kept.iter().collect()
}
/// Returns `true` when `path` itself is a known file, or when it is a
/// directory-style target whose `index.html` is a known file.
fn link_target_exists(path: &Path, valid_files: &HashSet<PathBuf>) -> bool {
    if valid_files.contains(path) {
        return true;
    }
    let directory_index = path.join("index.html");
    valid_files.contains(&directory_index)
}
/// Counters and timing collected over one `Builder::build` run.
#[derive(Debug, Default)]
pub struct BuildStats {
/// Number of markdown files rendered (count returned by `render_markdown_files`).
pub markdown_pages: usize,
/// Number of directory listing pages processed (count returned by `render_directory_pages`).
pub section_pages: usize,
/// Number of tag pages processed; stays 0 when `build_tag_pages` is disabled.
pub tag_pages: usize,
/// Value returned by `symlink_assets`.
pub assets_linked: usize,
/// Wall-clock time of the whole `build` call.
pub duration: Duration,
/// `Some(result of run_pagefind)` after a build; `None` only in a default-constructed value.
/// `build` reports `Some(true)` as done and anything else as skipped.
pub pagefind_indexed: Option<bool>,
/// Number of broken links found by `validate_links`; stays 0 when link checks are skipped.
pub broken_links: usize,
/// Number of link files written (count returned by `write_link_files`);
/// stays 0 when `link_tracking` is disabled.
pub link_files: usize,
}
/// One broken link found during link validation; `build` prints it as
/// `source_page → link_url`.
#[derive(Debug, Clone)]
pub struct BrokenLink {
/// Page on which the link was found.
pub source_page: String,
/// The link URL that was reported as broken.
pub link_url: String,
}
/// Static-site builder: holds everything a `build` run needs.
pub struct Builder {
// Build configuration supplied by the caller.
config: Config,
// Templates loaded from `config.root_dir` / `config.template_folder`.
templates: Templates,
// Directory the generated site is written into.
output_dir: PathBuf,
// Repository state initialised from the config and scanned at the start of `build`.
repo: Repo,
// Shared oEmbed cache, sized by `config.oembed_cache_size`, handed to the markdown renderer.
oembed_cache: Arc<OembedCache>,
// Page URL path -> outbound links, filled while rendering markdown (only when
// `link_tracking` is enabled) and read back by `write_link_files`.
build_link_index: Arc<ConcurrentHashMap<String, Vec<OutboundLink>>>,
}
impl Builder {
pub fn new(config: Config, output_dir: PathBuf) -> Result<Self, BuildError> {
let templates = Templates::new(&config.root_dir, config.template_folder.as_deref())?;
let repo = Repo::init_from_config(&config);
let oembed_cache = Arc::new(OembedCache::new(config.oembed_cache_size));
let build_link_index = Arc::new(ConcurrentHashMap::new());
tracing::debug!(
"build: initialized oembed cache with {} bytes max",
config.oembed_cache_size
);
Ok(Builder {
config,
templates,
output_dir,
repo,
oembed_cache,
build_link_index,
})
}
/// Runs the complete site build and returns the collected statistics.
///
/// Stages, in order: scan the repository and static folder, reset the output
/// directory, render markdown pages, optionally write link files, render
/// directory pages, optionally render tag pages, link assets, process the
/// static folder, copy the `.mbr` theme folder, generate the 404 and media
/// viewer pages, optionally validate links, and finally run the search
/// indexer. Progress is printed to stdout; broken links go to stderr.
///
/// # Errors
/// Returns the first error produced by any stage that is invoked with `?`.
/// Broken links are only reported and counted; they do not fail the build.
pub async fn build(&self) -> Result<BuildStats, BuildError> {
let start = Instant::now();
let mut stats = BuildStats::default();
// Stage: scan markdown/other files and the static folder.
let stage_start = Instant::now();
print_stage("Scanning repository...");
self.repo
.scan_all()
.map_err(|e| crate::errors::RepoError::ScanFailed {
path: self.config.root_dir.clone(),
source: std::io::Error::other(e.to_string()),
})?;
self.repo
.scan_static_folder()
.map_err(|e| crate::errors::RepoError::ScanFailed {
path: self.config.root_dir.clone(),
source: std::io::Error::other(e.to_string()),
})?;
let file_count = self.repo.markdown_files.pin().len() + self.repo.other_files.pin().len();
print_stage_done(
"Scanning repository",
file_count,
Some(stage_start.elapsed()),
);
// Stage: start from an empty output directory.
let stage_start = Instant::now();
print_stage("Cleaning output directory...");
self.prepare_output_dir()?;
print_done("Cleaning output directory", Some(stage_start.elapsed()));
// Markdown must be rendered before link files: rendering is what fills
// `build_link_index`, which `write_link_files` reads.
stats.markdown_pages = self.render_markdown_files().await?;
if self.config.link_tracking {
stats.link_files = self.write_link_files().await?;
}
// Directory and tag pages are written only where no file exists yet, so
// pages rendered from markdown above take precedence.
stats.section_pages = self.render_directory_pages().await?;
if self.config.build_tag_pages {
stats.tag_pages = self.render_tag_pages().await?;
} else {
println!("Generating tag pages ... skipped");
}
// Stage: link non-markdown assets into the output.
let stage_start = Instant::now();
print_stage("Linking assets...");
stats.assets_linked = self.symlink_assets()?;
print_stage_done(
"Linking assets",
stats.assets_linked,
Some(stage_start.elapsed()),
);
let stage_start = Instant::now();
print_stage("Processing static folder...");
self.handle_static_folder()?;
print_done("Processing static folder", Some(stage_start.elapsed()));
let stage_start = Instant::now();
print_stage("Copying theme and assets...");
self.handle_mbr_folder()?;
print_done("Copying theme and assets", Some(stage_start.elapsed()));
self.generate_404_page()?;
self.generate_media_viewer_pages()?;
// Link validation runs after all output has been generated.
if self.config.skip_link_checks {
println!("Validating links ... skipped");
} else {
let stage_start = Instant::now();
print_stage("Validating links...");
let broken_links = self.validate_links();
stats.broken_links = broken_links.len();
print_done("Validating links", Some(stage_start.elapsed()));
// Broken links are reported on stderr but do not abort the build.
if !broken_links.is_empty() {
eprintln!(
"\n⚠️ Broken links detected ({} total):",
broken_links.len()
);
for link in &broken_links {
eprintln!(" {} → {}", link.source_page, link.link_url);
}
eprintln!();
}
}
// Stage: search index. A `false` result is reported as "skipped".
let stage_start = Instant::now();
print_stage("Building search index...");
stats.pagefind_indexed = Some(self.run_pagefind().await);
if stats.pagefind_indexed == Some(true) {
print_done("Building search index", Some(stage_start.elapsed()));
} else {
println!("\r\x1b[KBuilding search index ... skipped");
}
stats.duration = start.elapsed();
Ok(stats)
}
fn prepare_output_dir(&self) -> Result<(), BuildError> {
if self.output_dir.exists() {
let tmp_dir = self
.output_dir
.with_extension(format!("old.{}", std::process::id()));
if tmp_dir.exists() {
let _ = fs::remove_dir_all(&tmp_dir);
}
fs::rename(&self.output_dir, &tmp_dir).map_err(|e| BuildError::CreateDirFailed {
path: self.output_dir.clone(),
source: e,
})?;
std::thread::spawn(move || {
let _ = fs::remove_dir_all(&tmp_dir);
});
}
fs::create_dir_all(&self.output_dir).map_err(|e| BuildError::CreateDirFailed {
path: self.output_dir.clone(),
source: e,
})?;
Ok(())
}
/// Number of worker threads to use for the parallel build stages.
///
/// An explicit `build_concurrency` setting is returned as-is. Otherwise the
/// value is twice the available parallelism, capped at
/// `MAX_BUILD_CONCURRENCY`, or `FALLBACK_BUILD_CONCURRENCY` when the
/// parallelism cannot be determined.
fn get_concurrency(&self) -> usize {
    if let Some(configured) = self.config.build_concurrency {
        return configured;
    }
    match std::thread::available_parallelism() {
        Ok(cores) => (cores.get() * 2).min(MAX_BUILD_CONCURRENCY),
        Err(_) => FALLBACK_BUILD_CONCURRENCY,
    }
}
/// Renders every markdown file known to the repository on a dedicated rayon
/// thread pool and returns how many files were scheduled.
///
/// A per-directory index of sibling pages (sorted with the configured sort
/// order) is built first so each page can find its previous/next neighbour.
/// Progress is printed every 100 pages and at the end.
///
/// # Errors
/// Returns `BuildError::CreateDirFailed` when the thread pool cannot be
/// created, or the first error recorded by a worker. Once an error has been
/// recorded, workers skip the pages they have not started yet.
async fn render_markdown_files(&self) -> Result<usize, BuildError> {
    use rayon::prelude::*;

    let started = Instant::now();

    // Snapshot the file map so rendering works on owned data.
    let pages: Vec<_> = self
        .repo
        .markdown_files
        .pin()
        .iter()
        .map(|(path, info)| (path.clone(), info.clone()))
        .collect();
    let total = pages.len();
    let concurrency = self.get_concurrency();
    tracing::info!(
        "Rendering {} markdown files with concurrency {}",
        total,
        concurrency
    );

    // Source directory -> sorted JSON descriptions of the pages inside it.
    let mut siblings_by_dir: HashMap<PathBuf, Vec<serde_json::Value>> = HashMap::new();
    for (_, info) in &pages {
        let dir = info
            .raw_path
            .parent()
            .unwrap_or(Path::new(""))
            .to_path_buf();
        siblings_by_dir
            .entry(dir)
            .or_default()
            .push(markdown_file_to_json(info));
    }
    for group in siblings_by_dir.values_mut() {
        sort_files(group, &self.config.sort);
    }

    let finished = AtomicUsize::new(0);
    print_progress("Rendering markdown", 0, total);

    let pool = rayon::ThreadPoolBuilder::new()
        .num_threads(concurrency)
        .build()
        .map_err(|e| BuildError::CreateDirFailed {
            path: self.output_dir.clone(),
            source: std::io::Error::other(format!("Failed to create rayon thread pool: {}", e)),
        })?;
    let tera = self.templates.tera_clone();
    let first_error: std::sync::Mutex<Option<BuildError>> = std::sync::Mutex::new(None);

    pool.install(|| {
        pages.par_iter().for_each(|(path, info)| {
            // Stop doing work as soon as any worker has failed.
            if first_error.lock().unwrap().is_some() {
                return;
            }
            if let Err(e) = self.render_single_markdown_sync(path, info, &siblings_by_dir, &tera) {
                let mut slot = first_error.lock().unwrap();
                if slot.is_none() {
                    *slot = Some(e);
                }
                return;
            }
            let done = finished.fetch_add(1, Ordering::Relaxed) + 1;
            if done.is_multiple_of(100) || done == total {
                print_progress("Rendering markdown", done, total);
            }
        });
    });

    match first_error.into_inner().unwrap() {
        Some(e) => Err(e),
        None => {
            print_stage_done("Rendering markdown", total, Some(started.elapsed()));
            Ok(total)
        }
    }
}
/// Writes one `links.json` per page and returns how many pages were scheduled.
///
/// The outbound links recorded during markdown rendering are copied into a
/// plain map and inverted into an inbound index (internal links only, with
/// targets resolved against the linking page). Files are then written in
/// parallel for every page that has outbound links, every markdown page and,
/// when tag pages are enabled, every tag source index and tag page URL.
///
/// # Errors
/// Returns `BuildError::CreateDirFailed` when the thread pool cannot be
/// created, or the first error recorded by a worker.
async fn write_link_files(&self) -> Result<usize, BuildError> {
    use rayon::prelude::*;

    let started = Instant::now();
    print_stage("Building link index...");

    let link_guard = self.build_link_index.pin();
    let mut inbound_index: HashMap<String, Vec<InboundLink>> = HashMap::new();
    let mut outbound_index: HashMap<String, Vec<OutboundLink>> = HashMap::new();
    for (page_url, links) in link_guard.iter() {
        outbound_index.insert(page_url.clone(), links.clone());
        // Only internal links produce an inbound entry on their target.
        for link in links.iter().filter(|link| link.internal) {
            let target = resolve_relative_url(page_url, &link.to);
            inbound_index.entry(target).or_default().push(InboundLink {
                from: page_url.clone(),
                text: link.text.clone(),
                anchor: link.anchor.clone(),
            });
        }
    }

    // Every URL that gets a links.json.
    let mut url_set: HashSet<String> = outbound_index.keys().cloned().collect();
    for (_, info) in self.repo.markdown_files.pin().iter() {
        url_set.insert(info.url_path.clone());
    }
    if self.config.build_tag_pages {
        for tag_source in &self.config.tag_sources {
            let source = tag_source.url_source();
            url_set.insert(format!("/{}/", source));
            for tag in self.repo.tag_index.get_all_tags(&source) {
                url_set.insert(format!("/{}/{}/", source, tag.normalized));
            }
        }
    }

    let total = url_set.len();
    let concurrency = self.get_concurrency();
    let finished = AtomicUsize::new(0);
    print_progress("Writing link files", 0, total);
    let page_urls: Vec<String> = url_set.into_iter().collect();

    let pool = rayon::ThreadPoolBuilder::new()
        .num_threads(concurrency)
        .build()
        .map_err(|e| BuildError::CreateDirFailed {
            path: self.output_dir.clone(),
            source: std::io::Error::other(format!("Failed to create rayon thread pool: {}", e)),
        })?;
    let first_error: std::sync::Mutex<Option<BuildError>> = std::sync::Mutex::new(None);

    pool.install(|| {
        page_urls.par_iter().for_each(|url_path| {
            // Stop doing work as soon as any worker has failed.
            if first_error.lock().unwrap().is_some() {
                return;
            }
            if let Err(e) = self.write_single_link_file(url_path, &outbound_index, &inbound_index) {
                let mut slot = first_error.lock().unwrap();
                if slot.is_none() {
                    *slot = Some(e);
                }
                return;
            }
            let done = finished.fetch_add(1, Ordering::Relaxed) + 1;
            if done.is_multiple_of(100) || done == total {
                print_progress("Writing link files", done, total);
            }
        });
    });

    match first_error.into_inner().unwrap() {
        Some(e) => Err(e),
        None => {
            print_stage_done("Writing link files", total, Some(started.elapsed()));
            Ok(total)
        }
    }
}
/// Writes the `links.json` for the page at `url_path`.
///
/// Outbound links come from `try_build_tag_outbound` when it returns a
/// value, otherwise from `outbound_index`; inbound links come from
/// `inbound_index`. Missing entries become empty lists. The file is placed
/// in the output directory under the page's URL path.
///
/// # Errors
/// `BuildError::CreateDirFailed` when the parent directory cannot be
/// created, `BuildError::WriteFailed` when serialisation or the write fails.
fn write_single_link_file(
    &self,
    url_path: &str,
    outbound_index: &HashMap<String, Vec<OutboundLink>>,
    inbound_index: &HashMap<String, Vec<InboundLink>>,
) -> Result<(), BuildError> {
    let outbound = match self.try_build_tag_outbound(url_path) {
        Some(tag_links) => tag_links,
        None => outbound_index.get(url_path).cloned().unwrap_or_default(),
    };
    let inbound = inbound_index.get(url_path).cloned().unwrap_or_default();
    let page_links = PageLinks { inbound, outbound };

    // The site root maps straight onto the output directory.
    let output_path = match url_path.trim_start_matches('/') {
        "" => self.output_dir.join("links.json"),
        relative => self.output_dir.join(relative).join("links.json"),
    };

    if let Some(parent) = output_path.parent() {
        fs::create_dir_all(parent).map_err(|source| BuildError::CreateDirFailed {
            path: parent.to_path_buf(),
            source,
        })?;
    }

    let json = match serde_json::to_string(&page_links) {
        Ok(text) => text,
        Err(e) => {
            return Err(BuildError::WriteFailed {
                path: output_path.clone(),
                source: std::io::Error::other(format!("JSON serialization failed: {}", e)),
            });
        }
    };

    fs::write(&output_path, json).map_err(|source| BuildError::WriteFailed {
        path: output_path,
        source,
    })
}
/// Renders one markdown file to `<output_dir>/<url_path>/index.html`.
///
/// `path` is the markdown source, `info` its repository metadata,
/// `sibling_index` maps a source directory to the sorted JSON descriptions of
/// the pages in it (used for previous/next links) and `tera` is the template
/// engine to render with.
///
/// # Errors
/// `BuildError::RenderFailed` when the markdown renderer fails, the error of
/// `Templates::render_markdown_with_tera` when templating fails,
/// `BuildError::CreateDirFailed` / `BuildError::WriteFailed` for filesystem
/// failures.
fn render_single_markdown_sync(
&self,
path: &Path,
info: &MarkdownInfo,
sibling_index: &HashMap<PathBuf, Vec<serde_json::Value>>,
tera: &tera::Tera,
) -> Result<(), BuildError> {
// True when the file's name equals the configured index file name.
let is_index_file = path
.file_name()
.and_then(|f| f.to_str())
.is_some_and(|f| f == self.config.index_file);
let link_transform_config = LinkTransformConfig {
markdown_extensions: self.config.markdown_extensions.clone(),
index_file: self.config.index_file.clone(),
is_index_file,
url_depth: Some(url_depth(&info.url_path)),
};
tracing::debug!("build: rendering {}", path.display());
let valid_tag_sources = crate::config::tag_sources_to_set(&self.config.tag_sources);
// NOTE(review): the two `false` arguments are flags defined by
// `markdown::render_sync`; their meaning is not visible from this file.
let render_result = markdown::render_sync(
path.to_path_buf(),
&self.config.root_dir,
self.config.oembed_timeout_ms,
link_transform_config,
Some(self.oembed_cache.clone()),
false, false, valid_tag_sources,
)
.map_err(|e| BuildError::RenderFailed {
path: path.to_path_buf(),
source: Box::new(crate::MbrError::Io(std::io::Error::other(e.to_string()))),
})?;
let mut frontmatter = render_result.frontmatter;
let headings = render_result.headings;
let html = render_result.html;
let outbound_links = render_result.outbound_links;
let has_h1 = render_result.has_h1;
let word_count = render_result.word_count;
// Record outbound links for the later link-file stage; pages without any
// outbound link get no entry.
if self.config.link_tracking && !outbound_links.is_empty() {
self.build_link_index
.pin()
.insert(info.url_path.clone(), outbound_links);
}
tracing::debug!("build: rendered {}", path.display());
frontmatter.insert(
"markdown_source".to_string(),
serde_json::Value::String(info.url_path.clone()),
);
frontmatter.insert("server_mode".to_string(), serde_json::json!(false));
// All URLs handed to the template are made relative to this page's depth.
let depth = url_depth(&info.url_path);
let url_path_for_breadcrumbs = std::path::Path::new(&info.url_path);
let breadcrumbs = crate::server::generate_breadcrumbs(url_path_for_breadcrumbs);
let breadcrumbs_json: Vec<_> = breadcrumbs
.iter()
.map(|b| {
serde_json::json!({
"name": b.name,
"url": make_relative_url(&b.url, depth)
})
})
.collect();
let current_dir_name = crate::server::get_current_dir_name(url_path_for_breadcrumbs);
let mut extra_context = std::collections::HashMap::new();
extra_context.insert(
"breadcrumbs".to_string(),
serde_json::json!(breadcrumbs_json),
);
extra_context.insert(
"current_dir_name".to_string(),
serde_json::json!(current_dir_name),
);
extra_context.insert("headings".to_string(), serde_json::json!(headings));
extra_context.insert("has_h1".to_string(), serde_json::json!(has_h1));
// The tag sources are serialised to a JSON *string* and that string is
// stored as the context value (falls back to "[]" if serialisation fails).
let tag_sources_json = serde_json::to_string(
&self
.config
.tag_sources
.iter()
.map(|ts| {
serde_json::json!({
"field": ts.field,
"urlSource": ts.url_source(),
"label": ts.singular_label(),
"labelPlural": ts.plural_label()
})
})
.collect::<Vec<_>>(),
)
.unwrap_or_else(|_| "[]".to_string());
extra_context.insert(
"tag_sources".to_string(),
serde_json::json!(tag_sources_json),
);
extra_context.insert(
"sidebar_style".to_string(),
serde_json::json!(self.config.sidebar_style),
);
extra_context.insert(
"sidebar_max_items".to_string(),
serde_json::json!(self.config.sidebar_max_items),
);
extra_context.insert(
"title_prefix".to_string(),
serde_json::json!(self.config.title_prefix),
);
extra_context.insert(
"title_suffix".to_string(),
serde_json::json!(self.config.title_suffix),
);
// Reading time is the word count divided by WORDS_PER_MINUTE, rounded up.
let reading_time_minutes = word_count.div_ceil(crate::constants::WORDS_PER_MINUTE);
extra_context.insert("word_count".to_string(), serde_json::json!(word_count));
extra_context.insert(
"reading_time_minutes".to_string(),
serde_json::json!(reading_time_minutes),
);
// Source path relative to the repository root; the full path is used when
// it does not lie under the root.
let relative_path = path
.strip_prefix(&self.config.root_dir)
.unwrap_or(path)
.to_string_lossy();
extra_context.insert("file_path".to_string(), serde_json::json!(relative_path));
// Modification time in seconds since the Unix epoch; omitted when the
// metadata or timestamp is unavailable.
if let Ok(metadata) = std::fs::metadata(path)
&& let Ok(modified) = metadata.modified()
&& let Ok(duration) = modified.duration_since(std::time::UNIX_EPOCH)
{
extra_context.insert(
"modified_timestamp".to_string(),
serde_json::json!(duration.as_secs()),
);
}
// Previous/next navigation: locate this page among the pages that share
// its source directory and link to its neighbours in sort order.
let parent_dir = info
.raw_path
.parent()
.unwrap_or(Path::new(""))
.to_path_buf();
let empty_siblings = Vec::new();
let siblings = sibling_index.get(&parent_dir).unwrap_or(&empty_siblings);
if let Some(current_idx) = siblings.iter().position(|f| {
f.get("url_path")
.and_then(|v| v.as_str())
.is_some_and(|p| p == info.url_path)
}) {
if current_idx > 0
&& let Some(prev) = siblings.get(current_idx - 1)
{
let prev_url = prev.get("url_path").and_then(|v| v.as_str()).unwrap_or("/");
extra_context.insert(
"prev_page".to_string(),
serde_json::json!({
"url": make_relative_url(prev_url, depth),
"title": prev.get("title").and_then(|v| v.as_str()).unwrap_or("Previous")
}),
);
}
if let Some(next) = siblings.get(current_idx + 1) {
let next_url = next.get("url_path").and_then(|v| v.as_str()).unwrap_or("/");
extra_context.insert(
"next_page".to_string(),
serde_json::json!({
"url": make_relative_url(next_url, depth),
"title": next.get("title").and_then(|v| v.as_str()).unwrap_or("Next")
}),
);
}
}
extra_context.insert(
"relative_base".to_string(),
serde_json::json!(relative_base(depth)),
);
extra_context.insert(
"relative_root".to_string(),
serde_json::json!(relative_root(depth)),
);
let html_output =
Templates::render_markdown_with_tera(tera, &html, frontmatter, extra_context)?;
// Each page becomes `<url_path>/index.html`; the site root becomes
// `index.html` directly in the output directory. After the leading slashes
// are trimmed the `== "/"` comparison can no longer match, so only the
// emptiness check is effective.
let url_path = info.url_path.trim_start_matches('/');
let output_path = if url_path.is_empty() || url_path == "/" {
self.output_dir.join("index.html")
} else {
self.output_dir.join(url_path).join("index.html")
};
if let Some(parent) = output_path.parent() {
fs::create_dir_all(parent).map_err(|e| BuildError::CreateDirFailed {
path: parent.to_path_buf(),
source: e,
})?;
}
fs::write(&output_path, html_output).map_err(|e| BuildError::WriteFailed {
path: output_path,
source: e,
})?;
Ok(())
}
/// Renders a listing page for the site root and for every ancestor directory
/// of a markdown page's URL path, in parallel, and returns how many
/// directories were scheduled.
///
/// # Errors
/// Returns `BuildError::CreateDirFailed` when the thread pool cannot be
/// created, or the first error recorded by a worker.
async fn render_directory_pages(&self) -> Result<usize, BuildError> {
    use rayon::prelude::*;

    let started = Instant::now();

    // The empty path stands for the site root and is always present.
    let mut dir_set: HashSet<PathBuf> = HashSet::new();
    dir_set.insert(PathBuf::new());
    for (_, info) in self.repo.markdown_files.pin().iter() {
        let trimmed = info.url_path.trim_matches('/');
        // An empty path (the root page) has no parent and adds nothing.
        let Some(parent) = Path::new(trimmed).parent() else {
            continue;
        };
        // Register every prefix of the parent: a, a/b, a/b/c, ...
        let mut prefix = PathBuf::new();
        for component in parent.components() {
            if let std::path::Component::Normal(name) = component {
                prefix.push(name);
                dir_set.insert(prefix.clone());
            }
        }
    }

    let total = dir_set.len();
    let concurrency = self.get_concurrency();
    tracing::info!(
        "Rendering {} directory pages with concurrency {}",
        total,
        concurrency
    );
    let tera = self.templates.tera_clone();
    let finished = AtomicUsize::new(0);
    print_progress("Generating sections", 0, total);
    let dirs: Vec<PathBuf> = dir_set.into_iter().collect();

    let pool = rayon::ThreadPoolBuilder::new()
        .num_threads(concurrency)
        .build()
        .map_err(|e| BuildError::CreateDirFailed {
            path: self.output_dir.clone(),
            source: std::io::Error::other(format!("Failed to create rayon thread pool: {}", e)),
        })?;
    let first_error: std::sync::Mutex<Option<BuildError>> = std::sync::Mutex::new(None);

    pool.install(|| {
        dirs.par_iter().for_each(|dir| {
            // Stop doing work as soon as any worker has failed.
            if first_error.lock().unwrap().is_some() {
                return;
            }
            if let Err(e) = self.render_directory_page_sync(dir, &tera) {
                let mut slot = first_error.lock().unwrap();
                if slot.is_none() {
                    *slot = Some(e);
                }
                return;
            }
            let done = finished.fetch_add(1, Ordering::Relaxed) + 1;
            if done.is_multiple_of(100) || done == total {
                print_progress("Generating sections", done, total);
            }
        });
    });

    match first_error.into_inner().unwrap() {
        Some(e) => Err(e),
        None => {
            print_stage_done("Generating sections", total, Some(started.elapsed()));
            Ok(total)
        }
    }
}
/// Renders the listing page for one directory of the site.
///
/// `relative_dir` is the directory relative to the site root (empty for the
/// root itself). The root uses the `home.html` template, every other
/// directory `section.html`. The template context contains breadcrumbs, the
/// markdown pages directly inside the directory (`files`, sorted with the
/// configured sort order), the names of its sub-directories (`subdirs`,
/// sorted by name) and the shared sidebar/title settings. All URLs are made
/// relative to the directory's depth.
///
/// The page is only written when no `index.html` exists at the target yet,
/// so a page already rendered there is never overwritten.
///
/// # Errors
/// Propagates template rendering errors and returns
/// `BuildError::CreateDirFailed` / `BuildError::WriteFailed` for filesystem
/// failures.
fn render_directory_page_sync(
    &self,
    relative_dir: &Path,
    tera: &tera::Tera,
) -> Result<(), BuildError> {
    let is_root = relative_dir.as_os_str().is_empty();
    let depth = if is_root {
        0
    } else {
        relative_dir.components().count()
    };
    let mut context: HashMap<String, serde_json::Value> = HashMap::new();

    let breadcrumbs = generate_breadcrumbs(relative_dir);
    let breadcrumbs_json: Vec<serde_json::Value> = breadcrumbs
        .iter()
        .map(|b| {
            serde_json::json!({
                "name": b.name,
                "url": make_relative_url(&b.url, depth)
            })
        })
        .collect();
    context.insert(
        "breadcrumbs".to_string(),
        serde_json::Value::Array(breadcrumbs_json),
    );

    let current_dir_name = if is_root {
        "Home".to_string()
    } else {
        get_current_dir_name(relative_dir)
    };
    context.insert(
        "current_dir_name".to_string(),
        serde_json::Value::String(current_dir_name),
    );

    if let Some(parent) = get_parent_path(relative_dir) {
        context.insert(
            "parent_path".to_string(),
            serde_json::Value::String(make_relative_url(&parent, depth)),
        );
    }
    context.insert(
        "relative_base".to_string(),
        serde_json::Value::String(relative_base(depth)),
    );
    context.insert(
        "relative_root".to_string(),
        serde_json::Value::String(relative_root(depth)),
    );

    // URL prefix shared by everything that lives inside this directory.
    let dir_prefix = if is_root {
        "/".to_string()
    } else {
        format!("/{}/", relative_dir.to_string_lossy())
    };

    let mut files: Vec<serde_json::Value> = Vec::new();
    let mut subdirs: HashSet<String> = HashSet::new();
    for (_, info) in self.repo.markdown_files.pin().iter() {
        let Some(remainder) = info.url_path.strip_prefix(&dir_prefix) else {
            continue;
        };
        if !remainder.trim_end_matches('/').contains('/') {
            // Direct child: list it with a URL relative to this page.
            let mut file_json = markdown_file_to_json(info);
            if let Some(obj) = file_json.as_object_mut()
                && let Some(abs_url) = obj.get("url_path").and_then(|v| v.as_str())
            {
                obj.insert(
                    "url_path".to_string(),
                    serde_json::Value::String(make_relative_url(abs_url, depth)),
                );
            }
            files.push(file_json);
        } else if let Some(subdir) = remainder.split('/').next()
            && !subdir.is_empty()
        {
            // Deeper page: only its first path segment matters here.
            subdirs.insert(subdir.to_string());
        }
    }
    sort_files(&mut files, &self.config.sort);
    context.insert("files".to_string(), serde_json::Value::Array(files));

    // A HashSet iterates in arbitrary order; sort the names so the generated
    // page is identical from one build to the next.
    let mut subdir_names: Vec<String> = subdirs.into_iter().collect();
    subdir_names.sort_unstable();
    let subdirs_json: Vec<serde_json::Value> = subdir_names
        .into_iter()
        .map(|name| {
            let abs_url_path = if is_root {
                format!("/{}/", name)
            } else {
                format!("{}{}/", dir_prefix, name)
            };
            serde_json::json!({
                "name": name,
                "url_path": make_relative_url(&abs_url_path, depth)
            })
        })
        .collect();
    context.insert(
        "subdirs".to_string(),
        serde_json::Value::Array(subdirs_json),
    );
    context.insert("server_mode".to_string(), serde_json::Value::Bool(false));

    // The tag sources are serialised to a JSON *string* and that string is
    // stored as the context value (falls back to "[]" if serialisation fails).
    let tag_sources_json = serde_json::to_string(
        &self
            .config
            .tag_sources
            .iter()
            .map(|ts| {
                serde_json::json!({
                    "field": ts.field,
                    "urlSource": ts.url_source(),
                    "label": ts.singular_label(),
                    "labelPlural": ts.plural_label()
                })
            })
            .collect::<Vec<_>>(),
    )
    .unwrap_or_else(|_| "[]".to_string());
    context.insert(
        "tag_sources".to_string(),
        serde_json::json!(tag_sources_json),
    );
    context.insert(
        "sidebar_style".to_string(),
        serde_json::json!(self.config.sidebar_style),
    );
    context.insert(
        "sidebar_max_items".to_string(),
        serde_json::json!(self.config.sidebar_max_items),
    );
    context.insert(
        "title_prefix".to_string(),
        serde_json::json!(self.config.title_prefix),
    );
    context.insert(
        "title_suffix".to_string(),
        serde_json::json!(self.config.title_suffix),
    );

    let template_name = if is_root { "home.html" } else { "section.html" };
    let html_output = Templates::render_template_with_tera(tera, template_name, context)?;

    let output_path = if is_root {
        self.output_dir.join("index.html")
    } else {
        self.output_dir.join(relative_dir).join("index.html")
    };
    // Never overwrite a page that already exists at this location.
    if !output_path.exists() {
        if let Some(parent) = output_path.parent() {
            fs::create_dir_all(parent).map_err(|e| BuildError::CreateDirFailed {
                path: parent.to_path_buf(),
                source: e,
            })?;
        }
        fs::write(&output_path, html_output).map_err(|e| BuildError::WriteFailed {
            path: output_path,
            source: e,
        })?;
    }
    Ok(())
}
/// Renders, in parallel, one index page per tag source that has entries in
/// the tag index plus one page per tag value, and returns how many pages
/// were scheduled (0 when there is nothing to render).
///
/// # Errors
/// Returns `BuildError::CreateDirFailed` when the thread pool cannot be
/// created, or the first error recorded by a worker.
async fn render_tag_pages(&self) -> Result<usize, BuildError> {
    use rayon::prelude::*;

    let started = Instant::now();

    // `(source, None)` is a source index page, `(source, Some(tag))` a tag page.
    let mut jobs: Vec<(String, Option<String>)> = Vec::new();
    for tag_source in &self.config.tag_sources {
        let source = tag_source.url_source();
        if self.repo.tag_index.has_source(&source) {
            jobs.push((source.clone(), None));
            for tag_info in self.repo.tag_index.get_all_tags(&source) {
                jobs.push((source.clone(), Some(tag_info.normalized)));
            }
        }
    }
    if jobs.is_empty() {
        println!("Generating tag pages ... skipped (no tags)");
        return Ok(0);
    }

    let total = jobs.len();
    let concurrency = self.get_concurrency();
    tracing::info!(
        "Rendering {} tag pages with concurrency {}",
        total,
        concurrency
    );
    let tera = self.templates.tera_clone();
    let finished = AtomicUsize::new(0);
    print_progress("Generating tag pages", 0, total);

    let pool = rayon::ThreadPoolBuilder::new()
        .num_threads(concurrency)
        .build()
        .map_err(|e| BuildError::CreateDirFailed {
            path: self.output_dir.clone(),
            source: std::io::Error::other(format!("Failed to create rayon thread pool: {}", e)),
        })?;
    let first_error: std::sync::Mutex<Option<BuildError>> = std::sync::Mutex::new(None);

    pool.install(|| {
        jobs.par_iter().for_each(|(source, value)| {
            // Stop doing work as soon as any worker has failed.
            if first_error.lock().unwrap().is_some() {
                return;
            }
            let outcome = match value {
                Some(tag_value) => self.render_single_tag_page_sync(source, tag_value, &tera),
                None => self.render_tag_source_index_sync(source, &tera),
            };
            if let Err(e) = outcome {
                let mut slot = first_error.lock().unwrap();
                if slot.is_none() {
                    *slot = Some(e);
                }
                return;
            }
            let done = finished.fetch_add(1, Ordering::Relaxed) + 1;
            if done.is_multiple_of(100) || done == total {
                print_progress("Generating tag pages", done, total);
            }
        });
    });

    match first_error.into_inner().unwrap() {
        Some(e) => Err(e),
        None => {
            print_stage_done("Generating tag pages", total, Some(started.elapsed()));
            Ok(total)
        }
    }
}
/// Renders the page for one tag value with the `tag.html` template.
///
/// Does nothing when no context can be built for the tag. The template is
/// always rendered, but the file is only written when nothing exists at the
/// target path yet.
///
/// # Errors
/// Propagates template rendering errors and returns
/// `BuildError::CreateDirFailed` / `BuildError::WriteFailed` for filesystem
/// failures.
fn render_single_tag_page_sync(
    &self,
    source: &str,
    value: &str,
    tera: &tera::Tera,
) -> Result<(), BuildError> {
    let Some((context, output_path)) = self.build_single_tag_page_context(source, value) else {
        return Ok(());
    };
    let html_output = Templates::render_template_with_tera(tera, "tag.html", context)?;

    // Never overwrite a page that already exists at this location.
    if output_path.exists() {
        return Ok(());
    }
    if let Some(parent) = output_path.parent() {
        fs::create_dir_all(parent).map_err(|source| BuildError::CreateDirFailed {
            path: parent.to_path_buf(),
            source,
        })?;
    }
    fs::write(&output_path, html_output).map_err(|source| BuildError::WriteFailed {
        path: output_path,
        source,
    })
}
/// Renders the index page of one tag source with the `tag_index.html`
/// template.
///
/// Does nothing when no context can be built for the source. The template is
/// always rendered, but the file is only written when nothing exists at the
/// target path yet.
///
/// # Errors
/// Propagates template rendering errors and returns
/// `BuildError::CreateDirFailed` / `BuildError::WriteFailed` for filesystem
/// failures.
fn render_tag_source_index_sync(
    &self,
    source: &str,
    tera: &tera::Tera,
) -> Result<(), BuildError> {
    let Some((context, output_path)) = self.build_tag_source_index_context(source) else {
        return Ok(());
    };
    let html_output = Templates::render_template_with_tera(tera, "tag_index.html", context)?;

    // Never overwrite a page that already exists at this location.
    if output_path.exists() {
        return Ok(());
    }
    if let Some(parent) = output_path.parent() {
        fs::create_dir_all(parent).map_err(|source| BuildError::CreateDirFailed {
            path: parent.to_path_buf(),
            source,
        })?;
    }
    fs::write(&output_path, html_output).map_err(|source| BuildError::WriteFailed {
        path: output_path,
        source,
    })
}
/// Builds the template context and output path for the page of one tag value.
///
/// `source` is the URL name of the tag source and `value` the tag value as
/// used in URLs. Labels come from the matching configured tag source, or
/// fall back to `source` / `source + "s"`. All URLs in the context are made
/// relative to the depth of `/<source>/<value>/`.
///
/// Returns `None` (after logging a warning) when the sanitised source or
/// value is empty, or when the resulting path would not lie inside the
/// output directory.
fn build_single_tag_page_context(
    &self,
    source: &str,
    value: &str,
) -> Option<(HashMap<String, serde_json::Value>, PathBuf)> {
    let configured = self
        .config
        .tag_sources
        .iter()
        .find(|ts| ts.url_source() == source);
    let (singular_label, plural_label) = if let Some(ts) = configured {
        (ts.singular_label(), ts.plural_label())
    } else {
        (source.to_string(), format!("{}s", source))
    };

    let display_value = match self.repo.tag_index.get_tag_display(source, value) {
        Some(display) => display,
        None => value.to_string(),
    };
    let pages = self.repo.tag_index.get_pages(source, value);
    let depth = url_depth(&format!("/{}/{}/", source, value));

    let pages_json: Vec<serde_json::Value> = pages
        .iter()
        .map(|p| {
            serde_json::json!({
                "url_path": make_relative_url(&p.url_path, depth),
                "title": p.title,
                "description": p.description
            })
        })
        .collect();

    // Home -> tag source index; the tag page itself is not part of the trail.
    let breadcrumbs_json = vec![
        serde_json::json!({
            "name": "Home",
            "url": make_relative_url("/", depth)
        }),
        serde_json::json!({
            "name": plural_label,
            "url": make_relative_url(&format!("/{}/", source), depth)
        }),
    ];

    let entries = [
        ("tag_source", serde_json::Value::String(source.to_string())),
        (
            "tag_display_value",
            serde_json::Value::String(display_value.clone()),
        ),
        ("tag_label", serde_json::Value::String(singular_label)),
        ("tag_label_plural", serde_json::Value::String(plural_label)),
        ("page_count", serde_json::Value::Number(pages.len().into())),
        ("pages", serde_json::Value::Array(pages_json)),
        ("server_mode", serde_json::Value::Bool(false)),
        (
            "relative_base",
            serde_json::Value::String(relative_base(depth)),
        ),
        (
            "relative_root",
            serde_json::Value::String(relative_root(depth)),
        ),
        ("breadcrumbs", serde_json::Value::Array(breadcrumbs_json)),
        ("current_dir_name", serde_json::Value::String(display_value)),
        ("sidebar_style", serde_json::json!(self.config.sidebar_style)),
        (
            "sidebar_max_items",
            serde_json::json!(self.config.sidebar_max_items),
        ),
        ("title_prefix", serde_json::json!(self.config.title_prefix)),
        ("title_suffix", serde_json::json!(self.config.title_suffix)),
    ];
    let context: HashMap<String, serde_json::Value> = entries
        .into_iter()
        .map(|(key, val)| (key.to_string(), val))
        .collect();

    // Source and value become directory names, so sanitise them first.
    let safe_source = crate::wikilink::sanitize_path_component(source);
    let safe_value = crate::wikilink::sanitize_path_component(value);
    if safe_source.is_empty() || safe_value.is_empty() {
        tracing::warn!(
            "Skipping tag page with empty sanitized source={source:?} value={value:?}"
        );
        return None;
    }

    let output_path = self
        .output_dir
        .join(&safe_source)
        .join(&safe_value)
        .join("index.html");
    // Extra guard: refuse any path that is not under the output directory.
    if !output_path.starts_with(&self.output_dir) {
        tracing::warn!(
            "Tag page path escaped output directory: source={source:?} value={value:?}"
        );
        return None;
    }
    Some((context, output_path))
}
/// Builds the template context and output path for the index page of one tag source.
///
/// The context lists every tag known for `source` (URL value, display value and page
/// count) together with the source labels, relative asset prefixes, a "Home" breadcrumb
/// and the sidebar/title settings from the configuration.
///
/// Returns `None`, after logging a warning, when the sanitized source name is empty or
/// when the computed output path does not start with the output directory.
fn build_tag_source_index_context(
    &self,
    source: &str,
) -> Option<(HashMap<String, serde_json::Value>, PathBuf)> {
    // Use the configured labels when this source is declared in the config; otherwise
    // fall back to the raw source name and a naive "+s" plural.
    let (singular_label, plural_label) = self
        .config
        .tag_sources
        .iter()
        .find(|ts| ts.url_source() == source)
        .map(|ts| (ts.singular_label(), ts.plural_label()))
        .unwrap_or_else(|| (source.to_string(), format!("{}s", source)));

    let tags = self.repo.tag_index.get_all_tags(source);
    let depth = url_depth(&format!("/{}/", source));

    let tag_entries: Vec<serde_json::Value> = tags
        .iter()
        .map(|tag| {
            serde_json::json!({
                "url_value": tag.normalized.clone(),
                "display_value": tag.display.clone(),
                "page_count": tag.count
            })
        })
        .collect();
    let home_crumb = serde_json::json!({
        "name": "Home",
        "url": make_relative_url("/", depth)
    });

    let context: HashMap<String, serde_json::Value> = [
        ("tag_source", serde_json::Value::String(source.to_string())),
        ("tag_label", serde_json::Value::String(singular_label)),
        (
            "tag_label_plural",
            serde_json::Value::String(plural_label.clone()),
        ),
        ("tag_count", serde_json::Value::Number(tags.len().into())),
        ("tags", serde_json::Value::Array(tag_entries)),
        ("server_mode", serde_json::Value::Bool(false)),
        (
            "relative_base",
            serde_json::Value::String(relative_base(depth)),
        ),
        (
            "relative_root",
            serde_json::Value::String(relative_root(depth)),
        ),
        ("breadcrumbs", serde_json::Value::Array(vec![home_crumb])),
        ("current_dir_name", serde_json::Value::String(plural_label)),
        ("sidebar_style", serde_json::json!(self.config.sidebar_style)),
        (
            "sidebar_max_items",
            serde_json::json!(self.config.sidebar_max_items),
        ),
        ("title_prefix", serde_json::json!(self.config.title_prefix)),
        ("title_suffix", serde_json::json!(self.config.title_suffix)),
    ]
    .into_iter()
    .map(|(key, value)| (key.to_string(), value))
    .collect();

    // The source name becomes a directory name, so it is sanitized before being joined.
    let safe_source = crate::wikilink::sanitize_path_component(source);
    if safe_source.is_empty() {
        tracing::warn!("Skipping tag source index with empty sanitized source={source:?}");
        return None;
    }

    let output_path = self.output_dir.join(&safe_source).join("index.html");
    if output_path.starts_with(&self.output_dir) {
        Some((context, output_path))
    } else {
        tracing::warn!("Tag source index path escaped output directory: source={source:?}");
        None
    }
}
/// Links every non-markdown file of the repository into the output directory.
///
/// For each entry in `repo.other_files` the parent directory of the output location is
/// created, and (on Unix) a relative symlink pointing at the original file is created
/// unless something already exists at that location.
///
/// Returns the number of entries in `repo.other_files`, including ones whose link was
/// already present.
///
/// # Errors
/// `BuildError::CreateDirFailed` if a parent directory cannot be created and
/// `BuildError::SymlinkFailed` if the symlink call fails.
fn symlink_assets(&self) -> Result<usize, BuildError> {
    // Snapshot the concurrent map so the pin guard is released before doing any I/O.
    let assets: Vec<_> = self
        .repo
        .other_files
        .pin()
        .iter()
        .map(|(_, info)| info.clone())
        .collect();
    let total = assets.len();

    for asset in assets {
        let link = self
            .output_dir
            .join(asset.url_path.trim_start_matches('/'));

        if let Some(dir) = link.parent() {
            fs::create_dir_all(dir).map_err(|source| BuildError::CreateDirFailed {
                path: dir.to_path_buf(),
                source,
            })?;
        }

        let target = self.calculate_relative_symlink(&link, &asset.raw_path)?;
        if link.exists() {
            continue;
        }

        #[cfg(unix)]
        std::os::unix::fs::symlink(&target, &link).map_err(|source| {
            BuildError::SymlinkFailed {
                target,
                link,
                source,
            }
        })?;
    }

    Ok(total)
}
/// Computes the relative path that a symlink located at `from` must contain to reach `to`.
///
/// The result climbs with one `..` for every component of `from`'s parent directory that
/// is not shared with `to`, then descends through the remaining components of `to`.
/// The comparison is purely lexical; the filesystem is not consulted. This
/// implementation always returns `Ok`.
fn calculate_relative_symlink(&self, from: &Path, to: &Path) -> Result<PathBuf, BuildError> {
    // The link lives inside its parent directory, so relative resolution starts there.
    let link_dir = from.parent().unwrap_or(from);

    let shared = link_dir
        .components()
        .zip(to.components())
        .take_while(|(ours, theirs)| ours == theirs)
        .count();
    let climbs = link_dir.components().count() - shared;

    let relative: PathBuf = std::iter::repeat(std::path::Component::ParentDir)
        .take(climbs)
        .chain(to.components().skip(shared))
        .collect();

    Ok(relative)
}
/// Overlays the configured static folder onto the output directory using symlinks.
///
/// Every regular file found under `<root_dir>/<static_folder>` (symlinks are followed) is
/// linked at the same relative location in the output directory. Locations that already
/// exist are left untouched, and entries the directory walk fails to read are skipped.
/// Does nothing when the static folder is missing or is not a directory. The symlink
/// itself is only created on Unix.
///
/// # Errors
/// `BuildError::CreateDirFailed` if a parent directory cannot be created (also used when
/// the prefix strip fails) and `BuildError::SymlinkFailed` if the symlink call fails.
fn handle_static_folder(&self) -> Result<(), BuildError> {
    let static_root = self.config.root_dir.join(&self.config.static_folder);
    if !(static_root.exists() && static_root.is_dir()) {
        return Ok(());
    }

    let static_files = WalkDir::new(&static_root)
        .follow_links(true)
        .min_depth(1)
        .into_iter()
        .filter_map(Result::ok)
        .filter(|entry| entry.file_type().is_file());

    for entry in static_files {
        let source_file = entry.path();
        let relative =
            source_file
                .strip_prefix(&static_root)
                .map_err(|_| BuildError::CreateDirFailed {
                    path: source_file.to_path_buf(),
                    source: std::io::Error::other("strip prefix failed"),
                })?;

        let link = self.output_dir.join(relative);
        if link.exists() {
            continue;
        }

        if let Some(dir) = link.parent() {
            fs::create_dir_all(dir).map_err(|source| BuildError::CreateDirFailed {
                path: dir.to_path_buf(),
                source,
            })?;
        }

        let target = self.calculate_relative_symlink(&link, source_file)?;
        #[cfg(unix)]
        std::os::unix::fs::symlink(&target, &link).map_err(|source| {
            BuildError::SymlinkFailed {
                target,
                link,
                source,
            }
        })?;
    }

    Ok(())
}
fn handle_mbr_folder(&self) -> Result<(), BuildError> {
let mbr_output = self.output_dir.join(".mbr");
fs::create_dir_all(&mbr_output).map_err(|e| BuildError::CreateDirFailed {
path: mbr_output.clone(),
source: e,
})?;
let nojekyll_path = self.output_dir.join(".nojekyll");
fs::write(&nojekyll_path, "").map_err(|e| BuildError::WriteFailed {
path: nojekyll_path,
source: e,
})?;
let mbr_source = self.config.root_dir.join(".mbr");
if mbr_source.exists() && mbr_source.is_dir() {
self.copy_dir_recursive(&mbr_source, &mbr_output)?;
}
for (route, content, _mime_type) in DEFAULT_FILES.iter() {
if content.is_empty() {
continue;
}
if *route == "/pico.min.css" {
continue;
}
let filename = route.trim_start_matches('/');
let output_path = mbr_output.join(filename);
if !output_path.exists() {
if let Some(parent) = output_path.parent() {
fs::create_dir_all(parent).map_err(|e| BuildError::CreateDirFailed {
path: parent.to_path_buf(),
source: e,
})?;
}
fs::write(&output_path, content).map_err(|e| BuildError::WriteFailed {
path: output_path,
source: e,
})?;
}
}
let pico_output_path = mbr_output.join("pico.min.css");
if !pico_output_path.exists() {
let pico_content =
embedded_pico::get_pico_css(&self.config.theme).unwrap_or_else(|| {
eprintln!(
"Warning: Invalid theme '{}'. Using default. Valid themes: {}",
self.config.theme,
embedded_pico::valid_themes_display()
);
embedded_pico::get_pico_css("default").expect("default theme must exist")
});
fs::write(&pico_output_path, pico_content).map_err(|e| BuildError::WriteFailed {
path: pico_output_path,
source: e,
})?;
}
let mut response = serde_json::to_value(&self.repo)
.map_err(|e| BuildError::RepoScan(crate::errors::RepoError::JsonSerializeFailed(e)))?;
if let Some(obj) = response.as_object_mut() {
obj.insert(
"sort".to_string(),
serde_json::to_value(&self.config.sort).unwrap_or(serde_json::Value::Array(vec![])),
);
let mut tags_data: HashMap<String, serde_json::Value> = HashMap::new();
for tag_source in &self.config.tag_sources {
let source = tag_source.url_source();
if self.repo.tag_index.has_source(&source) {
let tags = self.repo.tag_index.get_all_tags(&source);
let tags_json: Vec<serde_json::Value> = tags
.iter()
.map(|t| {
serde_json::json!({
"normalized": t.normalized,
"display": t.display,
"count": t.count,
"url": format!("/{}/{}/", source, t.normalized)
})
})
.collect();
tags_data.insert(
source,
serde_json::json!({
"label": tag_source.singular_label(),
"label_plural": tag_source.plural_label(),
"tags": tags_json
}),
);
}
}
if !tags_data.is_empty() {
obj.insert(
"tag_sources".to_string(),
serde_json::to_value(tags_data)
.unwrap_or(serde_json::Value::Object(serde_json::Map::new())),
);
}
}
let site_json = serde_json::to_string(&response)
.map_err(|e| BuildError::RepoScan(crate::errors::RepoError::JsonSerializeFailed(e)))?;
let site_json_path = mbr_output.join("site.json");
fs::write(&site_json_path, site_json).map_err(|e| BuildError::WriteFailed {
path: site_json_path,
source: e,
})?;
let media_data = serde_json::json!({
"other_files": &self.repo.other_files,
});
let media_json = serde_json::to_string(&media_data)
.map_err(|e| BuildError::RepoScan(crate::errors::RepoError::JsonSerializeFailed(e)))?;
let media_json_path = mbr_output.join("media.json");
fs::write(&media_json_path, media_json).map_err(|e| BuildError::WriteFailed {
path: media_json_path,
source: e,
})?;
Ok(())
}
/// Renders the error template as a "Not Found" page and writes it to `<output>/404.html`.
///
/// The page is rendered with depth-0 relative prefixes and a single "Home" breadcrumb
/// pointing at `./`.
///
/// # Errors
/// Propagates template rendering errors and returns `BuildError::WriteFailed` when the
/// file cannot be written.
fn generate_404_page(&self) -> Result<(), BuildError> {
    use std::collections::HashMap;

    let home_crumb = serde_json::json!({
        "name": "Home",
        "url": "./"
    });

    let context: HashMap<String, serde_json::Value> = [
        ("error_code", serde_json::Value::Number(404.into())),
        (
            "error_title",
            serde_json::Value::String("Not Found".to_string()),
        ),
        (
            "error_message",
            serde_json::Value::String("The requested page could not be found.".to_string()),
        ),
        ("server_mode", serde_json::Value::Bool(false)),
        ("relative_base", serde_json::Value::String(relative_base(0))),
        ("relative_root", serde_json::Value::String(relative_root(0))),
        ("breadcrumbs", serde_json::Value::Array(vec![home_crumb])),
        ("sidebar_style", serde_json::json!(self.config.sidebar_style)),
        (
            "sidebar_max_items",
            serde_json::json!(self.config.sidebar_max_items),
        ),
    ]
    .into_iter()
    .map(|(key, value)| (key.to_string(), value))
    .collect();

    let html = self.templates.render_error(context)?;

    let output_path = self.output_dir.join("404.html");
    fs::write(&output_path, html).map_err(|source| BuildError::WriteFailed {
        path: output_path,
        source,
    })?;

    Ok(())
}
fn generate_media_viewer_pages(&self) -> Result<(), BuildError> {
use std::collections::HashMap;
let media_types = [
MediaViewerType::Video,
MediaViewerType::Pdf,
MediaViewerType::Audio,
MediaViewerType::Image,
];
for media_type in media_types {
let output_path = match media_type {
MediaViewerType::Video => self.output_dir.join(".mbr/videos/index.html"),
MediaViewerType::Pdf => self.output_dir.join(".mbr/pdfs/index.html"),
MediaViewerType::Audio => self.output_dir.join(".mbr/audio/index.html"),
MediaViewerType::Image => self.output_dir.join(".mbr/images/index.html"),
};
if let Some(parent) = output_path.parent() {
fs::create_dir_all(parent).map_err(|e| BuildError::CreateDirFailed {
path: parent.to_path_buf(),
source: e,
})?;
}
let depth = 2;
let mut context: HashMap<String, serde_json::Value> = HashMap::new();
context.insert(
"media_type".to_string(),
serde_json::Value::String(media_type.as_str().to_string()),
);
context.insert(
"title".to_string(),
serde_json::Value::String(format!("{} Viewer", media_type.label())),
);
context.insert("server_mode".to_string(), serde_json::Value::Bool(false));
context.insert(
"relative_base".to_string(),
serde_json::Value::String(relative_base(depth)),
);
context.insert(
"relative_root".to_string(),
serde_json::Value::String(relative_root(depth)),
);
context.insert(
"breadcrumbs".to_string(),
serde_json::Value::Array(vec![serde_json::json!({
"name": "Home",
"url": "../../"
})]),
);
context.insert(
"parent_path".to_string(),
serde_json::Value::String("../../".to_string()),
);
context.insert(
"sidebar_style".to_string(),
serde_json::json!(self.config.sidebar_style),
);
context.insert(
"sidebar_max_items".to_string(),
serde_json::json!(self.config.sidebar_max_items),
);
context.insert(
"title_prefix".to_string(),
serde_json::json!(self.config.title_prefix),
);
context.insert(
"title_suffix".to_string(),
serde_json::json!(self.config.title_suffix),
);
let html = self.templates.render_media_viewer(context)?;
fs::write(&output_path, html).map_err(|e| BuildError::WriteFailed {
path: output_path,
source: e,
})?;
}
Ok(())
}
async fn run_pagefind(&self) -> bool {
use pagefind::api::PagefindIndex;
use pagefind::options::PagefindServiceConfig;
let options = PagefindServiceConfig::builder()
.force_language("en".to_string())
.build();
let mut index = match PagefindIndex::new(Some(options)) {
Ok(idx) => idx,
Err(e) => {
tracing::warn!("Failed to create Pagefind index: {}", e);
return false;
}
};
let path = self.output_dir.to_string_lossy().to_string();
let files_indexed = match index
.add_directory(path, Some("**/*.html".to_string()))
.await
{
Ok(count) => count,
Err(e) => {
tracing::warn!("Failed to index directory: {}", e);
return false;
}
};
if files_indexed == 0 {
tracing::warn!("No HTML files found to index");
return false;
}
let files = match index.get_files().await {
Ok(f) => f,
Err(e) => {
tracing::warn!("Failed to get Pagefind files: {}", e);
return false;
}
};
let pagefind_dir = self.output_dir.join(".mbr").join("pagefind");
if let Err(e) = fs::create_dir_all(&pagefind_dir) {
tracing::warn!("Failed to create pagefind directory: {}", e);
return false;
}
for file in files {
let file_path = pagefind_dir.join(&file.filename);
if let Some(parent) = file_path.parent()
&& let Err(e) = fs::create_dir_all(parent)
{
tracing::debug!("Failed to create dir {}: {}", parent.display(), e);
continue;
}
if let Err(e) = fs::write(&file_path, &file.contents) {
tracing::debug!("Failed to write {}: {}", file_path.display(), e);
continue;
}
}
tracing::info!(
"Pagefind search index generated: {} pages indexed",
files_indexed
);
true
}
/// Scans the generated HTML pages for internal links whose target is missing.
///
/// All regular files under the output directory (symlinks followed) form the set of valid
/// targets. Every `.html` file outside `.mbr` is parsed and each `a[href]` is checked,
/// except hrefs starting with one of the skipped prefixes (external schemes,
/// protocol-relative URLs and pure fragments). Pages are processed in parallel. Pages
/// that cannot be read are skipped.
///
/// Returns one `BrokenLink` per href that resolves to a path for which
/// `link_target_exists` reports false.
fn validate_links(&self) -> Vec<BrokenLink> {
    use rayon::prelude::*;

    // Hrefs with these prefixes never point into the generated site tree.
    const SKIPPED_PREFIXES: [&str; 8] = [
        "http://",
        "https://",
        "//",
        "mailto:",
        "tel:",
        "javascript:",
        "data:",
        "#",
    ];

    let Ok(anchor_selector) = Selector::parse("a[href]") else {
        return Vec::new();
    };

    let mbr_dir = self.output_dir.join(".mbr");
    let mut known_files: HashSet<PathBuf> = HashSet::new();
    let mut pages: Vec<PathBuf> = Vec::new();

    let output_files = WalkDir::new(&self.output_dir)
        .follow_links(true)
        .into_iter()
        .filter_map(Result::ok)
        .filter(|entry| entry.file_type().is_file());
    for entry in output_files {
        let file = entry.into_path();
        let is_page =
            file.extension().is_some_and(|ext| ext == "html") && !file.starts_with(&mbr_dir);
        if is_page {
            pages.push(file.clone());
        }
        known_files.insert(file);
    }

    pages
        .par_iter()
        .flat_map(|page| {
            let Ok(markup) = fs::read_to_string(page) else {
                return Vec::new();
            };

            let source_page = page
                .strip_prefix(&self.output_dir)
                .unwrap_or(page)
                .to_string_lossy()
                .to_string();
            let document = Html::parse_document(&markup);

            let broken: Vec<BrokenLink> = document
                .select(&anchor_selector)
                .filter_map(|anchor| anchor.value().attr("href"))
                .filter(|&href| {
                    !SKIPPED_PREFIXES
                        .iter()
                        .any(|prefix| href.starts_with(*prefix))
                })
                .filter(|&href| {
                    self.resolve_link(page, href)
                        .is_some_and(|target| !link_target_exists(&target, &known_files))
                })
                .map(|href| BrokenLink {
                    source_page: source_page.clone(),
                    link_url: href.to_string(),
                })
                .collect();
            broken
        })
        .collect()
}
/// Maps an `href` found in `source_file` to the filesystem path it refers to inside the
/// output directory.
///
/// The fragment (`#...`) and query (`?...`) are stripped first and the remainder is
/// percent-decoded. Hrefs starting with `/` are resolved against the output directory;
/// all others are resolved against the directory containing `source_file` and normalized.
///
/// Root-relative hrefs containing `.` or `..` segments (for example `/docs/../readme/`)
/// have those segments resolved the way a browser would, clamped at the site root, so
/// that the result can be compared against walked file paths. Hrefs without dot segments
/// are joined verbatim.
///
/// Returns `None` when nothing remains after stripping the fragment and query, or when
/// `source_file` has no parent directory (relative hrefs only).
fn resolve_link(&self, source_file: &Path, href: &str) -> Option<PathBuf> {
    let href = href.split('#').next().unwrap_or(href);
    let href = href.split('?').next().unwrap_or(href);
    if href.is_empty() {
        return None;
    }

    // Decode only after splitting so an encoded `%23` / `%3F` stays part of the path.
    let href = percent_decode_str(href).decode_utf8_lossy();

    if href.starts_with('/') {
        let site_path = href.trim_start_matches('/');

        // Common case: nothing to normalize, keep the path exactly as written.
        let has_dot_segments = site_path
            .split('/')
            .any(|segment| matches!(segment, "." | ".."));
        if !has_dot_segments {
            return Some(self.output_dir.join(site_path));
        }

        // Resolve dot segments relative to the site root; `..` at the root is ignored,
        // mirroring how URLs are resolved, and never climbs out of the output directory.
        let mut segments: Vec<&str> = Vec::new();
        for segment in site_path.split('/') {
            match segment {
                "" | "." => {}
                ".." => {
                    segments.pop();
                }
                name => segments.push(name),
            }
        }

        let mut resolved = self.output_dir.clone();
        resolved.extend(segments);
        Some(resolved)
    } else {
        let source_dir = source_file.parent()?;
        let resolved = source_dir.join(href.as_ref());
        Some(normalize_path(&resolved))
    }
}
/// Recursively copies the contents of `from` into `to`, following symlinks.
///
/// Directories are recreated and regular files are copied with `fs::copy` (so existing
/// destination files are overwritten). Entries that are neither directories nor regular
/// files are ignored, as are entries the directory walk fails to read.
///
/// # Errors
/// `BuildError::CreateDirFailed` when a directory cannot be created and
/// `BuildError::CopyFailed` when a file copy (or the prefix strip) fails.
fn copy_dir_recursive(&self, from: &Path, to: &Path) -> Result<(), BuildError> {
    let entries = WalkDir::new(from)
        .follow_links(true)
        .min_depth(1)
        .into_iter()
        .filter_map(Result::ok);

    for entry in entries {
        let src = entry.path();
        let Ok(relative) = src.strip_prefix(from) else {
            return Err(BuildError::CopyFailed {
                from: src.to_path_buf(),
                to: to.to_path_buf(),
                source: std::io::Error::other("strip prefix failed"),
            });
        };
        let dest = to.join(relative);

        let kind = entry.file_type();
        if kind.is_dir() {
            fs::create_dir_all(&dest).map_err(|source| BuildError::CreateDirFailed {
                path: dest,
                source,
            })?;
            continue;
        }
        if !kind.is_file() {
            continue;
        }

        if let Some(dir) = dest.parent() {
            fs::create_dir_all(dir).map_err(|source| BuildError::CreateDirFailed {
                path: dir.to_path_buf(),
                source,
            })?;
        }
        fs::copy(src, &dest).map_err(|source| BuildError::CopyFailed {
            from: src.to_path_buf(),
            to: dest,
            source,
        })?;
    }

    Ok(())
}
/// Returns the outbound links of a generated tag page, if `url_path` addresses one.
///
/// `/<source>/` yields the links of the tag source index and `/<source>/<value>/` the
/// links of a single tag page. The source segment is lowercased before being compared
/// with the configured tag sources; the value segment is passed through unchanged.
/// Any other shape of path, or an unknown source, yields `None`.
fn try_build_tag_outbound(&self, url_path: &str) -> Option<Vec<OutboundLink>> {
    let trimmed = url_path.trim_matches('/');
    if trimmed.is_empty() {
        return None;
    }

    let is_tag_source = |candidate: &str| {
        self.config
            .tag_sources
            .iter()
            .any(|ts| ts.url_source() == candidate)
    };

    let segments: Vec<&str> = trimmed.split('/').collect();
    match segments.as_slice() {
        [source] => {
            let source = source.to_lowercase();
            is_tag_source(&source).then(|| self.build_tag_index_outbound(&source))
        }
        [source, value] => {
            let source = source.to_lowercase();
            is_tag_source(&source).then(|| self.build_tag_page_outbound(&source, value))
        }
        _ => None,
    }
}
/// Lists the outbound links of the page for tag `value` within `source`.
///
/// The result contains one internal link per page carrying the tag, followed by a link
/// back to the tag source index at `/<source>/`. That last link is labelled with the
/// configured plural label, or with the raw source name when the source is not configured.
fn build_tag_page_outbound(&self, source: &str, value: &str) -> Vec<OutboundLink> {
    let mut links: Vec<OutboundLink> = self
        .repo
        .tag_index
        .get_pages(source, value)
        .into_iter()
        .map(|page| OutboundLink {
            to: page.url_path,
            text: page.title,
            anchor: None,
            internal: true,
        })
        .collect();

    let configured = self
        .config
        .tag_sources
        .iter()
        .find(|ts| ts.url_source() == source);
    let index_label = match configured {
        Some(ts) => ts.plural_label(),
        None => source.to_string(),
    };

    links.push(OutboundLink {
        to: format!("/{}/", source),
        text: index_label,
        anchor: None,
        internal: true,
    });
    links
}
/// Lists the outbound links of the index page of tag source `source`: one internal link
/// per tag, pointing at `/<source>/<normalized tag>/` and labelled with the tag's display
/// text.
fn build_tag_index_outbound(&self, source: &str) -> Vec<OutboundLink> {
    let tags = self.repo.tag_index.get_all_tags(source);

    let mut links = Vec::with_capacity(tags.len());
    for tag in tags {
        links.push(OutboundLink {
            to: format!("/{}/{}/", source, tag.normalized),
            text: tag.display,
            anchor: None,
            internal: true,
        });
    }
    links
}
}
// Unit tests for the build helpers: path normalization, URL depth / relative prefix
// helpers, relative URL resolution, link resolution and validation, and relative
// symlink computation.
#[cfg(test)]
mod tests {
use super::*;
// ---- Plain data structs ----
#[test]
fn test_build_stats_default() {
let stats = BuildStats::default();
assert_eq!(stats.markdown_pages, 0);
assert_eq!(stats.section_pages, 0);
assert_eq!(stats.tag_pages, 0);
assert_eq!(stats.assets_linked, 0);
assert_eq!(stats.pagefind_indexed, None);
assert_eq!(stats.broken_links, 0);
}
#[test]
fn test_broken_link_struct() {
let link = BrokenLink {
source_page: "docs/index.html".to_string(),
link_url: "../missing/".to_string(),
};
assert_eq!(link.source_page, "docs/index.html");
assert_eq!(link.link_url, "../missing/");
}
// ---- normalize_path: lexical removal of `.` and `..` components ----
#[test]
fn test_normalize_path_simple() {
let path = PathBuf::from("/foo/bar/baz");
assert_eq!(normalize_path(&path), PathBuf::from("/foo/bar/baz"));
}
#[test]
fn test_normalize_path_with_parent() {
let path = PathBuf::from("/foo/bar/../baz");
assert_eq!(normalize_path(&path), PathBuf::from("/foo/baz"));
}
#[test]
fn test_normalize_path_with_multiple_parents() {
let path = PathBuf::from("/foo/bar/qux/../../baz");
assert_eq!(normalize_path(&path), PathBuf::from("/foo/baz"));
}
#[test]
fn test_normalize_path_with_current_dir() {
let path = PathBuf::from("/foo/./bar/./baz");
assert_eq!(normalize_path(&path), PathBuf::from("/foo/bar/baz"));
}
#[test]
fn test_normalize_path_mixed() {
let path = PathBuf::from("/foo/./bar/../baz/./qux");
assert_eq!(normalize_path(&path), PathBuf::from("/foo/baz/qux"));
}
// ---- url_depth: number of non-empty path segments ----
#[test]
fn test_url_depth_root() {
assert_eq!(url_depth("/"), 0);
assert_eq!(url_depth(""), 0);
}
#[test]
fn test_url_depth_one_level() {
assert_eq!(url_depth("/docs/"), 1);
assert_eq!(url_depth("docs/"), 1);
assert_eq!(url_depth("/docs"), 1);
}
#[test]
fn test_url_depth_multiple_levels() {
assert_eq!(url_depth("/docs/guide/"), 2);
assert_eq!(url_depth("/a/b/c/"), 3);
assert_eq!(url_depth("/a/b/c/d/e/"), 5);
}
// ---- relative_base: prefix leading from a page at `depth` to `.mbr/` ----
#[test]
fn test_relative_base_at_root() {
assert_eq!(relative_base(0), ".mbr/");
}
#[test]
fn test_relative_base_one_level() {
assert_eq!(relative_base(1), "../.mbr/");
}
#[test]
fn test_relative_base_multiple_levels() {
assert_eq!(relative_base(2), "../../.mbr/");
assert_eq!(relative_base(3), "../../../.mbr/");
}
// ---- relative_root: prefix leading from a page at `depth` to the site root ----
#[test]
fn test_relative_root_at_root() {
assert_eq!(relative_root(0), "");
}
#[test]
fn test_relative_root_one_level() {
assert_eq!(relative_root(1), "../");
}
#[test]
fn test_relative_root_multiple_levels() {
assert_eq!(relative_root(2), "../../");
assert_eq!(relative_root(3), "../../../");
}
// ---- resolve_relative_url: the last segment of the base URL is dropped before the
// relative URL is applied, so "target/" resolves as a sibling of the base page ----
#[test]
fn test_resolve_relative_url_parent() {
assert_eq!(resolve_relative_url("/source/", "../target/"), "/target/");
}
#[test]
fn test_resolve_relative_url_nested() {
assert_eq!(
resolve_relative_url("/docs/guide/", "../reference/"),
"/reference/"
);
assert_eq!(
resolve_relative_url("/docs/guide/", "../../other/"),
"/other/"
);
}
#[test]
fn test_resolve_relative_url_sibling() {
assert_eq!(
resolve_relative_url("/docs/guide/", "reference/"),
"/docs/reference/"
);
assert_eq!(resolve_relative_url("/source/", "target/"), "/target/");
}
#[test]
fn test_resolve_relative_url_absolute() {
assert_eq!(resolve_relative_url("/source/", "/target/"), "/target/");
assert_eq!(resolve_relative_url("/source/", "/"), "/");
}
#[test]
fn test_resolve_relative_url_to_root() {
assert_eq!(resolve_relative_url("/source/", "../"), "/");
assert_eq!(resolve_relative_url("/docs/guide/", "../../"), "/");
}
// Builds a `Builder` for tests: default config with `root_dir` overridden, templates
// loaded from `root_dir`, a repo initialized from that config, and empty caches.
// Exposed as `pub(super)` so the sibling property-test module can reuse it.
pub(super) fn test_builder(output_dir: PathBuf, root_dir: PathBuf) -> Builder {
use crate::config::Config;
use crate::oembed_cache::OembedCache;
use crate::repo::Repo;
use crate::templates::Templates;
use papaya::HashMap as ConcurrentHashMap;
use std::sync::Arc;
let config = Config {
root_dir: root_dir.clone(),
..Default::default()
};
let templates =
Templates::new(&root_dir, None).expect("Failed to create templates for test");
let repo = Repo::init_from_config(&config);
let oembed_cache = Arc::new(OembedCache::new(1024));
let build_link_index = Arc::new(ConcurrentHashMap::new());
Builder {
config,
templates,
output_dir,
repo,
oembed_cache,
build_link_index,
}
}
// ---- Builder::resolve_link ----
// In these tests the temp directory is the output directory and `<temp>/root` is the
// source root.
#[test]
fn test_resolve_link_absolute_path() {
let temp = tempfile::tempdir().unwrap();
let temp_path = temp.path().to_path_buf();
let root = temp.path().join("root");
std::fs::create_dir_all(root.join(".mbr")).unwrap();
let builder = test_builder(temp_path.clone(), root);
let source = temp_path.join("docs").join("index.html");
// A root-relative href is resolved against the output directory, not the source page.
let result = builder.resolve_link(&source, "/readme/");
assert_eq!(result, Some(temp_path.join("readme/")));
}
#[test]
fn test_resolve_link_relative_path() {
let temp = tempfile::tempdir().unwrap();
let temp_path = temp.path().to_path_buf();
let root = temp.path().join("root");
std::fs::create_dir_all(root.join(".mbr")).unwrap();
let builder = test_builder(temp_path.clone(), root);
let docs_dir = temp_path.join("docs");
std::fs::create_dir_all(&docs_dir).unwrap();
let source = docs_dir.join("index.html");
// Relative hrefs are resolved against the directory containing the source page.
let result = builder.resolve_link(&source, "guide/");
assert_eq!(result, Some(docs_dir.join("guide")));
let result = builder.resolve_link(&source, "../readme/");
assert_eq!(result, Some(temp_path.join("readme")));
}
#[test]
fn test_resolve_link_with_anchor() {
let temp = tempfile::tempdir().unwrap();
let temp_path = temp.path().to_path_buf();
let root = temp.path().join("root");
std::fs::create_dir_all(root.join(".mbr")).unwrap();
let builder = test_builder(temp_path.clone(), root);
let source = temp_path.join("index.html");
let result = builder.resolve_link(&source, "/docs/#section");
assert_eq!(result, Some(temp_path.join("docs/")));
// A pure fragment leaves nothing to resolve.
let result = builder.resolve_link(&source, "#section");
assert_eq!(result, None);
}
#[test]
fn test_resolve_link_with_query_string() {
let temp = tempfile::tempdir().unwrap();
let temp_path = temp.path().to_path_buf();
let root = temp.path().join("root");
std::fs::create_dir_all(root.join(".mbr")).unwrap();
let builder = test_builder(temp_path.clone(), root);
let source = temp_path.join("index.html");
let result = builder.resolve_link(&source, "/search/?q=test");
assert_eq!(result, Some(temp_path.join("search/")));
}
#[test]
fn test_resolve_link_url_encoded() {
let temp = tempfile::tempdir().unwrap();
let temp_path = temp.path().to_path_buf();
let root = temp.path().join("root");
std::fs::create_dir_all(root.join(".mbr")).unwrap();
let builder = test_builder(temp_path.clone(), root);
let source = temp_path.join("index.html");
// `%20` is percent-decoded to a space before the path is built.
let result = builder.resolve_link(&source, "/my%20file/");
assert_eq!(result, Some(temp_path.join("my file/")));
}
#[test]
fn test_resolve_link_empty() {
let temp = tempfile::tempdir().unwrap();
let temp_path = temp.path().to_path_buf();
let root = temp.path().join("root");
std::fs::create_dir_all(root.join(".mbr")).unwrap();
let builder = test_builder(temp_path, root);
let source = PathBuf::from("/some/source.html");
let result = builder.resolve_link(&source, "");
assert_eq!(result, None);
}
// ---- link_target_exists ----
// Collects every regular file under `dir` (symlinks followed), using the same walk
// settings as `validate_links` uses to build its set of valid targets.
fn build_valid_files(dir: &Path) -> HashSet<PathBuf> {
WalkDir::new(dir)
.follow_links(true)
.into_iter()
.filter_map(|e| e.ok())
.filter(|e| e.file_type().is_file())
.map(|e| e.into_path())
.collect()
}
#[test]
fn test_link_target_exists_file() {
let temp = tempfile::tempdir().unwrap();
let temp_path = temp.path().to_path_buf();
let file_path = temp_path.join("readme.html");
std::fs::write(&file_path, "content").unwrap();
let valid_files = build_valid_files(&temp_path);
assert!(link_target_exists(&file_path, &valid_files));
}
#[test]
fn test_link_target_exists_directory_with_index() {
let temp = tempfile::tempdir().unwrap();
let temp_path = temp.path().to_path_buf();
let dir_path = temp_path.join("docs");
std::fs::create_dir_all(&dir_path).unwrap();
std::fs::write(dir_path.join("index.html"), "content").unwrap();
let valid_files = build_valid_files(&temp_path);
assert!(link_target_exists(&dir_path, &valid_files));
}
#[test]
fn test_link_target_exists_directory_without_index() {
let temp = tempfile::tempdir().unwrap();
let temp_path = temp.path().to_path_buf();
let dir_path = temp_path.join("docs");
std::fs::create_dir_all(&dir_path).unwrap();
let valid_files = build_valid_files(&temp_path);
assert!(!link_target_exists(&dir_path, &valid_files));
}
#[test]
fn test_link_target_exists_missing() {
let temp = tempfile::tempdir().unwrap();
let temp_path = temp.path().to_path_buf();
let valid_files = build_valid_files(&temp_path);
let missing = temp_path.join("nonexistent");
assert!(!link_target_exists(&missing, &valid_files));
}
#[test]
fn test_link_target_exists_path_with_trailing_slash() {
let temp = tempfile::tempdir().unwrap();
let temp_path = temp.path().to_path_buf();
let dir_path = temp_path.join("docs");
std::fs::create_dir_all(&dir_path).unwrap();
std::fs::write(dir_path.join("index.html"), "content").unwrap();
let valid_files = build_valid_files(&temp_path);
let path_with_slash = temp_path.join("docs/");
assert!(link_target_exists(&path_with_slash, &valid_files));
let missing_with_slash = temp_path.join("missing/");
assert!(!link_target_exists(&missing_with_slash, &valid_files));
}
// ---- Builder::validate_links ----
#[test]
fn test_validate_links_skips_external() {
let temp = tempfile::tempdir().unwrap();
let temp_path = temp.path().to_path_buf();
let root = temp.path().join("root");
std::fs::create_dir_all(root.join(".mbr")).unwrap();
let html_path = temp_path.join("test.html");
// One link per href prefix that the validator is expected to ignore.
std::fs::write(
&html_path,
r##"<html><body>
<a href="https://example.com">External HTTPS</a>
<a href="http://example.com">External HTTP</a>
<a href="//cdn.example.com">Protocol-relative</a>
<a href="mailto:test@example.com">Email</a>
<a href="tel:+1234567890">Phone</a>
<a href="javascript:void(0)">JavaScript</a>
<a href="data:text/html,Hello">Data URI</a>
<a href="#section">Anchor</a>
</body></html>"##,
)
.unwrap();
let builder = test_builder(temp_path, root);
let broken = builder.validate_links();
assert!(
broken.is_empty(),
"Expected no broken links, got: {:?}",
broken
);
}
#[test]
fn test_validate_links_finds_broken() {
let temp = tempfile::tempdir().unwrap();
let temp_path = temp.path().to_path_buf();
let root = temp.path().join("root");
std::fs::create_dir_all(root.join(".mbr")).unwrap();
let html_path = temp_path.join("test.html");
std::fs::write(
&html_path,
r#"<html><body>
<a href="/nonexistent/">Broken link</a>
</body></html>"#,
)
.unwrap();
let builder = test_builder(temp_path, root);
let broken = builder.validate_links();
assert_eq!(broken.len(), 1);
assert_eq!(broken[0].link_url, "/nonexistent/");
}
#[test]
fn test_validate_links_valid_links() {
let temp = tempfile::tempdir().unwrap();
let temp_path = temp.path().to_path_buf();
let root = temp.path().join("root");
std::fs::create_dir_all(root.join(".mbr")).unwrap();
let docs_dir = temp_path.join("docs");
std::fs::create_dir_all(&docs_dir).unwrap();
std::fs::write(docs_dir.join("index.html"), "content").unwrap();
let html_path = temp_path.join("test.html");
std::fs::write(
&html_path,
r#"<html><body>
<a href="/docs/">Valid link</a>
</body></html>"#,
)
.unwrap();
let builder = test_builder(temp_path, root);
let broken = builder.validate_links();
assert!(
broken.is_empty(),
"Expected no broken links, got: {:?}",
broken
);
}
#[test]
fn test_validate_links_skips_mbr_directory() {
let temp = tempfile::tempdir().unwrap();
let temp_path = temp.path().to_path_buf();
let root = temp.path().join("root");
std::fs::create_dir_all(root.join(".mbr")).unwrap();
let mbr_dir = temp_path.join(".mbr");
std::fs::create_dir_all(&mbr_dir).unwrap();
// HTML under the output's `.mbr` directory is not scanned, so this broken link
// must not be reported.
std::fs::write(
mbr_dir.join("pagefind-ui.html"),
r#"<html><body><a href="/broken/">Broken</a></body></html>"#,
)
.unwrap();
let builder = test_builder(temp_path, root);
let broken = builder.validate_links();
assert!(broken.is_empty());
}
#[test]
fn test_validate_links_relative_path() {
let temp = tempfile::tempdir().unwrap();
let temp_path = temp.path().to_path_buf();
let root = temp.path().join("root");
std::fs::create_dir_all(root.join(".mbr")).unwrap();
let docs_dir = temp_path.join("docs");
std::fs::create_dir_all(&docs_dir).unwrap();
let guide_dir = docs_dir.join("guide");
std::fs::create_dir_all(&guide_dir).unwrap();
std::fs::write(guide_dir.join("index.html"), "content").unwrap();
std::fs::write(
docs_dir.join("index.html"),
r#"<html><body>
<a href="guide/">Valid relative link</a>
<a href="missing/">Broken relative link</a>
</body></html>"#,
)
.unwrap();
let builder = test_builder(temp_path, root);
let broken = builder.validate_links();
assert_eq!(broken.len(), 1);
assert_eq!(broken[0].link_url, "missing/");
}
// ---- Builder::calculate_relative_symlink ----
// Runs `calculate_relative_symlink` on a throwaway builder. The computation is purely
// lexical, so `from` and `to` do not need to exist on disk.
fn symlink_helper(from: &str, to: &str) -> PathBuf {
let temp = tempfile::tempdir().unwrap();
let root = temp.path().to_path_buf();
std::fs::create_dir_all(&root).unwrap();
let builder = test_builder(temp.path().to_path_buf(), root);
builder
.calculate_relative_symlink(Path::new(from), Path::new(to))
.unwrap()
}
#[test]
fn test_symlink_same_directory() {
let result = symlink_helper("/a/b/link", "/a/b/target");
assert_eq!(result, PathBuf::from("target"));
}
#[test]
fn test_symlink_parent_directory() {
let result = symlink_helper("/a/b/link", "/a/target");
assert_eq!(result, PathBuf::from("../target"));
}
#[test]
fn test_symlink_sibling_directory() {
let result = symlink_helper("/a/b/link", "/a/c/target");
assert_eq!(result, PathBuf::from("../c/target"));
}
#[test]
fn test_symlink_deeply_nested_up() {
let result = symlink_helper("/a/b/c/d/link", "/a/target");
assert_eq!(result, PathBuf::from("../../../target"));
}
#[test]
fn test_symlink_deeply_nested_both() {
let result = symlink_helper("/a/b/c/link", "/a/x/y/z/target");
assert_eq!(result, PathBuf::from("../../x/y/z/target"));
}
#[test]
fn test_symlink_to_root_level() {
let result = symlink_helper("/a/b/c/link", "/target");
assert_eq!(result, PathBuf::from("../../../target"));
}
#[test]
fn test_symlink_from_root_level() {
let result = symlink_helper("/link", "/a/b/target");
assert_eq!(result, PathBuf::from("a/b/target"));
}
#[test]
fn test_symlink_no_common_prefix() {
// Only the root component is shared, so the result climbs out of both `a` and `b`.
let result = symlink_helper("/a/b/link", "/x/y/z/target");
assert_eq!(result, PathBuf::from("../../x/y/z/target"));
}
#[test]
fn test_symlink_same_path() {
let result = symlink_helper("/a/b/link", "/a/b/link");
assert_eq!(result, PathBuf::from("link"));
}
#[test]
fn test_symlink_resolution_property() {
// Round trip: joining the computed relative path onto the link's directory and
// normalizing must give back the original target.
let from = PathBuf::from("/project/build/docs/images/link");
let to = PathBuf::from("/project/source/assets/image.png");
let temp = tempfile::tempdir().unwrap();
let root = temp.path().to_path_buf();
std::fs::create_dir_all(&root).unwrap();
let builder = test_builder(temp.path().to_path_buf(), root);
let relative = builder.calculate_relative_symlink(&from, &to).unwrap();
let from_dir = from.parent().unwrap();
let resolved = from_dir.join(&relative);
let normalized = normalize_path(&resolved);
assert_eq!(normalized, to);
}
}
/// Property-based tests for `calculate_relative_symlink`, driven by randomly
/// generated absolute paths.
#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    /// Strategy for one path component: a lowercase letter followed by up to
    /// ten lowercase letters, digits, or underscores.
    fn path_component() -> impl Strategy<Value = String> {
        // A regex string literal is already a `Strategy<Value = String>`, so
        // no extra mapping (and no extra per-value allocation) is needed.
        "[a-z][a-z0-9_]{0,10}"
    }

    /// Strategy for an absolute path made of 1 to 7 generated components
    /// pushed onto `/`.
    fn reasonable_path() -> impl Strategy<Value = PathBuf> {
        prop::collection::vec(path_component(), 1..8).prop_map(|components| {
            let mut path = PathBuf::from("/");
            for c in components {
                path.push(c);
            }
            path
        })
    }

    proptest! {
        /// Joining the computed relative path onto the link's directory and
        /// normalizing must reproduce the target path.
        #[test]
        fn prop_symlink_resolution_correct(from in reasonable_path(), to in reasonable_path()) {
            // No guard on `from.parent()` is required: `reasonable_path`
            // always yields at least one component below `/`, and the link
            // path built below always has `from` as its parent.
            let temp = tempfile::tempdir().unwrap();
            // `tempdir()` has already created the directory, so it is used as-is.
            let root = temp.path().to_path_buf();
            let builder = super::tests::test_builder(temp.path().to_path_buf(), root);
            let from_link = from.join("link");
            let relative = builder.calculate_relative_symlink(&from_link, &to).unwrap();
            let from_dir = from_link.parent().unwrap();
            let resolved = from_dir.join(&relative);
            let normalized = normalize_path(&resolved);
            // Compare by reference so `to` stays available for the message
            // without cloning it.
            prop_assert_eq!(&normalized, &to,
                "from={:?}, to={:?}, relative={:?}, resolved={:?}",
                from_link, to, relative, resolved
            );
        }

        /// The computed symlink path is never absolute.
        #[test]
        fn prop_symlink_no_absolute_in_result(from in reasonable_path(), to in reasonable_path()) {
            let temp = tempfile::tempdir().unwrap();
            let root = temp.path().to_path_buf();
            let builder = super::tests::test_builder(temp.path().to_path_buf(), root);
            let from_link = from.join("link");
            let relative = builder.calculate_relative_symlink(&from_link, &to).unwrap();
            prop_assert!(!relative.is_absolute(),
                "Relative symlink should not be absolute: {:?}", relative);
        }

        /// If the computed path has any components, the first one is either
        /// `..` or an ordinary name (never a root, prefix, or `.`).
        #[test]
        fn prop_symlink_starts_with_parent_or_component(from in reasonable_path(), to in reasonable_path()) {
            let temp = tempfile::tempdir().unwrap();
            let root = temp.path().to_path_buf();
            let builder = super::tests::test_builder(temp.path().to_path_buf(), root);
            let from_link = from.join("link");
            let relative = builder.calculate_relative_symlink(&from_link, &to).unwrap();
            if let Some(first) = relative.components().next() {
                let is_parent_dir = matches!(first, std::path::Component::ParentDir);
                let is_normal = matches!(first, std::path::Component::Normal(_));
                prop_assert!(is_parent_dir || is_normal,
                    "First component should be '..' or normal: {:?}", first);
            }
        }
    }
}