use crate::walk;
use anyhow::{Context, Result};
use std::{
fs,
path::{Path, PathBuf},
};
pub const DEFAULT_MEMORY_BUDGET_MB: usize = 512;
const ESTIMATED_BYTES_PER_PAGE: usize = 64 * 1024;
#[derive(Debug, Clone, Copy)]
pub struct MemoryBudget {
pub max_bytes: usize,
pub batch_size: usize,
}
impl MemoryBudget {
#[must_use]
pub fn from_mb(mb: usize) -> Self {
let max_bytes = mb * 1024 * 1024;
let batch_size = (max_bytes / ESTIMATED_BYTES_PER_PAGE).max(10);
Self {
max_bytes,
batch_size,
}
}
#[must_use]
pub fn default_budget() -> Self {
Self::from_mb(DEFAULT_MEMORY_BUDGET_MB)
}
}
pub fn batched_content_files(
content_dir: &Path,
budget: &MemoryBudget,
) -> Result<Vec<Vec<PathBuf>>> {
let all_files = walk::walk_files(content_dir, "md")
.with_context(|| format!("cannot walk {}", content_dir.display()))?;
if all_files.is_empty() {
return Ok(vec![]);
}
let batches: Vec<Vec<PathBuf>> = all_files
.chunks(budget.batch_size)
.map(|chunk| chunk.to_vec())
.collect();
log::info!(
"[streaming] {} file(s) in {} batch(es) (budget: {} MB, {} pages/batch)",
all_files.len(),
batches.len(),
budget.max_bytes / (1024 * 1024),
budget.batch_size,
);
Ok(batches)
}
pub fn compile_batch(
batch: &[PathBuf],
content_dir: &Path,
build_dir: &Path,
site_dir: &Path,
template_dir: &Path,
batch_idx: usize,
) -> Result<()> {
if batch.is_empty() {
return Ok(());
}
let batch_content = build_dir.join(format!(".batch-{batch_idx}"));
fs::create_dir_all(&batch_content)?;
for file in batch {
let rel = file.strip_prefix(content_dir).unwrap_or(file);
let dest = batch_content.join(rel);
if let Some(parent) = dest.parent() {
fs::create_dir_all(parent)?;
}
let _ = fs::copy(file, &dest)?;
}
let batch_build = build_dir.join(format!(".batch-{batch_idx}-build"));
let batch_site = build_dir.join(format!(".batch-{batch_idx}-site"));
fs::create_dir_all(&batch_build)?;
fs::create_dir_all(&batch_site)?;
let compile_result = staticdatagen::compile(
&batch_build,
&batch_content,
&batch_site,
template_dir,
);
if compile_result.is_ok() {
fs::create_dir_all(site_dir)?;
merge_dir(&batch_site, site_dir)?;
}
let _ = fs::remove_dir_all(&batch_content);
let _ = fs::remove_dir_all(&batch_build);
let _ = fs::remove_dir_all(&batch_site);
compile_result.map_err(|e| anyhow::anyhow!("batch {batch_idx}: {e:?}"))
}
fn merge_dir(src: &Path, dst: &Path) -> Result<()> {
if !src.exists() {
return Ok(());
}
for entry in fs::read_dir(src)? {
let entry = entry?;
let path = entry.path();
let dest = dst.join(entry.file_name());
if path.is_dir() {
fs::create_dir_all(&dest)?;
merge_dir(&path, &dest)?;
} else {
let _ = fs::copy(&path, &dest)?;
}
}
Ok(())
}
#[must_use]
pub fn should_stream(
content_dir: &Path,
budget: &MemoryBudget,
explicitly_set: bool,
) -> bool {
if explicitly_set {
return true;
}
let count = walk::walk_files(content_dir, "md").map_or(0, |f| f.len());
count > budget.batch_size
}
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
use super::*;
use tempfile::tempdir;
#[test]
fn memory_budget_from_mb() {
let budget = MemoryBudget::from_mb(256);
assert_eq!(budget.max_bytes, 256 * 1024 * 1024);
assert!(budget.batch_size > 0);
}
#[test]
fn memory_budget_default() {
let budget = MemoryBudget::default_budget();
assert_eq!(budget.max_bytes, 512 * 1024 * 1024);
}
#[test]
fn memory_budget_minimum_batch_size() {
let budget = MemoryBudget::from_mb(0);
assert!(
budget.batch_size >= 10,
"batch size should have a floor of 10"
);
}
#[test]
fn batched_content_files_empty_dir() {
let dir = tempdir().unwrap();
let content = dir.path().join("content");
fs::create_dir_all(&content).unwrap();
let budget = MemoryBudget::from_mb(512);
let batches = batched_content_files(&content, &budget).unwrap();
assert!(batches.is_empty());
}
#[test]
fn batched_content_files_splits_correctly() {
let dir = tempdir().unwrap();
let content = dir.path().join("content");
fs::create_dir_all(&content).unwrap();
for i in 0..25 {
fs::write(
content.join(format!("page{i}.md")),
format!("# Page {i}"),
)
.unwrap();
}
let budget = MemoryBudget {
max_bytes: 0,
batch_size: 10,
};
let batches = batched_content_files(&content, &budget).unwrap();
assert_eq!(batches.len(), 3); assert_eq!(batches[0].len(), 10);
assert_eq!(batches[1].len(), 10);
assert_eq!(batches[2].len(), 5);
}
#[test]
fn merge_dir_combines_files() {
let dir = tempdir().unwrap();
let src = dir.path().join("src");
let dst = dir.path().join("dst");
fs::create_dir_all(&src).unwrap();
fs::create_dir_all(&dst).unwrap();
fs::write(src.join("a.html"), "from src").unwrap();
fs::write(dst.join("b.html"), "existing").unwrap();
merge_dir(&src, &dst).unwrap();
assert_eq!(fs::read_to_string(dst.join("a.html")).unwrap(), "from src");
assert_eq!(fs::read_to_string(dst.join("b.html")).unwrap(), "existing");
}
#[test]
fn merge_dir_overwrites_on_conflict() {
let dir = tempdir().unwrap();
let src = dir.path().join("src");
let dst = dir.path().join("dst");
fs::create_dir_all(&src).unwrap();
fs::create_dir_all(&dst).unwrap();
fs::write(src.join("a.html"), "new").unwrap();
fs::write(dst.join("a.html"), "old").unwrap();
merge_dir(&src, &dst).unwrap();
assert_eq!(fs::read_to_string(dst.join("a.html")).unwrap(), "new");
}
#[test]
fn should_stream_when_explicitly_set() {
let dir = tempdir().unwrap();
let content = dir.path().join("content");
fs::create_dir_all(&content).unwrap();
let budget = MemoryBudget::default_budget();
assert!(should_stream(&content, &budget, true));
}
#[test]
fn compile_batch_empty_is_noop() {
let dir = tempdir().unwrap();
let result = compile_batch(
&[],
dir.path(),
&dir.path().join("build"),
&dir.path().join("site"),
&dir.path().join("templates"),
0,
);
assert!(result.is_ok());
}
#[test]
fn merge_dir_nonexistent_src_is_noop() {
let dir = tempdir().unwrap();
let result =
merge_dir(&dir.path().join("nonexistent"), &dir.path().join("dst"));
assert!(result.is_ok());
}
#[test]
fn merge_dir_nested() {
let dir = tempdir().unwrap();
let src = dir.path().join("src");
let dst = dir.path().join("dst");
let nested = src.join("sub");
fs::create_dir_all(&nested).unwrap();
fs::create_dir_all(&dst).unwrap();
fs::write(nested.join("file.txt"), "nested").unwrap();
merge_dir(&src, &dst).unwrap();
assert_eq!(
fs::read_to_string(dst.join("sub/file.txt")).unwrap(),
"nested"
);
}
#[test]
fn should_stream_large_site() {
let dir = tempdir().unwrap();
let content = dir.path().join("content");
fs::create_dir_all(&content).unwrap();
let budget = MemoryBudget {
max_bytes: 0,
batch_size: 2,
};
for i in 0..5 {
fs::write(content.join(format!("p{i}.md")), "# Hi").unwrap();
}
assert!(should_stream(&content, &budget, false));
}
#[test]
fn should_not_stream_small_site() {
let dir = tempdir().unwrap();
let content = dir.path().join("content");
fs::create_dir_all(&content).unwrap();
fs::write(content.join("index.md"), "# Home").unwrap();
let budget = MemoryBudget::default_budget();
assert!(!should_stream(&content, &budget, false));
}
#[test]
fn memory_budget_from_mb_one() {
let budget = MemoryBudget::from_mb(1);
assert_eq!(budget.max_bytes, 1024 * 1024);
assert_eq!(budget.batch_size, 16);
}
#[test]
fn memory_budget_from_mb_very_large() {
let budget = MemoryBudget::from_mb(4096);
assert_eq!(budget.max_bytes, 4096 * 1024 * 1024);
assert_eq!(budget.batch_size, 65536);
}
#[test]
fn memory_budget_batch_size_floor_is_ten() {
let budget = MemoryBudget::from_mb(0);
assert_eq!(budget.max_bytes, 0);
assert_eq!(budget.batch_size, 10);
}
#[test]
fn memory_budget_default_budget_matches_constant() {
let budget = MemoryBudget::default_budget();
assert_eq!(budget.max_bytes, DEFAULT_MEMORY_BUDGET_MB * 1024 * 1024);
assert_eq!(
budget.batch_size,
MemoryBudget::from_mb(DEFAULT_MEMORY_BUDGET_MB).batch_size
);
}
#[test]
fn memory_budget_clone_copy_debug() {
let a = MemoryBudget::from_mb(128);
let b = a; assert_eq!(a.max_bytes, b.max_bytes);
assert_eq!(a.batch_size, b.batch_size);
let debug = format!("{a:?}");
assert!(debug.contains("MemoryBudget"));
}
#[test]
fn batched_content_files_nonexistent_dir_returns_empty() {
let dir = tempdir().unwrap();
let budget = MemoryBudget::from_mb(512);
let result =
batched_content_files(&dir.path().join("nonexistent"), &budget);
if let Ok(batches) = result {
assert!(batches.is_empty());
}
}
#[test]
fn batched_content_files_single_file() {
let dir = tempdir().unwrap();
let content = dir.path().join("content");
fs::create_dir_all(&content).unwrap();
fs::write(content.join("index.md"), "# Home").unwrap();
let budget = MemoryBudget::from_mb(512);
let batches = batched_content_files(&content, &budget).unwrap();
assert_eq!(batches.len(), 1);
assert_eq!(batches[0].len(), 1);
}
#[test]
fn batched_content_files_ignores_non_md() {
let dir = tempdir().unwrap();
let content = dir.path().join("content");
fs::create_dir_all(&content).unwrap();
fs::write(content.join("page.md"), "# Page").unwrap();
fs::write(content.join("image.png"), "fakepng").unwrap();
fs::write(content.join("style.css"), "body{}").unwrap();
let budget = MemoryBudget::from_mb(512);
let batches = batched_content_files(&content, &budget).unwrap();
let total: usize = batches.iter().map(|b| b.len()).sum();
assert_eq!(total, 1, "only .md files should be collected");
}
#[test]
fn batched_content_files_exact_batch_boundary() {
let dir = tempdir().unwrap();
let content = dir.path().join("content");
fs::create_dir_all(&content).unwrap();
for i in 0..10 {
fs::write(content.join(format!("p{i}.md")), "# Hi").unwrap();
}
let budget = MemoryBudget {
max_bytes: 0,
batch_size: 10,
};
let batches = batched_content_files(&content, &budget).unwrap();
assert_eq!(batches.len(), 1);
assert_eq!(batches[0].len(), 10);
}
#[test]
fn batched_content_files_many_small_batches() {
let dir = tempdir().unwrap();
let content = dir.path().join("content");
fs::create_dir_all(&content).unwrap();
for i in 0..7 {
fs::write(content.join(format!("p{i}.md")), "# Hi").unwrap();
}
let budget = MemoryBudget {
max_bytes: 0,
batch_size: 2,
};
let batches = batched_content_files(&content, &budget).unwrap();
assert_eq!(batches.len(), 4); assert_eq!(batches[3].len(), 1);
}
#[test]
fn batched_content_files_nested_directories() {
let dir = tempdir().unwrap();
let content = dir.path().join("content");
fs::create_dir_all(content.join("blog")).unwrap();
fs::create_dir_all(content.join("docs")).unwrap();
fs::write(content.join("index.md"), "# Index").unwrap();
fs::write(content.join("blog/post.md"), "# Post").unwrap();
fs::write(content.join("docs/api.md"), "# API").unwrap();
let budget = MemoryBudget::from_mb(512);
let batches = batched_content_files(&content, &budget).unwrap();
let total: usize = batches.iter().map(|b| b.len()).sum();
assert_eq!(total, 3);
}
#[test]
fn merge_dir_deeply_nested() {
let dir = tempdir().unwrap();
let src = dir.path().join("src");
let dst = dir.path().join("dst");
fs::create_dir_all(src.join("a/b/c")).unwrap();
fs::create_dir_all(&dst).unwrap();
fs::write(src.join("a/b/c/deep.txt"), "deep content").unwrap();
merge_dir(&src, &dst).unwrap();
assert_eq!(
fs::read_to_string(dst.join("a/b/c/deep.txt")).unwrap(),
"deep content"
);
}
#[test]
fn merge_dir_empty_src() {
let dir = tempdir().unwrap();
let src = dir.path().join("src");
let dst = dir.path().join("dst");
fs::create_dir_all(&src).unwrap();
fs::create_dir_all(&dst).unwrap();
fs::write(dst.join("existing.txt"), "keep").unwrap();
merge_dir(&src, &dst).unwrap();
assert_eq!(
fs::read_to_string(dst.join("existing.txt")).unwrap(),
"keep"
);
}
#[test]
fn merge_dir_multiple_files() {
let dir = tempdir().unwrap();
let src = dir.path().join("src");
let dst = dir.path().join("dst");
fs::create_dir_all(&src).unwrap();
fs::create_dir_all(&dst).unwrap();
for i in 0..5 {
fs::write(src.join(format!("f{i}.txt")), format!("data{i}"))
.unwrap();
}
merge_dir(&src, &dst).unwrap();
for i in 0..5 {
assert_eq!(
fs::read_to_string(dst.join(format!("f{i}.txt"))).unwrap(),
format!("data{i}")
);
}
}
#[test]
fn should_stream_with_no_content_dir() {
let dir = tempdir().unwrap();
let budget = MemoryBudget::from_mb(512);
assert!(!should_stream(
&dir.path().join("no-content"),
&budget,
false
));
}
#[test]
fn should_stream_explicitly_set_overrides_count() {
let dir = tempdir().unwrap();
let content = dir.path().join("content");
fs::create_dir_all(&content).unwrap();
let budget = MemoryBudget::from_mb(512);
assert!(should_stream(&content, &budget, true));
}
#[test]
fn should_stream_exactly_at_batch_boundary() {
let dir = tempdir().unwrap();
let content = dir.path().join("content");
fs::create_dir_all(&content).unwrap();
let budget = MemoryBudget {
max_bytes: 0,
batch_size: 3,
};
for i in 0..3 {
fs::write(content.join(format!("p{i}.md")), "# Hi").unwrap();
}
assert!(!should_stream(&content, &budget, false));
}
#[test]
fn should_stream_one_over_boundary() {
let dir = tempdir().unwrap();
let content = dir.path().join("content");
fs::create_dir_all(&content).unwrap();
let budget = MemoryBudget {
max_bytes: 0,
batch_size: 3,
};
for i in 0..4 {
fs::write(content.join(format!("p{i}.md")), "# Hi").unwrap();
}
assert!(should_stream(&content, &budget, false));
}
#[test]
fn compile_batch_with_nonexistent_files_still_creates_dirs() {
let dir = tempdir().unwrap();
let content = dir.path().join("content");
let build = dir.path().join("build");
let site = dir.path().join("site");
let templates = dir.path().join("templates");
fs::create_dir_all(&content).unwrap();
let result = compile_batch(
&[content.join("nonexistent.md")],
&content,
&build,
&site,
&templates,
0,
);
let _ = result;
}
#[test]
fn compile_batch_creates_batch_content_dir() {
let dir = tempdir().unwrap();
let content = dir.path().join("content");
let build = dir.path().join("build");
let site = dir.path().join("site");
let templates = dir.path().join("templates");
fs::create_dir_all(&content).unwrap();
fs::create_dir_all(&templates).unwrap();
fs::write(content.join("page.md"), "---\ntitle: T\n---\n# Hi").unwrap();
let _result = compile_batch(
&[content.join("page.md")],
&content,
&build,
&site,
&templates,
42,
);
}
}