// calepin 0.0.19
//
// A Rust CLI for preprocessing Typst documents with executable code chunks.
use std::collections::BTreeSet;
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::OnceLock;

use anyhow::{anyhow, Context, Result};
use xxhash_rust::xxh3::xxh3_64;

use super::paths::rel_posix;
use super::{PageInfoMap, WebsiteManifest};

/// Directory name joined onto the site output directory when placing the files
/// that Pagefind generates (see `pagefind_output_path`).
pub(super) const PAGEFIND_DIR: &str = "pagefind";
/// Path of the Pagefind component UI stylesheet.
/// NOTE(review): presumably relative to the site output directory; it is not
/// referenced in this part of the file, so confirm at the use site.
pub(super) const PAGEFIND_CSS: &str = "pagefind/pagefind-component-ui.css";
/// Path of the Pagefind component UI script.
/// NOTE(review): presumably relative to the site output directory; it is not
/// referenced in this part of the file, so confirm at the use site.
pub(super) const PAGEFIND_JS: &str = "pagefind/pagefind-component-ui.js";
/// Selector handed to Pagefind as its `root_selector` option. It is also mixed
/// into the cache signature, so changing it invalidates previously cached indexes.
const PAGEFIND_ROOT_SELECTOR: &str = "[data-pagefind-body]";

/// Process-wide Tokio runtime used to drive the async Pagefind calls; it is
/// built on first use by `pagefind_runtime`.
///
/// The cell is initialized only once, so a failed build is remembered too: the
/// failure is stored as its message (`String`) and every later call to
/// `pagefind_runtime` wraps that message in a fresh error.
static PAGEFIND_RUNTIME: OnceLock<std::result::Result<tokio::runtime::Runtime, String>> =
    OnceLock::new();

/// Lists the rendered pages that should be fed to Pagefind.
///
/// Walks `typ_files` in order and, for every source that is not listed in
/// `fallback_files` and has an entry in `page_info`, yields a pair of:
///
/// * the rendered file location, i.e. the page's `href` joined onto `out_dir`;
/// * the URL to record for the page, built by `pagefind_page_url` from
///   `base_url` and the same `href`.
///
/// Sources without an entry in `page_info` are skipped silently.
pub(super) fn pagefind_pages(
    out_dir: &Path,
    typ_files: &[PathBuf],
    page_info: &PageInfoMap,
    fallback_files: &[PathBuf],
    base_url: Option<&str>,
) -> Vec<(PathBuf, String)> {
    let mut pages = Vec::new();
    for source in typ_files {
        // Fallback pages are excluded from the index.
        if fallback_files.contains(source) {
            continue;
        }
        let Some(info) = page_info.get(source) else {
            continue;
        };
        let rendered = out_dir.join(&info.href);
        let url = pagefind_page_url(base_url, &info.href);
        pages.push((rendered, url));
    }
    pages
}

/// Builds the site-absolute URL recorded for a page.
///
/// When `base_url` is present and `base_url_path_prefix` finds a path prefix
/// in it, the result is `{prefix}/{href}`; otherwise it is `/{href}`.
pub(super) fn pagefind_page_url(base_url: Option<&str>, href: &str) -> String {
    base_url.and_then(base_url_path_prefix).map_or_else(
        || format!("/{href}"),
        |prefix| format!("{prefix}/{href}"),
    )
}

/// Extracts the path portion of `base_url`, without any trailing slash.
///
/// The scheme (everything up to and including the first `://`), the host, the
/// query string and the fragment are all discarded. For example
/// `https://example.com/docs/?v=1` yields `Some("/docs")`.
///
/// Returns `None` when the URL has no path or the path is just `/`, as in
/// `https://example.com`, `https://example.com/` or `https://example.com#/route`.
pub(super) fn base_url_path_prefix(base_url: &str) -> Option<String> {
    // Cut the query string and fragment off *before* looking for the path.
    // Otherwise a `/` inside them (e.g. `https://host#/route` or
    // `https://host?next=/home`) is mistaken for the start of the path, and a
    // `://` inside a query value is mistaken for the scheme separator.
    let without_suffix = base_url.split(['?', '#']).next().unwrap_or("");
    let after_host = without_suffix
        .split_once("://")
        .map_or(without_suffix, |(_, rest)| rest);
    // The path starts at the first `/` following the host, if there is one.
    let path = after_host
        .find('/')
        .map_or("", |index| &after_host[index..])
        .trim_end_matches('/');
    (!path.is_empty()).then(|| path.to_string())
}

/// Computes a fingerprint of everything that influences the Pagefind index.
///
/// The hashed byte stream consists of a version tag, the root selector, the
/// `keep_index_url` setting and then, for every page, its path relative to
/// `out_dir`, its URL, the length of its HTML and the HTML itself. Pages are
/// ordered by relative path and then URL, so the result does not depend on the
/// order of `pages`.
///
/// # Errors
///
/// Fails if any page file cannot be read.
pub(super) fn pagefind_signature(out_dir: &Path, pages: &[(PathBuf, String)]) -> Result<u64> {
    // Pair every page with its relative key once, then order the borrowed
    // entries; the stable sort keeps ties in their incoming order.
    let mut entries: Vec<_> = pages
        .iter()
        .map(|(path, url)| (rel_posix(out_dir, path), path, url))
        .collect();
    entries.sort_by(|left, right| {
        left.0
            .cmp(&right.0)
            .then_with(|| left.2.as_str().cmp(right.2.as_str()))
    });

    // Header: format version and the indexing options that shape the output.
    let mut payload = b"calepin-pagefind-v1\0".to_vec();
    payload.extend_from_slice(PAGEFIND_ROOT_SELECTOR.as_bytes());
    payload.push(0);
    payload.extend_from_slice(b"keep_index_url=true\0");

    for (relative, path, url) in entries {
        payload.extend_from_slice(relative.as_bytes());
        payload.push(0);
        payload.extend_from_slice(url.as_bytes());
        payload.push(0);
        let html = fs::read(path).with_context(|| format!("failed to read {}", path.display()))?;
        // The length prefix and the 0xff terminator delimit each page's
        // content within the stream.
        payload.extend_from_slice(&(html.len() as u64).to_le_bytes());
        payload.extend_from_slice(&html);
        payload.push(0xff);
    }
    Ok(xxh3_64(&payload))
}

/// Resolves manifest-recorded output paths against `out_dir`.
///
/// Each entry of `outputs` is joined onto `out_dir`; the results are returned
/// as an ordered set, so duplicate entries collapse into one.
pub(super) fn manifest_output_paths(out_dir: &Path, outputs: &[String]) -> BTreeSet<PathBuf> {
    let mut resolved = BTreeSet::new();
    for relative in outputs {
        resolved.insert(out_dir.join(relative));
    }
    resolved
}

/// Returns the previously generated Pagefind outputs if they can be reused.
///
/// The cache is considered valid only when the manifest has a Pagefind entry,
/// that entry's signature equals `signature`, and every output it lists still
/// exists as a regular file under `out_dir`. In any other case `None` is
/// returned.
pub(super) fn cached_pagefind_outputs(
    out_dir: &Path,
    manifest: &WebsiteManifest,
    signature: u64,
) -> Option<BTreeSet<PathBuf>> {
    let pagefind = manifest
        .pagefind
        .as_ref()
        .filter(|entry| entry.signature == signature)?;
    let outputs = manifest_output_paths(out_dir, &pagefind.outputs);
    // A single missing file invalidates the whole cached set.
    let all_present = outputs.iter().all(|path| path.is_file());
    all_present.then_some(outputs)
}

/// Deletes Pagefind files recorded in the manifest that are no longer wanted.
///
/// Every output listed in the manifest's Pagefind entry that is absent from
/// `expected_outputs` and currently exists as a regular file is removed.
/// Missing paths and non-file paths are left untouched. Nothing happens when
/// the manifest has no Pagefind entry.
///
/// # Errors
///
/// Fails on the first file that cannot be removed.
pub(super) fn remove_stale_pagefind_outputs(
    out_dir: &Path,
    manifest: &WebsiteManifest,
    expected_outputs: &BTreeSet<PathBuf>,
) -> Result<()> {
    let Some(pagefind) = manifest.pagefind.as_ref() else {
        return Ok(());
    };
    let stale_candidates = manifest_output_paths(out_dir, &pagefind.outputs)
        .into_iter()
        .filter(|candidate| !expected_outputs.contains(candidate));
    for stale in stale_candidates {
        // `is_file` is false for paths that no longer exist as well as for
        // directories, so both are skipped here.
        if !stale.is_file() {
            continue;
        }
        fs::remove_file(&stale)
            .with_context(|| format!("failed to remove stale output {}", stale.display()))?;
    }
    Ok(())
}

/// Builds the Pagefind index for `pages` and writes it beneath `out_dir`.
///
/// This is the blocking entry point: it runs `write_pagefind_index_async` to
/// completion on the shared runtime and returns the set of files written.
/// With no pages it returns an empty set without touching the runtime.
///
/// # Errors
///
/// Fails if the runtime cannot be started or if indexing or writing fails.
pub(super) fn write_pagefind_index(
    out_dir: &Path,
    pages: &[(PathBuf, String)],
) -> Result<BTreeSet<PathBuf>> {
    if pages.is_empty() {
        return Ok(BTreeSet::new());
    }
    let runtime = pagefind_runtime()?;
    // The async job takes owned copies of its inputs.
    let job = write_pagefind_index_async(out_dir.to_path_buf(), pages.to_vec());
    runtime.block_on(job)
}

fn pagefind_runtime() -> Result<&'static tokio::runtime::Runtime> {
    PAGEFIND_RUNTIME
        .get_or_init(|| {
            tokio::runtime::Builder::new_multi_thread()
                .enable_all()
                .build()
                .map_err(|error| error.to_string())
        })
        .as_ref()
        .map_err(|error| anyhow!("failed to start Pagefind runtime: {error}"))
}

/// Indexes `pages` with Pagefind and writes the generated files to disk.
///
/// Each page's HTML is read from its path and added to the index under its
/// URL. Every file Pagefind then reports is written to the location given by
/// `pagefind_output_path`, with parent directories created as needed.
///
/// Returns the set of paths written.
///
/// # Errors
///
/// Fails if Pagefind cannot be initialized, a page cannot be read or indexed,
/// the index cannot be built, a reported file name is rejected by
/// `pagefind_output_path`, or a directory or file cannot be written.
async fn write_pagefind_index_async(
    out_dir: PathBuf,
    pages: Vec<(PathBuf, String)>,
) -> Result<BTreeSet<PathBuf>> {
    // These options are mirrored in `pagefind_signature`; keep the two in sync.
    let config = pagefind::options::PagefindServiceConfig::builder()
        .root_selector(PAGEFIND_ROOT_SELECTOR.to_string())
        .keep_index_url(true)
        .build();
    let mut index = pagefind::api::PagefindIndex::new(Some(config))
        .context("failed to initialize Pagefind")?;

    for (source, url) in pages {
        let read = tokio::fs::read_to_string(&source).await;
        let markup = read.with_context(|| format!("failed to read {}", source.display()))?;
        let label = source.to_string_lossy().into_owned();
        let added = index.add_html_file(Some(label), Some(url), markup).await;
        added.with_context(|| format!("failed to index {}", source.display()))?;
    }

    let generated = index
        .get_files()
        .await
        .context("failed to build Pagefind index")?;
    let mut written = BTreeSet::new();
    for file in generated {
        let target = pagefind_output_path(&out_dir, &file.filename)?;
        if let Some(dir) = target.parent() {
            let created = tokio::fs::create_dir_all(dir).await;
            created.with_context(|| format!("failed to create {}", dir.display()))?;
        }
        let stored = tokio::fs::write(&target, file.contents).await;
        stored.with_context(|| format!("failed to write {}", target.display()))?;
        written.insert(target);
    }
    Ok(written)
}

/// Maps a file name reported by Pagefind to its destination under
/// `out_dir/pagefind`.
///
/// # Errors
///
/// Fails with "invalid Pagefind output path" when `rel` could resolve outside
/// that directory: it is absolute, contains a `..` component, or contains a
/// root or Windows prefix component.
fn pagefind_output_path(out_dir: &Path, rel: &Path) -> Result<PathBuf> {
    // `Path::join` discards (part of) the base when the joined path has a root
    // or a Windows prefix. Paths such as `\foo` or `C:foo` are not
    // `is_absolute()` on Windows yet would still escape the base, so those
    // components are rejected explicitly alongside `..`.
    let escapes_base = rel.is_absolute()
        || rel.components().any(|component| {
            matches!(
                component,
                std::path::Component::Prefix(_)
                    | std::path::Component::RootDir
                    | std::path::Component::ParentDir
            )
        });
    if escapes_base {
        return Err(anyhow!("invalid Pagefind output path: {}", rel.display()));
    }
    Ok(out_dir.join(PAGEFIND_DIR).join(rel))
}