use std::{
collections::{BTreeMap, HashMap},
fs::File,
io::Read,
ops::Index,
path::{Path, PathBuf},
};
use futures::StreamExt;
use miette::{Diagnostic, Severity};
use serde::Deserialize;
use thiserror::Error;
use tokio::fs;
use tokio_stream::wrappers::ReadDirStream;
mod links;
mod page;
mod wayback_links;
pub use links::{LinkDest, LinkDestError};
pub use page::{PageKind, PageMetadata, PageSource, SourceFormat};
pub use wayback_links::{WaybackLink, WaybackLinks, WaybackLinksError};
use crate::wayback::Snapshot;
use self::page::PageLoadError;
#[derive(Deserialize, Default)]
pub struct Config {
#[serde(default)]
pub title: String,
pub url: Option<String>,
pub author: Option<String>,
pub author_email: Option<String>,
pub subtitle: Option<String>,
pub posts: Option<PathBuf>,
pub theme: Option<PathBuf>,
#[serde(default)]
pub content: Vec<PathBuf>,
#[serde(default)]
pub macros: HashMap<String, PathBuf>,
#[serde(default)]
pub theme_opts: serde_json::Value,
pub wayback: Option<WaybackConfig>,
}
#[derive(Deserialize, Default)]
pub struct WaybackConfig {
pub snapshots: PathBuf,
pub exclude: Vec<WaybackFilter>,
}
impl WaybackConfig {
pub fn should_exclude_post(&self, post: &PageSource) -> bool {
for filter in &self.exclude {
match filter {
WaybackFilter::Before(date) => {
if let Some(post_date) = post.publish_date() {
if post_date < *date {
return true;
}
}
}
}
}
false
}
}
#[derive(Deserialize, PartialEq, Debug)]
pub enum WaybackFilter {
#[serde(rename = "before")]
Before(
#[serde(deserialize_with = "page::parsing_helpers::deserialize_date")]
chrono::DateTime<chrono::Utc>,
),
}
#[non_exhaustive]
#[derive(Diagnostic, Error, Debug)]
pub enum IndexError {
#[error("reading directory entry")]
ReadingDirectoryEntry(#[source] std::io::Error),
#[error("reading directory entry")]
WalkdirReadingDirectoryEntry(#[source] walkdir::Error),
#[error("invalid post filename: `{}`", .0.display())]
InvalidFilename(PathBuf),
#[error("reading post contents")]
ReadingPostContents(#[source] std::io::Error),
#[error("reading Site.toml")]
ReadingConfigFile(#[source] std::io::Error),
#[error("parsing Site.toml")]
ParsingConfigFile(#[source] Box<dyn std::error::Error + Send + Sync>),
}
#[derive(Default)]
pub struct SiteIndex {
config: Config,
root_dir: PathBuf,
pages: Vec<PageSource>,
raw_files: Vec<PathBuf>,
categories: BTreeMap<String, Category>,
}
impl SiteIndex {
pub async fn from_directory(
path: impl Into<PathBuf>,
include_unpublished: bool,
) -> Result<Self, IndexError> {
let root_dir = path.into();
let config: Config = toml::from_str(
&std::fs::read_to_string(root_dir.join("Site.toml"))
.map_err(IndexError::ReadingConfigFile)?,
)
.map_err(|e| IndexError::ParsingConfigFile(Box::new(e)))?;
let mut pages = vec![];
let mut raw_files = Vec::new();
pages.extend(
load_posts(
&root_dir.join(config.posts.as_ref().unwrap_or(&"_posts".into())),
&root_dir,
include_unpublished,
)
.await?,
);
for path in config.content.iter() {
match load_directory(root_dir.join(path), &root_dir, include_unpublished).await? {
(new_pages, files) => {
pages.extend(new_pages.into_iter());
raw_files.extend(files.into_iter());
}
}
}
let mut categories = BTreeMap::new();
for (id, page) in pages.iter().enumerate() {
match page.categories() {
Some(page_categories) => {
for category in page_categories {
categories
.entry(category.to_string())
.or_insert_with(|| Category {
name: category.to_string(),
posts: vec![],
})
.posts
.push(PageId(id));
}
}
None => (),
}
}
Ok(SiteIndex {
config,
root_dir,
pages,
raw_files,
categories,
})
}
pub fn config(&self) -> &Config {
&self.config
}
pub fn posts(&self) -> impl Iterator<Item = &PageSource> {
self.pages
.iter()
.filter(|post| post.kind() == PageKind::Post)
}
pub fn all_pages(&self) -> impl Iterator<Item = &PageSource> {
self.pages.iter()
}
pub fn find_page_by_source_path(&self, path: &Path) -> Option<&PageSource> {
self.pages.iter().find(|page| page.source_path() == path)
}
pub fn add_page(&mut self, page: PageSource) {
self.pages.push(page);
}
pub fn load_wayback_snapshot(&self) -> Result<Option<Snapshot>, WaybackError> {
let Some(WaybackConfig {
snapshots: ref src,
exclude: _,
}) = self.config().wayback
else {
return Ok(None);
};
let mut buf = <_>::default();
File::open(src)?.read_to_string(&mut buf)?;
let snapshot = toml::from_str(&buf)?;
Ok(Some(snapshot))
}
pub fn categories(&self) -> impl Iterator<Item = &Category> {
self.categories.values()
}
pub fn find_pages_in_category(&self, category: &str) -> impl Iterator<Item = &PageSource> {
self.categories
.get(category)
.into_iter()
.flat_map(|cat| cat.posts.iter())
.map(|id| &self.pages[id.0])
}
}
#[derive(Debug, Diagnostic, Error)]
pub enum WaybackError {
#[error("reading snapshot file")]
FileRead(
#[source]
#[from]
std::io::Error,
),
#[error("parsing snapshot file")]
ParseError(
#[source]
#[from]
toml::de::Error,
),
}
impl Index<PageId> for SiteIndex {
type Output = PageSource;
fn index(&self, id: PageId) -> &Self::Output {
&self.pages[id.0]
}
}
pub trait SiteMetadata {
fn config(&self) -> &Config;
fn base_url(&self) -> &str; fn title(&self) -> &str;
fn subtitle(&self) -> Option<&str>;
fn author(&self) -> Option<&str>;
fn author_email(&self) -> Option<&str>;
fn root_dir(&self) -> &PathBuf;
fn num_pages(&self) -> usize;
fn raw_files(&self) -> impl Iterator<Item = &Path>
where
Self: Sized;
}
impl SiteMetadata for SiteIndex {
fn base_url(&self) -> &str {
match &self.config.url {
Some(url) => url,
None => "",
}
}
fn title(&self) -> &str {
&self.config.title
}
fn subtitle(&self) -> Option<&str> {
self.config.subtitle.as_deref()
}
fn author(&self) -> Option<&str> {
self.config.author.as_deref()
}
fn author_email(&self) -> Option<&str> {
self.config.author_email.as_deref()
}
fn config(&self) -> &Config {
&self.config
}
fn root_dir(&self) -> &PathBuf {
&self.root_dir
}
fn num_pages(&self) -> usize {
self.pages.len()
}
fn raw_files(&self) -> impl Iterator<Item = &Path> {
self.raw_files.iter().map(AsRef::as_ref)
}
}
#[derive(Debug, Diagnostic, Error)]
#[diagnostic(severity(warning))]
#[error("skipping post with filename `{filename}`")]
struct SkippedPost {
#[source_code]
filename: String,
#[diagnostic_source]
reason: PageLoadError,
}
async fn load_posts(
path: &Path,
root_dir: &Path,
include_unpublished: bool,
) -> Result<Vec<PageSource>, IndexError> {
if !path.is_dir() {
return Ok(vec![]);
}
let mut posts = vec![];
let mut dir_stream = ReadDirStream::new(
fs::read_dir(path)
.await
.map_err(IndexError::ReadingDirectoryEntry)?,
);
while let Some(entry) = dir_stream.next().await {
let entry = entry.map_err(IndexError::ReadingDirectoryEntry)?;
let entry_path = entry.path();
let metadata = entry
.metadata()
.await
.map_err(IndexError::ReadingDirectoryEntry)?;
let file_to_load = if metadata.is_dir() {
let index_file = entry_path.join("index.md");
match fs::try_exists(&index_file).await {
Ok(true) => index_file,
_ => continue, }
} else {
if entry_path
.file_name()
.and_then(|s| s.to_str())
.map_or(false, |s| s.ends_with(".wayback.toml"))
{
continue;
}
entry_path
};
let page = match PageSource::from_file(file_to_load, root_dir).await {
Ok(page) => page,
Err(e) if e.severity() <= Some(Severity::Warning) => {
println!(
"{:?}",
miette::Report::new(SkippedPost {
filename: entry.path().display().to_string(),
reason: e,
})
);
continue;
}
Err(e) => panic!("I don't know what to do with this error: {}", e),
};
if page.published() || include_unpublished {
posts.push(page)
}
}
Ok(posts)
}
async fn load_directory(
path: impl AsRef<Path>,
root_dir: &Path,
include_unpublished: bool,
) -> Result<(Vec<PageSource>, Vec<PathBuf>), IndexError> {
let path = path.as_ref();
let mut pages = vec![];
let mut raw_files = vec![];
if path.is_file() {
if let Ok(page) = PageSource::from_file(path, root_dir).await {
if page.published() || include_unpublished {
return Ok((vec![page], vec![]));
} else {
return Ok((vec![], vec![]));
}
} else {
return Ok((vec![], vec![path.into()]));
}
}
let walk = walkdir::WalkDir::new(path);
for result in walk {
let entry = result.map_err(IndexError::WalkdirReadingDirectoryEntry)?;
if !entry.file_type().is_file() {
continue;
}
let filename = entry.path();
if let Ok(page) = PageSource::from_file(&filename, root_dir).await {
if page.published() || include_unpublished {
pages.push(page)
}
} else {
raw_files.push(filename.to_path_buf())
}
}
Ok((pages, raw_files))
}
pub struct PageId(usize);
pub struct Category {
pub(crate) name: String,
posts: Vec<PageId>,
}
impl Category {
pub fn slug(&self) -> String {
slug::slugify(&self.name)
}
pub fn url_path(&self) -> String {
format!("/blog/category/{}/", self.slug())
}
pub fn full_url(&self, base_url: &str) -> String {
format!("{}/blog/category/{}/", base_url, self.slug())
}
}
#[cfg(test)]
mod test {
use std::path::Path;
use chrono::{DateTime, Utc};
use miette::IntoDiagnostic;
use crate::index::WaybackFilter;
use super::Config;
#[test]
fn parse_site_config() {
let config = r#"url = "https://example.com"
title = "example site"
"#;
let config: Config = toml::from_str(config).unwrap();
assert_eq!(config.url, Some("https://example.com".to_string()));
}
#[test]
fn parse_wayback_config() -> miette::Result<()> {
let config = r#"
[wayback]
snapshots = "wayback.toml"
exclude = [{ before = "2024-01-01" }]
"#;
let config: super::Config = toml::from_str(config).into_diagnostic()?;
let wayback = config.wayback.unwrap();
assert_eq!(wayback.snapshots.as_path(), Path::new("wayback.toml"));
assert_eq!(wayback.exclude.len(), 1);
let date = DateTime::parse_from_rfc3339("2024-01-01T00:00:00Z")
.into_diagnostic()?
.with_timezone(&Utc);
assert_eq!(wayback.exclude[0], WaybackFilter::Before(date));
Ok(())
}
}