use std::path::{Path, PathBuf};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use crate::frontmatter::Frontmatter;
/// Source format of a content file.
///
/// Serde (de)serializes the variants using their lowercase names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ContentType {
/// Markdown source (`md` / `markdown` file extensions).
Markdown,
/// Typst source (`typ` / `typst` file extensions).
Typst,
}
impl ContentType {
    /// Maps a file extension (without the leading dot) to a content type.
    ///
    /// The comparison is case-insensitive. Returns `None` when the extension
    /// is not one of `md`, `markdown`, `typ` or `typst`.
    pub fn from_extension(ext: &str) -> Option<Self> {
        let normalized = ext.to_lowercase();
        let normalized = normalized.as_str();

        if matches!(normalized, "md" | "markdown") {
            Some(Self::Markdown)
        } else if matches!(normalized, "typ" | "typst") {
            Some(Self::Typst)
        } else {
            None
        }
    }

    /// Returns the short file extension (without a dot) for this content type.
    pub fn extension(&self) -> &'static str {
        match *self {
            Self::Markdown => "md",
            Self::Typst => "typ",
        }
    }
}
/// Routing metadata derived from the location of a content file.
///
/// Instances are produced by `ContentPath::from_path`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ContentPath {
/// The path exactly as it was passed to `from_path`.
pub path: PathBuf,
/// Language code taken from the file name suffix (e.g. `hello.zh.md`),
/// or the default language when no suffix is present.
pub lang: String,
/// `true` when `lang` equals the default language given to `from_path`.
pub is_default_lang: bool,
/// Language-independent identifier built from the parent directory and the
/// file stem; an `index` stem contributes nothing beyond its directory.
pub canonical_id: String,
/// `canonical_id` for the default language, otherwise `"{lang}/{canonical_id}"`.
pub slug: String,
/// Format detected from the file extension.
pub content_type: ContentType,
}
impl ContentPath {
    /// Derives language, canonical id and slug from a content file path.
    ///
    /// * The extension selects the [`ContentType`]; unknown extensions yield `None`.
    /// * A trailing `.xx` / `.xxx` segment of the file stem made only of ASCII
    ///   lowercase letters is treated as a language code (`hello.zh.md` -> `zh`).
    ///   Without one, `default_lang` is used.
    /// * The canonical id is `<parent>/<stem>` without the language suffix; a
    ///   stem of `index` maps to the parent directory alone (so a root-level
    ///   `index` file has an empty canonical id).
    /// * The slug is the canonical id, prefixed with `<lang>/` when the language
    ///   is not the default.
    ///
    /// Returns `None` when the path has no extension or file stem, when either
    /// is not valid UTF-8, or when the extension is not a known content type.
    pub fn from_path(path: &Path, default_lang: &str) -> Option<Self> {
        let extension = path.extension()?.to_str()?;
        let content_type = ContentType::from_extension(extension)?;
        let stem = path.file_stem()?.to_str()?;

        // Only the last dot-separated segment is considered as a language code.
        let (base_stem, detected_lang) = match stem.rsplit_once('.') {
            Some((base, suffix))
                if (2..=3).contains(&suffix.len())
                    && suffix.chars().all(|c| c.is_ascii_lowercase()) =>
            {
                (base, Some(suffix))
            }
            _ => (stem, None),
        };

        let lang = detected_lang.unwrap_or(default_lang).to_string();
        let is_default_lang = lang == default_lang;

        let parent = path.parent().unwrap_or(Path::new(""));
        let joined = if base_stem == "index" {
            parent.to_string_lossy().into_owned()
        } else if parent.as_os_str().is_empty() {
            base_stem.to_string()
        } else {
            format!("{}/{}", parent.display(), base_stem)
        };

        // Ids end up in URLs, so they must always use `/`. On platforms whose
        // native separator differs (e.g. `\` on Windows) the parent directory
        // would otherwise leak that separator into the id and slug. On Unix
        // this replacement is a no-op.
        let canonical_id = joined
            .replace(std::path::MAIN_SEPARATOR, "/")
            .trim_matches('/')
            .to_string();

        let slug = if is_default_lang {
            canonical_id.clone()
        } else {
            format!("{lang}/{canonical_id}")
        };

        Some(Self {
            path: path.to_path_buf(),
            lang,
            is_default_lang,
            canonical_id,
            slug,
            content_type,
        })
    }

    /// Returns the site-relative URL for this content: the slug with a leading `/`.
    pub fn url_path(&self) -> String {
        format!("/{}", self.slug)
    }
}
/// Output of parsing a single content file, consumed by `Page::from_parsed`.
#[derive(Debug, Clone)]
pub struct ParsedContent {
/// Metadata parsed from the file's frontmatter.
pub frontmatter: Frontmatter,
/// HTML body; `Page::from_parsed` stores it as the page content and derives
/// the fallback summary from it.
pub html: String,
/// Raw text used by `Page::from_parsed` for the word count.
/// NOTE(review): presumably the unrendered source body — confirm with the parser.
pub raw: String,
/// Table-of-contents entries, passed through to the page unchanged.
pub toc: Vec<TocEntry>,
}
/// One entry in a page's table of contents.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TocEntry {
/// Nesting level of the entry.
/// NOTE(review): presumably the heading level (1-6) — confirm against the TOC builder.
pub level: u8,
/// Display text of the entry.
pub text: String,
/// Identifier of the entry.
/// NOTE(review): presumably the anchor id of the heading — confirm against the renderer.
pub id: String,
}
/// A fully resolved page, serializable with serde.
///
/// Fields marked `#[serde(default)]` fall back to their type's default value
/// when absent during deserialization. The per-field notes below describe how
/// `Page::from_parsed` populates each value.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Page {
/// Site-relative URL, taken from `ContentPath::url_path`.
pub url: String,
/// Title from the frontmatter.
pub title: String,
/// Optional description from the frontmatter.
#[serde(default)]
pub description: Option<String>,
/// Optional date from the frontmatter.
#[serde(default)]
pub date: Option<DateTime<Utc>>,
/// Optional "updated" timestamp from the frontmatter.
#[serde(default)]
pub updated: Option<DateTime<Utc>>,
/// Draft flag from the frontmatter.
#[serde(default)]
pub draft: bool,
/// Language code of the page, from its `ContentPath`.
pub lang: String,
/// Whether `lang` is the site's default language, from its `ContentPath`.
#[serde(default)]
pub is_default_lang: bool,
/// Language-independent identifier, from its `ContentPath`.
#[serde(default)]
pub canonical_id: String,
/// Tags from the frontmatter.
#[serde(default)]
pub tags: Vec<String>,
/// Categories from the frontmatter.
#[serde(default)]
pub categories: Vec<String>,
/// HTML body of the page.
pub content: String,
/// The frontmatter description when present; otherwise the tag-stripped
/// HTML truncated to at most 160 bytes at a word boundary.
#[serde(default)]
pub summary: Option<String>,
/// `word_count / 200`, but never less than 1.
/// NOTE(review): presumably minutes at 200 words per minute — confirm with templates.
#[serde(default)]
pub reading_time: Option<u32>,
/// Number of whitespace-separated words in the raw content.
#[serde(default)]
pub word_count: Option<u32>,
/// Table-of-contents entries from the parsed content.
#[serde(default)]
pub toc: Vec<TocEntry>,
/// `custom_js` list copied from the frontmatter.
#[serde(default)]
pub custom_js: Vec<String>,
/// `custom_css` list copied from the frontmatter.
#[serde(default)]
pub custom_css: Vec<String>,
/// `aliases` list copied from the frontmatter.
/// NOTE(review): presumably alternative URLs for this page — confirm with the router.
#[serde(default)]
pub aliases: Vec<String>,
/// Optional template name copied from the frontmatter.
#[serde(default)]
pub template: Option<String>,
/// `weight` copied from the frontmatter.
/// NOTE(review): presumably a sort key — confirm with the listing code.
#[serde(default)]
pub weight: i32,
/// Path of the source file this page was built from.
#[serde(default)]
pub source_path: Option<PathBuf>,
}
impl Page {
pub fn from_parsed(content: ParsedContent, content_path: &ContentPath) -> Self {
let fm = &content.frontmatter;
let word_count = content.raw.split_whitespace().count() as u32;
let reading_time = (word_count / 200).max(1);
let summary = fm.description.clone().or_else(|| {
let plain_text = strip_html(&content.html);
Some(truncate_at_word_boundary(&plain_text, 160))
});
Self {
url: content_path.url_path(),
title: fm.title.clone(),
description: fm.description.clone(),
date: fm.date,
updated: fm.updated,
draft: fm.draft,
lang: content_path.lang.clone(),
is_default_lang: content_path.is_default_lang,
canonical_id: content_path.canonical_id.clone(),
tags: fm.tags.clone(),
categories: fm.categories.clone(),
content: content.html,
summary,
reading_time: Some(reading_time),
word_count: Some(word_count),
toc: content.toc,
custom_js: fm.custom_js.clone(),
custom_css: fm.custom_css.clone(),
aliases: fm.aliases.clone(),
template: fm.template.clone(),
weight: fm.weight,
source_path: Some(content_path.path.clone()),
}
}
}
/// Removes everything between `<` and `>` (inclusive) from `html`.
///
/// This is a simple character scan, not an HTML parser: entities are left
/// untouched, and a stray `>` outside of a tag is dropped as well.
fn strip_html(html: &str) -> String {
    let mut inside_tag = false;

    html.chars()
        .filter(|&ch| {
            if ch == '<' {
                inside_tag = true;
                false
            } else if ch == '>' {
                inside_tag = false;
                false
            } else {
                !inside_tag
            }
        })
        .collect()
}
/// Shortens `text` to at most `max_len` bytes, cutting at a word boundary.
///
/// * Text that already fits is returned unchanged.
/// * Otherwise the text is cut at the last space within the first `max_len`
///   bytes (or at the byte limit when there is no space) and `...` is appended.
///
/// `max_len` is measured in bytes. When it falls inside a multi-byte UTF-8
/// character, the cut is moved back to the start of that character, so the
/// function never panics on non-ASCII input.
fn truncate_at_word_boundary(text: &str, max_len: usize) -> String {
    if text.len() <= max_len {
        return text.to_string();
    }

    // `max_len < text.len()` here, and offset 0 is always a char boundary,
    // so this loop terminates with a valid slice index.
    let mut cut = max_len;
    while !text.is_char_boundary(cut) {
        cut -= 1;
    }

    let truncated = &text[..cut];
    match truncated.rfind(' ') {
        Some(last_space) => format!("{}...", &truncated[..last_space]),
        None => format!("{truncated}..."),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `raw` as a content path with `en` as the default language.
    fn parse_en(raw: &str) -> ContentPath {
        ContentPath::from_path(Path::new(raw), "en").expect("parse path")
    }

    #[test]
    fn test_content_type_from_extension() {
        let cases = [
            ("md", Some(ContentType::Markdown)),
            ("MD", Some(ContentType::Markdown)),
            ("typ", Some(ContentType::Typst)),
            ("txt", None),
        ];
        for (ext, expected) in cases {
            assert_eq!(ContentType::from_extension(ext), expected);
        }
    }

    #[test]
    fn test_content_path_simple() {
        let parsed = parse_en("posts/hello.md");
        assert_eq!(parsed.lang, "en");
        assert!(parsed.is_default_lang);
        assert_eq!(parsed.canonical_id, "posts/hello");
        assert_eq!(parsed.slug, "posts/hello");
        assert_eq!(parsed.content_type, ContentType::Markdown);
        assert_eq!(parsed.url_path(), "/posts/hello");
    }

    #[test]
    fn test_content_path_with_language() {
        let parsed = parse_en("posts/hello.zh.md");
        assert_eq!(parsed.lang, "zh");
        assert!(!parsed.is_default_lang);
        assert_eq!(parsed.canonical_id, "posts/hello");
        assert_eq!(parsed.slug, "zh/posts/hello");
        assert_eq!(parsed.url_path(), "/zh/posts/hello");
    }

    #[test]
    fn test_content_path_default_language() {
        let parsed = parse_en("posts/hello.en.md");
        assert_eq!(parsed.lang, "en");
        assert!(parsed.is_default_lang);
        assert_eq!(parsed.canonical_id, "posts/hello");
        assert_eq!(parsed.slug, "posts/hello");
    }

    #[test]
    fn test_content_path_index_file() {
        let parsed = parse_en("posts/hello/index.md");
        assert_eq!(parsed.lang, "en");
        assert!(parsed.is_default_lang);
        assert_eq!(parsed.canonical_id, "posts/hello");
        assert_eq!(parsed.slug, "posts/hello");
    }

    #[test]
    fn test_content_path_index_with_lang() {
        let parsed = parse_en("posts/hello/index.zh.md");
        assert_eq!(parsed.lang, "zh");
        assert!(!parsed.is_default_lang);
        assert_eq!(parsed.canonical_id, "posts/hello");
        assert_eq!(parsed.slug, "zh/posts/hello");
    }

    #[test]
    fn test_content_path_typst() {
        let parsed = parse_en("docs/guide.typ");
        assert_eq!(parsed.lang, "en");
        assert!(parsed.is_default_lang);
        assert_eq!(parsed.canonical_id, "docs/guide");
        assert_eq!(parsed.slug, "docs/guide");
        assert_eq!(parsed.content_type, ContentType::Typst);
    }

    #[test]
    fn test_strip_html() {
        let cases = [
            ("<p>Hello <strong>World</strong></p>", "Hello World"),
            ("No tags here", "No tags here"),
        ];
        for (input, expected) in cases {
            assert_eq!(strip_html(input), expected);
        }
    }

    #[test]
    fn test_truncate_at_word_boundary() {
        let text = "Hello world this is a test";
        let cases = [
            (100, text),
            (11, "Hello..."),
            (5, "Hello..."),
            (12, "Hello world..."),
        ];
        for (limit, expected) in cases {
            assert_eq!(truncate_at_word_boundary(text, limit), expected);
        }
    }
}