1use std::path::{Path, PathBuf};
4
5use chrono::{DateTime, Utc};
6use serde::{Deserialize, Serialize};
7
8use crate::frontmatter::Frontmatter;
9
10#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
12#[serde(rename_all = "lowercase")]
13pub enum ContentType {
14 Markdown,
16 Typst,
18}
19
20impl ContentType {
21 pub fn from_extension(ext: &str) -> Option<Self> {
23 match ext.to_lowercase().as_str() {
24 "md" | "markdown" => Some(Self::Markdown),
25 "typ" | "typst" => Some(Self::Typst),
26 _ => None,
27 }
28 }
29
30 pub fn extension(&self) -> &'static str {
32 match self {
33 Self::Markdown => "md",
34 Self::Typst => "typ",
35 }
36 }
37}
38
39#[derive(Debug, Clone, PartialEq, Eq)]
41pub struct ContentPath {
42 pub path: PathBuf,
44
45 pub lang: Option<String>,
47
48 pub slug: String,
50
51 pub content_type: ContentType,
53}
54
55impl ContentPath {
56 pub fn from_path(path: &Path, default_lang: &str) -> Option<Self> {
64 let extension = path.extension()?.to_str()?;
65 let content_type = ContentType::from_extension(extension)?;
66
67 let stem = path.file_stem()?.to_str()?;
68
69 let (base_stem, lang) = if let Some(dot_pos) = stem.rfind('.') {
71 let potential_lang = &stem[dot_pos + 1..];
72 if potential_lang.len() >= 2
74 && potential_lang.len() <= 3
75 && potential_lang.chars().all(|c| c.is_ascii_lowercase())
76 {
77 let lang = if potential_lang == default_lang {
78 None } else {
80 Some(potential_lang.to_string())
81 };
82 (&stem[..dot_pos], lang)
83 } else {
84 (stem, None)
85 }
86 } else {
87 (stem, None)
88 };
89
90 let parent = path.parent().unwrap_or(Path::new(""));
92 let slug = if base_stem == "index" {
93 parent.to_string_lossy().to_string()
95 } else {
96 if parent.as_os_str().is_empty() {
98 base_stem.to_string()
99 } else {
100 format!("{}/{}", parent.display(), base_stem)
101 }
102 };
103
104 let slug = slug.trim_matches('/').to_string();
106
107 Some(Self {
108 path: path.to_path_buf(),
109 lang,
110 slug,
111 content_type,
112 })
113 }
114
115 pub fn url_path(&self) -> String {
117 if let Some(ref lang) = self.lang {
118 format!("/{}/{}", lang, self.slug)
119 } else {
120 format!("/{}", self.slug)
121 }
122 }
123}
124
125#[derive(Debug, Clone)]
127pub struct ParsedContent {
128 pub frontmatter: Frontmatter,
130
131 pub html: String,
133
134 pub raw: String,
136
137 pub toc: Vec<TocEntry>,
139}
140
141#[derive(Debug, Clone, Serialize, Deserialize)]
143pub struct TocEntry {
144 pub level: u8,
146
147 pub text: String,
149
150 pub id: String,
152}
153
154#[derive(Debug, Clone, Serialize, Deserialize)]
156pub struct Page {
157 pub url: String,
159
160 pub title: String,
162
163 #[serde(default)]
165 pub description: Option<String>,
166
167 #[serde(default)]
169 pub date: Option<DateTime<Utc>>,
170
171 #[serde(default)]
173 pub updated: Option<DateTime<Utc>>,
174
175 #[serde(default)]
177 pub draft: bool,
178
179 #[serde(default)]
181 pub lang: Option<String>,
182
183 #[serde(default)]
185 pub tags: Vec<String>,
186
187 #[serde(default)]
189 pub categories: Vec<String>,
190
191 pub content: String,
193
194 #[serde(default)]
196 pub summary: Option<String>,
197
198 #[serde(default)]
200 pub reading_time: Option<u32>,
201
202 #[serde(default)]
204 pub word_count: Option<u32>,
205
206 #[serde(default)]
208 pub toc: Vec<TocEntry>,
209
210 #[serde(default)]
212 pub custom_js: Vec<String>,
213
214 #[serde(default)]
216 pub custom_css: Vec<String>,
217
218 #[serde(default)]
220 pub aliases: Vec<String>,
221
222 #[serde(default)]
224 pub template: Option<String>,
225
226 #[serde(default)]
228 pub weight: i32,
229
230 #[serde(default)]
232 pub source_path: Option<PathBuf>,
233}
234
235impl Page {
236 pub fn from_parsed(content: ParsedContent, content_path: &ContentPath) -> Self {
238 let fm = &content.frontmatter;
239
240 let word_count = content.raw.split_whitespace().count() as u32;
242 let reading_time = (word_count / 200).max(1); let summary = fm.description.clone().or_else(|| {
246 let plain_text = strip_html(&content.html);
248 Some(truncate_at_word_boundary(&plain_text, 160))
249 });
250
251 Self {
252 url: content_path.url_path(),
253 title: fm.title.clone(),
254 description: fm.description.clone(),
255 date: fm.date,
256 updated: fm.updated,
257 draft: fm.draft,
258 lang: content_path.lang.clone(),
259 tags: fm.tags.clone(),
260 categories: fm.categories.clone(),
261 content: content.html,
262 summary,
263 reading_time: Some(reading_time),
264 word_count: Some(word_count),
265 toc: content.toc,
266 custom_js: fm.custom_js.clone(),
267 custom_css: fm.custom_css.clone(),
268 aliases: fm.aliases.clone(),
269 template: fm.template.clone(),
270 weight: fm.weight,
271 source_path: Some(content_path.path.clone()),
272 }
273 }
274}
275
/// Removes HTML tags from `html` and returns the remaining text.
///
/// This is a lightweight scanner, not an HTML parser: everything from a `<`
/// up to and including the next `>` is dropped, and all other characters are
/// kept as-is. Entities (e.g. `&amp;`) are not decoded. A `>` that appears
/// outside of a tag is kept as literal text.
fn strip_html(html: &str) -> String {
    // The output can never be longer than the input.
    let mut text = String::with_capacity(html.len());
    let mut in_tag = false;

    for c in html.chars() {
        match c {
            '<' => in_tag = true,
            // Only a `>` that closes an open tag is markup; a stray one is
            // ordinary text and must not be swallowed.
            '>' if in_tag => in_tag = false,
            _ if !in_tag => text.push(c),
            _ => {}
        }
    }

    text
}
292
/// Shortens `text` to at most `max_len` bytes, cutting at a word boundary.
///
/// * If `text` already fits in `max_len` bytes it is returned unchanged.
/// * Otherwise the text is cut at the last space within the first `max_len`
///   bytes and `"..."` is appended. If that prefix contains no space, the
///   whole prefix is kept before the `"..."`.
///
/// The cut never splits a multi-byte UTF-8 character: when `max_len` falls
/// inside one, the limit is moved back to the start of that character.
/// The appended `"..."` is not counted against `max_len`.
fn truncate_at_word_boundary(text: &str, max_len: usize) -> String {
    if text.len() <= max_len {
        return text.to_string();
    }

    // Slicing at a non-boundary byte offset would panic, so back up to the
    // nearest character boundary. Offset 0 is always a boundary, so this
    // loop terminates.
    let mut end = max_len;
    while !text.is_char_boundary(end) {
        end -= 1;
    }

    let truncated = &text[..end];
    let kept = truncated
        .rfind(' ')
        .map_or(truncated, |last_space| &truncated[..last_space]);
    format!("{kept}...")
}
306
/// Unit tests for path parsing and the text helpers in this module.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_content_type_from_extension() {
        assert_eq!(
            ContentType::from_extension("md"),
            Some(ContentType::Markdown)
        );
        // Matching is case-insensitive.
        assert_eq!(
            ContentType::from_extension("MD"),
            Some(ContentType::Markdown)
        );
        assert_eq!(
            ContentType::from_extension("markdown"),
            Some(ContentType::Markdown)
        );
        assert_eq!(ContentType::from_extension("typ"), Some(ContentType::Typst));
        assert_eq!(
            ContentType::from_extension("typst"),
            Some(ContentType::Typst)
        );
        assert_eq!(ContentType::from_extension("txt"), None);
    }

    #[test]
    fn test_content_type_extension_round_trip() {
        for content_type in [ContentType::Markdown, ContentType::Typst] {
            assert_eq!(
                ContentType::from_extension(content_type.extension()),
                Some(content_type)
            );
        }
    }

    #[test]
    fn test_content_path_simple() {
        let path = Path::new("posts/hello.md");
        let cp = ContentPath::from_path(path, "en").expect("parse path");

        assert_eq!(cp.slug, "posts/hello");
        assert_eq!(cp.lang, None);
        assert_eq!(cp.content_type, ContentType::Markdown);
        assert_eq!(cp.url_path(), "/posts/hello");
    }

    #[test]
    fn test_content_path_root_file() {
        let path = Path::new("about.md");
        let cp = ContentPath::from_path(path, "en").expect("parse path");

        assert_eq!(cp.slug, "about");
        assert_eq!(cp.url_path(), "/about");
    }

    #[test]
    fn test_content_path_unknown_extension() {
        assert!(ContentPath::from_path(Path::new("notes.txt"), "en").is_none());
    }

    #[test]
    fn test_content_path_with_language() {
        let path = Path::new("posts/hello.zh.md");
        let cp = ContentPath::from_path(path, "en").expect("parse path");

        assert_eq!(cp.slug, "posts/hello");
        assert_eq!(cp.lang, Some("zh".to_string()));
        assert_eq!(cp.url_path(), "/zh/posts/hello");
    }

    #[test]
    fn test_content_path_default_language() {
        let path = Path::new("posts/hello.en.md");
        let cp = ContentPath::from_path(path, "en").expect("parse path");

        assert_eq!(cp.slug, "posts/hello");
        // The default language is implicit, so it is not stored.
        assert_eq!(cp.lang, None);
    }

    #[test]
    fn test_content_path_index_file() {
        let path = Path::new("posts/hello/index.md");
        let cp = ContentPath::from_path(path, "en").expect("parse path");

        assert_eq!(cp.slug, "posts/hello");
        assert_eq!(cp.lang, None);
    }

    #[test]
    fn test_content_path_index_with_lang() {
        let path = Path::new("posts/hello/index.zh.md");
        let cp = ContentPath::from_path(path, "en").expect("parse path");

        assert_eq!(cp.slug, "posts/hello");
        assert_eq!(cp.lang, Some("zh".to_string()));
    }

    #[test]
    fn test_content_path_typst() {
        let path = Path::new("docs/guide.typ");
        let cp = ContentPath::from_path(path, "en").expect("parse path");

        assert_eq!(cp.slug, "docs/guide");
        assert_eq!(cp.content_type, ContentType::Typst);
    }

    #[test]
    fn test_strip_html() {
        assert_eq!(
            strip_html("<p>Hello <strong>World</strong></p>"),
            "Hello World"
        );
        assert_eq!(strip_html("No tags here"), "No tags here");
    }

    #[test]
    fn test_truncate_at_word_boundary() {
        let text = "Hello world this is a test";
        // Text that already fits is returned unchanged.
        assert_eq!(truncate_at_word_boundary(text, 100), text);
        // The cut backs up to the last space inside the limit.
        assert_eq!(truncate_at_word_boundary(text, 11), "Hello...");
        // No space inside the limit: the whole prefix is kept.
        assert_eq!(truncate_at_word_boundary(text, 5), "Hello...");
        assert_eq!(truncate_at_word_boundary(text, 12), "Hello world...");
    }
}