1use std::path::{Path, PathBuf};
4
5use chrono::{DateTime, Utc};
6use serde::{Deserialize, Serialize};
7
8use crate::frontmatter::Frontmatter;
9
/// Markup format of a content source file.
///
/// Serialized by serde using the lowercase variant name
/// (`"markdown"`, `"typst"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ContentType {
    /// Markdown source; recognized from the `md` / `markdown` extensions.
    Markdown,
    /// Typst source; recognized from the `typ` / `typst` extensions.
    Typst,
}
19
20impl ContentType {
21 pub fn from_extension(ext: &str) -> Option<Self> {
23 match ext.to_lowercase().as_str() {
24 "md" | "markdown" => Some(Self::Markdown),
25 "typ" | "typst" => Some(Self::Typst),
26 _ => None,
27 }
28 }
29
30 pub fn extension(&self) -> &'static str {
32 match self {
33 Self::Markdown => "md",
34 Self::Typst => "typ",
35 }
36 }
37}
38
/// Information derived from the location of a content file.
///
/// Built by [`ContentPath::from_path`]; see that function for how each
/// field is computed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ContentPath {
    /// The path exactly as it was passed to `from_path`.
    pub path: PathBuf,

    /// Language code: the `.xx` / `.xxx` suffix of the file stem when
    /// present, otherwise the default language.
    pub lang: String,

    /// `true` when `lang` equals the default language given to `from_path`.
    pub is_default_lang: bool,

    /// Language-independent identifier: the parent directory for `index`
    /// files, otherwise `parent/stem` (language suffix removed), with
    /// leading and trailing `/` trimmed.
    pub canonical_id: String,

    /// `canonical_id` for the default language, otherwise
    /// `lang/canonical_id`.
    pub slug: String,

    /// Markup format selected from the file extension.
    pub content_type: ContentType,
}
61
62impl ContentPath {
63 pub fn from_path(path: &Path, default_lang: &str) -> Option<Self> {
71 let extension = path.extension()?.to_str()?;
72 let content_type = ContentType::from_extension(extension)?;
73
74 let stem = path.file_stem()?.to_str()?;
75
76 let (base_stem, detected_lang) = if let Some(dot_pos) = stem.rfind('.') {
78 let potential_lang = &stem[dot_pos + 1..];
79 if potential_lang.len() >= 2
81 && potential_lang.len() <= 3
82 && potential_lang.chars().all(|c| c.is_ascii_lowercase())
83 {
84 (&stem[..dot_pos], Some(potential_lang.to_string()))
85 } else {
86 (stem, None)
87 }
88 } else {
89 (stem, None)
90 };
91
92 let lang = detected_lang.unwrap_or_else(|| default_lang.to_string());
94 let is_default_lang = lang == default_lang;
95
96 let parent = path.parent().unwrap_or(Path::new(""));
98 let canonical_id = if base_stem == "index" {
99 parent.to_string_lossy().to_string()
101 } else {
102 if parent.as_os_str().is_empty() {
104 base_stem.to_string()
105 } else {
106 format!("{}/{}", parent.display(), base_stem)
107 }
108 };
109
110 let canonical_id = canonical_id.trim_matches('/').to_string();
112
113 let slug = if is_default_lang {
115 canonical_id.clone()
116 } else {
117 format!("{lang}/{canonical_id}")
118 };
119
120 Some(Self {
121 path: path.to_path_buf(),
122 lang,
123 is_default_lang,
124 canonical_id,
125 slug,
126 content_type,
127 })
128 }
129
130 pub fn url_path(&self) -> String {
132 format!("/{}", self.slug)
133 }
134}
135
/// Result of parsing a single content file, consumed by
/// `Page::from_parsed`.
#[derive(Debug, Clone)]
pub struct ParsedContent {
    /// Metadata parsed from the file's frontmatter.
    pub frontmatter: Frontmatter,

    /// HTML for the body; becomes `Page::content` and is the fallback
    /// source for the page summary.
    pub html: String,

    /// Source text; `Page::from_parsed` counts its whitespace-separated
    /// words. NOTE(review): whether this includes the frontmatter is not
    /// visible here — confirm against the parser.
    pub raw: String,

    /// Table-of-contents entries collected for the document.
    pub toc: Vec<TocEntry>,
}
151
/// A single table-of-contents entry.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TocEntry {
    /// Nesting level of the entry (presumably the heading level, e.g.
    /// 1 for `<h1>` — TODO confirm against the producer).
    pub level: u8,

    /// Text of the entry.
    pub text: String,

    /// Identifier of the entry (presumably the heading's anchor id —
    /// TODO confirm).
    pub id: String,
}
164
/// A fully described page.
///
/// Fields marked `#[serde(default)]` may be omitted when deserializing
/// and then take their type's default value. The per-field notes below
/// describe what `Page::from_parsed` stores in each field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Page {
    /// Site-absolute URL path (`/` followed by the content slug).
    pub url: String,

    /// Title taken from the frontmatter.
    pub title: String,

    /// Description taken from the frontmatter, if any.
    #[serde(default)]
    pub description: Option<String>,

    /// `date` value from the frontmatter, if any.
    #[serde(default)]
    pub date: Option<DateTime<Utc>>,

    /// `updated` value from the frontmatter, if any.
    #[serde(default)]
    pub updated: Option<DateTime<Utc>>,

    /// `draft` flag from the frontmatter.
    #[serde(default)]
    pub draft: bool,

    /// Language code of the page.
    pub lang: String,

    /// Whether `lang` is the site's default language.
    #[serde(default)]
    pub is_default_lang: bool,

    /// Language-independent identifier shared by all translations of
    /// the same content.
    #[serde(default)]
    pub canonical_id: String,

    /// Tags from the frontmatter.
    #[serde(default)]
    pub tags: Vec<String>,

    /// Categories from the frontmatter.
    #[serde(default)]
    pub categories: Vec<String>,

    /// Page body as HTML.
    pub content: String,

    /// Short summary: the frontmatter description when present,
    /// otherwise the tag-stripped HTML truncated to about 160 bytes at a
    /// word boundary.
    #[serde(default)]
    pub summary: Option<String>,

    /// Reading-time estimate: word count divided by 200, but at least 1
    /// (presumably minutes at 200 words per minute — confirm).
    #[serde(default)]
    pub reading_time: Option<u32>,

    /// Number of whitespace-separated words in the raw source.
    #[serde(default)]
    pub word_count: Option<u32>,

    /// Table-of-contents entries of the page.
    #[serde(default)]
    pub toc: Vec<TocEntry>,

    /// `custom_js` list from the frontmatter (presumably extra script
    /// references for this page — confirm with the templates).
    #[serde(default)]
    pub custom_js: Vec<String>,

    /// `custom_css` list from the frontmatter (presumably extra
    /// stylesheet references for this page — confirm with the templates).
    #[serde(default)]
    pub custom_css: Vec<String>,

    /// `aliases` list from the frontmatter (presumably alternative URLs
    /// for the page — confirm).
    #[serde(default)]
    pub aliases: Vec<String>,

    /// `template` value from the frontmatter, if any (presumably a
    /// template override — confirm).
    #[serde(default)]
    pub template: Option<String>,

    /// `weight` value from the frontmatter (presumably a sort key —
    /// confirm).
    #[serde(default)]
    pub weight: i32,

    /// Path of the source file the page was built from, if known.
    #[serde(default)]
    pub source_path: Option<PathBuf>,
}
252
253impl Page {
254 pub fn from_parsed(content: ParsedContent, content_path: &ContentPath) -> Self {
256 let fm = &content.frontmatter;
257
258 let word_count = content.raw.split_whitespace().count() as u32;
260 let reading_time = (word_count / 200).max(1); let summary = fm.description.clone().or_else(|| {
264 let plain_text = strip_html(&content.html);
266 Some(truncate_at_word_boundary(&plain_text, 160))
267 });
268
269 Self {
270 url: content_path.url_path(),
271 title: fm.title.clone(),
272 description: fm.description.clone(),
273 date: fm.date,
274 updated: fm.updated,
275 draft: fm.draft,
276 lang: content_path.lang.clone(),
277 is_default_lang: content_path.is_default_lang,
278 canonical_id: content_path.canonical_id.clone(),
279 tags: fm.tags.clone(),
280 categories: fm.categories.clone(),
281 content: content.html,
282 summary,
283 reading_time: Some(reading_time),
284 word_count: Some(word_count),
285 toc: content.toc,
286 custom_js: fm.custom_js.clone(),
287 custom_css: fm.custom_css.clone(),
288 aliases: fm.aliases.clone(),
289 template: fm.template.clone(),
290 weight: fm.weight,
291 source_path: Some(content_path.path.clone()),
292 }
293 }
294}
295
/// Removes everything between `<` and `>` (inclusive) from `html`.
///
/// This is a plain character scan, not an HTML parser: a `<` always
/// starts skipping, a `>` always stops it, and neither character is ever
/// copied to the output. Entities such as `&amp;` are left untouched.
fn strip_html(html: &str) -> String {
    let mut inside_tag = false;

    html.chars()
        .filter(|&ch| {
            if ch == '<' {
                inside_tag = true;
                false
            } else if ch == '>' {
                inside_tag = false;
                false
            } else {
                !inside_tag
            }
        })
        .collect()
}
312
/// Shortens `text` to at most `max_len` bytes, cutting at a word boundary
/// and appending `...`.
///
/// * Text that already fits in `max_len` bytes is returned unchanged
///   (no ellipsis).
/// * Otherwise the text is cut at `max_len` bytes — moved back to the
///   nearest UTF-8 character boundary so multi-byte characters are never
///   split — then trimmed back to the last space inside that prefix, if
///   there is one, and `...` is appended.
///
/// The returned string can therefore be up to three bytes longer than
/// `max_len`.
fn truncate_at_word_boundary(text: &str, max_len: usize) -> String {
    if text.len() <= max_len {
        return text.to_string();
    }

    // Slicing at an arbitrary byte offset panics inside a multi-byte
    // character, so back off to the closest boundary. Offset 0 is always
    // a boundary, which guarantees termination.
    let mut cut = max_len;
    while !text.is_char_boundary(cut) {
        cut -= 1;
    }

    let head = &text[..cut];
    let kept = head.rfind(' ').map_or(head, |space| &head[..space]);
    format!("{kept}...")
}
326
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `raw` with `en` as the default language, failing the test
    /// if the path is rejected.
    fn parse_en(raw: &str) -> ContentPath {
        ContentPath::from_path(Path::new(raw), "en").expect("parse path")
    }

    #[test]
    fn test_content_type_from_extension() {
        let cases = [
            ("md", Some(ContentType::Markdown)),
            ("MD", Some(ContentType::Markdown)),
            ("typ", Some(ContentType::Typst)),
            ("txt", None),
        ];

        for &(ext, expected) in cases.iter() {
            assert_eq!(ContentType::from_extension(ext), expected);
        }
    }

    #[test]
    fn test_content_path_simple() {
        let parsed = parse_en("posts/hello.md");

        assert_eq!(parsed.lang, "en");
        assert!(parsed.is_default_lang);
        assert_eq!(parsed.canonical_id, "posts/hello");
        assert_eq!(parsed.slug, "posts/hello");
        assert_eq!(parsed.content_type, ContentType::Markdown);
        assert_eq!(parsed.url_path(), "/posts/hello");
    }

    #[test]
    fn test_content_path_with_language() {
        let parsed = parse_en("posts/hello.zh.md");

        assert_eq!(parsed.lang, "zh");
        assert!(!parsed.is_default_lang);
        assert_eq!(parsed.canonical_id, "posts/hello");
        assert_eq!(parsed.slug, "zh/posts/hello");
        assert_eq!(parsed.url_path(), "/zh/posts/hello");
    }

    #[test]
    fn test_content_path_default_language() {
        // An explicit suffix equal to the default language must not add
        // a language prefix to the slug.
        let parsed = parse_en("posts/hello.en.md");

        assert_eq!(parsed.lang, "en");
        assert!(parsed.is_default_lang);
        assert_eq!(parsed.canonical_id, "posts/hello");
        assert_eq!(parsed.slug, "posts/hello");
    }

    #[test]
    fn test_content_path_index_file() {
        // `index` files are identified by their directory.
        let parsed = parse_en("posts/hello/index.md");

        assert_eq!(parsed.lang, "en");
        assert!(parsed.is_default_lang);
        assert_eq!(parsed.canonical_id, "posts/hello");
        assert_eq!(parsed.slug, "posts/hello");
    }

    #[test]
    fn test_content_path_index_with_lang() {
        let parsed = parse_en("posts/hello/index.zh.md");

        assert_eq!(parsed.lang, "zh");
        assert!(!parsed.is_default_lang);
        assert_eq!(parsed.canonical_id, "posts/hello");
        assert_eq!(parsed.slug, "zh/posts/hello");
    }

    #[test]
    fn test_content_path_typst() {
        let parsed = parse_en("docs/guide.typ");

        assert_eq!(parsed.lang, "en");
        assert!(parsed.is_default_lang);
        assert_eq!(parsed.canonical_id, "docs/guide");
        assert_eq!(parsed.slug, "docs/guide");
        assert_eq!(parsed.content_type, ContentType::Typst);
    }

    #[test]
    fn test_strip_html() {
        let tagged = strip_html("<p>Hello <strong>World</strong></p>");
        assert_eq!(tagged, "Hello World");

        let untagged = strip_html("No tags here");
        assert_eq!(untagged, "No tags here");
    }

    #[test]
    fn test_truncate_at_word_boundary() {
        let text = "Hello world this is a test";

        // Fits entirely: returned unchanged, no ellipsis.
        assert_eq!(truncate_at_word_boundary(text, 100), text);

        // (limit, expected) pairs for inputs that need cutting.
        let cut_cases = [(11, "Hello..."), (5, "Hello..."), (12, "Hello world...")];
        for &(limit, expected) in cut_cases.iter() {
            assert_eq!(truncate_at_word_boundary(text, limit), expected);
        }
    }
}