typstify_core/
content.rs

1//! Content types and structures.
2
3use std::path::{Path, PathBuf};
4
5use chrono::{DateTime, Utc};
6use serde::{Deserialize, Serialize};
7
8use crate::frontmatter::Frontmatter;
9
/// Type of content source.
///
/// Because of `#[serde(rename_all = "lowercase")]`, the variants are
/// (de)serialized as `"markdown"` and `"typst"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ContentType {
    /// Markdown content (`.md` files; `.markdown` is also recognised by
    /// [`ContentType::from_extension`]).
    Markdown,
    /// Typst content (`.typ` files; `.typst` is also recognised by
    /// [`ContentType::from_extension`]).
    Typst,
}
19
20impl ContentType {
21    /// Determine content type from file extension.
22    pub fn from_extension(ext: &str) -> Option<Self> {
23        match ext.to_lowercase().as_str() {
24            "md" | "markdown" => Some(Self::Markdown),
25            "typ" | "typst" => Some(Self::Typst),
26            _ => None,
27        }
28    }
29
30    /// Get the file extension for this content type.
31    pub fn extension(&self) -> &'static str {
32        match self {
33            Self::Markdown => "md",
34            Self::Typst => "typ",
35        }
36    }
37}
38
/// Parsed content path with language and slug extraction.
///
/// Built by [`ContentPath::from_path`], which derives every field from the
/// file path and the site's default language.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ContentPath {
    /// Original file path, stored exactly as it was passed in.
    pub path: PathBuf,

    /// Language code for this content (always set, defaults to site default).
    /// Taken from a `.<lang>` filename suffix when one is present.
    pub lang: String,

    /// Whether this is the default language version
    /// (i.e. `lang` equals the site's default language).
    pub is_default_lang: bool,

    /// Canonical identifier for translation linking (language-neutral slug).
    /// Used to group translations: both `posts/hello.md` and
    /// `posts/hello.zh.md` share the canonical id `"posts/hello"`.
    pub canonical_id: String,

    /// URL slug derived from the path. Equal to `canonical_id` for the default
    /// language, and prefixed with `<lang>/` otherwise.
    pub slug: String,

    /// Content type based on extension.
    pub content_type: ContentType,
}
61
62impl ContentPath {
63    /// Parse a content path to extract language and slug.
64    ///
65    /// Supports patterns like:
66    /// - `posts/hello.md` → lang: "en" (default), canonical_id: "posts/hello", slug: "posts/hello"
67    /// - `posts/hello.zh.md` → lang: "zh", canonical_id: "posts/hello", slug: "zh/posts/hello"
68    /// - `posts/hello/index.md` → lang: "en" (default), canonical_id: "posts/hello", slug: "posts/hello"
69    /// - `posts/hello/index.zh.md` → lang: "zh", canonical_id: "posts/hello", slug: "zh/posts/hello"
70    pub fn from_path(path: &Path, default_lang: &str) -> Option<Self> {
71        let extension = path.extension()?.to_str()?;
72        let content_type = ContentType::from_extension(extension)?;
73
74        let stem = path.file_stem()?.to_str()?;
75
76        // Check for language suffix in filename (e.g., "index.zh" or "hello.zh")
77        let (base_stem, detected_lang) = if let Some(dot_pos) = stem.rfind('.') {
78            let potential_lang = &stem[dot_pos + 1..];
79            // Check if it looks like a language code (2-3 chars, lowercase alpha)
80            if potential_lang.len() >= 2
81                && potential_lang.len() <= 3
82                && potential_lang.chars().all(|c| c.is_ascii_lowercase())
83            {
84                (&stem[..dot_pos], Some(potential_lang.to_string()))
85            } else {
86                (stem, None)
87            }
88        } else {
89            (stem, None)
90        };
91
92        // Determine final language and whether it's the default
93        let lang = detected_lang.unwrap_or_else(|| default_lang.to_string());
94        let is_default_lang = lang == default_lang;
95
96        // Build the canonical_id (language-neutral) from the path
97        let parent = path.parent().unwrap_or(Path::new(""));
98        let canonical_id = if base_stem == "index" {
99            // For index files, use the parent directory as the canonical id
100            parent.to_string_lossy().to_string()
101        } else {
102            // For regular files, combine parent and stem
103            if parent.as_os_str().is_empty() {
104                base_stem.to_string()
105            } else {
106                format!("{}/{}", parent.display(), base_stem)
107            }
108        };
109
110        // Normalize canonical_id: remove leading/trailing slashes
111        let canonical_id = canonical_id.trim_matches('/').to_string();
112
113        // Build the URL slug (includes language prefix for non-default languages)
114        let slug = if is_default_lang {
115            canonical_id.clone()
116        } else {
117            format!("{lang}/{canonical_id}")
118        };
119
120        Some(Self {
121            path: path.to_path_buf(),
122            lang,
123            is_default_lang,
124            canonical_id,
125            slug,
126            content_type,
127        })
128    }
129
130    /// Get the URL path for this content.
131    pub fn url_path(&self) -> String {
132        format!("/{}", self.slug)
133    }
134}
135
/// Parsed content with metadata and rendered HTML.
///
/// Consumed by [`Page::from_parsed`] to build a [`Page`].
#[derive(Debug, Clone)]
pub struct ParsedContent {
    /// Parsed frontmatter metadata.
    pub frontmatter: Frontmatter,

    /// Rendered HTML content.
    pub html: String,

    /// Raw source content (without frontmatter).
    /// [`Page::from_parsed`] counts whitespace-separated words in this text.
    pub raw: String,

    /// Table of contents extracted from headings.
    pub toc: Vec<TocEntry>,
}
151
/// Table of contents entry describing a single heading.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TocEntry {
    /// Heading level (1-6).
    pub level: u8,

    /// Heading text.
    pub text: String,

    /// Anchor ID for linking.
    // NOTE(review): presumably matches the heading's `id` attribute in the
    // rendered HTML — confirm against the renderer that fills this in.
    pub id: String,
}
164
/// A fully processed page ready for rendering.
///
/// Fields marked `#[serde(default)]` may be omitted when deserializing and
/// then take their type's default value; `url`, `title`, `lang` and `content`
/// carry no such attribute.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Page {
    /// URL path for this page.
    pub url: String,

    /// Page title.
    pub title: String,

    /// Page description/summary.
    #[serde(default)]
    pub description: Option<String>,

    /// Publication date.
    #[serde(default)]
    pub date: Option<DateTime<Utc>>,

    /// Last updated date.
    #[serde(default)]
    pub updated: Option<DateTime<Utc>>,

    /// Whether this is a draft.
    #[serde(default)]
    pub draft: bool,

    /// Language code for this page.
    pub lang: String,

    /// Whether this is the default language version.
    #[serde(default)]
    pub is_default_lang: bool,

    /// Canonical identifier for translation linking (language-neutral).
    #[serde(default)]
    pub canonical_id: String,

    /// Tags for this page.
    #[serde(default)]
    pub tags: Vec<String>,

    /// Categories for this page.
    #[serde(default)]
    pub categories: Vec<String>,

    /// Rendered HTML content.
    pub content: String,

    /// Summary/excerpt for listings.
    /// `Page::from_parsed` uses the frontmatter description when present and
    /// otherwise a truncated plain-text version of the rendered HTML.
    #[serde(default)]
    pub summary: Option<String>,

    /// Reading time in minutes.
    /// `Page::from_parsed` estimates this at 200 words per minute, with a
    /// minimum of 1.
    #[serde(default)]
    pub reading_time: Option<u32>,

    /// Word count.
    /// `Page::from_parsed` counts whitespace-separated words in the raw source.
    #[serde(default)]
    pub word_count: Option<u32>,

    /// Table of contents.
    #[serde(default)]
    pub toc: Vec<TocEntry>,

    /// Custom JavaScript files to include.
    #[serde(default)]
    pub custom_js: Vec<String>,

    /// Custom CSS files to include.
    #[serde(default)]
    pub custom_css: Vec<String>,

    /// URL aliases for redirects.
    #[serde(default)]
    pub aliases: Vec<String>,

    /// Template to use for rendering.
    #[serde(default)]
    pub template: Option<String>,

    /// Sort weight for ordering.
    #[serde(default)]
    pub weight: i32,

    /// Source file path.
    #[serde(default)]
    pub source_path: Option<PathBuf>,
}
252
253impl Page {
254    /// Create a new page from parsed content and content path.
255    pub fn from_parsed(content: ParsedContent, content_path: &ContentPath) -> Self {
256        let fm = &content.frontmatter;
257
258        // Calculate word count and reading time
259        let word_count = content.raw.split_whitespace().count() as u32;
260        let reading_time = (word_count / 200).max(1); // Assume 200 WPM
261
262        // Generate summary if not provided
263        let summary = fm.description.clone().or_else(|| {
264            // Take first paragraph or first 160 chars
265            let plain_text = strip_html(&content.html);
266            Some(truncate_at_word_boundary(&plain_text, 160))
267        });
268
269        Self {
270            url: content_path.url_path(),
271            title: fm.title.clone(),
272            description: fm.description.clone(),
273            date: fm.date,
274            updated: fm.updated,
275            draft: fm.draft,
276            lang: content_path.lang.clone(),
277            is_default_lang: content_path.is_default_lang,
278            canonical_id: content_path.canonical_id.clone(),
279            tags: fm.tags.clone(),
280            categories: fm.categories.clone(),
281            content: content.html,
282            summary,
283            reading_time: Some(reading_time),
284            word_count: Some(word_count),
285            toc: content.toc,
286            custom_js: fm.custom_js.clone(),
287            custom_css: fm.custom_css.clone(),
288            aliases: fm.aliases.clone(),
289            template: fm.template.clone(),
290            weight: fm.weight,
291            source_path: Some(content_path.path.clone()),
292        }
293    }
294}
295
/// Remove HTML tags from `html`, keeping only the text between them.
///
/// Everything from a `<` up to and including the next `>` is dropped. A `>`
/// outside a tag is dropped as well. Entities such as `&amp;` are left as-is.
fn strip_html(html: &str) -> String {
    let mut inside_tag = false;

    html.chars()
        .filter(|&ch| {
            if ch == '<' {
                inside_tag = true;
                false
            } else if ch == '>' {
                inside_tag = false;
                false
            } else {
                !inside_tag
            }
        })
        .collect()
}
312
/// Truncate `text` to at most `max_len` bytes, cutting at a word boundary.
///
/// Text that already fits is returned unchanged. Otherwise the text is cut at
/// the last space within the first `max_len` bytes (or at the byte limit when
/// there is no space) and `"..."` is appended.
///
/// `max_len` is measured in bytes. If it falls inside a multi-byte UTF-8
/// character, the cut moves back to the start of that character, so this
/// never panics on non-ASCII text.
fn truncate_at_word_boundary(text: &str, max_len: usize) -> String {
    if text.len() <= max_len {
        return text.to_string();
    }

    // Slicing at a byte offset inside a character would panic, so back up to
    // the nearest char boundary. Offset 0 is always a boundary, so this ends.
    let mut cut = max_len;
    while !text.is_char_boundary(cut) {
        cut -= 1;
    }

    let truncated = &text[..cut];
    match truncated.rfind(' ') {
        Some(last_space) => format!("{}...", &truncated[..last_space]),
        None => format!("{truncated}..."),
    }
}
326
/// Unit tests for extension detection, content path parsing and the
/// summary helpers.
#[cfg(test)]
mod tests {
    use super::*;

    /// Known extensions map to a content type regardless of case; unknown
    /// extensions yield `None`.
    #[test]
    fn test_content_type_from_extension() {
        assert_eq!(
            ContentType::from_extension("md"),
            Some(ContentType::Markdown)
        );
        assert_eq!(
            ContentType::from_extension("MD"),
            Some(ContentType::Markdown)
        );
        assert_eq!(ContentType::from_extension("typ"), Some(ContentType::Typst));
        assert_eq!(ContentType::from_extension("txt"), None);
    }

    /// A file without a language suffix uses the default language and an
    /// unprefixed slug.
    #[test]
    fn test_content_path_simple() {
        let path = Path::new("posts/hello.md");
        let cp = ContentPath::from_path(path, "en").expect("parse path");

        assert_eq!(cp.lang, "en");
        assert!(cp.is_default_lang);
        assert_eq!(cp.canonical_id, "posts/hello");
        assert_eq!(cp.slug, "posts/hello");
        assert_eq!(cp.content_type, ContentType::Markdown);
        assert_eq!(cp.url_path(), "/posts/hello");
    }

    /// A non-default language suffix is stripped from the canonical id and
    /// becomes a slug prefix.
    #[test]
    fn test_content_path_with_language() {
        let path = Path::new("posts/hello.zh.md");
        let cp = ContentPath::from_path(path, "en").expect("parse path");

        assert_eq!(cp.lang, "zh");
        assert!(!cp.is_default_lang);
        assert_eq!(cp.canonical_id, "posts/hello");
        assert_eq!(cp.slug, "zh/posts/hello");
        assert_eq!(cp.url_path(), "/zh/posts/hello");
    }

    /// An explicit suffix equal to the default language behaves like no
    /// suffix at all.
    #[test]
    fn test_content_path_default_language() {
        let path = Path::new("posts/hello.en.md");
        let cp = ContentPath::from_path(path, "en").expect("parse path");

        // Default language should still be tracked as default
        assert_eq!(cp.lang, "en");
        assert!(cp.is_default_lang);
        assert_eq!(cp.canonical_id, "posts/hello");
        assert_eq!(cp.slug, "posts/hello");
    }

    /// `index` files are identified by their parent directory.
    #[test]
    fn test_content_path_index_file() {
        let path = Path::new("posts/hello/index.md");
        let cp = ContentPath::from_path(path, "en").expect("parse path");

        assert_eq!(cp.lang, "en");
        assert!(cp.is_default_lang);
        assert_eq!(cp.canonical_id, "posts/hello");
        assert_eq!(cp.slug, "posts/hello");
    }

    /// `index.<lang>` files combine the parent-directory id with the
    /// language prefix.
    #[test]
    fn test_content_path_index_with_lang() {
        let path = Path::new("posts/hello/index.zh.md");
        let cp = ContentPath::from_path(path, "en").expect("parse path");

        assert_eq!(cp.lang, "zh");
        assert!(!cp.is_default_lang);
        assert_eq!(cp.canonical_id, "posts/hello");
        assert_eq!(cp.slug, "zh/posts/hello");
    }

    /// `.typ` files are parsed like Markdown files but reported as Typst.
    #[test]
    fn test_content_path_typst() {
        let path = Path::new("docs/guide.typ");
        let cp = ContentPath::from_path(path, "en").expect("parse path");

        assert_eq!(cp.lang, "en");
        assert!(cp.is_default_lang);
        assert_eq!(cp.canonical_id, "docs/guide");
        assert_eq!(cp.slug, "docs/guide");
        assert_eq!(cp.content_type, ContentType::Typst);
    }

    /// Tags are removed and the text between them is kept verbatim.
    #[test]
    fn test_strip_html() {
        assert_eq!(
            strip_html("<p>Hello <strong>World</strong></p>"),
            "Hello World"
        );
        assert_eq!(strip_html("No tags here"), "No tags here");
    }

    /// Text within the limit is unchanged; longer text is cut at the last
    /// space inside the limit and gets a `...` suffix.
    #[test]
    fn test_truncate_at_word_boundary() {
        let text = "Hello world this is a test";
        assert_eq!(truncate_at_word_boundary(text, 100), text);
        // max_len=11 gives "Hello world", last space at pos 5, so "Hello..."
        assert_eq!(truncate_at_word_boundary(text, 11), "Hello...");
        // max_len=5 gives "Hello", which has no space, so the whole slice is kept
        assert_eq!(truncate_at_word_boundary(text, 5), "Hello...");
        // max_len=12 gives "Hello world ", last space at pos 11, so "Hello world..."
        assert_eq!(truncate_at_word_boundary(text, 12), "Hello world...");
    }
}
435}