typstify_core/
content.rs

//! Content types and structures.
2
3use std::path::{Path, PathBuf};
4
5use chrono::{DateTime, Utc};
6use serde::{Deserialize, Serialize};
7
8use crate::frontmatter::Frontmatter;
9
10/// Type of content source.
11#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
12#[serde(rename_all = "lowercase")]
13pub enum ContentType {
14    /// Markdown content (.md files).
15    Markdown,
16    /// Typst content (.typ files).
17    Typst,
18}
19
20impl ContentType {
21    /// Determine content type from file extension.
22    pub fn from_extension(ext: &str) -> Option<Self> {
23        match ext.to_lowercase().as_str() {
24            "md" | "markdown" => Some(Self::Markdown),
25            "typ" | "typst" => Some(Self::Typst),
26            _ => None,
27        }
28    }
29
30    /// Get the file extension for this content type.
31    pub fn extension(&self) -> &'static str {
32        match self {
33            Self::Markdown => "md",
34            Self::Typst => "typ",
35        }
36    }
37}
38
39/// Parsed content path with language and slug extraction.
40#[derive(Debug, Clone, PartialEq, Eq)]
41pub struct ContentPath {
42    /// Original file path.
43    pub path: PathBuf,
44
45    /// Extracted language code (if any).
46    pub lang: Option<String>,
47
48    /// URL slug derived from the path.
49    pub slug: String,
50
51    /// Content type based on extension.
52    pub content_type: ContentType,
53}
54
55impl ContentPath {
56    /// Parse a content path to extract language and slug.
57    ///
58    /// Supports patterns like:
59    /// - `posts/hello.md` → slug: "posts/hello", lang: None
60    /// - `posts/hello.zh.md` → slug: "posts/hello", lang: Some("zh")
61    /// - `posts/hello/index.md` → slug: "posts/hello", lang: None
62    /// - `posts/hello/index.zh.md` → slug: "posts/hello", lang: Some("zh")
63    pub fn from_path(path: &Path, default_lang: &str) -> Option<Self> {
64        let extension = path.extension()?.to_str()?;
65        let content_type = ContentType::from_extension(extension)?;
66
67        let stem = path.file_stem()?.to_str()?;
68
69        // Check for language suffix in filename (e.g., "index.zh" or "hello.zh")
70        let (base_stem, lang) = if let Some(dot_pos) = stem.rfind('.') {
71            let potential_lang = &stem[dot_pos + 1..];
72            // Check if it looks like a language code (2-3 chars, lowercase alpha)
73            if potential_lang.len() >= 2
74                && potential_lang.len() <= 3
75                && potential_lang.chars().all(|c| c.is_ascii_lowercase())
76            {
77                let lang = if potential_lang == default_lang {
78                    None // Don't set lang if it's the default
79                } else {
80                    Some(potential_lang.to_string())
81                };
82                (&stem[..dot_pos], lang)
83            } else {
84                (stem, None)
85            }
86        } else {
87            (stem, None)
88        };
89
90        // Build the slug from the path
91        let parent = path.parent().unwrap_or(Path::new(""));
92        let slug = if base_stem == "index" {
93            // For index files, use the parent directory as the slug
94            parent.to_string_lossy().to_string()
95        } else {
96            // For regular files, combine parent and stem
97            if parent.as_os_str().is_empty() {
98                base_stem.to_string()
99            } else {
100                format!("{}/{}", parent.display(), base_stem)
101            }
102        };
103
104        // Normalize slug: remove leading/trailing slashes
105        let slug = slug.trim_matches('/').to_string();
106
107        Some(Self {
108            path: path.to_path_buf(),
109            lang,
110            slug,
111            content_type,
112        })
113    }
114
115    /// Get the URL path for this content.
116    pub fn url_path(&self) -> String {
117        if let Some(ref lang) = self.lang {
118            format!("/{}/{}", lang, self.slug)
119        } else {
120            format!("/{}", self.slug)
121        }
122    }
123}
124
125/// Parsed content with metadata and rendered HTML.
126#[derive(Debug, Clone)]
127pub struct ParsedContent {
128    /// Parsed frontmatter metadata.
129    pub frontmatter: Frontmatter,
130
131    /// Rendered HTML content.
132    pub html: String,
133
134    /// Raw source content (without frontmatter).
135    pub raw: String,
136
137    /// Table of contents extracted from headings.
138    pub toc: Vec<TocEntry>,
139}
140
141/// Table of contents entry.
142#[derive(Debug, Clone, Serialize, Deserialize)]
143pub struct TocEntry {
144    /// Heading level (1-6).
145    pub level: u8,
146
147    /// Heading text.
148    pub text: String,
149
150    /// Anchor ID for linking.
151    pub id: String,
152}
153
154/// A fully processed page ready for rendering.
155#[derive(Debug, Clone, Serialize, Deserialize)]
156pub struct Page {
157    /// URL path for this page.
158    pub url: String,
159
160    /// Page title.
161    pub title: String,
162
163    /// Page description/summary.
164    #[serde(default)]
165    pub description: Option<String>,
166
167    /// Publication date.
168    #[serde(default)]
169    pub date: Option<DateTime<Utc>>,
170
171    /// Last updated date.
172    #[serde(default)]
173    pub updated: Option<DateTime<Utc>>,
174
175    /// Whether this is a draft.
176    #[serde(default)]
177    pub draft: bool,
178
179    /// Language code.
180    #[serde(default)]
181    pub lang: Option<String>,
182
183    /// Tags for this page.
184    #[serde(default)]
185    pub tags: Vec<String>,
186
187    /// Categories for this page.
188    #[serde(default)]
189    pub categories: Vec<String>,
190
191    /// Rendered HTML content.
192    pub content: String,
193
194    /// Summary/excerpt for listings.
195    #[serde(default)]
196    pub summary: Option<String>,
197
198    /// Reading time in minutes.
199    #[serde(default)]
200    pub reading_time: Option<u32>,
201
202    /// Word count.
203    #[serde(default)]
204    pub word_count: Option<u32>,
205
206    /// Table of contents.
207    #[serde(default)]
208    pub toc: Vec<TocEntry>,
209
210    /// Custom JavaScript files to include.
211    #[serde(default)]
212    pub custom_js: Vec<String>,
213
214    /// Custom CSS files to include.
215    #[serde(default)]
216    pub custom_css: Vec<String>,
217
218    /// URL aliases for redirects.
219    #[serde(default)]
220    pub aliases: Vec<String>,
221
222    /// Template to use for rendering.
223    #[serde(default)]
224    pub template: Option<String>,
225
226    /// Sort weight for ordering.
227    #[serde(default)]
228    pub weight: i32,
229
230    /// Source file path.
231    #[serde(default)]
232    pub source_path: Option<PathBuf>,
233}
234
235impl Page {
236    /// Create a new page from parsed content and content path.
237    pub fn from_parsed(content: ParsedContent, content_path: &ContentPath) -> Self {
238        let fm = &content.frontmatter;
239
240        // Calculate word count and reading time
241        let word_count = content.raw.split_whitespace().count() as u32;
242        let reading_time = (word_count / 200).max(1); // Assume 200 WPM
243
244        // Generate summary if not provided
245        let summary = fm.description.clone().or_else(|| {
246            // Take first paragraph or first 160 chars
247            let plain_text = strip_html(&content.html);
248            Some(truncate_at_word_boundary(&plain_text, 160))
249        });
250
251        Self {
252            url: content_path.url_path(),
253            title: fm.title.clone(),
254            description: fm.description.clone(),
255            date: fm.date,
256            updated: fm.updated,
257            draft: fm.draft,
258            lang: content_path.lang.clone(),
259            tags: fm.tags.clone(),
260            categories: fm.categories.clone(),
261            content: content.html,
262            summary,
263            reading_time: Some(reading_time),
264            word_count: Some(word_count),
265            toc: content.toc,
266            custom_js: fm.custom_js.clone(),
267            custom_css: fm.custom_css.clone(),
268            aliases: fm.aliases.clone(),
269            template: fm.template.clone(),
270            weight: fm.weight,
271            source_path: Some(content_path.path.clone()),
272        }
273    }
274}
275
/// Strip HTML tags from content, keeping only the text between them.
///
/// Everything from a `<` up to the next `>` is treated as a tag and dropped.
/// A `>` that appears outside any tag is kept as ordinary text. Character
/// entities such as `&amp;` are not decoded.
fn strip_html(html: &str) -> String {
    // The output can never be longer than the input.
    let mut text = String::with_capacity(html.len());
    let mut in_tag = false;

    for ch in html.chars() {
        match ch {
            '<' => in_tag = true,
            // Only a `>` that closes an open tag is markup.
            '>' if in_tag => in_tag = false,
            _ if !in_tag => text.push(ch),
            _ => {}
        }
    }

    text
}
292
/// Truncate text to at most `max_len` bytes, cutting at a word boundary.
///
/// Text that already fits is returned unchanged. Otherwise the text is cut at
/// `max_len` bytes (backing off to the nearest UTF-8 character boundary so
/// multi-byte characters are never split), then cut again at the last space
/// within that prefix if there is one, and `...` is appended.
fn truncate_at_word_boundary(text: &str, max_len: usize) -> String {
    if text.len() <= max_len {
        return text.to_string();
    }

    // Slicing in the middle of a multi-byte character would panic, so step
    // back to a boundary. Index 0 is always a boundary, so this terminates.
    let mut cut = max_len;
    while !text.is_char_boundary(cut) {
        cut -= 1;
    }

    let truncated = &text[..cut];
    match truncated.rfind(' ') {
        Some(last_space) => format!("{}...", &truncated[..last_space]),
        None => format!("{truncated}..."),
    }
}
306
#[cfg(test)]
mod tests {
    //! Unit tests for content type detection, content path parsing, and the
    //! summary text helpers.

    use super::*;

    #[test]
    fn test_content_type_from_extension() {
        assert_eq!(
            ContentType::from_extension("md"),
            Some(ContentType::Markdown)
        );
        assert_eq!(
            ContentType::from_extension("MD"),
            Some(ContentType::Markdown)
        );
        assert_eq!(ContentType::from_extension("typ"), Some(ContentType::Typst));
        assert_eq!(ContentType::from_extension("txt"), None);
    }

    #[test]
    fn test_content_type_long_extensions() {
        assert_eq!(
            ContentType::from_extension("markdown"),
            Some(ContentType::Markdown)
        );
        assert_eq!(
            ContentType::from_extension("typst"),
            Some(ContentType::Typst)
        );
    }

    #[test]
    fn test_content_type_extension_round_trip() {
        for ty in [ContentType::Markdown, ContentType::Typst] {
            assert_eq!(ContentType::from_extension(ty.extension()), Some(ty));
        }
    }

    #[test]
    fn test_content_path_simple() {
        let path = Path::new("posts/hello.md");
        let cp = ContentPath::from_path(path, "en").expect("parse path");

        assert_eq!(cp.slug, "posts/hello");
        assert_eq!(cp.lang, None);
        assert_eq!(cp.content_type, ContentType::Markdown);
        assert_eq!(cp.url_path(), "/posts/hello");
    }

    #[test]
    fn test_content_path_with_language() {
        let path = Path::new("posts/hello.zh.md");
        let cp = ContentPath::from_path(path, "en").expect("parse path");

        assert_eq!(cp.slug, "posts/hello");
        assert_eq!(cp.lang, Some("zh".to_string()));
        assert_eq!(cp.url_path(), "/zh/posts/hello");
    }

    #[test]
    fn test_content_path_default_language() {
        let path = Path::new("posts/hello.en.md");
        let cp = ContentPath::from_path(path, "en").expect("parse path");

        // Default language should not be set explicitly
        assert_eq!(cp.slug, "posts/hello");
        assert_eq!(cp.lang, None);
    }

    #[test]
    fn test_content_path_index_file() {
        let path = Path::new("posts/hello/index.md");
        let cp = ContentPath::from_path(path, "en").expect("parse path");

        assert_eq!(cp.slug, "posts/hello");
        assert_eq!(cp.lang, None);
    }

    #[test]
    fn test_content_path_index_with_lang() {
        let path = Path::new("posts/hello/index.zh.md");
        let cp = ContentPath::from_path(path, "en").expect("parse path");

        assert_eq!(cp.slug, "posts/hello");
        assert_eq!(cp.lang, Some("zh".to_string()));
    }

    #[test]
    fn test_content_path_typst() {
        let path = Path::new("docs/guide.typ");
        let cp = ContentPath::from_path(path, "en").expect("parse path");

        assert_eq!(cp.slug, "docs/guide");
        assert_eq!(cp.content_type, ContentType::Typst);
    }

    #[test]
    fn test_content_path_root_level() {
        let cp = ContentPath::from_path(Path::new("about.md"), "en").expect("parse path");
        assert_eq!(cp.slug, "about");
        assert_eq!(cp.url_path(), "/about");

        // A root index file maps to the site root.
        let cp = ContentPath::from_path(Path::new("index.md"), "en").expect("parse path");
        assert_eq!(cp.slug, "");
        assert_eq!(cp.url_path(), "/");

        let cp = ContentPath::from_path(Path::new("index.zh.md"), "en").expect("parse path");
        assert_eq!(cp.slug, "");
        assert_eq!(cp.url_path(), "/zh/");
    }

    #[test]
    fn test_content_path_unsupported() {
        assert!(ContentPath::from_path(Path::new("notes.txt"), "en").is_none());
        assert!(ContentPath::from_path(Path::new("README"), "en").is_none());
    }

    #[test]
    fn test_strip_html() {
        assert_eq!(
            strip_html("<p>Hello <strong>World</strong></p>"),
            "Hello World"
        );
        assert_eq!(strip_html("No tags here"), "No tags here");
        assert_eq!(strip_html(""), "");
    }

    #[test]
    fn test_truncate_at_word_boundary() {
        let text = "Hello world this is a test";
        assert_eq!(truncate_at_word_boundary(text, 100), text);
        // max_len=11 gives "Hello world", last space at pos 5, so "Hello..."
        assert_eq!(truncate_at_word_boundary(text, 11), "Hello...");
        assert_eq!(truncate_at_word_boundary(text, 5), "Hello...");
        // max_len=12 gives "Hello world ", last space at pos 11, so "Hello world..."
        assert_eq!(truncate_at_word_boundary(text, 12), "Hello world...");
    }
}
402}