1use std::{
6 collections::HashMap,
7 fs,
8 path::{Path, PathBuf},
9};
10
11use rayon::prelude::*;
12use thiserror::Error;
13use tracing::{debug, info, warn};
14use typstify_core::{Config, ContentPath, ContentType, Page};
15use typstify_parser::ParserRegistry;
16
/// Errors produced while discovering and parsing content files.
#[derive(Debug, Error)]
pub enum CollectorError {
    /// An underlying filesystem operation failed (converted from `std::io::Error`).
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// The parser rejected a file; `path` is the offending file and `message`
    /// is the parser error rendered as text.
    #[error("parse error in {path}: {message}")]
    Parse { path: PathBuf, message: String },

    /// A file path could not be turned into a `ContentPath`.
    #[error("invalid content path: {0}")]
    InvalidPath(PathBuf),
}
32
/// Result alias whose error type is [`CollectorError`].
pub type Result<T> = std::result::Result<T, CollectorError>;
35
/// The pages gathered by [`ContentCollector::collect`] together with the
/// lookup indexes built from them.
#[derive(Debug, Default)]
pub struct SiteContent {
    /// Collected pages, keyed by page URL.
    pub pages: HashMap<String, Page>,

    /// Section name mapped to the URLs of the pages in that section. The
    /// section name is the first `/`-separated segment of the page URL.
    pub sections: HashMap<String, Vec<String>>,

    /// Tag and category indexes.
    pub taxonomies: TaxonomyIndex,

    /// Canonical ID mapped to the URLs of every page that carries it. Pages
    /// with an empty canonical ID are not listed.
    pub translations: HashMap<String, Vec<String>>,
}
51
/// Reverse indexes from taxonomy terms to page URLs.
#[derive(Debug, Default)]
pub struct TaxonomyIndex {
    /// Tag mapped to the URLs of the pages carrying that tag.
    pub tags: HashMap<String, Vec<String>>,

    /// Category mapped to the URLs of the pages in that category.
    pub categories: HashMap<String, Vec<String>>,
}
61
/// Discovers content files under a directory and parses them into pages.
#[derive(Debug)]
pub struct ContentCollector {
    // Site configuration; consulted for the draft setting and the default language.
    config: Config,
    // Parser registry used to parse the text of each content file.
    parser: ParserRegistry,
    // Root directory that is walked recursively for content files.
    content_dir: PathBuf,
}
69
70impl ContentCollector {
71 #[must_use]
73 pub fn new(config: Config, content_dir: impl Into<PathBuf>) -> Self {
74 Self {
75 config,
76 parser: ParserRegistry::new(),
77 content_dir: content_dir.into(),
78 }
79 }
80
81 pub fn collect(&self) -> Result<SiteContent> {
83 info!(dir = %self.content_dir.display(), "collecting content");
84
85 let files = self.find_content_files()?;
87 info!(count = files.len(), "found content files");
88
89 let pages: Vec<_> = files
91 .par_iter()
92 .filter_map(|path| {
93 match self.parse_file(path) {
94 Ok(page) => {
95 if page.draft && !self.config.build.drafts {
97 debug!(url = %page.url, "skipping draft");
98 None
99 } else {
100 Some(page)
101 }
102 }
103 Err(e) => {
104 warn!(path = %path.display(), error = %e, "failed to parse file");
105 None
106 }
107 }
108 })
109 .collect();
110
111 let mut content = SiteContent::default();
113
114 for page in pages {
115 let url = page.url.clone();
116 let slug = url.trim_start_matches('/').to_string();
117
118 let section = slug.split('/').next().unwrap_or("").to_string();
120 if !section.is_empty() {
121 content
122 .sections
123 .entry(section)
124 .or_default()
125 .push(url.clone());
126 }
127
128 for tag in &page.tags {
130 content
131 .taxonomies
132 .tags
133 .entry(tag.clone())
134 .or_default()
135 .push(url.clone());
136 }
137 for category in &page.categories {
138 content
139 .taxonomies
140 .categories
141 .entry(category.clone())
142 .or_default()
143 .push(url.clone());
144 }
145
146 if !page.canonical_id.is_empty() {
148 content
149 .translations
150 .entry(page.canonical_id.clone())
151 .or_default()
152 .push(url.clone());
153 }
154
155 content.pages.insert(url, page);
156 }
157
158 info!(
159 pages = content.pages.len(),
160 sections = content.sections.len(),
161 tags = content.taxonomies.tags.len(),
162 categories = content.taxonomies.categories.len(),
163 "content collection complete"
164 );
165
166 Ok(content)
167 }
168
169 fn find_content_files(&self) -> Result<Vec<PathBuf>> {
171 let mut files = Vec::new();
172 self.walk_dir(&self.content_dir, &mut files)?;
173 Ok(files)
174 }
175
176 fn walk_dir(&self, dir: &Path, files: &mut Vec<PathBuf>) -> Result<()> {
178 if !dir.exists() {
179 return Ok(());
180 }
181
182 for entry in fs::read_dir(dir)? {
183 let entry = entry?;
184 let path = entry.path();
185
186 if path.is_dir() {
187 if path
189 .file_name()
190 .is_some_and(|n| n.to_string_lossy().starts_with('.'))
191 {
192 continue;
193 }
194 self.walk_dir(&path, files)?;
195 } else if path.is_file() {
196 if let Some(ext) = path.extension()
198 && ContentType::from_extension(&ext.to_string_lossy()).is_some()
199 {
200 files.push(path);
201 }
202 }
203 }
204
205 Ok(())
206 }
207
208 fn parse_file(&self, path: &Path) -> Result<Page> {
210 debug!(path = %path.display(), "parsing file");
211
212 let content = fs::read_to_string(path)?;
214
215 let relative_path = path.strip_prefix(&self.content_dir).unwrap_or(path);
217 let content_path =
218 ContentPath::from_path(relative_path, &self.config.site.default_language)
219 .ok_or_else(|| CollectorError::InvalidPath(path.to_path_buf()))?;
220
221 let parsed = self
223 .parser
224 .parse(&content, path)
225 .map_err(|e| CollectorError::Parse {
226 path: path.to_path_buf(),
227 message: e.to_string(),
228 })?;
229
230 Ok(Page::from_parsed(parsed, &content_path))
231 }
232
233 pub fn pages_by_date(content: &SiteContent) -> Vec<&Page> {
235 let mut pages: Vec<_> = content.pages.values().collect();
236 pages.sort_by(|a, b| match (&b.date, &a.date) {
237 (Some(b_date), Some(a_date)) => b_date.cmp(a_date),
238 (Some(_), None) => std::cmp::Ordering::Less,
239 (None, Some(_)) => std::cmp::Ordering::Greater,
240 (None, None) => a.title.cmp(&b.title),
241 });
242 pages
243 }
244
245 pub fn section_pages<'a>(content: &'a SiteContent, section: &str) -> Vec<&'a Page> {
247 let mut pages: Vec<_> = content
248 .sections
249 .get(section)
250 .map(|urls| urls.iter().filter_map(|u| content.pages.get(u)).collect())
251 .unwrap_or_default();
252
253 pages.sort_by(|a, b| match (&b.date, &a.date) {
254 (Some(b_date), Some(a_date)) => b_date.cmp(a_date),
255 (Some(_), None) => std::cmp::Ordering::Less,
256 (None, Some(_)) => std::cmp::Ordering::Greater,
257 (None, None) => a.title.cmp(&b.title),
258 });
259 pages
260 }
261
262 pub fn taxonomy_pages<'a>(
264 content: &'a SiteContent,
265 taxonomy: &str,
266 term: &str,
267 ) -> Vec<&'a Page> {
268 let urls = match taxonomy {
269 "tags" => content.taxonomies.tags.get(term),
270 "categories" => content.taxonomies.categories.get(term),
271 _ => None,
272 };
273
274 let mut pages: Vec<_> = urls
275 .map(|u| u.iter().filter_map(|url| content.pages.get(url)).collect())
276 .unwrap_or_default();
277
278 pages.sort_by(|a, b| match (&b.date, &a.date) {
279 (Some(b_date), Some(a_date)) => b_date.cmp(a_date),
280 (Some(_), None) => std::cmp::Ordering::Less,
281 (None, Some(_)) => std::cmp::Ordering::Greater,
282 (None, None) => a.title.cmp(&b.title),
283 });
284 pages
285 }
286}
287
/// Returns the slice of `items` shown on the 1-based `page`, together with
/// the total number of pages.
///
/// * `page` - 1-based page number. Page `0` and pages past the end yield an
///   empty slice.
/// * `per_page` - maximum number of items per page. A value of `0` yields an
///   empty slice and a total of `0` pages.
///
/// This function never panics: the page arithmetic is checked, so very large
/// `page` or `per_page` values simply produce an empty slice.
pub fn paginate<T>(items: &[T], page: usize, per_page: usize) -> (&[T], usize) {
    let empty: &[T] = &[];

    // `div_ceil(0)` would divide by zero.
    if per_page == 0 {
        return (empty, 0);
    }

    let total_pages = items.len().div_ceil(per_page);

    // `page - 1` underflows for page 0 and the multiplication can overflow;
    // both cases lie outside the item range and are treated as such.
    let start = match page.checked_sub(1).and_then(|index| index.checked_mul(per_page)) {
        Some(start) if start < items.len() => start,
        _ => return (empty, total_pages),
    };
    let end = start.saturating_add(per_page).min(items.len());

    (&items[start..end], total_pages)
}
300
#[cfg(test)]
mod tests {
    use std::collections::HashMap;

    use super::*;

    /// Builds a minimal configuration with drafts disabled.
    // No test constructs a collector yet, so this fixture is currently unused.
    #[allow(dead_code)]
    fn test_config() -> Config {
        let site = typstify_core::config::SiteConfig {
            title: String::from("Test Site"),
            base_url: String::from("https://example.com"),
            default_language: String::from("en"),
            description: None,
            author: None,
        };
        let build = typstify_core::config::BuildConfig {
            drafts: false,
            ..Default::default()
        };

        Config {
            site,
            languages: HashMap::new(),
            build,
            search: typstify_core::config::SearchConfig::default(),
            rss: typstify_core::config::RssConfig::default(),
            robots: typstify_core::config::RobotsConfig::default(),
            taxonomies: typstify_core::config::TaxonomyConfig::default(),
        }
    }

    /// Ten items at three per page give four pages; the last page is partial
    /// and any page beyond it is empty.
    #[test]
    fn test_paginate() {
        let numbers: Vec<i32> = (1..=10).collect();

        let (first, page_count) = paginate(&numbers, 1, 3);
        assert_eq!(first, &[1, 2, 3]);
        assert_eq!(page_count, 4);

        let (second, _) = paginate(&numbers, 2, 3);
        assert_eq!(second, &[4, 5, 6]);

        let (last, _) = paginate(&numbers, 4, 3);
        assert_eq!(last, &[10]);

        let (beyond, _) = paginate(&numbers, 5, 3);
        assert!(beyond.is_empty());
    }

    /// Terms map to the URLs stored for them; unknown terms are absent.
    #[test]
    fn test_taxonomy_index() {
        let mut index = TaxonomyIndex::default();
        let rust_posts = vec![String::from("post1"), String::from("post2")];
        let web_posts = vec![String::from("post2")];
        index.tags.insert(String::from("rust"), rust_posts);
        index.tags.insert(String::from("web"), web_posts);

        assert_eq!(index.tags["rust"].len(), 2);
        assert_eq!(index.tags["web"].len(), 1);
        assert!(!index.tags.contains_key("python"));
    }

    /// A default `SiteContent` starts out with no pages and empty indexes.
    #[test]
    fn test_site_content_default() {
        let empty = SiteContent::default();
        assert!(empty.pages.is_empty());
        assert!(empty.sections.is_empty());
        assert!(empty.taxonomies.tags.is_empty());
    }
}