use std::{
    collections::HashMap,
    fs,
    path::{Path, PathBuf},
};

use rayon::prelude::*;
use thiserror::Error;
use tracing::{debug, info, warn};
use typstify_core::{Config, ContentPath, ContentType, Page};
use typstify_parser::ParserRegistry;

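/// Errors produced while discovering and parsing content files.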
#[derive(Debug, Error)]
pub enum CollectorError {
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    #[error("parse error in {path}: {message}")]
    Parse { path: PathBuf, message: String },

    #[error("invalid content path: {0}")]
    InvalidPath(PathBuf),
}

/// Convenience alias for results that use [`CollectorError`].
pub type Result<T> = std::result::Result<T, CollectorError>;

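/// Everything collected for a site: pages plus section and taxonomy indexes.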
#[derive(Debug, Default)]
pub struct SiteContent {
    /// Every collected page, keyed by its URL.
    pub pages: HashMap<String, Page>,

    /// Section name (first URL segment) mapped to the URLs of its pages.
    pub sections: HashMap<String, Vec<String>>,

    /// Tag and category indexes over page URLs.
    pub taxonomies: TaxonomyIndex,
}

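/// Indexes taxonomy terms (tags and categories) to page URLs.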
#[derive(Debug, Default)]
pub struct TaxonomyIndex {
    /// Tag name mapped to the URLs of pages carrying that tag.
    pub tags: HashMap<String, Vec<String>>,

    /// Category name mapped to the URLs of pages in that category.
    pub categories: HashMap<String, Vec<String>>,
}

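/// Discovers content files under a directory, parses them in parallel,
/// and assembles a [`SiteContent`].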
#[derive(Debug)]
pub struct ContentCollector {
    config: Config,
    parser: ParserRegistry,
    content_dir: PathBuf,
}

impl ContentCollector {
    /// Creates a collector for the given configuration and content directory.
    #[must_use]
    pub fn new(config: Config, content_dir: impl Into<PathBuf>) -> Self {
        Self {
            config,
            parser: ParserRegistry::new(),
            content_dir: content_dir.into(),
        }
    }

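    /// Walks the content directory, parses every supported file in
    /// parallel, and indexes the resulting pages by URL, section, and
    /// taxonomy term. Drafts are skipped unless enabled in the config.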
    pub fn collect(&self) -> Result<SiteContent> {
        info!(dir = %self.content_dir.display(), "collecting content");

        let files = self.find_content_files()?;
        info!(count = files.len(), "found content files");

        // Parse in parallel; drafts and unparseable files are dropped here.
        let pages: Vec<_> = files
            .par_iter()
            .filter_map(|path| {
                match self.parse_file(path) {
                    Ok(page) => {
                        if page.draft && !self.config.build.drafts {
                            debug!(url = %page.url, "skipping draft");
                            None
                        } else {
                            Some(page)
                        }
                    }
                    Err(e) => {
                        warn!(path = %path.display(), error = %e, "failed to parse file");
                        None
                    }
                }
            })
            .collect();

        let mut content = SiteContent::default();

        for page in pages {
            let url = page.url.clone();
            let slug = url.trim_start_matches('/').to_string();

            // The first URL segment is the page's section.
            let section = slug.split('/').next().unwrap_or("").to_string();
            if !section.is_empty() {
                content
                    .sections
                    .entry(section)
                    .or_default()
                    .push(url.clone());
            }

            for tag in &page.tags {
                content
                    .taxonomies
                    .tags
                    .entry(tag.clone())
                    .or_default()
                    .push(url.clone());
            }
            for category in &page.categories {
                content
                    .taxonomies
                    .categories
                    .entry(category.clone())
                    .or_default()
                    .push(url.clone());
            }

            content.pages.insert(url, page);
        }

        info!(
            pages = content.pages.len(),
            sections = content.sections.len(),
            tags = content.taxonomies.tags.len(),
            categories = content.taxonomies.categories.len(),
            "content collection complete"
        );

        Ok(content)
    }

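    /// Gathers every file under the content root with a supported extension.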
    fn find_content_files(&self) -> Result<Vec<PathBuf>> {
        let mut files = Vec::new();
        self.walk_dir(&self.content_dir, &mut files)?;
        Ok(files)
    }

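    /// Recursive directory walk: hidden (dot-prefixed) directories are
    /// skipped, and only files whose extension maps to a known
    /// [`ContentType`] are collected.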
    fn walk_dir(&self, dir: &Path, files: &mut Vec<PathBuf>) -> Result<()> {
        if !dir.exists() {
            return Ok(());
        }

        for entry in fs::read_dir(dir)? {
            let entry = entry?;
            let path = entry.path();

            if path.is_dir() {
                if path
                    .file_name()
                    .is_some_and(|n| n.to_string_lossy().starts_with('.'))
                {
                    continue;
                }
                self.walk_dir(&path, files)?;
            } else if path.is_file() {
                if let Some(ext) = path.extension()
                    && ContentType::from_extension(&ext.to_string_lossy()).is_some()
                {
                    files.push(path);
                }
            }
        }

        Ok(())
    }

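    /// Reads and parses one file, deriving its [`ContentPath`] from the
    /// location relative to the content root.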
    fn parse_file(&self, path: &Path) -> Result<Page> {
        debug!(path = %path.display(), "parsing file");

        let content = fs::read_to_string(path)?;

        let relative_path = path.strip_prefix(&self.content_dir).unwrap_or(path);
        let content_path =
            ContentPath::from_path(relative_path, &self.config.site.default_language)
                .ok_or_else(|| CollectorError::InvalidPath(path.to_path_buf()))?;

        let parsed = self
            .parser
            .parse(&content, path)
            .map_err(|e| CollectorError::Parse {
                path: path.to_path_buf(),
                message: e.to_string(),
            })?;

        Ok(Page::from_parsed(parsed, &content_path))
    }

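    /// Returns all pages sorted newest first; pages without a date sort
    /// last, ordered alphabetically by title.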
    pub fn pages_by_date(content: &SiteContent) -> Vec<&Page> {
        let mut pages: Vec<_> = content.pages.values().collect();
        pages.sort_by(|a, b| Self::cmp_by_date_desc(a, b));
        pages
    }

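    /// Returns the pages belonging to `section`, sorted newest first.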
    pub fn section_pages<'a>(content: &'a SiteContent, section: &str) -> Vec<&'a Page> {
        let mut pages: Vec<_> = content
            .sections
            .get(section)
            .map(|urls| urls.iter().filter_map(|u| content.pages.get(u)).collect())
            .unwrap_or_default();

        pages.sort_by(|a, b| Self::cmp_by_date_desc(a, b));
        pages
    }

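    /// Returns the pages for `term` in the given taxonomy ("tags" or
    /// "categories"), sorted newest first. Unknown taxonomies yield an
    /// empty list.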
    pub fn taxonomy_pages<'a>(
        content: &'a SiteContent,
        taxonomy: &str,
        term: &str,
    ) -> Vec<&'a Page> {
        let urls = match taxonomy {
            "tags" => content.taxonomies.tags.get(term),
            "categories" => content.taxonomies.categories.get(term),
            _ => None,
        };

        let mut pages: Vec<_> = urls
            .map(|u| u.iter().filter_map(|url| content.pages.get(url)).collect())
            .unwrap_or_default();

        pages.sort_by(|a, b| Self::cmp_by_date_desc(a, b));
        pages
    }

    /// Shared comparator for the accessors above: newest date first;
    /// undated pages sort last, falling back to title order when neither
    /// page has a date.
    fn cmp_by_date_desc(a: &Page, b: &Page) -> std::cmp::Ordering {
        match (&a.date, &b.date) {
            (Some(a_date), Some(b_date)) => b_date.cmp(a_date),
            (Some(_), None) => std::cmp::Ordering::Less,
            (None, Some(_)) => std::cmp::Ordering::Greater,
            (None, None) => a.title.cmp(&b.title),
        }
    }
}

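/// Splits `items` into pages of `per_page` entries, returning the
/// requested 1-indexed page together with the total page count.
///
/// For example, `paginate(&[1, 2, 3, 4, 5], 2, 2)` returns `(&[3, 4], 3)`.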
pub fn paginate<T>(items: &[T], page: usize, per_page: usize) -> (&[T], usize) {
    // Guard against a zero page size, which would otherwise panic in div_ceil.
    if per_page == 0 {
        return (&[], 0);
    }

    let total_pages = items.len().div_ceil(per_page);
    // saturating_sub keeps an out-of-range `page` of 0 from underflowing.
    let start = page.saturating_sub(1) * per_page;
    let end = (start + per_page).min(items.len());

    if start >= items.len() {
        (&[], total_pages)
    } else {
        (&items[start..end], total_pages)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[allow(dead_code)]
    fn test_config() -> Config {
        Config {
            site: typstify_core::config::SiteConfig {
                title: "Test Site".to_string(),
                base_url: "https://example.com".to_string(),
                default_language: "en".to_string(),
                languages: vec!["en".to_string()],
                description: None,
                author: None,
            },
            build: typstify_core::config::BuildConfig {
                drafts: false,
                ..Default::default()
            },
            search: typstify_core::config::SearchConfig::default(),
            rss: typstify_core::config::RssConfig::default(),
            taxonomies: typstify_core::config::TaxonomyConfig::default(),
        }
    }

    #[test]
    fn test_paginate() {
        let items = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];

        let (page1, total) = paginate(&items, 1, 3);
        assert_eq!(page1, &[1, 2, 3]);
        assert_eq!(total, 4);

        let (page2, _) = paginate(&items, 2, 3);
        assert_eq!(page2, &[4, 5, 6]);

        let (page4, _) = paginate(&items, 4, 3);
        assert_eq!(page4, &[10]);

        let (page5, _) = paginate(&items, 5, 3);
        assert!(page5.is_empty());
    }

    #[test]
    fn test_taxonomy_index() {
        let mut index = TaxonomyIndex::default();
        index.tags.insert(
            "rust".to_string(),
            vec!["post1".to_string(), "post2".to_string()],
        );
        index
            .tags
            .insert("web".to_string(), vec!["post2".to_string()]);

        assert_eq!(index.tags.get("rust").unwrap().len(), 2);
        assert_eq!(index.tags.get("web").unwrap().len(), 1);
        assert!(index.tags.get("python").is_none());
    }

    #[test]
    fn test_site_content_default() {
        let content = SiteContent::default();
        assert!(content.pages.is_empty());
        assert!(content.sections.is_empty());
        assert!(content.taxonomies.tags.is_empty());
    }
}