rs_web/content/
mod.rs

1mod frontmatter;
2mod page;
3mod post;
4
5pub use page::Page;
6pub use post::{ContentType, Post};
7
8// Re-export for tests
9#[cfg(test)]
10pub use frontmatter::Frontmatter;
11
12use crate::config::{PathsConfig, SectionsConfig};
13use anyhow::Result;
14use ignore::WalkBuilder;
15use log::{debug, trace};
16use regex::Regex;
17use std::collections::HashMap;
18use std::path::Path;
19use walkdir::WalkDir;
20
/// Regex patterns for names that are skipped by default during content
/// discovery (common non-content files).
///
/// They are compiled by `ExcludeMatcher::from_config` only when
/// `exclude_defaults` is enabled in the paths configuration.
const DEFAULT_EXCLUDE_PATTERNS: &[&str] = &[
    // Repository housekeeping documents
    r"^README\.md$",
    r"^LICENSE\.md$",
    r"^CHANGELOG\.md$",
    r"^CONTRIBUTING\.md$",
    r"^CODE_OF_CONDUCT\.md$",
    // Hidden files/directories: any name that starts with a dot
    r"^\.",
];
30
31/// Matcher for excluding files and directories based on regex patterns
32pub struct ExcludeMatcher {
33    patterns: Vec<Regex>,
34}
35
36impl ExcludeMatcher {
37    /// Create a new exclude matcher from PathsConfig
38    pub fn from_config(paths: &PathsConfig) -> Result<Self> {
39        let mut patterns = Vec::new();
40
41        // Add default patterns if enabled
42        if paths.exclude_defaults {
43            for pattern in DEFAULT_EXCLUDE_PATTERNS {
44                patterns.push(Regex::new(pattern)?);
45            }
46        }
47
48        // Add user-specified patterns
49        for pattern in &paths.exclude {
50            patterns.push(Regex::new(pattern)?);
51        }
52
53        Ok(Self { patterns })
54    }
55
56    /// Check if a name (file or directory) should be excluded
57    pub fn is_excluded(&self, name: &str) -> bool {
58        self.patterns.iter().any(|p| p.is_match(name))
59    }
60}
61
62/// A section is a subdirectory containing posts (e.g., blog, projects, notes)
63#[derive(Debug, serde::Serialize)]
64pub struct Section {
65    pub name: String,
66    pub posts: Vec<Post>,
67}
68
69/// Content holds the home page, root pages, and all discovered sections
70#[derive(Debug)]
71pub struct Content {
72    pub home: Option<Page>,
73    /// Root-level pages (e.g., 404.md, about.md) excluding the home page
74    pub root_pages: Vec<Page>,
75    pub sections: HashMap<String, Section>,
76}
77
78/// Discover all content files based on paths config
79/// If base_dir is provided, paths are resolved relative to it
80pub fn discover_content(
81    paths: &PathsConfig,
82    sections_config: &SectionsConfig,
83    base_dir: Option<&Path>,
84) -> Result<Content> {
85    debug!("Discovering content from {:?}", paths.content);
86    let content_path = Path::new(&paths.content);
87    let content_dir = if let Some(base) = base_dir {
88        if content_path.is_absolute() {
89            content_path.to_path_buf()
90        } else {
91            base.join(content_path)
92        }
93    } else {
94        content_path.to_path_buf()
95    };
96    trace!("Content directory resolved to: {:?}", content_dir);
97
98    // Create exclude matcher from config
99    let exclude_matcher = ExcludeMatcher::from_config(paths)?;
100
101    // Built-in excluded directories (styles, static, templates)
102    let builtin_excluded: Vec<&str> = vec![&paths.styles, &paths.static_files, &paths.templates];
103    trace!("Built-in excluded directories: {:?}", builtin_excluded);
104
105    // Load home page
106    let home_path = content_dir.join(&paths.home);
107    let home = if home_path.exists() {
108        trace!("Loading home page from {:?}", home_path);
109        Some(Page::from_file(&home_path)?)
110    } else {
111        trace!("No home page found at {:?}", home_path);
112        None
113    };
114
115    // Discover root-level pages (markdown files in content root, excluding home)
116    let home_file_name = Path::new(&paths.home)
117        .file_name()
118        .and_then(|n| n.to_str())
119        .unwrap_or("index.md");
120
121    let root_page_paths: Vec<_> = if paths.respect_gitignore {
122        WalkBuilder::new(&content_dir)
123            .max_depth(Some(1))
124            .hidden(false)
125            .build()
126            .filter_map(|e| e.ok())
127            .filter(|e| {
128                let file_name = e.path().file_name().and_then(|n| n.to_str()).unwrap_or("");
129                e.depth() == 1
130                    && e.path().is_file()
131                    && e.path()
132                        .extension()
133                        .is_some_and(|ext| ext == "md" || ext == "html" || ext == "htm")
134                    && file_name != home_file_name
135                    && !exclude_matcher.is_excluded(file_name)
136            })
137            .map(|e| e.into_path())
138            .collect()
139    } else {
140        WalkDir::new(&content_dir)
141            .min_depth(1)
142            .max_depth(1)
143            .into_iter()
144            .filter_map(|e| e.ok())
145            .filter(|e| {
146                let file_name = e.path().file_name().and_then(|n| n.to_str()).unwrap_or("");
147                e.path().is_file()
148                    && e.path()
149                        .extension()
150                        .is_some_and(|ext| ext == "md" || ext == "html" || ext == "htm")
151                    && file_name != home_file_name
152                    && !exclude_matcher.is_excluded(file_name)
153            })
154            .map(|e| e.into_path())
155            .collect()
156    };
157
158    let mut root_pages = Vec::new();
159    for page_path in root_page_paths {
160        trace!("Loading root page from {:?}", page_path);
161        let page = Page::from_file(&page_path)?;
162        root_pages.push(page);
163    }
164    debug!("Loaded {} root pages", root_pages.len());
165
166    // Discover all sections (subdirectories)
167    let mut sections = HashMap::new();
168
169    // Collect section paths using appropriate walker
170    let section_paths: Vec<_> = if paths.respect_gitignore {
171        WalkBuilder::new(content_dir)
172            .max_depth(Some(1))
173            .hidden(false) // Don't skip hidden files by default
174            .build()
175            .filter_map(|e| e.ok())
176            .filter(|e| {
177                let dir_name = e.path().file_name().and_then(|n| n.to_str()).unwrap_or("");
178                e.depth() == 1 && e.path().is_dir() && !exclude_matcher.is_excluded(dir_name)
179            })
180            .map(|e| e.into_path())
181            .collect()
182    } else {
183        WalkDir::new(content_dir)
184            .min_depth(1)
185            .max_depth(1)
186            .into_iter()
187            .filter_map(|e| e.ok())
188            .filter(|e| {
189                let dir_name = e.path().file_name().and_then(|n| n.to_str()).unwrap_or("");
190                e.path().is_dir() && !exclude_matcher.is_excluded(dir_name)
191            })
192            .map(|e| e.into_path())
193            .collect()
194    };
195
196    // Process each section
197    for path in section_paths {
198        process_section(
199            &path,
200            &builtin_excluded,
201            &exclude_matcher,
202            &mut sections,
203            paths,
204            sections_config,
205        )?;
206    }
207
208    debug!(
209        "Content discovery complete: {} sections found, {} root pages",
210        sections.len(),
211        root_pages.len()
212    );
213    Ok(Content {
214        home,
215        root_pages,
216        sections,
217    })
218}
219
220/// Process a section directory and add it to sections map
221fn process_section(
222    path: &Path,
223    builtin_excluded: &[&str],
224    exclude_matcher: &ExcludeMatcher,
225    sections: &mut HashMap<String, Section>,
226    paths: &PathsConfig,
227    sections_config: &SectionsConfig,
228) -> Result<()> {
229    let section_name = path
230        .file_name()
231        .and_then(|n| n.to_str())
232        .unwrap_or("")
233        .to_string();
234
235    // Skip built-in excluded directories (styles, static, templates)
236    if builtin_excluded
237        .iter()
238        .any(|ex| section_name == *ex || path.ends_with(ex))
239    {
240        trace!("Skipping built-in excluded section: {}", section_name);
241        return Ok(());
242    }
243
244    trace!("Processing section: {}", section_name);
245
246    // Get section config
247    let section_config = sections_config.sections.get(&section_name);
248    let iterate_mode = section_config
249        .map(|c| c.iterate.as_str())
250        .unwrap_or("files");
251
252    let mut posts = if iterate_mode == "directories" {
253        // Directory-based iteration: each subdirectory becomes a post
254        process_section_directories(path, &section_name, exclude_matcher, paths)?
255    } else {
256        // File-based iteration (default): find .md/.html files
257        process_section_files(path, &section_name, exclude_matcher, paths)?
258    };
259
260    // Default sort: by date (newest first), then by slug for undated posts
261    // Custom sorting via Lua functions is applied in build.rs
262    posts.sort_by(|a, b| match (&b.frontmatter.date, &a.frontmatter.date) {
263        (Some(d1), Some(d2)) => d1.cmp(d2),
264        (Some(_), None) => std::cmp::Ordering::Less,
265        (None, Some(_)) => std::cmp::Ordering::Greater,
266        (None, None) => a.slug().cmp(b.slug()),
267    });
268
269    if !posts.is_empty() {
270        debug!(
271            "Section '{}': {} posts loaded (mode: {})",
272            section_name,
273            posts.len(),
274            iterate_mode
275        );
276        sections.insert(
277            section_name.clone(),
278            Section {
279                name: section_name,
280                posts,
281            },
282        );
283    } else {
284        trace!("Section '{}': no posts found", section_name);
285    }
286
287    Ok(())
288}
289
290/// Process section using file-based iteration (default)
291/// Finds .md/.html files directly in the section directory
292fn process_section_files(
293    path: &Path,
294    section_name: &str,
295    exclude_matcher: &ExcludeMatcher,
296    paths: &PathsConfig,
297) -> Result<Vec<Post>> {
298    // Collect content file paths (markdown and HTML) using appropriate walker
299    let post_paths: Vec<_> = if paths.respect_gitignore {
300        WalkBuilder::new(path)
301            .max_depth(Some(1))
302            .hidden(false)
303            .build()
304            .filter_map(|e| e.ok())
305            .filter(|e| {
306                let file_name = e.path().file_name().and_then(|n| n.to_str()).unwrap_or("");
307                e.depth() == 1
308                    && e.path()
309                        .extension()
310                        .is_some_and(|ext| ext == "md" || ext == "html" || ext == "htm")
311                    && !exclude_matcher.is_excluded(file_name)
312            })
313            .map(|e| e.into_path())
314            .collect()
315    } else {
316        WalkDir::new(path)
317            .min_depth(1)
318            .max_depth(1)
319            .into_iter()
320            .filter_map(|e| e.ok())
321            .filter(|e| {
322                let file_name = e.path().file_name().and_then(|n| n.to_str()).unwrap_or("");
323                e.path()
324                    .extension()
325                    .is_some_and(|ext| ext == "md" || ext == "html" || ext == "htm")
326                    && !exclude_matcher.is_excluded(file_name)
327            })
328            .map(|e| e.into_path())
329            .collect()
330    };
331
332    // Load posts
333    let mut posts = Vec::new();
334    for post_path in post_paths {
335        let post = Post::from_file_with_section(&post_path, section_name)?;
336        if !post.frontmatter.draft.unwrap_or(false) {
337            posts.push(post);
338        }
339    }
340
341    Ok(posts)
342}
343
344/// Process section using directory-based iteration
345/// Each subdirectory becomes a post with source_dir set
346fn process_section_directories(
347    path: &Path,
348    section_name: &str,
349    exclude_matcher: &ExcludeMatcher,
350    paths: &PathsConfig,
351) -> Result<Vec<Post>> {
352    use crate::content::frontmatter::Frontmatter;
353    use crate::content::post::ContentType;
354
355    // Collect subdirectory paths
356    let dir_paths: Vec<_> = if paths.respect_gitignore {
357        WalkBuilder::new(path)
358            .max_depth(Some(1))
359            .hidden(false)
360            .build()
361            .filter_map(|e| e.ok())
362            .filter(|e| {
363                let dir_name = e.path().file_name().and_then(|n| n.to_str()).unwrap_or("");
364                e.depth() == 1 && e.path().is_dir() && !exclude_matcher.is_excluded(dir_name)
365            })
366            .map(|e| e.into_path())
367            .collect()
368    } else {
369        WalkDir::new(path)
370            .min_depth(1)
371            .max_depth(1)
372            .into_iter()
373            .filter_map(|e| e.ok())
374            .filter(|e| {
375                let dir_name = e.path().file_name().and_then(|n| n.to_str()).unwrap_or("");
376                e.path().is_dir() && !exclude_matcher.is_excluded(dir_name)
377            })
378            .map(|e| e.into_path())
379            .collect()
380    };
381
382    let mut posts = Vec::new();
383    for dir_path in dir_paths {
384        let slug = dir_path
385            .file_name()
386            .and_then(|n| n.to_str())
387            .unwrap_or("untitled")
388            .to_string();
389
390        trace!(
391            "Creating directory-based post: {} in section {}",
392            slug, section_name
393        );
394
395        // Create a minimal Post with source_dir set
396        // Templates will use Tera functions to load data from the directory
397        let post = Post {
398            file_slug: slug.clone(),
399            section: section_name.to_string(),
400            frontmatter: Frontmatter {
401                title: slug.clone(), // Default title is the directory name
402                description: None,
403                date: None,
404                tags: None,
405                draft: None,
406                image: None,
407                template: None,
408                slug: Some(slug),
409                permalink: None,
410                encrypted: false,
411                password: None,
412            },
413            content: String::new(),
414            html: String::new(),
415            reading_time: 0,
416            word_count: 0,
417            encrypted_content: None,
418            has_encrypted_blocks: false,
419            content_type: ContentType::Markdown,
420            source_path: dir_path.clone(),
421            source_dir: Some(dir_path),
422        };
423
424        posts.push(post);
425    }
426
427    Ok(posts)
428}