rs_web/content/
mod.rs

1mod frontmatter;
2mod page;
3mod post;
4
5pub use page::Page;
6pub use post::{ContentType, Post};
7
8// Re-export for tests
9#[cfg(test)]
10pub use frontmatter::Frontmatter;
11
12use crate::config::PathsConfig;
13use anyhow::Result;
14use ignore::WalkBuilder;
15use log::{debug, trace};
16use regex::Regex;
17use std::collections::HashMap;
18use std::path::Path;
19use walkdir::WalkDir;
20
/// Regex patterns applied to file and directory names when
/// `PathsConfig::exclude_defaults` is enabled.
///
/// Each entry is compiled with `Regex::new` by `ExcludeMatcher::from_config`
/// and tested against a bare name (not a full path), hence the `^`/`$` anchors.
const DEFAULT_EXCLUDE_PATTERNS: &[&str] = &[
    // Common repository housekeeping documents that are not site content.
    r"^README\.md$",
    r"^LICENSE\.md$",
    r"^CHANGELOG\.md$",
    r"^CONTRIBUTING\.md$",
    r"^CODE_OF_CONDUCT\.md$",
    // Any name starting with a dot (hidden files and directories).
    r"^\.",
];
30
31/// Matcher for excluding files and directories based on regex patterns
32pub struct ExcludeMatcher {
33    patterns: Vec<Regex>,
34}
35
36impl ExcludeMatcher {
37    /// Create a new exclude matcher from PathsConfig
38    pub fn from_config(paths: &PathsConfig) -> Result<Self> {
39        let mut patterns = Vec::new();
40
41        // Add default patterns if enabled
42        if paths.exclude_defaults {
43            for pattern in DEFAULT_EXCLUDE_PATTERNS {
44                patterns.push(Regex::new(pattern)?);
45            }
46        }
47
48        // Add user-specified patterns
49        for pattern in &paths.exclude {
50            patterns.push(Regex::new(pattern)?);
51        }
52
53        Ok(Self { patterns })
54    }
55
56    /// Check if a name (file or directory) should be excluded
57    pub fn is_excluded(&self, name: &str) -> bool {
58        self.patterns.iter().any(|p| p.is_match(name))
59    }
60}
61
62/// A section is a subdirectory containing posts (e.g., blog, projects, notes)
63#[derive(Debug)]
64pub struct Section {
65    pub name: String,
66    pub posts: Vec<Post>,
67}
68
69/// Content holds the home page, root pages, and all discovered sections
70#[derive(Debug)]
71pub struct Content {
72    pub home: Option<Page>,
73    /// Root-level pages (e.g., 404.md, about.md) excluding the home page
74    pub root_pages: Vec<Page>,
75    pub sections: HashMap<String, Section>,
76}
77
78/// Discover all content files based on paths config
79/// If base_dir is provided, paths are resolved relative to it
80pub fn discover_content(paths: &PathsConfig, base_dir: Option<&Path>) -> Result<Content> {
81    debug!("Discovering content from {:?}", paths.content);
82    let content_path = Path::new(&paths.content);
83    let content_dir = if let Some(base) = base_dir {
84        if content_path.is_absolute() {
85            content_path.to_path_buf()
86        } else {
87            base.join(content_path)
88        }
89    } else {
90        content_path.to_path_buf()
91    };
92    trace!("Content directory resolved to: {:?}", content_dir);
93
94    // Create exclude matcher from config
95    let exclude_matcher = ExcludeMatcher::from_config(paths)?;
96
97    // Built-in excluded directories (styles, static, templates)
98    let builtin_excluded: Vec<&str> = vec![&paths.styles, &paths.static_files, &paths.templates];
99    trace!("Built-in excluded directories: {:?}", builtin_excluded);
100
101    // Load home page
102    let home_path = content_dir.join(&paths.home);
103    let home = if home_path.exists() {
104        trace!("Loading home page from {:?}", home_path);
105        Some(Page::from_file(&home_path)?)
106    } else {
107        trace!("No home page found at {:?}", home_path);
108        None
109    };
110
111    // Discover root-level pages (markdown files in content root, excluding home)
112    let home_file_name = Path::new(&paths.home)
113        .file_name()
114        .and_then(|n| n.to_str())
115        .unwrap_or("index.md");
116
117    let root_page_paths: Vec<_> = if paths.respect_gitignore {
118        WalkBuilder::new(&content_dir)
119            .max_depth(Some(1))
120            .hidden(false)
121            .build()
122            .filter_map(|e| e.ok())
123            .filter(|e| {
124                let file_name = e.path().file_name().and_then(|n| n.to_str()).unwrap_or("");
125                e.depth() == 1
126                    && e.path().is_file()
127                    && e.path()
128                        .extension()
129                        .is_some_and(|ext| ext == "md" || ext == "html" || ext == "htm")
130                    && file_name != home_file_name
131                    && !exclude_matcher.is_excluded(file_name)
132            })
133            .map(|e| e.into_path())
134            .collect()
135    } else {
136        WalkDir::new(&content_dir)
137            .min_depth(1)
138            .max_depth(1)
139            .into_iter()
140            .filter_map(|e| e.ok())
141            .filter(|e| {
142                let file_name = e.path().file_name().and_then(|n| n.to_str()).unwrap_or("");
143                e.path().is_file()
144                    && e.path()
145                        .extension()
146                        .is_some_and(|ext| ext == "md" || ext == "html" || ext == "htm")
147                    && file_name != home_file_name
148                    && !exclude_matcher.is_excluded(file_name)
149            })
150            .map(|e| e.into_path())
151            .collect()
152    };
153
154    let mut root_pages = Vec::new();
155    for page_path in root_page_paths {
156        trace!("Loading root page from {:?}", page_path);
157        let page = Page::from_file(&page_path)?;
158        root_pages.push(page);
159    }
160    debug!("Loaded {} root pages", root_pages.len());
161
162    // Discover all sections (subdirectories)
163    let mut sections = HashMap::new();
164
165    // Collect section paths using appropriate walker
166    let section_paths: Vec<_> = if paths.respect_gitignore {
167        WalkBuilder::new(content_dir)
168            .max_depth(Some(1))
169            .hidden(false) // Don't skip hidden files by default
170            .build()
171            .filter_map(|e| e.ok())
172            .filter(|e| {
173                let dir_name = e.path().file_name().and_then(|n| n.to_str()).unwrap_or("");
174                e.depth() == 1 && e.path().is_dir() && !exclude_matcher.is_excluded(dir_name)
175            })
176            .map(|e| e.into_path())
177            .collect()
178    } else {
179        WalkDir::new(content_dir)
180            .min_depth(1)
181            .max_depth(1)
182            .into_iter()
183            .filter_map(|e| e.ok())
184            .filter(|e| {
185                let dir_name = e.path().file_name().and_then(|n| n.to_str()).unwrap_or("");
186                e.path().is_dir() && !exclude_matcher.is_excluded(dir_name)
187            })
188            .map(|e| e.into_path())
189            .collect()
190    };
191
192    // Process each section
193    for path in section_paths {
194        process_section(
195            &path,
196            &builtin_excluded,
197            &exclude_matcher,
198            &mut sections,
199            paths,
200        )?;
201    }
202
203    debug!(
204        "Content discovery complete: {} sections found, {} root pages",
205        sections.len(),
206        root_pages.len()
207    );
208    Ok(Content {
209        home,
210        root_pages,
211        sections,
212    })
213}
214
215/// Process a section directory and add it to sections map
216fn process_section(
217    path: &Path,
218    builtin_excluded: &[&str],
219    exclude_matcher: &ExcludeMatcher,
220    sections: &mut HashMap<String, Section>,
221    paths: &PathsConfig,
222) -> Result<()> {
223    let section_name = path
224        .file_name()
225        .and_then(|n| n.to_str())
226        .unwrap_or("")
227        .to_string();
228
229    // Skip built-in excluded directories (styles, static, templates)
230    if builtin_excluded
231        .iter()
232        .any(|ex| section_name == *ex || path.ends_with(ex))
233    {
234        trace!("Skipping built-in excluded section: {}", section_name);
235        return Ok(());
236    }
237
238    trace!("Processing section: {}", section_name);
239
240    // Collect content file paths (markdown and HTML) using appropriate walker
241    let post_paths: Vec<_> = if paths.respect_gitignore {
242        WalkBuilder::new(path)
243            .max_depth(Some(1))
244            .hidden(false)
245            .build()
246            .filter_map(|e| e.ok())
247            .filter(|e| {
248                let file_name = e.path().file_name().and_then(|n| n.to_str()).unwrap_or("");
249                e.depth() == 1
250                    && e.path()
251                        .extension()
252                        .is_some_and(|ext| ext == "md" || ext == "html" || ext == "htm")
253                    && !exclude_matcher.is_excluded(file_name)
254            })
255            .map(|e| e.into_path())
256            .collect()
257    } else {
258        WalkDir::new(path)
259            .min_depth(1)
260            .max_depth(1)
261            .into_iter()
262            .filter_map(|e| e.ok())
263            .filter(|e| {
264                let file_name = e.path().file_name().and_then(|n| n.to_str()).unwrap_or("");
265                e.path()
266                    .extension()
267                    .is_some_and(|ext| ext == "md" || ext == "html" || ext == "htm")
268                    && !exclude_matcher.is_excluded(file_name)
269            })
270            .map(|e| e.into_path())
271            .collect()
272    };
273
274    // Load posts (can be parallelized with rayon if needed)
275    let mut posts = Vec::new();
276    for post_path in post_paths {
277        let post = Post::from_file_with_section(&post_path, &section_name)?;
278        if !post.frontmatter.draft.unwrap_or(false) {
279            posts.push(post);
280        }
281    }
282
283    // Sort posts by date (newest first)
284    posts.sort_by(|a, b| b.frontmatter.date.cmp(&a.frontmatter.date));
285
286    if !posts.is_empty() {
287        debug!("Section '{}': {} posts loaded", section_name, posts.len());
288        sections.insert(
289            section_name.clone(),
290            Section {
291                name: section_name,
292                posts,
293            },
294        );
295    } else {
296        trace!("Section '{}': no posts found", section_name);
297    }
298
299    Ok(())
300}