mdbook_rss_feed/
lib.rs

1use anyhow::Result;
2use chrono::{DateTime, NaiveDate, TimeZone, Utc};
3use pulldown_cmark::{html, Options, Parser};
4use rss::{Channel, ChannelBuilder, Guid, Item, ItemBuilder};
5use serde::{Deserialize, Deserializer};
6use std::{fs, path::Path, time::SystemTime};
7use walkdir::WalkDir;
8
/// Smallest chapter body, measured in `char`s after trimming, that is still
/// used as the preview source when a front-matter description is available.
/// Shorter bodies give way to the description.
const MIN_BODY_PREVIEW_CHARS: usize = 80;
11
12// Convert file modification time → UTC
13fn systemtime_to_utc(st: SystemTime) -> DateTime<Utc> {
14    DateTime::<Utc>::from(st)
15}
16
17// Parse front-matter date formats
18fn deserialize_date<'de, D>(deserializer: D) -> Result<Option<DateTime<Utc>>, D::Error>
19where
20    D: Deserializer<'de>,
21{
22    let s: Option<String> = Option::deserialize(deserializer)?;
23
24    if let Some(date_str) = s {
25        if let Ok(dt) = DateTime::parse_from_rfc3339(&date_str) {
26            return Ok(Some(dt.with_timezone(&Utc)));
27        }
28
29        if let Ok(nd) = NaiveDate::parse_from_str(&date_str, "%Y-%m-%d") {
30            return Ok(Some(
31                Utc.from_utc_datetime(&nd.and_hms_opt(0, 0, 0).unwrap()),
32            ));
33        }
34    }
35    Ok(None)
36}
37
38#[derive(Debug, Deserialize, Clone)]
39pub struct FrontMatter {
40    pub title: String,
41
42    #[serde(deserialize_with = "deserialize_date")]
43    pub date: Option<DateTime<Utc>>,
44
45    pub author: Option<String>,
46    pub description: Option<String>, // User-supplied summary (optional)
47}
48
49#[derive(Debug)]
50pub struct Article {
51    pub fm: FrontMatter,
52    pub content: String,
53    pub path: String,
54}
55
56pub fn parse_markdown_file(root: &Path, path: &Path) -> Result<Article> {
57    let text = fs::read_to_string(path)?;
58
59    let mut lines = text.lines();
60    let mut yaml = String::new();
61    let mut in_yaml = false;
62
63    // Extract YAML front matter
64    for line in lines.by_ref() {
65        let trimmed = line.trim();
66        if trimmed == "---" {
67            if !in_yaml {
68                in_yaml = true;
69                continue;
70            } else {
71                break;
72            }
73        }
74        if in_yaml {
75            yaml.push_str(line);
76            yaml.push('\n');
77        }
78    }
79
80    // Markdown content after front matter
81    let content = lines.collect::<Vec<_>>().join("\n") + "\n";
82
83    let fallback_date = path
84        .metadata()
85        .ok()
86        .and_then(|m| m.modified().ok())
87        .map(systemtime_to_utc);
88
89    // Parse front matter
90    let fm = if !yaml.trim().is_empty() {
91        serde_yaml::from_str(&yaml).unwrap_or_else(|_| FrontMatter {
92            title: path.file_stem().unwrap().to_string_lossy().into_owned(),
93            date: fallback_date,
94            author: None,
95            description: Some(content.clone()),
96        })
97    } else {
98        FrontMatter {
99            title: path.file_stem().unwrap().to_string_lossy().into_owned(),
100            date: fallback_date,
101            author: None,
102            description: Some(content.clone()),
103        }
104    };
105
106    let rel_path = path.strip_prefix(root).unwrap_or(path);
107
108    Ok(Article {
109        fm,
110        content,
111        path: rel_path.to_string_lossy().into_owned(),
112    })
113}
114
115pub fn collect_articles(src_dir: &Path) -> Result<Vec<Article>> {
116    let mut articles = Vec::new();
117
118    for entry in WalkDir::new(src_dir).into_iter().filter_map(|e| e.ok()) {
119        let path = entry.path();
120        if !path.is_file() {
121            continue;
122        }
123
124        let ext = path
125            .extension()
126            .and_then(|e| e.to_str())
127            .map(|s| s.to_ascii_lowercase());
128
129        if !matches!(ext.as_deref(), Some("md" | "markdown")) {
130            continue;
131        }
132
133        if path
134            .file_name()
135            .unwrap()
136            .to_string_lossy()
137            .eq_ignore_ascii_case("SUMMARY.md")
138        {
139            continue;
140        }
141
142        if let Ok(article) = parse_markdown_file(src_dir, path) {
143            articles.push(article);
144        }
145    }
146
147    // Sort newest → oldest
148    articles.sort_by_key(|a| a.fm.date);
149    articles.reverse();
150
151    Ok(articles)
152}
153
154fn markdown_to_html(md: &str) -> String {
155    let mut html = String::new();
156    let parser = Parser::new_ext(md, Options::all());
157    html::push_html(&mut html, parser);
158    html
159}
160
/// Skips the leading heading block of a markdown document so previews tend to
/// start at the introductory text.
///
/// The cut is made right after the first blank line that follows the first
/// line starting with `#` (ignoring leading whitespace). If there is no such
/// heading, or no blank line after it, `md` is returned unchanged.
///
/// Offsets are taken from the real line lengths, terminator included, so the
/// cut is exact for both `\n` and `\r\n` input and always falls on a character
/// boundary.
fn strip_leading_boilerplate(md: &str) -> &str {
    let mut seen_heading = false;
    let mut offset = 0;

    // `split_inclusive` keeps each line's terminator, so `offset` always points
    // at the first byte of the following line.
    for line in md.split_inclusive('\n') {
        offset += line.len();

        if line.trim_start().starts_with('#') {
            seen_heading = true;
        } else if seen_heading && line.trim().is_empty() {
            // First blank line after a heading: the preview starts after it.
            return &md[offset..];
        }
    }

    md
}
197
/// Returns the longest prefix of `s` holding at most `max_chars` characters
/// (Unicode scalar values), always cutting on a character boundary.
///
/// If `s` has `max_chars` characters or fewer, the whole string is returned;
/// `max_chars == 0` yields the empty string.
fn utf8_prefix(s: &str, max_chars: usize) -> &str {
    // The byte offset of the character at index `max_chars` is exactly where
    // the prefix ends. If no such character exists, `s` is short enough.
    match s.char_indices().nth(max_chars) {
        Some((end, _)) => &s[..end],
        None => s,
    }
}
220
/// Finds the byte offset of the next `<p>` or `<p ...>` opening tag at or after
/// `from`. Tags that merely begin with `<p` (such as `<pre>`) are skipped.
fn next_paragraph_start(html: &str, from: usize) -> Option<usize> {
    let mut search = from;

    while let Some(rel) = html[search..].find("<p") {
        let at = search + rel;
        match html.as_bytes().get(at + 2).copied() {
            Some(b'>') => return Some(at),
            Some(b) if b.is_ascii_whitespace() => return Some(at),
            // Some other tag starting with "<p"; keep looking after it.
            _ => search = at + 2,
        }
    }

    None
}

/// Collects up to `max_paragraphs` `<p>…</p>` blocks from `html`, concatenated
/// in document order, and caps the result at `max_chars` characters.
///
/// If `html` contains no complete paragraph, the whole input is used instead.
/// The cap counts characters, not bytes, so it never splits a UTF-8 sequence.
///
/// NOTE(review): the cap is applied to raw markup, so a long preview can still
/// end inside a tag or leave elements unclosed.
fn html_first_paragraphs(html: &str, max_paragraphs: usize, max_chars: usize) -> String {
    let mut out = String::new();
    let mut cursor = 0;
    let mut taken = 0;

    while taken < max_paragraphs {
        let p_start = match next_paragraph_start(html, cursor) {
            Some(at) => at,
            None => break,
        };

        // End of this paragraph, closing tag included.
        let close = match html[p_start..].find("</p>") {
            Some(rel) => p_start + rel + "</p>".len(),
            None => break,
        };

        out.push_str(&html[p_start..close]);
        taken += 1;
        cursor = close;
    }

    // No paragraph found: fall back to the original HTML.
    if out.is_empty() {
        out.push_str(html);
    }

    // Cut at the byte offset of the first character beyond the limit, if any.
    if let Some((cut, _)) = out.char_indices().nth(max_chars) {
        out.truncate(cut);
    }

    out
}
260
261pub fn build_feed(
262    src_dir: &Path,
263    title: &str,
264    site_url: &str,
265    description: &str,
266) -> Result<Channel> {
267    let articles = collect_articles(src_dir)?;
268
269    let base_url = site_url.trim_end_matches('/');
270
271    let items: Vec<Item> = articles
272        .into_iter()
273        .map(|article| {
274            // Build correct .html path
275            let html_path = article
276                .path
277                .replace('\\', "/")
278                .replace(".md", ".html")
279                .replace("/README.html", "/index.html");
280
281            let link = format!("{base_url}/{html_path}");
282
283            //  Hybrid preview source selection
284            let content_trimmed = article.content.trim();
285
286            // Count chars to decide if body is "very short"
287            let body_len = content_trimmed.chars().count();
288
289            // 1) Choose base markdown (body vs description)
290            let mut source_md =
291                if body_len >= MIN_BODY_PREVIEW_CHARS || article.fm.description.is_none() {
292                    // Use chapter body by default when it has enough content,
293                    // or when there is no description at all.
294                    content_trimmed
295                } else {
296                    // Body is empty/very short AND description exists → use description.
297                    article.fm.description.as_deref().unwrap_or(content_trimmed)
298                };
299
300            // 2) Strip obvious leading boilerplate so we start near the intro text
301            source_md = strip_leading_boilerplate(source_md);
302
303            // 3) Only consider the first slice of markdown for preview
304            const PREVIEW_MD_SLICE_CHARS: usize = 4000;
305            let source_md_slice = utf8_prefix(source_md, PREVIEW_MD_SLICE_CHARS);
306            // -------------------------------------------------------
307
308            // Convert chosen markdown source → HTML
309            let raw_html = markdown_to_html(source_md_slice);
310
311            // Use first few paragraphs (up to 3) as preview, capped to a reasonable length
312            let preview = html_first_paragraphs(&raw_html, 3, 800);
313
314            let mut item = ItemBuilder::default();
315
316            item.title(Some(article.fm.title.clone()));
317            item.link(Some(link.clone()));
318            item.description(Some(preview)); // Stored directly inside CDATA
319            item.guid(Some(Guid {
320                value: link.clone(),
321                permalink: true,
322            }));
323
324            if let Some(date) = article.fm.date {
325                item.pub_date(Some(date.to_rfc2822()));
326            }
327
328            if let Some(author) = article.fm.author {
329                item.author(Some(author));
330            }
331
332            item.build()
333        })
334        .collect();
335
336    // Build the channel
337    let channel = ChannelBuilder::default()
338        .title(title)
339        .link(format!("{base_url}/"))
340        .description(description)
341        .items(items)
342        .generator(Some("mdbook-rss-feed 0.1.0".to_string()))
343        .build();
344
345    Ok(channel)
346}