1use anyhow::Result;
2use chrono::{DateTime, NaiveDate, TimeZone, Utc};
3use pulldown_cmark::{html, Options, Parser};
4use rss::{Channel, ChannelBuilder, Guid, Item, ItemBuilder};
5use serde::{Deserialize, Deserializer};
6use std::{fs, path::Path, time::SystemTime};
7use walkdir::WalkDir;
8
/// Bodies shorter than this many characters are considered too thin for a
/// preview; `build_feed` then falls back to the front-matter description.
const MIN_BODY_PREVIEW_CHARS: usize = 80;
11
12fn systemtime_to_utc(st: SystemTime) -> DateTime<Utc> {
14 DateTime::<Utc>::from(st)
15}
16
17fn deserialize_date<'de, D>(deserializer: D) -> Result<Option<DateTime<Utc>>, D::Error>
19where
20 D: Deserializer<'de>,
21{
22 let s: Option<String> = Option::deserialize(deserializer)?;
23
24 if let Some(date_str) = s {
25 if let Ok(dt) = DateTime::parse_from_rfc3339(&date_str) {
26 return Ok(Some(dt.with_timezone(&Utc)));
27 }
28
29 if let Ok(nd) = NaiveDate::parse_from_str(&date_str, "%Y-%m-%d") {
30 return Ok(Some(
31 Utc.from_utc_datetime(&nd.and_hms_opt(0, 0, 0).unwrap()),
32 ));
33 }
34 }
35 Ok(None)
36}
37
/// YAML front matter of a markdown article.
#[derive(Debug, Deserialize, Clone)]
pub struct FrontMatter {
    /// Article title (required in the YAML; synthesized from the file
    /// name when the front matter is missing or malformed).
    pub title: String,

    /// Publication date; parsed leniently by `deserialize_date`
    /// (RFC 3339 or `YYYY-MM-DD`), `None` when absent or unparseable.
    #[serde(deserialize_with = "deserialize_date")]
    pub date: Option<DateTime<Utc>>,

    /// Optional author name, copied into the RSS item when present.
    pub author: Option<String>,
    /// Optional summary; used as a preview source for very short bodies.
    pub description: Option<String>, }
48
/// A parsed markdown article: front matter, body, and source location.
#[derive(Debug)]
pub struct Article {
    /// Parsed (or synthesized) front matter.
    pub fm: FrontMatter,
    /// Markdown body with the front matter block removed.
    pub content: String,
    /// Path relative to the source root (falls back to the full path
    /// when `strip_prefix` fails), used to derive the article URL.
    pub path: String,
}
55
56pub fn parse_markdown_file(root: &Path, path: &Path) -> Result<Article> {
57 let text = fs::read_to_string(path)?;
58
59 let mut lines = text.lines();
60 let mut yaml = String::new();
61 let mut in_yaml = false;
62
63 for line in lines.by_ref() {
65 let trimmed = line.trim();
66 if trimmed == "---" {
67 if !in_yaml {
68 in_yaml = true;
69 continue;
70 } else {
71 break;
72 }
73 }
74 if in_yaml {
75 yaml.push_str(line);
76 yaml.push('\n');
77 }
78 }
79
80 let content = lines.collect::<Vec<_>>().join("\n") + "\n";
82
83 let fallback_date = path
84 .metadata()
85 .ok()
86 .and_then(|m| m.modified().ok())
87 .map(systemtime_to_utc);
88
89 let fm = if !yaml.trim().is_empty() {
91 serde_yaml::from_str(&yaml).unwrap_or_else(|_| FrontMatter {
92 title: path.file_stem().unwrap().to_string_lossy().into_owned(),
93 date: fallback_date,
94 author: None,
95 description: Some(content.clone()),
96 })
97 } else {
98 FrontMatter {
99 title: path.file_stem().unwrap().to_string_lossy().into_owned(),
100 date: fallback_date,
101 author: None,
102 description: Some(content.clone()),
103 }
104 };
105
106 let rel_path = path.strip_prefix(root).unwrap_or(path);
107
108 Ok(Article {
109 fm,
110 content,
111 path: rel_path.to_string_lossy().into_owned(),
112 })
113}
114
115pub fn collect_articles(src_dir: &Path) -> Result<Vec<Article>> {
116 let mut articles = Vec::new();
117
118 for entry in WalkDir::new(src_dir).into_iter().filter_map(|e| e.ok()) {
119 let path = entry.path();
120 if !path.is_file() {
121 continue;
122 }
123
124 let ext = path
125 .extension()
126 .and_then(|e| e.to_str())
127 .map(|s| s.to_ascii_lowercase());
128
129 if !matches!(ext.as_deref(), Some("md" | "markdown")) {
130 continue;
131 }
132
133 if path
134 .file_name()
135 .unwrap()
136 .to_string_lossy()
137 .eq_ignore_ascii_case("SUMMARY.md")
138 {
139 continue;
140 }
141
142 if let Ok(article) = parse_markdown_file(src_dir, path) {
143 articles.push(article);
144 }
145 }
146
147 articles.sort_by_key(|a| a.fm.date);
149 articles.reverse();
150
151 Ok(articles)
152}
153
154fn markdown_to_html(md: &str) -> String {
155 let mut html = String::new();
156 let parser = Parser::new_ext(md, Options::all());
157 html::push_html(&mut html, parser);
158 html
159}
160
/// Drop everything up to (and including) the first blank line that follows
/// a markdown heading — typically the article's title block.
///
/// If no heading-then-blank-line sequence exists, the input is returned
/// unchanged. Offsets are taken from the original string's bytes via
/// `split_inclusive`, so CRLF line endings are accounted for exactly
/// (a `len() + 1` per-line estimate undercounts `\r\n` lines, since
/// `lines()` strips the `\r`).
fn strip_leading_boilerplate(md: &str) -> &str {
    let mut seen_heading = false;
    let mut offset = 0;

    for line in md.split_inclusive('\n') {
        if line.trim_start().starts_with('#') {
            seen_heading = true;
        }
        // Exact byte length of this line, newline (and any \r) included.
        offset += line.len();
        if seen_heading && line.trim().is_empty() {
            // Cut just past the blank line that ends the title block.
            return &md[offset..];
        }
    }

    md
}
197
/// Return the prefix of `s` containing at most `max_chars` characters,
/// always cut on a UTF-8 character boundary.
///
/// The previous implementation tracked `byte_idx + 1` as the cut point,
/// which lands mid-character for multibyte input (e.g. `("aé", 2)`
/// panicked on the slice). `char_indices().nth(max_chars)` yields the
/// byte offset where character number `max_chars` starts — exactly the
/// boundary we want — or `None` when the string is already short enough.
fn utf8_prefix(s: &str, max_chars: usize) -> &str {
    match s.char_indices().nth(max_chars) {
        Some((byte_idx, _)) => &s[..byte_idx],
        None => s,
    }
}
220
/// Extract up to `max_paragraphs` complete `<p>…</p>` elements from
/// rendered HTML, then clamp the concatenation to `max_chars` characters.
///
/// Only genuine paragraph open tags are matched (`<p>` or `<p` followed
/// by whitespace) — a bare `find("<p")` would also hit `<pre>`/`<param>`
/// and drag a leading code block into the preview. When no paragraph is
/// found at all, the whole input is used as the preview source.
fn html_first_paragraphs(html: &str, max_paragraphs: usize, max_chars: usize) -> String {
    let mut out = String::new();
    let mut cursor = 0;
    let mut taken = 0;

    'paras: while taken < max_paragraphs {
        // Locate the next real <p> open tag, skipping false prefixes.
        let p_start = loop {
            let rel = match html[cursor..].find("<p") {
                Some(i) => cursor + i,
                None => break 'paras,
            };
            match html.as_bytes().get(rel + 2) {
                Some(b'>') | Some(b' ') | Some(b'\t') | Some(b'\n') => break rel,
                // Not a <p> tag (e.g. <pre>); resume the search after it.
                _ => cursor = rel + 2,
            }
        };

        let close = match html[p_start..].find("</p>") {
            Some(i) => p_start + i + "</p>".len(),
            None => break, // unclosed paragraph: stop collecting
        };

        out.push_str(&html[p_start..close]);
        taken += 1;
        cursor = close;
    }

    if out.is_empty() {
        // No paragraphs at all — fall back to the raw HTML.
        out = html.to_string();
    }

    // Character-based clamp; may cut inside a tag, matching the
    // original's documented (loose) truncation behavior.
    if out.chars().count() > max_chars {
        out.chars().take(max_chars).collect()
    } else {
        out
    }
}
260
261pub fn build_feed(
262 src_dir: &Path,
263 title: &str,
264 site_url: &str,
265 description: &str,
266) -> Result<Channel> {
267 let articles = collect_articles(src_dir)?;
268
269 let base_url = site_url.trim_end_matches('/');
270
271 let items: Vec<Item> = articles
272 .into_iter()
273 .map(|article| {
274 let html_path = article
276 .path
277 .replace('\\', "/")
278 .replace(".md", ".html")
279 .replace("/README.html", "/index.html");
280
281 let link = format!("{base_url}/{html_path}");
282
283 let content_trimmed = article.content.trim();
285
286 let body_len = content_trimmed.chars().count();
288
289 let mut source_md =
291 if body_len >= MIN_BODY_PREVIEW_CHARS || article.fm.description.is_none() {
292 content_trimmed
295 } else {
296 article.fm.description.as_deref().unwrap_or(content_trimmed)
298 };
299
300 source_md = strip_leading_boilerplate(source_md);
302
303 const PREVIEW_MD_SLICE_CHARS: usize = 4000;
305 let source_md_slice = utf8_prefix(source_md, PREVIEW_MD_SLICE_CHARS);
306 let raw_html = markdown_to_html(source_md_slice);
310
311 let preview = html_first_paragraphs(&raw_html, 3, 800);
313
314 let mut item = ItemBuilder::default();
315
316 item.title(Some(article.fm.title.clone()));
317 item.link(Some(link.clone()));
318 item.description(Some(preview)); item.guid(Some(Guid {
320 value: link.clone(),
321 permalink: true,
322 }));
323
324 if let Some(date) = article.fm.date {
325 item.pub_date(Some(date.to_rfc2822()));
326 }
327
328 if let Some(author) = article.fm.author {
329 item.author(Some(author));
330 }
331
332 item.build()
333 })
334 .collect();
335
336 let channel = ChannelBuilder::default()
338 .title(title)
339 .link(format!("{base_url}/"))
340 .description(description)
341 .items(items)
342 .generator(Some("mdbook-rss-feed 0.1.0".to_string()))
343 .build();
344
345 Ok(channel)
346}