1use anyhow::Result;
8use chrono::{DateTime, NaiveDate, TimeZone, Utc};
9use pulldown_cmark::{html, Options, Parser};
10use rss::{Channel, ChannelBuilder, Guid, Item, ItemBuilder};
11use serde::{Deserialize, Deserializer};
12use serde_json::Value as JsonValue;
13use std::{fs, path::Path, time::SystemTime};
14use walkdir::WalkDir;
15
16#[derive(serde::Serialize)]
18pub struct JsonFeed {
19 pub version: String,
20 pub title: String,
21 #[serde(skip_serializing_if = "Option::is_none")]
22 pub home_page_url: Option<String>,
23 #[serde(skip_serializing_if = "Option::is_none")]
24 pub feed_url: Option<String>,
25 #[serde(skip_serializing_if = "Option::is_none")]
26 pub description: Option<String>,
27 #[serde(skip_serializing_if = "Option::is_none")]
28 pub next_url: Option<String>, pub items: Vec<JsonFeedItem>,
30}
31
32#[derive(serde::Serialize)]
33pub struct JsonFeedItem {
34 pub id: String,
35 #[serde(skip_serializing_if = "Option::is_none")]
36 pub url: Option<String>,
37 #[serde(skip_serializing_if = "Option::is_none")]
38 pub title: Option<String>,
39 #[serde(skip_serializing_if = "Option::is_none")]
40 pub content_html: Option<String>,
41 #[serde(skip_serializing_if = "Option::is_none")]
42 pub date_published: Option<String>,
43 #[serde(skip_serializing_if = "Option::is_none")]
44 pub author: Option<JsonValue>, }
46
47use atom_syndication::{
49 Content as AtomContent, Entry as AtomEntry, Feed as AtomFeed, Link as AtomLink,
50 Text as AtomText,
51};
52
/// Bodies shorter than this many characters fall back to the front-matter
/// `description` when building a feed preview (see `build_feed`).
const MIN_BODY_PREVIEW_CHARS: usize = 80;
55
56fn systemtime_to_utc(st: SystemTime) -> DateTime<Utc> {
58 DateTime::<Utc>::from(st)
59}
60
61fn deserialize_date<'de, D>(deserializer: D) -> Result<Option<DateTime<Utc>>, D::Error>
63where
64 D: Deserializer<'de>,
65{
66 let s: Option<String> = Option::deserialize(deserializer)?;
67
68 if let Some(date_str) = s {
69 if let Ok(dt) = DateTime::parse_from_rfc3339(&date_str) {
70 return Ok(Some(dt.with_timezone(&Utc)));
71 }
72
73 if let Ok(nd) = NaiveDate::parse_from_str(&date_str, "%Y-%m-%d") {
74 return Ok(Some(
75 Utc.from_utc_datetime(&nd.and_hms_opt(0, 0, 0).unwrap()),
76 ));
77 }
78 }
79 Ok(None)
80}
81
82#[derive(Debug, Deserialize, Clone)]
90pub struct FrontMatter {
91 pub title: String,
92
93 #[serde(deserialize_with = "deserialize_date")]
94 pub date: Option<DateTime<Utc>>,
95
96 pub author: Option<String>,
97 pub description: Option<String>, }
99
/// A parsed markdown article: front matter plus body.
#[derive(Debug)]
pub struct Article {
    /// Parsed (or fallback) front matter.
    pub fm: FrontMatter,
    /// Markdown body with front matter removed.
    pub content: String,
    /// Path relative to the source root, lossily converted to a string.
    pub path: String,
}
111
112pub fn parse_markdown_file(root: &Path, path: &Path) -> Result<Article> {
120 let text = fs::read_to_string(path)?;
121
122 let mut lines = text.lines();
123 let mut yaml = String::new();
124 let mut in_yaml = false;
125
126 for line in lines.by_ref() {
128 let trimmed = line.trim();
129 if trimmed == "---" {
130 if !in_yaml {
131 in_yaml = true;
132 continue;
133 }
134 break;
135 }
136 if in_yaml {
137 yaml.push_str(line);
138 yaml.push('\n');
139 }
140 }
141
142 let content = lines.collect::<Vec<_>>().join("\n") + "\n";
144
145 let fallback_date = path
146 .metadata()
147 .ok()
148 .and_then(|m| m.modified().ok())
149 .map(systemtime_to_utc);
150
151 let fm = if yaml.trim().is_empty() {
153 FrontMatter {
154 title: path.file_stem().unwrap().to_string_lossy().into_owned(),
155 date: fallback_date,
156 author: None,
157 description: Some(content.clone()),
158 }
159 } else {
160 serde_yaml::from_str(&yaml).unwrap_or_else(|_| FrontMatter {
161 title: path.file_stem().unwrap().to_string_lossy().into_owned(),
162 date: fallback_date,
163 author: None,
164 description: Some(content.clone()),
165 })
166 };
167
168 let rel_path = path.strip_prefix(root).unwrap_or(path);
169
170 Ok(Article {
171 fm,
172 content,
173 path: rel_path.to_string_lossy().into_owned(),
174 })
175}
176
177pub fn collect_articles(src_dir: &Path) -> Result<Vec<Article>> {
189 let mut articles = Vec::new();
190
191 for entry in WalkDir::new(src_dir).into_iter().filter_map(Result::ok) {
192 let path = entry.path();
193 if !path.is_file() {
194 continue;
195 }
196
197 let ext = path
198 .extension()
199 .and_then(|e| e.to_str())
200 .map(str::to_ascii_lowercase);
201
202 if !matches!(ext.as_deref(), Some("md" | "markdown")) {
203 continue;
204 }
205
206 if path
207 .file_name()
208 .unwrap()
209 .to_string_lossy()
210 .eq_ignore_ascii_case("SUMMARY.md")
211 {
212 continue;
213 }
214
215 if let Ok(article) = parse_markdown_file(src_dir, path) {
216 articles.push(article);
217 }
218 }
219
220 articles.sort_by_key(|a| a.fm.date);
222 articles.reverse();
223
224 Ok(articles)
225}
226
227fn markdown_to_html(md: &str) -> String {
232 let mut html = String::new();
233 let parser = Parser::new_ext(md, Options::all());
234 html::push_html(&mut html, parser);
235 html
236}
237
/// Drop everything up to and including the first blank line that follows the
/// first markdown heading, returning the remainder as a slice of `md`.
///
/// If no heading is seen, or no blank line follows one, the input is
/// returned unchanged.
///
/// Offsets are accumulated from `split_inclusive('\n')`, which counts the
/// actual line-terminator bytes; the previous `line.len() + 1` accounting
/// undercounted CRLF (`\r\n`) terminators and produced a wrong cut point on
/// Windows-style files.
fn strip_leading_boilerplate(md: &str) -> &str {
    let mut seen_heading = false;
    let mut offset = 0usize;

    for line in md.split_inclusive('\n') {
        if line.trim_start().starts_with('#') {
            seen_heading = true;
        }

        // First blank line after a heading: cut just past it.
        if seen_heading && line.trim().is_empty() {
            offset += line.len();
            return &md[offset..];
        }

        offset += line.len();
    }

    md
}
275
/// Return the prefix of `s` containing at most `max_chars` characters,
/// always cutting on a UTF-8 character boundary.
///
/// The previous implementation tracked `byte_idx + 1` as a fallback end
/// position, which lands mid-character for multibyte text and panicked
/// (e.g. `utf8_prefix("é", 5)` sliced at byte 1 of a 2-byte char).
/// `char_indices().nth(max_chars)` yields the byte offset of the first
/// character *past* the prefix, which is a valid boundary by construction.
fn utf8_prefix(s: &str, max_chars: usize) -> &str {
    match s.char_indices().nth(max_chars) {
        // `byte_idx` starts the (max_chars+1)-th char, so slicing up to it
        // keeps exactly `max_chars` chars.
        Some((byte_idx, _)) => &s[..byte_idx],
        // Fewer than `max_chars` chars in total: the whole string fits.
        None => s,
    }
}
298
/// Extract up to `max_paragraphs` top-level `<p>…</p>` fragments from
/// `html`, then clamp the result to `max_chars` characters.
///
/// Only genuine `<p>` opening tags are matched (`<p>`, `<p attr…>`, `<p/>`);
/// a bare `find("<p")` also fired on `<pre>`, `<picture>`, etc., so a
/// leading code block would swallow everything up to the next real `</p>`.
///
/// If no paragraph is found, the whole input is used before clamping. The
/// character clamp may cut inside a tag — acceptable for feed previews.
fn html_first_paragraphs(html: &str, max_paragraphs: usize, max_chars: usize) -> String {
    // Locate the next genuine `<p>` opening tag at or after `from`.
    fn find_paragraph_open(html: &str, from: usize) -> Option<usize> {
        let mut search = from;
        while let Some(rel) = html[search..].find("<p") {
            let idx = search + rel;
            // It is a `<p>` tag only if `<p` is followed by `>`, whitespace,
            // or `/` — otherwise it's a longer tag name like `<pre>`.
            match html.as_bytes().get(idx + 2) {
                Some(b'>' | b' ' | b'\t' | b'\r' | b'\n' | b'/') => return Some(idx),
                None => return None,
                Some(_) => search = idx + 2,
            }
        }
        None
    }

    let mut out = String::new();
    let mut start = 0;
    let mut count = 0;

    while count < max_paragraphs {
        let Some(p_start) = find_paragraph_open(html, start) else {
            break;
        };

        let Some(rel_close) = html[p_start..].find("</p>") else {
            break;
        };
        let close = p_start + rel_close + "</p>".len();

        out.push_str(&html[p_start..close]);
        count += 1;
        start = close;
    }

    // No paragraphs at all: fall back to the raw fragment.
    if out.is_empty() {
        out = html.to_string();
    }

    // Clamp by characters (not bytes) so multibyte text never splits.
    if out.chars().count() > max_chars {
        out.chars().take(max_chars).collect()
    } else {
        out
    }
}
341
342pub struct FeedPage {
347 pub filename: String, pub channel: Channel,
349}
350
/// Output of [`build_feed`]: the full set of feed pages to write out.
pub struct BuildResult {
    /// Generated pages (a single `rss.xml` unless pagination kicked in).
    pub pages: Vec<FeedPage>,
}
359
360#[must_use]
364pub fn rss_to_json_feed(
365 channel: &Channel,
366 feed_url: Option<&str>,
367 next_url: Option<&str>,
368) -> JsonFeed {
369 let items: Vec<JsonFeedItem> = channel
370 .items()
371 .iter()
372 .map(|item| {
373 let id = item
374 .guid()
375 .map(|g| g.value().to_string())
376 .or_else(|| item.link().map(std::string::ToString::to_string))
377 .unwrap_or_else(|| item.title().unwrap_or("").to_string());
378
379 let url = item.link().map(std::string::ToString::to_string);
380 let title = item.title().map(std::string::ToString::to_string);
381 let content_html = item.description().map(std::string::ToString::to_string);
382 let date_published = item.pub_date().and_then(|d| {
383 DateTime::parse_from_rfc2822(d)
384 .ok()
385 .map(|dt| dt.to_rfc3339())
386 });
387
388 let author = item.author().map(|a| serde_json::json!({ "name": a }));
389
390 JsonFeedItem {
391 id,
392 url,
393 title,
394 content_html,
395 date_published,
396 author,
397 }
398 })
399 .collect();
400
401 JsonFeed {
402 version: "https://jsonfeed.org/version/1.1".to_string(),
403 title: channel.title().to_string(),
404 home_page_url: Some(channel.link().to_string()),
405 feed_url: feed_url.map(std::string::ToString::to_string),
406 description: Some(channel.description().to_string()),
407 next_url: next_url.map(std::string::ToString::to_string),
408 items,
409 }
410}
411#[must_use]
417pub fn rss_to_atom(channel: &Channel) -> AtomFeed {
418 let entries: Vec<AtomEntry> = channel
419 .items()
420 .iter()
421 .map(|item| {
422 let mut entry = AtomEntry::default();
423
424 let entry_id = item
426 .guid()
427 .map(|g| g.value().to_string())
428 .or_else(|| item.link().map(std::string::ToString::to_string))
429 .unwrap_or_else(|| item.title().unwrap_or("").to_string());
430 entry.set_id(entry_id);
431
432 if let Some(title) = item.title() {
433 entry.set_title(title.to_string());
434 }
435
436 if let Some(link) = item.link() {
437 entry.set_links(vec![AtomLink {
438 href: link.to_string(),
439 ..Default::default()
440 }]);
441 }
442
443 if let Some(desc) = item.description() {
444 let mut content = AtomContent::default();
445 content.set_content_type("html".to_string());
446 content.set_value(Some(desc.to_string()));
447 entry.set_content(Some(content));
448 }
449
450 if let Some(Ok(dt)) = item.pub_date().map(DateTime::parse_from_rfc2822) {
451 entry.set_updated(dt);
452 }
453
454 entry
455 })
456 .collect();
457
458 let mut feed = AtomFeed::default();
459 feed.set_title(channel.title().to_string());
460 feed.set_entries(entries);
461
462 let link = channel.link();
463 if link.is_empty() {
464 feed.set_id(channel.title().to_string());
466 } else {
467 feed.set_links(vec![AtomLink {
468 href: link.to_string(),
469 ..Default::default()
470 }]);
471 feed.set_id(link.to_string());
473 }
474
475 let desc = channel.description();
476 if !desc.is_empty() {
477 feed.set_subtitle(Some(AtomText {
478 value: desc.to_string(),
479 ..Default::default()
480 }));
481 }
482
483 feed
484}
485
486pub fn build_feed(
510 src_dir: &Path,
511 title: &str,
512 site_url: &str,
513 description: &str,
514 full_preview: bool,
515 max_items: usize,
516 paginated: bool,
517) -> Result<BuildResult> {
518 let articles = collect_articles(src_dir)?;
519
520 let base_url = site_url.trim_end_matches('/');
521
522 let items: Vec<Item> = articles
523 .into_iter()
524 .map(|article| {
525 let html_path = article
527 .path
528 .replace('\\', "/")
529 .replace(".md", ".html")
530 .replace("/README.html", "/index.html");
531
532 let link = format!("{base_url}/{html_path}");
533
534 let content_trimmed = article.content.trim();
536
537 let _body_len = content_trimmed.chars().count();
539
540 let mut source_md: &str;
542
543 if full_preview {
544 source_md = article.content.as_str();
546 } else {
547 const PREVIEW_MD_SLICE_CHARS: usize = 4000;
549 let content_trimmed = article.content.trim();
551 let body_len = content_trimmed.chars().count();
552
553 source_md =
554 if body_len >= MIN_BODY_PREVIEW_CHARS || article.fm.description.is_none() {
555 content_trimmed
556 } else {
557 article.fm.description.as_deref().unwrap_or(content_trimmed)
558 };
559
560 source_md = strip_leading_boilerplate(source_md);
562
563 source_md = utf8_prefix(source_md, PREVIEW_MD_SLICE_CHARS);
564 }
565
566 let raw_html = markdown_to_html(source_md);
568
569 let preview = if full_preview {
571 raw_html
572 } else {
573 html_first_paragraphs(&raw_html, 3, 800)
574 };
575
576 let mut item = ItemBuilder::default();
577
578 item.title(Some(article.fm.title.clone()));
579 item.link(Some(link.clone()));
580 item.description(Some(preview)); item.guid(Some(Guid {
582 value: link,
583 permalink: true,
584 }));
585
586 if let Some(date) = article.fm.date {
587 item.pub_date(Some(date.to_rfc2822()));
588 }
589
590 if let Some(author) = article.fm.author {
591 item.author(Some(author));
592 }
593
594 item.build()
595 })
596 .collect();
597
598 let build_channel_for_slice =
600 |slice: &[Item], _page_idx: usize, _total_pages: usize| -> Channel {
601 ChannelBuilder::default()
602 .title(title)
603 .link(format!("{base_url}/"))
604 .description(description)
605 .items(slice.to_vec())
606 .generator(Some("mdbook-rss-feed 1.0.0".to_string()))
607 .build()
608 };
609
610 let mut pages = Vec::new();
611
612 if !paginated || max_items == 0 || items.len() <= max_items {
613 let channel = build_channel_for_slice(&items, 1, 1);
615 pages.push(FeedPage {
616 filename: "rss.xml".to_string(),
617 channel,
618 });
619 } else {
620 let total_pages = items.len().div_ceil(max_items);
622
623 for page_idx in 0..total_pages {
624 let start = page_idx * max_items;
625 let end = (start + max_items).min(items.len());
626 let slice = &items[start..end];
627
628 let filename = if page_idx == 0 {
629 "rss.xml".to_string()
630 } else {
631 format!("rss{}.xml", page_idx + 1)
632 };
633
634 let channel = build_channel_for_slice(slice, page_idx + 1, total_pages);
635
636 pages.push(FeedPage { filename, channel });
637 }
638 }
639
640 Ok(BuildResult { pages })
641}