mdbook_content_collections/
lib.rs1use anyhow::Result;
2use chrono::{DateTime, NaiveDate, TimeZone, Utc};
3use pulldown_cmark::{Options, Parser, html};
4use serde::{Deserialize, Deserializer, Serialize};
5use serde_json::json;
6use std::{fs, path::Path, time::SystemTime};
7use walkdir::WalkDir;
8
/// Minimum number of characters (after trimming) an article body must have
/// to be used as the preview source when a front-matter description exists.
/// Shorter bodies fall back to the description instead.
const MIN_BODY_PREVIEW_CHARS: usize = 80;
11
12fn systemtime_to_utc(st: SystemTime) -> DateTime<Utc> {
14 DateTime::<Utc>::from(st)
15}
16
17fn deserialize_date<'de, D>(deserializer: D) -> Result<Option<DateTime<Utc>>, D::Error>
19where
20 D: Deserializer<'de>,
21{
22 let s: Option<String> = Option::deserialize(deserializer)?;
23
24 if let Some(date_str) = s {
25 if let Ok(dt) = DateTime::parse_from_rfc3339(&date_str) {
26 return Ok(Some(dt.with_timezone(&Utc)));
27 }
28
29 if let Ok(nd) = NaiveDate::parse_from_str(&date_str, "%Y-%m-%d") {
30 return Ok(Some(
31 Utc.from_utc_datetime(&nd.and_hms_opt(0, 0, 0).unwrap()),
32 ));
33 }
34 }
35 Ok(None)
36}
37
38#[derive(Debug, Deserialize, Clone)]
39pub struct FrontMatter {
40 pub title: String,
41
42 #[serde(deserialize_with = "deserialize_date")]
43 pub date: Option<DateTime<Utc>>,
44
45 pub author: Option<String>,
46 pub description: Option<String>, pub collection: Option<String>,
50
51 pub tags: Option<Vec<String>>,
53
54 pub draft: Option<bool>,
56}
57
/// A Markdown file split into its front matter and body.
#[derive(Debug)]
pub struct Article {
    /// Front matter parsed from the file, or a fallback built from the file
    /// itself when none could be parsed.
    pub fm: FrontMatter,
    /// Markdown text that follows the front matter.
    pub content: String,
    /// File path, relative to the scanned root when the file lies under it.
    pub path: String,
}
64
65pub fn parse_markdown_file(root: &Path, path: &Path) -> Result<Article> {
76 let text = fs::read_to_string(path)?;
77
78 let mut lines = text.lines();
79 let mut yaml = String::new();
80 let mut in_yaml = false;
81
82 for line in lines.by_ref() {
84 let trimmed = line.trim();
85 if trimmed == "---" {
86 if !in_yaml {
87 in_yaml = true;
88 continue;
89 }
90 break;
91 }
92 if in_yaml {
93 yaml.push_str(line);
94 yaml.push('\n');
95 }
96 }
97
98 let content = lines.collect::<Vec<_>>().join("\n") + "\n";
100
101 let fallback_date = path
102 .metadata()
103 .ok()
104 .and_then(|m| m.modified().ok())
105 .map(systemtime_to_utc);
106
107 let fm = if yaml.trim().is_empty() {
109 FrontMatter {
110 title: path.file_stem().unwrap().to_string_lossy().into_owned(),
111 date: fallback_date,
112 author: None,
113 description: Some(content.clone()),
114 collection: None,
115 tags: None,
116 draft: None,
117 }
118 } else {
119 serde_yml::from_str(&yaml).unwrap_or_else(|_| FrontMatter {
120 title: path.file_stem().unwrap().to_string_lossy().into_owned(),
121 date: fallback_date,
122 author: None,
123 description: Some(content.clone()),
124 collection: None,
125 tags: None,
126 draft: None,
127 })
128 };
129
130 let rel_path = path.strip_prefix(root).unwrap_or(path);
131
132 Ok(Article {
133 fm,
134 content,
135 path: rel_path.to_string_lossy().into_owned(),
136 })
137}
138
139pub fn collect_articles(src_dir: &Path) -> Result<Vec<Article>> {
151 let mut articles = Vec::new();
152
153 for entry in WalkDir::new(src_dir)
154 .into_iter()
155 .filter_map(std::result::Result::ok)
156 {
157 let path = entry.path();
158
159 if !path.is_file() {
161 continue;
162 }
163
164 let is_markdown = path
166 .extension()
167 .and_then(|e| e.to_str())
168 .map(str::to_ascii_lowercase)
169 .is_some_and(|s| s == "md" || s == "markdown");
170
171 if !is_markdown {
172 continue;
173 }
174
175 let is_summary = path
178 .file_name()
179 .map(|name| name.to_string_lossy())
180 .is_some_and(|name| name.eq_ignore_ascii_case("SUMMARY.md"));
181
182 if is_summary {
183 continue;
184 }
185
186 if let Ok(article) = parse_markdown_file(src_dir, path) {
188 articles.push(article);
189 }
190 }
191
192 articles.sort_by_key(|a| a.fm.date);
194 articles.reverse();
195
196 Ok(articles)
197}
198
199fn markdown_to_html(md: &str) -> String {
200 let mut html = String::new();
201 let parser = Parser::new_ext(md, Options::all());
202 html::push_html(&mut html, parser);
203 html
204}
205
/// Drops everything up to and including the first blank line that follows
/// the first heading.
///
/// A line counts as a heading when its first non-whitespace character is
/// `#`. If no heading is found, or no blank line follows it, `md` is
/// returned unchanged.
///
/// Offsets are accumulated from the real length of each line including its
/// terminator, so the result is a valid slice for both `\n` and `\r\n`
/// input.
fn strip_leading_boilerplate(md: &str) -> &str {
    let mut seen_heading = false;
    let mut offset = 0;

    for raw_line in md.split_inclusive('\n') {
        // `raw_line` still carries its terminator, so `offset` always lands
        // on the first byte of the next line (or at `md.len()`).
        offset += raw_line.len();
        let line = raw_line.trim();

        if line.starts_with('#') {
            seen_heading = true;
        }

        if seen_heading && line.is_empty() {
            return &md[offset..];
        }
    }

    md
}
242
/// Returns the longest prefix of `s` that contains at most `max_chars`
/// characters, always cutting on a character boundary.
///
/// The whole string is returned when it has `max_chars` characters or fewer.
fn utf8_prefix(s: &str, max_chars: usize) -> &str {
    // The byte offset of character number `max_chars` is exactly where the
    // prefix ends; if there is no such character the string is short enough.
    match s.char_indices().nth(max_chars) {
        Some((end, _)) => &s[..end],
        None => s,
    }
}
265
/// Extracts up to `max_paragraphs` leading `<p>…</p>` elements from `html`
/// and limits the result to `max_chars` characters.
///
/// Only genuine paragraph tags are matched (`<p>` or `<p` followed by
/// whitespace), so elements such as `<pre>` are not mistaken for paragraphs.
/// Text between the selected paragraphs is dropped. When no complete
/// paragraph is found, the whole of `html` is used instead.
///
/// NOTE(review): the final cut is character-based and not HTML-aware, so a
/// truncated result may end inside a tag or leave elements unclosed.
fn html_first_paragraphs(html: &str, max_paragraphs: usize, max_chars: usize) -> String {
    /// Byte offset of the first real `<p>` opening tag in `html`, if any.
    fn find_paragraph_open(html: &str) -> Option<usize> {
        let mut from = 0;
        while let Some(rel) = html[from..].find("<p") {
            let at = from + rel;
            match html.as_bytes().get(at + 2) {
                Some(b'>') => return Some(at),
                Some(next) if next.is_ascii_whitespace() => return Some(at),
                // Some other tag starting with "p" (e.g. `<pre>`): keep looking.
                _ => from = at + 2,
            }
        }
        None
    }

    let mut out = String::new();
    let mut rest = html;

    for _ in 0..max_paragraphs {
        let Some(open) = find_paragraph_open(rest) else {
            break;
        };
        let Some(close_rel) = rest[open..].find("</p>") else {
            break;
        };
        let end = open + close_rel + "</p>".len();

        out.push_str(&rest[open..end]);
        rest = &rest[end..];
    }

    if out.is_empty() {
        out.push_str(html);
    }

    // Cut at the byte offset of character number `max_chars`, if it exists.
    if let Some((cut, _)) = out.char_indices().nth(max_chars) {
        out.truncate(cut);
    }

    out
}
303
/// One article as it is serialized into the content index.
#[derive(Debug, Serialize)]
pub struct ContentEntry {
    /// Path of the source file.
    pub path: String,
    /// Article title.
    pub title: String,
    /// Article date formatted as an RFC 3339 string, if known.
    pub date: Option<String>,
    /// Author name, if given.
    pub author: Option<String>,
    /// Description, if given.
    pub description: Option<String>,
    /// Collection name, if given.
    pub collection: Option<String>,
    /// Tags; empty when the article has none.
    pub tags: Vec<String>,
    /// Draft flag; `false` when not specified.
    pub draft: bool,
    /// HTML preview rendered from the article.
    pub preview_html: String,
}
316
317pub fn build_content_index(src_dir: &Path, output_path: &Path) -> Result<()> {
331 const PREVIEW_MD_SLICE_CHARS: usize = 4000;
332 let articles = collect_articles(src_dir)?;
333
334 let entries: Vec<ContentEntry> = articles
335 .into_iter()
336 .map(|article| {
337 let content_trimmed = article.content.trim();
338 let body_len = content_trimmed.chars().count();
339
340 let mut source_md =
341 if body_len >= MIN_BODY_PREVIEW_CHARS || article.fm.description.is_none() {
342 content_trimmed
343 } else {
344 article.fm.description.as_deref().unwrap_or(content_trimmed)
345 };
346
347 source_md = strip_leading_boilerplate(source_md);
348
349 let source_md_slice = utf8_prefix(source_md, PREVIEW_MD_SLICE_CHARS);
350
351 let raw_html = markdown_to_html(source_md_slice);
352 let preview_html = html_first_paragraphs(&raw_html, 3, 800);
353
354 ContentEntry {
355 path: article.path,
356 title: article.fm.title,
357 date: article.fm.date.map(|d| d.to_rfc3339()),
358 author: article.fm.author,
359 description: article.fm.description,
360 collection: article.fm.collection,
361 tags: article.fm.tags.unwrap_or_default(),
362 draft: article.fm.draft.unwrap_or(false),
363 preview_html,
364 }
365 })
366 .collect();
367
368 let index = json!({
370 "entries": entries,
371 });
372
373 if let Some(parent) = output_path.parent() {
374 fs::create_dir_all(parent)?;
375 }
376
377 fs::write(output_path, serde_json::to_vec_pretty(&index)?)?;
378
379 Ok(())
380}