1use crate::error::{Error, Result};
6use pulldown_cmark::{Options, Parser, html};
7use regex::Regex;
8use serde_json::Value;
9use std::collections::BTreeMap;
10use std::path::Path;
11use walkdir::WalkDir;
12
13#[derive(Debug, Clone)]
15pub struct Post {
16 pub data: Value,
18}
19
20impl Post {
21 pub fn from_value(data: Value) -> Self {
23 Post { data }
24 }
25
26 pub fn title(&self) -> Option<&str> {
28 self.data.get("title").and_then(|v| v.as_str())
29 }
30
31 pub fn slug(&self) -> Option<&str> {
33 self.data.get("slug").and_then(|v| v.as_str())
34 }
35
36 pub fn content(&self) -> Option<&str> {
38 self.data.get("content").and_then(|v| v.as_str())
39 }
40
41 pub fn categories(&self) -> Vec<String> {
43 self.data
44 .get("categories")
45 .and_then(|v| v.as_array())
46 .map(|arr| {
47 arr.iter()
48 .filter_map(|v| v.as_str())
49 .map(|s| s.to_string())
50 .collect()
51 })
52 .unwrap_or_default()
53 }
54
55 pub fn tags(&self) -> Vec<String> {
57 let mut tags: Vec<String> = self
58 .data
59 .get("tags")
60 .and_then(|v| v.as_array())
61 .map(|arr| {
62 arr.iter()
63 .filter_map(|v| v.as_str())
64 .map(|s| s.trim().to_string())
65 .filter(|s| !s.is_empty())
66 .collect::<Vec<String>>()
67 })
68 .unwrap_or_default();
69
70 let mut seen = std::collections::HashSet::new();
72 tags.retain(|t| seen.insert(t.clone()));
73 tags
74 }
75
76 pub fn date(&self) -> Option<&str> {
78 self.data.get("date_ymd").and_then(|v| v.as_str())
79 }
80
81 pub fn source_path(&self) -> Option<&str> {
83 self.data.get("source_path").and_then(|v| v.as_str())
84 }
85
86 pub fn modified_epoch(&self) -> Option<i64> {
88 self.data.get("modified_epoch").and_then(|v| v.as_i64())
89 }
90}
91
/// Stateless namespace for post discovery and parsing; its functionality is
/// exposed through associated functions rather than instance methods.
pub struct PostParser;
94
95impl PostParser {
96 fn extract_title_from_markdown(markdown: &str) -> Option<String> {
98 let mut in_code_fence = false;
100 for line in markdown.lines() {
101 let trimmed = line.trim();
102 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
103 in_code_fence = !in_code_fence;
104 continue;
105 }
106 if in_code_fence {
107 continue;
108 }
109 if trimmed.starts_with("# ") {
110 let title = trimmed[2..].trim();
111 if !title.is_empty() {
112 return Some(title.to_string());
113 }
114 }
115 }
116 in_code_fence = false;
118 for line in markdown.lines() {
119 let trimmed = line.trim();
120 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
121 in_code_fence = !in_code_fence;
122 continue;
123 }
124 if in_code_fence {
125 continue;
126 }
127 if trimmed.starts_with('#') {
128 let hashes = trimmed.chars().take_while(|c| *c == '#').count();
129 if hashes >= 1 {
130 let title = trimmed[hashes..].trim();
131 if !title.is_empty() {
132 return Some(title.to_string());
133 }
134 }
135 }
136 }
137 None
138 }
139
140 pub fn list_posts<P: AsRef<Path>>(md_dir: P) -> Result<Vec<Post>> {
142 let mut posts = Vec::new();
143 let content_dir = md_dir.as_ref();
144
145 if !content_dir.exists() {
147 println!(
148 "警告: Markdown目录 '{}' 不存在,创建空目录...",
149 content_dir.display()
150 );
151 std::fs::create_dir_all(content_dir)?;
152 }
153
154 let mut draft_dirs = std::collections::HashSet::new();
156 let mut book_dirs = std::collections::HashMap::new(); for entry in WalkDir::new(content_dir).into_iter().filter_map(|e| e.ok()) {
158 let path = entry.path();
159 if path.file_name().map_or(false, |n| n == "README.md") {
160 if let Ok(content) = std::fs::read_to_string(path) {
161 if let Ok(Some(post_data)) = Self::parse_post(&content, path, content_dir) {
162 let is_draft = post_data.get("draft").and_then(|v| v.as_bool()).unwrap_or(false);
163 if is_draft {
164 if let Some(parent) = path.parent() {
165 draft_dirs.insert(parent.to_path_buf());
166 }
167 }
168
169 let cats = Self::extract_categories_from_path(path, content_dir);
171 if cats.len() == 2 && cats[0] == "docs" {
172 if let Some("doc") = post_data.get("layout").and_then(|v| v.as_str()) {
173 if let Some(parent) = path.parent() {
174 let mut cover_path = post_data.get("cover").and_then(|v| v.as_str()).map(|s| s.to_string());
175
176 if let Some(cp) = cover_path.as_mut() {
178 if !cp.starts_with('/') && !cp.starts_with("http") {
179 if let Ok(rel_dir) = parent.strip_prefix(content_dir) {
180 *cp = format!("/{}", rel_dir.join(&cp).to_string_lossy());
181 }
182 }
183 }
184
185 if cover_path.is_none() {
187 let candidates = [
188 parent.join("cover.jpg"), parent.join("cover.png"),
189 parent.join("assets").join("cover.jpg"), parent.join("assets").join("cover.png")
190 ];
191 for cand in candidates {
192 if cand.exists() {
193 if let Ok(rel) = cand.strip_prefix(content_dir) {
194 cover_path = Some(format!("/{}", rel.to_string_lossy()));
195 break;
196 }
197 }
198 }
199 }
200 book_dirs.insert(parent.to_path_buf(), cover_path);
201 }
202 }
203 }
204 }
205 }
206 }
207 }
208
209 for entry in WalkDir::new(content_dir).into_iter().filter_map(|e| e.ok()) {
210 if entry.path().extension().map_or(false, |ext| ext == "md") {
211 if draft_dirs.iter().any(|d| entry.path().starts_with(d)) {
213 continue;
214 }
215
216 let hidden = entry.file_name().to_string_lossy().starts_with('.');
218 if hidden {
219 continue;
220 }
221 let content = std::fs::read_to_string(entry.path())
222 .map_err(|e| Error::Other(format!("无法读取文件 {:?}: {}", entry.path(), e)))?;
223 if let Ok(Some(mut post)) = Self::parse_post(&content, entry.path(), content_dir) {
224 let is_draft = post
226 .get("draft")
227 .and_then(|v| v.as_bool())
228 .unwrap_or(false);
229 if is_draft {
230 continue;
231 }
232
233 let cats = Post::from_value(post.clone()).categories();
235 if let Some(obj) = post.as_object_mut() {
236 if !obj.contains_key("layout") {
238 if cats.first().map(|c| c == "projects").unwrap_or(false) {
239 obj.insert("layout".to_string(), Value::String("project".to_string()));
240 } else if book_dirs.iter().any(|(d, _)| entry.path().starts_with(d)) {
241 obj.insert("layout".to_string(), Value::String("doc".to_string()));
242 }
243 }
244
245 if entry.path().file_name().map_or(false, |n| n == "README.md") {
247 if let Some((_, cover_opt)) = book_dirs.iter().find(|(d, _)| entry.path().starts_with(d)) {
248 if let Some(cp) = cover_opt {
249 obj.insert("cover".to_string(), Value::String(cp.clone()));
251 }
252 }
253 }
254 }
255 posts.push(Post::from_value(post));
256 }
257 }
258 }
259
260 posts.sort_by(|a, b| {
262 let date_a = a.date().unwrap_or("");
263 let date_b = b.date().unwrap_or("");
264 date_b.cmp(date_a)
265 });
266
267 Ok(posts)
268 }
269
270 fn parse_post<P: AsRef<Path>>(content: &str, path: P, md_dir: P) -> Result<Option<Value>> {
272 let path = path.as_ref();
273 let md_dir = md_dir.as_ref();
274
275 let (fm_marker, end_marker) = if content.starts_with("+++") {
277 ("+++", "+++\n")
278 } else if content.starts_with("---") {
279 ("---", "---\n")
280 } else {
281 return Ok(None);
282 };
283
284 let start = fm_marker.len();
286 let end = if let Some(pos) = content[start..].find(end_marker) {
287 start + pos
288 } else if let Some(pos) = content[start..].find(fm_marker) {
289 start + pos
290 } else {
291 return Ok(None);
292 };
293
294 let front_matter = &content[start..end];
295 let body = &content[end + fm_marker.len()..];
296
297 let metadata_json = if fm_marker == "+++" {
299 let metadata: toml::Value = toml::from_str(front_matter).map_err(|e| {
300 Error::Markdown(format!("解析TOML front matter失败 {:?}: {}", path, e))
301 })?;
302 serde_json::to_value(metadata)?
303 } else {
304 let mut fixed_front_matter = front_matter.replace(':', ":");
307
308 let re = Regex::new(r"(?m)^([ \t]*[a-zA-Z0-9_-]+):([^\s].*)$").unwrap();
310 fixed_front_matter = re.replace_all(&fixed_front_matter, "${1}: ${2}").to_string();
311
312 let metadata: serde_yaml::Value =
313 serde_yaml::from_str(&fixed_front_matter).map_err(|e| {
314 Error::Markdown(format!("解析YAML front matter失败 {:?}: {}", path, e))
316 })?;
317 serde_json::to_value(metadata)?
318 };
319
320 let html = Self::markdown_to_html(body);
322
323 let mut slug = path
325 .file_stem()
326 .and_then(|s| s.to_str())
327 .unwrap_or("")
328 .to_string();
329
330 if slug == "README" {
331 slug = "index".to_string();
332 }
333
334 if let Value::Object(ref obj) = metadata_json {
335 if let Some(Value::String(s)) = obj.get("slug") {
336 if !s.is_empty() {
337 slug = s.clone();
338 }
339 }
340 }
341
342 let categories = Self::extract_categories_from_path(path, md_dir);
344 let categories_json: Vec<Value> = categories
345 .iter()
346 .map(|cat| Value::String(cat.clone()))
347 .collect();
348
349 let url = if categories.is_empty() {
351 format!("/{}.html", slug)
352 } else {
353 format!("/{}/{}.html", categories.join("/"), slug)
354 };
355
356 let mut post = match metadata_json {
358 Value::Object(mut obj) => {
359 obj.insert("content".to_string(), Value::String(html));
360 obj.insert("slug".to_string(), Value::String(slug));
361 obj.insert("url".to_string(), Value::String(url));
362 obj.insert("categories".to_string(), Value::Array(categories_json));
363 Value::Object(obj)
364 }
365 _ => {
366 let mut obj = serde_json::Map::new();
367 obj.insert("content".to_string(), Value::String(html));
368 obj.insert("slug".to_string(), Value::String(slug));
369 obj.insert("url".to_string(), Value::String(url));
370 obj.insert("categories".to_string(), Value::Array(categories_json));
371 Value::Object(obj)
372 }
373 };
374
375 if let Some(obj) = post.as_object_mut() {
377 obj.insert(
379 "source_path".to_string(),
380 Value::String(path.to_string_lossy().to_string()),
381 );
382 let modified_epoch = std::fs::metadata(path)
383 .and_then(|m| m.modified())
384 .ok()
385 .and_then(|st| st.duration_since(std::time::UNIX_EPOCH).ok())
386 .map(|d| d.as_secs() as i64)
387 .unwrap_or(0);
388 obj.insert(
389 "modified_epoch".to_string(),
390 Value::Number(modified_epoch.into()),
391 );
392
393 if !obj.contains_key("title") {
395 let content_md_title = Self::extract_title_from_markdown(body).or_else(|| {
396 obj.get("slug")
398 .and_then(|v| v.as_str())
399 .map(|s| s.to_string())
400 });
401 if let Some(title) = content_md_title {
402 obj.insert("title".to_string(), Value::String(title));
403 }
404 }
405
406 if let Some(create_time) = obj
409 .get("createTime")
410 .or_else(|| obj.get("date"))
411 .and_then(|v| v.as_str())
412 {
413 let create_time_str = create_time.to_string();
414 let date_only = if create_time_str.len() >= 10 {
415 &create_time_str[0..10]
416 } else {
417 &create_time_str
418 };
419 let mut normalized = date_only.replace('/', "-").replace('.', "-");
420 if normalized.len() == 10 {
422 let bytes = normalized.as_bytes();
423 let is_digit = |c: u8| c.is_ascii_digit();
424 if !(is_digit(bytes[0])
425 && is_digit(bytes[1])
426 && is_digit(bytes[2])
427 && is_digit(bytes[3])
428 && bytes[4] == b'-'
429 && is_digit(bytes[5])
430 && is_digit(bytes[6])
431 && bytes[7] == b'-'
432 && is_digit(bytes[8])
433 && is_digit(bytes[9]))
434 {
435 let digits: Vec<char> =
437 date_only.chars().filter(|c| c.is_ascii_digit()).collect();
438 if digits.len() >= 8 {
439 let year: String = digits[0..4].iter().collect();
440 let month: String = digits[4..6].iter().collect();
441 let day: String = digits[6..8].iter().collect();
442 normalized = format!("{}-{}-{}", year, month, day);
443 }
444 }
445 }
446 obj.insert("date_ymd".to_string(), Value::String(normalized.clone()));
447 if normalized.len() >= 7 {
448 let year = &normalized[0..4];
449 let ym = &normalized[0..7];
450 obj.insert("year".to_string(), Value::String(year.to_string()));
451 obj.insert("year_month".to_string(), Value::String(ym.to_string()));
452 }
453
454 if let Some(tags_val) = obj.get("tags") {
456 if let Some(arr) = tags_val.as_array() {
457 let mut sanitized: Vec<Value> = arr
458 .iter()
459 .filter_map(|v| v.as_str())
460 .map(|s| s.trim())
461 .filter(|s| !s.is_empty())
462 .map(|s| Value::String(s.to_string()))
463 .collect();
464 let mut seen = std::collections::HashSet::new();
466 sanitized.retain(|v| {
467 if let Some(s) = v.as_str() {
468 seen.insert(s.to_string())
469 } else {
470 false
471 }
472 });
473 if sanitized.is_empty() {
474 obj.remove("tags");
475 } else {
476 obj.insert("tags".to_string(), Value::Array(sanitized));
477 }
478 } else {
479 obj.remove("tags");
481 }
482 }
483 }
484 }
485
486 Ok(Some(post))
487 }
488
489 fn extract_categories_from_path<P: AsRef<Path>>(path: P, md_dir: P) -> Vec<String> {
491 let path = path.as_ref();
492 let md_dir = md_dir.as_ref();
493 let mut categories = Vec::new();
494
495 if let Ok(relative_path) = path.strip_prefix(md_dir) {
497 if let Some(parent) = relative_path.parent() {
499 for component in parent.components() {
501 if let std::path::Component::Normal(os_str) = component {
502 if let Some(category) = os_str.to_str() {
503 categories.push(category.to_string());
504 }
505 }
506 }
507 }
508 }
509
510 categories
511 }
512
513 fn markdown_to_html(markdown: &str) -> String {
515 let mut options = Options::empty();
516 options.insert(Options::ENABLE_TABLES);
517 options.insert(Options::ENABLE_FOOTNOTES);
518 options.insert(Options::ENABLE_STRIKETHROUGH);
519 options.insert(Options::ENABLE_TASKLISTS);
520 options.insert(Options::ENABLE_HEADING_ATTRIBUTES);
521
522 let parser = Parser::new_ext(markdown, options);
523 let mut html = String::new();
524 html::push_html(&mut html, parser);
525
526 html
527 }
528
529 pub fn collect_tags(posts: &[Post]) -> Vec<Value> {
531 let mut tag_to_count: BTreeMap<String, usize> = BTreeMap::new();
532
533 for post in posts {
534 for tag in post.tags() {
535 *tag_to_count.entry(tag).or_insert(0) += 1;
536 }
537 }
538
539 tag_to_count
540 .into_iter()
541 .map(|(name, count)| {
542 let mut obj = serde_json::Map::new();
543 obj.insert("name".to_string(), Value::String(name));
544 obj.insert("count".to_string(), Value::from(count as u64));
545 Value::Object(obj)
546 })
547 .collect()
548 }
549
550 pub fn collect_years(posts: &[Post]) -> Vec<Value> {
552 let mut year_to_count: BTreeMap<String, usize> = BTreeMap::new();
553
554 for post in posts {
555 if let Some(year) = post.data.get("year").and_then(|v| v.as_str()) {
556 *year_to_count.entry(year.to_string()).or_insert(0) += 1;
557 }
558 }
559
560 year_to_count
561 .into_iter()
562 .map(|(name, count)| {
563 let mut obj = serde_json::Map::new();
564 obj.insert("name".to_string(), Value::String(name));
565 obj.insert("count".to_string(), Value::from(count as u64));
566 Value::Object(obj)
567 })
568 .collect()
569 }
570
571 pub fn generate_hierarchical_categories(posts: &[Post]) -> Value {
573 use std::collections::HashMap;
574
575 #[derive(Debug)]
577 struct CategoryNode {
578 name: String,
579 count: usize,
580 children: HashMap<String, CategoryNode>,
581 full_path: Vec<String>,
582 }
583
584 impl CategoryNode {
585 fn new(name: String, full_path: Vec<String>) -> Self {
586 Self {
587 name,
588 count: 0,
589 children: HashMap::new(),
590 full_path,
591 }
592 }
593
594 fn to_json(&self) -> Value {
595 let mut obj = serde_json::Map::new();
596 obj.insert("name".to_string(), Value::String(self.name.clone()));
597 obj.insert("count".to_string(), Value::from(self.count as u64));
598 obj.insert(
599 "path".to_string(),
600 Value::Array(
601 self.full_path
602 .iter()
603 .map(|s| Value::String(s.clone()))
604 .collect(),
605 ),
606 );
607
608 if !self.children.is_empty() {
609 let mut children: Vec<Value> = self
610 .children
611 .values()
612 .map(|child| child.to_json())
613 .collect();
614 children.sort_by(|a, b| {
615 let name_a = a.get("name").and_then(|v| v.as_str()).unwrap_or("");
616 let name_b = b.get("name").and_then(|v| v.as_str()).unwrap_or("");
617 name_a.cmp(name_b)
618 });
619 obj.insert("children".to_string(), Value::Array(children));
620 }
621
622 Value::Object(obj)
623 }
624 }
625
626 let mut root = CategoryNode::new("root".to_string(), vec![]);
627
628 for post in posts {
630 let categories = post.categories();
631 if !categories.is_empty() {
632 let mut current = &mut root;
634 let mut current_path = vec![];
635
636 for category in &categories {
637 current_path.push(category.clone());
638 current = current.children.entry(category.clone()).or_insert_with(|| {
639 CategoryNode::new(category.clone(), current_path.clone())
640 });
641 current.count += 1;
642 }
643 }
644 }
645
646 if root.children.is_empty() {
648 Value::Array(vec![])
649 } else {
650 let mut categories: Vec<Value> = root
651 .children
652 .values()
653 .map(|child| child.to_json())
654 .collect();
655 categories.sort_by(|a, b| {
656 let name_a = a.get("name").and_then(|v| v.as_str()).unwrap_or("");
657 let name_b = b.get("name").and_then(|v| v.as_str()).unwrap_or("");
658 name_a.cmp(name_b)
659 });
660 Value::Array(categories)
661 }
662 }
663
664 pub fn parse_file_content<P: AsRef<Path>>(
669 content: &str,
670 path: P,
671 md_dir: P,
672 ) -> Result<Option<Value>> {
673 Self::parse_post(content, path, md_dir)
674 }
675
676 pub fn get_url_from_path<P: AsRef<Path>>(source_path: P, content_dir: P, content: Option<&str>) -> String {
678 let path = source_path.as_ref();
679 let md_dir = content_dir.as_ref();
680
681 let content_to_use = match content {
683 Some(c) => Some(c.to_string()),
684 None => std::fs::read_to_string(path).ok(),
685 };
686
687 if let Some(c) = content_to_use {
688 if let Ok(Some(post_val)) = Self::parse_post(&c, path, md_dir) {
689 if let Some(url) = post_val.get("url").and_then(|v| v.as_str()) {
690 return url.to_string();
691 }
692 }
693 }
694
695 let mut slug = path
697 .file_stem()
698 .and_then(|s| s.to_str())
699 .unwrap_or("")
700 .to_string();
701
702 if slug == "README" {
703 slug = "index".to_string();
704 }
705
706 let categories = Self::extract_categories_from_path(path, md_dir);
707 if categories.is_empty() {
708 format!("/{}.html", slug)
709 } else {
710 format!("/{}/{}.html", categories.join("/"), slug)
711 }
712 }
713}