rustpress/
post.rs

1//! 文章处理模块
2//! 
3//! 负责解析 Markdown 文件，提取元数据和内容
4
5use crate::error::{Error, Result};
6use pulldown_cmark::{html, Options, Parser};
7use serde_json::Value;
8use std::collections::BTreeMap;
9use std::path::Path;
10use walkdir::WalkDir;
11
12/// 文章结构
13#[derive(Debug, Clone)]
14pub struct Post {
15    /// 文章元数据和内容
16    pub data: Value,
17}
18
19impl Post {
20    /// 从 JSON 值创建文章
21    pub fn from_value(data: Value) -> Self {
22        Post { data }
23    }
24    
25    /// 获取文章标题
26    pub fn title(&self) -> Option<&str> {
27        self.data.get("title").and_then(|v| v.as_str())
28    }
29    
30    /// 获取文章 slug
31    pub fn slug(&self) -> Option<&str> {
32        self.data.get("slug").and_then(|v| v.as_str())
33    }
34    
35    /// 获取文章内容
36    pub fn content(&self) -> Option<&str> {
37        self.data.get("content").and_then(|v| v.as_str())
38    }
39    
40    /// 获取文章分类
41    pub fn categories(&self) -> Vec<String> {
42        self.data
43            .get("categories")
44            .and_then(|v| v.as_array())
45            .map(|arr| {
46                arr.iter()
47                    .filter_map(|v| v.as_str())
48                    .map(|s| s.to_string())
49                    .collect()
50            })
51            .unwrap_or_default()
52    }
53    
54    /// 获取文章标签
55    pub fn tags(&self) -> Vec<String> {
56        self.data
57            .get("tags")
58            .and_then(|v| v.as_array())
59            .map(|arr| {
60                arr.iter()
61                    .filter_map(|v| v.as_str())
62                    .map(|s| s.to_string())
63                    .collect()
64            })
65            .unwrap_or_default()
66    }
67    
68    /// 获取文章日期
69    pub fn date(&self) -> Option<&str> {
70        self.data.get("date_ymd").and_then(|v| v.as_str())
71    }
72
73    /// 获取源文件路径
74    pub fn source_path(&self) -> Option<&str> {
75        self.data.get("source_path").and_then(|v| v.as_str())
76    }
77
78    /// 获取源文件的最后修改时间（UNIX秒）
79    pub fn modified_epoch(&self) -> Option<i64> {
80        self.data.get("modified_epoch").and_then(|v| v.as_i64())
81    }
82}
83
/// Post parser: a field-less type whose associated functions parse and summarize posts.
pub struct PostParser;
86
87impl PostParser {
88    /// 从 Markdown 文本中提取标题：优先首个 H1（`# 标题`），否则首个任意级别标题
89    fn extract_title_from_markdown(markdown: &str) -> Option<String> {
90        // 先扫描首个 H1
91        let mut in_code_fence = false;
92        for line in markdown.lines() {
93            let trimmed = line.trim();
94            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
95                in_code_fence = !in_code_fence;
96                continue;
97            }
98            if in_code_fence { continue; }
99            if trimmed.starts_with("# ") {
100                let title = trimmed[2..].trim();
101                if !title.is_empty() { return Some(title.to_string()); }
102            }
103        }
104        // 若没有 H1，则退而求其次，找任意级别标题
105        in_code_fence = false;
106        for line in markdown.lines() {
107            let trimmed = line.trim();
108            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
109                in_code_fence = !in_code_fence;
110                continue;
111            }
112            if in_code_fence { continue; }
113            if trimmed.starts_with('#') {
114                let hashes = trimmed.chars().take_while(|c| *c == '#').count();
115                if hashes >= 1 {
116                    let title = trimmed[hashes..].trim();
117                    if !title.is_empty() { return Some(title.to_string()); }
118                }
119            }
120        }
121        None
122    }
123
124    /// 列出指定目录下的所有文章
125    pub fn list_posts<P: AsRef<Path>>(md_dir: P) -> Result<Vec<Post>> {
126        let mut posts = Vec::new();
127        let content_dir = md_dir.as_ref();
128        
129        // 检查目录是否存在
130        if !content_dir.exists() {
131            println!("警告: Markdown目录 '{}' 不存在，创建空目录...", content_dir.display());
132            std::fs::create_dir_all(content_dir)?;
133        }
134        
135        for entry in WalkDir::new(content_dir).into_iter().filter_map(|e| e.ok()) {
136            if entry.path().extension().map_or(false, |ext| ext == "md") {
137                // 跳过隐藏的 Markdown 文件（文件名以点开头）
138                let hidden = entry.file_name().to_string_lossy().starts_with('.');
139                if hidden { continue; }
140                let content = std::fs::read_to_string(entry.path())
141                    .map_err(|e| Error::Other(format!("无法读取文件 {:?}: {}", entry.path(), e)))?;
142                
143                if let Some(post_data) = Self::parse_post(&content, entry.path(), content_dir)? {
144                    posts.push(Post::from_value(post_data));
145                }
146            }
147        }
148        
149        // 按日期排序（最新的在前）
150        posts.sort_by(|a, b| {
151            let date_a = a.date().unwrap_or("");
152            let date_b = b.date().unwrap_or("");
153            date_b.cmp(date_a)
154        });
155        
156        Ok(posts)
157    }
158    
159    /// 解析单篇文章
160    fn parse_post<P: AsRef<Path>>(content: &str, path: P, md_dir: P) -> Result<Option<Value>> {
161        let path = path.as_ref();
162        let md_dir = md_dir.as_ref();
163        
164        // 检查 front matter 类型
165        let (fm_marker, end_marker) = if content.starts_with("+++") {
166            ("+++", "+++\n")
167        } else if content.starts_with("---") {
168            ("---", "---\n")
169        } else {
170            return Ok(None);
171        };
172
173        // 查找 front matter 结束位置
174        let start = fm_marker.len();
175        let end = if let Some(pos) = content[start..].find(end_marker) {
176            start + pos
177        } else if let Some(pos) = content[start..].find(fm_marker) {
178            start + pos
179        } else {
180            return Ok(None);
181        };
182
183        let front_matter = &content[start..end];
184        let body = &content[end + fm_marker.len()..];
185
186        // 解析front matter（YAML）
187        let metadata: serde_yaml::Value = serde_yaml::from_str(front_matter)
188            .map_err(|e| Error::Markdown(format!("解析front matter失败 {:?}: {}", path, e)))?;
189
190        // 转换元数据为JSON
191        let metadata_json = serde_json::to_value(&metadata)?;
192
193        // 解析Markdown为HTML
194        let html = Self::markdown_to_html(body);
195
196        // 优先使用 front matter 中的 slug 字段，否则用文件名
197        let mut slug = path.file_stem()
198            .and_then(|s| s.to_str())
199            .unwrap_or("")
200            .to_string();
201        
202        if let Value::Object(ref obj) = metadata_json {
203            if let Some(Value::String(s)) = obj.get("slug") {
204                if !s.is_empty() {
205                    slug = s.clone();
206                }
207            }
208        }
209
210        // 从文件路径提取分类信息
211        let categories = Self::extract_categories_from_path(path, md_dir);
212        let categories_json: Vec<Value> = categories
213            .into_iter()
214            .map(|cat| Value::String(cat))
215            .collect();
216
217        // 创建完整的文章对象
218        let mut post = match metadata_json {
219            Value::Object(mut obj) => {
220                obj.insert("content".to_string(), Value::String(html));
221                obj.insert("slug".to_string(), Value::String(slug));
222                obj.insert("categories".to_string(), Value::Array(categories_json));
223                Value::Object(obj)
224            },
225            _ => {
226                let mut obj = serde_json::Map::new();
227                obj.insert("content".to_string(), Value::String(html));
228                obj.insert("slug".to_string(), Value::String(slug));
229                obj.insert("categories".to_string(), Value::Array(categories_json));
230                Value::Object(obj)
231            }
232        };
233
234        // 处理日期相关字段
235        if let Some(obj) = post.as_object_mut() {
236            // 记录源文件路径与修改时间戳（用于增量编译）
237            obj.insert(
238                "source_path".to_string(),
239                Value::String(path.to_string_lossy().to_string())
240            );
241            let modified_epoch = std::fs::metadata(path)
242                .and_then(|m| m.modified())
243                .ok()
244                .and_then(|st| st.duration_since(std::time::UNIX_EPOCH).ok())
245                .map(|d| d.as_secs() as i64)
246                .unwrap_or(0);
247            obj.insert("modified_epoch".to_string(), Value::Number(modified_epoch.into()));
248
249            // 如果没有 title 字段，尝试从 Markdown 内容提取标题
250            if !obj.contains_key("title") {
251                let content_md_title = Self::extract_title_from_markdown(body).or_else(|| {
252                    // 兜底：使用 slug
253                    obj.get("slug").and_then(|v| v.as_str()).map(|s| s.to_string())
254                });
255                if let Some(title) = content_md_title { obj.insert("title".to_string(), Value::String(title)); }
256            }
257            
258            // 处理创建时间字段（兼容多分隔符并归一化为 YYYY-MM-DD）
259            if let Some(create_time) = obj.get("createTime").and_then(|v| v.as_str()) {
260                let create_time_str = create_time.to_string();
261                let date_only = if create_time_str.len() >= 10 { &create_time_str[0..10] } else { &create_time_str };
262                let mut normalized = date_only.replace('/', "-").replace('.', "-");
263                // 确保格式长度为10且分隔符在位置4和7
264                if normalized.len() == 10 {
265                    let bytes = normalized.as_bytes();
266                    let is_digit = |c: u8| c.is_ascii_digit();
267                    if !(is_digit(bytes[0]) && is_digit(bytes[1]) && is_digit(bytes[2]) && is_digit(bytes[3]) &&
268                         bytes[4] == b'-' && is_digit(bytes[5]) && is_digit(bytes[6]) &&
269                         bytes[7] == b'-' && is_digit(bytes[8]) && is_digit(bytes[9])) {
270                        // 尝试强制重组为 YYYY-MM-DD
271                        let digits: Vec<char> = date_only.chars().filter(|c| c.is_ascii_digit()).collect();
272                        if digits.len() >= 8 {
273                            let year: String = digits[0..4].iter().collect();
274                            let month: String = digits[4..6].iter().collect();
275                            let day: String = digits[6..8].iter().collect();
276                            normalized = format!("{}-{}-{}", year, month, day);
277                        }
278                    }
279                }
280                obj.insert("date_ymd".to_string(), Value::String(normalized.clone()));
281                if normalized.len() >= 7 {
282                    let year = &normalized[0..4];
283                    let ym = &normalized[0..7];
284                    obj.insert("year".to_string(), Value::String(year.to_string()));
285                    obj.insert("year_month".to_string(), Value::String(ym.to_string()));
286                }
287            }
288        }
289
290        Ok(Some(post))
291    }
292    
293    /// 从文件路径提取分类信息
294    fn extract_categories_from_path<P: AsRef<Path>>(path: P, md_dir: P) -> Vec<String> {
295        let path = path.as_ref();
296        let md_dir = md_dir.as_ref();
297        let mut categories = Vec::new();
298        
299        // 获取相对于md_dir的路径
300        if let Ok(relative_path) = path.strip_prefix(md_dir) {
301            // 获取父目录路径
302            if let Some(parent) = relative_path.parent() {
303                // 将路径组件转换为分类
304                for component in parent.components() {
305                    if let std::path::Component::Normal(os_str) = component {
306                        if let Some(category) = os_str.to_str() {
307                            categories.push(category.to_string());
308                        }
309                    }
310                }
311            }
312        }
313        
314        categories
315    }
316    
317    /// 将Markdown转换为HTML
318    fn markdown_to_html(markdown: &str) -> String {
319        let mut options = Options::empty();
320        options.insert(Options::ENABLE_TABLES);
321        options.insert(Options::ENABLE_FOOTNOTES);
322        options.insert(Options::ENABLE_STRIKETHROUGH);
323        options.insert(Options::ENABLE_TASKLISTS);
324        
325        let parser = Parser::new_ext(markdown, options);
326        let mut html = String::new();
327        html::push_html(&mut html, parser);
328        
329        html
330    }
331    
332    /// 统计所有标签及计数
333    pub fn collect_tags(posts: &[Post]) -> Vec<Value> {
334        let mut tag_to_count: BTreeMap<String, usize> = BTreeMap::new();
335        
336        for post in posts {
337            for tag in post.tags() {
338                *tag_to_count.entry(tag).or_insert(0) += 1;
339            }
340        }
341        
342        tag_to_count
343            .into_iter()
344            .map(|(name, count)| {
345                let mut obj = serde_json::Map::new();
346                obj.insert("name".to_string(), Value::String(name));
347                obj.insert("count".to_string(), Value::from(count as u64));
348                Value::Object(obj)
349            })
350            .collect()
351    }
352    
353    /// 统计所有年份及计数
354    pub fn collect_years(posts: &[Post]) -> Vec<Value> {
355        let mut year_to_count: BTreeMap<String, usize> = BTreeMap::new();
356        
357        for post in posts {
358            if let Some(year) = post.data.get("year").and_then(|v| v.as_str()) {
359                *year_to_count.entry(year.to_string()).or_insert(0) += 1;
360            }
361        }
362        
363        year_to_count
364            .into_iter()
365            .map(|(name, count)| {
366                let mut obj = serde_json::Map::new();
367                obj.insert("name".to_string(), Value::String(name));
368                obj.insert("count".to_string(), Value::from(count as u64));
369                Value::Object(obj)
370            })
371            .collect()
372    }
373    
374    /// 生成层次化的分类结构
375    pub fn generate_hierarchical_categories(posts: &[Post]) -> Value {
376        use std::collections::HashMap;
377        
378        // 构建分类树结构
379        #[derive(Debug)]
380        struct CategoryNode {
381            name: String,
382            count: usize,
383            children: HashMap<String, CategoryNode>,
384            full_path: Vec<String>,
385        }
386        
387        impl CategoryNode {
388            fn new(name: String, full_path: Vec<String>) -> Self {
389                Self {
390                    name,
391                    count: 0,
392                    children: HashMap::new(),
393                    full_path,
394                }
395            }
396            
397            fn to_json(&self) -> Value {
398                let mut obj = serde_json::Map::new();
399                obj.insert("name".to_string(), Value::String(self.name.clone()));
400                obj.insert("count".to_string(), Value::from(self.count as u64));
401                obj.insert("path".to_string(), Value::Array(
402                    self.full_path.iter().map(|s| Value::String(s.clone())).collect()
403                ));
404                
405                if !self.children.is_empty() {
406                    let mut children: Vec<Value> = self.children
407                        .values()
408                        .map(|child| child.to_json())
409                        .collect();
410                    children.sort_by(|a, b| {
411                        let name_a = a.get("name").and_then(|v| v.as_str()).unwrap_or("");
412                        let name_b = b.get("name").and_then(|v| v.as_str()).unwrap_or("");
413                        name_a.cmp(name_b)
414                    });
415                    obj.insert("children".to_string(), Value::Array(children));
416                }
417                
418                Value::Object(obj)
419            }
420        }
421        
422        let mut root = CategoryNode::new("root".to_string(), vec![]);
423        
424        // 遍历所有文章，构建分类树
425        for post in posts {
426            let categories = post.categories();
427            if !categories.is_empty() {
428                // 在分类路径上的每个节点都增加计数
429                let mut current = &mut root;
430                let mut current_path = vec![];
431                
432                for category in &categories {
433                    current_path.push(category.clone());
434                    current = current.children
435                        .entry(category.clone())
436                        .or_insert_with(|| CategoryNode::new(category.clone(), current_path.clone()));
437                    current.count += 1;
438                }
439            }
440        }
441        
442        // 转换为JSON格式
443        if root.children.is_empty() {
444            Value::Array(vec![])
445        } else {
446            let mut categories: Vec<Value> = root.children
447                .values()
448                .map(|child| child.to_json())
449                .collect();
450            categories.sort_by(|a, b| {
451                let name_a = a.get("name").and_then(|v| v.as_str()).unwrap_or("");
452                let name_b = b.get("name").and_then(|v| v.as_str()).unwrap_or("");
453                name_a.cmp(name_b)
454            });
455            Value::Array(categories)
456        }
457    }
458
459    /// 对外公开的单文件Markdown解析包装方法
460    ///
461    /// 用途：当需要解析一个具体的Markdown文件内容（例如 friends.md）时，
462    /// 在模板渲染阶段可调用此方法以获得其 front matter 和 HTML 内容。
463    pub fn parse_file_content<P: AsRef<Path>>(content: &str, path: P, md_dir: P) -> Result<Option<Value>> {
464        Self::parse_post(content, path, md_dir)
465    }
466}
rustpress/post.rs

rustpress/
post.rs