1use crate::error::{Error, Result};
6use pulldown_cmark::{html, Options, Parser};
7use serde_json::Value;
8use std::collections::BTreeMap;
9use std::path::Path;
10use walkdir::WalkDir;
11
12#[derive(Debug, Clone)]
14pub struct Post {
15 pub data: Value,
17}
18
19impl Post {
20 pub fn from_value(data: Value) -> Self {
22 Post { data }
23 }
24
25 pub fn title(&self) -> Option<&str> {
27 self.data.get("title").and_then(|v| v.as_str())
28 }
29
30 pub fn slug(&self) -> Option<&str> {
32 self.data.get("slug").and_then(|v| v.as_str())
33 }
34
35 pub fn content(&self) -> Option<&str> {
37 self.data.get("content").and_then(|v| v.as_str())
38 }
39
40 pub fn categories(&self) -> Vec<String> {
42 self.data
43 .get("categories")
44 .and_then(|v| v.as_array())
45 .map(|arr| {
46 arr.iter()
47 .filter_map(|v| v.as_str())
48 .map(|s| s.to_string())
49 .collect()
50 })
51 .unwrap_or_default()
52 }
53
54 pub fn tags(&self) -> Vec<String> {
56 self.data
57 .get("tags")
58 .and_then(|v| v.as_array())
59 .map(|arr| {
60 arr.iter()
61 .filter_map(|v| v.as_str())
62 .map(|s| s.to_string())
63 .collect()
64 })
65 .unwrap_or_default()
66 }
67
68 pub fn date(&self) -> Option<&str> {
70 self.data.get("date_ymd").and_then(|v| v.as_str())
71 }
72
73 pub fn source_path(&self) -> Option<&str> {
75 self.data.get("source_path").and_then(|v| v.as_str())
76 }
77
78 pub fn modified_epoch(&self) -> Option<i64> {
80 self.data.get("modified_epoch").and_then(|v| v.as_i64())
81 }
82}
83
/// Stateless unit struct used as a namespace: its associated functions (see the
/// `impl` block below) list Markdown files, parse them into [`Post`] values and
/// build tag, year and category summaries from those posts.
84pub struct PostParser;
86
87impl PostParser {
/// Picks a title for a document from its Markdown headings.
///
/// The first level-1 heading (`# Title`) anywhere in the document wins. If there
/// is none, the first heading of any level (`##` .. `######`) is used. Returns
/// `None` when the document has no non-empty heading outside fenced code blocks.
///
/// Differences from a naive prefix scan, all aligned with CommonMark:
/// * a heading needs whitespace after its `#` run, so `#hashtag` or
///   `#include <x>` are not mistaken for titles;
/// * more than six `#` characters do not form a heading;
/// * a fenced code block is closed only by a fence of the same character that is
///   at least as long as the opening one and carries no info string, so a `~~~`
///   line inside a backtick block no longer "closes" it.
fn extract_title_from_markdown(markdown: &str) -> Option<String> {
    // `(fence character, fence length)` of the code block we are currently inside.
    let mut open_fence: Option<(char, usize)> = None;
    // First heading of any level, kept in case no level-1 heading shows up.
    let mut first_heading: Option<&str> = None;

    for line in markdown.lines() {
        let trimmed = line.trim();

        let fence_char = trimmed.chars().next().filter(|c| *c == '`' || *c == '~');
        if let Some(marker) = fence_char {
            let run = trimmed.chars().take_while(|c| *c == marker).count();
            if run >= 3 {
                // Both fence characters are ASCII, so `run` is also a byte offset.
                let info = &trimmed[run..];
                match open_fence {
                    Some((open_marker, open_run)) => {
                        if marker == open_marker && run >= open_run && info.is_empty() {
                            open_fence = None;
                        }
                        // Anything fence-like inside a block is code, never a heading.
                        continue;
                    }
                    // A backtick fence may not contain backticks in its info string;
                    // such a line is inline code, not the start of a block.
                    None if marker == '~' || !info.contains('`') => {
                        open_fence = Some((marker, run));
                        continue;
                    }
                    None => {}
                }
            }
        }
        if open_fence.is_some() {
            continue;
        }

        let hashes = trimmed.bytes().take_while(|b| *b == b'#').count();
        if !(1..=6).contains(&hashes) {
            continue;
        }
        let rest = &trimmed[hashes..];
        if !rest.starts_with(char::is_whitespace) {
            continue;
        }
        // `trimmed` has no trailing whitespace, so `rest` still contains text here.
        let title = rest.trim();
        if hashes == 1 {
            return Some(title.to_string());
        }
        first_heading.get_or_insert(title);
    }

    first_heading.map(str::to_string)
}
123
124 pub fn list_posts<P: AsRef<Path>>(md_dir: P) -> Result<Vec<Post>> {
126 let mut posts = Vec::new();
127 let content_dir = md_dir.as_ref();
128
129 if !content_dir.exists() {
131 println!("警告: Markdown目录 '{}' 不存在,创建空目录...", content_dir.display());
132 std::fs::create_dir_all(content_dir)?;
133 }
134
135 for entry in WalkDir::new(content_dir).into_iter().filter_map(|e| e.ok()) {
136 if entry.path().extension().map_or(false, |ext| ext == "md") {
137 let hidden = entry.file_name().to_string_lossy().starts_with('.');
139 if hidden { continue; }
140 let content = std::fs::read_to_string(entry.path())
141 .map_err(|e| Error::Other(format!("无法读取文件 {:?}: {}", entry.path(), e)))?;
142
143 if let Some(post_data) = Self::parse_post(&content, entry.path(), content_dir)? {
144 posts.push(Post::from_value(post_data));
145 }
146 }
147 }
148
149 posts.sort_by(|a, b| {
151 let date_a = a.date().unwrap_or("");
152 let date_b = b.date().unwrap_or("");
153 date_b.cmp(date_a)
154 });
155
156 Ok(posts)
157 }
158
159 fn parse_post<P: AsRef<Path>>(content: &str, path: P, md_dir: P) -> Result<Option<Value>> {
161 let path = path.as_ref();
162 let md_dir = md_dir.as_ref();
163
164 let (fm_marker, end_marker) = if content.starts_with("+++") {
166 ("+++", "+++\n")
167 } else if content.starts_with("---") {
168 ("---", "---\n")
169 } else {
170 return Ok(None);
171 };
172
173 let start = fm_marker.len();
175 let end = if let Some(pos) = content[start..].find(end_marker) {
176 start + pos
177 } else if let Some(pos) = content[start..].find(fm_marker) {
178 start + pos
179 } else {
180 return Ok(None);
181 };
182
183 let front_matter = &content[start..end];
184 let body = &content[end + fm_marker.len()..];
185
186 let metadata: serde_yaml::Value = serde_yaml::from_str(front_matter)
188 .map_err(|e| Error::Markdown(format!("解析front matter失败 {:?}: {}", path, e)))?;
189
190 let metadata_json = serde_json::to_value(&metadata)?;
192
193 let html = Self::markdown_to_html(body);
195
196 let mut slug = path.file_stem()
198 .and_then(|s| s.to_str())
199 .unwrap_or("")
200 .to_string();
201
202 if let Value::Object(ref obj) = metadata_json {
203 if let Some(Value::String(s)) = obj.get("slug") {
204 if !s.is_empty() {
205 slug = s.clone();
206 }
207 }
208 }
209
210 let categories = Self::extract_categories_from_path(path, md_dir);
212 let categories_json: Vec<Value> = categories
213 .into_iter()
214 .map(|cat| Value::String(cat))
215 .collect();
216
217 let mut post = match metadata_json {
219 Value::Object(mut obj) => {
220 obj.insert("content".to_string(), Value::String(html));
221 obj.insert("slug".to_string(), Value::String(slug));
222 obj.insert("categories".to_string(), Value::Array(categories_json));
223 Value::Object(obj)
224 },
225 _ => {
226 let mut obj = serde_json::Map::new();
227 obj.insert("content".to_string(), Value::String(html));
228 obj.insert("slug".to_string(), Value::String(slug));
229 obj.insert("categories".to_string(), Value::Array(categories_json));
230 Value::Object(obj)
231 }
232 };
233
234 if let Some(obj) = post.as_object_mut() {
236 obj.insert(
238 "source_path".to_string(),
239 Value::String(path.to_string_lossy().to_string())
240 );
241 let modified_epoch = std::fs::metadata(path)
242 .and_then(|m| m.modified())
243 .ok()
244 .and_then(|st| st.duration_since(std::time::UNIX_EPOCH).ok())
245 .map(|d| d.as_secs() as i64)
246 .unwrap_or(0);
247 obj.insert("modified_epoch".to_string(), Value::Number(modified_epoch.into()));
248
249 if !obj.contains_key("title") {
251 let content_md_title = Self::extract_title_from_markdown(body).or_else(|| {
252 obj.get("slug").and_then(|v| v.as_str()).map(|s| s.to_string())
254 });
255 if let Some(title) = content_md_title { obj.insert("title".to_string(), Value::String(title)); }
256 }
257
258 if let Some(create_time) = obj.get("createTime").and_then(|v| v.as_str()) {
260 let create_time_str = create_time.to_string();
261 let date_only = if create_time_str.len() >= 10 { &create_time_str[0..10] } else { &create_time_str };
262 let mut normalized = date_only.replace('/', "-").replace('.', "-");
263 if normalized.len() == 10 {
265 let bytes = normalized.as_bytes();
266 let is_digit = |c: u8| c.is_ascii_digit();
267 if !(is_digit(bytes[0]) && is_digit(bytes[1]) && is_digit(bytes[2]) && is_digit(bytes[3]) &&
268 bytes[4] == b'-' && is_digit(bytes[5]) && is_digit(bytes[6]) &&
269 bytes[7] == b'-' && is_digit(bytes[8]) && is_digit(bytes[9])) {
270 let digits: Vec<char> = date_only.chars().filter(|c| c.is_ascii_digit()).collect();
272 if digits.len() >= 8 {
273 let year: String = digits[0..4].iter().collect();
274 let month: String = digits[4..6].iter().collect();
275 let day: String = digits[6..8].iter().collect();
276 normalized = format!("{}-{}-{}", year, month, day);
277 }
278 }
279 }
280 obj.insert("date_ymd".to_string(), Value::String(normalized.clone()));
281 if normalized.len() >= 7 {
282 let year = &normalized[0..4];
283 let ym = &normalized[0..7];
284 obj.insert("year".to_string(), Value::String(year.to_string()));
285 obj.insert("year_month".to_string(), Value::String(ym.to_string()));
286 }
287 }
288 }
289
290 Ok(Some(post))
291 }
292
/// Derives a post's category chain from where its file sits below `md_dir`.
///
/// Every normal directory component between `md_dir` and the file becomes one
/// category, outermost first. The result is empty when `path` is not located
/// under `md_dir` or lies directly inside it. Components that are not valid
/// UTF-8 are skipped.
fn extract_categories_from_path<P: AsRef<Path>>(path: P, md_dir: P) -> Vec<String> {
    let relative = match path.as_ref().strip_prefix(md_dir.as_ref()) {
        Ok(rel) => rel,
        Err(_) => return Vec::new(),
    };
    let directories = match relative.parent() {
        Some(dir) => dir,
        None => return Vec::new(),
    };

    directories
        .components()
        .filter_map(|part| match part {
            std::path::Component::Normal(name) => name.to_str().map(str::to_owned),
            _ => None,
        })
        .collect()
}
316
317 fn markdown_to_html(markdown: &str) -> String {
319 let mut options = Options::empty();
320 options.insert(Options::ENABLE_TABLES);
321 options.insert(Options::ENABLE_FOOTNOTES);
322 options.insert(Options::ENABLE_STRIKETHROUGH);
323 options.insert(Options::ENABLE_TASKLISTS);
324
325 let parser = Parser::new_ext(markdown, options);
326 let mut html = String::new();
327 html::push_html(&mut html, parser);
328
329 html
330 }
331
332 pub fn collect_tags(posts: &[Post]) -> Vec<Value> {
334 let mut tag_to_count: BTreeMap<String, usize> = BTreeMap::new();
335
336 for post in posts {
337 for tag in post.tags() {
338 *tag_to_count.entry(tag).or_insert(0) += 1;
339 }
340 }
341
342 tag_to_count
343 .into_iter()
344 .map(|(name, count)| {
345 let mut obj = serde_json::Map::new();
346 obj.insert("name".to_string(), Value::String(name));
347 obj.insert("count".to_string(), Value::from(count as u64));
348 Value::Object(obj)
349 })
350 .collect()
351 }
352
353 pub fn collect_years(posts: &[Post]) -> Vec<Value> {
355 let mut year_to_count: BTreeMap<String, usize> = BTreeMap::new();
356
357 for post in posts {
358 if let Some(year) = post.data.get("year").and_then(|v| v.as_str()) {
359 *year_to_count.entry(year.to_string()).or_insert(0) += 1;
360 }
361 }
362
363 year_to_count
364 .into_iter()
365 .map(|(name, count)| {
366 let mut obj = serde_json::Map::new();
367 obj.insert("name".to_string(), Value::String(name));
368 obj.insert("count".to_string(), Value::from(count as u64));
369 Value::Object(obj)
370 })
371 .collect()
372 }
373
374 pub fn generate_hierarchical_categories(posts: &[Post]) -> Value {
376 use std::collections::HashMap;
377
378 #[derive(Debug)]
380 struct CategoryNode {
381 name: String,
382 count: usize,
383 children: HashMap<String, CategoryNode>,
384 full_path: Vec<String>,
385 }
386
387 impl CategoryNode {
388 fn new(name: String, full_path: Vec<String>) -> Self {
389 Self {
390 name,
391 count: 0,
392 children: HashMap::new(),
393 full_path,
394 }
395 }
396
397 fn to_json(&self) -> Value {
398 let mut obj = serde_json::Map::new();
399 obj.insert("name".to_string(), Value::String(self.name.clone()));
400 obj.insert("count".to_string(), Value::from(self.count as u64));
401 obj.insert("path".to_string(), Value::Array(
402 self.full_path.iter().map(|s| Value::String(s.clone())).collect()
403 ));
404
405 if !self.children.is_empty() {
406 let mut children: Vec<Value> = self.children
407 .values()
408 .map(|child| child.to_json())
409 .collect();
410 children.sort_by(|a, b| {
411 let name_a = a.get("name").and_then(|v| v.as_str()).unwrap_or("");
412 let name_b = b.get("name").and_then(|v| v.as_str()).unwrap_or("");
413 name_a.cmp(name_b)
414 });
415 obj.insert("children".to_string(), Value::Array(children));
416 }
417
418 Value::Object(obj)
419 }
420 }
421
422 let mut root = CategoryNode::new("root".to_string(), vec![]);
423
424 for post in posts {
426 let categories = post.categories();
427 if !categories.is_empty() {
428 let mut current = &mut root;
430 let mut current_path = vec![];
431
432 for category in &categories {
433 current_path.push(category.clone());
434 current = current.children
435 .entry(category.clone())
436 .or_insert_with(|| CategoryNode::new(category.clone(), current_path.clone()));
437 current.count += 1;
438 }
439 }
440 }
441
442 if root.children.is_empty() {
444 Value::Array(vec![])
445 } else {
446 let mut categories: Vec<Value> = root.children
447 .values()
448 .map(|child| child.to_json())
449 .collect();
450 categories.sort_by(|a, b| {
451 let name_a = a.get("name").and_then(|v| v.as_str()).unwrap_or("");
452 let name_b = b.get("name").and_then(|v| v.as_str()).unwrap_or("");
453 name_a.cmp(name_b)
454 });
455 Value::Array(categories)
456 }
457 }
458
459 pub fn parse_file_content<P: AsRef<Path>>(content: &str, path: P, md_dir: P) -> Result<Option<Value>> {
464 Self::parse_post(content, path, md_dir)
465 }
466}