markdown_readtime/
lib.rs

1use pulldown_cmark::{Event, Parser, Tag, TagEnd};
2
/// Result of a reading-time estimate.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ReadTime {
    /// Total reading time, in seconds.
    pub total_seconds: u64,
    /// Reading time rendered as a display string.
    pub formatted: String,
    /// Number of words counted.
    pub word_count: usize,
    /// Number of images counted.
    pub image_count: usize,
    /// Number of code blocks counted.
    pub code_block_count: usize,
}
18
/// Reading-speed configuration.
#[derive(Debug, Clone, Copy)]
pub struct ReadSpeed {
    /// Words read per minute (default: 200).
    pub words_per_minute: f64,
    /// Extra time added per image, in seconds (default: 12).
    pub seconds_per_image: f64,
    /// Extra time added per code block, in seconds (default: 20).
    pub seconds_per_code_block: f64,
    /// Whether emoji are taken into account (default: true).
    pub count_emoji: bool,
    /// Whether the text is treated as Chinese.
    pub chinese: bool,
}
33
34impl Default for ReadSpeed {
35    fn default() -> Self {
36        Self {
37            words_per_minute: 200.0,
38            seconds_per_image: 12.0,
39            seconds_per_code_block: 20.0,
40            count_emoji: true,
41            chinese: true,
42        }
43    }
44}
45
46impl ReadSpeed {
47    pub fn new(
48        wpm: f64,
49        seconds_per_image: f64,
50        seconds_per_code_block: f64,
51        count_emoji: bool,
52        chinese: bool,
53    ) -> Self {
54        Self {
55            words_per_minute: wpm,
56            seconds_per_image,
57            seconds_per_code_block,
58            count_emoji,
59            chinese,
60        }
61    }
62
63    pub fn wpm(mut self, wpm: f64) -> Self {
64        self.words_per_minute = wpm;
65        self
66    }
67
68    pub fn image_time(mut self, seconds: f64) -> Self {
69        self.seconds_per_image = seconds;
70        self
71    }
72
73    pub fn code_block_time(mut self, seconds: f64) -> Self {
74        self.seconds_per_code_block = seconds;
75        self
76    }
77
78    pub fn emoji(mut self, count: bool) -> Self {
79        self.count_emoji = count;
80        self
81    }
82
83    pub fn chinese(mut self, is_chinese: bool) -> Self {
84        self.chinese = is_chinese;
85        self
86    }
87}
88
89/// 估算Markdown的阅读时间
90pub fn estimate(markdown: &str) -> ReadTime {
91    estimate_with_speed(markdown, &ReadSpeed::default())
92}
93
94/// 使用自定义速度配置估算阅读时间
95pub fn estimate_with_speed(markdown: &str, speed: &ReadSpeed) -> ReadTime {
96    let parser = Parser::new(markdown);
97
98    let mut word_count = 0;
99    let mut image_count = 0;
100    let mut code_block_count = 0;
101    let mut in_code_block = false;
102    let mut in_image_alt = false;
103
104    for event in parser {
105        match event {
106            Event::Start(tag) => match tag {
107                Tag::Image { .. } => {
108                    image_count += 1;
109                    in_image_alt = true;
110                }
111                Tag::CodeBlock(_) => {
112                    code_block_count += 1;
113                    in_code_block = true;
114                }
115                _ => {}
116            },
117            Event::End(tag) => match tag {
118                TagEnd::Image { .. } => {
119                    in_image_alt = false;
120                }
121                TagEnd::CodeBlock => {
122                    in_code_block = false;
123                }
124                _ => {}
125            },
126            Event::Text(text) => {
127                if !in_image_alt && !in_code_block {
128                    if speed.chinese {
129                        word_count += count_words(&text.to_string(), speed.count_emoji);
130                    } else {
131                        word_count += count_english_words(&text.to_string(), speed.count_emoji);
132                    }
133                }
134            }
135            Event::Code(code) => {
136                if !in_code_block {
137                    if speed.chinese {
138                        word_count += count_words(&code.to_string(), speed.count_emoji);
139                    } else {
140                        word_count += count_english_words(&code.to_string(), speed.count_emoji);
141                    }
142                }
143            }
144            _ => {}
145        }
146    }
147
148    // 计算基础阅读时间(基于单词数)
149    let base_seconds = (word_count as f64 / speed.words_per_minute) * 60.0;
150
151    // 添加图片和代码块的额外时间
152    let image_seconds = image_count as f64 * speed.seconds_per_image;
153    let code_seconds = code_block_count as f64 * speed.seconds_per_code_block;
154
155    let total_seconds = (base_seconds + image_seconds + code_seconds).ceil() as u64;
156
157    ReadTime {
158        total_seconds,
159        formatted: format_time(total_seconds),
160        word_count,
161        image_count,
162        code_block_count,
163    }
164}
165
166/// 计算文本中的中文字数
167fn count_words(text: &str, count_emoji: bool) -> usize {
168    if count_emoji {
169        // 对于包含emoji的文本,计算非空白字符数
170        text.chars()
171            .filter(|c| !c.is_whitespace() && (!c.is_control() || c.is_emoji()))
172            .count()
173    } else {
174        // 直接计算非空白字符数,适用于中文等无空格分隔的语言
175        text.chars().filter(|c| !c.is_whitespace()).count()
176    }
177}
178
179/// 计算文本中的英文字数
180fn count_english_words(text: &str, count_emoji: bool) -> usize {
181    if count_emoji {
182        // 计算空格分隔的单词数,并考虑emoji作为独立单位
183        text.split_whitespace()
184            .map(|word| {
185                // 对于每个单词,如果包含emoji,则每个emoji算作一个单位
186                let emoji_count = word.chars().filter(|c| c.is_emoji()).count();
187                if emoji_count > 0 {
188                    // 如果有emoji,将单词拆分为普通字符和emoji
189                    let non_emoji_chars: usize = word
190                        .chars()
191                        .filter(|c| !c.is_emoji() && !c.is_whitespace())
192                        .count();
193                    // 每个非emoji字符算一个单位,每个emoji也算一个单位
194                    non_emoji_chars + emoji_count
195                } else {
196                    // 没有emoji则整个单词算一个单位
197                    1
198                }
199            })
200            .sum()
201    } else {
202        text.split_whitespace().count()
203    }
204}
205
/// Renders a duration in seconds as a display string.
///
/// Durations under one minute show seconds only, exact multiples of a minute
/// show minutes only, and everything else shows both parts.
fn format_time(seconds: u64) -> String {
    match (seconds / 60, seconds % 60) {
        (0, _) => format!("{}秒", seconds),
        (whole_minutes, 0) => format!("{}分钟", whole_minutes),
        (whole_minutes, leftover) => format!("{}分{}秒", whole_minutes, leftover),
    }
}
219
220/// 快捷函数:获取分钟数
221pub fn minutes(markdown: &str) -> u64 {
222    let read_time = estimate(markdown);
223    (read_time.total_seconds as f64 / 60.0).ceil() as u64
224}
225
226/// 快捷函数:获取单词数
227pub fn words(markdown: &str) -> usize {
228    estimate(markdown).word_count
229}
230
231/// 快捷函数:获取格式化字符串
232pub fn formatted(markdown: &str) -> String {
233    estimate(markdown).formatted
234}
235
/// Extension trait that adds emoji detection.
trait CharExt {
    /// Returns `true` when the value is classified as an emoji.
    fn is_emoji(&self) -> bool;
}
240
241impl CharExt for char {
242    fn is_emoji(&self) -> bool {
243        // 简单的emoji范围检测
244        matches!(*self as u32,
245            0x1F600..=0x1F64F |  // Emoticons
246            0x1F300..=0x1F5FF |  // Miscellaneous Symbols and Pictographs
247            0x1F680..=0x1F6FF |  // Transport and Map Symbols
248            0x1F700..=0x1F77F |  // Alchemical Symbols
249            0x1F780..=0x1F7FF |  // Geometric Shapes Extended
250            0x1F800..=0x1F8FF |  // Supplemental Arrows-C
251            0x1F900..=0x1F9FF |  // Supplemental Symbols and Pictographs
252            0x1FA00..=0x1FA6F |  // Chess Symbols
253            0x1FA70..=0x1FAFF |  // Symbols and Pictographs Extended-A
254            0x2600..=0x26FF   |  // Miscellaneous Symbols
255            0x2700..=0x27BF   |  // Dingbats
256            0x2B50           |  // star
257            0x2B55              // heavy large circle
258        )
259    }
260}
261
/// Unit tests for the estimator and its private helpers.
#[cfg(test)]
mod tests {
    use super::*;

    /// Default configuration on a small Chinese document made of headings
    /// and an ordered list.
    #[test]
    fn test_estimate() {
        let md_txt = r#"
# 标题
## 子标题
### 子子标题
1. 列表1
2. 列表2
"#
        .trim();
        let read_time = estimate(md_txt);
        assert_eq!(read_time.word_count, 15);
        assert_eq!(read_time.image_count, 0);
        assert_eq!(read_time.code_block_count, 0);
        // 15 units at 200 per minute is 4.5 s, rounded up.
        assert_eq!(read_time.total_seconds, 5);
        assert_eq!(read_time.formatted, "5秒");
    }

    /// Custom configurations, covering both the Chinese and the English
    /// counting modes.
    #[test]
    fn test_estimate_with_speed() {
        // Chinese mode.
        let md_txt = r#"
# 标题
## 子标题
### 子子标题
1. 列表1
2. 列表2
"#
        .trim();
        let speed = ReadSpeed::new(100.0, 10.0, 15.0, true, true);
        let read_time = estimate_with_speed(md_txt, &speed);
        assert_eq!(read_time.word_count, 15);
        assert_eq!(read_time.image_count, 0);
        assert_eq!(read_time.code_block_count, 0);
        // 15 units at 100 per minute is exactly 9 s.
        assert_eq!(read_time.total_seconds, 9);
        assert_eq!(read_time.formatted, "9秒");

        // English mode.
        let md_txt_english = r#"
# Title

This is a test paragraph. It contains some words.
"#
        .trim();

        let speed = ReadSpeed::new(200.0, 10.0, 15.0, true, false);
        let read_time = estimate_with_speed(md_txt_english, &speed);
        assert_eq!(read_time.word_count, 10);
        // 10 words at 200 per minute is exactly 3 s.
        assert_eq!(read_time.total_seconds, 3);
        assert_eq!(read_time.formatted, "3秒");
    }

    /// Per-character counting: punctuation counts like any other character.
    #[test]
    fn test_count_words() {
        let text = "你好,世界!";
        let word_count = count_words(text, true);
        assert_eq!(word_count, 6);
    }

    /// Whitespace-separated counting on text without emoji.
    #[test]
    fn test_count_english_words() {
        let text = "Hello world! This is a test.";
        let word_count = count_english_words(text, true);
        assert_eq!(word_count, 6);
    }

    /// The `formatted` shortcut on a document with headings and a bullet list.
    #[test]
    fn test_formatted() {
        let md_txt = r#"
# 测试标题
## 子标题
### 子子标题
- 列表项1
- 列表项2
"#
        .trim();
        let formatted_time = formatted(md_txt);
        assert_eq!(formatted_time, "6秒");
    }
}