longcipher_leptos_components/components/editor/
statistics.rs

1//! Document statistics
2//!
3//! Provides word count, character count, and other text metrics.
4
5use serde::{Deserialize, Serialize};
6
/// Basic text statistics.
///
/// A plain value type holding the counters computed by `TextStats::from_text`.
/// All fields are simple counts, so the struct is `Copy` and can be compared
/// and (de)serialized directly.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct TextStats {
    /// Number of words (runs of non-whitespace characters)
    pub words: usize,
    /// Number of characters (including whitespace), counted as `char`s rather than bytes
    pub characters: usize,
    /// Number of characters (excluding whitespace)
    pub characters_no_spaces: usize,
    /// Number of lines (newline count plus one, so empty text still has one line)
    pub lines: usize,
    /// Number of paragraphs (groups of lines with content, separated by blank lines)
    pub paragraphs: usize,
}
21
22impl TextStats {
23    /// Calculate statistics from text.
24    #[must_use]
25    pub fn from_text(text: &str) -> Self {
26        if text.is_empty() {
27            return Self {
28                words: 0,
29                characters: 0,
30                characters_no_spaces: 0,
31                lines: 1,
32                paragraphs: 0,
33            };
34        }
35
36        let mut stats = Self::default();
37
38        // Count lines
39        stats.lines = text.chars().filter(|&c| c == '\n').count() + 1;
40
41        // Single pass for words, characters, and paragraphs
42        let mut in_word = false;
43        let mut in_paragraph = false;
44        let mut consecutive_newlines = 0;
45        let mut line_has_content = false;
46
47        for ch in text.chars() {
48            stats.characters += 1;
49
50            if ch.is_whitespace() {
51                if in_word {
52                    in_word = false;
53                }
54
55                if ch == '\n' {
56                    consecutive_newlines += 1;
57
58                    if line_has_content && !in_paragraph {
59                        in_paragraph = true;
60                        stats.paragraphs += 1;
61                    }
62
63                    if consecutive_newlines >= 2 {
64                        in_paragraph = false;
65                    }
66
67                    line_has_content = false;
68                } else {
69                    consecutive_newlines = 0;
70                }
71            } else {
72                stats.characters_no_spaces += 1;
73                consecutive_newlines = 0;
74                line_has_content = true;
75
76                if !in_word {
77                    in_word = true;
78                    stats.words += 1;
79                }
80            }
81        }
82
83        // Handle final paragraph
84        if line_has_content && !in_paragraph {
85            stats.paragraphs += 1;
86        }
87
88        stats
89    }
90
91    /// Format as a compact string for display.
92    #[must_use]
93    pub fn format_compact(&self) -> String {
94        format!(
95            "{} words | {} chars | {} lines",
96            self.words, self.characters, self.lines
97        )
98    }
99}
100
/// Comprehensive document statistics including markdown elements.
///
/// Combines the plain-text counters in [`TextStats`] with counters for
/// markdown constructs found by a line-based scan, plus an estimated
/// reading time.
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct DocumentStats {
    /// Basic text statistics
    pub text: TextStats,
    /// Count of headings by level (index 0 = H1, index 5 = H6)
    pub headings_by_level: [usize; 6],
    /// Total number of headings
    pub heading_count: usize,
    /// Number of links
    pub link_count: usize,
    /// Number of images
    pub image_count: usize,
    /// Number of code blocks (fenced with backticks or tildes)
    pub code_block_count: usize,
    /// Number of tables
    pub table_count: usize,
    /// Number of blockquotes
    pub blockquote_count: usize,
    /// Number of list items (unordered and ordered)
    pub list_item_count: usize,
    /// Estimated reading time in minutes (based on 250 words per minute, never less than 1)
    pub reading_time_minutes: u32,
}
125
126impl DocumentStats {
127    /// Calculate comprehensive statistics from markdown text.
128    #[must_use]
129    pub fn from_text(text: &str) -> Self {
130        let text_stats = TextStats::from_text(text);
131        let mut stats = Self {
132            text: text_stats,
133            ..Default::default()
134        };
135
136        // Calculate reading time (250 WPM average)
137        stats.reading_time_minutes = ((stats.text.words as f32 / 250.0).ceil() as u32).max(1);
138
139        // Parse markdown elements
140        stats.parse_markdown(text);
141
142        stats
143    }
144
145    /// Parse markdown-specific elements.
146    fn parse_markdown(&mut self, text: &str) {
147        let mut in_code_block = false;
148
149        for line in text.lines() {
150            let trimmed = line.trim();
151
152            // Code blocks
153            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
154                if in_code_block {
155                    in_code_block = false;
156                } else {
157                    in_code_block = true;
158                    self.code_block_count += 1;
159                }
160                continue;
161            }
162
163            if in_code_block {
164                continue;
165            }
166
167            // Headings
168            if let Some(level) = Self::heading_level(trimmed) {
169                if level <= 6 {
170                    self.headings_by_level[level - 1] += 1;
171                    self.heading_count += 1;
172                }
173            }
174
175            // Blockquotes
176            if trimmed.starts_with('>') {
177                self.blockquote_count += 1;
178            }
179
180            // List items
181            if trimmed.starts_with("- ")
182                || trimmed.starts_with("* ")
183                || trimmed.starts_with("+ ")
184                || Self::is_ordered_list_item(trimmed)
185            {
186                self.list_item_count += 1;
187            }
188
189            // Count links and images
190            self.link_count += Self::count_links(line);
191            self.image_count += Self::count_images(line);
192
193            // Tables (simplified detection)
194            if trimmed.contains('|') && trimmed.starts_with('|') {
195                self.table_count += 1;
196            }
197        }
198    }
199
200    /// Get heading level from a line.
201    fn heading_level(line: &str) -> Option<usize> {
202        if !line.starts_with('#') {
203            return None;
204        }
205
206        let count = line.chars().take_while(|&c| c == '#').count();
207        if count <= 6 {
208            let after = &line[count..];
209            if after.is_empty() || after.starts_with(' ') {
210                return Some(count);
211            }
212        }
213
214        None
215    }
216
217    /// Check if a line is an ordered list item.
218    fn is_ordered_list_item(line: &str) -> bool {
219        let mut chars = line.chars();
220        let mut has_digit = false;
221
222        while let Some(c) = chars.next() {
223            if c.is_ascii_digit() {
224                has_digit = true;
225            } else if c == '.' && has_digit {
226                return chars.next() == Some(' ');
227            } else {
228                return false;
229            }
230        }
231
232        false
233    }
234
235    /// Count markdown links in a line.
236    fn count_links(line: &str) -> usize {
237        let mut count = 0;
238        let mut chars = line.char_indices().peekable();
239
240        while let Some((i, c)) = chars.next() {
241            // Skip images
242            if c == '!' && line[i + 1..].starts_with('[') {
243                continue;
244            }
245
246            if c == '[' {
247                // Look for ](
248                let rest = &line[i + 1..];
249                if let Some(close) = rest.find("](") {
250                    let after_close = &rest[close + 2..];
251                    if after_close.contains(')') {
252                        count += 1;
253                    }
254                }
255            }
256        }
257
258        count
259    }
260
261    /// Count markdown images in a line.
262    fn count_images(line: &str) -> usize {
263        let mut count = 0;
264        let mut start = 0;
265
266        while let Some(pos) = line[start..].find("![") {
267            let rest = &line[start + pos + 2..];
268            if let Some(close) = rest.find("](") {
269                let after_close = &rest[close + 2..];
270                if after_close.contains(')') {
271                    count += 1;
272                }
273            }
274            start = start + pos + 2;
275        }
276
277        count
278    }
279
280    /// Format reading time for display.
281    #[must_use]
282    pub fn format_reading_time(&self) -> String {
283        if self.reading_time_minutes == 1 {
284            "1 min read".to_string()
285        } else {
286            format!("{} min read", self.reading_time_minutes)
287        }
288    }
289}
290
// Unit tests covering the plain-text counters, the markdown element
// counters and the reading-time estimate.
#[cfg(test)]
mod tests {
    use super::*;

    // Two paragraphs separated by one blank line: three lines, four words.
    #[test]
    fn test_text_stats() {
        let stats = TextStats::from_text("Hello, World!\n\nNew paragraph.");

        assert_eq!(stats.words, 4);
        assert_eq!(stats.lines, 3);
        assert_eq!(stats.paragraphs, 2);
    }

    // A small document with one H1, one H2, one link, two list items and
    // one fenced code block.
    #[test]
    fn test_document_stats() {
        let text = r#"# Title

Some text with a [link](url).

## Section

- Item 1
- Item 2

```rust
let x = 1;
```
"#;

        let stats = DocumentStats::from_text(text);

        assert_eq!(stats.heading_count, 2);
        assert_eq!(stats.headings_by_level[0], 1); // H1
        assert_eq!(stats.headings_by_level[1], 1); // H2
        assert_eq!(stats.link_count, 1);
        assert_eq!(stats.list_item_count, 2);
        assert_eq!(stats.code_block_count, 1);
    }

    // Reading time is derived from the word count at 250 words per minute.
    #[test]
    fn test_reading_time() {
        let text = "word ".repeat(500); // 500 words
        let stats = DocumentStats::from_text(&text);

        assert_eq!(stats.reading_time_minutes, 2); // 500/250 = 2 minutes
    }
}