1use bitflags::bitflags;
2use pulldown_cmark::{Event, Options as CmarkOptions, Parser, Tag, TagEnd};
3use unicode_segmentation::UnicodeSegmentation;
4use xmlparser::{Token, Tokenizer};
5
6pub fn count(text: &str) -> u64 {
8 count_with_options(text, Options::DEFAULT)
9}
10
11pub fn count_with_options(text: &str, options: Options) -> u64 {
13 let mut state = State {
14 in_code_block: false,
15 blockquote_level: 0,
16 in_metadata_block: false,
17 in_footnote: false,
18 in_table: false,
19 in_heading: false,
20 };
21
22 let cmark_options = CmarkOptions::all()
24 & !CmarkOptions::ENABLE_OLD_FOOTNOTES
26 | CmarkOptions::ENABLE_FOOTNOTES;
28
29 let parser = Parser::new_ext(text, cmark_options);
30
31 let mut count = 0;
33 for event in parser {
34 use Event::*;
35 match event {
36 Text(text) => {
37 if state.allowed_for(&options) {
38 count += text.unicode_words().count() as u64;
39 }
40 }
41
42 Code(text) => {
43 if options.contains(Options::IncludeInlineCode) {
44 count += text.unicode_words().count() as u64;
45 }
46 }
47
48 Start(tag) => match tag {
49 Tag::CodeBlock(_) => state.in_code_block = true,
50 Tag::BlockQuote => state.blockquote_level += 1,
51 Tag::MetadataBlock(_) => state.in_metadata_block = true,
52 Tag::FootnoteDefinition(_) => state.in_footnote = true,
53 Tag::Table(_) => state.in_table = true,
54 Tag::Heading { .. } => state.in_heading = true,
55 _ => {}
56 },
57
58 End(tag) => match tag {
59 TagEnd::CodeBlock => state.in_code_block = false,
60 TagEnd::BlockQuote => state.blockquote_level -= 1,
61 TagEnd::MetadataBlock(_) => state.in_metadata_block = false,
62 TagEnd::FootnoteDefinition => state.in_footnote = false,
63 TagEnd::Table => state.in_table = false,
64 TagEnd::Heading(_) => state.in_heading = false,
65 _ => {}
66 },
67
68 Html(html) => {
69 if options.contains(Options::IncludeBlockHtml) {
70 for token in Tokenizer::from(html.as_ref()).flatten() {
71 if let Token::Text { text } = token {
72 count += text.unicode_words().count() as u64;
73 }
74 }
75 }
76 }
77
78 InlineHtml(_tag) => {}
80 FootnoteReference(_) => {}
81 SoftBreak => {}
82 HardBreak => {}
83 Rule => {}
84 TaskListMarker(_) => {}
85 }
86 }
87
88 count
89}
90
/// Tracks which Markdown containers the event walk is currently inside.
///
/// The flags are flipped on the start/end events of the corresponding
/// container and consulted to decide whether the text in between is counted.
#[derive(Debug)]
pub struct State {
    /// Set while inside a code block.
    in_code_block: bool,
    /// Number of block quotes currently open. Block quotes nest, so this is
    /// a depth; `usize` is used so that deeply nested input cannot overflow
    /// the counter the way a `u8` would after 255 levels.
    blockquote_level: usize,
    /// Set while inside a metadata block.
    in_metadata_block: bool,
    /// Set while inside a footnote definition.
    in_footnote: bool,
    /// Set while inside a table.
    in_table: bool,
    /// Set while inside a heading.
    in_heading: bool,
}
99
100impl State {
101 fn allowed_for(&self, options: &Options) -> bool {
102 (!self.in_code_block || options.contains(Options::IncludeBlockCode))
103 && (!self.in_blockquote() || options.contains(Options::IncludeBlockquotes))
104 && (!self.in_metadata_block || options.contains(Options::IncludeMetadata))
105 && (!self.in_footnote || options.contains(Options::IncludeFootnotes))
106 && (!self.in_table || options.contains(Options::IncludeTables))
107 && (!self.in_heading || options.contains(Options::IncludeHeadings))
108 }
109
110 #[inline(always)]
111 fn in_blockquote(&self) -> bool {
112 self.blockquote_level > 0
113 }
114}
115
116bitflags! {
117 #[repr(transparent)]
118 #[derive(Copy, Clone, PartialEq, Eq)]
119 pub struct Options: u16 {
120 const IncludeInlineCode = 1;
121 const IncludeBlockCode = 1 << 2;
122 const IncludeTables = 1 << 3;
123 const IncludeFootnotes = 1 << 4;
124 const IncludeBlockHtml = 1 << 5;
125 const IncludeBlockquotes = 1 << 6;
126 const IncludeMetadata = 1 << 7;
127 const IncludeHeadings = 1 << 8;
128
129 const DEFAULT =
130 Options::IncludeInlineCode.bits()
131 | Options::IncludeTables.bits()
132 | Options::IncludeFootnotes.bits()
133 | Options::IncludeBlockHtml.bits()
134 | Options::IncludeHeadings.bits()
135 ;
136 }
137}
138
139#[cfg(test)]
140mod tests;