1#![doc = include_str!("../README.md")]
2
3use bitflags::bitflags;
4use pulldown_cmark::{Event, Options as CmarkOptions, Parser, Tag, TagEnd};
5use unicode_segmentation::UnicodeSegmentation;
6use xmlparser::{Token, Tokenizer};
7
8pub fn count(text: &str) -> u64 {
10 count_with_options(text, Options::DEFAULT)
11}
12
13pub fn count_with_options(text: &str, options: Options) -> u64 {
15 let cmark_options = CmarkOptions::all()
17 & !CmarkOptions::ENABLE_OLD_FOOTNOTES
19 | CmarkOptions::ENABLE_FOOTNOTES;
21
22 let mut parser = Parser::new_ext(text, cmark_options);
23
24 count_with_options_and_parser(options, parser.by_ref())
25}
26
27pub fn count_with_options_and_parser(options: Options, parser: &mut Parser) -> u64 {
28 let mut state = State {
29 in_code_block: false,
30 blockquote_level: 0,
31 in_metadata_block: false,
32 in_footnote: false,
33 in_table: false,
34 in_heading: false,
35 };
36
37 let mut count = 0;
39 for event in parser {
40 use Event::*;
41 match event {
42 Text(text) => {
43 if state.allowed_for(&options) {
44 count += text.unicode_words().count() as u64;
45 }
46 }
47
48 Code(text) => {
49 if options.contains(Options::IncludeInlineCode) {
50 count += text.unicode_words().count() as u64;
51 }
52 }
53
54 Start(tag) => match tag {
55 Tag::CodeBlock(_) => state.in_code_block = true,
56 Tag::BlockQuote(_) => state.blockquote_level += 1,
57 Tag::MetadataBlock(_) => state.in_metadata_block = true,
58 Tag::FootnoteDefinition(_) => state.in_footnote = true,
59 Tag::Table(_) => state.in_table = true,
60 Tag::Heading { .. } => state.in_heading = true,
61 _ => {}
62 },
63
64 End(tag) => match tag {
65 TagEnd::CodeBlock => state.in_code_block = false,
66 TagEnd::BlockQuote(_) => state.blockquote_level -= 1,
67 TagEnd::MetadataBlock(_) => state.in_metadata_block = false,
68 TagEnd::FootnoteDefinition => state.in_footnote = false,
69 TagEnd::Table => state.in_table = false,
70 TagEnd::Heading(_) => state.in_heading = false,
71 _ => {}
72 },
73
74 Html(html) => {
75 if options.contains(Options::IncludeBlockHtml) {
76 for token in Tokenizer::from(html.as_ref()).flatten() {
77 if let Token::Text { text } = token {
78 count += text.unicode_words().count() as u64;
79 }
80 }
81 }
82 }
83
84 InlineMath(_) => { }
86 DisplayMath(_) => { }
87
88 InlineHtml(_tag) => {}
90 FootnoteReference(_) => {}
91 SoftBreak => {}
92 HardBreak => {}
93 Rule => {}
94 TaskListMarker(_) => {}
95 }
96 }
97
98 count
99}
100
/// Records which Markdown constructs enclose the event currently being
/// processed while counting words.
///
/// The default value describes a position outside every tracked construct.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct State {
    /// `true` between the start and end tags of a code block.
    in_code_block: bool,
    /// Number of block quotes currently open; block quotes can nest, so this
    /// is a depth rather than a flag.
    blockquote_level: u8,
    /// `true` between the start and end tags of a metadata block.
    in_metadata_block: bool,
    /// `true` between the start and end tags of a footnote definition.
    in_footnote: bool,
    /// `true` between the start and end tags of a table.
    in_table: bool,
    /// `true` between the start and end tags of a heading.
    in_heading: bool,
}
109
110impl State {
111 fn allowed_for(&self, options: &Options) -> bool {
112 (!self.in_code_block || options.contains(Options::IncludeBlockCode))
113 && (!self.in_blockquote() || options.contains(Options::IncludeBlockquotes))
114 && (!self.in_metadata_block || options.contains(Options::IncludeMetadata))
115 && (!self.in_footnote || options.contains(Options::IncludeFootnotes))
116 && (!self.in_table || options.contains(Options::IncludeTables))
117 && (!self.in_heading || options.contains(Options::IncludeHeadings))
118 }
119
120 #[inline(always)]
121 fn in_blockquote(&self) -> bool {
122 self.blockquote_level > 0
123 }
124}
125
126bitflags! {
127 #[repr(transparent)]
128 #[derive(Copy, Clone, PartialEq, Eq)]
129 pub struct Options: u16 {
130 const IncludeInlineCode = 1;
131 const IncludeBlockCode = 1 << 2;
132 const IncludeTables = 1 << 3;
133 const IncludeFootnotes = 1 << 4;
134 const IncludeBlockHtml = 1 << 5;
135 const IncludeBlockquotes = 1 << 6;
136 const IncludeMetadata = 1 << 7;
137 const IncludeHeadings = 1 << 8;
138
139 const DEFAULT =
140 Options::IncludeInlineCode.bits()
141 | Options::IncludeTables.bits()
142 | Options::IncludeFootnotes.bits()
143 | Options::IncludeBlockHtml.bits()
144 | Options::IncludeHeadings.bits()
145 ;
146 }
147}
148
149#[cfg(test)]
150mod tests;