count_md/
lib.rs

1#![doc = include_str!("../README.md")]
2
3use bitflags::bitflags;
4use pulldown_cmark::{Event, Options as CmarkOptions, Parser, Tag, TagEnd};
5use unicode_segmentation::UnicodeSegmentation;
6use xmlparser::{Token, Tokenizer};
7
8/// Count some Markdown, using the default [`Options`].
9pub fn count(text: &str) -> u64 {
10    count_with_options(text, Options::DEFAULT)
11}
12
13/// Count some Markdown, using the supplied [`Options`].
14pub fn count_with_options(text: &str, options: Options) -> u64 {
15    // Turn on everything…
16    let cmark_options = CmarkOptions::all()
17        // …then turn off *old* footnotes…
18        & !CmarkOptions::ENABLE_OLD_FOOTNOTES
19        // …and finally turn back on *new* footnotes.
20        | CmarkOptions::ENABLE_FOOTNOTES;
21
22    let mut parser = Parser::new_ext(text, cmark_options);
23
24    count_with_options_and_parser(options, parser.by_ref())
25}
26
27pub fn count_with_options_and_parser(options: Options, parser: &mut Parser) -> u64 {
28    let mut state = State {
29        in_code_block: false,
30        blockquote_level: 0,
31        in_metadata_block: false,
32        in_footnote: false,
33        in_table: false,
34        in_heading: false,
35    };
36
37    // TODO: check whether items other than blockquotes can be nested!
38    let mut count = 0;
39    for event in parser {
40        use Event::*;
41        match event {
42            Text(text) => {
43                if state.allowed_for(&options) {
44                    count += text.unicode_words().count() as u64;
45                }
46            }
47
48            Code(text) => {
49                if options.contains(Options::IncludeInlineCode) {
50                    count += text.unicode_words().count() as u64;
51                }
52            }
53
54            Start(tag) => match tag {
55                Tag::CodeBlock(_) => state.in_code_block = true,
56                Tag::BlockQuote(_) => state.blockquote_level += 1,
57                Tag::MetadataBlock(_) => state.in_metadata_block = true,
58                Tag::FootnoteDefinition(_) => state.in_footnote = true,
59                Tag::Table(_) => state.in_table = true,
60                Tag::Heading { .. } => state.in_heading = true,
61                _ => {}
62            },
63
64            End(tag) => match tag {
65                TagEnd::CodeBlock => state.in_code_block = false,
66                TagEnd::BlockQuote(_) => state.blockquote_level -= 1,
67                TagEnd::MetadataBlock(_) => state.in_metadata_block = false,
68                TagEnd::FootnoteDefinition => state.in_footnote = false,
69                TagEnd::Table => state.in_table = false,
70                TagEnd::Heading(_) => state.in_heading = false,
71                _ => {}
72            },
73
74            Html(html) => {
75                if options.contains(Options::IncludeBlockHtml) {
76                    for token in Tokenizer::from(html.as_ref()).flatten() {
77                        if let Token::Text { text } = token {
78                            count += text.unicode_words().count() as u64;
79                        }
80                    }
81                }
82            }
83
84            // TODO: add support for these in some sensible-ish way!
85            InlineMath(_) => { /* unimplemented */ }
86            DisplayMath(_) => { /* unimplemented */ }
87
88            // None of these contribute to the final count.
89            InlineHtml(_tag) => {}
90            FootnoteReference(_) => {}
91            SoftBreak => {}
92            HardBreak => {}
93            Rule => {}
94            TaskListMarker(_) => {}
95        }
96    }
97
98    count
99}
100
/// Tracks which Markdown containers the event walk is currently inside.
///
/// Each `in_*` flag is set when the matching start tag is seen and cleared on
/// the matching end tag. Blockquotes can nest, so they are tracked as a depth
/// counter rather than a flag.
pub struct State {
    /// Inside a code block.
    in_code_block: bool,
    /// Current blockquote nesting depth; `0` means "not in a blockquote".
    ///
    /// This is a `usize` rather than a small integer so that pathological
    /// input with hundreds of nested `>` markers cannot overflow the counter
    /// (which would panic in debug builds and wrap to "not in a blockquote"
    /// in release builds).
    blockquote_level: usize,
    /// Inside a metadata block.
    in_metadata_block: bool,
    /// Inside a footnote definition.
    in_footnote: bool,
    /// Inside a table.
    in_table: bool,
    /// Inside a heading.
    in_heading: bool,
}
109
110impl State {
111    fn allowed_for(&self, options: &Options) -> bool {
112        (!self.in_code_block || options.contains(Options::IncludeBlockCode))
113            && (!self.in_blockquote() || options.contains(Options::IncludeBlockquotes))
114            && (!self.in_metadata_block || options.contains(Options::IncludeMetadata))
115            && (!self.in_footnote || options.contains(Options::IncludeFootnotes))
116            && (!self.in_table || options.contains(Options::IncludeTables))
117            && (!self.in_heading || options.contains(Options::IncludeHeadings))
118    }
119
120    #[inline(always)]
121    fn in_blockquote(&self) -> bool {
122        self.blockquote_level > 0
123    }
124}
125
126bitflags! {
127    #[repr(transparent)]
128    #[derive(Copy, Clone, PartialEq, Eq)]
129    pub struct Options: u16 {
130        const IncludeInlineCode =  1;
131        const IncludeBlockCode =   1 << 2;
132        const IncludeTables =      1 << 3;
133        const IncludeFootnotes =   1 << 4;
134        const IncludeBlockHtml =   1 << 5;
135        const IncludeBlockquotes = 1 << 6;
136        const IncludeMetadata =    1 << 7;
137        const IncludeHeadings =    1 << 8;
138
139        const DEFAULT =
140              Options::IncludeInlineCode.bits()
141            | Options::IncludeTables.bits()
142            | Options::IncludeFootnotes.bits()
143            | Options::IncludeBlockHtml.bits()
144            | Options::IncludeHeadings.bits()
145            ;
146    }
147}
148
149#[cfg(test)]
150mod tests;