count_md/
lib.rs

1use bitflags::bitflags;
2use pulldown_cmark::{Event, Options as CmarkOptions, Parser, Tag, TagEnd};
3use unicode_segmentation::UnicodeSegmentation;
4use xmlparser::{Token, Tokenizer};
5
6/// Count some Markdown, using the default [`Options`].
7pub fn count(text: &str) -> u64 {
8    count_with_options(text, Options::DEFAULT)
9}
10
11/// Count some Markdown, using the supplied [`Options`].
12pub fn count_with_options(text: &str, options: Options) -> u64 {
13    let mut state = State {
14        in_code_block: false,
15        blockquote_level: 0,
16        in_metadata_block: false,
17        in_footnote: false,
18        in_table: false,
19        in_heading: false,
20    };
21
22    // Turn on everything…
23    let cmark_options = CmarkOptions::all()
24        // …then turn off *old* footnotes…
25        & !CmarkOptions::ENABLE_OLD_FOOTNOTES
26        // …and finally turn back on *new* footnotes.
27        | CmarkOptions::ENABLE_FOOTNOTES;
28
29    let parser = Parser::new_ext(text, cmark_options);
30
31    // TODO: check whether items other than blockquotes can be nested!
32    let mut count = 0;
33    for event in parser {
34        use Event::*;
35        match event {
36            Text(text) => {
37                if state.allowed_for(&options) {
38                    count += text.unicode_words().count() as u64;
39                }
40            }
41
42            Code(text) => {
43                if options.contains(Options::IncludeInlineCode) {
44                    count += text.unicode_words().count() as u64;
45                }
46            }
47
48            Start(tag) => match tag {
49                Tag::CodeBlock(_) => state.in_code_block = true,
50                Tag::BlockQuote => state.blockquote_level += 1,
51                Tag::MetadataBlock(_) => state.in_metadata_block = true,
52                Tag::FootnoteDefinition(_) => state.in_footnote = true,
53                Tag::Table(_) => state.in_table = true,
54                Tag::Heading { .. } => state.in_heading = true,
55                _ => {}
56            },
57
58            End(tag) => match tag {
59                TagEnd::CodeBlock => state.in_code_block = false,
60                TagEnd::BlockQuote => state.blockquote_level -= 1,
61                TagEnd::MetadataBlock(_) => state.in_metadata_block = false,
62                TagEnd::FootnoteDefinition => state.in_footnote = false,
63                TagEnd::Table => state.in_table = false,
64                TagEnd::Heading(_) => state.in_heading = false,
65                _ => {}
66            },
67
68            Html(html) => {
69                if options.contains(Options::IncludeBlockHtml) {
70                    for token in Tokenizer::from(html.as_ref()).flatten() {
71                        if let Token::Text { text } = token {
72                            count += text.unicode_words().count() as u64;
73                        }
74                    }
75                }
76            }
77
78            // None of these contribute to the final count.
79            InlineHtml(_tag) => {}
80            FootnoteReference(_) => {}
81            SoftBreak => {}
82            HardBreak => {}
83            Rule => {}
84            TaskListMarker(_) => {}
85        }
86    }
87
88    count
89}
90
/// Tracks which Markdown constructs enclose the parser event currently being
/// processed while counting.
///
/// Each flag is set when the corresponding start tag is seen and cleared on
/// the matching end tag. Blockquotes are tracked as a depth rather than a
/// flag because they can be nested.
#[derive(Debug)]
pub struct State {
    /// Currently inside a code block.
    in_code_block: bool,
    /// Number of blockquotes currently open. A `usize` so that deeply nested
    /// quotes cannot overflow the counter.
    blockquote_level: usize,
    /// Currently inside a metadata block.
    in_metadata_block: bool,
    /// Currently inside a footnote definition.
    in_footnote: bool,
    /// Currently inside a table.
    in_table: bool,
    /// Currently inside a heading.
    in_heading: bool,
}
99
100impl State {
101    fn allowed_for(&self, options: &Options) -> bool {
102        (!self.in_code_block || options.contains(Options::IncludeBlockCode))
103            && (!self.in_blockquote() || options.contains(Options::IncludeBlockquotes))
104            && (!self.in_metadata_block || options.contains(Options::IncludeMetadata))
105            && (!self.in_footnote || options.contains(Options::IncludeFootnotes))
106            && (!self.in_table || options.contains(Options::IncludeTables))
107            && (!self.in_heading || options.contains(Options::IncludeHeadings))
108    }
109
110    #[inline(always)]
111    fn in_blockquote(&self) -> bool {
112        self.blockquote_level > 0
113    }
114}
115
116bitflags! {
117    #[repr(transparent)]
118    #[derive(Copy, Clone, PartialEq, Eq)]
119    pub struct Options: u16 {
120        const IncludeInlineCode =  1;
121        const IncludeBlockCode =   1 << 2;
122        const IncludeTables =      1 << 3;
123        const IncludeFootnotes =   1 << 4;
124        const IncludeBlockHtml =   1 << 5;
125        const IncludeBlockquotes = 1 << 6;
126        const IncludeMetadata =    1 << 7;
127        const IncludeHeadings =    1 << 8;
128
129        const DEFAULT =
130              Options::IncludeInlineCode.bits()
131            | Options::IncludeTables.bits()
132            | Options::IncludeFootnotes.bits()
133            | Options::IncludeBlockHtml.bits()
134            | Options::IncludeHeadings.bits()
135            ;
136    }
137}
138
139#[cfg(test)]
140mod tests;