mdbook_nocomment/
lib.rs

//! A simple [mdbook](https://rust-lang.github.io/mdBook/index.html) preprocessor that cleans up HTML comments.
2
3use anyhow::Result;
4use itertools::Itertools;
5use mdbook::{
6    book::Book,
7    preprocess::{Preprocessor, PreprocessorContext},
8    BookItem,
9};
10use pulldown_cmark::{Event, Parser};
11use pulldown_cmark_to_cmark::cmark;
12
/// mdbook preprocessor that strips HTML comments from chapter content.
///
/// The type carries no state; all of its behaviour lives in its
/// `Preprocessor` implementation.
#[derive(Debug, Default, Clone, Copy)]
pub struct NoCommentPreprocessor;
14
15impl Preprocessor for NoCommentPreprocessor {
16    fn name(&self) -> &str {
17        "nocomment-preprocessor"
18    }
19
20    fn run(&self, _ctx: &PreprocessorContext, mut book: Book) -> Result<Book> {
21        book.for_each_mut(|item: &mut BookItem| {
22            if let BookItem::Chapter(ref mut chapter) = *item {
23                let content_events =
24                    Parser::new_ext(&chapter.content, pulldown_cmark::Options::all());
25                let events = remove_comment(content_events);
26                let mut buf = String::with_capacity(chapter.content.len());
27                cmark(events, &mut buf).unwrap();
28                chapter.content = buf;
29            }
30        });
31        Ok(book)
32    }
33
34    fn supports_renderer(&self, renderer: &str) -> bool {
35        renderer != "not-supported"
36    }
37}
38
39fn remove_comment<'a>(events: impl Iterator<Item = Event<'a>>) -> impl Iterator<Item = Event<'a>> {
40    const COMMENT_START: &str = "<!--";
41    const COMMENT_END: &str = "-->";
42    let mut filtered = vec![];
43    let mut mp = events.multipeek();
44    while let Some(current_event) = mp.next() {
45        match current_event {
46            Event::Text(ref t1) if t1.as_ref().eq("<") => {
47                let next = mp.peek();
48                match next {
49                    Some(Event::Text(ref t2)) if t2.starts_with("!--") => {
50                        let mut removal = t1.to_string();
51                        removal.push_str(t2);
52                        // Ended at current event
53                        if t2.trim_end().ends_with(COMMENT_END) {
54                            mp.next();
55                            log::debug!("Comment: {}", removal);
56                            continue;
57                        }
58                        // Peek text event for COMMENT_END
59                        let mut found = false;
60                        let mut count = 0;
61                        loop {
62                            let nn = mp.peek();
63                            match nn {
64                                Some(Event::Text(ref c)) => {
65                                    removal.push_str(c);
66                                    count += 1;
67                                    if c.trim_end().ends_with(COMMENT_END) {
68                                        found = true;
69                                        break;
70                                    }
71                                }
72                                None => break,
73                                // May across paragraph
74                                _ => {
75                                    count += 1;
76                                    continue;
77                                }
78                            }
79                        }
80                        if found {
81                            // Skip comment events
82                            for _ in 0..=count {
83                                mp.next();
84                            }
85                            log::debug!("Comment: {}", removal);
86                        } else {
87                            filtered.push(current_event)
88                        }
89                    }
90                    _ => filtered.push(current_event),
91                };
92            }
93            Event::Html(ref html) if html.starts_with(COMMENT_START) => {
94                if html.trim_end().ends_with(COMMENT_END) {
95                    // Ended at current event
96                    continue;
97                }
98                let mut removal = vec![html.to_string()];
99                let mut found = false;
100                let mut cnt = 0;
101                loop {
102                    let next = mp.peek();
103                    match next {
104                        Some(Event::Html(ref h)) => {
105                            removal.push(h.to_string());
106                            cnt += 1;
107                            if h.trim_end().ends_with(COMMENT_END) {
108                                found = true;
109                                for _ in 0..cnt {
110                                    mp.next();
111                                }
112                                log::debug!("{}", removal.join("\n"));
113                                continue;
114                            }
115                        }
116                        _ => break,
117                    }
118                }
119                if !found {
120                    filtered.push(current_event)
121                }
122            }
123            // Not a comment event, push it as is.
124            _ => filtered.push(current_event),
125        }
126    }
127    filtered.into_iter()
128}
129
#[cfg(test)]
mod test {
    /// Parses `s`, filters it through `remove_comment`, renders the result to
    /// HTML and checks that nothing of the comment is left in the output.
    fn assert_comment_removal(s: &str) {
        let filtered = crate::remove_comment(mdbook::utils::new_cmark_parser(s, false));
        let mut buf = String::new();
        pulldown_cmark::html::push_html(&mut buf, filtered);

        log::debug!("RENDERED: {buf}");
        for leftover in ["double-hyphen", "--"] {
            assert!(!buf.contains(leftover));
        }
    }

    #[test]
    fn remove_comments() {
        let inputs = [
            // oneline comment (one Html event)
            "<!-- double-hyphen -->",
            // oneline invalid comment (one Html event)
            "<!-- --double-hyphen -->",
            // multiline invalid comment (multi html events)
            "<!-- \n--double-hyphen \n-->\n",
            // oneline comment in a paragraph (one Html event)
            "text <!-- double-hyphen -->",
            // oneline invalid comment in paragraph (multi Text event)
            "text <!-- --double-hyphen -->",
            // multiline invalid comment in a paragraph (multi Text event)
            "text <!-- \n--double-hyphen \n\n-->",
            // multiline invalid comment across multi paragraph (multi Text event)
            "text <!-- \n\n--double-hyphen \n\n\n-->",
        ];
        for input in inputs {
            assert_comment_removal(input);
        }
    }
}