markdown_to_text/
lib.rs

1#![warn(clippy::all, clippy::pedantic)]
2
3use pulldown_cmark::{Event, Options, Parser, Tag};
4
5#[must_use]
6pub fn convert(markdown: &str) -> String {
7    // GFM tables and tasks lists are not enabled.
8    let mut options = Options::empty();
9    options.insert(Options::ENABLE_STRIKETHROUGH);
10
11    let parser = Parser::new_ext(&markdown, options);
12    let mut tags_stack = Vec::new();
13    let mut buffer = String::new();
14
15    // For each event we push into the buffer to produce the plain text version.
16    for event in parser {
17        match event {
18            // The start and end events don't contain the text inside the tag. That's handled by the `Event::Text` arm.
19            Event::Start(tag) => {
20                start_tag(&tag, &mut buffer, &mut tags_stack);
21                tags_stack.push(tag);
22            }
23            Event::End(tag) => {
24                tags_stack.pop();
25                end_tag(&tag, &mut buffer, &tags_stack);
26            }
27            Event::Text(content) => {
28                if !tags_stack.iter().any(is_strikethrough) {
29                    buffer.push_str(&content)
30                }
31            }
32            Event::Code(content) => buffer.push_str(&content),
33            Event::SoftBreak => buffer.push(' '),
34            _ => (),
35        }
36    }
37    buffer.trim().to_string()
38}
39
40fn start_tag(tag: &Tag, buffer: &mut String, tags_stack: &mut Vec<Tag>) {
41    match tag {
42        Tag::Link(_, _, title) | Tag::Image(_, _, title) => buffer.push_str(&title),
43        Tag::Item => {
44            buffer.push('\n');
45            let mut lists_stack = tags_stack
46                .iter_mut()
47                .filter_map(|tag| match tag {
48                    Tag::List(nb) => Some(nb),
49                    _ => None,
50                })
51                .collect::<Vec<_>>();
52            let prefix_tabs_count = lists_stack.len() - 1;
53            for _ in 0..prefix_tabs_count {
54                buffer.push('\t')
55            }
56            if let Some(Some(nb)) = lists_stack.last_mut() {
57                buffer.push_str(&nb.to_string());
58                buffer.push_str(". ");
59                *nb += 1;
60            } else {
61                buffer.push_str("• ");
62            }
63        }
64        Tag::Paragraph | Tag::CodeBlock(_) | Tag::Heading(_) => buffer.push('\n'),
65        _ => (),
66    }
67}
68
69fn end_tag(tag: &Tag, buffer: &mut String, tags_stack: &[Tag]) {
70    match tag {
71        Tag::Paragraph | Tag::Heading(_) => buffer.push('\n'),
72        Tag::CodeBlock(_) => {
73            if buffer.chars().last() != Some('\n') {
74                buffer.push('\n');
75            }
76        }
77        Tag::List(_) => {
78            let is_sublist = tags_stack.iter().any(|tag| match tag {
79                Tag::List(_) => true,
80                _ => false,
81            });
82            if !is_sublist {
83                buffer.push('\n')
84            }
85        }
86        _ => (),
87    }
88}
89
90fn is_strikethrough(tag: &Tag) -> bool {
91    match tag {
92        Tag::Strikethrough => true,
93        _ => false,
94    }
95}
96
/// Unit tests pinning the plain-text output of `convert` for common Markdown constructs.
#[cfg(test)]
mod tests {
    use super::convert;

    /// Strong markers are removed, the text is kept.
    #[test]
    fn basic_inline_strong() {
        assert_eq!(convert("**Hello**"), "Hello");
    }

    /// Emphasis markers are removed, the text is kept.
    #[test]
    fn basic_inline_emphasis() {
        assert_eq!(convert("_Hello_"), "Hello");
    }

    /// ATX headings lose their `#` prefix and stay separated by blank lines.
    #[test]
    fn basic_header() {
        let input = "# Header

## Sub header

End paragraph.";
        let want = "Header

Sub header

End paragraph.";
        assert_eq!(convert(input), want);
    }

    /// Setext (underlined) headings lose their underline.
    #[test]
    fn alt_header() {
        let input = "
Header
======

End paragraph.";
        let want = "Header

End paragraph.";
        assert_eq!(convert(input), want);
    }

    /// Nested inline markup is flattened.
    #[test]
    fn strong_emphasis() {
        assert_eq!(
            convert("**asterisks and _underscores_**"),
            "asterisks and underscores"
        );
    }

    /// Struck-through text is dropped; the spaces on both sides of it remain.
    #[test]
    fn strikethrough() {
        assert_eq!(
            convert("This was ~~erased~~ deleted."),
            "This was  deleted."
        );
    }

    /// Ordered items are renumbered sequentially whatever numbers the source used.
    #[test]
    fn mixed_list() {
        let input = "Start paragraph.

1. First ordered list item
2. Another item
1. Actual numbers don't matter, just that it's a number
  1. Ordered sub-list
4. And another item.

End paragraph.";

        let want = "Start paragraph.

1. First ordered list item
2. Another item
3. Actual numbers don't matter, just that it's a number
4. Ordered sub-list
5. And another item.

End paragraph.";
        assert_eq!(convert(input), want);
    }

    /// Unordered items get a bullet; sub-list items are indented with a tab.
    #[test]
    fn nested_lists() {
        let input = "
* alpha
* beta
    * one
    * two
* gamma
";
        let want = "• alpha
• beta
\t• one
\t• two
• gamma";
        assert_eq!(convert(input), want);
    }

    /// A list that directly follows a heading is separated from it by a blank line.
    #[test]
    fn list_with_header() {
        let input = "# Title
* alpha
* beta
";
        let want = "Title

• alpha
• beta";
        assert_eq!(convert(input), want);
    }

    /// Inline links keep their text and lose their destination.
    #[test]
    fn basic_link() {
        assert_eq!(
            convert("I'm an [inline-style link](https://www.google.com)."),
            "I'm an inline-style link."
        );
    }

    /// Bracketed bare URL; this test is currently ignored.
    // NOTE(review): presumably ignored because `[url]` without a destination is not parsed as
    // a link, so the brackets stay in the output — confirm before enabling.
    #[test]
    #[ignore]
    fn link_with_itself() {
        assert_eq!(
            convert("Go to [https://www.google.com]."),
            "Go to https://www.google.com."
        );
    }

    /// Images are replaced by their alt text.
    #[test]
    fn basic_image() {
        let input = "As displayed in ![img alt text](https://github.com/adam-p/markdown-here/raw/master/src/common/images/icon48.png).";
        assert_eq!(convert(input), "As displayed in img alt text.");
    }

    /// Inline code keeps its content and loses the backticks.
    #[test]
    fn inline_code() {
        assert_eq!(convert("This is `inline code`."), "This is inline code.");
    }

    /// Fenced code blocks keep their content and are set apart by blank lines.
    #[test]
    fn code_block() {
        let input = r#"Start paragraph.
```javascript
var s = "JavaScript syntax highlighting";
alert(s);
```
End paragraph."#;
        let want = r#"Start paragraph.

var s = "JavaScript syntax highlighting";
alert(s);

End paragraph."#;
        assert_eq!(convert(input), want);
    }

    /// Block quotes lose their `>` markers; their lines are joined with a space.
    #[test]
    fn block_quote() {
        let input = "Start paragraph.

> Blockquotes are very handy in email to emulate reply text.
> This line is part of the same quote.

End paragraph.";
        let want = "Start paragraph.

Blockquotes are very handy in email to emulate reply text. This line is part of the same quote.

End paragraph.";
        assert_eq!(convert(input), want);
    }

    /// Consecutive paragraphs stay separated by exactly one blank line.
    #[test]
    fn paragraphs() {
        let input = "Paragraph 1.

Paragraph 2.";
        let want = "Paragraph 1.

Paragraph 2.";
        assert_eq!(convert(input), want);
    }
}