md_to_text/
lib.rs

1#![warn(clippy::all, clippy::pedantic)]
2
3use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
4
5#[must_use]
6pub fn convert(markdown: &str) -> String {
7    // GFM tables and tasks lists are not enabled.
8    let mut options = Options::empty();
9    options.insert(Options::ENABLE_STRIKETHROUGH);
10
11    let parser = Parser::new_ext(markdown, options);
12    let mut tags_stack = Vec::new();
13    let mut buffer = String::new();
14
15    // For each event we push into the buffer to produce the plain text version.
16    for event in parser {
17        match event {
18            // The start and end events don't contain the text inside the tag. That's handled by the `Event::Text` arm.
19            Event::Start(tag) => {
20                start_tag(&tag, &mut buffer, &mut tags_stack);
21                tags_stack.push(tag);
22            }
23            Event::End(tag) => {
24                tags_stack.pop();
25                end_tag(&tag, &mut buffer, &tags_stack);
26            }
27            Event::Text(content) => {
28                if !tags_stack.iter().any(is_strikethrough) {
29                    buffer.push_str(&content);
30                }
31            }
32            Event::Code(content) => buffer.push_str(&content),
33            Event::SoftBreak => buffer.push(' '),
34            _ => (),
35        }
36    }
37    buffer.trim().to_string()
38}
39
40fn start_tag(tag: &Tag, buffer: &mut String, tags_stack: &mut [Tag]) {
41    match tag {
42        Tag::Link { title, .. } | Tag::Image { title, .. } => buffer.push_str(title),
43        Tag::Item => {
44            buffer.push('\n');
45            let mut lists_stack = tags_stack
46                .iter_mut()
47                .filter_map(|tag| match tag {
48                    Tag::List(nb) => Some(nb),
49                    _ => None,
50                })
51                .collect::<Vec<_>>();
52            let prefix_tabs_count = lists_stack.len() - 1;
53            for _ in 0..prefix_tabs_count {
54                buffer.push('\t');
55            }
56            if let Some(Some(nb)) = lists_stack.last_mut() {
57                buffer.push_str(&nb.to_string());
58                buffer.push_str(". ");
59                *nb += 1;
60            } else {
61                buffer.push_str("• ");
62            }
63        }
64        Tag::Paragraph | Tag::CodeBlock(_) | Tag::Heading { .. } => buffer.push('\n'),
65        _ => (),
66    }
67}
68
69fn end_tag(tag: &TagEnd, buffer: &mut String, tags_stack: &[Tag]) {
70    match tag {
71        TagEnd::Paragraph | TagEnd::Heading { .. } => buffer.push('\n'),
72        TagEnd::CodeBlock { .. } => {
73            if !buffer.ends_with('\n') {
74                buffer.push('\n');
75            }
76        }
77        TagEnd::List(_) => {
78            let is_sublist = tags_stack.iter().any(|tag| matches!(tag, Tag::List { .. }));
79            if !is_sublist {
80                buffer.push('\n');
81            }
82        }
83        _ => (),
84    }
85}
86
87fn is_strikethrough(tag: &Tag) -> bool {
88    matches!(tag, Tag::Strikethrough)
89}
90
/// End-to-end checks of `convert` on small Markdown snippets.
#[cfg(test)]
mod tests {
    use super::convert;

    #[test]
    fn empty_input() {
        assert_eq!(convert(""), "");
    }

    #[test]
    fn basic_inline_strong() {
        let markdown = "**Hello**";
        let expected = "Hello";
        assert_eq!(convert(markdown), expected);
    }

    #[test]
    fn basic_inline_emphasis() {
        let markdown = "_Hello_";
        let expected = "Hello";
        assert_eq!(convert(markdown), expected);
    }

    #[test]
    fn basic_header() {
        let markdown = r"# Header

## Sub header

End paragraph.";
        let expected = "Header

Sub header

End paragraph.";
        assert_eq!(convert(markdown), expected);
    }

    #[test]
    fn alt_header() {
        let markdown = r"
Header
======

End paragraph.";
        let expected = "Header

End paragraph.";
        assert_eq!(convert(markdown), expected);
    }

    #[test]
    fn strong_emphasis() {
        let markdown = "**asterisks and _underscores_**";
        let expected = "asterisks and underscores";
        assert_eq!(convert(markdown), expected);
    }

    #[test]
    fn strikethrough() {
        let markdown = "This was ~~erased~~ deleted.";
        // The erased word disappears but both surrounding spaces are kept.
        let expected = "This was  deleted.";
        assert_eq!(convert(markdown), expected);
    }

    #[test]
    fn mixed_list() {
        let markdown = r"Start paragraph.

1. First ordered list item
2. Another item
1. Actual numbers don't matter, just that it's a number
  1. Ordered sub-list
4. And another item.

End paragraph.";

        let expected = "Start paragraph.

1. First ordered list item
2. Another item
3. Actual numbers don't matter, just that it's a number
4. Ordered sub-list
5. And another item.

End paragraph.";
        assert_eq!(convert(markdown), expected);
    }

    #[test]
    fn ordered_list_custom_start() {
        // Numbering continues from the first item's number.
        let markdown = "3. alpha\n4. beta";
        let expected = "3. alpha\n4. beta";
        assert_eq!(convert(markdown), expected);
    }

    #[test]
    fn nested_lists() {
        let markdown = r"
* alpha
* beta
    * one
    * two
* gamma
";
        let expected = "• alpha
• beta
\t• one
\t• two
• gamma";
        assert_eq!(convert(markdown), expected);
    }

    #[test]
    fn list_with_header() {
        let markdown = r"# Title
* alpha
* beta
";
        let expected = r"Title

• alpha
• beta";
        assert_eq!(convert(markdown), expected);
    }

    #[test]
    fn basic_link() {
        let markdown = "I'm an [inline-style link](https://www.google.com).";
        let expected = "I'm an inline-style link.";
        assert_eq!(convert(markdown), expected);
    }

    #[ignore = "a bracketed bare URL without a link definition is not handled yet"]
    #[test]
    fn link_with_itself() {
        let markdown = "Go to [https://www.google.com].";
        let expected = "Go to https://www.google.com.";
        assert_eq!(convert(markdown), expected);
    }

    #[test]
    fn basic_image() {
        let markdown = "As displayed in ![img alt text](https://github.com/adam-p/markdown-here/raw/master/src/common/images/icon48.png).";
        let expected = "As displayed in img alt text.";
        assert_eq!(convert(markdown), expected);
    }

    #[test]
    fn inline_code() {
        let markdown = "This is `inline code`.";
        let expected = "This is inline code.";
        assert_eq!(convert(markdown), expected);
    }

    #[test]
    fn code_block() {
        // These literals contain double quotes, so the `#` delimiters are required here.
        let markdown = r#"Start paragraph.
```javascript
var s = "JavaScript syntax highlighting";
alert(s);
```
End paragraph."#;
        let expected = r#"Start paragraph.

var s = "JavaScript syntax highlighting";
alert(s);

End paragraph."#;
        assert_eq!(convert(markdown), expected);
    }

    #[test]
    fn block_quote() {
        let markdown = r"Start paragraph.

> Blockquotes are very handy in email to emulate reply text.
> This line is part of the same quote.

End paragraph.";
        let expected = "Start paragraph.

Blockquotes are very handy in email to emulate reply text. This line is part of the same quote.

End paragraph.";
        assert_eq!(convert(markdown), expected);
    }

    #[test]
    fn paragraphs() {
        let markdown = r"Paragraph 1.

Paragraph 2.";
        let expected = "Paragraph 1.

Paragraph 2.";
        assert_eq!(convert(markdown), expected);
    }
}