zed_util/markdown.rs
1use std::fmt::{Display, Formatter};
2
/// Newtype marking the contained `String` as markdown-formatted text.
#[derive(Debug, Clone)]
pub struct MarkdownString(pub String);

impl Display for MarkdownString {
    /// Emits the wrapped markdown verbatim; no escaping is performed and any width or padding
    /// requested by the caller's format spec is not applied.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self(markdown) = self;
        f.write_str(markdown)
    }
}
12
/// Escapes markdown special characters in markdown text blocks. Markdown code blocks follow
/// different rules and `MarkdownInlineCode` or `MarkdownCodeBlock` should be used in that case.
///
/// Also escapes the following markdown extensions:
///
/// * `^` for superscripts
/// * `$` for inline math
/// * `~` for strikethrough
///
/// Escape of some characters is unnecessary, because while they are involved in markdown syntax,
/// the other characters involved are escaped:
///
/// * `!`, `]`, `(`, and `)` are used in link syntax, but `[` is escaped so these are parsed as
///   plaintext.
///
/// * `;` is used in HTML entity syntax, but `&` is escaped, so they are parsed as plaintext.
///
/// The wrapper only borrows the text, so it is cheap to copy around.
///
/// TODO: There is one escape this doesn't do currently. Period after numbers at the start of the
/// line (`[0-9]*\.`) should also be escaped to avoid it being interpreted as a list item.
#[derive(Debug, Clone, Copy)]
pub struct MarkdownEscaped<'a>(pub &'a str);
33
/// Implements `Display` to format markdown inline code (wrapped in backticks), handling code that
/// contains backticks and spaces. All whitespace is treated as a single space character. For text
/// that does not contain whitespace other than ' ', this escaping roundtrips through
/// pulldown-cmark.
///
/// When used in tables, `|` should be escaped like `\|` in the text provided to this function.
///
/// The wrapper only borrows the text, so it is cheap to copy around.
#[derive(Debug, Clone, Copy)]
pub struct MarkdownInlineCode<'a>(pub &'a str);
41
/// Implements `Display` to format markdown code blocks, wrapped in 3 or more backticks as needed.
///
/// Both fields are borrowed, so the value is cheap to copy around.
#[derive(Debug, Clone, Copy)]
pub struct MarkdownCodeBlock<'a> {
    /// Text placed directly after the opening fence (typically the language tag).
    pub tag: &'a str,
    /// Contents of the code block, emitted between the fences.
    pub text: &'a str,
}
47
48impl Display for MarkdownEscaped<'_> {
49 fn fmt(&self, formatter: &mut Formatter<'_>) -> std::fmt::Result {
50 let mut start_of_unescaped = None;
51 for (ix, c) in self.0.char_indices() {
52 match c {
53 // Always escaped.
54 '\\' | '`' | '*' | '_' | '[' | '^' | '$' | '~' | '&' |
55 // TODO: these only need to be escaped when they are the first non-whitespace
56 // character of the line of a block. There should probably be both an `escape_block`
57 // which does this and an `escape_inline` method which does not escape these.
58 '#' | '+' | '=' | '-' => {
59 match start_of_unescaped {
60 None => {}
61 Some(start_of_unescaped) => {
62 write!(formatter, "{}", &self.0[start_of_unescaped..ix])?;
63 }
64 }
65 write!(formatter, "\\")?;
66 // Can include this char in the "unescaped" text since a
67 // backslash was just emitted.
68 start_of_unescaped = Some(ix);
69 }
70 // Escaped since `<` is used in opening HTML tags. `<` is used since Markdown
71 // supports HTML entities, and this allows the text to be used directly in HTML.
72 '<' => {
73 match start_of_unescaped {
74 None => {}
75 Some(start_of_unescaped) => {
76 write!(formatter, "{}", &self.0[start_of_unescaped..ix])?;
77 }
78 }
79 write!(formatter, "<")?;
80 start_of_unescaped = None;
81 }
82 // Escaped since `>` is used for blockquotes. `>` is used since Markdown supports
83 // HTML entities, and this allows the text to be used directly in HTML.
84 '>' => {
85 match start_of_unescaped {
86 None => {}
87 Some(start_of_unescaped) => {
88 write!(formatter, "{}", &self.0[start_of_unescaped..ix])?;
89 }
90 }
91 write!(formatter, ">")?;
92 start_of_unescaped = None;
93 }
94 _ => {
95 if start_of_unescaped.is_none() {
96 start_of_unescaped = Some(ix);
97 }
98 }
99 }
100 }
101 if let Some(start_of_unescaped) = start_of_unescaped {
102 write!(formatter, "{}", &self.0[start_of_unescaped..])?;
103 }
104 Ok(())
105 }
106}
107
108impl Display for MarkdownInlineCode<'_> {
109 fn fmt(&self, formatter: &mut Formatter<'_>) -> std::fmt::Result {
110 // Apache License 2.0, same as this crate.
111 //
112 // Copied from `pulldown-cmark-to-cmark-20.0.0` with modifications:
113 //
114 // * Handling of all whitespace. pulldown-cmark-to-cmark is anticipating
115 // `Code` events parsed by pulldown-cmark.
116 //
117 // https://github.com/Byron/pulldown-cmark-to-cmark/blob/3c850de2d3d1d79f19ca5f375e1089a653cf3ff7/src/lib.rs#L290
118
119 let mut all_whitespace = true;
120 let text = self
121 .0
122 .chars()
123 .map(|c| {
124 if c.is_whitespace() {
125 ' '
126 } else {
127 all_whitespace = false;
128 c
129 }
130 })
131 .collect::<String>();
132
133 // When inline code has leading and trailing ' ' characters, additional space is needed
134 // to escape it, unless all characters are space.
135 if all_whitespace {
136 write!(formatter, "`{text}`")
137 } else {
138 // More backticks are needed to delimit the inline code than the maximum number of
139 // backticks in a consecutive run.
140 let backticks = "`".repeat(count_max_consecutive_chars(&text, '`') + 1);
141 let space = match text.as_bytes() {
142 &[b'`', ..] | &[.., b'`'] => " ", // Space needed to separate backtick.
143 &[b' ', .., b' '] => " ", // Space needed to escape inner space.
144 _ => "", // No space needed.
145 };
146 write!(formatter, "{backticks}{space}{text}{space}{backticks}")
147 }
148 }
149}
150
151impl Display for MarkdownCodeBlock<'_> {
152 fn fmt(&self, formatter: &mut Formatter<'_>) -> std::fmt::Result {
153 let tag = self.tag;
154 let text = self.text;
155 let backticks = "`".repeat(3.max(count_max_consecutive_chars(text, '`') + 1));
156 write!(formatter, "{backticks}{tag}\n{text}\n{backticks}\n")
157 }
158}
159
// Copied from `pulldown-cmark-to-cmark-20.0.0` with changed names.
// https://github.com/Byron/pulldown-cmark-to-cmark/blob/3c850de2d3d1d79f19ca5f375e1089a653cf3ff7/src/lib.rs#L1063
// Apache License 2.0, same as this code.
//
// Returns the length, in characters, of the longest uninterrupted run of `search` within `text`,
// or 0 when `search` does not occur at all.
fn count_max_consecutive_chars(text: &str, search: char) -> usize {
    // Splitting on every character that is NOT `search` leaves only the (possibly empty) runs
    // of `search`; the longest of those is the answer.
    text.split(|ch: char| ch != search)
        .map(|run| run.chars().count())
        .max()
        .unwrap_or(0)
}
180
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs a document exercising the backslash-escaped characters through `MarkdownEscaped`.
    /// Input and expected output share the same indentation, so only the escapes differ.
    #[test]
    fn test_markdown_escaped() {
        let input = r#"
        # Heading

        Another heading
        ===

        Another heading variant
        ---

        Paragraph with [link](https://example.com) and `code`, *emphasis*, and ~strikethrough~.

        ```
        code block
        ```

        List with varying leaders:
        - Item 1
        * Item 2
        + Item 3

        Some math: $`\sqrt{3x-1}+(1+x)^2`$

        HTML entity: &nbsp;
        "#;

        let expected = r#"
        \# Heading

        Another heading
        \=\=\=

        Another heading variant
        \-\-\-

        Paragraph with \[link](https://example.com) and \`code\`, \*emphasis\*, and \~strikethrough\~.

        \`\`\`
        code block
        \`\`\`

        List with varying leaders:
        \- Item 1
        \* Item 2
        \+ Item 3

        Some math: \$\`\\sqrt{3x\-1}\+(1\+x)\^2\`\$

        HTML entity: \&nbsp;
        "#;

        assert_eq!(MarkdownEscaped(input).to_string(), expected);
    }

    /// Checks delimiter length and space padding for inline code spans.
    #[test]
    fn test_markdown_inline_code() {
        assert_eq!(MarkdownInlineCode(" ").to_string(), "` `");
        assert_eq!(MarkdownInlineCode("text").to_string(), "`text`");
        assert_eq!(MarkdownInlineCode("text ").to_string(), "`text `");
        // Text wrapped in spaces gets one extra space on each side so the originals survive.
        assert_eq!(MarkdownInlineCode(" text ").to_string(), "`  text  `");
        assert_eq!(MarkdownInlineCode("`").to_string(), "`` ` ``");
        assert_eq!(MarkdownInlineCode("``").to_string(), "``` `` ```");
        assert_eq!(MarkdownInlineCode("`text`").to_string(), "`` `text` ``");
        assert_eq!(
            MarkdownInlineCode("some `text` no leading or trailing backticks").to_string(),
            "``some `text` no leading or trailing backticks``"
        );
    }

    /// Checks that the fence is at least three backticks and longer than any run in the text.
    #[test]
    fn test_markdown_code_block() {
        assert_eq!(
            MarkdownCodeBlock {
                tag: "rust",
                text: "fn main() {}"
            }
            .to_string(),
            "```rust\nfn main() {}\n```\n"
        );
        assert_eq!(
            MarkdownCodeBlock {
                tag: "",
                text: "a ``` b"
            }
            .to_string(),
            "````\na ``` b\n````\n"
        );
    }

    #[test]
    fn test_count_max_consecutive_chars() {
        assert_eq!(
            count_max_consecutive_chars("``a```b``", '`'),
            3,
            "the highest seen consecutive segment of backticks counts"
        );
        assert_eq!(
            count_max_consecutive_chars("```a``b`", '`'),
            3,
            "it can't be downgraded later"
        );
        assert_eq!(
            count_max_consecutive_chars("", '`'),
            0,
            "empty text contains no run"
        );
    }
}