Skip to main content

mermaid_cli/render/
markdown.rs

1use pulldown_cmark::{CodeBlockKind, Event, HeadingLevel, Options, Parser, Tag, TagEnd};
2use ratatui::macros::{line, span};
3use ratatui::style::{Color, Modifier, Style};
4use ratatui::text::{Line, Span};
5use unicode_width::UnicodeWidthStr;
6
/// Per-list bookkeeping kept on a stack while walking nested markdown lists.
#[derive(Clone, Debug)]
struct ListState {
    /// Number to print for the next item of an ordered list.
    ///
    /// `None` marks a list whose items are rendered with a bullet instead
    /// of a number.
    next_number: Option<u64>,
}
11
12/// Parse markdown and convert to styled ratatui Lines
13pub fn parse_markdown(input: &str) -> Vec<Line<'static>> {
14    let mut options = Options::empty();
15    options.insert(Options::ENABLE_STRIKETHROUGH);
16    options.insert(Options::ENABLE_TABLES);
17
18    let parser = Parser::new_ext(input, options);
19    let mut lines = Vec::new();
20    let mut current_line_spans = Vec::new();
21    let mut style_stack = vec![Style::default()];
22    let mut in_code_block = false;
23    let mut code_block_content = String::new();
24    let mut list_stack: Vec<ListState> = Vec::new();
25
26    // Table state
27    let mut in_table = false;
28    let mut table_rows: Vec<Vec<String>> = Vec::new();
29    let mut current_row: Vec<String> = Vec::new();
30    let mut current_cell = String::new();
31    let mut table_header_len: usize = 0;
32
33    for event in parser {
34        match event {
35            Event::Start(tag) => {
36                let new_style = match tag {
37                    Tag::Heading { level, .. } => {
38                        // Start new line for headers
39                        if !current_line_spans.is_empty() {
40                            lines.push(Line::from(std::mem::take(&mut current_line_spans)));
41                        }
42
43                        // Add blank line before heading (except for first heading)
44                        if !lines.is_empty() {
45                            lines.push(Line::from(""));
46                        }
47
48                        // Apply style based on level (without visible prefix)
49                        match level {
50                            HeadingLevel::H1 => Style::new().fg(Color::Cyan).bold(),
51                            HeadingLevel::H2 => Style::new().fg(Color::Blue).bold(),
52                            HeadingLevel::H3 => Style::new().fg(Color::Green).bold(),
53                            _ => Style::new().fg(Color::Yellow).bold(),
54                        }
55                    },
56                    Tag::Emphasis => style_stack.last().copied().unwrap_or_default().italic(),
57                    Tag::Strong => style_stack.last().copied().unwrap_or_default().bold(),
58                    Tag::Strikethrough => style_stack
59                        .last()
60                        .copied()
61                        .unwrap_or_default()
62                        .crossed_out(),
63                    Tag::CodeBlock(kind) => {
64                        in_code_block = true;
65                        code_block_content.clear();
66                        // Start new line for code block
67                        if !current_line_spans.is_empty() {
68                            lines.push(Line::from(std::mem::take(&mut current_line_spans)));
69                        }
70                        // Show language label if present, otherwise just a blank separator
71                        let lang = match kind {
72                            CodeBlockKind::Fenced(lang) => lang.to_string(),
73                            CodeBlockKind::Indented => "".to_string(),
74                        };
75                        if !lang.is_empty() {
76                            lines.push(line![span!(Color::DarkGray; &lang)]);
77                        }
78                        Style::default().fg(Color::Gray)
79                    },
80                    Tag::List(start) => {
81                        list_stack.push(ListState { next_number: start });
82                        if !current_line_spans.is_empty() {
83                            lines.push(Line::from(std::mem::take(&mut current_line_spans)));
84                        }
85                        style_stack.last().copied().unwrap_or_default()
86                    },
87                    Tag::Item => {
88                        // Add marker with 2-space base indentation plus nesting levels.
89                        // depth=1: 2 spaces, depth=2: 4 spaces, etc.
90                        let indent = "  ".repeat(list_stack.len());
91                        let marker = if let Some(state) = list_stack.last_mut() {
92                            if let Some(current) = state.next_number {
93                                state.next_number = Some(current + 1);
94                                format!("{}. ", current)
95                            } else {
96                                "• ".to_string()
97                            }
98                        } else {
99                            "• ".to_string()
100                        };
101                        current_line_spans.push(Span::raw(indent));
102                        current_line_spans
103                            .push(Span::styled(marker, Style::default().fg(Color::Yellow)));
104                        style_stack.last().copied().unwrap_or_default()
105                    },
106                    Tag::Table(_alignments) => {
107                        in_table = true;
108                        table_rows.clear();
109                        table_header_len = 0;
110                        // Flush any pending spans
111                        if !current_line_spans.is_empty() {
112                            lines.push(Line::from(std::mem::take(&mut current_line_spans)));
113                        }
114                        style_stack.last().copied().unwrap_or_default()
115                    },
116                    Tag::TableHead => {
117                        current_row.clear();
118                        style_stack.last().copied().unwrap_or_default()
119                    },
120                    Tag::TableRow => {
121                        current_row.clear();
122                        style_stack.last().copied().unwrap_or_default()
123                    },
124                    Tag::TableCell => {
125                        current_cell.clear();
126                        style_stack.last().copied().unwrap_or_default()
127                    },
128                    Tag::Link { .. } => {
129                        current_line_spans
130                            .push(Span::styled("[", Style::default().fg(Color::Blue)));
131                        Style::default()
132                            .fg(Color::Blue)
133                            .add_modifier(Modifier::UNDERLINED)
134                    },
135                    Tag::BlockQuote(_) => {
136                        if !current_line_spans.is_empty() {
137                            lines.push(Line::from(std::mem::take(&mut current_line_spans)));
138                        }
139                        current_line_spans
140                            .push(Span::styled("│ ", Style::default().fg(Color::DarkGray)));
141                        Style::default()
142                            .fg(Color::Gray)
143                            .add_modifier(Modifier::ITALIC)
144                    },
145                    _ => style_stack.last().copied().unwrap_or_default(),
146                };
147                style_stack.push(new_style);
148            },
149            Event::End(tag) => {
150                style_stack.pop();
151                match tag {
152                    TagEnd::Heading(_) => {
153                        if !current_line_spans.is_empty() {
154                            lines.push(Line::from(std::mem::take(&mut current_line_spans)));
155                        }
156                        // Don't add blank line here - let lists flow directly from headings
157                        // Blank line before next heading is added by Tag::Heading
158                    },
159                    TagEnd::Paragraph | TagEnd::Item => {
160                        if !current_line_spans.is_empty() {
161                            lines.push(Line::from(std::mem::take(&mut current_line_spans)));
162                        }
163                    },
164                    TagEnd::CodeBlock => {
165                        in_code_block = false;
166                        // Render code block content (no fence markers)
167                        for line in code_block_content.lines() {
168                            lines.push(Line::from(vec![Span::styled(
169                                line.to_string(),
170                                Style::default().fg(Color::Gray),
171                            )]));
172                        }
173                        code_block_content.clear();
174                    },
175                    TagEnd::List(_) => {
176                        let _ = list_stack.pop();
177                        // Add blank line after list ends (when returning to depth 0)
178                        if list_stack.is_empty() {
179                            lines.push(Line::from(""));
180                        }
181                    },
182                    TagEnd::TableCell => {
183                        current_row.push(std::mem::take(&mut current_cell));
184                    },
185                    TagEnd::TableHead => {
186                        table_header_len = current_row.len();
187                        table_rows.push(std::mem::take(&mut current_row));
188                    },
189                    TagEnd::TableRow => {
190                        table_rows.push(std::mem::take(&mut current_row));
191                    },
192                    TagEnd::Table => {
193                        in_table = false;
194                        // Compute column widths in **display cells**, not bytes.
195                        // CJK / emoji cells (3 bytes per codepoint, 2 cells)
196                        // would otherwise inflate widths by ~3× and misalign
197                        // every other row. Padding below also uses cell-based
198                        // computation rather than format!'s char-count default.
199                        let num_cols = table_rows.iter().map(|r| r.len()).max().unwrap_or(0);
200                        let mut col_widths = vec![0usize; num_cols];
201                        for row in &table_rows {
202                            for (i, cell) in row.iter().enumerate() {
203                                if i < num_cols {
204                                    col_widths[i] = col_widths[i].max(cell.width());
205                                }
206                            }
207                        }
208                        // Minimum column width of 3
209                        for w in &mut col_widths {
210                            *w = (*w).max(3);
211                        }
212
213                        let border_style = Style::default().fg(Color::DarkGray);
214                        let header_style = Style::default().fg(Color::Cyan).bold();
215                        let cell_style = Style::default().fg(Color::White);
216
217                        for (row_idx, row) in table_rows.iter().enumerate() {
218                            let mut spans = Vec::new();
219                            spans.push(Span::styled("| ", border_style));
220                            for (col_idx, cell) in row.iter().enumerate() {
221                                let width = col_widths.get(col_idx).copied().unwrap_or(3);
222                                // Pad to `width` display cells (not chars or
223                                // bytes). `{:<width$}` would pad to chars,
224                                // which is wrong for CJK (each char is 2 cells
225                                // but counted as 1 char by the formatter).
226                                let cell_w = cell.width();
227                                let padding = width.saturating_sub(cell_w);
228                                let padded = format!("{}{}", cell, " ".repeat(padding));
229                                let style = if row_idx == 0 && table_header_len > 0 {
230                                    header_style
231                                } else {
232                                    cell_style
233                                };
234                                spans.push(Span::styled(padded, style));
235                                spans.push(Span::styled(" | ", border_style));
236                            }
237                            lines.push(Line::from(spans));
238
239                            // Add separator after header row. `width` is now
240                            // in cells; "-" is 1 byte / 1 cell so repeating
241                            // produces exactly `width` cells of dashes.
242                            if row_idx == 0 && table_header_len > 0 {
243                                let mut sep_spans = Vec::new();
244                                sep_spans.push(Span::styled("|-", border_style));
245                                for (col_idx, _) in row.iter().enumerate() {
246                                    let width = col_widths.get(col_idx).copied().unwrap_or(3);
247                                    let dashes = "-".repeat(width);
248                                    sep_spans.push(Span::styled(dashes, border_style));
249                                    sep_spans.push(Span::styled("-|-", border_style));
250                                }
251                                lines.push(Line::from(sep_spans));
252                            }
253                        }
254
255                        // Blank line after table
256                        lines.push(Line::from(""));
257                        table_rows.clear();
258                    },
259                    TagEnd::Link => {
260                        current_line_spans
261                            .push(Span::styled("]", Style::default().fg(Color::Blue)));
262                    },
263                    TagEnd::BlockQuote(_) => {
264                        if !current_line_spans.is_empty() {
265                            lines.push(Line::from(std::mem::take(&mut current_line_spans)));
266                        }
267                    },
268                    _ => {},
269                }
270            },
271            Event::Text(text) => {
272                if in_code_block {
273                    code_block_content.push_str(&text);
274                } else if in_table {
275                    current_cell.push_str(&text);
276                } else {
277                    let style = style_stack.last().copied().unwrap_or_default();
278                    current_line_spans.push(Span::styled(text.to_string(), style));
279                }
280            },
281            Event::Code(code) => {
282                if in_table {
283                    current_cell.push_str(&code);
284                } else {
285                    let style = Style::default()
286                        .fg(Color::Yellow)
287                        .bg(Color::Rgb(40, 40, 40));
288                    current_line_spans.push(Span::styled(format!(" {} ", code), style));
289                }
290            },
291            Event::SoftBreak | Event::HardBreak => {
292                if !current_line_spans.is_empty() {
293                    lines.push(Line::from(std::mem::take(&mut current_line_spans)));
294                }
295            },
296            _ => {},
297        }
298    }
299
300    // Add any remaining spans as a line
301    if !current_line_spans.is_empty() {
302        lines.push(Line::from(current_line_spans));
303    }
304
305    lines
306}
307
#[cfg(test)]
mod tests {
    use super::*;

    /// Helper: concatenate the span contents of each line and join the
    /// resulting per-line strings with `\n`.
    fn lines_to_text(lines: &[Line]) -> String {
        let mut rendered: Vec<String> = Vec::with_capacity(lines.len());
        for line in lines {
            let mut buf = String::new();
            for span in &line.spans {
                buf.push_str(span.content.as_ref());
            }
            rendered.push(buf);
        }
        rendered.join("\n")
    }

    /// Helper: parse `input` and return the flattened text of the result.
    fn render(input: &str) -> String {
        lines_to_text(&parse_markdown(input))
    }

    #[test]
    fn test_plain_text() {
        let parsed = parse_markdown("Hello, world!");
        assert!(!parsed.is_empty());
        assert!(lines_to_text(&parsed).contains("Hello, world!"));
    }

    #[test]
    fn test_heading_levels() {
        let parsed = parse_markdown("# H1\n## H2\n### H3");
        let rendered = lines_to_text(&parsed);
        for heading in &["H1", "H2", "H3"] {
            assert!(rendered.contains(*heading));
        }

        // Colours are not inspected here; this only checks that every
        // heading produced output and nothing panicked.
        assert!(parsed.len() >= 3);
    }

    #[test]
    fn test_code_block() {
        let rendered = render("```rust\nfn main() {}\n```");
        assert!(rendered.contains("fn main() {}"));
        // The fence's language label is rendered as well.
        assert!(rendered.contains("rust"));
    }

    #[test]
    fn test_code_block_no_lang() {
        let rendered = render("```\nsome code\n```");
        assert!(rendered.contains("some code"));
    }

    #[test]
    fn test_inline_code() {
        let rendered = render("Use `cargo build` to compile");
        assert!(rendered.contains("cargo build"));
    }

    #[test]
    fn test_unordered_list() {
        let rendered = render("- Item 1\n- Item 2\n- Item 3");
        for item in &["Item 1", "Item 2", "Item 3"] {
            assert!(rendered.contains(*item));
        }
        // Unordered items carry a bullet marker.
        assert!(rendered.contains("•"));
    }

    #[test]
    fn test_ordered_list_preserves_numbers() {
        let rendered = render("1. First\n2. Second\n3. Third");
        for item in &["1. First", "2. Second", "3. Third"] {
            assert!(rendered.contains(*item));
        }
        assert!(!rendered.contains("• First"));
    }

    #[test]
    fn test_nested_list() {
        let rendered = render("- Outer\n  - Inner");
        assert!(rendered.contains("Outer"));
        assert!(rendered.contains("Inner"));
    }

    #[test]
    fn test_bold_and_italic() {
        let rendered = render("**bold** and *italic*");
        assert!(rendered.contains("bold"));
        assert!(rendered.contains("italic"));
    }

    #[test]
    fn test_link() {
        let rendered = render("[click here](https://example.com)");
        for piece in &["click here", "[", "]"] {
            assert!(rendered.contains(*piece));
        }
    }

    #[test]
    fn test_blockquote() {
        let rendered = render("> Quoted text");
        assert!(rendered.contains("Quoted text"));
        assert!(rendered.contains("│"));
    }

    #[test]
    fn test_table() {
        let rendered =
            render("| Header1 | Header2 |\n|---------|--------|\n| Cell1   | Cell2  |");
        for piece in &["Header1", "Cell1", "|"] {
            assert!(rendered.contains(*piece));
        }
    }

    #[test]
    fn test_strikethrough() {
        let rendered = render("~~deleted~~");
        assert!(rendered.contains("deleted"));
    }

    #[test]
    fn test_empty_input() {
        assert!(parse_markdown("").is_empty());
    }

    #[test]
    fn test_multiple_paragraphs() {
        let rendered = render("Paragraph 1\n\nParagraph 2");
        assert!(rendered.contains("Paragraph 1"));
        assert!(rendered.contains("Paragraph 2"));
    }

    /// Regression test for table alignment with wide characters: a byte
    /// length counts 3 bytes for each 2-cell CJK character, which would
    /// misalign rows. Column widths and padding are computed in display
    /// cells, so a CJK row and an ASCII row must render equally wide.
    #[test]
    fn table_column_widths_use_display_cells() {
        let input = "| Name | Score |\n|------|-------|\n| 你好 | 100   |\n| ab   | 50    |";
        let rows: Vec<String> = parse_markdown(input)
            .iter()
            .map(|line| line.spans.iter().map(|s| s.content.as_ref()).collect())
            .collect();

        // Pick the body row containing "你好" and the table row containing
        // "ab"; when several rows match, the last one wins.
        let cjk_row_width = rows
            .iter()
            .rev()
            .find(|row| row.contains("你好"))
            .map_or(0usize, |row| row.width());
        let ascii_row_width = rows
            .iter()
            .rev()
            .find(|row| !row.contains("你好") && row.contains("ab") && row.contains("|"))
            .map_or(0usize, |row| row.width());

        assert!(cjk_row_width > 0, "did not find the CJK body row");
        assert!(ascii_row_width > 0, "did not find the ASCII body row");
        assert_eq!(
            cjk_row_width, ascii_row_width,
            "CJK and ASCII rows must have equal display width for the table to align"
        );
    }
}