Skip to main content

use_markdown/
plain_text.rs

1use crate::code_fence::{FenceDelimiter, is_closing_fence, parse_opening_fence};
2use crate::frontmatter::frontmatter_line_count;
3use crate::heading::parse_heading_line;
4use crate::link::{InlineReferenceKind, parse_inline_reference_at};
5
6/// Converts Markdown into lightweight plain text.
7pub fn markdown_to_plain_text(markdown: &str) -> String {
8    let frontmatter_lines = frontmatter_line_count(markdown);
9    let mut lines = Vec::new();
10    let mut active_fence: Option<FenceDelimiter> = None;
11
12    for (index, line) in markdown.lines().enumerate() {
13        if index < frontmatter_lines {
14            continue;
15        }
16
17        if let Some(delimiter) = active_fence {
18            if is_closing_fence(line, delimiter) {
19                active_fence = None;
20                continue;
21            }
22
23            let trimmed = line.trim();
24            if !trimmed.is_empty() {
25                lines.push(trimmed.to_owned());
26            }
27            continue;
28        }
29
30        if let Some(opening) = parse_opening_fence(line) {
31            active_fence = Some(opening.delimiter);
32            continue;
33        }
34
35        if crate::is_horizontal_rule(line) {
36            continue;
37        }
38
39        let mut candidate = strip_blockquote_markers(line);
40        if let Some((_, heading_text)) = parse_heading_line(candidate) {
41            candidate = heading_text;
42        } else if let Some(content) = crate::ordered_list_item_content(candidate) {
43            candidate = content;
44        } else if let Some(content) = crate::unordered_list_item_content(candidate) {
45            candidate = content;
46        }
47
48        let cleaned = inline_markdown_to_text(candidate);
49        if !cleaned.is_empty() {
50            lines.push(cleaned);
51        }
52    }
53
54    lines.join("\n")
55}
56
57pub(crate) fn inline_markdown_to_text(input: &str) -> String {
58    let mut output = String::new();
59    let mut index = 0usize;
60    let bytes = input.as_bytes();
61
62    while index < bytes.len() {
63        if bytes[index] == b'!'
64            && bytes.get(index + 1) == Some(&b'[')
65            && let Some((reference, next_index)) =
66                parse_inline_reference_at(input, index, InlineReferenceKind::Image, 0)
67        {
68            output.push_str(reference.label.trim());
69            index = next_index;
70            continue;
71        }
72
73        if bytes[index] == b'['
74            && (index == 0 || bytes[index - 1] != b'!')
75            && let Some((reference, next_index)) =
76                parse_inline_reference_at(input, index, InlineReferenceKind::Link, 0)
77        {
78            output.push_str(reference.label.trim());
79            index = next_index;
80            continue;
81        }
82
83        let Some(character) = input[index..].chars().next() else {
84            break;
85        };
86
87        if character == '\\' {
88            let next_index = index + character.len_utf8();
89            if let Some(next_character) = input[next_index..].chars().next() {
90                output.push(next_character);
91                index = next_index + next_character.len_utf8();
92            } else {
93                index = next_index;
94            }
95            continue;
96        }
97
98        if matches!(character, '*' | '_' | '`' | '~') {
99            index += character.len_utf8();
100            continue;
101        }
102
103        output.push(character);
104        index += character.len_utf8();
105    }
106
107    collapse_whitespace(&output)
108}
109
/// Removes leading whitespace and any number of leading `>` blockquote
/// markers (with the whitespace between and after them) from `line`.
///
/// Leading whitespace is removed even when the line has no `>` marker.
/// Everything from the first character that is neither whitespace nor `>`
/// onward is returned unchanged.
fn strip_blockquote_markers(line: &str) -> &str {
    line.trim_start_matches(|ch: char| ch == '>' || ch.is_whitespace())
}
117
/// Returns `input` with every run of whitespace replaced by a single space
/// and with leading and trailing whitespace removed.
fn collapse_whitespace(input: &str) -> String {
    let mut collapsed = String::with_capacity(input.len());
    for word in input.split_whitespace() {
        // Words are never empty, so an empty buffer means this is the first word.
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed
}