use_markdown/
plain_text.rs1use crate::code_fence::{FenceDelimiter, is_closing_fence, parse_opening_fence};
2use crate::frontmatter::frontmatter_line_count;
3use crate::heading::parse_heading_line;
4use crate::link::{InlineReferenceKind, parse_inline_reference_at};
5
6pub fn markdown_to_plain_text(markdown: &str) -> String {
8 let frontmatter_lines = frontmatter_line_count(markdown);
9 let mut lines = Vec::new();
10 let mut active_fence: Option<FenceDelimiter> = None;
11
12 for (index, line) in markdown.lines().enumerate() {
13 if index < frontmatter_lines {
14 continue;
15 }
16
17 if let Some(delimiter) = active_fence {
18 if is_closing_fence(line, delimiter) {
19 active_fence = None;
20 continue;
21 }
22
23 let trimmed = line.trim();
24 if !trimmed.is_empty() {
25 lines.push(trimmed.to_owned());
26 }
27 continue;
28 }
29
30 if let Some(opening) = parse_opening_fence(line) {
31 active_fence = Some(opening.delimiter);
32 continue;
33 }
34
35 if crate::is_horizontal_rule(line) {
36 continue;
37 }
38
39 let mut candidate = strip_blockquote_markers(line);
40 if let Some((_, heading_text)) = parse_heading_line(candidate) {
41 candidate = heading_text;
42 } else if let Some(content) = crate::ordered_list_item_content(candidate) {
43 candidate = content;
44 } else if let Some(content) = crate::unordered_list_item_content(candidate) {
45 candidate = content;
46 }
47
48 let cleaned = inline_markdown_to_text(candidate);
49 if !cleaned.is_empty() {
50 lines.push(cleaned);
51 }
52 }
53
54 lines.join("\n")
55}
56
57pub(crate) fn inline_markdown_to_text(input: &str) -> String {
58 let mut output = String::new();
59 let mut index = 0usize;
60 let bytes = input.as_bytes();
61
62 while index < bytes.len() {
63 if bytes[index] == b'!'
64 && bytes.get(index + 1) == Some(&b'[')
65 && let Some((reference, next_index)) =
66 parse_inline_reference_at(input, index, InlineReferenceKind::Image, 0)
67 {
68 output.push_str(reference.label.trim());
69 index = next_index;
70 continue;
71 }
72
73 if bytes[index] == b'['
74 && (index == 0 || bytes[index - 1] != b'!')
75 && let Some((reference, next_index)) =
76 parse_inline_reference_at(input, index, InlineReferenceKind::Link, 0)
77 {
78 output.push_str(reference.label.trim());
79 index = next_index;
80 continue;
81 }
82
83 let Some(character) = input[index..].chars().next() else {
84 break;
85 };
86
87 if character == '\\' {
88 let next_index = index + character.len_utf8();
89 if let Some(next_character) = input[next_index..].chars().next() {
90 output.push(next_character);
91 index = next_index + next_character.len_utf8();
92 } else {
93 index = next_index;
94 }
95 continue;
96 }
97
98 if matches!(character, '*' | '_' | '`' | '~') {
99 index += character.len_utf8();
100 continue;
101 }
102
103 output.push(character);
104 index += character.len_utf8();
105 }
106
107 collapse_whitespace(&output)
108}
109
/// Returns `line` without its leading blockquote prefix.
///
/// Every leading `>` and every whitespace character before, between, or
/// directly after those markers is removed, so nested quotes such as
/// `"> > text"` yield `"text"`. Trailing whitespace is left untouched.
fn strip_blockquote_markers(line: &str) -> &str {
    line.trim_start_matches(|c: char| c == '>' || c.is_whitespace())
}
117
/// Normalises whitespace: leading and trailing whitespace is removed and
/// every internal run of whitespace becomes a single ASCII space.
fn collapse_whitespace(input: &str) -> String {
    let mut collapsed = String::with_capacity(input.len());
    for word in input.split_whitespace() {
        // Words are never empty, so an empty buffer means this is the first.
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed
}