html_to_markdown_rs/
wrapper.rs1use crate::options::ConversionOptions;
7
8pub fn wrap_markdown(markdown: &str, options: &ConversionOptions) -> String {
16 if !options.wrap {
17 return markdown.to_string();
18 }
19
20 let mut result = String::with_capacity(markdown.len());
21 let mut in_code_block = false;
22 let mut in_paragraph = false;
23 let mut paragraph_buffer = String::new();
24
25 for line in markdown.lines() {
26 let trimmed = line.trim_start();
27 let is_code_fence = trimmed.starts_with("```");
28 let is_indented_code = line.starts_with(" ")
29 && !is_list_like(trimmed)
30 && !is_numbered_list(trimmed)
31 && !is_heading(trimmed)
32 && !trimmed.starts_with('>')
33 && !trimmed.starts_with('|');
34
35 if is_code_fence || is_indented_code {
36 if in_paragraph && !paragraph_buffer.is_empty() {
37 result.push_str(&wrap_line(¶graph_buffer, options.wrap_width));
38 result.push_str("\n\n");
39 paragraph_buffer.clear();
40 in_paragraph = false;
41 }
42
43 if is_code_fence {
44 in_code_block = !in_code_block;
45 }
46 result.push_str(line);
47 result.push('\n');
48 continue;
49 }
50
51 if in_code_block {
52 result.push_str(line);
53 result.push('\n');
54 continue;
55 }
56
57 let is_structural = is_heading(trimmed)
58 || is_list_like(trimmed)
59 || is_numbered_list(trimmed)
60 || trimmed.starts_with('>')
61 || trimmed.starts_with('|')
62 || trimmed.starts_with('=');
63
64 if is_structural {
65 if in_paragraph && !paragraph_buffer.is_empty() {
66 result.push_str(&wrap_line(¶graph_buffer, options.wrap_width));
67 result.push_str("\n\n");
68 paragraph_buffer.clear();
69 in_paragraph = false;
70 }
71
72 result.push_str(line);
73 result.push('\n');
74 continue;
75 }
76
77 if line.trim().is_empty() {
78 if in_paragraph && !paragraph_buffer.is_empty() {
79 result.push_str(&wrap_line(¶graph_buffer, options.wrap_width));
80 result.push_str("\n\n");
81 paragraph_buffer.clear();
82 in_paragraph = false;
83 } else if !in_paragraph {
84 result.push('\n');
85 }
86 continue;
87 }
88
89 if in_paragraph {
90 paragraph_buffer.push(' ');
91 }
92 paragraph_buffer.push_str(line.trim());
93 in_paragraph = true;
94 }
95
96 if in_paragraph && !paragraph_buffer.is_empty() {
97 result.push_str(&wrap_line(¶graph_buffer, options.wrap_width));
98 result.push_str("\n\n");
99 }
100
101 result
102}
103
/// Returns `true` when `trimmed` (a line with leading whitespace already
/// removed) should be treated as a bullet-list item or a similar
/// non-wrappable line.
///
/// A line qualifies when it starts with `-`, `*` or `+` and either
///
/// * the marker is the whole line or is followed by whitespace, as required
///   for a list marker, or
/// * the line contains no alphanumeric characters at all (thematic breaks
///   such as `---` / `***`, setext underlines, delimiter rows like `---|---`),
///   so there is no prose to re-flow.
///
/// Lines that merely begin with emphasis (`**bold** text`, `*note*`) or a
/// dash-prefixed word are ordinary paragraph text and return `false`.
fn is_list_like(trimmed: &str) -> bool {
    let mut chars = trimmed.chars();
    if !matches!(chars.next(), Some('-' | '*' | '+')) {
        return false;
    }

    match chars.next() {
        // A bare marker is an empty list item.
        None => true,
        Some(next) if next.is_whitespace() => true,
        // Marker glued to more characters: only punctuation-only lines count.
        Some(_) => !trimmed.chars().any(char::is_alphanumeric),
    }
}
107
/// Returns `true` when the first whitespace-delimited token of `trimmed`
/// looks like an ordered-list marker: one or more ASCII digits followed by
/// trailing `.` and/or `)` characters (for example `1.` or `12)`).
fn is_numbered_list(trimmed: &str) -> bool {
    match trimmed.split_whitespace().next() {
        Some(marker) if matches!(marker.chars().last(), Some('.' | ')')) => {
            // Strip every trailing delimiter; what remains must be the number.
            let number = marker.trim_end_matches(['.', ')']);
            !number.is_empty() && number.bytes().all(|b| b.is_ascii_digit())
        }
        _ => false,
    }
}
117
/// Returns `true` when `trimmed` begins with `#`, i.e. looks like an ATX
/// heading line. Only the first character is inspected.
fn is_heading(trimmed: &str) -> bool {
    // `#` is a single ASCII byte, so checking the first byte is equivalent to
    // checking the first character.
    trimmed.as_bytes().first() == Some(&b'#')
}
121
/// Wraps `text` so that each output line holds at most `width` characters,
/// breaking only at whitespace.
///
/// * Text that already fits within `width` is returned unchanged.
/// * Otherwise the text is split on whitespace and the words are greedily
///   packed into lines joined by `\n`; runs of whitespace collapse to a single
///   space.
/// * A single word longer than `width` is never split and is placed on a line
///   of its own.
///
/// Width is measured in Unicode scalar values (`char`s), not bytes, so
/// non-ASCII text is not wrapped prematurely. It is not a terminal display
/// width: wide or combining characters each count as one.
fn wrap_line(text: &str, width: usize) -> String {
    if text.chars().count() <= width {
        return text.to_string();
    }

    let mut wrapped = String::with_capacity(text.len());
    // Character count of the line currently being built; 0 only before the
    // first word has been written.
    let mut line_width = 0usize;

    for word in text.split_whitespace() {
        let word_width = word.chars().count();

        if line_width > 0 {
            if line_width + 1 + word_width <= width {
                wrapped.push(' ');
                line_width += 1;
            } else {
                wrapped.push('\n');
                line_width = 0;
            }
        }

        wrapped.push_str(word);
        line_width += word_width;
    }

    wrapped
}
160
#[cfg(test)]
mod tests {
    use super::*;
    use crate::options::ConversionOptions;

    /// Options with wrapping enabled at `width`; every other field is default.
    fn wrapping_at(width: usize) -> ConversionOptions {
        ConversionOptions {
            wrap: true,
            wrap_width: width,
            ..Default::default()
        }
    }

    #[test]
    fn test_wrap_line_short() {
        // Text shorter than the width comes back untouched.
        assert_eq!(wrap_line("Short text", 80), "Short text");
    }

    #[test]
    fn test_wrap_line_long() {
        let input = "123456789 123456789";
        assert_eq!(wrap_line(input, 10), "123456789\n123456789");
    }

    #[test]
    fn test_wrap_line_no_break_long_words() {
        // An over-long word is kept whole on its own line.
        let input = "12345678901 12345";
        assert_eq!(wrap_line(input, 10), "12345678901\n12345");
    }

    #[test]
    fn test_wrap_markdown_disabled() {
        let source = "This is a very long line that would normally be wrapped at 40 characters";
        let opts = ConversionOptions {
            wrap: false,
            ..Default::default()
        };
        assert_eq!(wrap_markdown(source, &opts), source);
    }

    #[test]
    fn test_wrap_markdown_paragraph() {
        let source = "This is a very long line that would normally be wrapped at 40 characters\n\n";
        let output = wrap_markdown(source, &wrapping_at(40));
        // Every non-blank output line must respect the configured width.
        assert!(output
            .lines()
            .all(|line| line.trim().is_empty() || line.len() <= 40));
    }

    #[test]
    fn test_wrap_markdown_preserves_code() {
        let source = "```\nThis is a very long line in a code block that should not be wrapped\n```\n";
        let output = wrap_markdown(source, &wrapping_at(40));
        assert!(output.contains("This is a very long line in a code block that should not be wrapped"));
    }

    #[test]
    fn test_wrap_markdown_preserves_headings() {
        let source = "# This is a very long heading that should not be wrapped even if it exceeds the width\n\n";
        let output = wrap_markdown(source, &wrapping_at(40));
        assert!(
            output.contains("# This is a very long heading that should not be wrapped even if it exceeds the width")
        );
    }

    #[test]
    fn wrap_markdown_preserves_indented_lists_with_links() {
        // List items are structural lines and must pass through unchanged.
        let source = "- [A](#a)\n - [B](#b)\n - [C](#c)\n";
        let output = wrap_markdown(source, &wrapping_at(20));
        assert_eq!(output, "- [A](#a)\n - [B](#b)\n - [C](#c)\n");
    }
}