html_to_markdown_rs/
wrapper.rs

1//! Text wrapping functionality for Markdown output.
2//!
3//! This module provides text wrapping capabilities similar to Python's textwrap.fill(),
4//! specifically designed to work with Markdown content while preserving formatting.
5
6use crate::options::ConversionOptions;
7
8/// Wrap text at specified width while preserving Markdown formatting.
9///
10/// This function wraps paragraphs of text at the specified width, but:
11/// - Does not break long words
12/// - Does not break on hyphens
13/// - Preserves Markdown formatting (links, bold, etc.)
14/// - Only wraps paragraph content, not headers, lists, code blocks, etc.
15pub fn wrap_markdown(markdown: &str, options: &ConversionOptions) -> String {
16    if !options.wrap {
17        return markdown.to_string();
18    }
19
20    let mut result = String::with_capacity(markdown.len());
21    let mut in_code_block = false;
22    let mut in_paragraph = false;
23    let mut paragraph_buffer = String::new();
24
25    for line in markdown.lines() {
26        let trimmed = line.trim_start();
27        let is_code_fence = trimmed.starts_with("```");
28        let is_indented_code = line.starts_with("    ")
29            && !is_list_like(trimmed)
30            && !is_numbered_list(trimmed)
31            && !is_heading(trimmed)
32            && !trimmed.starts_with('>')
33            && !trimmed.starts_with('|');
34
35        if is_code_fence || is_indented_code {
36            if in_paragraph && !paragraph_buffer.is_empty() {
37                result.push_str(&wrap_line(&paragraph_buffer, options.wrap_width));
38                result.push_str("\n\n");
39                paragraph_buffer.clear();
40                in_paragraph = false;
41            }
42
43            if is_code_fence {
44                in_code_block = !in_code_block;
45            }
46            result.push_str(line);
47            result.push('\n');
48            continue;
49        }
50
51        if in_code_block {
52            result.push_str(line);
53            result.push('\n');
54            continue;
55        }
56
57        let is_structural = is_heading(trimmed)
58            || is_list_like(trimmed)
59            || is_numbered_list(trimmed)
60            || trimmed.starts_with('>')
61            || trimmed.starts_with('|')
62            || trimmed.starts_with('=');
63
64        if is_structural {
65            if in_paragraph && !paragraph_buffer.is_empty() {
66                result.push_str(&wrap_line(&paragraph_buffer, options.wrap_width));
67                result.push_str("\n\n");
68                paragraph_buffer.clear();
69                in_paragraph = false;
70            }
71
72            result.push_str(line);
73            result.push('\n');
74            continue;
75        }
76
77        if line.trim().is_empty() {
78            if in_paragraph && !paragraph_buffer.is_empty() {
79                result.push_str(&wrap_line(&paragraph_buffer, options.wrap_width));
80                result.push_str("\n\n");
81                paragraph_buffer.clear();
82                in_paragraph = false;
83            } else if !in_paragraph {
84                result.push('\n');
85            }
86            continue;
87        }
88
89        if in_paragraph {
90            paragraph_buffer.push(' ');
91        }
92        paragraph_buffer.push_str(line.trim());
93        in_paragraph = true;
94    }
95
96    if in_paragraph && !paragraph_buffer.is_empty() {
97        result.push_str(&wrap_line(&paragraph_buffer, options.wrap_width));
98        result.push_str("\n\n");
99    }
100
101    result
102}
103
/// Report whether a left-trimmed line looks like a bullet item or a rule made of bullet markers.
///
/// `trimmed` is expected to have its leading whitespace already removed. The line matches when
/// it starts with `-`, `*` or `+` and either:
/// - the marker is followed by whitespace (`- item`, `* * *`), or
/// - the rest of the line consists only of that same marker and whitespace
///   (`-`, `---`, `***`, `+++`).
///
/// Lines that merely begin with a marker character as part of inline text, such as
/// `**bold** text`, `*emphasis*` or `-word`, are not list-like, so they can be wrapped as
/// ordinary paragraph content.
fn is_list_like(trimmed: &str) -> bool {
    let mut chars = trimmed.chars();
    let marker = match chars.next() {
        Some(c @ ('-' | '*' | '+')) => c,
        _ => return false,
    };
    let rest = chars.as_str();

    // `all` is vacuously true for an empty remainder, which covers a bare marker.
    rest.starts_with(char::is_whitespace) || rest.chars().all(|c| c == marker || c.is_whitespace())
}
107
/// Report whether the first whitespace-delimited token of `trimmed` looks like an ordered-list
/// marker: one or more ASCII digits followed by at least one `.` or `)` (for example `1.`,
/// `10)`).
///
/// All trailing `.` and `)` characters are stripped before the digit check, so tokens such as
/// `1..` also match.
fn is_numbered_list(trimmed: &str) -> bool {
    let marker = match trimmed.split_whitespace().next() {
        Some(first) => first,
        None => return false,
    };

    let number = marker.trim_end_matches(['.', ')']);

    // A shorter remainder means at least one delimiter was present at the end of the token.
    let has_delimiter = number.len() < marker.len();
    has_delimiter && !number.is_empty() && number.bytes().all(|b| b.is_ascii_digit())
}
117
/// Report whether a left-trimmed line begins with `#`.
///
/// Only the first character is inspected: no space after the `#` and no limit on the number
/// of `#` characters is required.
fn is_heading(trimmed: &str) -> bool {
    trimmed.as_bytes().first() == Some(&b'#')
}
121
/// Wrap a single line of text at the specified width.
///
/// This function wraps text without breaking long words or on hyphens,
/// similar to Python's textwrap.fill() with break_long_words=False and break_on_hyphens=False.
///
/// `width` is measured in characters (Unicode scalar values), not bytes, so non-ASCII text
/// is not wrapped earlier than ASCII text of the same visible length.
///
/// Text that already fits within `width` is returned unchanged. Otherwise the text is split
/// on whitespace and the words are re-joined with single spaces, starting a new line whenever
/// the next word would push the current line past `width`. A word longer than `width` is kept
/// intact on a line of its own.
fn wrap_line(text: &str, width: usize) -> String {
    if text.chars().count() <= width {
        return text.to_string();
    }

    let mut result = String::with_capacity(text.len());
    // Character count of the line currently being built; 0 means the line is still empty.
    let mut current_width = 0usize;

    for word in text.split_whitespace() {
        let word_width = word.chars().count();

        if current_width > 0 {
            if current_width + 1 + word_width <= width {
                result.push(' ');
                current_width += 1;
            } else {
                result.push('\n');
                current_width = 0;
            }
        }

        result.push_str(word);
        current_width += word_width;
    }

    result
}
160
#[cfg(test)]
mod tests {
    use super::*;
    use crate::options::ConversionOptions;

    /// Build options with wrapping enabled at `width`; every other field keeps its default.
    fn wrapping_at(width: usize) -> ConversionOptions {
        ConversionOptions {
            wrap: true,
            wrap_width: width,
            ..Default::default()
        }
    }

    // Text shorter than the width comes back untouched.
    #[test]
    fn test_wrap_line_short() {
        assert_eq!(wrap_line("Short text", 80), "Short text");
    }

    // Two words that cannot share a line are split at the space.
    #[test]
    fn test_wrap_line_long() {
        assert_eq!(wrap_line("123456789 123456789", 10), "123456789\n123456789");
    }

    // A word longer than the width is kept whole on its own line.
    #[test]
    fn test_wrap_line_no_break_long_words() {
        assert_eq!(wrap_line("12345678901 12345", 10), "12345678901\n12345");
    }

    // With `wrap` off the input is returned verbatim.
    #[test]
    fn test_wrap_markdown_disabled() {
        let input = "This is a very long line that would normally be wrapped at 40 characters";
        let opts = ConversionOptions {
            wrap: false,
            ..Default::default()
        };
        assert_eq!(wrap_markdown(input, &opts), input);
    }

    // Every non-blank output line of a wrapped paragraph fits within the width.
    #[test]
    fn test_wrap_markdown_paragraph() {
        let input = "This is a very long line that would normally be wrapped at 40 characters\n\n";
        let output = wrap_markdown(input, &wrapping_at(40));
        for line in output.lines() {
            assert!(line.len() <= 40 || line.trim().is_empty());
        }
    }

    // Lines inside a fenced code block are never re-wrapped.
    #[test]
    fn test_wrap_markdown_preserves_code() {
        let code_line = "This is a very long line in a code block that should not be wrapped";
        let input = "```\nThis is a very long line in a code block that should not be wrapped\n```\n";
        let output = wrap_markdown(input, &wrapping_at(40));
        assert!(output.contains(code_line));
    }

    // Headings are emitted on a single line regardless of length.
    #[test]
    fn test_wrap_markdown_preserves_headings() {
        let heading = "# This is a very long heading that should not be wrapped even if it exceeds the width";
        let input = "# This is a very long heading that should not be wrapped even if it exceeds the width\n\n";
        let output = wrap_markdown(input, &wrapping_at(40));
        assert!(output.contains(heading));
    }

    // Nested list items keep their indentation and are not merged into a paragraph.
    #[test]
    fn wrap_markdown_preserves_indented_lists_with_links() {
        let input = "- [A](#a)\n  - [B](#b)\n  - [C](#c)\n";
        let output = wrap_markdown(input, &wrapping_at(20));
        assert_eq!(output, "- [A](#a)\n  - [B](#b)\n  - [C](#c)\n");
    }
}