html_to_markdown_rs/
wrapper.rs

1//! Text wrapping functionality for Markdown output.
2//!
3//! This module provides text wrapping capabilities similar to Python's textwrap.fill(),
4//! specifically designed to work with Markdown content while preserving formatting.
5
6use crate::options::ConversionOptions;
7
8/// Wrap text at specified width while preserving Markdown formatting.
9///
10/// This function wraps paragraphs of text at the specified width, but:
11/// - Does not break long words
12/// - Does not break on hyphens
13/// - Preserves Markdown formatting (links, bold, etc.)
14/// - Only wraps paragraph content, not headers, lists, code blocks, etc.
15pub fn wrap_markdown(markdown: &str, options: &ConversionOptions) -> String {
16    if !options.wrap {
17        return markdown.to_string();
18    }
19
20    let mut result = String::with_capacity(markdown.len());
21    let mut in_code_block = false;
22    let mut in_paragraph = false;
23    let mut paragraph_buffer = String::new();
24
25    for line in markdown.lines() {
26        if line.starts_with("```") || line.starts_with("    ") {
27            if in_paragraph && !paragraph_buffer.is_empty() {
28                result.push_str(&wrap_line(&paragraph_buffer, options.wrap_width));
29                result.push_str("\n\n");
30                paragraph_buffer.clear();
31                in_paragraph = false;
32            }
33
34            if line.starts_with("```") {
35                in_code_block = !in_code_block;
36            }
37            result.push_str(line);
38            result.push('\n');
39            continue;
40        }
41
42        if in_code_block {
43            result.push_str(line);
44            result.push('\n');
45            continue;
46        }
47
48        let is_structural = line.starts_with('#')
49            || line.starts_with('*')
50            || line.starts_with('-')
51            || line.starts_with('+')
52            || line.starts_with('>')
53            || line.starts_with('|')
54            || line.starts_with('=')
55            || line
56                .trim()
57                .chars()
58                .next()
59                .is_some_and(|c| c.is_ascii_digit() && line.contains(". "));
60
61        if is_structural {
62            if in_paragraph && !paragraph_buffer.is_empty() {
63                result.push_str(&wrap_line(&paragraph_buffer, options.wrap_width));
64                result.push_str("\n\n");
65                paragraph_buffer.clear();
66                in_paragraph = false;
67            }
68
69            result.push_str(line);
70            result.push('\n');
71            continue;
72        }
73
74        if line.trim().is_empty() {
75            if in_paragraph && !paragraph_buffer.is_empty() {
76                result.push_str(&wrap_line(&paragraph_buffer, options.wrap_width));
77                result.push_str("\n\n");
78                paragraph_buffer.clear();
79                in_paragraph = false;
80            } else if !in_paragraph {
81                result.push('\n');
82            }
83            continue;
84        }
85
86        if in_paragraph {
87            paragraph_buffer.push(' ');
88        }
89        paragraph_buffer.push_str(line.trim());
90        in_paragraph = true;
91    }
92
93    if in_paragraph && !paragraph_buffer.is_empty() {
94        result.push_str(&wrap_line(&paragraph_buffer, options.wrap_width));
95        result.push_str("\n\n");
96    }
97
98    result
99}
100
/// Wrap a single line of text at the specified width.
///
/// This function wraps text without breaking long words or on hyphens,
/// similar to Python's textwrap.fill() with break_long_words=False and break_on_hyphens=False.
///
/// `width` is measured in characters (Unicode scalar values), not UTF-8 bytes,
/// so non-ASCII text is not wrapped earlier than ASCII text of the same length.
///
/// Text that already fits within `width` is returned unchanged. Otherwise the
/// text is split on whitespace and the words are re-joined with single spaces,
/// starting a new line whenever the next word would exceed `width`. A single
/// word longer than `width` is placed on a line of its own.
fn wrap_line(text: &str, width: usize) -> String {
    if text.chars().count() <= width {
        return text.to_string();
    }

    let mut result = String::with_capacity(text.len());
    // Character count of the line currently being built; zero only before the
    // first word, because every new line immediately receives a word.
    let mut current_width = 0usize;

    for word in text.split_whitespace() {
        let word_width = word.chars().count();

        if current_width > 0 {
            if current_width + 1 + word_width <= width {
                result.push(' ');
                current_width += 1;
            } else {
                result.push('\n');
                current_width = 0;
            }
        }

        result.push_str(word);
        current_width += word_width;
    }

    result
}
139
#[cfg(test)]
mod tests {
    use super::*;
    use crate::options::ConversionOptions;

    /// Options with wrapping enabled at the given width; all other fields default.
    fn wrapping_at(width: usize) -> ConversionOptions {
        ConversionOptions {
            wrap: true,
            wrap_width: width,
            ..Default::default()
        }
    }

    /// Text that already fits is returned as-is.
    #[test]
    fn test_wrap_line_short() {
        assert_eq!(wrap_line("Short text", 80), "Short text");
    }

    /// Text exceeding the width is split at a word boundary.
    #[test]
    fn test_wrap_line_long() {
        assert_eq!(wrap_line("123456789 123456789", 10), "123456789\n123456789");
    }

    /// A word longer than the width stays intact on its own line.
    #[test]
    fn test_wrap_line_no_break_long_words() {
        assert_eq!(wrap_line("12345678901 12345", 10), "12345678901\n12345");
    }

    /// With `wrap` off the input is passed through untouched.
    #[test]
    fn test_wrap_markdown_disabled() {
        let input = "This is a very long line that would normally be wrapped at 40 characters";
        let opts = ConversionOptions {
            wrap: false,
            ..Default::default()
        };
        assert_eq!(wrap_markdown(input, &opts), input);
    }

    /// Every non-blank output line of a wrapped paragraph fits the width.
    #[test]
    fn test_wrap_markdown_paragraph() {
        let input = "This is a very long line that would normally be wrapped at 40 characters\n\n";
        let output = wrap_markdown(input, &wrapping_at(40));
        for line in output.lines() {
            assert!(line.len() <= 40 || line.trim().is_empty());
        }
    }

    /// Lines inside a fenced code block are never re-wrapped.
    #[test]
    fn test_wrap_markdown_preserves_code() {
        let input = "```\nThis is a very long line in a code block that should not be wrapped\n```\n";
        let output = wrap_markdown(input, &wrapping_at(40));
        assert!(output.contains("This is a very long line in a code block that should not be wrapped"));
    }

    /// Headings are kept on a single line even when they exceed the width.
    #[test]
    fn test_wrap_markdown_preserves_headings() {
        let input = "# This is a very long heading that should not be wrapped even if it exceeds the width\n\n";
        let output = wrap_markdown(input, &wrapping_at(40));
        assert!(
            output.contains("# This is a very long heading that should not be wrapped even if it exceeds the width")
        );
    }
}
214}