debian_changelog/
textwrap.rs

1//! Text wrapping functions
2//!
3//! These functions are used to wrap text for use in a changelog.
4//! The main function is `textwrap`, which takes a string and wraps it to a
5//! specified width, without breaking in between "Closes: #XXXXXX" fragments.
6
7use lazy_regex::regex_captures;
8use std::borrow::Cow;
9use textwrap::core::Word;
10
/// Default width for text wrapping.
///
/// The wrapping helpers in this module fall back to this value whenever the
/// caller passes `None` as the width.
pub const DEFAULT_WIDTH: usize = 78;

/// Initial indent for text wrapping: the `* ` bullet prefix.
pub const INITIAL_INDENT: &str = "* ";
16
/// Decide whether `line` may be broken at byte offset `pos`.
///
/// A break is only allowed on a space, and never on the space(s) that
/// separate a bug-closing marker (`Closes:` or `LP:`) from the `#NNN` that
/// follows it, so that e.g. `Closes: #123456` always stays on one line.
///
/// `pos` is a byte offset. Offsets past the end of `line`, or that fall
/// inside a multi-byte character, are never break points; they return
/// `false` instead of panicking.
#[inline]
fn can_break_word(line: &str, pos: usize) -> bool {
    // `get` returns `None` for an out-of-range offset or one that is not on a
    // character boundary, so non-ASCII text can never trigger a slice panic.
    let tail = match line.get(pos..) {
        Some(tail) => tail,
        None => return false,
    };

    // Don't break if we're not at a space.
    if !tail.starts_with(' ') {
        return false;
    }

    // `pos` was just validated as a character boundary, so this cannot panic.
    let head = &line[..pos];

    const MARKERS: [&str; 2] = ["Closes:", "LP:"];
    let splits_marker = MARKERS.iter().any(|&marker| {
        // The space directly after the marker ("Closes:| #1").
        let right_after_marker = head.ends_with(marker);
        // A second space after the marker that still precedes the '#'
        // ("Closes: | #1").
        let second_space_before_hash = tail.starts_with(" #")
            && head
                .strip_suffix(' ')
                .map_or(false, |before_space| before_space.ends_with(marker));
        right_after_marker || second_space_before_hash
    });

    !splits_marker
}
51
#[cfg(test)]
mod can_break_word_tests {
    use super::can_break_word;

    /// Assert `can_break_word(line, pos) == expected` for every table row.
    fn check(cases: &[(&str, usize, bool, &str)]) {
        for &(line, pos, expected, why) in cases {
            assert_eq!(expected, can_break_word(line, pos), "{}", why);
        }
    }

    #[test]
    fn test_can_break_word() {
        assert!(can_break_word("foo bar", 3));
        assert!(!can_break_word("foo bar", 0));
        assert!(!can_break_word("foo bar", 5));
    }

    #[test]
    fn test_can_break_word_edge_cases() {
        // Position at the end of the string.
        assert!(!can_break_word("foo", 3));

        // Empty string.
        assert!(!can_break_word("", 0));
    }

    #[test]
    fn test_closes() {
        check(&[
            // "Closes: #" at the start of the line.
            ("Closes: #123456", 6, false, "Should not break after 'Closes:'"),
            (
                "Closes: #123456",
                7,
                false,
                "Should not break between 'Closes:' and '#'",
            ),
            (
                "Closes: #123456 foo",
                15,
                true,
                "Should break after bug number",
            ),
            // "Closes: #" in the middle of the line (the bug scenario).
            (
                "Fix bug (Closes: #123456)",
                16,
                false,
                "Should not break after 'Closes:' in middle of line",
            ),
            (
                "Fix bug (Closes: #123456)",
                17,
                false,
                "Should not break between 'Closes:' and '#' in middle",
            ),
            // Breaking before "(Closes:" is fine.
            (
                "Fix bug (Closes: #123456)",
                7,
                true,
                "Should be able to break before '(Closes:'",
            ),
        ]);
    }

    #[test]
    fn test_lp() {
        check(&[
            // "LP: #" at the start of the line.
            ("LP: #123456", 2, false, "Should not break after 'LP:'"),
            (
                "LP: #123456",
                3,
                false,
                "Should not break between 'LP:' and '#'",
            ),
            ("LP: #123456 foo", 11, true, "Should break after bug number"),
            // "LP: #" in the middle of the line.
            (
                "Fix bug (LP: #123456)",
                12,
                false,
                "Should not break after 'LP:' in middle of line",
            ),
            (
                "Fix bug (LP: #123456)",
                13,
                false,
                "Should not break between 'LP:' and '#' in middle",
            ),
        ]);
    }
}
130
131fn find_words<'a>(line: &'a str) -> Box<dyn Iterator<Item = Word<'a>> + 'a> {
132    let mut start = 0;
133    let mut can_break = false;
134    let mut char_indices = line.char_indices();
135
136    Box::new(std::iter::from_fn(move || {
137        for (idx, ch) in char_indices.by_ref() {
138            let word_finished = can_break && ch != ' ';
139            can_break = can_break_word(&line[start..], idx - start);
140            if word_finished {
141                let word = Word::from(&line[start..idx]);
142                start = idx;
143                return Some(word);
144            }
145        }
146
147        if start < line.len() {
148            let word = Word::from(&line[start..]);
149            start = line.len();
150            return Some(word);
151        }
152
153        None
154    }))
155}
156
#[cfg(test)]
mod find_words_tests {
    use super::find_words;
    use textwrap::core::Word;
    use textwrap::WordSeparator;

    /// Run `text` through `find_words` via `WordSeparator::Custom`.
    fn split(text: &str) -> Vec<Word<'_>> {
        let separator = WordSeparator::Custom(find_words);
        let words: Vec<_> = separator.find_words(text).collect();
        words
    }

    #[test]
    fn test_find_words() {
        assert_eq!(vec![Word::from("foo")], split("foo"));
        assert_eq!(vec![Word::from("foo "), Word::from("bar")], split("foo bar"));
    }

    #[test]
    fn test_split_closes() {
        // The bug reference stays one word, with or without trailing text.
        let with_trailing_word = vec![
            Word::from("This "),
            Word::from("test "),
            Word::from("Closes: #123456 "),
            Word::from("foo"),
        ];
        assert_eq!(with_trailing_word, split("This test Closes: #123456 foo"));

        let at_end_of_line = vec![
            Word::from("This "),
            Word::from("test "),
            Word::from("Closes: #123456"),
        ];
        assert_eq!(at_end_of_line, split("This test Closes: #123456"));
    }
}
200
201fn options<'a>(
202    width: Option<usize>,
203    initial_indent: Option<&'a str>,
204    subsequent_indent: Option<&'a str>,
205) -> textwrap::Options<'a> {
206    let width = width.unwrap_or(DEFAULT_WIDTH);
207    let mut options = textwrap::Options::new(width)
208        .break_words(false)
209        .word_splitter(textwrap::WordSplitter::NoHyphenation)
210        .word_separator(textwrap::WordSeparator::Custom(find_words));
211    if let Some(initial_indent) = initial_indent {
212        options = options.initial_indent(initial_indent);
213    }
214    if let Some(subsequent_indent) = subsequent_indent {
215        options = options.subsequent_indent(subsequent_indent);
216    }
217    options
218}
219
220/// Wrap a string of text, without breaking in between "Closes: #XXXXXX" fragments
221pub fn textwrap<'a>(
222    text: &'a str,
223    width: Option<usize>,
224    initial_indent: Option<&str>,
225    subsequent_indent: Option<&str>,
226) -> Vec<Cow<'a, str>> {
227    let options = options(width, initial_indent, subsequent_indent);
228    // Actual text wrapping using textwrap crate
229    textwrap::wrap(text, options)
230}
231
#[cfg(test)]
mod textwrap_tests {
    #[test]
    fn test_wrap_closes() {
        // Width 5 forces a break at every opportunity; the bug reference must
        // still come out as one line.
        let wrapped = super::textwrap(
            "And this fixes something. Closes: #123456",
            Some(5),
            None,
            None,
        );
        assert_eq!(
            vec!["And", "this", "fixes", "something.", "Closes: #123456"],
            wrapped
        );
    }

    #[test]
    fn test_wrap() {
        let sentence = "This is a line that has been broken";
        let ws = textwrap::WordSeparator::Custom(super::find_words);
        let options = textwrap::Options::new(30)
            .break_words(false)
            .word_separator(ws);

        let words = ws
            .find_words(sentence)
            .map(|w| w.to_string())
            .collect::<Vec<_>>();
        assert_eq!(
            vec!["This", "is", "a", "line", "that", "has", "been", "broken"],
            words
        );

        assert_eq!(
            vec!["This is a line that has been", "broken"],
            textwrap::wrap(sentence, options)
        );

        assert_eq!(
            vec!["This is a line that has been", "broken"],
            super::textwrap(sentence, Some(30), None, None)
        );
    }
}
270
/// Check whether `line2` may be appended to `line1` as part of one paragraph.
///
/// Joining is refused when:
/// * `line1` ends with `:`;
/// * `line2` begins with an uppercase character and `line1` does not end with
///   `.` (this also covers a `line1` ending in `]` or `}`);
/// * `line2`, ignoring leading whitespace, begins with `*`, `-` or `+`;
/// * the two lines have a different number of leading spaces.
fn can_join(line1: &str, line2: &str) -> bool {
    // Number of leading ASCII spaces.
    let indent_of = |line: &str| line.len() - line.trim_start_matches(' ').len();

    let starts_sentence = line2.chars().next().map_or(false, char::is_uppercase);
    let is_list_item = line2.trim_start().starts_with(&['*', '-', '+'][..]);

    let blocked =
        line1.ends_with(':') || (starts_sentence && !line1.ends_with('.')) || is_list_item;

    // Lines with different indentation never join.
    !blocked && indent_of(line1) == indent_of(line2)
}
301
#[cfg(test)]
mod can_join_tests {
    use super::can_join;

    /// Assert `can_join(first, second) == expected` for every table row.
    fn check(cases: &[(&str, &str, bool)]) {
        for &(first, second, expected) in cases {
            assert_eq!(
                expected,
                can_join(first, second),
                "can_join({:?}, {:?})",
                first,
                second
            );
        }
    }

    #[test]
    fn test_can_join() {
        check(&[
            ("This is a line.", "This is a line.", true),
            (
                "This is a line.",
                "This is a line. And this is another.",
                true,
            ),
            ("This is a line.", "+ This is a submititem.", false),
            ("This is a line introducing:", "  * A list item.", false),
            (" Lines with different indentation", "  can not join.", false),
        ]);
    }

    #[test]
    fn test_can_join_edge_cases() {
        check(&[
            // First line ending with a bracket.
            ("Some text]", "Uppercase text", false),
            ("Some text}", "Uppercase text", false),
            // First line ending with a period, uppercase next line.
            ("End with period.", "Uppercase text", true),
            // First line without a period, uppercase next line.
            ("No period", "Uppercase text", false),
            // Second line starting with a bullet.
            ("Some text", "  * bullet", false),
            ("Some text", "  - bullet", false),
            ("Some text", "  + bullet", false),
            // First line ending with a colon.
            ("Introduction:", "some text", false),
            // Same indentation.
            ("  same indent", "  can join", true),
            // Empty lines.
            ("", "", true),
        ]);
    }
}
352
// Report whether at least one of `lines` is longer than `width`.
//
// Length is measured with `str::len`, i.e. in bytes, so a line containing
// non-ASCII characters can count as longer than it is displayed.
fn any_long_lines(lines: &[&str], width: usize) -> bool {
    for line in lines {
        if line.len() > width {
            return true;
        }
    }
    false
}
357
/// Errors reported while rewrapping changelog text.
#[derive(Debug, PartialEq)]
pub enum Error {
    /// The first line of a change does not start with a bullet point.
    MissingBulletPoint {
        /// The offending line.
        line: String,
    },

    /// A line is indented in a way that was not expected.
    UnexpectedIndent {
        /// Number of the offending line.
        lineno: usize,

        /// The offending line.
        line: String,

        /// Indentation width in spaces; the `Display` output of this variant
        /// reports it as the expected number of spaces.
        indent: usize,
    },
}
379
380impl std::fmt::Display for Error {
381    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
382        match self {
383            Error::MissingBulletPoint { line } => {
384                write!(f, "Missing bullet point in line: {}", line)
385            }
386            Error::UnexpectedIndent {
387                lineno,
388                line,
389                indent,
390            } => write!(
391                f,
392                "Unexpected indent in line {}: {} (expected {} spaces)",
393                lineno, line, indent
394            ),
395        }
396    }
397}
398
// No methods are overridden here; the `Debug` (derived) and `Display`
// implementations this trait requires are provided for `Error` in this file.
impl std::error::Error for Error {}
400
401// Rewrap lines from a list of changes
402//
403// E.g.:
404//
405// * This is a long line that needs to be wrapped
406//
407// =>
408//
409// * This is a short line that
410//   needs to be wrappd
411//
412fn rewrap_change<'a>(change: &[&'a str], width: Option<usize>) -> Result<Vec<Cow<'a, str>>, Error> {
413    let width = width.unwrap_or(DEFAULT_WIDTH);
414    assert!(width > 4);
415
416    if change.is_empty() {
417        return Ok(vec![]);
418    }
419
420    let mut initial_indent = match regex_captures!(r"^[  ]*[\+\-\*] ", change[0]) {
421        Some(initial_indent) => initial_indent.to_string(),
422        None => {
423            return Err(Error::MissingBulletPoint {
424                line: change[0].to_string(),
425            })
426        }
427    };
428    let prefix_len = initial_indent.len();
429
430    if !any_long_lines(change, width) {
431        return Ok(change.iter().map(|line| (*line).into()).collect());
432    }
433    let mut subsequent_indent = " ".repeat(prefix_len);
434
435    let mut lines = vec![&change[0][prefix_len..]];
436
437    // Strip the leading indentation from continuation lines
438    // Accept any indentation >= 0, to handle varying indentation levels
439    for line in change[1..].iter() {
440        if line.is_empty() {
441            // Empty line
442            lines.push(line);
443        } else if line.starts_with(' ') {
444            // Line with indentation - determine how much to strip
445            let line_indent = line.len() - line.trim_start_matches(' ').len();
446            if line_indent >= prefix_len {
447                // Strip the prefix indentation
448                lines.push(&line[prefix_len..]);
449            } else {
450                // Less indentation than prefix - just use the line as-is
451                lines.push(line);
452            }
453        } else {
454            // No indentation - use line as-is
455            lines.push(line);
456        }
457    }
458
459    let mut ret: Vec<Cow<'a, str>> = Vec::new();
460    let mut todo = vec![lines.remove(0)];
461
462    for line in lines.into_iter() {
463        if can_join(todo.last().unwrap(), line) {
464            todo.push(line);
465        } else {
466            ret.extend(
467                textwrap(
468                    todo.join(" ").as_str(),
469                    Some(width),
470                    Some(initial_indent.as_str()),
471                    Some(subsequent_indent.as_str()),
472                )
473                .iter()
474                .map(|s| Cow::Owned(s.to_string())),
475            );
476            initial_indent =
477                " ".repeat(prefix_len + line.len() - line.trim_start_matches(' ').len());
478            subsequent_indent = " ".repeat(initial_indent.len());
479            todo = vec![line.trim_start_matches(' ')];
480        }
481    }
482    ret.extend(
483        textwrap(
484            todo.join(" ").as_str(),
485            Some(width),
486            Some(initial_indent.as_str()),
487            Some(subsequent_indent.as_str()),
488        )
489        .iter()
490        .map(|s| Cow::Owned(s.to_string())),
491    );
492    Ok(ret)
493}
494
495/// Rewrap lines from an iterator of changes
496///
497/// Returns a Result containing the rewrapped lines or an error if rewrapping fails.
498pub fn try_rewrap_changes<'a>(
499    changes: impl Iterator<Item = &'a str>,
500) -> Result<Vec<Cow<'a, str>>, Error> {
501    let mut change = Vec::new();
502    let mut indent_len: Option<usize> = None;
503    let mut ret = vec![];
504    for line in changes {
505        // Start of a new change
506        if let Some(indent) = regex_captures!(r"^[  ]*[\+\-\*] ", line) {
507            if !change.is_empty() {
508                ret.extend(rewrap_change(change.as_slice(), None)?);
509            }
510            indent_len = Some(indent.len());
511            change = vec![line];
512        } else if let Some(_current_indent) = indent_len {
513            // Continuation line - keep full line with indentation
514            change.push(line);
515        } else {
516            if !change.is_empty() {
517                ret.extend(rewrap_change(change.as_slice(), None)?);
518            }
519            ret.push(line.into());
520            change = vec![];
521        }
522    }
523    if !change.is_empty() {
524        ret.extend(rewrap_change(change.as_slice(), None)?);
525    }
526    Ok(ret)
527}
528
529/// Rewrap lines from an iterator of changes
530///
531/// # Deprecated
532///
533/// This function panics on errors. Use [`try_rewrap_changes`] instead for proper error handling.
534///
535/// # Panics
536///
537/// Panics if rewrapping fails (e.g., due to invalid formatting).
538#[deprecated(
539    since = "0.2.10",
540    note = "Use try_rewrap_changes for proper error handling"
541)]
542pub fn rewrap_changes<'a>(
543    changes: impl Iterator<Item = &'a str>,
544) -> impl Iterator<Item = Cow<'a, str>> {
545    try_rewrap_changes(changes).unwrap().into_iter()
546}
547
#[cfg(test)]
mod rewrap_tests {
    use super::rewrap_change;
    const LONG_LINE: &str = "This is a very long line that could have been broken and should have been broken but was not broken.";

    #[test]
    fn test_too_short() {
        // Nothing exceeds the default width, so every input comes back as-is.
        assert_eq!(Vec::<&str>::new(), rewrap_change(&[][..], None).unwrap());

        let single = rewrap_change(&["* Foo bar"][..], None).unwrap();
        assert_eq!(vec!["* Foo bar"], single);

        let with_continuation = rewrap_change(&["* Foo", "  bar"][..], None).unwrap();
        assert_eq!(vec!["* Foo", "  bar"], with_continuation);

        let indented = rewrap_change(&["  * Beginning", "  next line"][..], None).unwrap();
        assert_eq!(vec!["  * Beginning", "  next line"], indented);
    }

    #[test]
    fn test_no_initial() {
        let long = "x".repeat(100);
        let err = rewrap_change(&[&long], None).unwrap_err();
        assert_eq!(super::Error::MissingBulletPoint { line: long.clone() }, err);
    }

    #[test]
    fn test_wrap() {
        let bulleted = format!("* {}", LONG_LINE);
        assert_eq!(
            vec![
                super::Cow::Borrowed(
                    "* This is a very long line that could have been broken and should have been"
                ),
                "  broken but was not broken.".into()
            ],
            rewrap_change(&[bulleted.as_str()][..], None).unwrap()
        );

        let expected = r###" * Build-Depend on libsdl1.2-dev, libsdl-ttf2.0-dev and libsdl-mixer1.2-dev
   instead of with the embedded version, add -lSDL_ttf to --with-py-libs in
   debian/rules and rebootstrap (Closes: #382202)"###
            .split('\n')
            .collect::<Vec<_>>();
        let input = r###" * Build-Depend on libsdl1.2-dev, libsdl-ttf2.0-dev and libsdl-mixer1.2-dev instead
   of with the embedded version, add -lSDL_ttf to --with-py-libs in debian/rules
   and rebootstrap (Closes: #382202)
"###
        .split('\n')
        .collect::<Vec<_>>();
        assert_eq!(expected, rewrap_change(input.as_slice(), None).unwrap());
    }

    #[test]
    fn test_no_join() {
        let expected = r###" - Translators know why this sign has been put here:
        _Choices: ${FOO}, !Other[ You only have to translate Other, remove the
        exclamation mark and this comment between brackets]
      Currently text, newt, slang and gtk frontends support this feature."###
            .split('\n')
            .collect::<Vec<_>>();
        let input = r###" - Translators know why this sign has been put here:
        _Choices: ${FOO}, !Other[ You only have to translate Other, remove the exclamation mark and this comment between brackets]
      Currently text, newt, slang and gtk frontends support this feature.
"###
        .split('\n')
        .collect::<Vec<_>>();
        assert_eq!(expected, rewrap_change(input.as_slice(), None).unwrap());
    }
}
609
#[cfg(test)]
mod rewrap_changes_tests {
    use super::try_rewrap_changes;
    use std::borrow::Cow;

    /// Rewrap `input`, failing the test with `failure` if an error comes back.
    fn rewrap(input: &[&'static str], failure: &str) -> Vec<Cow<'static, str>> {
        let result = try_rewrap_changes(input.iter().copied());
        assert!(result.is_ok(), "{}", failure);
        result.unwrap()
    }

    /// Test that long unbreakable lines (e.g., URLs) don't cause errors
    #[test]
    fn test_long_url() {
        let lines = rewrap(
            &[
                "  * Fix bug",
                "    https://www.example.com/this/is/a/very/long/url/that/can/not/be/broken/because/it/is/longer/than/80/characters.",
            ],
            "Should handle long URLs without error",
        );

        assert_eq!(
            lines,
            vec![
                "  * Fix bug",
                "    https://www.example.com/this/is/a/very/long/url/that/can/not/be/broken/because/it/is/longer/than/80/characters."
            ]
        );
    }

    /// Test that continuation lines have proper 4-space indentation after wrapping
    #[test]
    fn test_continuation_indent() {
        let lines = rewrap(
            &[
                "  * Fix blocks/blockedby of archived bugs (Closes: #XXXXXXX). Thanks to somebody who fixed it.",
                "  * Provide informative error message when unarchive fails because the bug is not archived.",
            ],
            "Should wrap successfully",
        );

        assert_eq!(
            lines,
            vec![
                "  * Fix blocks/blockedby of archived bugs (Closes: #XXXXXXX). Thanks to",
                "    somebody who fixed it.",
                "  * Provide informative error message when unarchive fails because the bug is",
                "    not archived."
            ]
        );
    }

    /// Test that "Closes: #" pattern stays together when wrapping
    #[test]
    fn test_closes_tag_not_broken() {
        let lines = rewrap(
            &[
                "  * Fix blocks/blockedby of archived bugs and more blah blah blah bl (Closes: #XXXXXXX).",
            ],
            "Should wrap successfully",
        );

        assert_eq!(
            lines,
            vec![
                "  * Fix blocks/blockedby of archived bugs and more blah blah blah bl",
                "    (Closes: #XXXXXXX)."
            ]
        );
    }

    /// Test handling of complex nested indentation structures
    #[test]
    fn test_complex_nested_indentation() {
        let nested = [
            "  * Main change item",
            "    - Sub-item with 4 spaces",
            "      + Nested sub-item with 6 spaces",
            "        More text in nested item",
            "    - Another sub-item",
        ];

        let lines = rewrap(&nested, "Should handle nested indentation");

        // Every line is short enough, so the structure comes back untouched.
        assert_eq!(
            lines,
            vec![
                "  * Main change item",
                "    - Sub-item with 4 spaces",
                "      + Nested sub-item with 6 spaces",
                "        More text in nested item",
                "    - Another sub-item",
            ]
        );
    }

    /// Test handling of empty lines between changes
    #[test]
    fn test_empty_lines() {
        let lines = rewrap(
            &["  * First change", "", "  * Second change"],
            "Should handle empty lines",
        );

        assert_eq!(lines, vec!["  * First change", "", "  * Second change"]);
    }
}
716}