cargo_deb/util/
wordsplit.rs

/// Word-wrapping helper used to format package description text.
pub trait WordSplit {
    /// Splits the text into lines, wrapping between space-separated words.
    ///
    /// * Every input line is wrapped independently; words are never broken, so
    ///   a word longer than `length` ends up on a line of its own that exceeds
    ///   `length`.
    /// * `length` is measured in characters (`char`s), not in UTF-8 bytes.
    ///   A word is appended to the current line only while the characters
    ///   already on the line plus the word stay strictly below `length`
    ///   (the joining space is not counted).
    /// * Leading whitespace of a line is kept in front of its first word.
    /// * Each tab is replaced by two spaces.
    /// * An empty or whitespace-only line is emitted as `"."`.
    ///
    /// `length == 0` is accepted and puts every word on its own line.
    fn split_by_chars(&self, length: usize) -> Vec<String>;
}

impl WordSplit for str {
    // ref: https://www.debian.org/doc/debian-policy/ch-controlfields.html#description
    //
    // * Extended description line must have at least one non-whitespace character.
    //   If you violate this rule, `dpkg -i` will fail.
    // * Extended description line must not have any tab character.
    //   If you violate this rule, the effect is not predictable.
    //
    // NOTE: as for extended description, this splitting might not be necessary in the first place?
    // (debian policy seems to say nothing about line length of extended description)
    fn split_by_chars(&self, length: usize) -> Vec<String> {
        /// True when `text` is empty or consists only of whitespace.
        fn is_blank(text: &str) -> bool {
            text.chars().all(char::is_whitespace)
        }

        // Every input line produces at least one output line, so the input line
        // count is a cheap lower bound that never divides by `length`.
        let mut lines: Vec<String> = Vec::with_capacity(self.lines().count());

        for line in self.lines() {
            // consider whitespace line as empty
            if is_blank(line) {
                lines.push(String::from("."));
                continue;
            }

            let line = line.replace('\t', "  ");

            let mut current_line = String::new();
            // Number of `char`s in `current_line`, tracked to avoid recounting.
            let mut current_chars = 0usize;
            // Whether `current_line` holds at least one non-whitespace character.
            let mut has_content = false;
            // Whether any piece (possibly empty) was appended for this input line;
            // once set, every appended piece is preceded by a single space.
            let mut has_words = false;

            for word in line.split(' ') {
                let word_chars = word.chars().count();

                // Only a line that already has visible content may be flushed:
                // an output line needs at least one non-whitespace character.
                if has_content && current_chars + word_chars >= length {
                    // The separating space is dropped at the wrap point.
                    lines.push(std::mem::replace(&mut current_line, word.to_owned()));
                    current_chars = word_chars;
                    has_content = !is_blank(word);
                    continue;
                }

                if has_words {
                    current_line.push(' ');
                    current_chars += 1;
                }
                has_words = true;
                current_line.push_str(word);
                current_chars += word_chars;
                has_content = has_content || !is_blank(word);
            }

            // What is left may be nothing but trailing whitespace.
            if has_content {
                lines.push(current_line);
            } else {
                lines.push(String::from("."));
            }
        }
        lines
    }
}
67
#[test]
fn test_split_by_chars() {
    /// Wraps `input` at `width` and compares the produced lines with `expected`.
    fn check(input: &str, width: usize, expected: &[&str]) {
        let wanted: Vec<String> = expected.iter().map(|line| (*line).to_owned()).collect();
        assert_eq!(input.split_by_chars(width), wanted);
    }

    // Plain wrapping between words.
    check(
        "This is a test string for split_by_chars.",
        10,
        &["This is a", "test", "string for", "split_by_chars."],
    );

    // An empty line becomes a lone dot.
    check(
        "This is a line\n\nthis is also a line.",
        79,
        &["This is a line", ".", "this is also a line."],
    );

    // Indentation stays attached to an over-long word.
    check(
        "                                              verylongwordverylongwordverylongwordverylongword\n\nbo",
        10,
        &[
            "                                              verylongwordverylongwordverylongwordverylongword",
            ".",
            "bo",
        ],
    );

    // A whitespace-only line is treated like an empty one.
    check(
        "This is a line\n  \nthis is also a line.\n",
        79,
        &["This is a line", ".", "this is also a line."],
    );

    // Indented input that fits on one line is left untouched.
    check(
        "    This  is an 4-indented line\n",
        79,
        &["    This  is an 4-indented line"],
    );

    // The same input wrapped at a very small width.
    check(
        "    This  is an 4-indented line\n",
        3,
        &["    This", " is", "an", "4-indented", "line"],
    );

    check(
        "    indent,    then space",
        4,
        &["    indent,", "   then", "space"],
    );

    // Trailing spaces stay with the last word when it is wrapped ...
    check("  trailing space    ", 12, &["  trailing", "space    "]);

    // ... and turn into a dot line when they are wrapped on their own.
    check("  trailing space    ", 16, &["  trailing space", "."]);

    // A word longer than the width is never broken.
    check(
        "sh\nverylongwordverylongwordverylongwordverylongword\nend",
        5,
        &["sh", "verylongwordverylongwordverylongwordverylongword", "end"],
    );

    // from alacritty
    check(
        "       src=\"https://cloud.githubusercontent.com/assets/4285147/21585004/2ebd0288-d06c-11e6-95d3-4a2889dbbd6f.png\">",
        79,
        &["       src=\"https://cloud.githubusercontent.com/assets/4285147/21585004/2ebd0288-d06c-11e6-95d3-4a2889dbbd6f.png\">"],
    );

    // Each tab is replaced by two spaces before wrapping.
    check(
        "\t\ttabs are\treplaced with spaces\t",
        10,
        &["    tabs", "are ", "replaced", "with", "spaces  "],
    );
}