justify_string/lib.rs
1/// Assumes the first word of the line is already at the start of the line,
2/// without spaces following it.
3///
4/// # Examples
5///
6/// ```
7/// use justify_string::finalize_current_line;
8///
9/// let mut out = String::from("123");
10/// finalize_current_line(&mut out, &mut vec!["12", "1234"], 3);
11/// assert_eq!(out, String::from("123 12 1234"));
12/// ```
13///
14/// If `words_after_first` is empty, it simply appends extra spaces to the first word.
15///
16/// ```
17/// use justify_string::finalize_current_line;
18///
19/// let mut out = String::from("123");
20/// finalize_current_line(&mut out, &mut vec![], 2);
21/// assert_eq!(out, String::from("123 "));
22/// ```
23pub fn finalize_current_line(
24 output: &mut String,
25 words_after_first: &mut Vec::<&str>,
26 num_extra_spaces: u32,
27) {
28 let append_spaces = |s: &mut String, num_spaces: u32| {
29 for _ in 0..num_spaces {
30 s.push(' ');
31 }
32 };
33 let num_words_after_first = words_after_first.len() as u32;
34
35 if num_words_after_first <= 0 {
36 append_spaces(output, num_extra_spaces);
37 return;
38 }
39
40 // Distribute extra spaces evenly between gaps.
41 let num_gaps_between_words = num_words_after_first;
42 // `+ 1` because it's _extra_ spaces, at least one is necessary.
43 let small_gap_size = 1 + num_extra_spaces / num_gaps_between_words;
44 let big_gap_size = small_gap_size + 1;
45 // This is guaranteed to be smaller than `words_after_first.len()`.
46 let num_big_gaps = num_extra_spaces % num_gaps_between_words;
47 let mut words = words_after_first.iter();
48 let mut append_spaces_and_word = |num_spaces: u32, word: &str| {
49 append_spaces(output, num_spaces);
50 output.push_str(word);
51 };
52 // TODO perf: do `unwrap_unchecked`? Or can slices help somehow?
53 for _ in 0..num_big_gaps {
54 // It's ok to `unwrap` because `num_big_gaps` is guaranteed to be smaller than
55 // `words_after_first.len()`
56 append_spaces_and_word(big_gap_size, words.next().unwrap());
57 }
58 // Now small gaps.
59 // It's guaranteed that there's at least one element left, so it's ok to `unwrap`.
60 // TODO perf: `unwrap_unchecked()`. If not, maybe just put it inside the loop to save
61 // a few lines.
62 let mut word = words.next().unwrap();
63 loop {
64 append_spaces_and_word(small_gap_size, word);
65 word = match words.next() {
66 None => break,
67 Some(w) => w,
68 };
69 };
70}
71
72/**
73# Examples
74
75```
76use justify_string::justify;
77
78assert_eq!(
79 justify("123 12 123456789abc", 8),
80 concat!(
81 "123 12\n",
82 "12345678\n",
83 "9abc ",
84 )
85);
86```
87
88See tests for more.
89*/
90pub fn justify(input: &str, line_width: u32) -> String {
91 // TODO perf: do we need optimizations in case `line_width >= input.chars()len()`?
92 // In case `line_width <= 2`? In case input words are guaranteed to be separated by only a
93 // single space?
94 // Maybe with an argument, or a config var.
95
96 // TODO how about `split_ascii_whitespace`?
97 let mut words = input.split_whitespace();
98 // TODO perf: does the program actually search for the end of the word
99 // when this statement gets executed? If so, it's inefficient - if we're
100 // starting a new line, we can start copying chars of the next word
101 // right away, without trying to first find where it ends.
102 // Same for the next `words.next()` below.
103 let mut curr_word = match words.next() {
104 None => return "".to_string(),
105 Some(w) => w,
106 };
107 // The resulting string is most likely gonna be a little longer than the original one.
108 // But it may also be shorter if there is a lot of spaces.
109 // TODO perf: maybe allocate a little more than this?
110 let mut res = String::with_capacity(input.len());
111 // TODO perf: how about we instead use an array on the stack? This would put a cap
112 // on `line_width` though.
113 // TODO perf: if not, maybe don't allocate the max theoretical capacity?
114 let mut words_after_first = {
115 // TODO refactor: add tests to check if this is calculated correctly?
116 let max_words_per_line = (line_width + 1) / 2;
117 Vec::<&str>::with_capacity(max_words_per_line as usize - 1)
118 };
119 loop {
120 // New line, new word.
121 let mut line_remaining_capacity_chars = line_width;
122 let mut curr_word_iter = curr_word.chars();
123
124 // Put the first word at the beginning of the line.
125 // If `line_width` is exceeded, add a line break.
126 // TODO perf: maybe it's guaranteed by the caller that it's not possible for any word
127 // to be longer than `line_width`? Add a parameter or a config var.
128
129 // Each word consists of at least one char, so it's ok to `unwrap()`.
130 // TODO perf: `unwrap_unchecked`?
131 let mut ch = curr_word_iter.next().unwrap();
132 'first_word_of_line: loop {
133 res.push(ch);
134 line_remaining_capacity_chars -= 1;
135
136 match curr_word_iter.next() {
137 None => break 'first_word_of_line,
138 Some(c) => {
139 ch = c;
140 if line_remaining_capacity_chars <= 0 {
141 res.push('\n');
142 line_remaining_capacity_chars = line_width;
143 }
144 }
145 }
146 }
147
148 // The first word of the line is put, now the following ones.
149 'words_after_first_of_line: loop {
150 curr_word = match words.next() {
151 None => {
152 finalize_current_line(&mut res, &mut words_after_first, line_remaining_capacity_chars);
153 return res;
154 },
155 Some(w) => w,
156 };
157 // `+ 1` because there needs to be a space before this word.
158 let requred_capacity_to_append_curr_word = curr_word.chars().count() as u32 + 1;
159 if requred_capacity_to_append_curr_word <= line_remaining_capacity_chars {
160 words_after_first.push(curr_word);
161 line_remaining_capacity_chars -= requred_capacity_to_append_curr_word;
162 } else {
163 // We'll put this word on a new line.
164 finalize_current_line(&mut res, &mut words_after_first, line_remaining_capacity_chars);
165 res.push('\n');
166 break 'words_after_first_of_line;
167 // TODO refactor: would be cool to somehow ensure that `curr_word` is not used in
168 // this case and is to be handled in the next iteration of the loop.
169 }
170
171 }
172
173 // TODO refactor: Is there a better way? Can we just declare it for each line
174 // without reallocating?
175 words_after_first.clear();
176 }
177}
178
179#[cfg(test)]
180mod tests {
181 use super::justify;
182
183 #[test]
184 fn simple() {
185 let lorem_ipsum = "Lorem ipsum dolor sit amet consectetur adipiscing elit sed do eiusmod tempor incididunt ut labore et dolore magna aliqua";
186 let test_cases = [
187 ("", 5, ""),
188 ("test", 4, "test"),
189 ("test", 5, "test "),
190 // Not sure about this, maybe we're not supposed to break words.
191 // Maybe we're supposed to just panic.
192 // Also if a word can't fit in one line, do we always put its beginning on a new line?
193 // LibreOffice Writer seems to act this way.
194 ("test", 1, "t\ne\ns\nt"),
195 (" 12345 ", 2, "12\n34\n5 "),
196 (" ", 5, ""),
197 ("a a 123", 4, "a a\n123 "),
198 ("aaa aaa", 5, "aaa \naaa "), // Non-breaking space instead of a regular one. What do?
199 ("a a a 12345", 8, "a a a\n12345 "),
200 ("1234567", 3, "123\n456\n7 "),
201 ("12 123456789abc 1", 5, "12 \n12345\n6789a\nbc 1"),
202
203 (lorem_ipsum, 12,
204 "Lorem ipsum\ndolor sit\namet \nconsectetur \nadipiscing \nelit sed do\neiusmod \ntempor \nincididunt \nut labore et\ndolore magna\naliqua "),
205 (lorem_ipsum, 7,
206 "Lorem \nipsum \ndolor \nsit \namet \nconsect\netur \nadipisc\ning \nelit \nsed do\neiusmod\ntempor \nincidid\nunt ut\nlabore \net \ndolore \nmagna \naliqua "),
207 (lorem_ipsum, 35,
208 "Lorem ipsum dolor sit amet\nconsectetur adipiscing elit sed do\neiusmod tempor incididunt ut labore\net dolore magna aliqua"),
209 ];
210
211 for &(input, line_width, expected) in &test_cases {
212 assert_eq!(
213 justify(input, line_width), expected,
214 "input: \"{}\", width: {}", input, line_width
215 );
216 }
217 }
218 // TODO refactor: add randomized tests with algorithmic correctness checks (i.e.
219 // all lines are of the same width, extra space is distributed evenly).
220 // TODO refactor: use tests from https://github.com/ctrlcctrlv/justify/blob/master/tests/tests.rs
221}