justify_string/
lib.rs

1/// Assumes the first word of the line is already at the start of the line,
2/// without spaces following it.
3///
4/// # Examples
5///
6/// ```
7/// use justify_string::finalize_current_line;
8///
9/// let mut out = String::from("123");
10/// finalize_current_line(&mut out, &mut vec!["12", "1234"], 3);
11/// assert_eq!(out, String::from("123   12  1234"));
12/// ```
13///
14/// If `words_after_first` is empty, it simply appends extra spaces to the first word.
15///
16/// ```
17/// use justify_string::finalize_current_line;
18///
19/// let mut out = String::from("123");
20/// finalize_current_line(&mut out, &mut vec![], 2);
21/// assert_eq!(out, String::from("123  "));
22/// ```
23pub fn finalize_current_line(
24    output: &mut String,
25    words_after_first: &mut Vec::<&str>,
26    num_extra_spaces: u32,
27) {
28    let append_spaces = |s: &mut String, num_spaces: u32| {
29        for _ in 0..num_spaces {
30            s.push(' ');
31        }
32    };
33    let num_words_after_first = words_after_first.len() as u32;
34
35    if num_words_after_first <= 0 {
36        append_spaces(output, num_extra_spaces);
37        return;
38    }
39
40    // Distribute extra spaces evenly between gaps.
41    let num_gaps_between_words = num_words_after_first;
42    // `+ 1` because it's _extra_ spaces, at least one is necessary.
43    let small_gap_size = 1 + num_extra_spaces / num_gaps_between_words;
44    let big_gap_size = small_gap_size + 1;
45    // This is guaranteed to be smaller than `words_after_first.len()`.
46    let num_big_gaps = num_extra_spaces % num_gaps_between_words;
47    let mut words = words_after_first.iter();
48    let mut append_spaces_and_word = |num_spaces: u32, word: &str| {
49        append_spaces(output, num_spaces);
50        output.push_str(word);
51    };
52    // TODO perf: do `unwrap_unchecked`? Or can slices help somehow?
53    for _ in 0..num_big_gaps {
54        // It's ok to `unwrap` because `num_big_gaps` is guaranteed to be smaller than
55        // `words_after_first.len()`
56        append_spaces_and_word(big_gap_size, words.next().unwrap());
57    }
58    // Now small gaps.
59    // It's guaranteed that there's at least one element left, so it's ok to `unwrap`.
60    // TODO perf: `unwrap_unchecked()`. If not, maybe just put it inside the loop to save
61    // a few lines.
62    let mut word = words.next().unwrap();
63    loop {
64        append_spaces_and_word(small_gap_size, word);
65        word = match words.next() {
66            None => break,
67            Some(w) => w,
68        };
69    };
70}
71
72/**
73# Examples
74
75```
76use justify_string::justify;
77
78assert_eq!(
79    justify("123 12 123456789abc", 8),
80    concat!(
81        "123   12\n",
82        "12345678\n",
83        "9abc    ",
84    )
85);
86```
87
88See tests for more.
89*/
90pub fn justify(input: &str, line_width: u32) -> String {
91    // TODO perf: do we need optimizations in case `line_width >= input.chars()len()`?
92    // In case `line_width <= 2`? In case input words are guaranteed to be separated by only a
93    // single space?
94    //  Maybe with an argument, or a config var.
95
96    // TODO how about `split_ascii_whitespace`?
97    let mut words = input.split_whitespace();
98    // TODO perf: does the program actually search for the end of the word
99    // when this statement gets executed? If so, it's inefficient - if we're
100    // starting a new line, we can start copying chars of the next word
101    // right away, without trying to first find where it ends.
102    // Same for the next `words.next()` below.
103    let mut curr_word = match words.next() {
104        None => return "".to_string(),
105        Some(w) => w,
106    };
107    // The resulting string is most likely gonna be a little longer than the original one.
108    // But it may also be shorter if there is a lot of spaces.
109    // TODO perf: maybe allocate a little more than this?
110    let mut res = String::with_capacity(input.len());
111    // TODO perf: how about we instead use an array on the stack? This would put a cap
112    // on `line_width` though.
113    // TODO perf: if not, maybe don't allocate the max theoretical capacity?
114    let mut words_after_first = {
115        // TODO refactor: add tests to check if this is calculated correctly?
116        let max_words_per_line = (line_width + 1) / 2;
117        Vec::<&str>::with_capacity(max_words_per_line as usize - 1)
118    };
119    loop {
120        // New line, new word.
121        let mut line_remaining_capacity_chars = line_width;
122        let mut curr_word_iter = curr_word.chars();
123
124        // Put the first word at the beginning of the line.
125        // If `line_width` is exceeded, add a line break.
126        // TODO perf: maybe it's guaranteed by the caller that it's not possible for any word
127        // to be longer than `line_width`? Add a parameter or a config var.
128
129        // Each word consists of at least one char, so it's ok to `unwrap()`.
130        // TODO perf: `unwrap_unchecked`?
131        let mut ch = curr_word_iter.next().unwrap();
132        'first_word_of_line: loop {
133            res.push(ch);
134            line_remaining_capacity_chars -= 1;
135
136            match curr_word_iter.next() {
137                None => break 'first_word_of_line,
138                Some(c) => {
139                    ch = c;
140                    if line_remaining_capacity_chars <= 0 {
141                        res.push('\n');
142                        line_remaining_capacity_chars = line_width;
143                    }
144                }
145            }
146        }
147
148        // The first word of the line is put, now the following ones.
149        'words_after_first_of_line: loop {
150            curr_word = match words.next() {
151                None => {
152                    finalize_current_line(&mut res, &mut words_after_first, line_remaining_capacity_chars);
153                    return res;
154                },
155                Some(w) => w,
156            };
157            // `+ 1` because there needs to be a space before this word.
158            let requred_capacity_to_append_curr_word = curr_word.chars().count() as u32 + 1;
159            if requred_capacity_to_append_curr_word <= line_remaining_capacity_chars {
160                words_after_first.push(curr_word);
161                line_remaining_capacity_chars -= requred_capacity_to_append_curr_word;
162            } else {
163                // We'll put this word on a new line.
164                finalize_current_line(&mut res, &mut words_after_first, line_remaining_capacity_chars);
165                res.push('\n');
166                break 'words_after_first_of_line;
167                // TODO refactor: would be cool to somehow ensure that `curr_word` is not used in
168                // this case and is to be handled in the next iteration of the loop.
169            }
170
171        }
172
173        // TODO refactor: Is there a better way? Can we just declare it for each line
174        // without reallocating?
175        words_after_first.clear();
176    }
177}
178
179#[cfg(test)]
180mod tests {
181    use super::justify;
182
183    #[test]
184    fn simple() {
185        let lorem_ipsum = "Lorem ipsum dolor sit amet consectetur adipiscing elit sed do eiusmod tempor incididunt ut labore et dolore magna aliqua";
186        let test_cases = [
187            ("", 5, ""),
188            ("test", 4, "test"),
189            ("test", 5, "test "),
190            // Not sure about this, maybe we're not supposed to break words.
191            // Maybe we're supposed to just panic.
192            // Also if a word can't fit in one line, do we always put its beginning on a new line?
193            // LibreOffice Writer seems to act this way.
194            ("test", 1, "t\ne\ns\nt"),
195            ("  12345 ", 2, "12\n34\n5 "),
196            ("  ", 5, ""),
197            ("a   a  123", 4, "a  a\n123 "),
198            ("aaa aaa", 5, "aaa  \naaa  "), // Non-breaking space instead of a regular one. What do?
199            ("a a a 12345", 8, "a   a  a\n12345   "),
200            ("1234567", 3, "123\n456\n7  "),
201            ("12 123456789abc 1", 5, "12   \n12345\n6789a\nbc  1"),
202
203            (lorem_ipsum, 12,
204             "Lorem  ipsum\ndolor    sit\namet        \nconsectetur \nadipiscing  \nelit  sed do\neiusmod     \ntempor      \nincididunt  \nut labore et\ndolore magna\naliqua      "),
205            (lorem_ipsum, 7,
206             "Lorem  \nipsum  \ndolor  \nsit    \namet   \nconsect\netur   \nadipisc\ning    \nelit   \nsed  do\neiusmod\ntempor \nincidid\nunt  ut\nlabore \net     \ndolore \nmagna  \naliqua "),
207            (lorem_ipsum, 35,
208             "Lorem    ipsum   dolor   sit   amet\nconsectetur  adipiscing elit sed do\neiusmod tempor incididunt ut labore\net      dolore     magna     aliqua"),
209        ];
210
211        for &(input, line_width, expected) in &test_cases {
212            assert_eq!(
213                justify(input, line_width), expected,
214                "input: \"{}\", width: {}", input, line_width
215            );
216        }
217    }
218    // TODO refactor: add randomized tests with algorithmic correctness checks (i.e.
219    // all lines are of the same width, extra space is distributed evenly).
220    // TODO refactor: use tests from https://github.com/ctrlcctrlv/justify/blob/master/tests/tests.rs
221}