whitespace_rs/
spacer.rs

1//! Report on or fix beginning of line spacing
2//!
3//! To find out the line beginnings given a [`Read`] trait object use [`read_bol_info()`]:
4//!
5//! ```
6//! use std::error::Error;
7//! use std::fs::File;
8//! use whitespace_rs::spacer;
9//!
10//! fn main() -> Result<(), Box<dyn Error>> {
11//!   let mut reader = "abc\n\r\r\n".as_bytes();
12//!   let bol_info = spacer::read_bol_info(&mut reader)?;
13//!
14//!   println!("{:?}", bol_info);
15//!   Ok(())
16//! }
17//! ```
18//!
19//! To normalize line beginnings given a [`Read`] trait object, create a [`Write`] trait object and use [`write_new_bols()`]:
20//!
21//! ```
22//! use std::error::Error;
23//! use std::fs::File;
24//! use whitespace_rs::spacer;
25//!
26//! fn main() -> Result<(), Box<dyn Error>> {
27//!   let mut reader = "abc\n\r\r\n".as_bytes();
28//!   let mut writer = Vec::new();
29//!   let bol_info = spacer::write_new_bols(&mut reader, &mut writer, spacer::BeginningOfLine::Tabs(2, true))?;
30//!
31//!   println!("{:?}", bol_info);
32//!   Ok(())
33//! }
34//! ```
35
36use std::cmp::max;
37use std::error::Error;
38use std::io::{Read, Write};
39use utf8_decode::UnsafeDecoder;
40
41// {grcov-excl-start}
/// Types of line beginnings
///
/// Both variants carry the tab size (the number of columns between tab stops) as their first
/// field.
#[derive(Clone, Debug)]
pub enum BeginningOfLine {
    /// Tabs, as `Tabs(tab_size, round_down)`. When `round_down` is `true`, spaces left over
    /// after the last full tab stop are dropped; otherwise they are kept after the tabs.
    Tabs(usize, bool),
    /// Spaces only, as `Spaces(tab_size)`; the tab size is used to expand existing tabs.
    Spaces(usize),
}
50// {grcov-excl-end}
51
/// Information about line beginnings in the file
///
/// Each field counts lines; [`Default`] yields all-zero counters.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct BolInfo {
    /// Number of lines that have no whitespace at the beginning
    pub none: usize,
    /// Number of all space line beginnings
    pub spaces: usize,
    /// Number of all tab line beginnings
    pub tabs: usize,
    /// Number of mixed space/tab line beginnings
    pub mixed: usize,
}
66
67impl BolInfo {
68    /// Get the most common beginning of line type in the file
69    pub fn get_common_bol(self: &Self, tab_size: usize, round_down: bool) -> BeginningOfLine {
70        if self.tabs > self.spaces {
71            BeginningOfLine::Tabs(tab_size, round_down)
72        } else {
73            BeginningOfLine::Spaces(tab_size)
74        }
75    }
76}
77
78/// Read beginning of line information
79pub fn read_bol_info(reader: &mut dyn Read) -> Result<BolInfo, Box<dyn Error>> {
80    let mut bol_info = BolInfo {
81        none: 0,
82        spaces: 0,
83        tabs: 0,
84        mixed: 0,
85    };
86    let mut decoder = UnsafeDecoder::new(reader.bytes()).peekable();
87    let mut at_bol = true;
88    let (mut num_spaces, mut num_tabs) = (0, 0);
89
90    loop {
91        let c;
92        match decoder.next() {
93            Some(value) => c = value?,
94            None => break,
95        };
96
97        if at_bol {
98            if c == ' ' {
99                num_spaces += 1;
100            } else if c == '\t' {
101                num_tabs += 1;
102            } else {
103                if num_spaces == 0 && num_tabs == 0 {
104                    bol_info.none += 1;
105                } else if num_spaces > 0 && num_tabs > 0 {
106                    bol_info.mixed += 1;
107                } else if num_spaces > 0 {
108                    bol_info.spaces += 1;
109                } else {
110                    bol_info.tabs += 1;
111                }
112                at_bol = false;
113            }
114        } else if c == '\n' {
115            num_spaces = 0;
116            num_tabs = 0;
117            at_bol = true;
118        }
119    }
120
121    Ok(bol_info)
122}
123
124/// Write input file out with new beginning-of-lines
125pub fn write_new_bols(
126    reader: &mut dyn Read,
127    writer: &mut dyn Write,
128    new_bol: BeginningOfLine,
129) -> Result<BolInfo, Box<dyn Error>> {
130    let (tab_size, round_down) = match new_bol {
131        BeginningOfLine::Spaces(tab_size) => (max(1, tab_size), false),
132        BeginningOfLine::Tabs(tab_size, round_down) => (max(1, tab_size), round_down),
133    };
134    let mut bol_info = BolInfo {
135        none: 0,
136        spaces: 0,
137        tabs: 0,
138        mixed: 0,
139    };
140    let mut decoder = UnsafeDecoder::new(reader.bytes()).peekable();
141    let mut buf = [0u8; 4];
142    let mut s = String::new();
143    let mut at_bol = true;
144    let untabify = |s: &str| -> String {
145        let mut t = String::new();
146
147        for c in s.chars() {
148            if c == '\t' {
149                t.push_str(&" ".repeat(tab_size - (t.len() % tab_size)));
150            } else {
151                t.push(c);
152            }
153        }
154
155        t
156    };
157    let tabify = |s: &str| -> (_, _) {
158        let mut num_spaces = 0;
159        let mut t = String::new();
160
161        for c in s.chars() {
162            if c == ' ' {
163                num_spaces += 1;
164            }
165
166            if num_spaces % tab_size == 0 {
167                t.push('\t');
168                num_spaces = 0
169            }
170        }
171
172        if num_spaces > 0 {
173            if !round_down {
174                t.push_str(&" ".repeat(num_spaces));
175            } else {
176                num_spaces = 0;
177            }
178        }
179
180        (t, num_spaces)
181    };
182
183    loop {
184        let c;
185
186        match decoder.next() {
187            Some(value) => c = value?,
188            None => break,
189        };
190        if at_bol {
191            if c == ' ' || c == '\t' {
192                s.push(c);
193            } else {
194                if s.len() == 0 {
195                    bol_info.none += 1
196                } else {
197                    s = untabify(&s);
198
199                    if let BeginningOfLine::Tabs(_, _) = new_bol {
200                        let (t, num_spaces) = tabify(&s);
201
202                        s = t;
203                        if num_spaces > 0 {
204                            bol_info.mixed += 1;
205                        } else {
206                            bol_info.tabs += 1;
207                        }
208                    } else {
209                        bol_info.spaces += 1;
210                    }
211
212                    writer.write(s.as_bytes())?;
213                }
214
215                writer.write(c.encode_utf8(&mut buf).as_bytes())?;
216
217                if c == '\n' {
218                    s.clear();
219                } else {
220                    at_bol = false;
221                }
222            }
223        } else {
224            writer.write(c.encode_utf8(&mut buf).as_bytes())?;
225
226            if c == '\n' {
227                s.clear();
228                at_bol = true;
229            }
230        }
231    }
232    writer.flush()?;
233
234    Ok(bol_info)
235}
236
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the expected [`BolInfo`] from its four counters.
    fn counts(none: usize, spaces: usize, tabs: usize, mixed: usize) -> BolInfo {
        BolInfo {
            none,
            spaces,
            tabs,
            mixed,
        }
    }

    /// Runs [`write_new_bols`] over `text` and returns the statistics with the rewritten text.
    fn rewrite(text: &str, new_bol: BeginningOfLine) -> (BolInfo, String) {
        let mut source = text.as_bytes();
        let mut sink = Vec::new();
        let stats = write_new_bols(&mut source, &mut sink, new_bol).unwrap();

        (stats, String::from_utf8(sink).unwrap())
    }

    // One line of each kind: no indent, tab, spaces, and space followed by tab.
    #[test]
    fn test_read_bol_info() {
        let mut source = "a\n\tb\n  c\n \td\n".as_bytes();

        assert_eq!(read_bol_info(&mut source).unwrap(), counts(1, 1, 1, 1));
    }

    // Five spaces at tab size 2 become two tabs; the odd space is discarded.
    #[test]
    fn test_write_new_file_tabs_round_down() {
        let (stats, text) = rewrite("\na\n  b\n     c\n", BeginningOfLine::Tabs(2, true));

        assert_eq!(stats, counts(2, 0, 2, 0));
        assert_eq!(text, "\na\n\tb\n\t\tc\n");
    }

    // Same input, but the odd space is kept after the tabs and the line counts as mixed.
    #[test]
    fn test_write_new_file_tabs_no_round_down() {
        let (stats, text) = rewrite("\na\n  b\n     c\n", BeginningOfLine::Tabs(2, false));

        assert_eq!(stats, counts(2, 0, 1, 1));
        assert_eq!(text, "\na\n\tb\n\t\t c\n");
    }

    // Tabs are expanded to the next tab stop, including on a whitespace-only line.
    #[test]
    fn test_write_new_file_spaces() {
        let (stats, text) = rewrite("\ta\n \t x\n\t\t\n", BeginningOfLine::Spaces(2));

        assert_eq!(stats, counts(0, 3, 0, 0));
        assert_eq!(text, "  a\n   x\n    \n");
    }
}