whitespace_rs/
spacer.rs

1//! Report on or fix beginning of line spacing
2//!
3//! To find out the line beginnings given a [`Read`] trait object use [`read_bol_info()`]:
4//!
5//! ```
6//! use std::error::Error;
7//! use std::fs::File;
8//! use whitespace_rs::spacer;
9//!
10//! fn main() -> Result<(), Box<dyn Error>> {
11//!   let mut reader = "abc\n\r\r\n".as_bytes();
12//!   let bol_info = spacer::read_bol_info(&mut reader)?;
13//!
14//!   println!("{:?}", bol_info);
15//!   Ok(())
16//! }
17//! ```
18//!
19//! To normalize line beginnings given a [`Read`] trait object, create a [`Write`] trait object and use [`write_new_bols()`]:
20//!
21//! ```
22//! use std::error::Error;
23//! use std::fs::File;
24//! use whitespace_rs::spacer;
25//!
26//! fn main() -> Result<(), Box<dyn Error>> {
27//!   let mut reader = "abc\n\r\r\n".as_bytes();
28//!   let mut writer = Vec::new();
29//!   let bol_info = spacer::write_new_bols(&mut reader, &mut writer, spacer::BeginningOfLine::Tabs(2, true))?;
30//!
31//!   println!("{:?}", bol_info);
32//!   Ok(())
33//! }
34//! ```
35
36use std::cmp::max;
37use std::error::Error;
38use std::io::{Read, Write};
39use utf8_decode::UnsafeDecoder;
40
41// {grcov-excl-start}
/// Types of line beginnings
///
/// The value is small and plain data, so it is `Copy`; callers can keep using
/// it after handing it to [`write_new_bols()`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BeginningOfLine {
  /// Tabs (and spaces if not rounding down extra spaces).
  ///
  /// The `usize` is the tab size in columns. The `bool` is the round-down
  /// flag: when `true`, leftover spaces that do not fill a whole tab stop are
  /// dropped; when `false`, they are kept after the tabs.
  Tabs(usize, bool),
  /// Spaces only.
  ///
  /// The `usize` is the tab size in columns used when expanding tabs.
  Spaces(usize),
}
50// {grcov-excl-end}
51
/// Information about line beginnings in the file
///
/// Every field is a simple counter, so the type is `Copy` and its `Default`
/// value has all counters at zero.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct BolInfo {
  /// Number of lines that have no whitespace at the beginning
  pub none: usize,
  /// Number of all space line beginnings
  pub spaces: usize,
  /// Number of all tab line beginnings
  pub tabs: usize,
  /// Number of mixed space/tab line beginnings
  pub mixed: usize,
}
66
67impl BolInfo {
68  /// Get the most common beginning of line type in the file
69  pub fn get_common_bol(self: &Self, tab_size: usize, round_down: bool) -> BeginningOfLine {
70    if self.tabs > self.spaces {
71      BeginningOfLine::Tabs(tab_size, round_down)
72    } else {
73      BeginningOfLine::Spaces(tab_size)
74    }
75  }
76}
77
78/// Read beginning of line information
79pub fn read_bol_info(reader: &mut dyn Read) -> Result<BolInfo, Box<dyn Error>> {
80  let mut bol_info = BolInfo {
81    none: 0,
82    spaces: 0,
83    tabs: 0,
84    mixed: 0,
85  };
86  let mut decoder = UnsafeDecoder::new(reader.bytes()).peekable();
87  let mut at_bol = true;
88  let (mut num_spaces, mut num_tabs) = (0, 0);
89
90  loop {
91    let c;
92    match decoder.next() {
93      Some(value) => c = value?,
94      None => break,
95    };
96
97    if at_bol {
98      if c == ' ' {
99        num_spaces += 1;
100      } else if c == '\t' {
101        num_tabs += 1;
102      } else {
103        if num_spaces == 0 && num_tabs == 0 {
104          bol_info.none += 1;
105        } else if num_spaces > 0 && num_tabs > 0 {
106          bol_info.mixed += 1;
107        } else if num_spaces > 0 {
108          bol_info.spaces += 1;
109        } else {
110          bol_info.tabs += 1;
111        }
112        at_bol = false;
113      }
114    } else if c == '\n' {
115      num_spaces = 0;
116      num_tabs = 0;
117      at_bol = true;
118    }
119  }
120
121  Ok(bol_info)
122}
123
124/// Write input file out with new beginning-of-lines
125pub fn write_new_bols(
126  reader: &mut dyn Read,
127  writer: &mut dyn Write,
128  new_bol: BeginningOfLine,
129) -> Result<BolInfo, Box<dyn Error>> {
130  let (tab_size, round_down) = match new_bol {
131    BeginningOfLine::Spaces(tab_size) => (max(1, tab_size), false),
132    BeginningOfLine::Tabs(tab_size, round_down) => (max(1, tab_size), round_down),
133  };
134  let mut bol_info = BolInfo {
135    none: 0,
136    spaces: 0,
137    tabs: 0,
138    mixed: 0,
139  };
140  let mut decoder = UnsafeDecoder::new(reader.bytes()).peekable();
141  let mut buf = [0u8; 4];
142  let mut s = String::new();
143  let mut at_bol = true;
144  let untabify = |s: &str| -> String {
145    let mut t = String::new();
146
147    for c in s.chars() {
148      if c == '\t' {
149        t.push_str(&" ".repeat(tab_size - (t.len() % tab_size)));
150      } else {
151        t.push(c);
152      }
153    }
154
155    t
156  };
157  let tabify = |s: &str| -> (_, _) {
158    let mut num_spaces = 0;
159    let mut t = String::new();
160
161    for c in s.chars() {
162      if c == ' ' {
163        num_spaces += 1;
164      }
165
166      if num_spaces % tab_size == 0 {
167        t.push('\t');
168        num_spaces = 0
169      }
170    }
171
172    if num_spaces > 0 {
173      if !round_down {
174        t.push_str(&" ".repeat(num_spaces));
175      } else {
176        num_spaces = 0;
177      }
178    }
179
180    (t, num_spaces)
181  };
182
183  loop {
184    let c;
185
186    match decoder.next() {
187      Some(value) => c = value?,
188      None => break,
189    };
190    if at_bol {
191      if c == ' ' || c == '\t' {
192        s.push(c);
193      } else {
194        if s.len() == 0 {
195          bol_info.none += 1
196        } else {
197          s = untabify(&s);
198
199          if let BeginningOfLine::Tabs(_, _) = new_bol {
200            let (t, num_spaces) = tabify(&s);
201
202            s = t;
203            if num_spaces > 0 {
204              bol_info.mixed += 1;
205            } else {
206              bol_info.tabs += 1;
207            }
208          } else {
209            bol_info.spaces += 1;
210          }
211
212          writer.write(s.as_bytes())?;
213        }
214
215        writer.write(c.encode_utf8(&mut buf).as_bytes())?;
216
217        if c == '\n' {
218          s.clear();
219        } else {
220          at_bol = false;
221        }
222      }
223    } else {
224      writer.write(c.encode_utf8(&mut buf).as_bytes())?;
225
226      if c == '\n' {
227        s.clear();
228        at_bol = true;
229      }
230    }
231  }
232  writer.flush()?;
233
234  Ok(bol_info)
235}
236
#[cfg(test)]
mod tests {
  use super::*;

  /// Build the expected [`BolInfo`] from its four counters.
  fn counts(none: usize, spaces: usize, tabs: usize, mixed: usize) -> BolInfo {
    BolInfo {
      none,
      spaces,
      tabs,
      mixed,
    }
  }

  /// Run [`write_new_bols`] over `text` and return the statistics together
  /// with the rewritten text.
  fn rewrite(text: &str, bol: BeginningOfLine) -> (BolInfo, String) {
    let mut source = text.as_bytes();
    let mut sink = Vec::new();
    let stats = write_new_bols(&mut source, &mut sink, bol).unwrap();

    (stats, String::from_utf8(sink).unwrap())
  }

  // One line of each kind: none, tabs, spaces, mixed.
  #[test]
  fn test_read_bol_info() {
    let mut source = "a\n\tb\n  c\n \td\n".as_bytes();
    let stats = read_bol_info(&mut source).unwrap();

    assert_eq!(stats, counts(1, 1, 1, 1));
  }

  // Five spaces with a tab size of two: the odd space is dropped.
  #[test]
  fn test_write_new_file_tabs_round_down() {
    let (stats, text) = rewrite("\na\n  b\n     c\n", BeginningOfLine::Tabs(2, true));

    assert_eq!(stats, counts(2, 0, 2, 0));
    assert_eq!(text, "\na\n\tb\n\t\tc\n");
  }

  // Five spaces with a tab size of two: the odd space is kept after the tabs.
  #[test]
  fn test_write_new_file_tabs_no_round_down() {
    let (stats, text) = rewrite("\na\n  b\n     c\n", BeginningOfLine::Tabs(2, false));

    assert_eq!(stats, counts(2, 0, 1, 1));
    assert_eq!(text, "\na\n\tb\n\t\t c\n");
  }

  // Tabs and mixed indentation are expanded to spaces, also on a blank line.
  #[test]
  fn test_write_new_file_spaces() {
    let (stats, text) = rewrite("\ta\n \t x\n\t\t\n", BeginningOfLine::Spaces(2));

    assert_eq!(stats, counts(0, 3, 0, 0));
    assert_eq!(text, "  a\n   x\n    \n");
  }
}