whitespace_rs/
ender.rs

1//! Report on or fix line endings.
2//!
3//! To find out the line endings given a [`Read`] trait object use [`read_eol_info()`]:
4//!
5//! ```
6//! use std::error::Error;
7//! use std::fs::File;
8//! use whitespace_rs::ender;
9//!
10//! fn main() -> Result<(), Box<dyn Error>> {
11//!   let mut reader = "abc\n\r\r\n".as_bytes();
12//!   let eol_info = ender::read_eol_info(&mut reader)?;
13//!
14//!   println!("{:?}", eol_info);
15//!   Ok(())
16//! }
17//! ```
18//!
19//! To normalize line endings given a [`Read`] trait object, create a [`Write`] trait object and
20//! use [`write_new_eols()`]:
21//!
22//! ```
23//! use std::error::Error;
24//! use std::fs::File;
25//! use whitespace_rs::ender;
26//!
27//! fn main() -> Result<(), Box<dyn Error>> {
28//!   let mut reader = "abc\n\r\r\n".as_bytes();
29//!   let mut writer = Vec::new();
30//!   let num_lines = ender::write_new_eols(&mut reader, &mut writer, ender::EndOfLine::Lf)?;
31//!
32//!   println!("{}", num_lines);
33//!   Ok(())
34//! }
35//! ```
36
37use std::error::Error;
38use std::io::{Read, Write};
39use utf8_decode::UnsafeDecoder;
40
41// {grcov-excl-start}
/// The line-ending conventions this module can detect and emit.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum EndOfLine {
  /// A lone carriage return (`\r`).
  Cr,
  /// A lone line feed (`\n`).
  Lf,
  /// A carriage return immediately followed by a line feed (`\r\n`).
  CrLf,
}
52// {grcov-excl-end}
53
/// Line-ending statistics gathered from a single input.
#[derive(Debug, PartialEq, Eq)]
pub struct EolInfo {
  /// Count of lines terminated by a lone carriage return.
  pub cr: usize,
  /// Count of lines terminated by a lone line feed.
  pub lf: usize,
  /// Count of lines terminated by a carriage return/line feed pair.
  pub crlf: usize,
  /// Total number of lines, including a final line that has no ending.
  pub num_lines: usize,
}
68
69impl EolInfo {
70  /// Get the most common end-of-line based on the info.
71  pub fn get_common_eol(self: &Self) -> EndOfLine {
72    let mut n = self.lf;
73    let mut eol = EndOfLine::Lf;
74
75    if self.crlf > n {
76      n = self.crlf;
77      eol = EndOfLine::CrLf;
78    }
79
80    if self.cr > n {
81      eol = EndOfLine::Cr;
82    }
83
84    eol
85  }
86
87  pub fn num_endings(self: &Self) -> usize {
88    (self.cr > 0) as usize + (self.lf > 0) as usize + (self.crlf > 0) as usize
89  }
90}
91
92/// Read end-of-line information for a file.
93pub fn read_eol_info(reader: &mut dyn Read) -> Result<EolInfo, Box<dyn Error>> {
94  let mut eol_info = EolInfo {
95    cr: 0,
96    lf: 0,
97    crlf: 0,
98    num_lines: 1,
99  };
100  let mut decoder = UnsafeDecoder::new(reader.bytes()).peekable();
101
102  loop {
103    let c;
104    match decoder.next() {
105      Some(value) => c = value?,
106      None => break,
107    };
108    if c == '\r' {
109      if matches!(decoder.peek(), Some(Ok(c)) if *c == '\n') {
110        eol_info.crlf += 1;
111        decoder.next();
112      } else {
113        eol_info.cr += 1;
114      }
115
116      eol_info.num_lines += 1;
117    } else if c == '\n' {
118      eol_info.lf += 1;
119      eol_info.num_lines += 1;
120    }
121  }
122
123  Ok(eol_info)
124}
125
126/// Write input file out with new end-of-lines.
127pub fn write_new_eols(
128  reader: &mut dyn Read,
129  writer: &mut dyn Write,
130  new_eol: EndOfLine,
131) -> Result<usize, Box<dyn Error>> {
132  let mut num_lines = 1;
133  let newline_chars = match new_eol {
134    EndOfLine::Cr => "\r".as_bytes(),
135    EndOfLine::Lf => "\n".as_bytes(),
136    EndOfLine::CrLf => "\r\n".as_bytes(),
137  };
138  let mut decoder = UnsafeDecoder::new(reader.bytes()).peekable();
139  let mut buf = [0u8; 4];
140
141  loop {
142    let c;
143
144    match decoder.next() {
145      Some(value) => c = value?,
146      None => break,
147    };
148    if c == '\r' {
149      if matches!(decoder.peek(), Some(Ok(c)) if *c == '\n') {
150        decoder.next();
151      }
152
153      num_lines += 1;
154      writer.write(newline_chars)?;
155    } else if c == '\n' {
156      num_lines += 1;
157      writer.write(newline_chars)?;
158    } else {
159      writer.write(c.encode_utf8(&mut buf).as_bytes())?;
160    }
161  }
162  writer.flush()?;
163
164  Ok(num_lines)
165}
166
#[cfg(test)]
mod tests {
  use super::*;

  /// Runs `read_eol_info` over `text` and checks every counter at once.
  fn assert_eol_info(text: &str, cr: usize, lf: usize, crlf: usize, num_lines: usize) {
    let actual = read_eol_info(&mut text.as_bytes()).unwrap();
    let expected = EolInfo {
      cr,
      lf,
      crlf,
      num_lines,
    };

    assert_eq!(actual, expected);
  }

  #[test]
  fn test_read_eol_info_lf() {
    assert_eol_info("\n", 0, 1, 0, 2);
  }

  #[test]
  fn test_read_eol_info_cr() {
    assert_eol_info("\r", 1, 0, 0, 2);
  }

  #[test]
  fn test_read_eol_info_crlf() {
    assert_eol_info("\r\n", 0, 0, 1, 2);
  }

  #[test]
  fn test_read_eol_info_mixed1() {
    assert_eol_info("\n\r\n\r", 1, 1, 1, 4);
  }

  #[test]
  fn test_write_new_file() {
    let mut source = "abc\n\r\r\n".as_bytes();
    let mut sink = Vec::new();
    let line_count = write_new_eols(&mut source, &mut sink, EndOfLine::CrLf).unwrap();
    let written = String::from_utf8(sink).unwrap();

    assert_eq!(line_count, 4);
    assert_eq!(written, "abc\r\n\r\n\r\n");
  }
}