whitespace_rs/
ender.rs

1//! Report on or fix line endings.
2//!
3//! To find out the line endings given a [`Read`] trait object use [`read_eol_info()`]:
4//!
5//! ```
6//! use std::error::Error;
7//! use std::fs::File;
8//! use whitespace_rs::ender;
9//!
10//! fn main() -> Result<(), Box<dyn Error>> {
11//!   let mut reader = "abc\n\r\r\n".as_bytes();
12//!   let eol_info = ender::read_eol_info(&mut reader)?;
13//!
14//!   println!("{:?}", eol_info);
15//!   Ok(())
16//! }
17//! ```
18//!
19//! To normalize line endings given a [`Read`] trait object, create a [`Write`] trait object and
20//! use [`write_new_eols()`]:
21//!
22//! ```
23//! use std::error::Error;
24//! use std::fs::File;
25//! use whitespace_rs::ender;
26//!
27//! fn main() -> Result<(), Box<dyn Error>> {
28//!   let mut reader = "abc\n\r\r\n".as_bytes();
29//!   let mut writer = Vec::new();
30//!   let num_lines = ender::write_new_eols(&mut reader, &mut writer, ender::EndOfLine::Lf)?;
31//!
32//!   println!("{}", num_lines);
33//!   Ok(())
34//! }
35//! ```
36
37use clap::ValueEnum;
38use std::error::Error;
39use std::io::{Read, Write};
40use utf8_decode::UnsafeDecoder;
41
42// {grcov-excl-start}
43#[derive(PartialEq, Debug, Clone, Copy, ValueEnum)]
44/// Types of line endings.
45pub enum EndOfLine {
46    /// Carriage return.
47    Cr,
48    /// Line feed.
49    Lf,
50    /// Carriage return and line feed.
51    CrLf,
52}
53// {grcov-excl-end}
54
/// Line-ending tallies for a single input.
///
/// Two values compare equal when all four counters match.
#[derive(Debug, PartialEq, Eq)]
pub struct EolInfo {
    /// How many lines end in a carriage return.
    pub cr: usize,
    /// How many lines end in a line feed.
    pub lf: usize,
    /// How many lines end in a carriage return/line feed pair.
    pub crlf: usize,
    /// Total line count for the input, including lines with no ending.
    pub num_lines: usize,
}
69
70impl EolInfo {
71    /// Get the most common end-of-line based on the info.
72    pub fn get_common_eol(self: &Self) -> EndOfLine {
73        let mut n = self.lf;
74        let mut eol = EndOfLine::Lf;
75
76        if self.crlf > n {
77            n = self.crlf;
78            eol = EndOfLine::CrLf;
79        }
80
81        if self.cr > n {
82            eol = EndOfLine::Cr;
83        }
84
85        eol
86    }
87
88    pub fn num_endings(self: &Self) -> usize {
89        (self.cr > 0) as usize + (self.lf > 0) as usize + (self.crlf > 0) as usize
90    }
91}
92
93/// Read end-of-line information for a file.
94pub fn read_eol_info(reader: &mut dyn Read) -> Result<EolInfo, Box<dyn Error>> {
95    let mut eol_info = EolInfo {
96        cr: 0,
97        lf: 0,
98        crlf: 0,
99        num_lines: 1,
100    };
101    let mut decoder = UnsafeDecoder::new(reader.bytes()).peekable();
102
103    loop {
104        let c;
105        match decoder.next() {
106            Some(value) => c = value?,
107            None => break,
108        };
109        if c == '\r' {
110            if matches!(decoder.peek(), Some(Ok(c)) if *c == '\n') {
111                eol_info.crlf += 1;
112                decoder.next();
113            } else {
114                eol_info.cr += 1;
115            }
116
117            eol_info.num_lines += 1;
118        } else if c == '\n' {
119            eol_info.lf += 1;
120            eol_info.num_lines += 1;
121        }
122    }
123
124    Ok(eol_info)
125}
126
127/// Write input file out with new end-of-lines.
128pub fn write_new_eols(
129    reader: &mut dyn Read,
130    writer: &mut dyn Write,
131    new_eol: EndOfLine,
132) -> Result<usize, Box<dyn Error>> {
133    let mut num_lines = 1;
134    let newline_chars = match new_eol {
135        EndOfLine::Cr => "\r".as_bytes(),
136        EndOfLine::Lf => "\n".as_bytes(),
137        EndOfLine::CrLf => "\r\n".as_bytes(),
138    };
139    let mut decoder = UnsafeDecoder::new(reader.bytes()).peekable();
140    let mut buf = [0u8; 4];
141
142    loop {
143        let c;
144
145        match decoder.next() {
146            Some(value) => c = value?,
147            None => break,
148        };
149        if c == '\r' {
150            if matches!(decoder.peek(), Some(Ok(c)) if *c == '\n') {
151                decoder.next();
152            }
153
154            num_lines += 1;
155            writer.write(newline_chars)?;
156        } else if c == '\n' {
157            num_lines += 1;
158            writer.write(newline_chars)?;
159        } else {
160            writer.write(c.encode_utf8(&mut buf).as_bytes())?;
161        }
162    }
163    writer.flush()?;
164
165    Ok(num_lines)
166}
167
#[cfg(test)]
mod tests {
    use super::*;

    /// Run `read_eol_info` over an in-memory string, panicking if it reports an error.
    fn scan(text: &str) -> EolInfo {
        let mut bytes = text.as_bytes();

        read_eol_info(&mut bytes).unwrap()
    }

    #[test]
    fn test_read_eol_info_lf() {
        let expected = EolInfo {
            cr: 0,
            lf: 1,
            crlf: 0,
            num_lines: 2,
        };

        assert_eq!(scan("\n"), expected);
    }

    #[test]
    fn test_read_eol_info_cr() {
        let expected = EolInfo {
            cr: 1,
            lf: 0,
            crlf: 0,
            num_lines: 2,
        };

        assert_eq!(scan("\r"), expected);
    }

    #[test]
    fn test_read_eol_info_crlf() {
        let expected = EolInfo {
            cr: 0,
            lf: 0,
            crlf: 1,
            num_lines: 2,
        };

        assert_eq!(scan("\r\n"), expected);
    }

    #[test]
    fn test_read_eol_info_mixed1() {
        // One ending of each kind: LF, then CRLF, then a trailing lone CR.
        let expected = EolInfo {
            cr: 1,
            lf: 1,
            crlf: 1,
            num_lines: 4,
        };

        assert_eq!(scan("\n\r\n\r"), expected);
    }

    #[test]
    fn test_write_new_file() {
        let mut source = "abc\n\r\r\n".as_bytes();
        let mut sink = Vec::new();

        let line_count = write_new_eols(&mut source, &mut sink, EndOfLine::CrLf).unwrap();

        // LF, lone CR and CRLF must each come out as exactly one CRLF.
        assert_eq!(line_count, 4);
        assert_eq!(String::from_utf8(sink).unwrap(), "abc\r\n\r\n\r\n");
    }
}