//! Report on or fix line endings.
//!
//! To find out the line endings given a [`Read`] trait object use [`read_eol_info()`]:
//!
//! ```
//! use std::error::Error;
//! use std::fs::File;
//! use whitespace_rs::ender;
//!
//! fn main() -> Result<(), Box<dyn Error>> {
//!   let mut reader = "abc\n\r\r\n".as_bytes();
//!   let eol_info = ender::read_eol_info(&mut reader)?;
//!
//!   println!("{:?}", eol_info);
//!   Ok(())
//! }
//! ```
//!
//! To normalize line endings given a [`Read`] trait object, create a [`Write`] trait object and
//! use [`write_new_eols()`]:
//!
//! ```
//! use std::error::Error;
//! use std::fs::File;
//! use whitespace_rs::ender;
//!
//! fn main() -> Result<(), Box<dyn Error>> {
//!   let mut reader = "abc\n\r\r\n".as_bytes();
//!   let mut writer = Vec::new();
//!   let num_lines = ender::write_new_eols(&mut reader, &mut writer, ender::EndOfLine::Lf)?;
//!
//!   println!("{}", num_lines);
//!   Ok(())
//! }
//! ```

use std::error::Error;
use std::io::{Read, Write};
use utf8_decode::UnsafeDecoder;

// {grcov-excl-start}
/// Types of line endings.
///
/// Equality between variants is total, so `Eq` is derived alongside `PartialEq`.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
pub enum EndOfLine {
  /// Carriage return (`\r`).
  Cr,
  /// Line feed (`\n`).
  Lf,
  /// Carriage return followed by line feed (`\r\n`).
  CrLf,
}
// {grcov-excl-end}

/// Line-ending statistics gathered from a single input stream.
#[derive(Debug, PartialEq, Eq)]
pub struct EolInfo {
  /// Number of carriage-return endings that were not followed by a line feed.
  pub cr: usize,
  /// Number of line-feed endings that were not preceded by a carriage return.
  pub lf: usize,
  /// Number of carriage-return + line-feed pairs.
  pub crlf: usize,
  /// Number of lines; `read_eol_info()` starts this at 1 and adds one per ending found.
  pub num_lines: usize,
  /// How many of the three ending kinds (`cr`, `lf`, `crlf`) have a non-zero count.
  pub num_endings: usize,
}

impl EolInfo {
  /// Pick the line ending with the highest count.
  ///
  /// Ties are resolved in favour of `Lf` first and `CrLf` second: `Cr` is only
  /// returned when it is strictly more frequent than both of the others, and
  /// `CrLf` only when it is strictly more frequent than `Lf`. An info with all
  /// counts at zero therefore yields `Lf`.
  pub fn get_common_eol(self) -> EndOfLine {
    let Self { cr, lf, crlf, .. } = self;

    if cr > lf && cr > crlf {
      EndOfLine::Cr
    } else if crlf > lf {
      EndOfLine::CrLf
    } else {
      EndOfLine::Lf
    }
  }
}

/// Scan everything `reader` yields and tally the line endings found.
///
/// A `\r` immediately followed by `\n` is counted once as `crlf`; any other
/// `\r` counts as `cr` and any other `\n` as `lf`. `num_lines` is one more than
/// the total number of endings, and `num_endings` is the number of ending kinds
/// that occurred at least once.
///
/// # Errors
///
/// Returns the first error yielded by the decoder wrapped around `reader`.
pub fn read_eol_info(reader: &mut dyn Read) -> Result<EolInfo, Box<dyn Error>> {
  let (mut cr, mut lf, mut crlf) = (0usize, 0usize, 0usize);
  let mut chars = UnsafeDecoder::new(reader.bytes()).peekable();

  while let Some(decoded) = chars.next() {
    match decoded? {
      '\r' => {
        // Look one character ahead so that "\r\n" is treated as a single ending.
        if let Some(Ok('\n')) = chars.peek() {
          chars.next();
          crlf += 1;
        } else {
          cr += 1;
        }
      }
      '\n' => lf += 1,
      _ => {}
    }
  }

  let num_endings = [cr, lf, crlf].iter().filter(|&&count| count > 0).count();

  Ok(EolInfo {
    cr,
    lf,
    crlf,
    num_lines: 1 + cr + lf + crlf,
    num_endings,
  })
}

/// Copy `reader` to `writer`, replacing every line ending with `new_eol`.
///
/// Each `\r\n` pair, lone `\r` and lone `\n` in the input is written out as the
/// byte sequence for `new_eol`; every other character is re-encoded as UTF-8
/// and written unchanged. `writer` is flushed before returning.
///
/// Returns the number of lines, counted as one plus the number of line endings
/// encountered.
///
/// # Errors
///
/// Returns the first error yielded by the decoder wrapped around `reader`, or
/// the first error reported by `writer` while writing or flushing.
pub fn write_new_eols(
  reader: &mut dyn Read,
  writer: &mut dyn Write,
  new_eol: EndOfLine,
) -> Result<usize, Box<dyn Error>> {
  let newline_bytes: &[u8] = match new_eol {
    EndOfLine::Cr => b"\r",
    EndOfLine::Lf => b"\n",
    EndOfLine::CrLf => b"\r\n",
  };
  let mut decoder = UnsafeDecoder::new(reader.bytes()).peekable();
  let mut utf8_buf = [0u8; 4];
  let mut num_lines = 1;

  // `write_all` is used throughout: plain `write` is allowed to accept only part
  // of the buffer, which would silently drop bytes (e.g. the `\n` of a `\r\n`).
  while let Some(decoded) = decoder.next() {
    match decoded? {
      '\r' => {
        // Swallow the `\n` of a `\r\n` pair so it is emitted as a single ending.
        if matches!(decoder.peek(), Some(Ok('\n'))) {
          decoder.next();
        }

        num_lines += 1;
        writer.write_all(newline_bytes)?;
      }
      '\n' => {
        num_lines += 1;
        writer.write_all(newline_bytes)?;
      }
      other => writer.write_all(other.encode_utf8(&mut utf8_buf).as_bytes())?,
    }
  }
  writer.flush()?;

  Ok(num_lines)
}

#[cfg(test)]
mod tests {
  use super::*;

  /// Run `read_eol_info` over an in-memory string, panicking on any error.
  fn scan(text: &str) -> EolInfo {
    read_eol_info(&mut text.as_bytes()).unwrap()
  }

  /// Shorthand for building the expected `EolInfo` value.
  fn info(cr: usize, lf: usize, crlf: usize, num_lines: usize, num_endings: usize) -> EolInfo {
    EolInfo {
      cr,
      lf,
      crlf,
      num_lines,
      num_endings,
    }
  }

  // A single line feed is one LF ending and two lines.
  #[test]
  fn test_read_eol_info_lf() {
    assert_eq!(scan("\n"), info(0, 1, 0, 2, 1));
  }

  // A single carriage return is one CR ending and two lines.
  #[test]
  fn test_read_eol_info_cr() {
    assert_eq!(scan("\r"), info(1, 0, 0, 2, 1));
  }

  // A CR LF pair is one CRLF ending, not a CR plus an LF.
  #[test]
  fn test_read_eol_info_crlf() {
    assert_eq!(scan("\r\n"), info(0, 0, 1, 2, 1));
  }

  // LF, then CRLF, then CR: one of each kind, four lines in total.
  #[test]
  fn test_read_eol_info_mixed1() {
    assert_eq!(scan("\n\r\n\r"), info(1, 1, 1, 4, 3));
  }

  // Every kind of ending in the input is rewritten as CRLF.
  #[test]
  fn test_write_new_file() {
    let mut source = "abc\n\r\r\n".as_bytes();
    let mut sink = Vec::<u8>::new();
    let line_count = write_new_eols(&mut source, &mut sink, EndOfLine::CrLf).unwrap();

    assert_eq!(line_count, 4);
    assert_eq!(String::from_utf8(sink).unwrap(), "abc\r\n\r\n\r\n");
  }
}