frep_core/
line_reader.rs

1use std::io::BufRead;
2
/// The terminator that followed a line's content in the input.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum LineEnding {
    /// The line had no terminator (typically the last line of a file).
    None,
    /// A bare line feed (`\n`), the Unix/Linux/macOS convention.
    Lf,
    /// A carriage return followed by a line feed (`\r\n`), the Windows convention.
    CrLf,
}

impl LineEnding {
    /// Returns the terminator as a string slice.
    ///
    /// [`LineEnding::None`] yields the empty string.
    #[inline]
    pub fn as_str(self) -> &'static str {
        match self {
            Self::CrLf => "\r\n",
            Self::Lf => "\n",
            Self::None => "",
        }
    }

    /// Returns the terminator as raw bytes.
    ///
    /// This is the byte representation of [`LineEnding::as_str`], so
    /// [`LineEnding::None`] yields an empty slice.
    #[inline]
    pub fn as_bytes(self) -> &'static [u8] {
        self.as_str().as_bytes()
    }
}
32
/// An iterator over the lines of a `BufRead` source that keeps track of each line's ending.
///
/// The standard library's `lines()` iterator discards line endings. This iterator
/// instead yields `(content, line_ending)` pairs: `content` is the line's raw bytes
/// with the terminator removed, and `line_ending` records which terminator (if any)
/// followed it.
///
/// The content is not validated as UTF-8. Callers that need text can convert it with
/// `String::from_utf8()` or `String::from_utf8_lossy()`.
#[derive(Debug)]
pub struct LinesSplitEndings<R> {
    /// The underlying source that lines are read from.
    reader: R,
    /// Scratch space holding the raw bytes of the line currently being read.
    buffer: Vec<u8>,
}
46
47impl<R: BufRead> LinesSplitEndings<R> {
48    /// Creates a new `LinesSplitEndings` iterator from any type that implements `BufRead`.
49    pub fn new(reader: R) -> Self {
50        Self {
51            reader,
52            buffer: vec![],
53        }
54    }
55}
56
57impl<R: BufRead> Iterator for LinesSplitEndings<R> {
58    type Item = std::io::Result<(Vec<u8>, LineEnding)>;
59
60    fn next(&mut self) -> Option<Self::Item> {
61        self.buffer.clear();
62        match self.reader.read_until(b'\n', &mut self.buffer) {
63            Ok(0) => None, // EOF
64            Ok(_) => {
65                let (content, ending) = split_line_ending(&self.buffer);
66                Some(Ok((content.to_vec(), ending)))
67            }
68            Err(e) => Some(Err(e)),
69        }
70    }
71}
72
73/// Extension trait that adds the `lines_with_endings()` method to any `BufRead` implementation.
74///
75/// # Examples
76///
77/// ```
78/// use std::io::Cursor;
79/// use frep_core::line_reader::BufReadExt;
80///
81/// let cursor = Cursor::new(b"hello\nworld\r\n");
82///
83/// for line_result in cursor.lines_with_endings() {
84///     let (content, ending) = line_result?;
85///     println!("Content: '{}', Ending: '{:?}'", String::from_utf8_lossy(&content), ending);
86/// }
87/// # Ok::<(), std::io::Error>(())
88/// ```
89pub trait BufReadExt: BufRead {
90    /// Returns an iterator that yields lines with their endings preserved.
91    ///
92    /// Each item yielded by the iterator is a `Result<(Vec<u8>, LineEnding), io::Error>`
93    /// where the first element is the line content as bytes and the second is the line ending type.
94    fn lines_with_endings(self) -> LinesSplitEndings<Self>
95    where
96        Self: Sized,
97    {
98        LinesSplitEndings::new(self)
99    }
100}
101
102impl<R: BufRead> BufReadExt for R {}
103
104/// Splits a line into its content and line ending parts.
105///
106/// # Examples
107///
108/// ```
109/// use frep_core::line_reader::{split_line_ending, LineEnding};
110///
111/// assert_eq!(split_line_ending(b"hello\n"), (&b"hello"[..], LineEnding::Lf));
112/// assert_eq!(split_line_ending(b"hello\r\n"), (&b"hello"[..], LineEnding::CrLf));
113/// assert_eq!(split_line_ending(b"hello"), (&b"hello"[..], LineEnding::None));
114/// ```
115#[inline]
116pub fn split_line_ending(line: &[u8]) -> (&[u8], LineEnding) {
117    let len = line.len();
118    if len == 0 {
119        return (line, LineEnding::None);
120    }
121
122    if line[len - 1] == b'\n' {
123        if len >= 2 && line[len - 2] == b'\r' {
124            (&line[..len - 2], LineEnding::CrLf)
125        } else {
126            (&line[..len - 1], LineEnding::Lf)
127        }
128    } else {
129        (line, LineEnding::None)
130    }
131}
132
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Asserts that splitting `input` yields `content` and `ending`.
    fn assert_split(input: &str, content: &str, ending: LineEnding) {
        assert_eq!(
            split_line_ending(input.as_bytes()),
            (content.as_bytes(), ending)
        );
    }

    /// Exhausts a line iterator, decoding every line's content as UTF-8.
    ///
    /// Panics (and so fails the calling test) on an I/O error or invalid UTF-8.
    fn drain(
        lines: impl Iterator<Item = std::io::Result<(Vec<u8>, LineEnding)>>,
    ) -> Vec<(String, LineEnding)> {
        lines
            .map(|item| {
                let (bytes, ending) = item.unwrap();
                (String::from_utf8(bytes).unwrap(), ending)
            })
            .collect()
    }

    /// Reads every line of `input` through `LinesSplitEndings`.
    fn read_all(input: &str) -> Vec<(String, LineEnding)> {
        drain(LinesSplitEndings::new(Cursor::new(input)))
    }

    /// Builds an expected `(content, ending)` pair.
    fn line(content: &str, ending: LineEnding) -> (String, LineEnding) {
        (content.to_owned(), ending)
    }

    #[test]
    fn test_split_line_ending_empty() {
        assert_split("", "", LineEnding::None);
    }

    #[test]
    fn test_split_line_ending_no_ending() {
        assert_split("hello world", "hello world", LineEnding::None);
    }

    #[test]
    fn test_split_line_ending_lf() {
        assert_split("hello\n", "hello", LineEnding::Lf);
        assert_split("\n", "", LineEnding::Lf);
    }

    #[test]
    fn test_split_line_ending_crlf() {
        assert_split("hello\r\n", "hello", LineEnding::CrLf);
        assert_split("\r\n", "", LineEnding::CrLf);
    }

    #[test]
    fn test_split_line_ending_unicode() {
        assert_split("héllo 世界\n", "héllo 世界", LineEnding::Lf);
        assert_split("héllo 世界\r\n", "héllo 世界", LineEnding::CrLf);
    }

    #[test]
    fn test_lines_split_endings_empty() {
        assert!(read_all("").is_empty());
    }

    #[test]
    fn test_lines_split_endings_single_line_no_ending() {
        assert_eq!(read_all("hello"), vec![line("hello", LineEnding::None)]);
    }

    #[test]
    fn test_lines_split_endings_single_line_with_lf() {
        assert_eq!(read_all("hello\n"), vec![line("hello", LineEnding::Lf)]);
    }

    #[test]
    fn test_lines_split_endings_multiple_lines_mixed() {
        let expected = vec![
            line("line1", LineEnding::Lf),
            line("line2", LineEnding::CrLf),
            line("line3", LineEnding::Lf),
            line("", LineEnding::Lf),
            line("line5", LineEnding::None),
        ];
        assert_eq!(read_all("line1\nline2\r\nline3\n\nline5"), expected);
    }

    #[test]
    fn test_lines_split_endings_empty_lines() {
        let expected = vec![
            line("", LineEnding::Lf),
            line("", LineEnding::CrLf),
            line("", LineEnding::CrLf),
        ];
        assert_eq!(read_all("\n\r\n\r\n"), expected);
    }

    #[test]
    fn test_buf_read_ext_trait() {
        // Goes through the extension method rather than `LinesSplitEndings::new`.
        let parsed = drain(Cursor::new("hello\nworld\r\n").lines_with_endings());
        let expected = vec![
            line("hello", LineEnding::Lf),
            line("world", LineEnding::CrLf),
        ];
        assert_eq!(parsed, expected);
    }

    #[test]
    fn test_large_line() {
        let content = "a".repeat(10000);
        let parsed = read_all(&format!("{content}\n"));
        assert_eq!(parsed, vec![(content, LineEnding::Lf)]);
    }

    #[test]
    fn test_unicode_content() {
        let content = "Hello 世界 🦀 Rust";
        let parsed = read_all(&format!("{content}\r\n"));
        assert_eq!(parsed, vec![line(content, LineEnding::CrLf)]);
    }
}