reverse_lines/
lib.rs

1//! ### ReverseLines
2//!
3//! This library provides a small Rust Iterator for reading files or anything that implements
4//! `std::io::Seek` and `std::io::Read` in reverse.
5//!
6//! It is a rework of [rev_lines](https://docs.rs/rev_lines/latest/rev_lines/) with improved error
7//! handling and allowance for more types.
8//!
9//! #### Example
10//!
11//! ```
12//!  extern crate reverse_lines;
13//!
14//!  use reverse_lines::ReverseLines;
15//!  use std::io::BufReader;
16//!  use std::fs::File;
17//!
18//!  fn main() {
19//!      let file = File::open("tests/multi_line_file").unwrap();
20//!      let reverse_lines = ReverseLines::new(BufReader::new(file)).unwrap();
21//!
22//!      for line in reverse_lines {
23//!          println!("{}", line.unwrap());
24//!      }
25//!  }
26//! ```
27//!
28//! If a line with invalid UTF-8 is encountered, or if there is an I/O error, the iterator will
29//! yield an `std::io::Error`.
30//!
31//! This method uses logic borrowed from [uutils/coreutils
32//! tail](https://github.com/uutils/coreutils/blob/f2166fed0ad055d363aedff6223701001af090d3/src/tail/tail.rs#L399-L402)
33//! and code borrowed from [rev_lines](https://docs.rs/rev_lines/latest/rev_lines/).
34
35use std::cmp::min;
36use std::io::{Error, ErrorKind, Read, Result, Seek, SeekFrom};
37use std::iter::FusedIterator;
38
39#[cfg(test)]
40#[macro_use]
41extern crate assert_matches;
42
/// Chunk size, in bytes, used by `ReverseLines::new` (4 KiB).
const DEFAULT_SIZE: usize = 4 * 1024;

/// Line feed (`'\n'`), the byte that separates lines.
const LF_BYTE: u8 = 0x0A;
/// Carriage return (`'\r'`), dropped when it directly precedes a line feed.
const CR_BYTE: u8 = 0x0D;
47
/// Iterator that walks a seekable reader line by line, starting at its end and
/// moving towards its start.
pub struct ReverseLines<R>
where
    R: Seek + Read,
{
    /// Source being read backwards.
    reader: R,
    /// Offset bookkeeping kept in step with the reader's cursor: it starts at the
    /// reader's length and shrinks as chunks are consumed.
    reader_pos: u64,
    /// Upper bound on the number of bytes fetched per backwards read.
    buf_size: u64,
    /// Set once an I/O error has been yielded; later calls to `next` return `None`.
    is_error: bool,
}
55
56impl<R: Seek + Read> ReverseLines<R> {
57    /// Create a new `ReverseLines` struct from a `<R>`. Internal
58    /// buffering for iteration will default to 4096 bytes at a time.
59    pub fn new(reader: R) -> Result<ReverseLines<R>> {
60        ReverseLines::with_capacity(DEFAULT_SIZE, reader)
61    }
62
63    /// Create a new `ReverseLines` struct from a `<R>`. Interal
64    /// buffering for iteration will use `cap` bytes at a time.
65    pub fn with_capacity(cap: usize, mut reader: R) -> Result<ReverseLines<R>> {
66        // Seek to end of reader now
67        let reader_size = reader.seek(SeekFrom::End(0))?;
68
69        let mut reverse_lines = ReverseLines {
70            reader,
71            reader_pos: reader_size,
72            buf_size: cap as u64,
73            is_error: false,
74        };
75
76        // Handle any trailing new line characters for the reader
77        // so the first next call does not return Some("")
78
79        // Read at most 2 bytes
80        let end_size = min(reader_size, 2);
81        let end_buf = reverse_lines.read_to_buffer(end_size)?;
82
83        if end_size == 1 {
84            if end_buf[0] != LF_BYTE {
85                reverse_lines.move_reader_position(1)?;
86            }
87        } else if end_size == 2 {
88            if end_buf[0] != CR_BYTE {
89                reverse_lines.move_reader_position(1)?;
90            }
91
92            if end_buf[1] != LF_BYTE {
93                reverse_lines.move_reader_position(1)?;
94            }
95        }
96
97        Ok(reverse_lines)
98    }
99
100    fn read_to_buffer(&mut self, size: u64) -> Result<Vec<u8>> {
101        let mut buf = vec![0; size as usize];
102        let offset = -(size as i64);
103
104        self.reader.seek(SeekFrom::Current(offset))?;
105        self.reader.read_exact(&mut buf[0..(size as usize)])?;
106        self.reader.seek(SeekFrom::Current(offset))?;
107
108        self.reader_pos -= size;
109
110        Ok(buf)
111    }
112
113    fn move_reader_position(&mut self, offset: u64) -> Result<()> {
114        self.reader.seek(SeekFrom::Current(offset as i64))?;
115        self.reader_pos += offset;
116
117        Ok(())
118    }
119}
120
121impl<R: Read + Seek> Iterator for ReverseLines<R> {
122    type Item = Result<String>;
123
124    fn next(&mut self) -> Option<Self::Item> {
125        if self.is_error {
126            return None;
127        }
128
129        let mut result: Vec<u8> = Vec::new();
130
131        'outer: loop {
132            if self.reader_pos < 1 {
133                if !result.is_empty() {
134                    break;
135                }
136
137                return None;
138            }
139
140            // Read the of minimum between the desired
141            // buffer size or remaining length of the reader
142            let size = min(self.buf_size, self.reader_pos);
143
144            match self.read_to_buffer(size) {
145                Ok(buf) => {
146                    for (idx, ch) in buf.iter().enumerate().rev() {
147                        // Found a new line character to break on
148                        if *ch == LF_BYTE {
149                            let mut offset = idx as u64;
150
151                            // Add an extra byte cause of CR character
152                            if idx > 1 && buf[idx - 1] == CR_BYTE {
153                                offset -= 1;
154                            }
155
156                            match self.reader.seek(SeekFrom::Current(offset as i64)) {
157                                Ok(_) => {
158                                    self.reader_pos += offset;
159                                    break 'outer;
160                                }
161
162                                Err(e) => {
163                                    self.is_error = true;
164                                    return Some(Err(e));
165                                }
166                            }
167                        } else {
168                            result.push(*ch);
169                        }
170                    }
171                }
172
173                Err(e) => {
174                    self.is_error = true;
175                    return Some(Err(e));
176                }
177            }
178        }
179
180        // Reverse the results since they were written backwards
181        result.reverse();
182
183        // Convert to a String
184        Some(String::from_utf8(result).map_err(|e| Error::new(ErrorKind::InvalidData, e)))
185    }
186}
187
188impl<R: Read + Seek> FusedIterator for ReverseLines<R> {}
189
#[cfg(test)]
mod tests {
    use std::fs::File;

    use super::*;

    /// Opens a fixture file, panicking if it cannot be opened.
    fn fixture(path: &str) -> File {
        File::open(path).unwrap()
    }

    /// Asserts that `lines` yields exactly `expected`, in order and without
    /// errors, and then reports exhaustion.
    fn assert_yields(mut lines: ReverseLines<File>, expected: &[&str]) {
        for want in expected {
            assert_eq!(lines.next().unwrap().unwrap(), want.to_string());
        }
        assert_matches!(lines.next(), None);
    }

    #[test]
    fn it_handles_empty_files() {
        let rev_lines = ReverseLines::new(fixture("tests/empty_file")).unwrap();

        assert_yields(rev_lines, &[]);
    }

    #[test]
    fn it_handles_file_with_one_line() {
        let rev_lines = ReverseLines::new(fixture("tests/one_line_file")).unwrap();

        assert_yields(rev_lines, &["ABCD"]);
    }

    #[test]
    fn it_handles_file_with_multi_lines() {
        let rev_lines = ReverseLines::new(fixture("tests/multi_line_file")).unwrap();

        assert_yields(rev_lines, &["UVWXYZ", "LMNOPQRST", "GHIJK", "ABCDEF"]);
    }

    #[test]
    fn it_handles_file_with_blank_lines() {
        let rev_lines = ReverseLines::new(fixture("tests/blank_line_file")).unwrap();

        assert_yields(rev_lines, &["", "", "XYZ", "", "ABCD"]);
    }

    #[test]
    fn it_handles_file_with_multi_lines_and_with_capacity() {
        // A chunk size smaller than the lines forces each line to span several reads.
        let rev_lines = ReverseLines::with_capacity(5, fixture("tests/multi_line_file")).unwrap();

        assert_yields(rev_lines, &["UVWXYZ", "LMNOPQRST", "GHIJK", "ABCDEF"]);
    }

    #[test]
    fn it_errors_on_invalid_utf8() {
        let mut rev_lines = ReverseLines::with_capacity(5, fixture("tests/invalid_utf8")).unwrap();

        // The valid line comes back first, then the undecodable one as an error.
        let first = rev_lines.next().unwrap().unwrap();
        assert_eq!(first, "Valid UTF8".to_string());

        let second = rev_lines.next().unwrap();
        assert_matches!(second, Err(_));

        assert_matches!(rev_lines.next(), None);
    }
}