linux_memutils/
reader.rs

1// SPDX-FileCopyrightText: Benedikt Vollmerhaus <benedikt@vollmerhaus.org>
2// SPDX-License-Identifier: MIT
3/*!
4Semi-efficient reading of physical memory from [`/dev/mem`].
5
6[`/dev/mem`]: https://man7.org/linux/man-pages/man4/mem.4.html
7*/
8use std::cmp::min;
9use std::io::{self, BufRead, ErrorKind, Read, Seek, SeekFrom};
10
/// The size in bytes (64 KiB) that the internal buffer starts out with
/// unless a different one is requested.
const DEFAULT_BUFFER_SIZE: usize = 64 * 1024;
13
/// A buffered reader that silently steps over unreadable regions of a file.
///
/// Much like [`io::BufReader`], it serves reads out of an internal buffer
/// that is filled by larger, less frequent reads of the underlying file.
/// Unlike it, a read failing with a permission error is retried with the
/// buffer size halved; once the size is down to a single byte, the
/// offending byte is skipped and reading resumes right behind it.
///
/// A brute-force scan of `/dev/mem` runs into many such regions. Stepping
/// through a long inaccessible stretch byte by byte is very slow, so this
/// should only be a fallback for when no memory map is available.
#[allow(clippy::module_name_repetitions)]
pub struct SkippingBufReader<F: Read + Seek> {
    /// The underlying seekable reader that bytes are pulled from.
    file: F,

    /// An optional upper bound on the offset in `file` to read up to; once
    /// it is reached, the reader yields 0 bytes (i.e. signals an EOF).
    max_offset: Option<usize>,

    /// The internal buffer holding the bytes of the most recent read.
    buffer: Vec<u8>,
    /// The size currently used for the internal `buffer`.
    buffer_size: usize,
    /// The size the buffer started out with; restored after every
    /// successful read.
    initial_buffer_size: usize,

    /// How many bytes at the front of `buffer` the last read filled in.
    ///
    /// A read is allowed to deliver fewer bytes than `buffer_size` (at the
    /// end of a file, for instance), leaving the tail of the buffer as it
    /// was. Only this freshly written prefix is meaningful; anything past
    /// it is leftover data from earlier reads.
    valid_bytes_in_buffer: usize,

    /// How far into `buffer` bytes have been handed out ("consumed") so far.
    ///
    /// The output buffer passed to [`SkippingBufReader::read`] can be
    /// smaller than the amount of unread data held in `buffer`. This cursor
    /// records such partial reads and tells when the buffer is exhausted
    /// and has to be refilled.
    read_position_in_buffer: usize,
}
53
54impl<F: Read + Seek> SkippingBufReader<F> {
55    pub fn new(file: F, start_offset: usize, max_offset: Option<usize>) -> Self {
56        Self::with_buffer_size(DEFAULT_BUFFER_SIZE, file, start_offset, max_offset)
57    }
58
59    /// Create a reader with an internal buffer of the given initial size.
60    ///
61    /// # Panics
62    ///
63    /// This function panics if it cannot seek to the given `start_offset`
64    /// within `file`.
65    pub fn with_buffer_size(
66        buffer_size: usize,
67        mut file: F,
68        start_offset: usize,
69        max_offset: Option<usize>,
70    ) -> Self {
71        file.seek(SeekFrom::Start(start_offset as u64))
72            .expect("failed to seek to given start offset");
73
74        Self {
75            file,
76            max_offset,
77
78            buffer: Vec::with_capacity(buffer_size),
79            buffer_size,
80            initial_buffer_size: buffer_size,
81
82            valid_bytes_in_buffer: 0,
83            read_position_in_buffer: 0,
84        }
85    }
86
87    fn refill_buffer(&mut self) -> io::Result<usize> {
88        loop {
89            // If a maximum offset to read to is specified and closer to the
90            // current position than the buffer size, reduce the buffer size
91            // to only those bytes left to read (down to 0 at the very end)
92            if let Some(max_offset) = self.max_offset {
93                let seek_position = usize::try_from(self.file.stream_position()?).unwrap();
94                let bytes_to_max_offset = max_offset - seek_position;
95                self.buffer_size = min(self.buffer_size, bytes_to_max_offset);
96            }
97
98            self.buffer.resize(self.buffer_size, 0);
99
100            match self.file.read(&mut self.buffer) {
101                Ok(0) => {
102                    log::debug!("Reached EOF or maximum offset.");
103                    return Ok(0);
104                }
105                Ok(bytes_read) => {
106                    self.buffer_size = self.initial_buffer_size;
107
108                    self.valid_bytes_in_buffer = bytes_read;
109                    self.read_position_in_buffer = 0;
110
111                    return Ok(bytes_read);
112                }
113                Err(e) if e.kind() == ErrorKind::PermissionDenied => {
114                    if self.buffer_size > 1 {
115                        self.buffer_size /= 2;
116                    } else {
117                        // Down to a 1-byte buffer; give up on this byte
118                        self.file.seek_relative(1)?;
119                    }
120                }
121                Err(e) => return Err(e),
122            }
123        }
124    }
125
126    /// Return the current read position in the file.
127    ///
128    /// # Note
129    ///
130    /// This is the position a subsequent call to `read()` will return bytes
131    /// from (or refill the buffer if empty). It is distinct from the actual
132    /// seek position in the file, which reflects the end of the latest read
133    /// performed to fill the buffer.
134    ///
135    /// # Panics
136    ///
137    /// This function panics if the current seek position in the underlying
138    /// reader cannot be obtained.
139    #[must_use]
140    pub fn position_in_file(&mut self) -> usize {
141        let seek_position = self
142            .file
143            .stream_position()
144            .expect("obtaining the current seek position should always succeed");
145        let unread_bytes_in_buffer = self.valid_bytes_in_buffer - self.read_position_in_buffer;
146
147        usize::try_from(seek_position).unwrap() - unread_bytes_in_buffer
148    }
149}
150
151impl<F: Read + Seek> BufRead for SkippingBufReader<F> {
152    fn fill_buf(&mut self) -> io::Result<&[u8]> {
153        if self.read_position_in_buffer >= self.valid_bytes_in_buffer {
154            log::trace!("No unread bytes in buffer; refilling it...");
155            let bytes_read = self.refill_buffer()?;
156            log::trace!("Refilled buffer with {bytes_read} bytes.");
157
158            if bytes_read == 0 {
159                return Ok(&[]);
160            }
161        }
162
163        let unread_bytes = &self.buffer[self.read_position_in_buffer..self.valid_bytes_in_buffer];
164        Ok(unread_bytes)
165    }
166
167    fn consume(&mut self, amt: usize) {
168        self.read_position_in_buffer += amt;
169    }
170}
171
172impl<F: Read + Seek> Read for SkippingBufReader<F> {
173    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
174        // Fetch all unread bytes from the internal buffer, refilling
175        // it if needed
176        let mut available_bytes = self.fill_buf()?;
177        // Pull as many available bytes as fit into the output buffer
178        let no_of_bytes_read = available_bytes.read(buf)?;
179        // Advance the cursor in the internal buffer by the number of
180        // bytes that were used
181        self.consume(no_of_bytes_read);
182
183        Ok(no_of_bytes_read)
184    }
185}
186
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// The 20-byte contents every test reads from.
    const SAMPLE: &str = "abcdefghijklmnopqrst";

    #[test]
    fn position_in_file_returns_expected_position() {
        let mut reader = SkippingBufReader::with_buffer_size(8, Cursor::new(SAMPLE), 0, None);

        // With 8-byte fills, 18 bytes end inside the third fill, which only
        // holds the last 4 bytes of the sample. The result must therefore be
        // derived from the number of _valid_ bytes in the buffer and not
        // from the buffer's size
        let mut data = [0_u8; 18];
        reader.read_exact(&mut data).unwrap();

        assert_eq!(reader.position_in_file(), 18);
    }

    #[test]
    fn stops_reading_at_max_offset_if_specified() {
        let mut reader = SkippingBufReader::new(Cursor::new(SAMPLE), 0, Some(10));

        let mut data = Vec::new();
        reader.read_to_end(&mut data).unwrap();

        assert_eq!(data, b"abcdefghij");
    }

    /// An in-memory cursor whose reads fail with `error_kind` whenever the
    /// requested range covers the byte at `bad_byte_position`.
    struct CursorWithError {
        inner: Cursor<Vec<u8>>,
        bad_byte_position: usize,
        error_kind: ErrorKind,
    }

    impl CursorWithError {
        /// Wrap the sample contents, failing reads that cover the given byte.
        fn over_sample(bad_byte_position: usize, error_kind: ErrorKind) -> Self {
            Self {
                inner: Cursor::new(SAMPLE.as_bytes().to_vec()),
                bad_byte_position,
                error_kind,
            }
        }
    }

    impl Read for CursorWithError {
        fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
            let start = usize::try_from(self.inner.position()).unwrap();
            let end = start + buf.len();

            let covers_bad_byte = start <= self.bad_byte_position && self.bad_byte_position < end;
            if covers_bad_byte {
                Err(io::Error::from(self.error_kind))
            } else {
                self.inner.read(buf)
            }
        }
    }

    impl Seek for CursorWithError {
        fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
            self.inner.seek(pos)
        }
    }

    #[test]
    fn skips_byte_for_which_read_returns_a_permission_error() {
        let file = CursorWithError::over_sample(4, ErrorKind::PermissionDenied);
        let mut reader = SkippingBufReader::new(file, 0, None);

        let mut data = Vec::new();
        reader.read_to_end(&mut data).unwrap();

        // Everything but the unreadable 'e' at index 4 is returned
        assert_eq!(data, b"abcdfghijklmnopqrst");
    }

    #[test]
    fn does_not_ignore_error_kinds_other_than_permission_denied() {
        let file = CursorWithError::over_sample(4, ErrorKind::ResourceBusy);
        let mut reader = SkippingBufReader::new(file, 0, None);

        let mut data = Vec::new();
        let error = reader.read_to_end(&mut data).unwrap_err();

        assert_eq!(error.kind(), ErrorKind::ResourceBusy);
    }
}