exact_reader/
multifile.rs

1use std::io::{Read, Seek};
2
3use crate::utils::calculate_seek;
4
/// The `File` struct represents an individual file within the multi-file context.
///
/// [`MultiFile`] lays its files out back to back using the declared `size` of
/// each one, so `size` should match the number of bytes that `file` yields.
#[derive(Debug)]
pub struct File<R> {
    /// The inner reader for the file.
    pub file: R,
    /// The size of the file in bytes.
    pub size: usize,
    /// The name of the file.
    pub filename: String,
}
14
15impl<R: Read> Read for File<R> {
16    #[inline]
17    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
18        self.file.read(buf)
19    }
20}
21
22impl<R: Seek> Seek for File<R> {
23    #[inline]
24    fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result<u64> {
25        self.file.seek(pos)
26    }
27}
28
29/// The `MultiFile` struct combines multiple files into a unified stream,
30/// allowing sequential reading as if all files are concatenated.
31pub struct MultiFile<R> {
32    /// The list of files
33    files: Vec<File<R>>,
34
35    /// The cumulative offset to the current file within the multi-file context.
36    /// (without the in-file offset)
37    cumul_offset: usize,
38    /// The offset within the current file.
39    infile_offset: usize,
40
41    /// The total size of the combined multi-file stream.
42    total_len: usize,
43    /// The index of the current file being read from.
44    current_file_idx: usize,
45}
46
47impl<R> MultiFile<R> {
48    /// Creates a new `MultiFile` instance with the provided list of files.
49    pub fn new(files: Vec<File<R>>) -> Self {
50        let total_len = files.iter().map(|f| f.size).sum();
51        Self {
52            current_file_idx: 0,
53            infile_offset: 0,
54            cumul_offset: 0,
55            files,
56            total_len,
57        }
58    }
59
60    /// Converts the given position within the combined multi-file stream
61    /// to the index of the corresponding file within the `files`.
62    #[inline]
63    fn needle_to_file(&self, needle: usize) -> Option<usize> {
64        if needle > self.total_len {
65            return None;
66        }
67
68        if self.cumul_offset == needle {
69            return Some(self.current_file_idx);
70        }
71
72        let mut res = if self.cumul_offset > needle {
73            0
74        } else {
75            self.cumul_offset
76        };
77
78        for (idx, file) in self.files.iter().enumerate().take(self.current_file_idx) {
79            if res + file.size >= needle {
80                return Some(idx);
81            }
82            res += file.size;
83        }
84
85        unreachable!()
86    }
87
88    /// Calculates the physical offset within the combined multi-file stream.
89    #[inline]
90    fn physical_offset(&self) -> usize {
91        self.cumul_offset + self.infile_offset
92    }
93
94    /// The total size of the multi-file stream in bytes.
95    pub fn size(&self) -> usize {
96        self.total_len
97    }
98}
99
100impl<R: Read> Read for MultiFile<R> {
101    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
102        let tail_idx;
103        let mut infile = 0;
104
105        let expected = buf.len();
106        let mut taken = 0;
107
108        'find: {
109            for (idx, file) in self.files[self.current_file_idx..].iter_mut().enumerate() {
110                infile = file.read(&mut buf[taken..])?;
111                taken += infile;
112                if taken == expected {
113                    tail_idx = self.current_file_idx + idx;
114                    break 'find;
115                }
116            }
117            tail_idx = self.files.len() - 1;
118        }
119        let _cumul_offset: usize = self.files[self.current_file_idx..tail_idx]
120            .iter()
121            .map(|f| f.size)
122            .sum();
123
124        self.cumul_offset += _cumul_offset;
125        self.current_file_idx = tail_idx;
126        self.infile_offset = infile;
127
128        Ok(taken)
129    }
130}
131
132impl<R: Read + Seek> Seek for MultiFile<R> {
133    fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result<u64> {
134        let calculated_seek = calculate_seek(self.total_len, self.physical_offset(), pos)? as usize;
135        let calculated_idx = self.needle_to_file(calculated_seek).unwrap();
136
137        let new_cum = self.files[..calculated_idx]
138            .iter()
139            .map(|f| f.size)
140            .sum::<usize>();
141
142        let seek_to = calculated_seek - new_cum;
143
144        match calculated_idx.cmp(&self.current_file_idx) {
145            std::cmp::Ordering::Greater => {
146                for file in self.files[..calculated_idx].iter_mut() {
147                    let _ = file.seek(std::io::SeekFrom::End(0))?;
148                }
149            }
150            std::cmp::Ordering::Less => {
151                for file in self.files[calculated_idx + 1..=self.current_file_idx].iter_mut() {
152                    let _ = file.seek(std::io::SeekFrom::Start(0))?;
153                }
154            }
155            std::cmp::Ordering::Equal => {}
156        }
157
158        let res =
159            self.files[calculated_idx].seek(std::io::SeekFrom::Start(seek_to as u64))? as usize;
160
161        self.current_file_idx = calculated_idx;
162        self.cumul_offset = new_cum;
163        self.infile_offset = res;
164
165        Ok((new_cum + res) as u64)
166    }
167
168    fn stream_position(&mut self) -> std::io::Result<u64> {
169        Ok(self.physical_offset() as u64)
170    }
171}
172
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::{Cursor, SeekFrom};

    impl From<Cursor<Vec<u8>>> for File<Cursor<Vec<u8>>> {
        fn from(value: Cursor<Vec<u8>>) -> Self {
            let len = value.get_ref().len();
            Self {
                file: value,
                size: len,
                filename: "cursor".to_string(),
            }
        }
    }

    /// Two three-byte files holding the bytes 1 through 6.
    fn new_file() -> MultiFile<Cursor<Vec<u8>>> {
        let a = Cursor::new(vec![1u8, 2, 3]);
        let b = Cursor::new(vec![4u8, 5, 6]);

        MultiFile::new(vec![a.into(), b.into()])
    }

    #[test]
    fn test_read1() {
        let a = Cursor::new(vec![1u8, 2, 3]);
        let mut file = MultiFile::new(vec![a.into()]);

        let mut buf = [0u8; 2];
        assert_eq!(file.seek(SeekFrom::End(-2)).unwrap(), 1);

        assert_eq!(file.read(&mut buf).unwrap(), 2);
        assert_eq!(buf, [2, 3]);
    }

    #[test]
    fn test_read2() {
        let mut file = new_file();

        {
            let mut buf = [0u8; 3];
            assert_eq!(file.read(&mut buf).unwrap(), 3);
            assert_eq!(buf, [1, 2, 3]);
            assert_eq!(file.stream_position().unwrap(), 3);
        }

        {
            let mut buf = [0u8; 1];
            assert_eq!(file.read(&mut buf).unwrap(), 1);
            assert_eq!(buf, [4]);
            assert_eq!(file.stream_position().unwrap(), 4);
        }

        {
            // Only two bytes are left; the rest of the buffer stays untouched.
            let mut buf = [0u8; 5];
            assert_eq!(file.read(&mut buf).unwrap(), 2);
            assert_eq!(buf, [5, 6, 0, 0, 0]);
            assert_eq!(file.stream_position().unwrap(), 6);
        }

        {
            let mut buf = [0u8; 1];
            assert_eq!(file.read(&mut buf).unwrap(), 0);
        }
    }

    #[test]
    fn test_seek() {
        let mut file = new_file();

        {
            let mut buf = [0u8; 1];

            assert_eq!(file.seek(SeekFrom::Start(3)).unwrap(), 3);

            assert_eq!(file.read(&mut buf).unwrap(), 1);
            assert_eq!(buf, [4]);
        }

        {
            let mut buf = [0u8; 1];

            assert_eq!(file.seek(SeekFrom::Current(0)).unwrap(), 4);

            assert_eq!(file.read(&mut buf).unwrap(), 1);
            assert_eq!(buf, [5]);
        }

        {
            let mut buf = [0u8; 2];

            // `Current` is relative to the stream position, which is 5 after
            // the two single-byte reads above, so this lands on offset 4.
            assert_eq!(file.seek(SeekFrom::Current(-1)).unwrap(), 4);

            assert_eq!(file.read(&mut buf).unwrap(), 2);
            assert_eq!(buf, [5, 6]);
        }

        {
            let mut buf = [0u8; 5];

            assert_eq!(file.seek(SeekFrom::Start(0)).unwrap(), 0);

            assert_eq!(file.read(&mut buf).unwrap(), 5);
            assert_eq!(buf, [1, 2, 3, 4, 5]);
        }
    }

    #[test]
    fn test_seek2() {
        let mut file = new_file();

        {
            let mut buf = [0u8; 1];

            assert_eq!(file.seek(SeekFrom::Start(0)).unwrap(), 0);

            assert_eq!(file.read(&mut buf).unwrap(), 1);
            assert_eq!(buf, [1]);
        }

        {
            let mut buf = [0u8; 2];

            assert_eq!(file.seek(SeekFrom::Start(0)).unwrap(), 0);

            assert_eq!(file.read(&mut buf).unwrap(), 2);
            assert_eq!(buf, [1, 2]);
        }
    }
}