io_partition/
lib.rs

//! This Rust crate allows taking a part of an object that implements ``Read`` + ``Seek`` (typically a file), by specifying its offset and length. It can also build a similar item from an ``Arc<Mutex<File>>``, ensuring coherency of the pointer in the file and allowing concurrent access to the same file (although it isn't optimized for speed, as it has to lock the Mutex and seek to the right position on every call).
2//!
3//! # Examples
4//! ```
5//! use std::io::{Cursor, Read};
6//! use io_partition::Partition;
7//! let file = Cursor::new(&[0, 2, 4, 6, 8, 10, 12]);
8//!
9//! let mut sub_file = Partition::new(file, 2, 3).unwrap();
10//! let mut buffer = [0, 0, 0, 0, 0];
11//! assert_eq!(sub_file.read(&mut buffer).unwrap(), 3);
12//! assert_eq!(buffer, [4, 6, 8, 0, 0]);
13//! ```
14//TODO: impl stream_len when seek_convenience is stabilized
15
16use io::ErrorKind;
17use std::io::{Cursor, Read, Seek, SeekFrom, Write};
18use std::sync::{Arc, Mutex};
19use std::{io, sync::MutexGuard};
20use thiserror::Error;
21
// Text attached to the `InvalidInput` errors built in this file.

/// Message for a seek whose target lies before the first byte of the partition.
const ERROR_MESSAGE_SEEK_PRE_START: &str = "can't seek before the beggining of the partition";
/// Message for position arithmetic that does not fit in an unsigned 64-bit integer.
const ERROR_MESSAGE_OVERFLOW_POSITION_UNSIGNED: &str = "position cant be more than 2^64.";
/// Message for position arithmetic that does not fit in a signed 64-bit integer.
const ERROR_MESSAGE_OVERFLOW_POSITION_SIGNED: &str = "position cant be more than 2^63.";
/// Message for a partition whose `start + length` does not fit in a `u64`.
const ERROR_MESSAGE_START_LENGHT_OVERFLOW: &str = "the sum of the input start + lenght is superior to the maximum representatble value in a 64 bit number.";
26
27fn partition_read<T: Read + Seek>(
28    buf: &mut [u8],
29    file: &mut T,
30    _start: u64,
31    end: u64,
32    mut pointer: u64,
33    seek_is_correct: bool,
34) -> (u64, io::Result<usize>) {
35    if !seek_is_correct {
36        match file.seek(SeekFrom::Start(pointer)) {
37            Ok(_) => (),
38            Err(err) => return (pointer, Err(err)),
39        }
40    }
41    let end_byte_absolute = match pointer.checked_add(buf.len() as u64) {
42        Some(value) => value,
43        None => {
44            return (
45                pointer,
46                Err(io::Error::new(
47                    io::ErrorKind::InvalidInput,
48                    ERROR_MESSAGE_OVERFLOW_POSITION_UNSIGNED,
49                )),
50            )
51        }
52    };
53    if end_byte_absolute >= end {
54        if end < pointer {
55            return (pointer, Ok(0));
56        };
57        let loop_total_nb = end - pointer;
58        let mut buffer1 = [0];
59
60        for loop_nb in 0..loop_total_nb {
61            match file.read_exact(&mut buffer1) {
62                Ok(()) => (),
63                Err(err) => {
64                    let _ = file.seek(SeekFrom::Start(pointer));
65                    return (pointer, Err(err));
66                }
67            }
68            pointer += 1;
69            buf[loop_nb as usize] = buffer1[0];
70        }
71        (pointer, Ok(loop_total_nb as usize))
72    } else {
73        match file.read(buf) {
74            Ok(value) => (pointer + value as u64, Ok(value)),
75            Err(err) => (pointer, Err(err)),
76        }
77    }
78}
79
80fn partition_seek<T: Read + Seek>(
81    file: &mut T,
82    start: u64,
83    end: u64,
84    pointer: u64,
85    target: SeekFrom,
86) -> (u64, io::Result<u64>) {
87    let new_real_pos: u64 = match target {
88        SeekFrom::Start(nb) => match start.checked_add(nb) {
89            Some(position) => position,
90            None => {
91                return (
92                    pointer,
93                    Err(io::Error::new(
94                        io::ErrorKind::InvalidInput,
95                        ERROR_MESSAGE_OVERFLOW_POSITION_UNSIGNED,
96                    )),
97                )
98            }
99        },
100        SeekFrom::End(nb) => {
101            let result_i64 = match (end as i64).checked_add(nb) {
102                Some(position) => position,
103                None => {
104                    return (
105                        pointer,
106                        Err(io::Error::new(
107                            io::ErrorKind::InvalidInput,
108                            ERROR_MESSAGE_OVERFLOW_POSITION_SIGNED,
109                        )),
110                    )
111                }
112            };
113            if result_i64 < start as i64 {
114                return (
115                    pointer,
116                    Err(io::Error::new(
117                        io::ErrorKind::InvalidInput,
118                        ERROR_MESSAGE_SEEK_PRE_START,
119                    )),
120                );
121            };
122            result_i64 as u64
123        }
124        SeekFrom::Current(nb) => {
125            let result_i64 = match (pointer as i64).checked_add(nb) {
126                Some(position) => position,
127                None => {
128                    return (
129                        pointer,
130                        Err(io::Error::new(
131                            io::ErrorKind::InvalidInput,
132                            ERROR_MESSAGE_OVERFLOW_POSITION_SIGNED,
133                        )),
134                    )
135                }
136            };
137            if result_i64 < start as i64 {
138                return (
139                    pointer,
140                    Err(io::Error::new(
141                        io::ErrorKind::InvalidInput,
142                        ERROR_MESSAGE_SEEK_PRE_START,
143                    )),
144                );
145            };
146            result_i64 as u64
147        }
148    };
149    if new_real_pos < start {
150        return (
151            pointer,
152            Err(io::Error::new(
153                io::ErrorKind::InvalidInput,
154                ERROR_MESSAGE_SEEK_PRE_START,
155            )),
156        );
157    };
158    // do not block seeking post-partition, as it will be caught by read
159    match file.seek(SeekFrom::Start(new_real_pos as u64)) {
160        Ok(_) => (),
161        Err(err) => return (pointer, Err(err)),
162    };
163    (new_real_pos, Ok(new_real_pos - start))
164}
165
/// Check that `reader` holds at least `end` bytes, then position it at `start`.
///
/// The size of the reader is determined by seeking to its end. If it is smaller than `end`, an
/// error of kind `Other` is returned and the reader is left at its end.
fn check_seek_end_valid<T: Read + Seek>(reader: &mut T, start: u64, end: u64) -> io::Result<()> {
    let reader_len = reader.seek(SeekFrom::End(0))?;
    if end > reader_len {
        let error = io::Error::new(
            ErrorKind::Other,
            "the end of the file is after the real file end (determined via SeekFrom::end(0))",
        );
        return Err(error);
    }
    reader.seek(SeekFrom::Start(start)).map(|_| ())
}
177
/// A ``Partition`` allows you to refer to a part of a file. It consumes the input file.
///
/// The input offset is the first byte that will be accessible. The user of the ``Partition`` won't be able to seek before it, and it will be considered the offset 0 of the ``Partition``.
/// The input length is the number of bytes that can be read with this ``Partition``. The last readable byte of the input file is the one at offset input_offset + input_len - 1.
///
/// # Examples
/// ```
/// use std::io::{Cursor, Read, Seek, SeekFrom};
/// use io_partition::Partition;
///
/// let some_value = (0..30).collect::<Vec<u8>>();
/// let input_file = Cursor::new(&some_value); // 0, 1, 2, 3 ... 29
///
/// let mut partition = Partition::new(input_file, 10, 20).unwrap();
///
/// let mut buffer = [0];
/// partition.read_exact(&mut buffer).unwrap();
/// assert_eq!(buffer, [10]);
/// partition.read_exact(&mut buffer).unwrap();
/// assert_eq!(buffer, [11]);
///
/// assert!(partition.seek(SeekFrom::Current(-10)).is_err());
/// partition.seek(SeekFrom::End(-1)).unwrap();
/// partition.read_exact(&mut buffer).unwrap();
/// assert_eq!(buffer, [29]);
///
/// partition.seek(SeekFrom::End(-3)).unwrap();
/// let mut buffer_large = [0; 6];
/// assert_eq!(partition.read(&mut buffer_large).unwrap(), 3);
/// assert_eq!(buffer_large, [27, 28, 29, 0, 0, 0]);
/// ```
#[derive(Debug)]
pub struct Partition<T: Read + Seek> {
    /// The underlying reader, owned by the partition
    file: T,
    /// The offset of the first byte that should be included
    start: u64,
    /// The current position, as an absolute offset in the underlying reader (it starts at `start`)
    pointer: u64,
    /// The offset of the first byte that should be NOT included
    end: u64,
}
218
219impl<T: Read + Seek> Partition<T> {
220    /// Create new ``Partition``, with the specified input file, start and lenght
221    ///
222    /// This will check that the file is big enought at creation, and the cursor will be located at the beggining of the file.
223    pub fn new(mut file: T, start: u64, lenght: u64) -> io::Result<Partition<T>> {
224        let end = start.checked_add(lenght).ok_or_else(|| {
225            io::Error::new(
226                io::ErrorKind::InvalidInput,
227                ERROR_MESSAGE_START_LENGHT_OVERFLOW,
228            )
229        })?;
230        check_seek_end_valid(&mut file, start, end)?;
231        let result = Partition {
232            file,
233            start,
234            pointer: start,
235            end,
236        };
237        Ok(result)
238    }
239}
240
241impl<T: Read + Seek> Read for Partition<T> {
242    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
243        let (new_pointer_pos, result) = partition_read(
244            buf,
245            &mut self.file,
246            self.start,
247            self.end,
248            self.pointer,
249            true,
250        );
251        self.pointer = new_pointer_pos;
252        result
253    }
254}
255
256impl<T: Seek + Read> Seek for Partition<T> {
257    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
258        let (new_pointer_pos, result) =
259            partition_seek(&mut self.file, self.start, self.end, self.pointer, pos);
260        self.pointer = new_pointer_pos;
261        result
262    }
263}
264
265impl<T: Read + Seek> Write for Partition<T> {
266    /// Do not use this write function. It will always fail. It is just here because some library require this to have the ``Write`` trait to make this work with this (rust_vfs)
267    fn write(&mut self, _: &[u8]) -> io::Result<usize> {
268        Err(io::Error::from(io::ErrorKind::PermissionDenied))
269    }
270
271    /// Always suceed. It is useless to call it
272    fn flush(&mut self) -> io::Result<()> {
273        Ok(())
274    }
275}
276
#[derive(Debug, Error)]
/// An error that may occur by calling [`PartitionMutex::lock`]
pub enum LockPartitionError {
    /// An I/O error reported by the underlying file (converted from [`io::Error`])
    #[error("an io error occured")]
    IOError(#[from] io::Error),
    /// The mutex protecting the file is poisoned
    #[error("A thread panicked while holding this lock")]
    PoisonError,
}
285
/// A ``PartitionMutex`` allows you to refer to a part of a file. It consumes the input file handle.
///
/// As the input file is an ``Arc<Mutex<_>>``, multiple ``PartitionMutex`` can be created for the same file, and a ``PartitionMutex`` can be cloned. Each clone keeps its own position.
///
/// Note that all functions (but the write one) will lock the Mutex and release it before the function ends (except for ``lock``). When calling them, you must ensure the input file isn't already locked by this thread.
///
/// The input offset is the first byte that will be accessible. The user of the ``PartitionMutex`` won't be able to seek before it, and it will be considered the offset 0 of the ``PartitionMutex``.
/// The input length is the number of bytes that can be read with this ``PartitionMutex``. The last readable byte of the input file is the one at offset input_offset + input_len - 1.
///
/// It is possible to lock the mutex with [`PartitionMutex::lock`]. You need to take care when using it, or a panic may occur. Please read the documentation of the function.
///
/// # Examples
/// ```
/// use std::io::{Cursor, Read, Seek, SeekFrom};
/// use io_partition::PartitionMutex;
/// use std::sync::{Mutex, Arc};
/// use std::thread;
///
/// let mut some_value = (0..100).collect::<Vec<u8>>();
/// let some_file = Arc::new(Mutex::new(Cursor::new(some_value)));
///
/// let mut first_partition = PartitionMutex::new(some_file.clone(), 10, 20).unwrap();
/// let mut second_partition = PartitionMutex::new(some_file.clone(), 40, 30).unwrap();
///
/// let mut buf = [0];
///
/// first_partition.seek(SeekFrom::Start(10)).unwrap();
/// second_partition.seek(SeekFrom::Start(5)).unwrap();
/// first_partition.read_exact(&mut buf).unwrap();
/// assert_eq!(buf, [20]);
/// second_partition.read_exact(&mut buf).unwrap();
/// assert_eq!(buf, [45]);
///
/// second_partition.seek(SeekFrom::Start(5)).unwrap();
/// let mut second_clone = second_partition.clone();
/// let handle = thread::spawn(move || {
///     second_clone.seek(SeekFrom::Current(2)).unwrap();
///     let mut buf = [0];
///     second_clone.read_exact(&mut buf).unwrap();
///     buf[0]
/// });
///
/// second_partition.seek(SeekFrom::End(-1)).unwrap();
/// second_partition.read_exact(&mut buf).unwrap();
///
/// assert_eq!(handle.join().unwrap(), 47);
/// assert_eq!(buf[0], 69);
///
/// first_partition.seek(SeekFrom::Start(2)).unwrap();
/// {
///     let mut locked = first_partition.lock().unwrap();
///     let mut buffer = [0; 2];
///     locked.read_exact(&mut buffer).unwrap();
///     assert_eq!(&buffer, &[12, 13]);
/// }
/// ```
#[derive(Debug, Clone)]
pub struct PartitionMutex<T: Read + Seek> {
    /// The shared underlying reader
    file: Arc<Mutex<T>>,
    /// The offset of the first byte that should be included
    start: u64,
    /// The current position of this partition, as an absolute offset in the underlying reader
    pointer: u64,
    /// The offset of the first byte that should be NOT included
    end: u64,
}
349
350impl<T: Read + Seek> PartitionMutex<T> {
351    /// Create new ``PartitionMutex``, with the specified input file, start and lenght. This will lock the input Mutex.
352    ///
353    /// This will check that the file is big enought at creation, and the cursor will be located at the beggining of the file.
354    pub fn new(file: Arc<Mutex<T>>, start: u64, lenght: u64) -> io::Result<PartitionMutex<T>> {
355        let end = start.checked_add(lenght).ok_or_else(|| {
356            io::Error::new(
357                io::ErrorKind::InvalidInput,
358                ERROR_MESSAGE_START_LENGHT_OVERFLOW,
359            )
360        })?;
361        {
362            let mut file = match file.lock() {
363                Ok(value) => value,
364                Err(_) => {
365                    return Err(io::Error::new(
366                        io::ErrorKind::Other,
367                        "the file mutex is poisoned",
368                    ))
369                }
370            };
371            check_seek_end_valid(&mut *file, start, end)?;
372        }
373        let mut result = PartitionMutex {
374            file,
375            start,
376            pointer: start,
377            end,
378        };
379        result.seek(SeekFrom::Start(0))?;
380        Ok(result)
381    }
382
383    /// Lock this [`PartitionMutex`], in a similar fashion to [`Mutex::lock`].
384    /// If the same thread lock this [`PartitionMutex`], or any other structure that use the same file, without first checking it is free, it might panic or softlock.
385    /// Note that the seek/read implementation of [`PartitionMutex`] will lock the file for the duration of those function execution.
386    /// you can use scope for this.
387    pub fn lock(&mut self) -> Result<PartitionMutexLock<'_, T>, LockPartitionError> {
388        let mut file_locked = self
389            .file
390            .lock()
391            .map_err(|_| LockPartitionError::PoisonError)?;
392        file_locked.seek(SeekFrom::Start(self.pointer))?;
393        Ok(PartitionMutexLock {
394            file: file_locked,
395            pointer: &mut self.pointer,
396            start: &mut self.start,
397            end: &mut self.end,
398        })
399    }
400}
401
402impl<T: Read + Seek> Read for PartitionMutex<T> {
403    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
404        let mut file = match self.file.lock() {
405            Ok(value) => value,
406            Err(_) => {
407                return Err(io::Error::new(
408                    io::ErrorKind::Other,
409                    "the file mutex is poisoned",
410                ))
411            }
412        };
413        let (new_pointer_pos, result) =
414            partition_read(buf, &mut *file, self.start, self.end, self.pointer, false);
415        self.pointer = new_pointer_pos;
416        result
417    }
418}
419
420impl<T: Read + Seek> Seek for PartitionMutex<T> {
421    fn seek(&mut self, target: SeekFrom) -> io::Result<u64> {
422        let mut file = match self.file.lock() {
423            Ok(value) => value,
424            Err(_) => {
425                return Err(io::Error::new(
426                    io::ErrorKind::Other,
427                    "the file mutex is poisoned",
428                ))
429            }
430        };
431        let (new_pointer_pos, result) =
432            partition_seek(&mut *file, self.start, self.end, self.pointer, target);
433        self.pointer = new_pointer_pos;
434        result
435    }
436}
437
438impl<T: Read + Seek> Write for PartitionMutex<T> {
439    /// Do not use this write function. It will always fail. It is just here because some library require this to have the ``Write`` trait to make this work with this (rust_vfs)
440    fn write(&mut self, _: &[u8]) -> io::Result<usize> {
441        Err(io::Error::from(io::ErrorKind::PermissionDenied))
442    }
443
444    /// Always suceed. It is useless to call it
445    fn flush(&mut self) -> io::Result<()> {
446        Ok(())
447    }
448}
449
/// A locked [`PartitionMutex`]. See the documentation of [`PartitionMutex::lock`] for usage precaution.
///
/// It holds the guard of the file Mutex together with mutable references to the bounds and the
/// position of the [`PartitionMutex`] it was created from.
pub struct PartitionMutexLock<'a, T: Read + Seek> {
    /// The guard over the shared file
    file: MutexGuard<'a, T>, // NOTE: we assume the file is seeked at the right position
    /// The offset of the first byte that should be included
    start: &'a mut u64,
    /// The current position, as an absolute offset in the underlying reader
    pointer: &'a mut u64,
    /// The offset of the first byte that should be NOT included
    end: &'a mut u64,
}
457
458impl<'a, T: Read + Seek> Read for PartitionMutexLock<'a, T> {
459    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
460        let (new_pointer_pos, result) = partition_read(
461            buf,
462            &mut *self.file,
463            *self.start,
464            *self.end,
465            *self.pointer,
466            true,
467        );
468        *self.pointer = new_pointer_pos;
469        result
470    }
471}
472
473impl<'a, T: Read + Seek> Seek for PartitionMutexLock<'a, T> {
474    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
475        let (new_pointer_pos, result) =
476            partition_seek(&mut *self.file, *self.start, *self.end, *self.pointer, pos);
477        *self.pointer = new_pointer_pos;
478        result
479    }
480}
481
482impl<'a, T: Read + Seek> Write for PartitionMutexLock<'a, T> {
483    /// Do not use this write function. It will always fail. It is just here because some library require this to have the ``Write`` trait to make this work with this (rust_vfs)
484    fn write(&mut self, _: &[u8]) -> io::Result<usize> {
485        Err(io::Error::from(io::ErrorKind::PermissionDenied))
486    }
487
488    /// Always suceed. It is useless to call it
489    fn flush(&mut self) -> io::Result<()> {
490        Ok(())
491    }
492}
493
/// Clone a part of a file into a Vec
///
/// The file is seeked to `start`, then exactly `length` bytes are copied into the returned
/// vector.
///
/// # Errors
/// Returns the error of the underlying seek or read. If the file ends before `length` bytes
/// could be read, an error of kind `UnexpectedEof` is returned.
pub fn clone_into_vec<T: Read + Seek>(
    file: &mut T,
    start: u64,
    length: u64,
) -> Result<Vec<u8>, io::Error> {
    file.seek(SeekFrom::Start(start))?;
    let mut output_buffer = Vec::new();
    // Bulk copy bounded to `length` bytes, instead of issuing one read per byte.
    let copied = Read::take(&mut *file, length).read_to_end(&mut output_buffer)?;
    if (copied as u64) < length {
        return Err(io::Error::new(
            io::ErrorKind::UnexpectedEof,
            "failed to fill whole buffer",
        ));
    }
    Ok(output_buffer)
}
509
510/// Clone a part of a file
511pub fn partition_clone<T: Read + Seek>(
512    file: &mut T,
513    start: u64,
514    length: u64,
515) -> Result<Cursor<Vec<u8>>, io::Error> {
516    Ok(Cursor::new(clone_into_vec(file, start, length)?))
517}