drill_press/
lib.rs

1use std::io::{Read, Seek};
2use std::ops::Range;
3use std::slice::Iter;
4use thiserror::Error;
5
// Pick exactly one platform module at compile time: `unix` on Linux, Android,
// FreeBSD and macOS, `windows` on Windows, and `default` on every other
// target. The modules themselves are not visible from this file; presumably
// each one provides the `SparseFile` implementation for its platform.
cfg_if::cfg_if! {
    if #[cfg(any(target_os = "linux",
                 target_os = "android",
                 target_os = "freebsd",
                 target_os = "macos",
    ))]{
        mod unix;
    } else if #[cfg(windows)] {
        mod windows;
    } else {
        mod default;
    }
}
19
20#[cfg(test)]
21mod test_utils;
22
#[derive(Error, Debug)]
/// Errors returned by [`scan_chunks`](SparseFile::scan_chunks) and
/// [`drill_hole`](SparseFile::drill_hole)
pub enum ScanError {
    /// An underlying I/O operation failed; `#[from]` lets a
    /// [`std::io::Error`] be converted into this variant with `?`
    #[error("IO Error occurred")]
    IO(#[from] std::io::Error),
    /// A call into the C API failed in a way that is not otherwise classified
    // NOTE(review): the `i32` payload is presumably the raw error code
    // reported by the platform call -- confirm against the platform modules.
    #[error("An unknown error occurred interacting with the C API")]
    Raw(i32),
    /// The operation is not implemented for the current operating system
    #[error("The operation you are trying to perform is not supported on this platform")]
    UnsupportedPlatform,
    /// The operating system is supported, but the filesystem holding the file
    /// cannot work with sparse files
    #[error("The filesystem does not support operating on sparse files")]
    UnsupportedFileSystem,
}
35
/// Tells whether a segment of a file is a hole or holds data
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum SegmentType {
    Hole,
    Data,
}

impl SegmentType {
    /// Returns the other segment type: `Data` for `Hole`, and `Hole` for `Data`
    pub fn opposite(&self) -> Self {
        match *self {
            Self::Data => Self::Hole,
            Self::Hole => Self::Data,
        }
    }
}
51
/// Describes the location of a chunk in the file, as well as indicating if it
/// contains data or is a hole
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Segment {
    /// Marks this segment as either a hole or as containing data
    pub segment_type: SegmentType,
    /// The half-open range of bytes in the file covered by this segment:
    /// `range.start` is part of the segment, `range.end` is not
    pub range: Range<u64>,
}
61
62/// An iterator over the ranges of a file of a specific [`SegmentType`]
63#[derive(Debug, Clone)]
64pub struct SegmentIter<'a> {
65    segment_type: SegmentType,
66    iter: Iter<'a, Segment>,
67}
68
69impl<'a> Iterator for SegmentIter<'a> {
70    type Item = &'a Range<u64>;
71    fn next(&mut self) -> Option<<Self as Iterator>::Item> {
72        for segment in self.iter.by_ref() {
73            if segment.segment_type == self.segment_type {
74                return Some(&segment.range);
75            }
76        }
77        None
78    }
79}
80
81/// An extention trait to filter segments by Hole or Data segments
82pub trait Segments {
83    fn data(&self) -> SegmentIter;
84    fn holes(&self) -> SegmentIter;
85}
86
87impl Segments for Vec<Segment> {
88    fn data(&self) -> SegmentIter {
89        SegmentIter {
90            segment_type: SegmentType::Data,
91            iter: self.iter(),
92        }
93    }
94    fn holes(&self) -> SegmentIter {
95        SegmentIter {
96            segment_type: SegmentType::Hole,
97            iter: self.iter(),
98        }
99    }
100}
101
102#[allow(clippy::len_without_is_empty)] // Segments should never be zero length
103impl Segment {
104    /// Returns true if the provided offset is within the range of bytes this
105    /// segment specifies
106    pub fn contains(&self, offset: &u64) -> bool {
107        self.range.contains(offset)
108    }
109
110    /// Returns true if this segment is a Hole
111    pub fn is_hole(&self) -> bool {
112        self.segment_type == SegmentType::Hole
113    }
114
115    /// Returns true if this segment contains data
116    pub fn is_data(&self) -> bool {
117        self.segment_type == SegmentType::Data
118    }
119
120    /// The starting position of this segment
121    pub fn start(&self) -> u64 {
122        self.range.start
123    }
124
125    /// The number of bytes in this segment
126    pub fn len(&self) -> u64 {
127        self.range.start - self.range.end
128    }
129}
130
/// An extension trait for [`File`](std::fs::File) for sparse files
pub trait SparseFile: Read + Seek {
    /// Scans the file to find its logical chunks
    ///
    /// Will return a list of segments, ordered by their start position.
    ///
    /// The ranges generated are guaranteed to cover all bytes in the file.
    ///
    /// `Hole` segments are guaranteed to represent a part of a file that does
    /// not contain any non-zero data, however, `Data` segments may represent
    /// parts of a file that contain what, logically, should be sparse segments.
    /// This is up to the mercy of your operating system and file system, please
    /// consult their documentation for how they handle sparse files for more
    /// details.
    ///
    /// Does not make any guarantee about maintaining the Seek position of the
    /// file, always seek back to a known point after calling this method.
    ///
    /// # Errors
    ///
    /// Will return `Err(ScanError::UnsupportedPlatform)` if support is not
    /// implemented for filesystem level hole finding on your system
    ///
    /// Will return `Err(ScanError::UnsupportedFileSystem)` if support is
    /// implemented for your operating system, but the filesystem does not
    /// support sparse files
    ///
    /// Will also return `Err` if any other I/O error occurs
    fn scan_chunks(&mut self) -> Result<Vec<Segment>, ScanError>;

    /// Unallocate a section of the file, freeing the disk space and making
    /// future reads return zeros
    ///
    /// `start` and `end` are offsets into the file delimiting the section to
    /// unallocate.
    ///
    /// # Errors
    ///
    /// Returns a [`ScanError`] if the section could not be unallocated.
    // NOTE(review): the tests in this file pass a segment's `range.start` and
    // `range.end` (a half-open byte range), so `end` is presumably an
    // exclusive byte offset -- confirm against the platform implementations.
    fn drill_hole(&self, start: u64, end: u64) -> Result<(), ScanError>;
}
165
166#[cfg(test)]
167mod tests {
168    use super::*;
169    use crate::test_utils::*;
170    use quickcheck_macros::quickcheck;
171    use std::fs::File;
172
173    fn test_chunks_match(file: &mut File, input_segments: &[Segment]) -> bool {
174        // Get both sets of segments
175        let output_segments = file.scan_chunks().expect("Unable to scan chunks");
176
177        if *input_segments != output_segments {
178            println!("Expected: \n {:?} \n", input_segments);
179            println!("Got: \n {:?} \n", output_segments);
180        }
181        *input_segments == output_segments
182    }
183
184    // Creates a file based on desc, then tests that the resulting output of
185    // file.scan_chunks() matches the description used to create the file
186    fn test_round_trips(desc: SparseDescription) -> bool {
187        let mut file = desc.to_file();
188        // Get both sets of segments
189        let input_segments = desc.segments();
190        test_chunks_match(file.as_file_mut(), &input_segments)
191    }
192
    // Property test: for every generated description, the file created from
    // it scans back to the same segments (see `test_round_trips`)
    #[quickcheck]
    fn round_trips(desc: SparseDescription) -> bool {
        test_round_trips(desc)
    }
197
198    #[quickcheck]
199    fn drill_hole(desc: SparseDescription, drop: u8) -> bool {
200        let mut file = desc.to_file();
201        // Get both sets of segments
202        let mut input_segments = desc.segments();
203
204        if input_segments.is_empty() {
205            return true;
206        }
207
208        #[cfg(target_os = "macos")]
209        for hole in input_segments.holes() {
210            file.as_file_mut()
211                .drill_hole(hole.start, hole.end)
212                .expect("pre drill holes");
213        }
214
215        test_chunks_match(file.as_file_mut(), &input_segments);
216
217        // pick a segment to make a hole
218        let drop_idx = drop as usize % input_segments.len();
219        let drop = &mut input_segments[drop_idx];
220
221        file.as_file_mut()
222            .drill_hole(drop.range.start, drop.range.end)
223            .expect("drilled hole");
224
225        drop.segment_type = SegmentType::Hole;
226
227        combine_segments(&mut input_segments);
228
229        test_chunks_match(file.as_file_mut(), &input_segments)
230    }
231    #[quickcheck]
232    fn one_big_segment(segment_type: SegmentType) -> bool {
233        let desc = SparseDescription::one_segment(segment_type, 3545868);
234
235        test_round_trips(desc)
236    }
237
238    fn combine_segments(segments: &mut Vec<Segment>) {
239        let mut prev = 0;
240        for i in 1..segments.len() {
241            if segments[prev].segment_type == segments[i].segment_type {
242                segments[prev].range.end = segments[i].range.end;
243            } else {
244                prev += 1;
245                segments[prev] = segments[i].clone();
246            }
247        }
248        segments.truncate(prev + 1)
249    }
250}