drill_press/
lib.rs

1#![forbid(missing_docs)]
2#![warn(clippy::all)]
3#![deny(warnings)]
4#![deny(clippy::print_stdout)]
5#![cfg_attr(docsrs, feature(doc_cfg))]
6#![doc = include_str!("../README.md")]
7
8use std::io::{Read, Seek};
9use std::ops::Range;
10use std::slice::Iter;
11use thiserror::Error;
12
// Pick the platform backend module at compile time. The branches are tried in
// order, so exactly one of `unix`, `windows` or `default` is compiled in.
cfg_if::cfg_if! {
    // Operating systems that share the `unix` module.
    if #[cfg(any(target_os = "linux",
                 target_os = "android",
                 target_os = "freebsd",
                 target_os = "macos",
    ))]{
        mod unix;
    } else if #[cfg(windows)] {
        mod windows;
    } else {
        // Every remaining target falls back to `default`.
        // NOTE(review): presumably this is the module that reports
        // `ScanError::UnsupportedPlatform` — confirm against `default.rs`.
        mod default;
    }
}
26
27#[cfg(test)]
28mod test_utils;
29
30#[derive(Error, Debug)]
31/// Errors returned by [`scan_chunks`](SparseFile::scan_chunks)
32pub enum ScanError {
33    /// the syscall for scanning allocated chunks of file failed with IO error
34    #[error("IO Error occurred")]
35    IO(#[from] std::io::Error),
36    /// This will be returned if you compile for a target that this crate does not support
37    #[error("The operation you are trying to perform is not supported on this platform")]
38    UnsupportedPlatform,
39    /// If the OS reports that the file system the file is on does not support sparse files
40    #[error("The filesystem does not support operating on sparse files")]
41    UnsupportedFileSystem,
42}
43
/// Tells whether a segment is a hole or whether it contains data
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SegmentType {
    /// A sequence of zeros in a sparse file that does not take up space on disk
    Hole,
    /// A region that does take up space on disk; its bytes may or may not be zero
    Data,
}

impl SegmentType {
    /// Returns the other segment type: `Hole` for `Data`, and `Data` for `Hole`
    ///
    /// ```
    /// # use drill_press::SegmentType;
    /// let data = SegmentType::Data;
    /// assert_eq!(data.opposite(), SegmentType::Hole);
    /// ```
    pub fn opposite(&self) -> Self {
        match *self {
            Self::Hole => Self::Data,
            Self::Data => Self::Hole,
        }
    }
}
67
68/// Describes the location of a chunk in the file, as well as indicating if it
69/// contains data or is a hole
70#[derive(Debug, Clone, PartialEq, Eq)]
71pub struct Segment {
72    /// Marks this segment as either containing a hole, or containing data
73    pub segment_type: SegmentType,
74    /// the (half-open) range of bytes in the file covered by this segment
75    pub range: Range<u64>,
76}
77
78/// An iterator over the ranges of a file of a specific [`SegmentType`]
79#[derive(Debug, Clone)]
80pub struct SegmentIter<'a> {
81    segment_type: SegmentType,
82    iter: Iter<'a, Segment>,
83}
84
85impl<'a> Iterator for SegmentIter<'a> {
86    type Item = &'a Range<u64>;
87    fn next(&mut self) -> Option<<Self as Iterator>::Item> {
88        for segment in self.iter.by_ref() {
89            if segment.segment_type == self.segment_type {
90                return Some(&segment.range);
91            }
92        }
93        None
94    }
95}
96
97/// An extention trait to filter segments by Hole or Data segments
98pub trait Segments {
99    /// An interator of only the data segments
100    fn data(&self) -> SegmentIter;
101    /// An iterator of only the hole segments
102    fn holes(&self) -> SegmentIter;
103}
104
105impl Segments for Vec<Segment> {
106    fn data(&self) -> SegmentIter {
107        SegmentIter {
108            segment_type: SegmentType::Data,
109            iter: self.iter(),
110        }
111    }
112    fn holes(&self) -> SegmentIter {
113        SegmentIter {
114            segment_type: SegmentType::Hole,
115            iter: self.iter(),
116        }
117    }
118}
119
120#[allow(clippy::len_without_is_empty)] // Segments should never be zero length
121impl Segment {
122    /// Returns true if the provided offset is within the range of bytes this
123    /// segment specifies
124    pub fn contains(&self, offset: &u64) -> bool {
125        self.range.contains(offset)
126    }
127
128    /// Returns true if this segment is a Hole
129    pub fn is_hole(&self) -> bool {
130        self.segment_type == SegmentType::Hole
131    }
132
133    /// Returns true if this segment contains data
134    pub fn is_data(&self) -> bool {
135        self.segment_type == SegmentType::Data
136    }
137
138    /// The starting position of this segment
139    pub fn start(&self) -> u64 {
140        self.range.start
141    }
142
143    /// The number of bytes in this segment
144    pub fn len(&self) -> u64 {
145        self.range.end - self.range.start
146    }
147}
148
/// An extension trait for [`File`](std::fs::File) for sparse files
pub trait SparseFile: Read + Seek {
    /// Scans the file to find its logical chunks
    ///
    /// Will return a list of segments, ordered by their start position.
    ///
    /// The ranges generated are guaranteed to cover all bytes in the file.
    ///
    /// `Hole` segments are guaranteed to represent a part of a file that does
    /// not contain any non-zero data, however, `Data` segments may represent
    /// parts of a file that contain what, logically, should be sparse segments.
    /// This is up to the mercy of your operating system and file system, please
    /// consult their documentation for how they handle sparse files for more
    /// details.
    ///
    /// Does not make any guarantee about maintaining the Seek position of the
    /// file, always seek back to a known point after calling this method.
    ///
    /// # Errors
    ///
    /// Will return `Err(ScanError::UnsupportedPlatform)` if support is not
    /// implemented for filesystem level hole finding on your system
    ///
    /// Will return `Err(ScanError::UnsupportedFileSystem)` if support is
    /// implemented for your operating system, but the filesystem does not
    /// support sparse files
    ///
    /// Will also return `Err` if any other I/O error occurs
    fn scan_chunks(&mut self) -> Result<Vec<Segment>, ScanError>;

    /// Unallocate a section of the file, freeing the disk space and making
    /// future reads return zeros
    ///
    /// `start` and `end` are the byte offsets delimiting the section to
    /// unallocate.
    ///
    /// NOTE(review): `end` is presumably exclusive, mirroring the half-open
    /// range stored in a `Segment` — confirm against the platform
    /// implementations.
    ///
    /// # Errors
    ///
    /// Returns a `ScanError` if the section could not be unallocated. Which
    /// variants can occur depends on the platform implementation.
    fn drill_hole(&self, start: u64, end: u64) -> Result<(), ScanError>;
}
183
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_utils::*;
    use quickcheck_macros::quickcheck;
    use std::fs::File;

    /// Scans `file` and reports whether the segments found are equal to
    /// `input_segments`, printing both lists when they differ.
    fn test_chunks_match(file: &mut File, input_segments: &[Segment]) -> bool {
        let output_segments = file.scan_chunks().expect("Unable to scan chunks");

        let segments_total = output_segments.iter().map(|x| x.len()).sum::<u64>();
        let file_len = file.metadata().expect("file to exists").len();

        // Diagnostic only: the verdict below is decided by the segment
        // comparison, not by this length check.
        if segments_total != file_len {
            println!("Segment length {} != file len {}", segments_total, file_len);
        }

        let matches = *input_segments == output_segments;
        if !matches {
            println!("Expected: \n {:?} \n", input_segments);
            println!("Got: \n {:?} \n", output_segments);
        }
        matches
    }

    // Creates a file based on desc, then tests that the resulting output of
    // file.scan_chunks() matches the description used to create the file
    fn test_round_trips(desc: SparseDescription) -> bool {
        let mut file = desc.to_file();
        let input_segments = desc.segments();
        test_chunks_match(file.as_file_mut(), &input_segments)
    }

    #[quickcheck]
    fn round_trips(desc: SparseDescription) -> bool {
        test_round_trips(desc)
    }

    // Drills a hole over one whole segment of a generated file and checks that
    // a rescan reports that segment as a hole, merged with any neighbouring
    // holes
    #[quickcheck]
    fn drill_hole(desc: SparseDescription, drop: u8) -> bool {
        let mut file = desc.to_file();
        let mut input_segments = desc.segments();

        // Nothing to drill in a file without segments.
        if input_segments.is_empty() {
            return true;
        }

        #[cfg(target_os = "macos")]
        for hole in input_segments.holes() {
            file.as_file_mut()
                .drill_hole(hole.start, hole.end)
                .expect("pre drill holes");
        }

        // The file must match its description before drilling; otherwise the
        // comparison after drilling would not be testing the drilled hole.
        if !test_chunks_match(file.as_file_mut(), &input_segments) {
            return false;
        }

        // pick a segment to make a hole
        let drop_idx = drop as usize % input_segments.len();
        let target = &mut input_segments[drop_idx];

        file.as_file_mut()
            .drill_hole(target.range.start, target.range.end)
            .expect("drilled hole");

        target.segment_type = SegmentType::Hole;

        // The new hole may now touch other holes; the expected list has to
        // show them as a single segment.
        combine_segments(&mut input_segments);

        test_chunks_match(file.as_file_mut(), &input_segments)
    }

    #[quickcheck]
    fn one_big_segment(segment_type: SegmentType) -> bool {
        let desc = SparseDescription::one_segment(segment_type, 3545868);

        test_round_trips(desc)
    }

    /// Merges every run of adjacent segments that share a type into a single
    /// segment ending where the last segment of the run ends.
    fn combine_segments(segments: &mut Vec<Segment>) {
        // `dedup_by` hands over the later element first and the retained
        // earlier element second; returning true drops the later one.
        segments.dedup_by(|current, kept| {
            let same_type = current.segment_type == kept.segment_type;
            if same_type {
                kept.range.end = current.range.end;
            }
            same_type
        });
    }
}