hole_punch/
lib.rs

1/*!
2Utility methods for locating holes in sparse files
3*/
4
5use std::io::{Read, Seek};
6use thiserror::Error;
7
8cfg_if::cfg_if! {
9    if #[cfg(any(target_os = "linux",
10                 target_os = "android",
11                 target_os = "freebsd",
12    ))]{
13        mod unix;
14    } else if #[cfg(windows)] {
15        mod windows;
16    } else {
17        mod default;
18    }
19}
20
21#[cfg(test)]
22mod test_utils;
23
24#[derive(Error, Debug)]
25pub enum ScanError {
26    #[error("IO Error occurred")]
27    IO(#[from] std::io::Error),
28    #[error("An unknown error occurred interacting with the C API")]
29    Raw(i32),
30    #[error("The operation you are trying to perform is not supported on this platform")]
31    UnsupportedPlatform,
32    #[error("The filesystem does not support operating on sparse files")]
33    UnsupportedFileSystem,
34}
35
/// Classifies a segment as either a hole or a region that contains data
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SegmentType {
    /// The segment is a hole
    Hole,
    /// The segment contains data
    Data,
}
42
43/// Describes the location of a chunk in the file, as well as indicating if it
44/// contains data or is a hole
45#[derive(Debug, Copy, Clone, PartialEq, Eq)]
46pub struct Segment {
47    /// Marks this segment as either containing a hole, or containing data
48    pub segment_type: SegmentType,
49    pub start: u64,
50    pub end: u64,
51}
52
53impl Segment {
54    /// Returns true if the provided offset is within the range of bytes this
55    /// segment specifies
56    pub fn contains(&self, offset: u64) -> bool {
57        offset >= self.start && offset <= self.end
58    }
59
60    /// Returns true if this segment is a Hole
61    pub fn is_hole(&self) -> bool {
62        self.segment_type == SegmentType::Hole
63    }
64
65    /// Returns true if this segment contains data
66    pub fn is_data(&self) -> bool {
67        self.segment_type == SegmentType::Data
68    }
69}
70
71/// Trait for objects that can have sparsity
72pub trait SparseFile: Read + Seek {
73    /// Scans the file to find its logical chunks
74    ///
75    /// Will return a list of segments, ordered by their start position.
76    ///
77    /// The ranges generated are guaranteed to cover all bytes in the file, up
78    /// to the last non-zero byte in the last segment containing data. All files
79    /// are considered to have a single hole of indeterminate length at the end,
80    /// and this library may not included that hole.
81    ///
82    /// `Hole` segments are guaranteed to represent a part of a file that does
83    /// not contain any non-zero data, however, `Data` segments may represent
84    /// parts of a file that contain what, logically, should be sparse segments.
85    /// This is up to the mercy of your operating system and file system, please
86    /// consult their documentation for how they handle sparse files for more
87    /// details.
88    ///
89    /// Does not make any guarantee about maintaining the Seek position of the
90    /// file, always seek back to a known point after calling this method.
91    ///
92    /// # Errors
93    ///
94    /// Will return `Err(ScanError::UnsupportedPlatform)` if support is not
95    /// implemented for filesystem level hole finding on your system
96    ///
97    /// Will return `Err(ScanError::UnsupportedFileSystem)` if support is
98    /// implemented for your operating system, but the filesystem does not
99    /// support sparse files
100    ///
101    /// Will also return `Err` if any other I/O error occurs
102    fn scan_chunks(&mut self) -> Result<Vec<Segment>, ScanError>;
103}
104
105#[cfg(test)]
106mod tests {
107    use super::*;
108    use crate::test_utils::*;
109    use quickcheck_macros::quickcheck;
110
111    // Creates a file based on desc, then tests that the resulting output of
112    // file.scan_chunks() has every non-zero byte included
113    fn test_covers_all_bytes(desc: SparseDescription) -> bool {
114        let mut file = desc.to_file();
115        // Get both sets of segments
116        let input_segments = desc.segments();
117        let output_segments = file
118            .as_file_mut()
119            .scan_chunks()
120            .expect("Unable to scan chunks");
121        println!("Output: \n {:?} \n", output_segments);
122        // Find the last non-zero byte in the input segments
123        let last_non_zero = input_segments
124            .iter()
125            .map(|x| {
126                if let SegmentType::Data = x.segment_type {
127                    x.end
128                } else {
129                    0
130                }
131            })
132            .max()
133            .unwrap_or(0);
134        println!("Last non zero: {} \n", last_non_zero);
135        let mut last_byte_touched = false;
136        for (x, y) in output_segments.iter().zip(output_segments.iter().skip(1)) {
137            if y.start != x.end + 1 {
138                return false;
139            }
140            if y.end >= last_non_zero {
141                println!("Last byte touched!");
142                last_byte_touched = true;
143            }
144        }
145        if output_segments.len() == 1 {
146            if output_segments[0].end >= last_non_zero {
147                println!("Last byte touched!");
148                last_byte_touched = true;
149            }
150        }
151        last_byte_touched || last_non_zero == 0
152    }
153
154    #[quickcheck]
155    fn covers_all_bytes(desc: SparseDescription) -> bool {
156        test_covers_all_bytes(desc)
157    }
158
159    // Constructs a file with desc, then verifies that the holes in the output
160    // from file.scan_chunks() don't contain any data
161    fn test_holes_have_no_data(desc: SparseDescription) -> bool {
162        println!("Input: \n {:?} \n", desc);
163        let mut file = desc.to_file();
164        // Get both sets of segments
165        let input_segments = desc.segments();
166        let output_segments = file
167            .as_file_mut()
168            .scan_chunks()
169            .expect("Unable to scan chunks");
170        println!("Output: \n {:?} \n", output_segments);
171        for segment in output_segments.iter().filter(|x| x.is_hole()) {
172            if input_segments.iter().filter(|x| x.is_data()).any(|other| {
173                let x = if segment.start > other.start {
174                    !(segment.start > other.end)
175                } else {
176                    !(segment.end < other.start)
177                };
178
179                if x {
180                    println!("Output {:?} overlaps Input {:?}", segment, other);
181                }
182
183                x
184            }) {
185                return false;
186            }
187        }
188        true
189    }
190
191    #[quickcheck]
192    fn holes_have_no_data(desc: SparseDescription) -> bool {
193        test_holes_have_no_data(desc)
194    }
195
196    #[test]
197    fn covers_all_bytes_failure_1() {
198        let desc = SparseDescription::from_segments(vec![
199            Segment {
200                segment_type: SegmentType::Hole,
201                start: 0,
202                end: 3545867,
203            },
204            Segment {
205                segment_type: SegmentType::Data,
206                start: 3545868,
207                end: 3625675,
208            },
209        ]);
210
211        assert!(test_covers_all_bytes(desc));
212    }
213
214    #[test]
215    fn covers_all_bytes_failure_2() {
216        let desc = SparseDescription::from_segments(vec![Segment {
217            segment_type: SegmentType::Hole,
218            start: 0,
219            end: 5440262,
220        }]);
221
222        assert!(test_covers_all_bytes(desc));
223    }
224
225    #[test]
226    fn holes_have_no_data_failure_1() {
227        let desc = SparseDescription::from_segments(vec![
228            Segment {
229                segment_type: SegmentType::Data,
230                start: 0,
231                end: 106392,
232            },
233            Segment {
234                segment_type: SegmentType::Hole,
235                start: 106393,
236                end: 713195,
237            },
238            Segment {
239                segment_type: SegmentType::Data,
240                start: 713196,
241                end: 1164291,
242            },
243            Segment {
244                segment_type: SegmentType::Hole,
245                start: 1164292,
246                end: 1871333,
247            },
248            Segment {
249                segment_type: SegmentType::Data,
250                start: 1871334,
251                end: 2351104,
252            },
253            Segment {
254                segment_type: SegmentType::Hole,
255                start: 2351105,
256                end: 2478705,
257            },
258            Segment {
259                segment_type: SegmentType::Data,
260                start: 2478706,
261                end: 2568019,
262            },
263            Segment {
264                segment_type: SegmentType::Hole,
265                start: 2568020,
266                end: 3062343,
267            },
268            Segment {
269                segment_type: SegmentType::Data,
270                start: 3062344,
271                end: 3285810,
272            },
273            Segment {
274                segment_type: SegmentType::Hole,
275                start: 3285811,
276                end: 3793122,
277            },
278            Segment {
279                segment_type: SegmentType::Data,
280                start: 3793123,
281                end: 4166168,
282            },
283            Segment {
284                segment_type: SegmentType::Hole,
285                start: 4166169,
286                end: 4249362,
287            },
288            Segment {
289                segment_type: SegmentType::Data,
290                start: 4249363,
291                end: 4283128,
292            },
293            Segment {
294                segment_type: SegmentType::Hole,
295                start: 4283129,
296                end: 4597394,
297            },
298            Segment {
299                segment_type: SegmentType::Data,
300                start: 4597395,
301                end: 5204961,
302            },
303            Segment {
304                segment_type: SegmentType::Hole,
305                start: 5204962,
306                end: 5270535,
307            },
308            Segment {
309                segment_type: SegmentType::Data,
310                start: 5270536,
311                end: 5274355,
312            },
313            Segment {
314                segment_type: SegmentType::Hole,
315                start: 5274356,
316                end: 5471034,
317            },
318            Segment {
319                segment_type: SegmentType::Data,
320                start: 5471035,
321                end: 5547210,
322            },
323        ]);
324        assert!(test_holes_have_no_data(desc));
325    }
326}