hole_punch/unix.rs
1//! lseek based implementation that uses `SEEK_DATA` and `SEEK_HOLE` to
2//! reconstruct which segments of the file are data or holes
3use super::*;
4
5use std::fs::File;
6use std::io::Error;
7use std::os::unix::io::AsRawFd;
8
9use errno::errno;
10use libc::{c_int, lseek, off_t, EINVAL, ENXIO, SEEK_DATA, SEEK_END, SEEK_HOLE};
11
/// Marks the offset at which a region of the file begins, together with the
/// kind of region that starts there.
#[derive(Debug, Clone, Copy)]
enum Tag {
    /// A data region starts at this offset.
    Data(i64),
    /// A hole starts at this offset.
    Hole(i64),
    /// The end of the file is at this offset.
    End(i64),
}

impl Tag {
    /// Returns the offset carried by the tag, whichever variant it is.
    fn offset(&self) -> i64 {
        // `Tag` is `Copy`, so match by value and hand the payload back directly.
        match *self {
            Tag::Data(position) | Tag::Hole(position) | Tag::End(position) => position,
        }
    }
}
26
27impl SparseFile for File {
28 fn scan_chunks(&mut self) -> std::result::Result<std::vec::Vec<Segment>, ScanError> {
29 // Create our output vec
30 let mut tags: Vec<Tag> = Vec::new();
31 // Extract the raw fd from the file
32 let fd = self.as_raw_fd();
33 // Find the end
34 let end = find_end(fd)?;
35 // Our seeking loop assumes that we know what type the previous segment
36 // is, so we check for the case where there is a hole at the start of
37 // the file. This also does double duty checking for sparseness, as if
38 // there are no holes, find_next_hole will return None, and we can short
39 // circuit.
40 if let Some(first_hole) = find_next_hole(fd, 0)? {
41 let mut last_offset;
42 if first_hole == 0 {
43 last_offset = Tag::Hole(0);
44 } else {
45 last_offset = Tag::Data(0);
46 }
47 while last_offset.offset() < end {
48 tags.push(last_offset);
49 match last_offset {
50 Tag::Data(x) => {
51 // If the last tag was a data, we are looking for a hole
52 if let Some(next_offset) = find_next_hole(fd, x)? {
53 last_offset = Tag::Hole(next_offset);
54 } else {
55 // We know the last segment was a data, and there
56 // are no remaining holes, so we must be at the end
57 // of the file, so we end the loop and push an end
58 last_offset = Tag::End(end);
59 }
60 }
61 Tag::Hole(x) => {
62 // If the last tag was a hole, we are looking for a data
63 if let Some(next_offset) = find_next_data(fd, x)? {
64 last_offset = Tag::Data(next_offset);
65 } else {
66 // We know the last segment was a hole, and there
67 // are no remaining holes, so we must be at the end
68 // of the file, so we end the loop and push an end
69 last_offset = Tag::End(end);
70 }
71 }
72 // We never set last_offset to Tag::End until we are done
73 // with the loop, so if we encounter an End, we have made a
74 // major programming error
75 Tag::End(_) => unreachable!(),
76 }
77 }
78 tags.push(Tag::End(end));
79 } else {
80 // In this situation, we have no holes in the data, so we just
81 // represent a single data segment
82 tags.push(Tag::Data(0));
83 let end = find_end(fd)?;
84 tags.push(Tag::End(end));
85 }
86
87 println!("{:?}", tags);
88
89 // Process our list of start point tags into a list of segments.
90 let mut tag_pairs = tags
91 .iter()
92 .copied()
93 .zip(tags.iter().skip(1).copied())
94 .map(|(x, y)| {
95 // All these casts are valid, as the wrapper methods we use
96 // around lseek will return Err rather than returning a value
97 // less than 0
98 match x {
99 Tag::Data(start) => Segment {
100 segment_type: SegmentType::Data,
101 start: start as u64,
102 end: (y.offset() - 1) as u64,
103 },
104 Tag::Hole(start) => Segment {
105 segment_type: SegmentType::Hole,
106 start: start as u64,
107 end: (y.offset() - 1) as u64,
108 },
109 // End should only ever be the last element the tag vector,
110 // so it can never be the first element of a pair
111 Tag::End(_) => unreachable!(),
112 }
113 })
114 .collect::<Vec<_>>();
115 // Modify the last element so it actually ends on the final offset
116 let len = tag_pairs.len();
117 tag_pairs[len - 1].end = end as u64;
118 Ok(tag_pairs)
119 }
120}
121
122fn find_next_hole(fd: c_int, offset: off_t) -> Result<Option<off_t>, ScanError> {
123 unsafe {
124 // First, call lseek with our file descriptor and current offset
125 let new_offset = lseek(fd, offset, SEEK_HOLE);
126 // if the return value of lseek is less than 0, an error has occurred
127 if new_offset < 0 {
128 // find and deref errno, honestly the scariest thing we do here
129 let errno = errno().into();
130 // Some of the errors we might not get here need to be handled
131 // specially, and one of them isn' actually an error
132 match errno {
133 /// EINVAL indicates that the file system does not support
134 /// SEEK_HOLE or SEEK_DATA, so we indicate as such
135 EINVAL => Err(ScanError::UnsupportedFileSystem),
136 // ENXIO indicates that the the file offset we are looking for
137 // either doesn't exist, or would be beyond the end of the file.
138 // In our case, this just means there is no next segment, so we
139 // return Ok(none) to indicate as such.
140 ENXIO => Ok(None),
141 // None of the other error codes require special handling, so we
142 // just turn them into an std::io::Error for user friendliness
143 _ => Err(Error::last_os_error().into()),
144 }
145 } else {
146 // If no errors occurred, we are good to return our offset.
147 Ok(Some(new_offset))
148 }
149 }
150}
151
152fn find_next_data(fd: c_int, offset: off_t) -> Result<Option<off_t>, ScanError> {
153 unsafe {
154 // First, call lseek with our file descriptor and current offset
155 let new_offset = lseek(fd, offset, SEEK_DATA);
156 // if the return value of lseek is less than 0, an error has occurred
157 if new_offset < 0 {
158 // find and deref errno, honestly the scariest thing we do here
159 let errno = errno().into();
160 // Some of the errors we might not get here need to be handled
161 // specially, and one of them isn' actually an error
162 match errno {
163 /// EINVAL indicates that the file system does not support
164 /// SEEK_HOLE or SEEK_DATA, so we indicate as such
165 EINVAL => Err(ScanError::UnsupportedFileSystem),
166 // ENXIO indicates that the the file offset we are looking for
167 // either doesn't exist, or would be beyond the end of the file.
168 // In our case, this just means there is no next segment, so we
169 // return Ok(none) to indicate as such.
170 ENXIO => Ok(None),
171 // None of the other error codes require special handling, so we
172 // just turn them into an std::io::Error for user friendliness
173 _ => Err(Error::last_os_error().into()),
174 }
175 } else {
176 // If no errors occurred, we are good to return our offset.
177 Ok(Some(new_offset))
178 }
179 }
180}
181
182fn find_end(fd: c_int) -> Result<off_t, ScanError> {
183 unsafe {
184 let new_offset = lseek(fd, 0, SEEK_END);
185 if new_offset < 0 {
186 Err(Error::last_os_error().into())
187 } else {
188 Ok(new_offset)
189 }
190 }
191}