hole_punch/
unix.rs

1//! lseek based implementation that uses `SEEK_DATA` and `SEEK_HOLE` to
2//! reconstruct which segments of the file are data or holes
3use super::*;
4
5use std::fs::File;
6use std::io::Error;
7use std::os::unix::io::AsRawFd;
8
9use errno::errno;
10use libc::{c_int, lseek, off_t, EINVAL, ENXIO, SEEK_DATA, SEEK_END, SEEK_HOLE};
11
/// Marks the byte offset at which a run of the file begins, together with what
/// kind of run it is.
#[derive(Clone, Copy, Debug)]
enum Tag {
    /// A data run starts at the contained offset.
    Data(i64),
    /// A hole starts at the contained offset.
    Hole(i64),
    /// Terminator carrying the offset used as the end of the scan.
    End(i64),
}

impl Tag {
    /// Returns the offset stored in the tag, whichever variant it is.
    fn offset(&self) -> i64 {
        match *self {
            Tag::Data(position) => position,
            Tag::Hole(position) => position,
            Tag::End(position) => position,
        }
    }
}
26
27impl SparseFile for File {
28    fn scan_chunks(&mut self) -> std::result::Result<std::vec::Vec<Segment>, ScanError> {
29        // Create our output vec
30        let mut tags: Vec<Tag> = Vec::new();
31        // Extract the raw fd from the file
32        let fd = self.as_raw_fd();
33        // Find the end
34        let end = find_end(fd)?;
35        // Our seeking loop assumes that we know what type the previous segment
36        // is, so we check for the case where there is a hole at the start of
37        // the file. This also does double duty checking for sparseness, as if
38        // there are no holes, find_next_hole will return None, and we can short
39        // circuit.
40        if let Some(first_hole) = find_next_hole(fd, 0)? {
41            let mut last_offset;
42            if first_hole == 0 {
43                last_offset = Tag::Hole(0);
44            } else {
45                last_offset = Tag::Data(0);
46            }
47            while last_offset.offset() < end {
48                tags.push(last_offset);
49                match last_offset {
50                    Tag::Data(x) => {
51                        // If the last tag was a data, we are looking for a hole
52                        if let Some(next_offset) = find_next_hole(fd, x)? {
53                            last_offset = Tag::Hole(next_offset);
54                        } else {
55                            // We know the last segment was a data, and there
56                            // are no remaining holes, so we must be at the end
57                            // of the file, so we end the loop and push an end
58                            last_offset = Tag::End(end);
59                        }
60                    }
61                    Tag::Hole(x) => {
62                        // If the last tag was a hole, we are looking for a data
63                        if let Some(next_offset) = find_next_data(fd, x)? {
64                            last_offset = Tag::Data(next_offset);
65                        } else {
66                            // We know the last segment was a hole, and there
67                            // are no remaining holes, so we must be at the end
68                            // of the file, so we end the loop and push an end
69                            last_offset = Tag::End(end);
70                        }
71                    }
72                    // We never set last_offset to Tag::End until we are done
73                    // with the loop, so if we encounter an End, we have made a
74                    // major programming error
75                    Tag::End(_) => unreachable!(),
76                }
77            }
78            tags.push(Tag::End(end));
79        } else {
80            // In this situation, we have no holes in the data, so we just
81            // represent a single data segment
82            tags.push(Tag::Data(0));
83            let end = find_end(fd)?;
84            tags.push(Tag::End(end));
85        }
86
87        println!("{:?}", tags);
88
89        // Process our list of start point tags into a list of segments.
90        let mut tag_pairs = tags
91            .iter()
92            .copied()
93            .zip(tags.iter().skip(1).copied())
94            .map(|(x, y)| {
95                // All these casts are valid, as the wrapper methods we use
96                // around lseek will return Err rather than returning a value
97                // less than 0
98                match x {
99                    Tag::Data(start) => Segment {
100                        segment_type: SegmentType::Data,
101                        start: start as u64,
102                        end: (y.offset() - 1) as u64,
103                    },
104                    Tag::Hole(start) => Segment {
105                        segment_type: SegmentType::Hole,
106                        start: start as u64,
107                        end: (y.offset() - 1) as u64,
108                    },
109                    // End should only ever be the last element the tag vector,
110                    // so it can never be the first element of a pair
111                    Tag::End(_) => unreachable!(),
112                }
113            })
114            .collect::<Vec<_>>();
115        // Modify the last element so it actually ends on the final offset
116        let len = tag_pairs.len();
117        tag_pairs[len - 1].end = end as u64;
118        Ok(tag_pairs)
119    }
120}
121
122fn find_next_hole(fd: c_int, offset: off_t) -> Result<Option<off_t>, ScanError> {
123    unsafe {
124        // First, call lseek with our file descriptor and current offset
125        let new_offset = lseek(fd, offset, SEEK_HOLE);
126        // if the return value of lseek is less than 0, an error has occurred
127        if new_offset < 0 {
128            // find and deref errno, honestly the scariest thing we do here
129            let errno = errno().into();
130            // Some of the errors we might not get here need to be handled
131            // specially, and one of them isn' actually an error
132            match errno {
133                /// EINVAL indicates that the file system does not support
134                /// SEEK_HOLE or SEEK_DATA, so we indicate as such
135                EINVAL => Err(ScanError::UnsupportedFileSystem),
136                // ENXIO indicates that the the file offset we are looking for
137                // either doesn't exist, or would be beyond the end of the file.
138                // In our case, this just means there is no next segment, so we
139                // return Ok(none) to indicate as such.
140                ENXIO => Ok(None),
141                // None of the other error codes require special handling, so we
142                // just turn them into an std::io::Error for user friendliness
143                _ => Err(Error::last_os_error().into()),
144            }
145        } else {
146            // If no errors occurred, we are good to return our offset.
147            Ok(Some(new_offset))
148        }
149    }
150}
151
152fn find_next_data(fd: c_int, offset: off_t) -> Result<Option<off_t>, ScanError> {
153    unsafe {
154        // First, call lseek with our file descriptor and current offset
155        let new_offset = lseek(fd, offset, SEEK_DATA);
156        // if the return value of lseek is less than 0, an error has occurred
157        if new_offset < 0 {
158            // find and deref errno, honestly the scariest thing we do here
159            let errno = errno().into();
160            // Some of the errors we might not get here need to be handled
161            // specially, and one of them isn' actually an error
162            match errno {
163                /// EINVAL indicates that the file system does not support
164                /// SEEK_HOLE or SEEK_DATA, so we indicate as such
165                EINVAL => Err(ScanError::UnsupportedFileSystem),
166                // ENXIO indicates that the the file offset we are looking for
167                // either doesn't exist, or would be beyond the end of the file.
168                // In our case, this just means there is no next segment, so we
169                // return Ok(none) to indicate as such.
170                ENXIO => Ok(None),
171                // None of the other error codes require special handling, so we
172                // just turn them into an std::io::Error for user friendliness
173                _ => Err(Error::last_os_error().into()),
174            }
175        } else {
176            // If no errors occurred, we are good to return our offset.
177            Ok(Some(new_offset))
178        }
179    }
180}
181
182fn find_end(fd: c_int) -> Result<off_t, ScanError> {
183    unsafe {
184        let new_offset = lseek(fd, 0, SEEK_END);
185        if new_offset < 0 {
186            Err(Error::last_os_error().into())
187        } else {
188            Ok(new_offset)
189        }
190    }
191}