1use std::fs::File;
31use std::io::{Read, Seek, SeekFrom};
32use std::ops::{Bound, RangeBounds};
33use std::sync::Arc;
34
/// A view over a contiguous byte range of a shared [`File`].
///
/// The file handle is stored behind an [`Arc`], so clones and sub-slices
/// reuse the same handle, while every value carries its own `cursor`,
/// `start` and `end` offsets.
#[derive(Clone, Debug)]
pub struct FileSlice {
    /// Shared handle to the underlying file.
    file: Arc<File>,
    /// Absolute offset in the underlying file at which the next read starts.
    cursor: u64,
    /// Absolute offset in the underlying file where the slice begins.
    start: u64,
    /// Absolute offset in the underlying file where the slice ends (exclusive).
    end: u64,
}
47
48impl FileSlice {
49 pub fn new(file: File) -> FileSlice {
51 let end = file.metadata().unwrap().len();
52 FileSlice {
53 file: Arc::new(file),
54 cursor: 0,
55 start: 0,
56 end,
57 }
58 }
59
60 pub fn slice<T>(&self, range: T) -> FileSlice
62 where
63 T: RangeBounds<u64>,
64 {
65 let start = match range.start_bound() {
67 Bound::Included(x) => self.start + x,
68 Bound::Excluded(x) => self.start + x + 1,
69 Bound::Unbounded => self.start,
70 };
71 let end = match range.end_bound() {
72 Bound::Included(x) => self.start + x + 1,
73 Bound::Excluded(x) => self.start + x,
74 Bound::Unbounded => self.end,
75 };
76 let end = end
77 .min(self.end) .max(start); FileSlice {
80 file: self.file.clone(),
81 cursor: start,
82 start,
83 end,
84 }
85 }
86}
87
88impl FileSlice {
89 pub fn start_pos(&self) -> u64 {
92 self.start
93 }
94
95 pub fn end_pos(&self) -> u64 {
98 self.end
99 }
100
101 pub fn cursor_pos(&self) -> u64 {
103 self.cursor
104 }
105
106 pub fn is_empty(&self) -> bool {
107 self.start == self.end
108 }
109
110 pub fn len(&self) -> usize {
111 (self.end - self.start) as usize
112 }
113
114 pub fn bytes_remaining(&self) -> usize {
115 (self.end - self.cursor) as usize
116 }
117}
118
119impl Read for FileSlice {
120 fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
121 let remaining = (self.end - self.cursor) as usize;
122 let buf = if buf.len() > remaining {
123 &mut buf[..remaining]
124 } else {
125 buf
126 };
127
128 let x;
129 #[cfg(target_family = "unix")]
130 {
131 use std::os::unix::fs::FileExt;
132 x = self.file.read_at(buf, self.cursor)?;
133 }
134 #[cfg(target_family = "windows")]
135 {
136 use std::os::windows::fs::FileExt;
137 x = self.file.seek_read(buf, self.cursor)?;
138 }
139 #[cfg(target_family = "wasm")]
140 {
141 use std::os::wasi::fs::FileExt;
142 x = self.file.read_at(buf, self.cursor)?;
143 }
144
145 self.cursor += x as u64;
146 Ok(x)
147 }
148}
149
150impl Seek for FileSlice {
151 fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
152 let cursor = match pos {
153 SeekFrom::Current(x) => i128::from(self.cursor) + i128::from(x),
154 SeekFrom::Start(x) => i128::from(self.start + x),
155 SeekFrom::End(x) => i128::from(self.end) + i128::from(x),
156 };
157 let cursor = match u64::try_from(cursor) {
158 Ok(x) if x >= self.start => x,
159 _ => {
160 return Err(std::io::Error::new(
161 std::io::ErrorKind::Other,
162 "Out of bounds",
163 ))
164 }
165 };
166 self.cursor = cursor;
167 self.stream_position()
168 }
169
170 fn stream_position(&mut self) -> std::io::Result<u64> {
171 Ok(self.cursor - self.start)
172 }
173}
174
175impl FileSlice {
176 pub fn expand(&mut self) {
183 self.start = 0;
184 self.end = self.file.metadata().unwrap().len();
185 }
186
187 pub fn try_unwrap(self) -> Result<File, FileSlice> {
193 Arc::try_unwrap(self.file).map_err(|file| FileSlice {
194 file,
195 cursor: self.cursor,
196 start: self.start,
197 end: self.end,
198 })
199 }
200}
201
/// Implementations of the parquet reader traits for [`FileSlice`].
#[cfg(feature = "parquet")]
mod parquet_impls {
    use super::*;
    use bytes::Bytes;
    use parquet::file::reader::{ChunkReader, Length};

    impl Length for FileSlice {
        /// Returns the number of bytes spanned by the slice.
        fn len(&self) -> u64 {
            self.end - self.start
        }
    }

    impl ChunkReader for FileSlice {
        type T = FileSlice;

        /// Returns a reader over everything from `start` (relative to this
        /// slice) to the end of this slice.
        fn get_read(&self, start: u64) -> parquet::errors::Result<FileSlice> {
            // `slice` takes slice-relative bounds, so use an open-ended range
            // rather than the absolute `self.end`.
            Ok(self.slice(start..))
        }

        /// Reads exactly `length` bytes beginning at `start` (relative to
        /// this slice).
        ///
        /// Fails when fewer than `length` bytes are available in the slice
        /// from `start`, because `read_exact` cannot fill the buffer.
        fn get_bytes(&self, start: u64, length: usize) -> parquet::errors::Result<Bytes> {
            let mut buf = vec![0; length];
            // Saturate so an extreme `start` cannot overflow the end bound.
            let end = start.saturating_add(length as u64);
            self.slice(start..end).read_exact(&mut buf)?;
            Ok(buf.into())
        }
    }
}
229
/// Splits a tar archive into one [`FileSlice`] per entry.
///
/// Every entry header is collected first, together with the byte range of
/// the entry's data in the archive file. The archive is then unwrapped into
/// its file, and each returned slice views that range of the shared file.
///
/// # Errors
///
/// Returns an error if the entry iterator cannot be created or if reading
/// any entry fails.
///
/// # Panics
///
/// Panics if the metadata of the archive file cannot be read (see
/// `FileSlice::new`).
#[cfg(feature = "tar")]
pub fn slice_tarball(
    mut archive: tar::Archive<File>,
) -> std::io::Result<impl Iterator<Item = (tar::Header, FileSlice)>> {
    let headers = archive
        .entries_with_seek()?
        .map(|entry| -> std::io::Result<_> {
            // Propagate a bad entry to the caller instead of panicking.
            let entry = entry?;
            let start = entry.raw_file_position();
            // `slice` clamps to the file length, so saturating is sufficient here.
            let end = start.saturating_add(entry.size());
            Ok((entry.header().clone(), start, end))
        })
        .collect::<std::io::Result<Vec<_>>>()?;
    let file = FileSlice::new(archive.into_inner());
    Ok(headers
        .into_iter()
        .map(move |(header, start, end)| (header, file.slice(start..end))))
}