1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
// Copyright (c) 2016-2021 The http-serve developers
// Copyright (c) 2026 Greg Steffensen
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE.txt or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT.txt or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use std::io;
use std::path::Path;
/// Opens the file at `path` for reading.
///
/// This is the Unix flavour of `open_file`; it requests plain read-only
/// access and passes no platform-specific flags.
///
/// # Errors
///
/// Returns whatever I/O error the underlying open reports (for example when
/// `path` does not exist).
#[cfg(unix)]
pub(crate) fn open_file(path: &Path) -> io::Result<std::fs::File> {
    // Read-only open, spelled through `OpenOptions` so it reads the same way
    // as the Windows variant of this function.
    std::fs::OpenOptions::new().read(true).open(path)
}
/// Opens the file at `path` for reading.
///
/// This is the Windows flavour of `open_file`. In addition to read access it
/// passes `FILE_FLAG_BACKUP_SEMANTICS` as a custom flag.
///
/// # Errors
///
/// Returns whatever I/O error the underlying open reports.
#[cfg(windows)]
pub(crate) fn open_file(path: &Path) -> io::Result<std::fs::File> {
    use std::fs::OpenOptions;
    use std::os::windows::fs::OpenOptionsExt;

    let mut options = OpenOptions::new();
    options.read(true);
    // Allow opening directory handles so callers can classify directories
    // via metadata in a platform-consistent way.
    options.custom_flags(windows_sys::Win32::Storage::FileSystem::FILE_FLAG_BACKUP_SEMANTICS);
    options.open(path)
}
/// Positioned chunk reads that do not rely on the file cursor.
pub trait FileExt {
    /// Reads at least 1, at most `chunk_size` bytes beginning at `offset`, or fails.
    ///
    /// The returned vector's length is the number of bytes actually read, which
    /// may be less than `chunk_size` (for example when the range runs past the
    /// end of the file).
    ///
    /// The file cursor changes on Windows (like `std::os::windows::fs::seek_read`) but not Unix
    /// (like `std::os::unix::fs::FileExt::read_at`). The caller never uses the cursor, so this
    /// doesn't matter.
    ///
    /// The Unix implementation zero-fills its buffer and uses the safe `read_at`, so no
    /// uninitialized memory is ever exposed as `&mut [u8]`. The Windows implementation goes
    /// directly to Windows APIs to allow soundly reading into an uninitialized buffer. The
    /// implementations can be unified (and the zero-fill dropped) when
    /// [`read_buf`](https://github.com/rust-lang/rust/issues/78485) is stabilized, including buf
    /// equivalents of `read_at`/`seek_read`.
    ///
    /// # Errors
    ///
    /// * `UnexpectedEof` if no bytes could be read at `offset` (this includes a
    ///   `chunk_size` of zero).
    /// * Any other I/O error reported by the operating system, unchanged.
    fn read_range(&self, chunk_size: usize, offset: u64) -> io::Result<Vec<u8>>;
}

impl FileExt for std::fs::File {
    #[cfg(unix)]
    fn read_range(&self, chunk_size: usize, offset: u64) -> io::Result<Vec<u8>> {
        use std::os::unix::fs::FileExt as _;

        // `read_at` is a safe API taking `&mut [u8]`, so the buffer handed to it
        // must be fully initialized. Zero-fill up front instead of exposing the
        // vector's uninitialized spare capacity as a byte slice.
        let mut chunk = vec![0u8; chunk_size];
        let bytes_read = self.read_at(&mut chunk, offset)?;
        if bytes_read == 0 {
            return Err(io::ErrorKind::UnexpectedEof.into());
        }
        // Keep only the prefix the OS actually filled in.
        chunk.truncate(bytes_read);
        Ok(chunk)
    }

    #[cfg(windows)]
    fn read_range(&self, chunk_size: usize, offset: u64) -> io::Result<Vec<u8>> {
        // References:
        // https://github.com/rust-lang/rust/blob/5ffebc2cb3a089c27a4c7da13d09fd2365c288aa/library/std/src/sys/windows/handle.rs#L230
        // https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-readfile
        use std::os::windows::io::AsRawHandle;

        let handle = self.as_raw_handle();
        let mut read = 0u32;
        let mut chunk: Vec<u8> = Vec::with_capacity(chunk_size);
        unsafe {
            // SAFETY: a zero `OVERLAPPED` is valid.
            let mut overlapped: windows_sys::Win32::System::IO::OVERLAPPED = std::mem::zeroed();
            // The 64-bit offset is passed as two 32-bit halves; the first cast
            // intentionally keeps only the low half.
            overlapped.Anonymous.Anonymous.Offset = offset as u32;
            overlapped.Anonymous.Anonymous.OffsetHigh = (offset >> 32) as u32;
            // SAFETY: `Vec::with_capacity` guaranteed the pointer range is valid.
            if windows_sys::Win32::Storage::FileSystem::ReadFile(
                handle as _,
                chunk.as_mut_ptr(),
                u32::try_from(chunk_size).unwrap_or(u32::MAX), // saturating conversion
                &mut read,
                &mut overlapped,
            ) == 0
            {
                match windows_sys::Win32::Foundation::GetLastError() {
                    #[allow(clippy::print_stderr)]
                    windows_sys::Win32::Foundation::ERROR_IO_PENDING => {
                        // Match std's <https://github.com/rust-lang/rust/issues/81357> fix:
                        // abort the process before `overlapped` is dropped.
                        eprintln!("I/O error: operation failed to complete synchronously");
                        std::process::abort();
                    }
                    windows_sys::Win32::Foundation::ERROR_HANDLE_EOF => {
                        // std::io::Error::from_raw_os_error converts this to ErrorKind::Other.
                        // Override that.
                        return Err(std::io::Error::new(
                            std::io::ErrorKind::UnexpectedEof,
                            format!("no bytes beyond position {}", offset),
                        ));
                    }
                    o => return Err(std::io::Error::from_raw_os_error(o as i32)),
                }
            }
            // SAFETY: `ReadFile` guaranteed these bytes are initialized.
            chunk.set_len(usize::try_from(read).expect("u32 should fit in usize"));
        }
        // Uphold the "at least 1 byte or an error" contract even if `ReadFile`
        // reported success without transferring anything, matching the Unix
        // implementation.
        if read == 0 {
            return Err(io::ErrorKind::UnexpectedEof.into());
        }
        Ok(chunk)
    }
}
#[cfg(test)]
mod test {
    use super::*;
    use std::io::Write;
    use tempfile::tempfile;

    /// Fixture contents shared by the small-range tests.
    const DIGITS: &[u8] = b"0123456789";

    /// Returns an anonymous temporary file that already holds `contents`.
    fn file_containing(contents: &[u8]) -> std::fs::File {
        let mut file = tempfile().unwrap();
        file.write_all(contents).unwrap();
        file
    }

    /// A range lying entirely inside the file comes back exactly.
    #[test]
    fn test_read_at_middle() {
        let file = file_containing(DIGITS);
        assert_eq!(file.read_range(3, 4).unwrap(), b"456");
    }

    /// A range that starts inside the file but runs past its end is cut short.
    #[test]
    fn test_read_at_beyond_eof() {
        let file = file_containing(DIGITS);
        assert_eq!(file.read_range(10, 8).unwrap(), b"89");
    }

    /// A range that starts at the end of the file yields `UnexpectedEof`.
    #[test]
    fn test_read_at_entirely_beyond_eof() {
        let file = file_containing(DIGITS);
        let kind = file.read_range(3, 10).unwrap_err().kind();
        assert_eq!(kind, std::io::ErrorKind::UnexpectedEof);
    }

    /// OS-level failures surface with their raw error code intact.
    #[test]
    fn test_read_at_io_error() {
        let dir = tempfile::tempdir().unwrap();
        let write_only = std::fs::OpenOptions::new()
            .write(true)
            .create(true)
            .truncate(true)
            .open(dir.path().join("write_only"))
            .unwrap();

        // This should fail because it's not open for reading.
        let err = write_only.read_range(10, 0).unwrap_err();
        #[cfg(unix)]
        assert_eq!(err.raw_os_error(), Some(9)); // EBADF
        #[cfg(windows)]
        assert_eq!(err.raw_os_error(), Some(5)); // ERROR_ACCESS_DENIED
    }

    /// A single call can return a whole multi-kilobyte file.
    #[test]
    fn test_read_whole() {
        use rand::RngCore;

        let mut payload = vec![0u8; 20480]; // 20KB
        rand::thread_rng().fill_bytes(&mut payload);
        let file = file_containing(&payload);

        let chunk = file.read_range(payload.len(), 0).unwrap();
        assert_eq!(chunk, payload);
    }
}