1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
//! Archive file reader with memory mapping support
use crate::error::{CascError, Result};
use memmap2::{Mmap, MmapOptions};
use std::borrow::Cow;
use std::fs::File;
use std::io::{BufReader, Cursor, Read, Seek, SeekFrom};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use tracing::debug;
/// Reader for a single CASC archive file, backed by a memory map when one can be created.
///
/// [`ArchiveReader::open`] fills in exactly one of `mmap` and `file`: the memory map when
/// mapping succeeds, otherwise a buffered file handle.
pub struct ArchiveReader {
/// Memory map of the whole file. `None` when the file is empty, is larger than the
/// `can_memory_map` limit, or the mapping call failed.
mmap: Option<Mmap>,
/// Buffered file handle used by `read_at` when there is no memory map.
/// `None` whenever `mmap` is `Some`.
file: Option<BufReader<File>>,
/// Path the archive was opened from; `read_at_fallback` reopens it for positioned reads.
path: Arc<PathBuf>,
/// Length of the archive in bytes, taken from the file metadata at open time.
/// All bounds checks are made against this value.
size: u64,
}
/// Seekable, readable view over a range of bytes taken from an archive.
///
/// The bytes live in a [`Cow`], so a section either borrows them for `'a`
/// or owns its own copy.
pub struct ArchiveSection<'a> {
    /// Cursor tracking the current read position within the section's bytes.
    data: Cursor<Cow<'a, [u8]>>,
}

impl<'a> ArchiveSection<'a> {
    /// Wrap borrowed-or-owned bytes in a section positioned at offset 0.
    pub fn new(data: Cow<'a, [u8]>) -> Self {
        let data = Cursor::new(data);
        Self { data }
    }

    /// Build a section that owns `data`.
    pub fn from_vec(data: Vec<u8>) -> Self {
        Self::new(Cow::Owned(data))
    }

    /// Build a section that borrows `data` without copying it.
    pub fn from_slice(data: &'a [u8]) -> Self {
        Self::new(Cow::Borrowed(data))
    }
}

impl Read for ArchiveSection<'_> {
    /// Read from the current position, advancing the inner cursor.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        let cursor = &mut self.data;
        cursor.read(buf)
    }
}

impl Seek for ArchiveSection<'_> {
    /// Reposition the inner cursor; offsets are relative to the section, not the archive.
    fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
        let cursor = &mut self.data;
        cursor.seek(pos)
    }
}
impl ArchiveReader {
/// Determine if we can memory map a file of this size
pub fn can_memory_map(size: u64) -> bool {
// Platform-specific memory mapping limits
#[cfg(target_pointer_width = "64")]
{
// On 64-bit systems, we can handle much larger files
// Practical limit is around 128GB to avoid excessive virtual memory usage
const MAX_MMAP_SIZE: u64 = 128 * 1024 * 1024 * 1024; // 128GB
size <= MAX_MMAP_SIZE
}
#[cfg(target_pointer_width = "32")]
{
// On 32-bit systems, stick to 2GB limit due to address space constraints
const MAX_MMAP_SIZE_32BIT: u64 = 2 * 1024 * 1024 * 1024; // 2GB
size <= MAX_MMAP_SIZE_32BIT
}
}
/// Open an archive file for reading
pub fn open(path: &Path) -> Result<Self> {
let file = File::open(path)?;
let metadata = file.metadata()?;
let size = metadata.len();
let path = Arc::new(path.to_path_buf());
debug!("Opening archive: {:?} (size: {} bytes)", path, size);
// Try to memory-map the file (support for large archives >2GB)
let mmap = if size > 0 && Self::can_memory_map(size) {
// SAFETY: The file handle is valid and will remain open for the lifetime of the mmap.
// The mmap is read-only and the file won't be modified while mapped.
match unsafe { MmapOptions::new().map(&file) } {
Ok(mmap) => {
debug!("Successfully memory-mapped archive ({} bytes)", size);
Some(mmap)
}
Err(e) => {
debug!("Failed to memory-map archive, using file reader: {}", e);
None
}
}
} else if size > 0 {
debug!(
"Archive too large for memory mapping ({} bytes), using file reader",
size
);
None
} else {
None
};
// If we couldn't mmap, use a regular file reader
let file = if mmap.is_none() {
Some(BufReader::new(file))
} else {
None
};
Ok(Self {
mmap,
file,
path,
size,
})
}
/// Create a reader at a specific offset for streaming access (zero-copy when possible)
pub fn reader_at(&self, offset: u64, length: usize) -> Result<ArchiveSection<'_>> {
if offset + length as u64 > self.size {
return Err(CascError::InvalidArchiveFormat(format!(
"Read beyond archive bounds: offset={}, length={}, size={}",
offset, length, self.size
)));
}
if let Some(ref mmap) = self.mmap {
// Memory-mapped access - zero copy
let data = &mmap[offset as usize..(offset as usize + length)];
Ok(ArchiveSection::from_slice(data))
} else {
// For large archives without mmap, read the data into a buffer
let mut data = vec![0u8; length];
self.read_at_fallback(offset, &mut data)?;
Ok(ArchiveSection::from_vec(data))
}
}
/// Read data at a specific offset (returns Cow for zero-copy when possible)
pub fn read_at_cow(&self, offset: u64, length: usize) -> Result<Cow<'_, [u8]>> {
if offset + length as u64 > self.size {
return Err(CascError::InvalidArchiveFormat(format!(
"Read beyond archive bounds: offset={}, length={}, size={}",
offset, length, self.size
)));
}
if let Some(ref mmap) = self.mmap {
// Fast path: memory-mapped access - zero copy
let data = &mmap[offset as usize..(offset as usize + length)];
Ok(Cow::Borrowed(data))
} else {
// For large archives without mmap, read into owned data
let mut data = vec![0u8; length];
self.read_at_fallback(offset, &mut data)?;
Ok(Cow::Owned(data))
}
}
/// Fallback method for reading from non-memory-mapped files
fn read_at_fallback(&self, offset: u64, buffer: &mut [u8]) -> Result<()> {
// For large archives that can't be memory-mapped, use platform-specific optimizations
#[cfg(unix)]
{
use std::os::unix::fs::FileExt;
// Use pread for thread-safe positioned reads without seeking
let file = File::open(&*self.path)?;
file.read_exact_at(buffer, offset)?;
Ok(())
}
#[cfg(windows)]
{
use std::os::windows::fs::FileExt;
// Windows positioned read
let file = File::open(&*self.path)?;
let bytes_read = file.seek_read(buffer, offset)?;
if bytes_read != buffer.len() {
return Err(CascError::InvalidArchiveFormat(
"Incomplete read from archive".into(),
));
}
Ok(())
}
#[cfg(not(any(unix, windows)))]
{
// Fallback for other platforms - not thread-safe but functional
use std::io::{BufRead, BufReader};
let file = File::open(&*self.path)?;
let mut reader = BufReader::new(file);
reader.seek(SeekFrom::Start(offset))?;
reader.read_exact(buffer)?;
Ok(())
}
}
/// Read data at a specific offset (allocates for compatibility)
pub fn read_at(&mut self, offset: u64, length: usize) -> Result<Vec<u8>> {
if offset + length as u64 > self.size {
return Err(CascError::InvalidArchiveFormat(format!(
"Read beyond archive bounds: offset={}, length={}, size={}",
offset, length, self.size
)));
}
if let Some(ref mmap) = self.mmap {
// Fast path: memory-mapped access
let data = &mmap[offset as usize..(offset as usize + length)];
Ok(data.to_vec())
} else if let Some(ref mut file) = self.file {
// Traditional file read (for smaller files or when mmap failed)
file.seek(SeekFrom::Start(offset))?;
let mut buffer = vec![0u8; length];
file.read_exact(&mut buffer)?;
Ok(buffer)
} else {
// Large archive fallback - use positioned reads
let mut buffer = vec![0u8; length];
self.read_at_fallback(offset, &mut buffer)?;
Ok(buffer)
}
}
/// Read a slice of data without allocation (only works with mmap)
pub fn read_slice(&self, offset: u64, length: usize) -> Result<&[u8]> {
if offset + length as u64 > self.size {
return Err(CascError::InvalidArchiveFormat(format!(
"Read beyond archive bounds: offset={}, length={}, size={}",
offset, length, self.size
)));
}
if let Some(ref mmap) = self.mmap {
Ok(&mmap[offset as usize..(offset as usize + length)])
} else {
Err(CascError::InvalidArchiveFormat(
"Memory mapping not available for slice access".into(),
))
}
}
/// Get the size of the archive
pub fn size(&self) -> u64 {
self.size
}
/// Check if memory mapping is available
pub fn is_memory_mapped(&self) -> bool {
self.mmap.is_some()
}
/// Prefetch data into memory (hint to OS)
#[allow(unused_variables)] // `offset` and `length` are only used on Unix
pub fn prefetch(&self, offset: u64, length: usize) -> Result<()> {
if let Some(ref mmap) = self.mmap {
// Advise the OS that we'll need this data soon
#[cfg(unix)]
{
let start = offset as usize;
let end = (offset as usize).saturating_add(length).min(mmap.len());
use memmap2::Advice;
let _ = mmap.advise_range(Advice::WillNeed, start, end - start);
}
}
Ok(())
}
}