casc_storage/archive/
archive_reader.rs

1//! Archive file reader with memory mapping support
2
3use crate::error::{CascError, Result};
4use memmap2::{Mmap, MmapOptions};
5use std::borrow::Cow;
6use std::fs::File;
7use std::io::{BufReader, Cursor, Read, Seek, SeekFrom};
8use std::path::{Path, PathBuf};
9use std::sync::Arc;
10use tracing::debug;
11
/// Reader for CASC archive files with memory mapping support
///
/// `ArchiveReader::open` fills in either `mmap` (mapping succeeded) or `file`
/// (mapping skipped or failed), never both.
pub struct ArchiveReader {
    /// Memory-mapped file (if available); `None` when the archive is empty,
    /// exceeds the mapping size limit, or the mapping attempt failed
    mmap: Option<Mmap>,
    /// Regular buffered file reader (fallback); populated only when `mmap` is `None`
    file: Option<BufReader<File>>,
    /// Path to the archive file; the positioned-read fallback reopens the file from it
    path: Arc<PathBuf>,
    /// Size of the archive in bytes, taken from the file metadata at open time
    size: u64,
}
23
/// A readable, seekable window over a run of archive bytes.
///
/// The bytes live in a [`Cow`], so a section can either borrow them from its
/// source or own a private copy; both cases expose the same `Read` + `Seek`
/// behaviour through an internal [`Cursor`].
pub struct ArchiveSection<'a> {
    /// Cursor over the section's bytes; tracks the current read position.
    data: Cursor<Cow<'a, [u8]>>,
}

impl<'a> ArchiveSection<'a> {
    /// Wrap `data` in a section positioned at its first byte.
    pub fn new(data: Cow<'a, [u8]>) -> Self {
        let data = Cursor::new(data);
        Self { data }
    }

    /// Build a section that owns its bytes.
    pub fn from_vec(data: Vec<u8>) -> Self {
        Self::new(Cow::Owned(data))
    }

    /// Build a section that borrows `data` without copying it.
    pub fn from_slice(data: &'a [u8]) -> Self {
        Self::new(Cow::Borrowed(data))
    }
}

impl Read for ArchiveSection<'_> {
    /// Read from the current position, advancing it by the number of bytes returned.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        Read::read(&mut self.data, buf)
    }
}

impl Seek for ArchiveSection<'_> {
    /// Move the read position within the section and return the new position.
    fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
        Seek::seek(&mut self.data, pos)
    }
}
62
63impl ArchiveReader {
64    /// Determine if we can memory map a file of this size
65    pub fn can_memory_map(size: u64) -> bool {
66        // Platform-specific memory mapping limits
67        #[cfg(target_pointer_width = "64")]
68        {
69            // On 64-bit systems, we can handle much larger files
70            // Practical limit is around 128GB to avoid excessive virtual memory usage
71            const MAX_MMAP_SIZE: u64 = 128 * 1024 * 1024 * 1024; // 128GB
72            size <= MAX_MMAP_SIZE
73        }
74
75        #[cfg(target_pointer_width = "32")]
76        {
77            // On 32-bit systems, stick to 2GB limit due to address space constraints
78            const MAX_MMAP_SIZE_32BIT: u64 = 2 * 1024 * 1024 * 1024; // 2GB
79            size <= MAX_MMAP_SIZE_32BIT
80        }
81    }
82
83    /// Open an archive file for reading
84    pub fn open(path: &Path) -> Result<Self> {
85        let file = File::open(path)?;
86        let metadata = file.metadata()?;
87        let size = metadata.len();
88        let path = Arc::new(path.to_path_buf());
89
90        debug!("Opening archive: {:?} (size: {} bytes)", path, size);
91
92        // Try to memory-map the file (support for large archives >2GB)
93        let mmap = if size > 0 && Self::can_memory_map(size) {
94            // SAFETY: The file handle is valid and will remain open for the lifetime of the mmap.
95            // The mmap is read-only and the file won't be modified while mapped.
96            match unsafe { MmapOptions::new().map(&file) } {
97                Ok(mmap) => {
98                    debug!("Successfully memory-mapped archive ({} bytes)", size);
99                    Some(mmap)
100                }
101                Err(e) => {
102                    debug!("Failed to memory-map archive, using file reader: {}", e);
103                    None
104                }
105            }
106        } else if size > 0 {
107            debug!(
108                "Archive too large for memory mapping ({} bytes), using file reader",
109                size
110            );
111            None
112        } else {
113            None
114        };
115
116        // If we couldn't mmap, use a regular file reader
117        let file = if mmap.is_none() {
118            Some(BufReader::new(file))
119        } else {
120            None
121        };
122
123        Ok(Self {
124            mmap,
125            file,
126            path,
127            size,
128        })
129    }
130
131    /// Create a reader at a specific offset for streaming access (zero-copy when possible)
132    pub fn reader_at(&self, offset: u64, length: usize) -> Result<ArchiveSection<'_>> {
133        if offset + length as u64 > self.size {
134            return Err(CascError::InvalidArchiveFormat(format!(
135                "Read beyond archive bounds: offset={}, length={}, size={}",
136                offset, length, self.size
137            )));
138        }
139
140        if let Some(ref mmap) = self.mmap {
141            // Memory-mapped access - zero copy
142            let data = &mmap[offset as usize..(offset as usize + length)];
143            Ok(ArchiveSection::from_slice(data))
144        } else {
145            // For large archives without mmap, read the data into a buffer
146            let mut data = vec![0u8; length];
147            self.read_at_fallback(offset, &mut data)?;
148            Ok(ArchiveSection::from_vec(data))
149        }
150    }
151
152    /// Read data at a specific offset (returns Cow for zero-copy when possible)
153    pub fn read_at_cow(&self, offset: u64, length: usize) -> Result<Cow<'_, [u8]>> {
154        if offset + length as u64 > self.size {
155            return Err(CascError::InvalidArchiveFormat(format!(
156                "Read beyond archive bounds: offset={}, length={}, size={}",
157                offset, length, self.size
158            )));
159        }
160
161        if let Some(ref mmap) = self.mmap {
162            // Fast path: memory-mapped access - zero copy
163            let data = &mmap[offset as usize..(offset as usize + length)];
164            Ok(Cow::Borrowed(data))
165        } else {
166            // For large archives without mmap, read into owned data
167            let mut data = vec![0u8; length];
168            self.read_at_fallback(offset, &mut data)?;
169            Ok(Cow::Owned(data))
170        }
171    }
172
173    /// Fallback method for reading from non-memory-mapped files
174    fn read_at_fallback(&self, offset: u64, buffer: &mut [u8]) -> Result<()> {
175        // For large archives that can't be memory-mapped, use platform-specific optimizations
176
177        #[cfg(unix)]
178        {
179            use std::os::unix::fs::FileExt;
180
181            // Use pread for thread-safe positioned reads without seeking
182            let file = File::open(&*self.path)?;
183            file.read_exact_at(buffer, offset)?;
184            Ok(())
185        }
186
187        #[cfg(windows)]
188        {
189            use std::os::windows::fs::FileExt;
190
191            // Windows positioned read
192            let file = File::open(&*self.path)?;
193            let bytes_read = file.seek_read(buffer, offset)?;
194            if bytes_read != buffer.len() {
195                return Err(CascError::InvalidArchiveFormat(
196                    "Incomplete read from archive".into(),
197                ));
198            }
199            Ok(())
200        }
201
202        #[cfg(not(any(unix, windows)))]
203        {
204            // Fallback for other platforms - not thread-safe but functional
205            use std::io::{BufRead, BufReader};
206
207            let file = File::open(&*self.path)?;
208            let mut reader = BufReader::new(file);
209            reader.seek(SeekFrom::Start(offset))?;
210            reader.read_exact(buffer)?;
211            Ok(())
212        }
213    }
214
215    /// Read data at a specific offset (allocates for compatibility)
216    pub fn read_at(&mut self, offset: u64, length: usize) -> Result<Vec<u8>> {
217        if offset + length as u64 > self.size {
218            return Err(CascError::InvalidArchiveFormat(format!(
219                "Read beyond archive bounds: offset={}, length={}, size={}",
220                offset, length, self.size
221            )));
222        }
223
224        if let Some(ref mmap) = self.mmap {
225            // Fast path: memory-mapped access
226            let data = &mmap[offset as usize..(offset as usize + length)];
227            Ok(data.to_vec())
228        } else if let Some(ref mut file) = self.file {
229            // Traditional file read (for smaller files or when mmap failed)
230            file.seek(SeekFrom::Start(offset))?;
231            let mut buffer = vec![0u8; length];
232            file.read_exact(&mut buffer)?;
233            Ok(buffer)
234        } else {
235            // Large archive fallback - use positioned reads
236            let mut buffer = vec![0u8; length];
237            self.read_at_fallback(offset, &mut buffer)?;
238            Ok(buffer)
239        }
240    }
241
242    /// Read a slice of data without allocation (only works with mmap)
243    pub fn read_slice(&self, offset: u64, length: usize) -> Result<&[u8]> {
244        if offset + length as u64 > self.size {
245            return Err(CascError::InvalidArchiveFormat(format!(
246                "Read beyond archive bounds: offset={}, length={}, size={}",
247                offset, length, self.size
248            )));
249        }
250
251        if let Some(ref mmap) = self.mmap {
252            Ok(&mmap[offset as usize..(offset as usize + length)])
253        } else {
254            Err(CascError::InvalidArchiveFormat(
255                "Memory mapping not available for slice access".into(),
256            ))
257        }
258    }
259
    /// Get the size of the archive in bytes, as recorded when it was opened
    pub fn size(&self) -> u64 {
        self.size
    }
264
    /// Check if memory mapping is available, i.e. whether this reader holds a
    /// memory map of the archive
    pub fn is_memory_mapped(&self) -> bool {
        self.mmap.is_some()
    }
269
270    /// Prefetch data into memory (hint to OS)
271    #[allow(unused_variables)] // `offset` and `length` are only used on Unix
272    pub fn prefetch(&self, offset: u64, length: usize) -> Result<()> {
273        if let Some(ref mmap) = self.mmap {
274            // Advise the OS that we'll need this data soon
275            #[cfg(unix)]
276            {
277                let start = offset as usize;
278                let end = (offset as usize).saturating_add(length).min(mmap.len());
279                use memmap2::Advice;
280                let _ = mmap.advise_range(Advice::WillNeed, start, end - start);
281            }
282        }
283        Ok(())
284    }
285}