Skip to main content

rustalign_io/
mmap.rs

1//! Memory-mapped file support for FM-index loading
2//!
3//! This module provides cross-platform memory-mapped file support
4//! using the memmap2 crate.
5
6use rustalign_common::{AlignError, Result};
7use std::fs::File;
8use std::path::Path;
9
10/// Memory-mapped file wrapper
11///
12/// Provides read-only access to a file via memory mapping.
13/// This is used for efficient loading of FM-index files (.rai).
14#[derive(Debug)]
15pub struct MmapFile {
16    /// The underlying file
17    #[allow(dead_code)]
18    file: File,
19    /// The memory map
20    mmap: memmap2::Mmap,
21}
22
23impl MmapFile {
24    /// Open a file and memory-map it
25    ///
26    /// # Arguments
27    /// * `path` - Path to the file to memory-map
28    ///
29    /// # Returns
30    /// A MmapFile instance providing read-only access
31    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
32        let path = path.as_ref();
33
34        // Open the file
35        let file = File::open(path).map_err(|e| {
36            AlignError::Other(format!("Failed to open file '{}': {}", path.display(), e))
37        })?;
38
39        // Create the memory map (read-only)
40        let mmap = unsafe { memmap2::Mmap::map(&file) }.map_err(|e| {
41            AlignError::Other(format!(
42                "Failed to memory-map file '{}': {}",
43                path.display(),
44                e
45            ))
46        })?;
47
48        Ok(Self { file, mmap })
49    }
50
51    /// Get the length of the memory-mapped file in bytes
52    pub fn len(&self) -> usize {
53        self.mmap.len()
54    }
55
56    /// Check if the memory-mapped file is empty
57    pub fn is_empty(&self) -> bool {
58        self.mmap.is_empty()
59    }
60
61    /// Get a slice of the memory-mapped file
62    pub fn as_slice(&self) -> &[u8] {
63        &self.mmap
64    }
65
66    /// Get a mutable slice of the memory-mapped file
67    ///
68    /// Note: This requires the Mmap to be created with map_mut
69    pub fn as_mut_slice(&mut self) -> &mut [u8] {
70        // This won't work with read-only mmap - need to use map_mut
71        // For now, we'll just return an empty slice
72        &mut []
73    }
74
75    /// Get a pointer to the memory-mapped data
76    pub fn as_ptr(&self) -> *const u8 {
77        self.mmap.as_ptr()
78    }
79
80    /// Read a value at a specific offset
81    ///
82    /// # Safety
83    /// The offset must be within bounds and properly aligned
84    pub unsafe fn read_at<T>(&self, offset: usize) -> T {
85        unsafe {
86            let ptr = self.as_ptr().add(offset) as *const T;
87            ptr.read_unaligned()
88        }
89    }
90
91    /// Get a subslice starting at the given offset
92    pub fn get_range(&self, offset: usize, len: usize) -> Result<&[u8]> {
93        if offset + len > self.mmap.len() {
94            return Err(AlignError::Other(format!(
95                "Range out of bounds: offset={}, len={}, file_size={}",
96                offset,
97                len,
98                self.mmap.len()
99            )));
100        }
101        Ok(&self.mmap[offset..offset + len])
102    }
103}
104
105impl std::ops::Deref for MmapFile {
106    type Target = [u8];
107
108    fn deref(&self) -> &Self::Target {
109        &self.mmap
110    }
111}
112
/// Builder for memory-mapped file with options
///
/// Holds the access-pattern hint to request when the file is opened. The
/// two flags are kept mutually exclusive by the builder methods: enabling
/// one clears the other.
#[derive(Debug, Clone)]
pub struct MmapFileBuilder {
    /// Whether to advise random access pattern
    random_access: bool,
    /// Whether to advise sequential access pattern
    sequential_access: bool,
}
120
121impl Default for MmapFileBuilder {
122    fn default() -> Self {
123        Self {
124            random_access: false,
125            sequential_access: true,
126        }
127    }
128}
129
130impl MmapFileBuilder {
131    /// Create a new builder
132    pub fn new() -> Self {
133        Self::default()
134    }
135
136    /// Advise random access pattern
137    pub fn random_access(mut self) -> Self {
138        self.random_access = true;
139        self.sequential_access = false;
140        self
141    }
142
143    /// Advise sequential access pattern
144    pub fn sequential_access(mut self) -> Self {
145        self.sequential_access = true;
146        self.random_access = false;
147        self
148    }
149
150    /// Open and memory-map the file
151    pub fn open<P: AsRef<Path>>(self, path: P) -> Result<MmapFile> {
152        let mm = MmapFile::open(path)?;
153
154        // Apply access pattern hints if supported
155        #[cfg(target_os = "linux")]
156        {
157            let advice = if self.random_access {
158                libc::POSIX_MADV_RANDOM
159            } else if self.sequential_access {
160                libc::POSIX_MADV_SEQUENTIAL
161            } else {
162                libc::POSIX_MADV_NORMAL
163            };
164            unsafe {
165                libc::posix_madvise(mm.mmap.as_ptr() as *mut libc::c_void, mm.mmap.len(), advice);
166            }
167        }
168
169        Ok(mm)
170    }
171}
172
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::path::{Path, PathBuf};

    /// On-disk test fixture that removes itself when dropped.
    ///
    /// Using a guard means the file is cleaned up even when an assertion
    /// panics. Declare the fixture *before* the mapping in each test: locals
    /// drop in reverse declaration order, so the mapping is released before
    /// the file is deleted.
    struct TempFile {
        path: PathBuf,
    }

    impl TempFile {
        /// Create a file in the system temp directory holding `contents`.
        ///
        /// The process id is part of the file name so concurrent test runs
        /// on the same machine do not overwrite each other's fixtures.
        fn create(name: &str, contents: &[u8]) -> Self {
            let file_name = format!("rustalign_{}_{}", std::process::id(), name);
            let path = std::env::temp_dir().join(file_name);
            fs::write(&path, contents).unwrap();
            Self { path }
        }

        /// Location of the fixture on disk
        fn path(&self) -> &Path {
            &self.path
        }
    }

    impl Drop for TempFile {
        fn drop(&mut self) {
            // Best effort: a cleanup failure must not panic while a failed
            // test is already unwinding.
            let _ = fs::remove_file(&self.path);
        }
    }

    #[test]
    fn test_mmap_file_open() {
        let fixture = TempFile::create("test_mmap.txt", b"Hello, memory-mapped world!");

        let mm = MmapFile::open(fixture.path()).unwrap();
        // The actual content is what matters
        assert_eq!(mm.as_slice(), b"Hello, memory-mapped world!");
    }

    #[test]
    fn test_mmap_file_is_empty() {
        let fixture = TempFile::create("test_mmap_empty.txt", b"");

        let mm = MmapFile::open(fixture.path()).unwrap();
        assert!(mm.is_empty());
        assert_eq!(mm.len(), 0);
    }

    #[test]
    fn test_mmap_file_deref() {
        let fixture = TempFile::create("test_mmap_deref.txt", b"ABC");

        let mm = MmapFile::open(fixture.path()).unwrap();
        assert_eq!(mm[0], b'A');
        assert_eq!(mm[1], b'B');
        assert_eq!(mm[2], b'C');
    }

    #[test]
    fn test_mmap_file_get_range() {
        let fixture = TempFile::create("test_mmap_range.txt", b"0123456789");

        let mm = MmapFile::open(fixture.path()).unwrap();
        assert_eq!(mm.get_range(0, 5).unwrap(), b"01234");
        assert_eq!(mm.get_range(5, 5).unwrap(), b"56789");

        // An empty range exactly at the end of the file is still valid
        assert!(mm.get_range(10, 0).unwrap().is_empty());

        // Test out of bounds
        assert!(mm.get_range(8, 5).is_err());
        assert!(mm.get_range(11, 0).is_err());
    }

    #[test]
    fn test_mmap_file_builder() {
        let fixture = TempFile::create("test_mmap_builder.txt", b"Builder test");

        let mm = MmapFileBuilder::new()
            .sequential_access()
            .open(fixture.path())
            .unwrap();
        assert_eq!(mm.len(), 12);

        // The random-access hint must yield the same mapped contents
        let mm_random = MmapFileBuilder::new()
            .random_access()
            .open(fixture.path())
            .unwrap();
        assert_eq!(mm_random.as_slice(), b"Builder test");
    }
}