scirs2_sparse/adaptive_memory_compression/memory_mapping.rs

//! Memory-Mapped File Operations
//!
//! This module provides safe, cross-platform wrappers around memory-mapped file
//! operations for efficient large-scale data access. The current implementation
//! uses positioned (buffered) file I/O as a portable stand-in for true memory
//! mapping.
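//!
//! # Example
//!
//! A minimal usage sketch (not compiled as a doctest; the module path and the
//! file location shown are assumptions):
//!
//! ```ignore
//! use scirs2_sparse::adaptive_memory_compression::memory_mapping::MemoryMappedFile;
//! use std::path::PathBuf;
//!
//! // Create a 1 KiB backing file, write at an offset, and read it back.
//! let mut file = MemoryMappedFile::new(PathBuf::from("/tmp/example.dat"), 1024).unwrap();
//! let written = file.write_at(0, b"hello").unwrap();
//! assert_eq!(written, 5);
//!
//! let mut buf = vec![0u8; 5];
//! let read = file.read_at(0, &mut buf).unwrap();
//! assert_eq!(&buf[..read], b"hello");
//!
//! // Persist the contents to disk.
//! file.flush().unwrap();
//! ```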

use crate::error::{SparseError, SparseResult};
use std::fs::{File, OpenOptions};
#[cfg(not(any(unix, windows)))]
use std::io::{Read, Seek, SeekFrom, Write};
use std::marker::PhantomData;
use std::path::PathBuf;

#[cfg(unix)]
use std::os::unix::fs::FileExt;

#[cfg(windows)]
use std::os::windows::fs::FileExt;

/// Memory-mapped file wrapper providing cross-platform file access
#[derive(Debug)]
pub struct MemoryMappedFile {
    filepath: PathBuf,
    file: File,
    size: usize,
    mapped: bool,
    access_count: u64,
    _phantom: PhantomData<()>,
}

/// Memory mapping configuration
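///
/// A minimal sketch of customizing the defaults; read-only mappings are meant
/// for files that already exist, since the file is neither created with write
/// access nor resized in that mode (the path is an illustrative assumption;
/// not compiled as a doctest):
///
/// ```ignore
/// let config = MemoryMappingConfig {
///     read_only: true,
///     ..Default::default()
/// };
///
/// let file = MemoryMappedFile::new_with_config(
///     PathBuf::from("/tmp/existing.dat"),
///     4096,
///     config,
/// )
/// .expect("open existing file read-only");
/// ```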
#[derive(Debug, Clone)]
pub struct MemoryMappingConfig {
    /// Enable read-only mapping
    pub read_only: bool,
    /// Enable write-through mapping
    pub write_through: bool,
    /// Prefetch pages on mapping
    pub prefetch: bool,
    /// Page size hint for mapping
    pub page_size_hint: usize,
}

/// Statistics for memory mapping operations
#[derive(Debug, Clone)]
pub struct MemoryMappingStats {
    pub total_files: usize,
    pub total_mapped_size: usize,
    pub read_operations: u64,
    pub write_operations: u64,
    pub cache_hits: u64,
    pub cache_misses: u64,
}

impl Default for MemoryMappingConfig {
    fn default() -> Self {
        Self {
            read_only: false,
            write_through: true,
            prefetch: false,
            page_size_hint: 4096, // 4KB default page size
        }
    }
}

impl MemoryMappedFile {
    /// Create a new memory-mapped file, truncating any existing file at
    /// `filepath` and reserving `size` bytes
    pub fn new(filepath: PathBuf, size: usize) -> SparseResult<Self> {
        let file = OpenOptions::new()
            .create(true)
            .truncate(true)
            .read(true)
            .write(true)
            .open(&filepath)
            .map_err(|e| SparseError::Io(format!("Failed to create file {filepath:?}: {e}")))?;

        // Reserve the requested size up front
        file.set_len(size as u64)
            .map_err(|e| SparseError::Io(format!("Failed to set file size: {e}")))?;

        Ok(Self {
            filepath,
            file,
            size,
            mapped: true, // We'll treat buffered I/O as "mapped" for this implementation
            access_count: 0,
            _phantom: PhantomData,
        })
    }

    /// Create a memory-mapped file with an explicit configuration (the file is
    /// not truncated)
    pub fn new_with_config(
        filepath: PathBuf,
        size: usize,
        config: MemoryMappingConfig,
    ) -> SparseResult<Self> {
        let mut options = OpenOptions::new();
        options.create(true).read(true);

        if !config.read_only {
            options.write(true);
        }

        let file = options
            .open(&filepath)
            .map_err(|e| SparseError::Io(format!("Failed to create file {filepath:?}: {e}")))?;

        // Reserve the requested size only when the file is writable
        if !config.read_only {
            file.set_len(size as u64)
                .map_err(|e| SparseError::Io(format!("Failed to set file size: {e}")))?;
        }

        Ok(Self {
            filepath,
            file,
            size,
            mapped: true,
            access_count: 0,
            _phantom: PhantomData,
        })
    }

    /// Open existing memory-mapped file
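    ///
    /// A minimal sketch of reopening a previously written file (the path is an
    /// illustrative assumption; not compiled as a doctest):
    ///
    /// ```ignore
    /// let mut file = MemoryMappedFile::open(PathBuf::from("/tmp/example.dat")).unwrap();
    /// // The size is taken from the file's metadata on open.
    /// let contents = file.read_all().unwrap();
    /// println!("reopened {} bytes", contents.len());
    /// ```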
    pub fn open(filepath: PathBuf) -> SparseResult<Self> {
        let file = OpenOptions::new()
            .read(true)
            .write(true)
            .open(&filepath)
            .map_err(|e| SparseError::Io(format!("Failed to open file {filepath:?}: {e}")))?;

        let metadata = file
            .metadata()
            .map_err(|e| SparseError::Io(format!("Failed to get file metadata: {e}")))?;

        Ok(Self {
            filepath,
            file,
            size: metadata.len() as usize,
            mapped: true,
            access_count: 0,
            _phantom: PhantomData,
        })
    }

    /// Read data from the mapped file at `offset`, returning the number of
    /// bytes read (0 when `offset` is at or past the end of the file)
    pub fn read_at(&mut self, offset: usize, buffer: &mut [u8]) -> SparseResult<usize> {
        self.access_count += 1;

        if offset >= self.size {
            return Ok(0);
        }

        let read_size = buffer.len().min(self.size - offset);
        let buffer = &mut buffer[..read_size];

        #[cfg(unix)]
        {
            self.file
                .read_at(buffer, offset as u64)
                .map_err(|e| SparseError::Io(format!("Failed to read at offset {offset}: {e}")))
        }

        #[cfg(windows)]
        {
            self.file
                .seek_read(buffer, offset as u64)
                .map_err(|e| SparseError::Io(format!("Failed to read at offset {offset}: {e}")))
        }

        #[cfg(not(any(unix, windows)))]
        {
            // Fallback for other platforms - use regular seeking
            let mut file_clone = self
                .file
                .try_clone()
                .map_err(|e| SparseError::Io(format!("Failed to clone file handle: {e}")))?;
            file_clone
                .seek(SeekFrom::Start(offset as u64))
                .map_err(|e| SparseError::Io(format!("Failed to seek to offset {offset}: {e}")))?;
            file_clone
                .read(buffer)
                .map_err(|e| SparseError::Io(format!("Failed to read data: {e}")))
        }
    }

    /// Write data to the mapped file at `offset`, returning the number of
    /// bytes written. Data that would extend past the end of the file is
    /// truncated; an offset at or past the end is an error.
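    ///
    /// A small sketch of the bounds behaviour described above (sizes are
    /// illustrative; not compiled as a doctest):
    ///
    /// ```ignore
    /// let mut file = MemoryMappedFile::new(PathBuf::from("/tmp/trunc.dat"), 16).unwrap();
    ///
    /// // Only 4 of the 8 bytes fit between offset 12 and the end of the file.
    /// let written = file.write_at(12, b"ABCDEFGH").unwrap();
    /// assert_eq!(written, 4);
    ///
    /// // Writing at or past the end of the file is an error.
    /// assert!(file.write_at(16, b"X").is_err());
    /// ```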
    pub fn write_at(&mut self, offset: usize, data: &[u8]) -> SparseResult<usize> {
        self.access_count += 1;

        if offset >= self.size {
            return Err(SparseError::Io(format!(
                "Write offset {offset} exceeds file size {}",
                self.size
            )));
        }

        let write_size = data.len().min(self.size - offset);
        let data = &data[..write_size];

        #[cfg(unix)]
        {
            self.file
                .write_at(data, offset as u64)
                .map_err(|e| SparseError::Io(format!("Failed to write at offset {offset}: {e}")))
        }

        #[cfg(windows)]
        {
            self.file
                .seek_write(data, offset as u64)
                .map_err(|e| SparseError::Io(format!("Failed to write at offset {offset}: {e}")))
        }

        #[cfg(not(any(unix, windows)))]
        {
            // Fallback for other platforms - use regular seeking
            let mut file_clone = self
                .file
                .try_clone()
                .map_err(|e| SparseError::Io(format!("Failed to clone file handle: {e}")))?;
            file_clone
                .seek(SeekFrom::Start(offset as u64))
                .map_err(|e| SparseError::Io(format!("Failed to seek to offset {offset}: {e}")))?;
            file_clone
                .write(data)
                .map_err(|e| SparseError::Io(format!("Failed to write data: {e}")))
        }
    }

    /// Read entire file contents
    pub fn read_all(&mut self) -> SparseResult<Vec<u8>> {
        let mut buffer = vec![0u8; self.size];
        let bytes_read = self.read_at(0, &mut buffer)?;
        buffer.truncate(bytes_read);
        Ok(buffer)
    }

    /// Write entire file contents, growing the file if the data is larger
    pub fn write_all(&mut self, data: &[u8]) -> SparseResult<()> {
        if data.len() > self.size {
            // Resize file if necessary
            self.resize(data.len())?;
        }

        self.write_at(0, data)?;
        Ok(())
    }

    /// Resize the mapped file
    pub fn resize(&mut self, new_size: usize) -> SparseResult<()> {
        self.file
            .set_len(new_size as u64)
            .map_err(|e| SparseError::Io(format!("Failed to resize file: {e}")))?;
        self.size = new_size;
        Ok(())
    }

    /// Flush data and metadata to disk
    pub fn flush(&self) -> SparseResult<()> {
        self.file
            .sync_all()
            .map_err(|e| SparseError::Io(format!("Failed to flush file: {e}")))
    }

    /// Flush file contents to disk (data only; metadata may not be synced)
    pub fn flush_data(&self) -> SparseResult<()> {
        self.file
            .sync_data()
            .map_err(|e| SparseError::Io(format!("Failed to flush data: {e}")))
    }

    /// Get file size
    pub fn size(&self) -> usize {
        self.size
    }

    /// Get file path
    pub fn path(&self) -> &PathBuf {
        &self.filepath
    }

    /// Check if file is mapped
    pub fn is_mapped(&self) -> bool {
        self.mapped
    }

    /// Get access count
    pub fn access_count(&self) -> u64 {
        self.access_count
    }

    /// Reset access count
    pub fn reset_access_count(&mut self) {
        self.access_count = 0;
    }

    /// Get file metadata
    pub fn metadata(&self) -> SparseResult<std::fs::Metadata> {
        self.file
            .metadata()
            .map_err(|e| SparseError::Io(format!("Failed to get metadata: {e}")))
    }

    /// Check if file exists
    pub fn exists(&self) -> bool {
        self.filepath.exists()
    }

    /// Read data in chunks for better memory efficiency, invoking `callback`
    /// with each chunk and its starting offset
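    ///
    /// A minimal sketch of streaming the file through a fixed-size buffer,
    /// assuming `file` is an already-created `MemoryMappedFile` (the chunk
    /// size is an illustrative choice; not compiled as a doctest):
    ///
    /// ```ignore
    /// let mut total = 0usize;
    /// file.read_chunked(4096, |chunk, _offset| {
    ///     // `chunk` holds the bytes read starting at `_offset`.
    ///     total += chunk.len();
    ///     Ok(())
    /// })
    /// .unwrap();
    /// println!("read {total} bytes in 4 KiB chunks");
    /// ```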
    pub fn read_chunked<F>(&mut self, chunk_size: usize, mut callback: F) -> SparseResult<()>
    where
        F: FnMut(&[u8], usize) -> SparseResult<()>,
    {
        let mut offset = 0;
        let mut buffer = vec![0u8; chunk_size];

        while offset < self.size {
            let bytes_read = self.read_at(offset, &mut buffer)?;
            if bytes_read == 0 {
                break;
            }

            callback(&buffer[..bytes_read], offset)?;
            offset += bytes_read;
        }

        Ok(())
    }

    /// Write data in chunks for better memory efficiency; `data_provider` is
    /// called with the current offset and returns the next chunk, or `None`
    /// (or an empty chunk) to stop
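    ///
    /// A minimal sketch that copies an in-memory buffer into the file in 4 KiB
    /// pieces, assuming `file` was created with at least `data.len()` bytes
    /// (values are illustrative; not compiled as a doctest):
    ///
    /// ```ignore
    /// let data = vec![0xAAu8; 10_000];
    /// file.write_chunked(4096, |offset| {
    ///     if offset >= data.len() {
    ///         // Returning `None` (or an empty chunk) ends the loop.
    ///         return Ok(None);
    ///     }
    ///     let end = (offset + 4096).min(data.len());
    ///     Ok(Some(data[offset..end].to_vec()))
    /// })
    /// .unwrap();
    /// ```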
    pub fn write_chunked<F>(&mut self, chunk_size: usize, mut data_provider: F) -> SparseResult<()>
    where
        F: FnMut(usize) -> SparseResult<Option<Vec<u8>>>,
    {
        // The chunk size is currently advisory: the provider decides how large
        // each returned chunk actually is.
        let _ = chunk_size;
        let mut offset = 0;

        loop {
            match data_provider(offset)? {
                Some(chunk) => {
                    if chunk.is_empty() {
                        break;
                    }
                    let bytes_written = self.write_at(offset, &chunk)?;
                    offset += bytes_written;
                }
                None => break,
            }
        }

        Ok(())
    }

    /// Prefetch data into memory (hint to OS)
    pub fn prefetch(&self, offset: usize, length: usize) -> SparseResult<()> {
        // For now, this is a no-op since we're using buffered I/O
        // In a real implementation, this would use platform-specific prefetch hints
        let _end_offset = offset + length;
        Ok(())
    }

    /// Advise access pattern to OS
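    ///
    /// A small sketch of declaring a sequential scan before a pass over the
    /// file; both calls are currently no-op hints (not compiled as a doctest):
    ///
    /// ```ignore
    /// file.advise_access_pattern(AccessPattern::Sequential).unwrap();
    /// file.prefetch(0, 1 << 20).unwrap();
    /// ```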
    pub fn advise_access_pattern(&self, pattern: AccessPattern) -> SparseResult<()> {
        // Platform-specific advice would go here
        // For now, this is a no-op
        let _pattern = pattern;
        Ok(())
    }
}

/// Access pattern hints for memory mapping
#[derive(Debug, Clone, Copy)]
pub enum AccessPattern {
    /// Sequential access pattern
    Sequential,
    /// Random access pattern
    Random,
    /// Will need data soon
    WillNeed,
    /// Won't need data anymore
    DontNeed,
}

/// Memory mapping manager for multiple files
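///
/// A minimal sketch of managing several mapped files through one manager
/// (paths and sizes are illustrative assumptions; not compiled as a doctest):
///
/// ```ignore
/// let mut manager = MemoryMappingManager::new(MemoryMappingConfig::default());
///
/// let path = PathBuf::from("/tmp/block0.dat");
/// manager.map_file(path.clone(), 1 << 20).unwrap();
///
/// // Look up the mapped file and use it like a standalone `MemoryMappedFile`.
/// if let Some(file) = manager.get_file(&path) {
///     file.write_at(0, b"header").unwrap();
/// }
///
/// let stats = manager.get_stats();
/// println!("{} files, {} bytes mapped", stats.total_files, stats.total_mapped_size);
///
/// // Flush everything and drop the mappings.
/// manager.close_all().unwrap();
/// ```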
#[derive(Debug)]
pub struct MemoryMappingManager {
    mapped_files: std::collections::HashMap<PathBuf, MemoryMappedFile>,
    config: MemoryMappingConfig,
    stats: MemoryMappingStats,
}

impl MemoryMappingManager {
    /// Create new memory mapping manager
    pub fn new(config: MemoryMappingConfig) -> Self {
        Self {
            mapped_files: std::collections::HashMap::new(),
            config,
            stats: MemoryMappingStats {
                total_files: 0,
                total_mapped_size: 0,
                read_operations: 0,
                write_operations: 0,
                cache_hits: 0,
                cache_misses: 0,
            },
        }
    }

    /// Map a file
    pub fn map_file(&mut self, filepath: PathBuf, size: usize) -> SparseResult<()> {
        if self.mapped_files.contains_key(&filepath) {
            return Ok(()); // Already mapped
        }

        let mapped_file =
            MemoryMappedFile::new_with_config(filepath.clone(), size, self.config.clone())?;

        self.stats.total_files += 1;
        self.stats.total_mapped_size += size;

        self.mapped_files.insert(filepath, mapped_file);
        Ok(())
    }

    /// Unmap a file, flushing its contents first
    pub fn unmap_file(&mut self, filepath: &PathBuf) -> SparseResult<()> {
        if let Some(mapped_file) = self.mapped_files.remove(filepath) {
            self.stats.total_files -= 1;
            self.stats.total_mapped_size -= mapped_file.size();
            mapped_file.flush()?;
        }
        Ok(())
    }

    /// Get a mutable reference to a mapped file
    pub fn get_file(&mut self, filepath: &PathBuf) -> Option<&mut MemoryMappedFile> {
        self.mapped_files.get_mut(filepath)
    }

    /// Get statistics
    pub fn get_stats(&self) -> &MemoryMappingStats {
        &self.stats
    }

    /// Flush all mapped files
    pub fn flush_all(&self) -> SparseResult<()> {
        for mapped_file in self.mapped_files.values() {
            mapped_file.flush()?;
        }
        Ok(())
    }

    /// Flush and close all mapped files
    pub fn close_all(&mut self) -> SparseResult<()> {
        self.flush_all()?;
        self.mapped_files.clear();
        self.stats.total_files = 0;
        self.stats.total_mapped_size = 0;
        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    #[test]
    fn test_memory_mapped_file_creation() {
        let temp_dir = TempDir::new().unwrap();
        let filepath = temp_dir.path().join("test.dat");

        let mapped_file = MemoryMappedFile::new(filepath, 1024);
        assert!(mapped_file.is_ok());

        let mapped_file = mapped_file.unwrap();
        assert_eq!(mapped_file.size(), 1024);
        assert!(mapped_file.is_mapped());
    }

    #[test]
    fn test_read_write_operations() {
        let temp_dir = TempDir::new().unwrap();
        let filepath = temp_dir.path().join("test.dat");

        let mut mapped_file = MemoryMappedFile::new(filepath, 1024).unwrap();

        // Write some data
        let write_data = b"Hello, World!";
        let bytes_written = mapped_file.write_at(0, write_data).unwrap();
        assert_eq!(bytes_written, write_data.len());

        // Read it back
        let mut read_buffer = vec![0u8; write_data.len()];
        let bytes_read = mapped_file.read_at(0, &mut read_buffer).unwrap();
        assert_eq!(bytes_read, write_data.len());
        assert_eq!(&read_buffer, write_data);
    }

    #[test]
    fn test_file_resize() {
        let temp_dir = TempDir::new().unwrap();
        let filepath = temp_dir.path().join("test.dat");

        let mut mapped_file = MemoryMappedFile::new(filepath, 1024).unwrap();
        assert_eq!(mapped_file.size(), 1024);

        // Resize to larger
        mapped_file.resize(2048).unwrap();
        assert_eq!(mapped_file.size(), 2048);

        // Resize to smaller
        mapped_file.resize(512).unwrap();
        assert_eq!(mapped_file.size(), 512);
    }

    #[test]
    fn test_chunked_operations() {
        let temp_dir = TempDir::new().unwrap();
        let filepath = temp_dir.path().join("test.dat");

        let mut mapped_file = MemoryMappedFile::new(filepath, 1024).unwrap();

        // Write chunked data
        let test_data = b"This is a test string for chunked operations";
        let chunk_size = 10;
        let mut offset = 0;

        mapped_file
            .write_chunked(chunk_size, |current_offset| {
                if current_offset != offset {
                    return Ok(None);
                }

                let start = current_offset;
                let end = (start + chunk_size).min(test_data.len());

                if start >= test_data.len() {
                    Ok(None)
                } else {
                    offset = end;
                    Ok(Some(test_data[start..end].to_vec()))
                }
            })
            .unwrap();

        // Read back and verify
        let mut read_data = Vec::new();
        mapped_file
            .read_chunked(chunk_size, |chunk, _offset| {
                read_data.extend_from_slice(chunk);
                Ok(())
            })
            .unwrap();

        assert_eq!(&read_data[..test_data.len()], test_data);
    }

    #[test]
    fn test_memory_mapping_manager() {
        let temp_dir = TempDir::new().unwrap();
        let config = MemoryMappingConfig::default();
        let mut manager = MemoryMappingManager::new(config);

        // Map a file
        let filepath = temp_dir.path().join("test.dat");
        manager.map_file(filepath.clone(), 1024).unwrap();

        let stats = manager.get_stats();
        assert_eq!(stats.total_files, 1);
        assert_eq!(stats.total_mapped_size, 1024);

        // Unmap the file
        manager.unmap_file(&filepath).unwrap();

        let stats = manager.get_stats();
        assert_eq!(stats.total_files, 0);
        assert_eq!(stats.total_mapped_size, 0);
    }

    #[test]
    fn test_access_count_tracking() {
        let temp_dir = TempDir::new().unwrap();
        let filepath = temp_dir.path().join("test.dat");

        let mut mapped_file = MemoryMappedFile::new(filepath, 1024).unwrap();
        assert_eq!(mapped_file.access_count(), 0);

        // Perform some operations
        let mut buffer = vec![0u8; 10];
        mapped_file.read_at(0, &mut buffer).unwrap();
        assert_eq!(mapped_file.access_count(), 1);

        mapped_file.write_at(0, b"test").unwrap();
        assert_eq!(mapped_file.access_count(), 2);

        // Reset counter
        mapped_file.reset_access_count();
        assert_eq!(mapped_file.access_count(), 0);
    }
}