Skip to main content

hermes_core/directories/
mmap.rs

1//! Memory-mapped directory for efficient access to large indices
2//!
3//! This module is only compiled with the "native" feature.
4
5use std::io;
6use std::ops::Range;
7use std::path::{Path, PathBuf};
8use std::sync::Arc;
9
10use async_trait::async_trait;
11use memmap2::Mmap;
12
13use super::{Directory, DirectoryWriter, FileSlice, LazyFileHandle, OwnedBytes, RangeReadFn};
14
/// Directory that serves index files through memory maps rooted at one path.
///
/// Read paths map the target file with `memmap2` and copy the requested bytes
/// out of the mapping into an owned buffer:
/// - range reads (`read_range`, `open_lazy`) copy only the requested range, so
///   a large file does not have to be loaded as a whole;
/// - `open_read` copies the entire file into memory.
///
/// Write operations use regular file I/O. The type keeps no application-level
/// cache; its only state is the root path, and caching is left to the OS page
/// cache.
#[derive(Debug)]
pub struct MmapDirectory {
    /// Base path that every path handed to this directory is joined onto.
    root: PathBuf,
}
31
32impl MmapDirectory {
33    /// Create a new MmapDirectory rooted at the given path
34    pub fn new(root: impl AsRef<Path>) -> Self {
35        Self {
36            root: root.as_ref().to_path_buf(),
37        }
38    }
39
40    fn resolve(&self, path: &Path) -> PathBuf {
41        self.root.join(path)
42    }
43
44    /// Memory-map a file (no application cache - OS page cache handles this)
45    fn mmap_file(&self, path: &Path) -> io::Result<Arc<Mmap>> {
46        let full_path = self.resolve(path);
47        let file = std::fs::File::open(&full_path)?;
48        let mmap = unsafe { Mmap::map(&file)? };
49        Ok(Arc::new(mmap))
50    }
51}
52
53impl Clone for MmapDirectory {
54    fn clone(&self) -> Self {
55        Self {
56            root: self.root.clone(),
57        }
58    }
59}
60
61#[async_trait]
62impl Directory for MmapDirectory {
63    async fn exists(&self, path: &Path) -> io::Result<bool> {
64        let full_path = self.resolve(path);
65        Ok(tokio::fs::try_exists(&full_path).await.unwrap_or(false))
66    }
67
68    async fn file_size(&self, path: &Path) -> io::Result<u64> {
69        let full_path = self.resolve(path);
70        let metadata = tokio::fs::metadata(&full_path).await?;
71        Ok(metadata.len())
72    }
73
74    async fn open_read(&self, path: &Path) -> io::Result<FileSlice> {
75        let mmap = self.mmap_file(path)?;
76        // Copy data - mmap will be dropped after this, OS page cache handles rest
77        let bytes = mmap.to_vec();
78        Ok(FileSlice::new(OwnedBytes::new(bytes)))
79    }
80
81    async fn read_range(&self, path: &Path, range: Range<u64>) -> io::Result<OwnedBytes> {
82        let mmap = self.mmap_file(path)?;
83        let start = range.start as usize;
84        let end = range.end as usize;
85
86        if end > mmap.len() {
87            return Err(io::Error::new(
88                io::ErrorKind::InvalidInput,
89                format!("Range {}..{} exceeds file size {}", start, end, mmap.len()),
90            ));
91        }
92
93        Ok(OwnedBytes::new(mmap[start..end].to_vec()))
94    }
95
96    async fn list_files(&self, prefix: &Path) -> io::Result<Vec<PathBuf>> {
97        let full_path = self.resolve(prefix);
98        let mut entries = tokio::fs::read_dir(&full_path).await?;
99        let mut files = Vec::new();
100
101        while let Some(entry) = entries.next_entry().await? {
102            if entry.file_type().await?.is_file() {
103                files.push(entry.path().strip_prefix(&self.root).unwrap().to_path_buf());
104            }
105        }
106
107        Ok(files)
108    }
109
110    async fn open_lazy(&self, path: &Path) -> io::Result<LazyFileHandle> {
111        let mmap = self.mmap_file(path)?;
112        let file_size = mmap.len() as u64;
113
114        let read_fn: RangeReadFn = Arc::new(move |range: Range<u64>| {
115            let mmap = Arc::clone(&mmap);
116            Box::pin(async move {
117                let start = range.start as usize;
118                let end = range.end as usize;
119
120                if end > mmap.len() {
121                    return Err(io::Error::new(
122                        io::ErrorKind::InvalidInput,
123                        format!("Range {}..{} exceeds file size {}", start, end, mmap.len()),
124                    ));
125                }
126
127                // Hint the OS to prefetch these pages before the memcpy
128                #[cfg(unix)]
129                let _ = mmap.advise_range(memmap2::Advice::WillNeed, start, end - start);
130
131                Ok(OwnedBytes::new(mmap[start..end].to_vec()))
132            })
133        });
134
135        Ok(LazyFileHandle::new(file_size, read_fn))
136    }
137}
138
139#[async_trait]
140impl DirectoryWriter for MmapDirectory {
141    async fn write(&self, path: &Path, data: &[u8]) -> io::Result<()> {
142        let full_path = self.resolve(path);
143
144        // Ensure parent directory exists
145        if let Some(parent) = full_path.parent() {
146            tokio::fs::create_dir_all(parent).await?;
147        }
148
149        tokio::fs::write(&full_path, data).await
150    }
151
152    async fn delete(&self, path: &Path) -> io::Result<()> {
153        let full_path = self.resolve(path);
154        tokio::fs::remove_file(&full_path).await
155    }
156
157    async fn rename(&self, from: &Path, to: &Path) -> io::Result<()> {
158        let from_path = self.resolve(from);
159        let to_path = self.resolve(to);
160        tokio::fs::rename(&from_path, &to_path).await
161    }
162
163    async fn sync(&self) -> io::Result<()> {
164        // fsync the directory
165        let dir = std::fs::File::open(&self.root)?;
166        dir.sync_all()?;
167        Ok(())
168    }
169}
170
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Round-trips a small file through write, exists, file_size, open_read
    /// and read_range.
    #[tokio::test]
    async fn test_mmap_directory_basic() {
        let scratch = TempDir::new().unwrap();
        let directory = MmapDirectory::new(scratch.path());
        let file = Path::new("test.txt");
        let payload = b"Hello, mmap world!";

        // Create the file.
        directory.write(file, payload).await.unwrap();

        // The written file is visible; an unrelated name is not.
        assert!(directory.exists(file).await.unwrap());
        let missing = directory
            .exists(Path::new("nonexistent.txt"))
            .await
            .unwrap();
        assert!(!missing);

        // Reported size matches what was written.
        let size = directory.file_size(file).await.unwrap();
        assert_eq!(size, payload.len() as u64);

        // Whole-file read returns the exact payload.
        let slice = directory.open_read(file).await.unwrap();
        let whole = slice.read_bytes().await.unwrap();
        assert_eq!(whole.as_slice(), payload);

        // Partial read returns just the requested window.
        let window = directory.read_range(file, 7..12).await.unwrap();
        assert_eq!(window.as_slice(), b"mmap ");
    }

    /// Reads two windows of a 1000-byte file through a lazy handle.
    #[tokio::test]
    async fn test_mmap_directory_lazy_handle() {
        use crate::directories::AsyncFileRead;

        let scratch = TempDir::new().unwrap();
        let directory = MmapDirectory::new(scratch.path());
        let file = Path::new("large.bin");

        // 1000 bytes cycling through 0..=255.
        let contents: Vec<u8> = (0..1000).map(|i| (i % 256) as u8).collect();
        directory.write(file, &contents).await.unwrap();

        // The lazy handle reports the full length up front.
        let lazy = directory.open_lazy(file).await.unwrap();
        assert_eq!(lazy.len(), 1000);

        // First window: start of the file.
        let head = lazy.read_bytes_range(0..100).await.unwrap();
        assert_eq!(head.len(), 100);
        assert_eq!(head.as_slice(), &contents[0..100]);

        // Second window: somewhere in the middle.
        let middle = lazy.read_bytes_range(500..600).await.unwrap();
        assert_eq!(middle.as_slice(), &contents[500..600]);
    }
}