Skip to main content

hermes_core/directories/
mmap.rs

1//! Memory-mapped directory for efficient access to large indices
2//!
3//! This module is only compiled with the "native" feature.
4
5use std::io;
6use std::ops::Range;
7use std::path::{Path, PathBuf};
8use std::sync::Arc;
9
10use async_trait::async_trait;
11use memmap2::Mmap;
12
13use super::{Directory, DirectoryWriter, FileSlice, LazyFileHandle, OwnedBytes, RangeReadFn};
14
/// Directory implementation that reads index files through memory maps.
///
/// Every read maps the target file with `memmap2` and then copies the
/// requested bytes out of the mapping into an owned buffer. The OS page cache
/// is the only cache involved; nothing is cached at the application level.
///
/// What this buys:
/// - Range reads and lazy handles touch only the pages that back the
///   requested bytes, so a large file is not pulled in just to read a slice
/// - The OS handles caching and paging automatically
/// - Mapped pages can be shared between processes mapping the same file
/// - Random access does not need explicit seeks
///
/// Caveats:
/// - `open_read` copies the entire file into memory, so it does not benefit
///   from lazy paging; prefer `read_range` / `open_lazy` for large files
/// - Write operations use regular file I/O, not the mapping
#[derive(Debug)]
pub struct MmapDirectory {
    /// Base path that every relative path passed to this directory is joined onto.
    root: PathBuf,
}
31
32impl MmapDirectory {
33    /// Create a new MmapDirectory rooted at the given path
34    pub fn new(root: impl AsRef<Path>) -> Self {
35        Self {
36            root: root.as_ref().to_path_buf(),
37        }
38    }
39
40    fn resolve(&self, path: &Path) -> PathBuf {
41        self.root.join(path)
42    }
43
44    /// Memory-map a file (no application cache - OS page cache handles this)
45    fn mmap_file(&self, path: &Path) -> io::Result<Arc<Mmap>> {
46        let full_path = self.resolve(path);
47        let file = std::fs::File::open(&full_path)?;
48        let mmap = unsafe { Mmap::map(&file)? };
49        Ok(Arc::new(mmap))
50    }
51}
52
53impl Clone for MmapDirectory {
54    fn clone(&self) -> Self {
55        Self {
56            root: self.root.clone(),
57        }
58    }
59}
60
61#[async_trait]
62impl Directory for MmapDirectory {
63    async fn exists(&self, path: &Path) -> io::Result<bool> {
64        let full_path = self.resolve(path);
65        Ok(tokio::fs::try_exists(&full_path).await.unwrap_or(false))
66    }
67
68    async fn file_size(&self, path: &Path) -> io::Result<u64> {
69        let full_path = self.resolve(path);
70        let metadata = tokio::fs::metadata(&full_path).await?;
71        Ok(metadata.len())
72    }
73
74    async fn open_read(&self, path: &Path) -> io::Result<FileSlice> {
75        let mmap = self.mmap_file(path)?;
76        // Copy data - mmap will be dropped after this, OS page cache handles rest
77        let bytes = mmap.to_vec();
78        Ok(FileSlice::new(OwnedBytes::new(bytes)))
79    }
80
81    async fn read_range(&self, path: &Path, range: Range<u64>) -> io::Result<OwnedBytes> {
82        let mmap = self.mmap_file(path)?;
83        let start = range.start as usize;
84        let end = range.end as usize;
85
86        if end > mmap.len() {
87            return Err(io::Error::new(
88                io::ErrorKind::InvalidInput,
89                format!("Range {}..{} exceeds file size {}", start, end, mmap.len()),
90            ));
91        }
92
93        Ok(OwnedBytes::new(mmap[start..end].to_vec()))
94    }
95
96    async fn list_files(&self, prefix: &Path) -> io::Result<Vec<PathBuf>> {
97        let full_path = self.resolve(prefix);
98        let mut entries = tokio::fs::read_dir(&full_path).await?;
99        let mut files = Vec::new();
100
101        while let Some(entry) = entries.next_entry().await? {
102            if entry.file_type().await?.is_file() {
103                files.push(entry.path().strip_prefix(&self.root).unwrap().to_path_buf());
104            }
105        }
106
107        Ok(files)
108    }
109
110    async fn open_lazy(&self, path: &Path) -> io::Result<LazyFileHandle> {
111        let mmap = self.mmap_file(path)?;
112        let file_size = mmap.len() as u64;
113
114        let read_fn: RangeReadFn = Arc::new(move |range: Range<u64>| {
115            let mmap = Arc::clone(&mmap);
116            Box::pin(async move {
117                let start = range.start as usize;
118                let end = range.end as usize;
119
120                if end > mmap.len() {
121                    return Err(io::Error::new(
122                        io::ErrorKind::InvalidInput,
123                        format!("Range {}..{} exceeds file size {}", start, end, mmap.len()),
124                    ));
125                }
126
127                Ok(OwnedBytes::new(mmap[start..end].to_vec()))
128            })
129        });
130
131        Ok(LazyFileHandle::new(file_size, read_fn))
132    }
133}
134
135#[async_trait]
136impl DirectoryWriter for MmapDirectory {
137    async fn write(&self, path: &Path, data: &[u8]) -> io::Result<()> {
138        let full_path = self.resolve(path);
139
140        // Ensure parent directory exists
141        if let Some(parent) = full_path.parent() {
142            tokio::fs::create_dir_all(parent).await?;
143        }
144
145        tokio::fs::write(&full_path, data).await
146    }
147
148    async fn delete(&self, path: &Path) -> io::Result<()> {
149        let full_path = self.resolve(path);
150        tokio::fs::remove_file(&full_path).await
151    }
152
153    async fn rename(&self, from: &Path, to: &Path) -> io::Result<()> {
154        let from_path = self.resolve(from);
155        let to_path = self.resolve(to);
156        tokio::fs::rename(&from_path, &to_path).await
157    }
158
159    async fn sync(&self) -> io::Result<()> {
160        // fsync the directory
161        let dir = std::fs::File::open(&self.root)?;
162        dir.sync_all()?;
163        Ok(())
164    }
165}
166
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Round-trips a small file through write, exists, file_size, open_read and read_range.
    #[tokio::test]
    async fn test_mmap_directory_basic() {
        let scratch = TempDir::new().unwrap();
        let directory = MmapDirectory::new(scratch.path());
        let file_name = Path::new("test.txt");
        let payload = b"Hello, mmap world!";

        // Create the file on disk
        directory.write(file_name, payload).await.unwrap();

        // Existence is reported for the written file only
        let present = directory.exists(file_name).await.unwrap();
        assert!(present);
        let absent = directory
            .exists(Path::new("nonexistent.txt"))
            .await
            .unwrap();
        assert!(!absent);

        // Reported size matches the payload length
        let reported_size = directory.file_size(file_name).await.unwrap();
        assert_eq!(reported_size, payload.len() as u64);

        // Whole-file read returns the payload unchanged
        let whole = directory.open_read(file_name).await.unwrap();
        let contents = whole.read_bytes().await.unwrap();
        assert_eq!(contents.as_slice(), payload);

        // Sub-range read returns exactly the requested window
        let window = directory.read_range(file_name, 7..12).await.unwrap();
        assert_eq!(window.as_slice(), b"mmap ");
    }

    /// Reads two windows of a 1000-byte file through a lazy handle.
    #[tokio::test]
    async fn test_mmap_directory_lazy_handle() {
        use crate::directories::AsyncFileRead;

        let scratch = TempDir::new().unwrap();
        let directory = MmapDirectory::new(scratch.path());
        let file_name = Path::new("large.bin");

        // Repeating 0..=255 byte pattern, 1000 bytes long
        let pattern: Vec<u8> = (0..1000).map(|i| (i % 256) as u8).collect();
        directory.write(file_name, &pattern).await.unwrap();

        // The lazy handle reports the full file length
        let lazy = directory.open_lazy(file_name).await.unwrap();
        assert_eq!(lazy.len(), 1000);

        // Window at the start of the file
        let head = lazy.read_bytes_range(0..100).await.unwrap();
        assert_eq!(head.len(), 100);
        assert_eq!(head.as_slice(), &pattern[0..100]);

        // Window in the middle of the file
        let middle = lazy.read_bytes_range(500..600).await.unwrap();
        assert_eq!(middle.as_slice(), &pattern[500..600]);
    }
}