dynamo_memory/disk.rs
1// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0
3
//! Disk-backed memory storage using `O_DIRECT` files whose space is preallocated with `fallocate`.
5
6use super::{MemoryDescription, Result, StorageError, StorageKind, nixl::NixlDescriptor};
7use std::any::Any;
8use std::path::{Path, PathBuf};
9
10use core::ffi::c_char;
11use nix::fcntl::{FallocateFlags, fallocate};
12use nix::unistd::unlink;
13use std::ffi::CString;
14use std::os::fd::BorrowedFd;
15
/// Name of the environment variable that selects the directory in which
/// `DiskStorage::new` creates its temporary cache file.
const DISK_CACHE_KEY: &str = "DYN_KVBM_DISK_CACHE_DIR";
/// Directory used by `DiskStorage::new` when the `DISK_CACHE_KEY` variable cannot be read.
const DEFAULT_DISK_CACHE_DIR: &str = "/tmp/";
18
/// A fixed-size region of disk space backed by a single file opened with `O_DIRECT`.
///
/// The file is created and sized by [`DiskStorage::new`] / [`DiskStorage::new_at`]; dropping the
/// value unlinks the file (if that has not happened yet) and closes the descriptor.
#[derive(Debug)]
pub struct DiskStorage {
    /// Raw file descriptor of the backing file, widened to `u64`.
    fd: u64,
    /// Path of the backing file (for template paths, the name generated by `mkostemp`).
    path: PathBuf,
    /// Number of bytes requested for the file at construction time.
    size: usize,
    /// Set once the file has been successfully unlinked.
    unlinked: bool,
}
26
27impl DiskStorage {
28 pub fn new(size: usize) -> Result<Self> {
29 // We need to open our file with some special flags that aren't supported by the tempfile crate.
30 // Instead, we'll use the mkostemp function to create a temporary file with the correct flags.
31
32 let specified_dir =
33 std::env::var(DISK_CACHE_KEY).unwrap_or_else(|_| DEFAULT_DISK_CACHE_DIR.to_string());
34 let file_path = Path::new(&specified_dir).join("dynamo-kvbm-disk-cache-XXXXXX");
35
36 Self::new_at(file_path, size)
37 }
38
39 pub fn new_at(path: impl AsRef<Path>, len: usize) -> Result<Self> {
40 if len == 0 {
41 return Err(StorageError::AllocationFailed(
42 "zero-sized allocations are not supported".into(),
43 ));
44 }
45
46 let file_path = path.as_ref().to_path_buf();
47
48 if !file_path.exists() {
49 let parent = file_path.parent().ok_or_else(|| {
50 StorageError::AllocationFailed(format!(
51 "disk cache path {} has no parent directory",
52 file_path.display()
53 ))
54 })?;
55 std::fs::create_dir_all(parent).map_err(|e| {
56 StorageError::AllocationFailed(format!(
57 "failed to create disk cache directory {}: {e}",
58 parent.display()
59 ))
60 })?;
61 }
62
63 tracing::debug!("Allocating disk cache file at {}", file_path.display());
64
65 let path_str = file_path.to_str().ok_or_else(|| {
66 StorageError::AllocationFailed(format!(
67 "disk cache path {} is not valid UTF-8",
68 file_path.display()
69 ))
70 })?;
71 let is_template = path_str.contains("XXXXXX");
72
73 let (raw_fd, actual_path) = if is_template {
74 // Template path - use mkostemp to generate unique filename
75 let template = CString::new(path_str).unwrap();
76 let mut template_bytes = template.into_bytes_with_nul();
77
78 let fd = unsafe {
79 nix::libc::mkostemp(
80 template_bytes.as_mut_ptr() as *mut c_char,
81 nix::libc::O_RDWR | nix::libc::O_DIRECT,
82 )
83 };
84
85 if fd == -1 {
86 return Err(StorageError::AllocationFailed(format!(
87 "mkostemp failed: {}",
88 std::io::Error::last_os_error()
89 )));
90 }
91
92 // Extract the actual path created by mkostemp
93 let actual = PathBuf::from(
94 CString::from_vec_with_nul(template_bytes)
95 .unwrap()
96 .to_str()
97 .unwrap(),
98 );
99
100 (fd, actual)
101 } else {
102 // Specific path - use open with O_CREAT
103 let path_cstr = CString::new(path_str).unwrap();
104 let fd = unsafe {
105 nix::libc::open(
106 path_cstr.as_ptr(),
107 nix::libc::O_CREAT | nix::libc::O_RDWR | nix::libc::O_DIRECT,
108 0o644,
109 )
110 };
111
112 if fd == -1 {
113 return Err(StorageError::AllocationFailed(format!(
114 "open failed: {}",
115 std::io::Error::last_os_error()
116 )));
117 }
118
119 (fd, file_path)
120 };
121
122 // We need to use fallocate to actually allocate the storage and create the blocks on disk.
123 unsafe {
124 fallocate(
125 BorrowedFd::borrow_raw(raw_fd),
126 FallocateFlags::empty(),
127 0,
128 len as i64,
129 )
130 .map_err(|e| {
131 StorageError::AllocationFailed(format!("Failed to allocate temp file: {}", e))
132 })?
133 };
134
135 Ok(Self {
136 fd: raw_fd as u64,
137 path: actual_path,
138 size: len,
139 unlinked: false,
140 })
141 }
142
143 pub fn fd(&self) -> u64 {
144 self.fd
145 }
146
147 pub fn path(&self) -> &Path {
148 self.path.as_path()
149 }
150
151 /// Unlink our temp file.
152 /// This means that when this process terminates, the file will be automatically deleted by the OS.
153 /// Unfortunately, GDS requires that files we try to register must be linked.
154 /// To get around this, we unlink the file only after we've registered it with NIXL.
155 pub fn unlink(&mut self) -> Result<()> {
156 if self.unlinked {
157 return Ok(());
158 }
159
160 unlink(self.path.as_path())
161 .map_err(|e| StorageError::AllocationFailed(format!("Failed to unlink file: {}", e)))?;
162 self.unlinked = true;
163 Ok(())
164 }
165
166 pub fn unlinked(&self) -> bool {
167 self.unlinked
168 }
169}
170
171impl Drop for DiskStorage {
172 fn drop(&mut self) {
173 let _ = self.unlink();
174 if let Err(e) = nix::unistd::close(self.fd as std::os::fd::RawFd) {
175 tracing::debug!("failed to close disk cache fd {}: {e}", self.fd);
176 }
177 }
178}
179
180impl MemoryDescription for DiskStorage {
181 fn addr(&self) -> usize {
182 0
183 }
184
185 fn size(&self) -> usize {
186 self.size
187 }
188
189 fn storage_kind(&self) -> StorageKind {
190 StorageKind::Disk(self.fd)
191 }
192
193 fn as_any(&self) -> &dyn Any {
194 self
195 }
196 fn nixl_descriptor(&self) -> Option<NixlDescriptor> {
197 None
198 }
199}
200
201// Support for NIXL registration
202impl super::nixl::NixlCompatible for DiskStorage {
203 fn nixl_params(&self) -> (*const u8, usize, nixl_sys::MemType, u64) {
204 #[cfg(unix)]
205 {
206 // Use file descriptor as device_id for MemType::File
207 (
208 std::ptr::null(),
209 self.size,
210 nixl_sys::MemType::File,
211 self.fd,
212 )
213 }
214
215 #[cfg(not(unix))]
216 {
217 // On non-Unix systems, we can't get the file descriptor easily
218 // Return device_id as 0 - registration will fail on these systems
219 (
220 self.mmap.as_ptr(),
221 self.mmap.len(),
222 nixl_sys::MemType::File,
223 0,
224 )
225 }
226 }
227}
228
// NOTE: The memory-mapped implementation below is disabled (commented out) and is not compiled.
// mod mmap {
230// use super::*;
231
232// #[cfg(unix)]
233// use std::os::unix::io::AsRawFd;
234
235// use memmap2::{MmapMut, MmapOptions};
236// use std::fs::{File, OpenOptions};
237// use tempfile::NamedTempFile;
238
239// /// Disk-backed storage using memory-mapped files.
240// #[derive(Debug)]
241// pub struct MemMappedFileStorage {
242// _file: File, // Keep file alive for the lifetime of the mmap
243// mmap: MmapMut,
244// path: PathBuf,
245// #[cfg(unix)]
246// fd: i32,
247// }
248
249// unsafe impl Send for MemMappedFileStorage {}
250// unsafe impl Sync for MemMappedFileStorage {}
251
252// impl MemMappedFileStorage {
253// /// Create new disk storage with a temporary file.
254// pub fn new_temp(len: usize) -> Result<Self> {
255// if len == 0 {
256// return Err(StorageError::AllocationFailed(
257// "zero-sized allocations are not supported".into(),
258// ));
259// }
260
261// // Create temporary file
262// let temp_file = NamedTempFile::new()?;
263// let path = temp_file.path().to_path_buf();
264// let file = temp_file.into_file();
265
266// // Set file size
267// file.set_len(len as u64)?;
268
269// #[cfg(unix)]
270// let fd = file.as_raw_fd();
271
272// // Memory map the file
273// let mmap = unsafe { MmapOptions::new().len(len).map_mut(&file)? };
274
275// Ok(Self {
276// _file: file,
277// mmap,
278// path,
279// #[cfg(unix)]
280// fd,
281// })
282// }
283
284// /// Create new disk storage with a specific file path.
285// pub fn new_at(path: impl AsRef<Path>, len: usize) -> Result<Self> {
286// if len == 0 {
287// return Err(StorageError::AllocationFailed(
288// "zero-sized allocations are not supported".into(),
289// ));
290// }
291
292// let path = path.as_ref().to_path_buf();
293
294// // Create or open file
295// let file = OpenOptions::new()
296// .read(true)
297// .write(true)
298// .create(true)
299// .open(&path)?;
300
301// // Set file size
302// file.set_len(len as u64)?;
303
304// #[cfg(unix)]
305// let fd = file.as_raw_fd();
306
307// // Memory map the file
308// let mmap = unsafe { MmapOptions::new().len(len).map_mut(&file)? };
309
310// Ok(Self {
311// _file: file,
312// mmap,
313// path,
314// #[cfg(unix)]
315// fd,
316// })
317// }
318
319// /// Get the path to the backing file.
320// pub fn path(&self) -> &Path {
321// &self.path
322// }
323
324// /// Get the file descriptor (Unix only).
325// #[cfg(unix)]
326// pub fn fd(&self) -> i32 {
327// self.fd
328// }
329
330// /// Get a pointer to the memory-mapped region.
331// ///
332// /// # Safety
333// /// The caller must ensure the pointer is not used after this storage is dropped.
334// pub unsafe fn as_ptr(&self) -> *const u8 {
335// self.mmap.as_ptr()
336// }
337
338// /// Get a mutable pointer to the memory-mapped region.
339// ///
340// /// # Safety
341// /// The caller must ensure the pointer is not used after this storage is dropped
342// /// and that there are no other references to this memory.
343// pub unsafe fn as_mut_ptr(&mut self) -> *mut u8 {
344// self.mmap.as_mut_ptr()
345// }
346// }
347
348// impl MemoryDescription for MemMappedFileStorage {
349// fn addr(&self) -> usize {
350// self.mmap.as_ptr() as usize
351// }
352
353// fn size(&self) -> usize {
354// self.mmap.len()
355// }
356
357// fn storage_kind(&self) -> StorageKind {
358// StorageKind::Disk
359// }
360
361// fn as_any(&self) -> &dyn Any {
362// self
363// }
364// }
365
366// // Support for NIXL registration
367// impl super::super::registered::NixlCompatible for MemMappedFileStorage {
368// fn nixl_params(&self) -> (*const u8, usize, nixl_sys::MemType, u64) {
369// #[cfg(unix)]
370// {
371// // Use file descriptor as device_id for MemType::File
372// (
373// self.mmap.as_ptr(),
374// self.mmap.len(),
375// nixl_sys::MemType::File,
376// self.fd as u64,
377// )
378// }
379
380// #[cfg(not(unix))]
381// {
382// // On non-Unix systems, we can't get the file descriptor easily
383// // Return device_id as 0 - registration will fail on these systems
384// (
385// self.mmap.as_ptr(),
386// self.mmap.len(),
387// nixl_sys::MemType::File,
388// 0,
389// )
390// }
391// }
392// }
393// }