1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
//! Memory-mapped model data abstraction (PMAT-COMPLY)
//!
//! Extracted from mod.rs for file health compliance.
//!
//! # Heijunka - Level Loading
//!
//! References:
//! - Didona et al. (2022): mmap vs read() achieves 2.3x throughput for sequential access
//! - Chu (2011): LMDB design - let kernel manage pages, don't fight the VM subsystem
//! - Vahalia (1996): SIGBUS behavior on truncated mmap
//!
//! This abstraction allows models to be loaded via:
//! 1. Memory mapping (mmap) - zero-copy, kernel manages pages, no zram pressure
//! 2. Heap allocation (Vec<u8>) - required for compressed files after decompression
use std::fs::File;
use std::path::{Path, PathBuf};
use crate::error::{RealizarError, Result};
/// Owner of the raw bytes of a model file.
///
/// The bytes live in one of two places, and every accessor on this type
/// hides which one is in use:
///
/// - [`ModelData::Mmap`] holds a `memmap2::Mmap` over the file together with
///   the path it was opened from. The file contents are reached through the
///   mapping rather than being read into a `Vec`. This variant is compiled
///   out on `wasm32` targets.
/// - [`ModelData::Heap`] holds an owned `Vec<u8>`. It is the only variant on
///   `wasm32`, and the one to use when the bytes had to be produced in memory
///   first (for example by decompressing a file).
///
/// After the data has been handed off elsewhere (e.g. to a GPU),
/// `release_cpu_pages()` can be called to advise the kernel that the mapped
/// pages are no longer needed; it does nothing for heap-backed data.
#[derive(Debug)]
pub enum ModelData {
    /// Bytes served from a memory-mapped file.
    #[cfg(not(target_arch = "wasm32"))]
    Mmap {
        /// The live mapping; dereferences to the file contents.
        mmap: memmap2::Mmap,
        /// Path the mapping was created from, kept for error messages.
        path: PathBuf,
    },
    /// Bytes owned on the heap (decompressed input, or any data on WASM).
    Heap(Vec<u8>),
}
impl ModelData {
/// Open a file with memory mapping.
///
/// # Safety
///
/// Uses `memmap2::Mmap` which requires:
/// - File must not be truncated while mapped (SIGBUS on Unix)
/// - File must not be modified while mapped (undefined behavior)
///
/// # References
///
/// - Vahalia (1996): SIGBUS from truncated mmap
/// - memmap2 crate safety documentation
#[cfg(not(target_arch = "wasm32"))]
#[allow(unsafe_code)]
pub fn open_mmap(path: impl AsRef<Path>) -> Result<Self> {
let path_ref = path.as_ref();
let file = File::open(path_ref).map_err(|e| RealizarError::IoError {
message: format!("Failed to open file '{}': {e}", path_ref.display()),
})?;
// SAFETY: File is opened read-only. We document the single-writer
// assumption. Callers should validate checksums before trusting data.
// SIGBUS can occur if file is truncated externally - this is documented.
let mmap = unsafe {
memmap2::MmapOptions::new()
.map(&file)
.map_err(|e| RealizarError::IoError {
message: format!("Failed to mmap file '{}': {e}", path_ref.display()),
})?
};
Ok(Self::Mmap {
mmap,
path: path_ref.to_path_buf(),
})
}
/// Create from heap-allocated data (for compressed files).
#[must_use]
pub fn from_vec(data: Vec<u8>) -> Self {
Self::Heap(data)
}
/// Get the data as a byte slice.
#[must_use]
pub fn as_slice(&self) -> &[u8] {
match self {
#[cfg(not(target_arch = "wasm32"))]
Self::Mmap { mmap, .. } => mmap,
Self::Heap(data) => data,
}
}
/// Get data length.
#[must_use]
pub fn len(&self) -> usize {
self.as_slice().len()
}
/// Check if data is empty.
#[must_use]
pub fn is_empty(&self) -> bool {
self.as_slice().is_empty()
}
/// Release CPU pages after GPU transfer (Unix only).
///
/// Calls `madvise(MADV_DONTNEED)` to tell the kernel these pages
/// are no longer needed. The kernel will:
/// - Drop pages immediately (not compress to zram)
/// - Re-fault from disk if accessed again
///
/// # When to Call
///
/// After `cuMemcpy()` completes for all tensors.
///
/// # Safety
///
/// Uses `unchecked_advise` because `MADV_DONTNEED` is in the
/// `UncheckedAdvice` enum. This is safe for read-only mmaps where
/// data can be re-faulted from the backing file.
///
/// # References
///
/// - Didona et al. (2022): madvise for memory management
#[cfg(all(unix, not(target_arch = "wasm32")))]
#[allow(unsafe_code)]
pub fn release_cpu_pages(&self) -> Result<()> {
match self {
Self::Mmap { mmap, path } => {
// SAFETY: We opened the file read-only, so MADV_DONTNEED is safe -
// the kernel will re-fault pages from the backing file if accessed.
unsafe {
mmap.unchecked_advise(memmap2::UncheckedAdvice::DontNeed)
.map_err(|e| RealizarError::IoError {
message: format!(
"madvise(MADV_DONTNEED) failed for '{}': {e}",
path.display()
),
})
}
},
Self::Heap(_) => {
// No-op for heap data - kernel manages via normal VM pressure
Ok(())
},
}
}
/// No-op on non-Unix platforms (madvise not available).
#[cfg(not(all(unix, not(target_arch = "wasm32"))))]
pub fn release_cpu_pages(&self) -> Result<()> {
Ok(())
}
/// Advise sequential access pattern (Unix only).
///
/// Call before linear scan through model data.
#[cfg(all(unix, not(target_arch = "wasm32")))]
pub fn advise_sequential(&self) -> Result<()> {
match self {
Self::Mmap { mmap, path } => {
mmap.advise(memmap2::Advice::Sequential)
.map_err(|e| RealizarError::IoError {
message: format!(
"madvise(MADV_SEQUENTIAL) failed for '{}': {e}",
path.display()
),
})
},
Self::Heap(_) => Ok(()),
}
}
/// No-op on non-Unix platforms (madvise not available).
#[cfg(not(all(unix, not(target_arch = "wasm32"))))]
pub fn advise_sequential(&self) -> Result<()> {
Ok(())
}
/// Check if this is memory-mapped data.
#[must_use]
pub fn is_mmap(&self) -> bool {
match self {
#[cfg(not(target_arch = "wasm32"))]
Self::Mmap { .. } => true,
Self::Heap(_) => false,
}
}
}