1#![warn(static_mut_refs)]
2pub use nix::libc;
3use nix::libc::c_void;
4use nix::sys::mman::MapFlags;
5pub use nix::sys::mman::ProtFlags;
6use nix::sys::signal;
7use nix::unistd;
8use parking_lot::Mutex;
9use std::mem;
10use std::num::NonZeroUsize;
11use std::os::fd::{AsRawFd, BorrowedFd, IntoRawFd, RawFd};
12use std::ptr::NonNull;
13use std::sync::Arc;
14use std::sync::atomic::{AtomicBool, AtomicPtr, Ordering};
15
16mod machdep;
17
/// Number of bytes needed to serialize a `usize` fault address over the pipe.
const ADDR_SIZE: usize = mem::size_of::<usize>();
19
/// Distinguishes whether a faulting access was a read or a write.
///
/// Fieldless enum, so it also derives `Clone`/`Copy`, letting fault-dispatch
/// code pass it by value without moves.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AccessType {
    /// The faulting instruction read from the page.
    Read,
    /// The faulting instruction wrote to the page.
    Write,
}
25
/// A backing store that supplies page contents on demand.
pub trait PageStore {
    /// Called by the fault worker when a page fault occurs at `offset`
    /// (page-aligned, relative to the mapping base) covering `length` bytes,
    /// with the faulting `access` type.
    ///
    /// Returns an iterator over byte chunks that are copied, in order, into
    /// the faulted page. Returning `None` skips the copy; the page is still
    /// made accessible by the fault worker afterwards.
    fn page_fault(
        &mut self, offset: usize, length: usize, access: AccessType,
    ) -> Option<Box<dyn Iterator<Item = Box<dyn AsRef<[u8]> + '_>> + '_>>;
}
32
/// An anonymous, private memory mapping, optionally placed at a caller-fixed
/// base address.
pub struct MappedMemory {
    /// Base address of the mapping; written once during construction.
    base: AtomicPtr<u8>,
    /// Total mapping size in bytes (rounded up to a page multiple).
    length: usize,
    /// Whether `Drop` should `munmap` (true only when we chose the address).
    unmap: bool,
    /// Shared-memory segments mapped into this region, kept alive here.
    shared: Mutex<Vec<SharedMemory>>,
}
39
40impl MappedMemory {
41 pub fn new(base: Option<*mut u8>, mut length: usize, page_size: usize, flags: ProtFlags) -> Result<Self, Error> {
42 let rem = length & (page_size - 1);
43 match base {
44 Some(base) => {
45 if (base as usize) % page_size != 0 {
46 return Err(Error::BaseNotAligned);
47 }
48 if rem != 0 {
49 return Err(Error::LengthNotAligned);
50 }
51 }
52 None => {
53 if rem != 0 {
54 length += page_size - rem
55 }
56 }
57 }
58
59 let addr = match base {
60 Some(b) => Some(NonZeroUsize::new(b as usize).ok_or(Error::NullBase)?),
61 None => None,
62 };
63 let length_nz = NonZeroUsize::new(length).ok_or(Error::LengthIsZero)?;
64 let map_flags = match base {
65 Some(_) => MapFlags::MAP_FIXED,
66 None => MapFlags::empty(),
67 } | MapFlags::MAP_PRIVATE;
68
69 let new_base = unsafe {
70 nix::sys::mman::mmap_anonymous(addr, length_nz, flags, map_flags)
71 .map_err(Error::UnixError)?
72 .cast::<u8>()
73 };
74 let new_base_ptr = new_base.as_ptr();
75
76 if let Some(base) = base {
77 if base != new_base_ptr {
78 return Err(Error::NotSupported);
79 }
80 }
81
82 Ok(Self {
83 base: AtomicPtr::new(new_base_ptr),
84 length,
85 unmap: base.is_none(),
86 shared: Mutex::new(Vec::new()),
87 })
88 }
89 #[inline(always)]
90 pub fn base(&self) -> *mut u8 {
91 unsafe { *self.base.as_ptr() }
92 }
93
94 #[inline(always)]
95 pub fn as_slice(&self) -> &mut [u8] {
96 unsafe { std::slice::from_raw_parts_mut(self.base(), self.length) }
97 }
98
99 pub fn make_shared(&self, offset: usize, shm: &SharedMemory, flags: ProtFlags) -> Result<(), Error> {
100 let len = shm.0.size;
101 if offset + len >= self.length {
102 return Err(Error::MemoryOverflow);
103 }
104 unsafe {
105 nix::sys::mman::mmap(
106 Some(NonZeroUsize::new(self.base().add(offset) as usize).unwrap()),
107 NonZeroUsize::new(len).unwrap(),
108 flags,
109 MapFlags::MAP_FIXED | MapFlags::MAP_SHARED,
110 &shm.0.fd,
111 0,
112 )
113 .map_err(Error::UnixError)?;
114 }
115 self.shared.lock().push(shm.clone());
117 Ok(())
118 }
119}
120
121impl Drop for MappedMemory {
122 fn drop(&mut self) {
123 if self.unmap {
124 unsafe {
125 if let Some(ptr) = NonNull::new(self.base() as *mut c_void) {
126 nix::sys::mman::munmap(ptr, self.length).unwrap();
127 }
128 }
129 }
130 }
131}
132
/// A lazily populated memory region whose pages are filled on first access by
/// a `PageStore`, via the SIGSEGV/SIGBUS fault machinery in this module.
pub struct PagedMemory<'a> {
    /// Underlying PROT_NONE mapping, also referenced by the global manager.
    mem: Arc<MappedMemory>,
    /// Page granularity used for faults and release operations.
    page_size: usize,
    /// Carries the lifetime of a borrowed region handed to `from_raw`.
    _phantom: std::marker::PhantomData<&'a ()>,
}
138
/// Registry record tying an address range to the store that pages it in.
struct PagedMemoryEntry {
    /// Base address of the region.
    start: usize,
    /// Region length in bytes.
    len: usize,
    /// Keeps the underlying mapping alive while registered.
    mem: Arc<MappedMemory>,
    /// Supplies page contents when a fault lands in this range.
    store: Box<dyn PageStore + Send + 'static>,
    /// Page granularity used for mprotect and copy operations.
    page_size: usize,
}
146
/// Errors produced by mapping and paging operations in this crate.
#[derive(Debug, PartialEq, Eq)]
pub enum Error {
    /// A caller-supplied base address was not page-aligned.
    BaseNotAligned,
    /// A caller-supplied base address was null.
    NullBase,
    /// Length was not a page multiple (fixed-base mappings only).
    LengthNotAligned,
    /// A zero-length mapping was requested.
    LengthIsZero,
    /// `sysconf` could not report the system page size.
    PageSizeNotAvail,
    /// The kernel did not place a fixed mapping at the requested address.
    NotSupported,
    /// An underlying libc call failed with this errno.
    UnixError(nix::errno::Errno),
    /// A new paged region would overlap an already-registered one.
    MemoryOverlap,
    /// A shared segment would extend past the end of the mapping.
    MemoryOverflow,
}
159
/// Spin flag serializing faulting threads' use of the two pipes below.
static HANDLER_SPIN: AtomicBool = AtomicBool::new(false);
/// (read, write) fds of the reply pipe: worker thread -> signal handler.
/// Initialized once in `handler_init`.
static mut TO_HANDLER: (RawFd, RawFd) = (0, 1);
/// (read, write) fds of the request pipe: signal handler -> worker thread.
/// Initialized once in `handler_init`.
static mut FROM_HANDLER: (RawFd, RawFd) = (0, 1);
/// Signal action displaced by our SIGSEGV handler; used for chaining.
static mut PREV_SIGSEGV: mem::MaybeUninit<signal::SigAction> = mem::MaybeUninit::uninit();
/// Signal action displaced by our SIGBUS handler; used for chaining.
static mut PREV_SIGBUS: mem::MaybeUninit<signal::SigAction> = mem::MaybeUninit::uninit();
165
/// Signal-handler half of the fault protocol: ships the faulting address and
/// access flag to the worker thread, then blocks on its one-byte verdict.
///
/// Returns `true` when the fault was NOT handled and should be forwarded to
/// the previously installed signal handler.
#[inline]
fn handle_page_fault_(info: *mut libc::siginfo_t, ctx: *mut c_void) -> bool {
    let (tx, rx, addr, ctx) = unsafe {
        // rx: read end of the reply pipe; tx: write end of the request pipe.
        let (rx, _) = TO_HANDLER;
        let (_, tx) = FROM_HANDLER;
        (tx, rx, (*info).si_addr() as usize, &mut *(ctx as *mut libc::ucontext_t))
    };
    // Arch-specific decode of whether the fault was a read or a write
    // (the worker interprets 0 as read, anything else as write).
    let flag = machdep::check_page_fault_rw_flag_from_context(*ctx);
    // Wire format: little-endian fault address followed by the r/w flag byte.
    let mut buff = [0; ADDR_SIZE + 1];
    buff[..ADDR_SIZE].copy_from_slice(&addr.to_le_bytes());
    buff[ADDR_SIZE] = flag;
    // Spin lock serializing concurrently-faulting threads over the shared
    // pipes; atomics are about the only safe primitive in signal context.
    while HANDLER_SPIN.swap(true, Ordering::Acquire) {
        std::thread::yield_now();
    }
    if unistd::write(unsafe { BorrowedFd::borrow_raw(tx) }, &buff).is_err() {
        // Could not reach the worker; fall back to the previous handler.
        HANDLER_SPIN.swap(false, Ordering::Release);
        return true;
    }
    let _ = unistd::read(unsafe { BorrowedFd::borrow_raw(rx) }, &mut buff[..1]);
    HANDLER_SPIN.swap(false, Ordering::Release);
    // Worker replies 0 when it resolved the fault; 1 means "not my region".
    buff[0] == 1
}
189
/// Top-level SIGSEGV/SIGBUS handler. Tries the paged-memory fault protocol
/// first; when the fault is not ours, re-dispatches to the signal action that
/// was installed before ours (saved in `PREV_SIGSEGV`/`PREV_SIGBUS`).
extern "C" fn handle_page_fault(signum: libc::c_int, info: *mut libc::siginfo_t, ctx: *mut c_void) {
    // `false` means the worker resolved the fault; just resume execution.
    if !handle_page_fault_(info, ctx) {
        return;
    }

    unsafe {
        let previous_signal = signal::Signal::try_from(signum).expect("invalid signum");
        let previous = *(match previous_signal {
            signal::SIGSEGV => PREV_SIGSEGV.as_ptr(),
            signal::SIGBUS => PREV_SIGBUS.as_ptr(),
            _ => panic!("unknown signal: {}", previous_signal),
        });

        match previous.handler() {
            signal::SigHandler::SigDfl => {
                // Restore the default disposition and re-raise so the default
                // action (typically terminating the process) takes effect.
                signal::signal(previous_signal, signal::SigHandler::SigDfl).expect("fail to reset signal handler");
                let _ = signal::raise(previous_signal);
            }
            signal::SigHandler::SigIgn => {}
            // Three-argument handler: only valid if SA_SIGINFO was set.
            signal::SigHandler::SigAction(handler)
                if previous.flags() & signal::SaFlags::SA_SIGINFO == signal::SaFlags::SA_SIGINFO =>
            {
                handler(signum, info, ctx);
            }
            signal::SigHandler::Handler(handler) => handler(signum),
            _ => panic!("unexpected signal handler"),
        }
    }
}
222
/// Installs `handler` for SIGSEGV and SIGBUS, saving the displaced actions
/// into `PREV_SIGSEGV` / `PREV_SIGBUS` so unrelated faults can be forwarded.
unsafe fn register_signal_handlers_(handler: extern "C" fn(i32, *mut libc::siginfo_t, *mut c_void)) {
    let register = |slot: *mut signal::SigAction, signal: signal::Signal| {
        // SA_SIGINFO: three-argument handler; SA_NODEFER: do not block the
        // signal while handling it; SA_ONSTACK: use the alternate stack when
        // one is configured.
        let sig_action = signal::SigAction::new(
            signal::SigHandler::SigAction(handler),
            signal::SaFlags::SA_NODEFER | signal::SaFlags::SA_SIGINFO | signal::SaFlags::SA_ONSTACK,
            signal::SigSet::empty(),
        );

        // `sigaction` returns the previous action; stash it for chaining.
        unsafe { *slot = signal::sigaction(signal, &sig_action).expect("fail to register signal handler") };
    };

    unsafe {
        register(PREV_SIGSEGV.as_mut_ptr(), signal::SIGSEGV);
        register(PREV_SIGBUS.as_mut_ptr(), signal::SIGBUS);
    }
}
252
/// Installs this crate's page-fault handler for SIGSEGV and SIGBUS.
///
/// # Safety
///
/// Mutates process-wide signal dispositions and the `PREV_*` statics; must
/// not race with other signal-handler registration in the process.
pub unsafe fn register_signal_handlers() {
    unsafe { register_signal_handlers_(handle_page_fault) };
}
256
/// Bookkeeping for all live paged regions.
struct PagedMemoryManager {
    /// Non-overlapping entries kept sorted ascending by `start`.
    entries: Vec<PagedMemoryEntry>,
}
260
261impl PagedMemoryManager {
262 fn insert(&mut self, entry: PagedMemoryEntry) -> bool {
263 for (i, PagedMemoryEntry { start, len, .. }) in self.entries.iter().enumerate() {
264 if entry.start + entry.len <= *start {
265 self.entries.insert(i, entry);
267 return true;
268 }
269 if entry.start < *start + *len {
270 return false;
272 }
273 }
274 self.entries.push(entry);
275 true
276 }
277
278 fn remove(&mut self, start_: usize, len_: usize) {
279 for (i, PagedMemoryEntry { start, len, .. }) in self.entries.iter().enumerate() {
280 if *start == start_ && *len == len_ {
281 self.entries.remove(i);
282 return;
283 }
284 }
285 panic!(
286 "failed to locate PagedMemoryEntry (start = 0x{:x}, end = 0x{:x})",
287 start_,
288 start_ + len_
289 )
290 }
291}
292
/// Global registry of live paged regions, consulted by the fault worker.
static MANAGER: Mutex<PagedMemoryManager> = Mutex::new(PagedMemoryManager { entries: Vec::new() });
294
/// One-time setup (run via `Once` from `PagedMemory::new_`): creates the two
/// pipes, installs the signal handlers, and spawns the worker thread that
/// resolves page faults outside of signal context.
fn handler_init() {
    let (to_read, to_write) = nix::unistd::pipe().expect("fail to create pipe to the handler");
    let (from_read, from_write) = nix::unistd::pipe().expect("fail to create pipe from the handler");
    // Borrow the fd numbers for the worker thread. The owned fds are leaked
    // below via `into_raw_fd`, so these borrows remain valid for the life of
    // the process.
    let from_handler = unsafe { BorrowedFd::borrow_raw(from_read.as_raw_fd()) };
    let to_handler = unsafe { BorrowedFd::borrow_raw(to_write.as_raw_fd()) };
    unsafe {
        TO_HANDLER = (to_read.into_raw_fd(), to_write.into_raw_fd());
        FROM_HANDLER = (from_read.into_raw_fd(), from_write.into_raw_fd());
        register_signal_handlers();
    }
    // Publish the fd stores before any fault can possibly consult them.
    std::sync::atomic::fence(Ordering::SeqCst);
    std::thread::spawn(move || {
        let mut buff = [0; ADDR_SIZE + 1];
        loop {
            // NOTE(review): assumes each request arrives in a single read; a
            // short read would desynchronize the protocol -- TODO confirm.
            unistd::read(&from_handler, &mut buff).unwrap();
            let addr = usize::from_le_bytes(buff[..ADDR_SIZE].try_into().unwrap());
            // Flag byte: 0 = read fault, anything else = write fault.
            let (access_type, mprotect_flag) = match buff[ADDR_SIZE] {
                0 => (AccessType::Read, ProtFlags::PROT_READ),
                _ => (AccessType::Write, ProtFlags::PROT_READ | ProtFlags::PROT_WRITE),
            };
            let mut mgr = MANAGER.lock();
            // Reply byte: 1 = not our region (handler falls back), 0 = handled.
            let mut fallback = 1;
            for entry in mgr.entries.iter_mut() {
                if entry.start <= addr && addr < entry.start + entry.len {
                    // Round the faulting address down to its page base
                    // (page_size is assumed to be a power of two).
                    let page_mask = usize::MAX ^ (entry.page_size - 1);
                    let page_addr = addr & page_mask;
                    let page_ptr = unsafe { NonNull::new_unchecked(page_addr as *mut c_void) };
                    let slice = entry.mem.as_slice();
                    let base = slice.as_ptr() as usize;
                    let page_offset = page_addr - base;
                    if let Some(page) = entry.store.page_fault(page_offset, entry.page_size, access_type) {
                        // Temporarily open the page for writing so the
                        // store's chunks can be copied in.
                        unsafe {
                            nix::sys::mman::mprotect(
                                page_ptr,
                                entry.page_size,
                                ProtFlags::PROT_READ | ProtFlags::PROT_WRITE,
                            )
                            .expect("mprotect failed");
                        }
                        let target = &mut slice[page_offset..page_offset + entry.page_size];
                        let mut base = 0;
                        // Copy the store's chunks back-to-back into the page.
                        for chunk in page {
                            let chunk = (*chunk).as_ref();
                            let chunk_len = chunk.len();
                            target[base..base + chunk_len].copy_from_slice(&chunk);
                            base += chunk_len;
                        }
                    }
                    // Drop to the permissions implied by the access type
                    // (read faults leave the page read-only so a later write
                    // still faults).
                    unsafe {
                        nix::sys::mman::mprotect(page_ptr, entry.page_size, mprotect_flag).expect("mprotect failed");
                    }
                    fallback = 0;
                    break;
                }
            }
            unistd::write(&to_handler, &[fallback]).unwrap();
        }
    });
}
357
impl<'a> PagedMemory<'a> {
    /// Wraps an existing, page-aligned region `[base, base + length)` so its
    /// pages are served by `store`.
    ///
    /// # Safety
    ///
    /// `base`/`length` must describe memory this crate may remap (the region
    /// is re-mmapped as PROT_NONE), and must remain valid for `'static`.
    pub unsafe fn from_raw<S: PageStore + Send + 'static>(
        base: *mut u8, length: usize, store: S, page_size: Option<usize>,
    ) -> Result<PagedMemory<'static>, Error> {
        let mem: &'static mut [u8] = unsafe { std::slice::from_raw_parts_mut(base, length) };
        Self::new_(Some(mem.as_ptr() as *mut u8), mem.len(), store, page_size)
    }

    /// Allocates a fresh paged region of `length` bytes backed by `store`.
    /// `page_size` defaults to the system page size.
    pub fn new<S: PageStore + Send + 'static>(
        length: usize, store: S, page_size: Option<usize>,
    ) -> Result<PagedMemory<'static>, Error> {
        Self::new_(None, length, store, page_size)
    }

    /// Shared constructor: lazily initializes the fault machinery, creates a
    /// PROT_NONE mapping, and registers it with the global manager.
    fn new_<'b, S: PageStore + Send + 'static>(
        base: Option<*mut u8>, length: usize, store: S, page_size: Option<usize>,
    ) -> Result<PagedMemory<'b>, Error> {
        // Pipes, worker thread, and signal handlers are process-wide; set
        // them up exactly once.
        static INIT: std::sync::Once = std::sync::Once::new();
        INIT.call_once(|| handler_init());
        let page_size = match page_size {
            Some(s) => s,
            None => get_page_size()?,
        };
        // PROT_NONE so that the very first access of any page faults.
        let mem = std::sync::Arc::new(MappedMemory::new(base, length, page_size, ProtFlags::PROT_NONE)?);
        let mut mgr = MANAGER.lock();
        if !mgr.insert(PagedMemoryEntry {
            start: mem.base() as usize,
            len: length,
            mem: mem.clone(),
            store: Box::new(store),
            page_size,
        }) {
            return Err(Error::MemoryOverlap);
        }

        Ok(PagedMemory {
            mem,
            page_size,
            _phantom: std::marker::PhantomData,
        })
    }

    /// Mutable view of the whole region; writes fault pages in on demand.
    pub fn as_slice_mut(&mut self) -> &mut [u8] {
        self.mem.as_slice()
    }

    /// Read-only view of the whole region; reads fault pages in on demand.
    pub fn as_slice(&self) -> &[u8] {
        self.mem.as_slice()
    }

    /// Returns the region's base pointer and length.
    pub fn as_raw_parts(&self) -> (*mut u8, usize) {
        let s = self.mem.as_slice();
        (s.as_mut_ptr(), s.len())
    }

    /// Page granularity of this region.
    pub fn page_size(&self) -> usize {
        self.page_size
    }

    /// Downgrades `[offset, offset + length)` to read-only; subsequent writes
    /// there will fault again.
    pub fn mark_read_only(&self, offset: usize, length: usize) {
        assert!(offset + length <= self.mem.length);
        unsafe {
            let ptr = NonNull::new_unchecked(self.mem.base().add(offset) as *mut c_void);
            nix::sys::mman::mprotect(ptr, length, ProtFlags::PROT_READ).expect("mprotect failed");
        }
    }

    /// Discards the page at `page_offset` (must be page-aligned and within
    /// bounds) by remapping it PROT_NONE; the next access faults it back in.
    pub fn release_page(&self, page_offset: usize) {
        if page_offset & (self.page_size - 1) != 0 || page_offset >= self.mem.length {
            panic!("invalid page offset: {:x}", page_offset);
        }
        let page_addr = self.mem.base() as usize + page_offset;
        unsafe {
            // MAP_FIXED over the existing page atomically replaces it with a
            // fresh anonymous PROT_NONE mapping.
            nix::sys::mman::mmap_anonymous(
                Some(NonZeroUsize::new(page_addr).unwrap()),
                NonZeroUsize::new(self.page_size).unwrap(),
                ProtFlags::PROT_NONE,
                MapFlags::MAP_FIXED | MapFlags::MAP_PRIVATE,
            )
            .expect("mmap failed");
        }
    }

    /// Discards every page in the region (see `release_page`) and drops any
    /// shared segments that were mapped into it.
    pub fn release_all_pages(&self) {
        unsafe {
            nix::sys::mman::mmap_anonymous(
                Some(NonZeroUsize::new(self.mem.base() as usize).unwrap()),
                NonZeroUsize::new(self.mem.length).unwrap(),
                ProtFlags::PROT_NONE,
                MapFlags::MAP_FIXED | MapFlags::MAP_PRIVATE,
            )
            .expect("mmap failed");
        }
        self.mem.shared.lock().clear();
    }

    /// Maps the shared segment `shm` at `offset`, initially PROT_NONE so the
    /// first access goes through the fault path.
    pub fn make_shared(&self, offset: usize, shm: &SharedMemory) -> Result<(), Error> {
        self.mem.make_shared(offset, shm, ProtFlags::PROT_NONE)
    }
}
497
498impl<'a> Drop for PagedMemory<'a> {
499 fn drop(&mut self) {
500 let mut mgr = MANAGER.lock();
501 mgr.remove(self.mem.base() as usize, self.mem.length);
502 }
503}
504
/// A `PageStore` backed by an in-memory byte vector.
pub struct VecPageStore(Vec<u8>);

impl VecPageStore {
    /// Wraps `vec` as the backing bytes served on page faults.
    pub fn new(vec: Vec<u8>) -> Self {
        VecPageStore(vec)
    }
}
512
513impl PageStore for VecPageStore {
514 fn page_fault(
515 &mut self, offset: usize, length: usize, _access: AccessType,
516 ) -> Option<Box<dyn Iterator<Item = Box<dyn AsRef<[u8]> + '_>> + '_>> {
517 #[cfg(debug_assertions)]
518 println!(
519 "{:?} loading page at 0x{:x} access={:?}",
520 self as *mut Self, offset, _access,
521 );
522 Some(Box::new(std::iter::once(
523 Box::new(&self.0[offset..offset + length]) as Box<dyn AsRef<[u8]>>
524 )))
525 }
526}
527
/// A reference-counted shared-memory segment that can be mapped into one or
/// more mappings at once (clones share the same underlying fd).
#[derive(Clone)]
pub struct SharedMemory(Arc<SharedMemoryInner>);

/// Owns the segment's file descriptor and remembers its size.
struct SharedMemoryInner {
    /// Shared-memory fd; closed when the last clone is dropped.
    fd: std::os::fd::OwnedFd,
    /// Segment size in bytes.
    size: usize,
}
535
impl SharedMemory {
    /// Creates a shared-memory segment of `size` bytes: obtains an fd from
    /// the platform layer and sizes it with `ftruncate`.
    pub fn new(size: usize) -> Result<Self, Error> {
        let fd = machdep::get_shared_memory()?;
        // NOTE(review): `size as libc::off_t` can truncate/wrap when off_t is
        // narrower than usize -- TODO confirm the accepted size range.
        nix::unistd::ftruncate(&fd, size as libc::off_t).map_err(Error::UnixError)?;
        Ok(Self(Arc::new(SharedMemoryInner { fd, size })))
    }
}
543
544pub fn get_page_size() -> Result<usize, Error> {
545 Ok(unistd::sysconf(unistd::SysconfVar::PAGE_SIZE)
546 .map_err(Error::UnixError)?
547 .ok_or(Error::PageSizeNotAvail)? as usize)
548}
549
#[cfg(test)]
mod tests {
    use super::*;
    use lazy_static::lazy_static;

    lazy_static! {
        // System page size, resolved once and shared by every test.
        static ref PAGE_SIZE: usize = unistd::sysconf(unistd::SysconfVar::PAGE_SIZE).unwrap().unwrap() as usize;
    }

    // Basic demand paging: bytes written into the store's vector appear at
    // the same offsets when the mapped region is read.
    #[test]
    fn test1() {
        for _ in 0..100 {
            let mut v = Vec::new();
            v.resize(*PAGE_SIZE * 100, 0);
            v[0] = 42;
            v[*PAGE_SIZE * 10 + 1] = 43;
            v[*PAGE_SIZE * 20 + 1] = 44;

            let pm = PagedMemory::new(*PAGE_SIZE * 100, VecPageStore::new(v), None).unwrap();
            let m = pm.as_slice();
            assert_eq!(m[0], 42);
            assert_eq!(m[*PAGE_SIZE * 10 + 1], 43);
            assert_eq!(m[*PAGE_SIZE * 20 + 1], 44);
        }
    }

    // Two independent regions: writing through one must not disturb reads
    // from the other.
    #[test]
    fn test2() {
        for _ in 0..100 {
            let mut v = Vec::new();
            v.resize(*PAGE_SIZE * 100, 0);
            v[0] = 1;
            v[*PAGE_SIZE * 10 + 1] = 2;
            v[*PAGE_SIZE * 20 + 1] = 3;

            let pm1 = PagedMemory::new(*PAGE_SIZE * 100, VecPageStore::new(v), None).unwrap();

            let mut v = Vec::new();
            v.resize(*PAGE_SIZE * 100, 0);
            for (i, v) in v.iter_mut().enumerate() {
                *v = i as u8;
            }
            let mut pm2 = PagedMemory::new(*PAGE_SIZE * 100, VecPageStore::new(v), None).unwrap();

            let m2 = pm2.as_slice_mut();
            let m1 = pm1.as_slice();

            assert_eq!(m2[100], 100);
            m2[100] = 0;
            assert_eq!(m2[100], 0);

            assert_eq!(m1[0], 1);
            assert_eq!(m1[*PAGE_SIZE * 10 + 1], 2);
            assert_eq!(m1[*PAGE_SIZE * 20 + 1], 3);
        }
    }

    // A shared segment mapped into two regions: a write through one region
    // must be visible through the other.
    #[test]
    fn test_shared_memory() {
        let mut v = Vec::new();
        v.resize(*PAGE_SIZE * 100, 0);
        v[0] = 42;
        v[*PAGE_SIZE * 10 + 1] = 43;
        v[*PAGE_SIZE * 20 + 1] = 44;

        let shm = SharedMemory::new(*PAGE_SIZE).unwrap();
        let mut pm1 = PagedMemory::new(*PAGE_SIZE * 100, VecPageStore::new(v.clone()), None).unwrap();
        let pm2 = PagedMemory::new(*PAGE_SIZE * 100, VecPageStore::new(v), None).unwrap();
        pm1.make_shared(*PAGE_SIZE * 10, &shm).unwrap();
        pm2.make_shared(*PAGE_SIZE * 10, &shm).unwrap();

        // Both views fault the shared page in from their stores first.
        assert_eq!(pm1.as_slice()[*PAGE_SIZE * 10 + 1], 43);
        assert_eq!(pm2.as_slice()[*PAGE_SIZE * 10 + 1], 43);
        // A write through pm1 is observed through pm2 (same segment).
        pm1.as_slice_mut()[*PAGE_SIZE * 10 + 1] = 99;
        assert_eq!(pm2.as_slice()[*PAGE_SIZE * 10 + 1], 99);
        assert_eq!(pm1.as_slice()[*PAGE_SIZE * 10 + 1], 99);

        // Pages outside the shared window still come from pm1's own store.
        let m = pm1.as_slice();
        assert_eq!(m[0], 42);
        assert_eq!(m[*PAGE_SIZE * 20 + 1], 44);
    }

    // Released pages must be faulted back in with the original store
    // contents, repeatedly.
    #[test]
    fn test_release_page() {
        let mut v = Vec::new();
        v.resize(*PAGE_SIZE * 20, 0);
        v[0] = 42;
        v[*PAGE_SIZE * 10 + 1] = 43;

        let pm = PagedMemory::new(*PAGE_SIZE * 100, VecPageStore::new(v), None).unwrap();
        let m = pm.as_slice();
        assert_eq!(m[0], 42);
        assert_eq!(m[*PAGE_SIZE * 10 + 1], 43);
        for _ in 0..5 {
            pm.release_page(0);
            pm.release_page(*PAGE_SIZE * 10);
            assert_eq!(m[0], 42);
            assert_eq!(m[*PAGE_SIZE * 10 + 1], 43);
        }
    }

    // Random-order access across the whole region: every byte must fault in
    // with the value the store assigned to it, regardless of access order.
    #[test]
    fn out_of_order_scan() {
        let mut v = Vec::new();
        v.resize(*PAGE_SIZE * 100, 0);
        for (i, v) in v.iter_mut().enumerate() {
            *v = i as u8;
        }
        let store = VecPageStore::new(v);
        let pm = PagedMemory::new(*PAGE_SIZE * 100, store, None).unwrap();
        use rand::{SeedableRng, seq::SliceRandom};
        use rand_chacha::ChaChaRng;
        // Fixed seed keeps the shuffle deterministic across runs.
        let seed = [0; 32];
        let mut rng = ChaChaRng::from_seed(seed);

        let m = pm.as_slice();
        let mut idxes = Vec::new();
        for i in 0..m.len() {
            idxes.push(i);
        }
        idxes.shuffle(&mut rng);
        for i in idxes.into_iter() {
            #[cfg(debug_assertions)]
            {
                let x = m[i];
                println!("m[0x{:08x}] = {}", i, x);
            }
            assert_eq!(m[i], i as u8);
        }
    }
}