#[cfg(feature = "pooling-allocator")]
use crate::prelude::*;
use self::ioctl::{Categories, PageMapScanBuilder};
use crate::runtime::vm::{HostAlignedByteCount, host_page_size};
use rustix::ioctl::ioctl;
use std::fs::File;
use std::mem::MaybeUninit;
use std::ptr;
/// Lazily-opened, process-wide handle to `/proc/self/pagemap`.
///
/// Evaluates to `None` when the file cannot be opened or when the
/// `pthread_atfork` hook below cannot be registered.
///
/// As part of initialization a child-side `pthread_atfork` hook is installed.
/// After a `fork` that hook opens `/proc/self/pagemap` again in the child and
/// `dup2`s the new descriptor over the descriptor number owned by this `File`,
/// so the `File` is updated in place without being mutated from Rust.
/// Presumably this is needed because the inherited descriptor would otherwise
/// keep describing the parent process -- confirm against procfs semantics.
#[cfg(feature = "pooling-allocator")]
static PROCESS_PAGEMAP: std::sync::LazyLock<Option<File>> = std::sync::LazyLock::new(|| {
use rustix::fd::AsRawFd;
// No pagemap file means there is nothing to manage; report it as unavailable.
let pagemap = File::open("/proc/self/pagemap").ok()?;
// Only a child-side hook is registered (no prepare/parent hooks). The hook
// itself is defined below and only touches `PROCESS_PAGEMAP` and raw libc
// calls.
let rc = unsafe { libc::pthread_atfork(None, None, Some(after_fork_in_child)) };
// If the hook could not be installed the pagemap is treated as unavailable
// rather than handing out a handle that cannot be fixed up after a fork.
if rc != 0 {
return None;
}
return Some(pagemap);
/// `pthread_atfork` child hook: re-points the descriptor held by
/// `PROCESS_PAGEMAP` at a file opened by the child itself.
///
/// Failures abort the process instead of panicking.
unsafe extern "C" fn after_fork_in_child() {
// Nothing to fix up when no pagemap file is stored in the static.
let Some(parent_pagemap) = PROCESS_PAGEMAP.as_ref() else {
return;
};
unsafe {
let flags = libc::O_CLOEXEC | libc::O_RDONLY;
let mut child_pagemap = libc::open(c"/proc/self/pagemap".as_ptr(), flags);
// If the child's pagemap cannot be opened, `/dev/null` is substituted so
// that the inherited descriptor is still replaced (presumably so later
// scans fail instead of observing the parent -- TODO confirm).
if child_pagemap == -1 {
child_pagemap = libc::open(c"/dev/null".as_ptr(), flags);
}
if child_pagemap == -1 {
libc::abort();
}
// Replace the descriptor number owned by the static `File` in place.
let rc = libc::dup2(child_pagemap, parent_pagemap.as_raw_fd());
if rc == -1 {
libc::abort();
}
// The temporary descriptor is no longer needed once duplicated.
let rc = libc::close(child_pagemap);
if rc == -1 {
libc::abort();
}
}
}
});
/// Handle used to issue `PAGEMAP_SCAN` ioctls against this process's page map.
///
/// Wraps a `'static` borrow of the underlying `File`, so the handle itself
/// owns no descriptor.
#[derive(Debug)]
pub struct PageMap(&'static File);
impl PageMap {
    /// Returns a handle to the process-wide page map, or `None` if it is not
    /// usable.
    ///
    /// `None` is returned when the process-wide pagemap file is unavailable or
    /// when a probe `PAGEMAP_SCAN` ioctl on it fails.
    #[cfg(feature = "pooling-allocator")]
    pub fn new() -> Option<PageMap> {
        let file = PROCESS_PAGEMAP.as_ref()?;

        // Probe for ioctl support by scanning an empty range at the null
        // address; only success or failure of the call matters here.
        let nothing = ptr::slice_from_raw_parts(ptr::null_mut(), 0);
        let mut regions = vec![MaybeUninit::uninit(); 1];
        let probe = PageMapScanBuilder::new(nothing)
            .max_pages(1)
            .return_mask(Categories::empty())
            .category_mask(Categories::all())
            .build(&mut regions);

        // SAFETY: the scan argument points at `regions`, which outlives the
        // call, and the scanned range is empty.
        let supported = unsafe { ioctl(&file, probe) }.is_ok();
        supported.then_some(PageMap(file))
    }
}
/// Resets the `len` bytes at `ptr`, using a `PAGEMAP_SCAN` to decide which
/// pages are reset in place and which are decommitted.
///
/// With a usable `pagemap` the range is scanned for pages that are written and
/// present but neither zero-PFN nor file-backed. Every region the scan reports
/// is passed to `reset_manually`; everything from where the scan stopped to
/// the end of the range is passed to `decommit`. The scan is limited to
/// `keep_resident` bytes' worth of pages (after clamping `keep_resident` to
/// `len`).
///
/// The call is forwarded to `pagemap_disabled::reset_with_pagemap` when:
///
/// * `pagemap` is `None`,
/// * `keep_resident` is zero,
/// * both `keep_resident` and `len` are at most one host page, or
/// * the ioctl fails (a warning is logged first).
///
/// Returns the number of bytes handed to `reset_manually` on the scan path,
/// or whatever the fallback returns otherwise.
///
/// # Safety
///
/// NOTE(review): the contract is not spelled out in this file. The body
/// creates `&mut [u8]` slices for sub-ranges of `ptr..ptr + len` reported by
/// the kernel, so presumably the whole range must be valid, writable and not
/// otherwise accessed for the duration of the call -- confirm with callers.
pub unsafe fn reset_with_pagemap(
    pagemap: Option<&PageMap>,
    ptr: *mut u8,
    len: HostAlignedByteCount,
    keep_resident: HostAlignedByteCount,
    mut reset_manually: impl FnMut(&mut [u8]),
    mut decommit: impl FnMut(*mut u8, usize),
) -> usize {
    let keep_resident = keep_resident.min(len);
    let page_size = host_page_size();

    // A scan is only attempted when there is something to keep resident and
    // the request spans more than a single host page.
    let keep_bytes = keep_resident.byte_count();
    let single_page = keep_bytes <= page_size && len.byte_count() <= page_size;
    let scannable = keep_bytes != 0 && !single_page;

    let Some(pagemap) = pagemap.filter(|_| scannable) else {
        // SAFETY: forwards this function's own contract unchanged.
        return unsafe {
            crate::runtime::vm::pagemap_disabled::reset_with_pagemap(
                None,
                ptr,
                len,
                keep_resident,
                reset_manually,
                decommit,
            )
        };
    };

    const MAX_REGIONS: usize = 32;
    let mut storage = [MaybeUninit::uninit(); MAX_REGIONS];

    // Pages carrying either of these flags are excluded from the results by
    // inverting them in the mask below.
    let excluded = Categories::PFNZERO | Categories::FILE;
    let required = Categories::WRITTEN | Categories::PRESENT;
    let whole_range = ptr::slice_from_raw_parts(ptr, len.byte_count());
    let scan_arg = PageMapScanBuilder::new(whole_range)
        .max_pages(keep_bytes / page_size)
        .category_inverted(excluded)
        .category_mask(required | excluded)
        .return_mask(Categories::empty())
        .build(&mut storage);

    // SAFETY: `scan_arg` points at `storage`, which outlives the call.
    let scan = unsafe { ioctl(&pagemap.0, scan_arg) };
    let result = match scan {
        Ok(result) => result,
        Err(err) => {
            log::warn!("failed pagemap scan {err}");
            // SAFETY: forwards this function's own contract unchanged.
            return unsafe {
                crate::runtime::vm::pagemap_disabled::reset_with_pagemap(
                    None,
                    ptr,
                    len,
                    keep_resident,
                    reset_manually,
                    decommit,
                )
            };
        }
    };

    // Reset every reported region in place, tallying how many bytes that was.
    let bytes_resident = result
        .regions()
        .iter()
        .map(|region| {
            // SAFETY: relies on the kernel reporting ranges inside the
            // scanned `ptr`/`len` range.
            unsafe {
                reset_manually(&mut *region.region().cast_mut());
            }
            region.len()
        })
        .sum::<usize>();

    // Everything from where the walk stopped to the end of the range is
    // decommitted.
    let walk_end = result.walk_end();
    let scanned = walk_end.addr() - ptr.addr();
    decommit(walk_end.cast_mut(), len.byte_count() - scanned);

    bytes_resident
}
mod ioctl {
use rustix::ioctl::*;
use std::ffi::c_void;
use std::fmt;
use std::marker;
use std::mem::MaybeUninit;
use std::ptr;
bitflags::bitflags! {
/// Page categories used to filter and report pages in a `PAGEMAP_SCAN`.
///
/// The bit values mirror the kernel's `PAGE_IS_*` constants; the per-flag
/// descriptions below follow the kernel's pagemap documentation.
#[derive(Copy, Clone, PartialEq, Eq)]
#[repr(transparent)]
pub struct Categories: u64 {
/// The page has asynchronous write-protection enabled.
const WPALLOWED = 1 << 0;
/// The page has been written to since it was write-protected.
const WRITTEN = 1 << 1;
/// The page is file-backed.
const FILE = 1 << 2;
/// The page is present in memory.
const PRESENT = 1 << 3;
/// The page is swapped out.
const SWAPPED = 1 << 4;
/// The page has a zero PFN.
const PFNZERO = 1 << 5;
/// The page is backed by a huge page (THP or hugetlb).
const HUGE = 1 << 6;
/// The page is soft-dirty.
const SOFT_DIRTY = 1 << 7;
}
}
impl fmt::Debug for Categories {
    /// Writes the flags through `bitflags::parser::to_writer`.
    fn fmt(&self, out: &mut fmt::Formatter<'_>) -> fmt::Result {
        let flags: &Categories = self;
        bitflags::parser::to_writer(flags, out)
    }
}
impl fmt::Display for Categories {
    /// Writes the flags through `bitflags::parser::to_writer`, the same
    /// rendering used by the `Debug` implementation.
    fn fmt(&self, out: &mut fmt::Formatter<'_>) -> fmt::Result {
        let flags: &Categories = self;
        bitflags::parser::to_writer(flags, out)
    }
}
/// Builder for the argument of a `PAGEMAP_SCAN` ioctl.
///
/// Holds a partially filled-in `pm_scan_arg`; the output buffer is attached
/// last, by `build`.
pub struct PageMapScanBuilder {
// Raw ioctl argument being assembled; `vec`/`vec_len` stay zero until `build`.
pm_scan_arg: pm_scan_arg,
}
impl PageMapScanBuilder {
    /// Starts a scan description covering the address range of `region`.
    ///
    /// Only the address and length of `region` are used; the pointer is never
    /// dereferenced here. All masks start empty and `max_pages` starts at 0.
    pub fn new(region: *const [u8]) -> PageMapScanBuilder {
        let start = region.cast::<u8>().addr();
        PageMapScanBuilder {
            pm_scan_arg: pm_scan_arg {
                size: size_of::<pm_scan_arg>() as u64,
                flags: 0,
                start: start as u64,
                end: start.wrapping_add(region.len()) as u64,
                walk_end: 0,
                vec: 0,
                vec_len: 0,
                max_pages: 0,
                category_inverted: Categories::empty(),
                category_anyof_mask: Categories::empty(),
                category_mask: Categories::empty(),
                return_mask: Categories::empty(),
            },
        }
    }

    /// Sets the `max_pages` field of the scan argument.
    ///
    /// # Panics
    ///
    /// Panics if `max` does not fit in a `u64`.
    pub fn max_pages(&mut self, max: usize) -> &mut PageMapScanBuilder {
        self.pm_scan_arg.max_pages = max.try_into().unwrap();
        self
    }

    /// Sets the `category_inverted` field of the scan argument.
    pub fn category_inverted(&mut self, flags: Categories) -> &mut PageMapScanBuilder {
        self.pm_scan_arg.category_inverted = flags;
        self
    }

    /// Sets the `category_mask` field of the scan argument.
    pub fn category_mask(&mut self, flags: Categories) -> &mut PageMapScanBuilder {
        self.pm_scan_arg.category_mask = flags;
        self
    }

    /// Sets the `category_anyof_mask` field of the scan argument.
    #[expect(dead_code, reason = "bindings for the future if we need them")]
    pub fn category_anyof_mask(&mut self, flags: Categories) -> &mut PageMapScanBuilder {
        self.pm_scan_arg.category_anyof_mask = flags;
        self
    }

    /// Sets the `return_mask` field of the scan argument.
    pub fn return_mask(&mut self, flags: Categories) -> &mut PageMapScanBuilder {
        self.pm_scan_arg.return_mask = flags;
        self
    }

    /// Finishes the description, attaching `dst` as the buffer that receives
    /// the reported regions.
    ///
    /// The returned value borrows `dst` for `'a`, so the buffer stays
    /// exclusively borrowed while the scan argument (and any result derived
    /// from it) is alive.
    pub fn build<'a>(&self, dst: &'a mut [MaybeUninit<PageRegion>]) -> PageMapScan<'a> {
        let mut ret = PageMapScan {
            pm_scan_arg: self.pm_scan_arg,
            _marker: marker::PhantomData,
        };
        // The buffer is written through this address and later turned back
        // into a `&mut [PageRegion]`, so the pointer must be derived from the
        // mutable borrow (`as_mut_ptr`) rather than from a shared reborrow.
        ret.pm_scan_arg.vec = dst.as_mut_ptr() as u64;
        ret.pm_scan_arg.vec_len = dst.len() as u64;
        ret
    }
}
/// A fully described `PAGEMAP_SCAN` request, ready to be passed to `ioctl`.
///
/// The lifetime `'a` ties the request to the output buffer whose address is
/// stored inside `pm_scan_arg`.
#[repr(transparent)]
pub struct PageMapScan<'a> {
// Raw argument handed to the kernel.
pm_scan_arg: pm_scan_arg,
// Keeps the output buffer mutably borrowed for as long as this value lives.
_marker: marker::PhantomData<&'a mut [MaybeUninit<PageRegion>]>,
}
/// Raw argument structure for the `PAGEMAP_SCAN` ioctl.
///
/// NOTE(review): field order and widths are presumably required to match the
/// kernel's `struct pm_scan_arg` -- verify against the kernel UAPI header
/// before changing anything here.
#[derive(Copy, Clone)]
#[repr(C)]
struct pm_scan_arg {
/// Size of this structure in bytes.
size: u64,
/// Scan flags; always 0 in this file.
flags: u64,
/// Address of the first byte of the range to scan.
start: u64,
/// Address one past the last byte of the range to scan.
end: u64,
/// Read back after the ioctl as the address where the walk stopped.
walk_end: u64,
/// Address of the output buffer of `PageRegion` entries.
vec: u64,
/// Capacity of the output buffer, in entries.
vec_len: u64,
/// Limit on the number of pages reported (left at 0 unless set).
max_pages: u64,
/// Categories whose sense is inverted before matching.
category_inverted: Categories,
/// Categories matched against each page.
category_mask: Categories,
/// Additional "any of" category filter; unused in this file.
category_anyof_mask: Categories,
/// Categories reported back for each returned region.
return_mask: Categories,
}
/// Result of a successful `PAGEMAP_SCAN` ioctl.
#[derive(Debug)]
pub struct PageMapScanResult<'a> {
/// Address at which the scan stopped, copied from the ioctl argument.
walk_end: *const u8,
/// The entries of the caller's output buffer that were filled in.
regions: &'a mut [PageRegion],
}
impl PageMapScanResult<'_> {
    /// Address at which the scan stopped.
    pub fn walk_end(&self) -> *const u8 {
        let end = self.walk_end;
        end
    }

    /// Regions reported by the scan, in the order they were stored in the
    /// output buffer.
    pub fn regions(&self) -> &[PageRegion] {
        &*self.regions
    }
}
/// One contiguous address range reported by a `PAGEMAP_SCAN`, together with
/// the categories returned for it.
///
/// Transparent wrapper around the raw `page_region` entry.
#[repr(transparent)]
#[derive(Copy, Clone)]
pub struct PageRegion(page_region);
/// Raw output entry of the `PAGEMAP_SCAN` ioctl.
///
/// NOTE(review): presumably mirrors the kernel's `struct page_region`; verify
/// against the kernel UAPI header before changing the layout.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
struct page_region {
/// Address of the first byte of the region.
start: u64,
/// Address one past the last byte of the region.
end: u64,
/// Categories reported for the region.
categories: Categories,
}
impl PageRegion {
#[inline]
pub fn region(&self) -> *const [u8] {
ptr::slice_from_raw_parts(self.start(), self.len())
}
#[inline]
pub fn start(&self) -> *const u8 {
self.0.start as *const u8
}
#[inline]
pub fn len(&self) -> usize {
usize::try_from(self.0.end - self.0.start).unwrap()
}
#[inline]
#[cfg_attr(
not(test),
expect(dead_code, reason = "bindings for the future if we need them")
)]
pub fn categories(&self) -> Categories {
self.0.categories
}
}
impl fmt::Debug for PageRegion {
    /// Renders the region as a `PageRegion` struct with `start`, `len` and
    /// `categories` fields.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut repr = f.debug_struct("PageRegion");
        repr.field("start", &self.start());
        repr.field("len", &self.len());
        repr.field("categories", &self.0.categories);
        repr.finish()
    }
}
// SAFETY (review note): the opcode and argument pointer below describe the
// `PAGEMAP_SCAN` request as modeled by `pm_scan_arg`; soundness depends on
// that struct matching what the kernel expects.
unsafe impl<'a> Ioctl for PageMapScan<'a> {
    type Output = PageMapScanResult<'a>;

    const IS_MUTATING: bool = true;

    /// Read/write ioctl number 16 in group `'f'`, sized for `pm_scan_arg`.
    fn opcode(&self) -> Opcode {
        opcode::read_write::<pm_scan_arg>(b'f', 16)
    }

    /// Pointer to the raw argument passed to the kernel.
    fn as_ptr(&mut self) -> *mut c_void {
        let arg: *mut pm_scan_arg = &raw mut self.pm_scan_arg;
        arg.cast()
    }

    /// Interprets the ioctl return value as the number of regions written and
    /// exposes that prefix of the output buffer.
    ///
    /// # Panics
    ///
    /// Panics if `out` is negative or exceeds the buffer length recorded in
    /// the argument.
    unsafe fn output_from_ptr(
        out: IoctlOutput,
        extract_output: *mut c_void,
    ) -> rustix::io::Result<Self::Output> {
        let arg_ptr = extract_output.cast::<pm_scan_arg>();
        let filled = usize::try_from(out).unwrap();

        // SAFETY: `extract_output` is the pointer produced by `as_ptr`, so it
        // refers to a live `pm_scan_arg`.
        let arg = unsafe { arg_ptr.read() };
        assert!((filled as u64) <= arg.vec_len);

        // SAFETY: `vec` is the address of the buffer borrowed for `'a`, and
        // the check above keeps the slice within its `vec_len` entries.
        let regions =
            unsafe { std::slice::from_raw_parts_mut(arg.vec as *mut PageRegion, filled) };

        Ok(PageMapScanResult {
            regions,
            walk_end: arg.walk_end as *const u8,
        })
    }
}
}
#[cfg(test)]
mod tests {
use super::ioctl::*;
use crate::prelude::*;
use rustix::ioctl::*;
use rustix::mm::*;
use std::fs::File;
use std::ptr;
/// Test helper owning an anonymous memory mapping; unmapped on drop.
struct MmapAnonymous {
/// Base address of the mapping.
ptr: *mut std::ffi::c_void,
/// Length of the mapping in bytes.
len: usize,
}
impl MmapAnonymous {
    /// Maps `pages` pages of private, anonymous, read/write memory.
    fn new(pages: usize) -> MmapAnonymous {
        let len = pages * rustix::param::page_size();
        let prot = ProtFlags::READ | ProtFlags::WRITE;
        // SAFETY: a fresh anonymous mapping at a kernel-chosen address does
        // not alias any existing memory.
        let mapped = unsafe { mmap_anonymous(ptr::null_mut(), len, prot, MapFlags::PRIVATE) };
        MmapAnonymous {
            ptr: mapped.unwrap(),
            len,
        }
    }

    /// Byte offset of the start of `page`, asserting it lies in the mapping.
    fn offset_of(&self, page: usize) -> usize {
        let offset = page * rustix::param::page_size();
        assert!(offset < self.len);
        offset
    }

    /// Performs a volatile read of the first byte of `page`.
    fn read(&self, page: usize) {
        let offset = self.offset_of(page);
        // SAFETY: `offset` was checked to be inside the mapping.
        unsafe {
            std::ptr::read_volatile(self.ptr.cast::<u8>().add(offset));
        }
    }

    /// Performs a volatile write of `1` to the first byte of `page`.
    fn write(&self, page: usize) {
        let offset = self.offset_of(page);
        // SAFETY: `offset` was checked to be inside the mapping.
        unsafe {
            std::ptr::write_volatile(self.ptr.cast::<u8>().add(offset), 1);
        }
    }

    /// The whole mapping as a raw byte-slice pointer.
    fn region(&self) -> *const [u8] {
        let base = self.ptr.cast::<u8>();
        ptr::slice_from_raw_parts(base, self.len)
    }

    /// The given half-open range of pages as a raw byte-slice pointer.
    fn page_region(&self, pages: std::ops::Range<usize>) -> *const [u8] {
        let page_size = rustix::param::page_size();
        let first = self.ptr.cast::<u8>().wrapping_add(pages.start * page_size);
        let byte_len = (pages.end - pages.start) * page_size;
        ptr::slice_from_raw_parts(first, byte_len)
    }

    /// Address one past the last byte of the mapping.
    fn end(&self) -> *const u8 {
        let base = self.ptr.cast::<u8>();
        base.wrapping_add(self.len)
    }

    /// Address one past the last byte of `page`.
    fn page_end(&self, page: usize) -> *const u8 {
        let bytes = (page + 1) * rustix::param::page_size();
        self.ptr.cast::<u8>().wrapping_add(bytes)
    }
}
impl Drop for MmapAnonymous {
    /// Unmaps the region, panicking if the unmap fails.
    fn drop(&mut self) {
        // SAFETY: `ptr`/`len` describe the mapping created in `new`.
        let unmapped = unsafe { munmap(self.ptr, self.len) };
        unmapped.unwrap();
    }
}
/// Reports whether a `PAGEMAP_SCAN` ioctl can be issued in this environment.
///
/// Returns `false` both when `/proc/self/pagemap` cannot be opened and when a
/// probe scan over a one-page mapping fails, so callers can skip their test
/// instead of failing it.
fn ioctl_supported() -> bool {
    // An unopenable pagemap means "unsupported", not a test failure; this
    // matches how the runtime treats a failed open of the same file.
    let Ok(fd) = File::open("/proc/self/pagemap") else {
        return false;
    };

    let mmap = MmapAnonymous::new(1);
    let mut results = Vec::with_capacity(1);
    let probe = PageMapScanBuilder::new(mmap.region())
        .category_mask(Categories::WRITTEN)
        .return_mask(Categories::all())
        .build(results.spare_capacity_mut());

    // SAFETY: the scan covers a live mapping and writes into `results`'
    // spare capacity, which outlives the call.
    let supported = unsafe { ioctl(&fd, probe) }.is_ok();
    supported
}
/// Scanning a mapping that was never touched reports no regions and walks to
/// the end of the mapping.
#[test]
fn no_pages_returned() {
    if !ioctl_supported() {
        return;
    }

    let mmap = MmapAnonymous::new(10);
    let mut results = Vec::with_capacity(10);
    let fd = File::open("/proc/self/pagemap").unwrap();

    let scan = PageMapScanBuilder::new(mmap.region())
        .category_mask(Categories::WRITTEN)
        .return_mask(Categories::all())
        .build(results.spare_capacity_mut());
    // SAFETY: the scan covers a live mapping and writes into `results`.
    let result = unsafe { ioctl(&fd, scan) }.unwrap();

    assert_eq!(result.regions().len(), 0);
    assert_eq!(result.walk_end(), mmap.end());
}
/// Scanning a zero-length range succeeds and reports no regions.
#[test]
fn empty_region() {
    if !ioctl_supported() {
        return;
    }

    let mut results = Vec::with_capacity(10);
    let fd = File::open("/proc/self/pagemap").unwrap();

    // A zero-length range whose address is one page above null.
    let base = rustix::param::page_size() as *const u8;
    let empty_region = ptr::slice_from_raw_parts(base, 0);

    let scan = PageMapScanBuilder::new(empty_region)
        .return_mask(Categories::all())
        .build(results.spare_capacity_mut());
    // SAFETY: the scanned range is empty and output goes into `results`.
    let result = unsafe { ioctl(&fd, scan) }.unwrap();

    assert_eq!(result.regions().len(), 0);
}
/// Touches a mix of pages by reading and writing, then checks the regions and
/// categories reported when filtering on `WRITTEN`.
#[test]
fn basic_page_flags() {
    if !ioctl_supported() {
        return;
    }

    let mmap = MmapAnonymous::new(10);
    let mut results = Vec::with_capacity(10);
    let fd = File::open("/proc/self/pagemap").unwrap();

    // Pages 0..4 are touched (1 and 2 by writes), page 4 is skipped, and
    // pages 5..7 are read.
    mmap.read(0);
    mmap.write(1);
    mmap.write(2);
    mmap.read(3);
    mmap.read(5);
    mmap.read(6);

    let scan = PageMapScanBuilder::new(mmap.region())
        .category_mask(Categories::WRITTEN)
        .return_mask(Categories::WRITTEN | Categories::PRESENT | Categories::PFNZERO)
        .build(results.spare_capacity_mut());
    // SAFETY: the scan covers a live mapping and writes into `results`.
    let result = unsafe { ioctl(&fd, scan) }.unwrap();

    // Expected categories: pages that were only read also carry `PFNZERO`;
    // pages that were written do not.
    let read_only = Categories::WRITTEN | Categories::PRESENT | Categories::PFNZERO;
    let written = Categories::WRITTEN | Categories::PRESENT;
    let expected = [
        (0..1, read_only),
        (1..3, written),
        (3..4, read_only),
        (5..7, read_only),
    ];

    assert_eq!(result.regions().len(), expected.len());
    assert_eq!(result.walk_end(), mmap.end());
    for (region, (pages, categories)) in result.regions().iter().zip(expected) {
        assert_eq!(region.region(), mmap.page_region(pages));
        assert_eq!(region.categories(), categories);
    }
}
/// Inverting `PFNZERO` in the filter restricts the result to the pages that
/// were actually written.
#[test]
fn only_written_pages() {
    if !ioctl_supported() {
        return;
    }

    let mmap = MmapAnonymous::new(10);
    let mut results = Vec::with_capacity(10);
    let fd = File::open("/proc/self/pagemap").unwrap();

    // Same access pattern as `basic_page_flags`: only pages 1 and 2 are
    // written, the rest are merely read or left untouched.
    mmap.read(0);
    mmap.write(1);
    mmap.write(2);
    mmap.read(3);
    mmap.read(5);
    mmap.read(6);

    let scan = PageMapScanBuilder::new(mmap.region())
        .category_inverted(Categories::PFNZERO)
        .category_mask(Categories::WRITTEN | Categories::PFNZERO)
        .return_mask(Categories::WRITTEN | Categories::PRESENT | Categories::PFNZERO)
        .build(results.spare_capacity_mut());
    // SAFETY: the scan covers a live mapping and writes into `results`.
    let result = unsafe { ioctl(&fd, scan) }.unwrap();

    assert_eq!(result.regions().len(), 1);
    assert_eq!(result.walk_end(), mmap.end());

    let only = &result.regions()[0];
    assert_eq!(only.region(), mmap.page_region(1..3));
    assert_eq!(only.categories(), Categories::WRITTEN | Categories::PRESENT);
}
/// With room for a single output region, the scan stops as soon as a second
/// region would be needed; how far it gets depends on the return mask.
#[test]
fn region_limit() {
    if !ioctl_supported() {
        return;
    }

    let mmap = MmapAnonymous::new(10);
    let mut results = Vec::with_capacity(1);
    let fd = File::open("/proc/self/pagemap").unwrap();

    mmap.read(0);
    mmap.write(1);
    mmap.read(2);
    mmap.write(3);

    // Returning `PFNZERO` distinguishes page 0 from page 1, so the single
    // available region only covers page 0.
    let scan = PageMapScanBuilder::new(mmap.region())
        .return_mask(Categories::WRITTEN | Categories::PFNZERO)
        .build(results.spare_capacity_mut());
    // SAFETY: the scan covers a live mapping and writes into `results`.
    let first = unsafe { ioctl(&fd, scan) }.unwrap();

    assert_eq!(first.regions().len(), 1);
    assert_eq!(first.walk_end(), mmap.page_end(0));
    assert_eq!(first.regions()[0].region(), mmap.page_region(0..1));
    assert_eq!(
        first.regions()[0].categories(),
        Categories::WRITTEN | Categories::PFNZERO
    );

    // Returning only `WRITTEN` lets pages 0..4 collapse into one region.
    let scan = PageMapScanBuilder::new(mmap.region())
        .return_mask(Categories::WRITTEN)
        .build(results.spare_capacity_mut());
    // SAFETY: as above; the previous result is no longer used.
    let second = unsafe { ioctl(&fd, scan) }.unwrap();

    assert_eq!(second.regions().len(), 1);
    assert_eq!(second.walk_end(), mmap.page_end(3));
    assert_eq!(second.regions()[0].region(), mmap.page_region(0..4));
    assert_eq!(second.regions()[0].categories(), Categories::WRITTEN);
}
/// `max_pages(2)` stops the scan after two pages even though four contiguous
/// pages match.
#[test]
fn page_limit() {
    if !ioctl_supported() {
        return;
    }

    let mmap = MmapAnonymous::new(10);
    let mut results = Vec::with_capacity(10);
    let fd = File::open("/proc/self/pagemap").unwrap();

    for page in 0..4 {
        mmap.read(page);
    }

    let scan = PageMapScanBuilder::new(mmap.region())
        .return_mask(Categories::WRITTEN | Categories::PFNZERO)
        .max_pages(2)
        .build(results.spare_capacity_mut());
    // SAFETY: the scan covers a live mapping and writes into `results`.
    let result = unsafe { ioctl(&fd, scan) }.unwrap();

    assert_eq!(result.regions().len(), 1);
    assert_eq!(result.walk_end(), mmap.page_end(1));

    let only = &result.regions()[0];
    assert_eq!(only.region(), mmap.page_region(0..2));
    assert_eq!(only.categories(), Categories::WRITTEN | Categories::PFNZERO);
}
/// `max_pages(2)` counts reported pages only: with an untouched page between
/// them, the two reported pages land in two separate regions.
#[test]
fn page_limit_with_hole() {
    if !ioctl_supported() {
        return;
    }

    let mmap = MmapAnonymous::new(10);
    let mut results = Vec::with_capacity(10);
    let fd = File::open("/proc/self/pagemap").unwrap();

    // Page 1 is deliberately left untouched.
    mmap.read(0);
    mmap.read(2);
    mmap.read(3);

    let scan = PageMapScanBuilder::new(mmap.region())
        .category_mask(Categories::WRITTEN)
        .return_mask(Categories::WRITTEN | Categories::PFNZERO)
        .max_pages(2)
        .build(results.spare_capacity_mut());
    // SAFETY: the scan covers a live mapping and writes into `results`.
    let result = unsafe { ioctl(&fd, scan) }.unwrap();

    let categories = Categories::WRITTEN | Categories::PFNZERO;
    let expected = [(0..1, categories), (2..3, categories)];

    assert_eq!(result.regions().len(), expected.len());
    assert_eq!(result.walk_end(), mmap.page_end(2));
    for (region, (pages, categories)) in result.regions().iter().zip(expected) {
        assert_eq!(region.region(), mmap.page_region(pages));
        assert_eq!(region.categories(), categories);
    }
}
}