use crate::hv_defs::Vtl;
use range_map_vec::RangeMap;
use std::collections::BTreeMap;
use thiserror::Error;
use zerocopy::FromBytes;
use zerocopy::FromZeros;
use zerocopy::Immutable;
use zerocopy::IntoBytes;
use zerocopy::KnownLayout;
use zerocopy::Unalign;
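// CR4.LA57 (bit 12): the processor uses 5-level paging when set.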
const X64_CR4_LA57: u64 = 0x0000000000001000;
const X64_PTE_PRESENT: u64 = 1;
const X64_PTE_READ_WRITE: u64 = 1 << 1;
const X64_PTE_ACCESSED: u64 = 1 << 5;
const X64_PTE_DIRTY: u64 = 1 << 6;
const X64_PTE_LARGE_PAGE: u64 = 1 << 7;
const PAGE_TABLE_ENTRY_COUNT: usize = 512;
const X64_PAGE_SHIFT: u64 = 12;
const X64_PTE_BITS: u64 = 9;
pub const X64_PAGE_SIZE: u64 = 4096;
pub const X64_LARGE_PAGE_SIZE: u64 = 0x200000;
pub const X64_1GB_PAGE_SIZE: u64 = 0x40000000;
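/// A 64-bit x64 page table entry, stored in its raw hardware encoding.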
#[derive(Copy, Clone, PartialEq, Eq, IntoBytes, Immutable, KnownLayout, FromBytes)]
#[repr(transparent)]
pub struct PageTableEntry {
entry: u64,
}
impl std::fmt::Debug for PageTableEntry {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("PageTableEntry")
.field("entry", &self.entry)
.field("is_present", &self.is_present())
.field("is_large_page", &self.is_large_page())
.field("gpa", &self.gpa())
.finish()
}
}
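/// The target of a page table entry: a leaf mapping of a given size, or a
/// PDE pointing at the next table level, each carrying a physical address.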
#[derive(Debug, Copy, Clone)]
pub enum PageTableEntryType {
Leaf1GbPage(u64),
Leaf2MbPage(u64),
Leaf4kPage(u64),
Pde(u64),
}
impl PageTableEntry {
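    /// Overwrites the entry as present, writable, and accessed for the given
    /// entry type; leaf entries are additionally marked dirty.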
pub fn set_entry(&mut self, entry_type: PageTableEntryType) {
self.entry = X64_PTE_PRESENT | X64_PTE_ACCESSED | X64_PTE_READ_WRITE;
match entry_type {
PageTableEntryType::Leaf1GbPage(address) => {
assert!(address % X64_1GB_PAGE_SIZE == 0);
self.entry |= address;
self.entry |= X64_PTE_LARGE_PAGE | X64_PTE_DIRTY;
}
PageTableEntryType::Leaf2MbPage(address) => {
assert!(address % X64_LARGE_PAGE_SIZE == 0);
self.entry |= address;
self.entry |= X64_PTE_LARGE_PAGE | X64_PTE_DIRTY;
}
PageTableEntryType::Leaf4kPage(address) => {
assert!(address % X64_PAGE_SIZE == 0);
self.entry |= address;
self.entry |= X64_PTE_DIRTY;
}
PageTableEntryType::Pde(address) => {
assert!(address % X64_PAGE_SIZE == 0);
self.entry |= address;
}
}
}
pub fn is_present(&self) -> bool {
self.entry & X64_PTE_PRESENT == X64_PTE_PRESENT
}
pub fn is_large_page(&self) -> bool {
self.entry & X64_PTE_LARGE_PAGE == X64_PTE_LARGE_PAGE
}
pub fn gpa(&self) -> Option<u64> {
if self.is_present() {
Some(self.entry & 0x000f_ffff_ffff_f000)
} else {
None
}
}
pub fn set_addr(&mut self, addr: u64) {
const VALID_BITS: u64 = 0x000f_ffff_ffff_f000;
assert!(addr & !VALID_BITS == 0);
self.entry &= !0x000f_ffff_ffff_f000;
self.entry |= addr;
}
pub fn clear(&mut self) {
self.entry = 0;
}
}
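/// A single 4 KiB page table containing 512 entries.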
#[repr(C)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, IntoBytes, Immutable, KnownLayout, FromBytes)]
pub struct PageTable {
entries: [PageTableEntry; PAGE_TABLE_ENTRY_COUNT],
}
impl PageTable {
pub fn iter_mut(&mut self) -> impl Iterator<Item = &mut PageTableEntry> {
self.entries.iter_mut()
}
pub fn entry(&mut self, gva: u64, level: u8) -> &mut PageTableEntry {
let index = get_amd64_pte_index(gva, level as u64) as usize;
&mut self.entries[index]
}
}
impl std::ops::Index<usize> for PageTable {
type Output = PageTableEntry;
fn index(&self, index: usize) -> &Self::Output {
&self.entries[index]
}
}
impl std::ops::IndexMut<usize> for PageTable {
fn index_mut(&mut self, index: usize) -> &mut Self::Output {
&mut self.entries[index]
}
}
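/// Returns the 9-bit table index selected by `gva` at the given page map
/// level (0 = PT, 1 = PD, 2 = PDPT, 3 = PML4).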
fn get_amd64_pte_index(gva: u64, page_map_level: u64) -> u64 {
let index = gva >> (X64_PAGE_SHIFT + page_map_level * X64_PTE_BITS);
index & ((1 << X64_PTE_BITS) - 1)
}
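/// Flattens a vector of page tables into a contiguous byte buffer.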
fn flatten_page_table(page_table: Vec<PageTable>) -> Vec<u8> {
let mut flat_tables = Vec::with_capacity(page_table.len() * X64_PAGE_SIZE as usize);
for table in page_table {
flat_tables.extend_from_slice(table.as_bytes());
}
flat_tables
}
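/// Errors returned while building relocated page tables.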
#[derive(Debug, Error)]
pub enum Error {
#[error("page data length is not 4k")]
PageDataLength,
#[error("page data gpa not contained within builder region")]
PageDataGpa,
#[error("cr3 is not within the page table region")]
Cr3,
#[error("a relocation offset is not aligned to a page table mapping")]
UnalignedOffset {
va: u64,
page_table_entry_mapping_size: u64,
relocation_offset: i64,
},
#[error("page table region does not have enough free space to fix up page table")]
NotEnoughFreeSpace,
}
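/// The subset of a virtual processor's register state needed to locate and
/// walk its page tables.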
#[derive(Debug, Clone, Copy)]
pub struct CpuPagingState {
pub cr3: u64,
pub cr4: u64,
}
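/// Fixes up a flattened set of x64 page tables after the memory they
/// describe, and the tables themselves, have been relocated in guest
/// physical address space.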
#[derive(Debug, Clone)]
pub struct PageTableRelocationBuilder {
pub gpa: u64,
pub size: u64,
pub used_size: u64,
page_data: Vec<u8>,
pub vp_index: u16,
pub vtl: Vtl,
}
impl PageTableRelocationBuilder {
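    /// Creates a builder for the page table region starting at `gpa` that is
    /// `size` bytes long, of which the first `used_size` bytes contain
    /// existing page tables; the remainder is free space for newly
    /// allocated tables.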
pub fn new(gpa: u64, size: u64, used_size: u64, vp_index: u16, vtl: Vtl) -> Self {
assert!(used_size <= size);
PageTableRelocationBuilder {
gpa,
size,
used_size,
page_data: vec![0; used_size as usize],
vp_index,
vtl,
}
}
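    /// Stores one 4 KiB page of the original page table data read from
    /// `gpa`. Empty data is ignored; any other non-4K size is rejected.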
pub fn set_page_data(&mut self, gpa: u64, data: &[u8]) -> Result<(), Error> {
if data.is_empty() {
return Ok(());
}
if data.len() != X64_PAGE_SIZE as usize {
return Err(Error::PageDataLength);
}
if !self.contains(gpa) {
return Err(Error::PageDataGpa);
}
let start = (gpa - self.gpa) as usize;
let end = start + X64_PAGE_SIZE as usize;
self.page_data[start..end].copy_from_slice(data);
Ok(())
}
pub fn contains(&self, gpa: u64) -> bool {
let end = self.gpa + self.size;
gpa >= self.gpa && gpa < end
}
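    /// Depth-first walk of the table hierarchy starting at `table_index`
    /// (level 3 = PML4). PDEs pointing back into the builder's region are
    /// rebased by `table_reloc_offset` and followed; present leaf entries
    /// whose VA has an entry in `relocation_offsets` are cleared and saved
    /// in `entry_map`, keyed by their relocated VA, for later reinsertion.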
fn recurse_fixup(
&self,
table_reloc_offset: i64,
page_tables: &mut Vec<PageTable>,
entry_map: &mut BTreeMap<u64, (u8, PageTableEntry)>,
relocation_offsets: &RangeMap<u64, i64>,
table_index: usize,
level: u8,
mut current_va: u64,
) -> Result<(), Error> {
let mut entry_index = 0;
struct NextTableInfo {
table_index: usize,
current_va: u64,
}
        while entry_index < PAGE_TABLE_ENTRY_COUNT {
let table = &mut page_tables[table_index];
let mut recurse_table = None;
for entry in table.iter_mut().skip(entry_index) {
entry_index += 1;
let mapping_size = Self::mapping_size(level);
let entry_va = current_va;
current_va += mapping_size;
if entry.is_present() {
                    let is_pde_entry = match level {
                        3 => true,
                        2 | 1 => !entry.is_large_page(),
                        0 => false,
                        _ => unreachable!(),
                    };
if is_pde_entry {
let old_gpa = entry.gpa().expect("entry is present");
if let Some(index) = self.calculate_page_table_index(old_gpa) {
let new_gpa = Self::relocate_address(old_gpa, table_reloc_offset);
entry.set_addr(new_gpa);
recurse_table = Some(NextTableInfo {
table_index: index,
current_va: entry_va,
});
break;
} else {
continue;
}
}
let start = entry_va;
let end = entry_va + mapping_size - 1;
if let Some(offset) = relocation_offsets.get_range(start..=end) {
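                        // Offsets with magnitude below this entry's mapping
                        // size can never produce an aligned relocation at
                        // this level; leave the entry untouched.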
if offset.unsigned_abs() < mapping_size {
continue;
}
let new_va = Self::relocate_address(entry_va, *offset);
if new_va % mapping_size != 0 {
return Err(Error::UnalignedOffset {
va: entry_va,
page_table_entry_mapping_size: mapping_size,
relocation_offset: *offset,
});
}
let mut new_entry = *entry;
new_entry.set_addr(new_va);
assert!(entry_map.insert(new_va, (level, new_entry)).is_none());
entry.clear();
                    }
}
}
match recurse_table {
Some(info) => {
self.recurse_fixup(
table_reloc_offset,
page_tables,
entry_map,
relocation_offsets,
info.table_index,
level - 1,
info.current_va,
)?;
}
None => {
                    assert_eq!(entry_index, PAGE_TABLE_ENTRY_COUNT);
}
}
}
Ok(())
}
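    /// Applies a signed relocation `offset` to `addr`.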
    fn relocate_address(addr: u64, offset: i64) -> u64 {
        if offset >= 0 {
            addr + offset as u64
        } else {
            // Subtract the magnitude; casting a negative i64 straight to
            // u64 would wrap and overflow the subtraction.
            addr - offset.unsigned_abs()
        }
    }
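    /// The guest VA span covered by a single entry at the given level.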
fn mapping_size(level: u8) -> u64 {
const SIZE_512_GB: u64 = 0x8000000000;
match level {
3 => SIZE_512_GB,
2 => X64_1GB_PAGE_SIZE,
1 => X64_LARGE_PAGE_SIZE,
0 => X64_PAGE_SIZE,
_ => unreachable!(),
}
}
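    /// Returns the index of the page table at `page_table_gpa` within the
    /// builder's region, if it lies inside the region.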
fn calculate_page_table_index(&self, page_table_gpa: u64) -> Option<usize> {
if self.contains(page_table_gpa) {
Some(((page_table_gpa - self.gpa) / X64_PAGE_SIZE) as usize)
} else {
None
}
}
fn calculate_page_table_addr(region_base_gpa: u64, page_table_index: usize) -> u64 {
region_base_gpa + page_table_index as u64 * X64_PAGE_SIZE
}
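    /// Consumes the builder and returns the fixed-up page tables as a flat
    /// byte buffer.
    ///
    /// `table_reloc_offset` is how far the page table region itself moved;
    /// `relocation_offsets` maps guest VA ranges to the offsets their
    /// backing data moved by. Relocated leaf entries are reinserted at
    /// their new VAs, allocating intermediate tables from the free tail of
    /// the region; [`Error::NotEnoughFreeSpace`] is returned if that tail
    /// is exhausted. Only 4-level paging is handled; CR4.LA57 hits a
    /// `todo!`.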
pub fn build(
self,
table_reloc_offset: i64,
relocation_offsets: RangeMap<u64, i64>,
paging_state: CpuPagingState,
) -> Result<Vec<u8>, Error> {
assert_eq!(self.page_data.len() as u64, self.used_size);
let CpuPagingState { cr3: old_cr3, cr4 } = paging_state;
if cr4 & X64_CR4_LA57 == X64_CR4_LA57 {
todo!("handle 5 level paging")
}
if !self.contains(old_cr3) {
return Err(Error::Cr3);
}
        let mut page_tables: Vec<PageTable> =
            <[Unalign<PageTable>]>::ref_from_bytes(self.page_data.as_slice())
                .expect("page data is a valid list of page tables")
                .iter()
                .map(|v| v.into_inner())
                .collect();
let mut entry_map: BTreeMap<u64, (u8, PageTableEntry)> = BTreeMap::new();
self.recurse_fixup(
table_reloc_offset,
&mut page_tables,
&mut entry_map,
&relocation_offsets,
self.calculate_page_table_index(old_cr3)
.expect("region must contain cr3"),
3,
0,
)?;
let new_cr3 = Self::relocate_address(old_cr3, table_reloc_offset);
let free_table_count = (self.size - self.used_size) / X64_PAGE_SIZE;
let mut free_table_index = page_tables.len();
for _ in 0..free_table_count {
page_tables.push(PageTable::new_zeroed());
}
let page_table_len = page_tables.len();
let reloc_builder = PageTableRelocationBuilder {
gpa: Self::relocate_address(self.gpa, table_reloc_offset),
page_data: Vec::new(),
..self
};
for (gva, (entry_level, new_entry)) in entry_map.iter() {
let mut page_table_gpa = new_cr3;
let mut level = 3;
loop {
let table_index = reloc_builder
.calculate_page_table_index(page_table_gpa)
.expect("should be part of relocation region");
let entry = page_tables[table_index].entry(*gva, level);
if level == *entry_level {
if entry.is_present() {
assert_eq!(*entry, *new_entry);
tracing::warn!(
gva,
"page table entry relocated to an already existing identical entry"
);
} else {
*entry = *new_entry;
}
break;
} else {
if entry.is_present() {
page_table_gpa = entry.gpa().expect("entry is present");
} else {
assert!(level > 0);
if free_table_index == page_table_len {
return Err(Error::NotEnoughFreeSpace);
}
let new_table_index = free_table_index;
free_table_index += 1;
let new_table_gpa =
Self::calculate_page_table_addr(reloc_builder.gpa, new_table_index);
entry.set_entry(PageTableEntryType::Pde(new_table_gpa));
page_table_gpa = new_table_gpa;
}
level -= 1;
}
}
}
page_tables.truncate(free_table_index);
Ok(flatten_page_table(page_tables))
}
}
#[cfg(test)]
mod tests {
use super::flatten_page_table;
use super::CpuPagingState;
use super::PageTable;
use super::PageTableEntryType;
use super::PageTableRelocationBuilder;
use super::X64_1GB_PAGE_SIZE;
use super::X64_LARGE_PAGE_SIZE;
use super::X64_PAGE_SIZE;
use crate::hv_defs::Vtl;
use range_map_vec::RangeMap;
use zerocopy::FromBytes;
use zerocopy::FromZeros;
use zerocopy::Unalign;
#[derive(Debug, Clone)]
struct PteInfo {
va: u64,
value: PageTableEntryType,
}
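    /// Test helper: builds and flattens a page table hierarchy rooted at
    /// `cr3`, allocating intermediate tables from a pool of `size` tables.
    /// `Pde` entries only force allocation of the walk path; no leaf is
    /// written for them.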
fn build_page_table(cr3: u64, size: usize, entries: Vec<PteInfo>) -> Vec<u8> {
let mut page_tables = vec![PageTable::new_zeroed(); size];
let mut free_index = 1;
let calculate_page_table_index =
|page_table_gpa| -> usize { ((page_table_gpa - cr3) / X64_PAGE_SIZE) as usize };
for PteInfo { va, value } in entries {
let mut page_table_gpa = cr3;
let mut level = 3;
let entry_level = match &value {
PageTableEntryType::Leaf1GbPage(_) => 2,
PageTableEntryType::Leaf2MbPage(_) => 1,
PageTableEntryType::Leaf4kPage(_) => 0,
                PageTableEntryType::Pde(_) => 0,
            };
loop {
let table_index = calculate_page_table_index(page_table_gpa);
let entry = page_tables[table_index].entry(va, level);
if level == entry_level {
if !matches!(value, PageTableEntryType::Pde(_)) {
assert!(!entry.is_present());
entry.set_entry(value);
}
break;
} else {
if entry.is_present() {
page_table_gpa = entry.gpa().expect("entry is present");
} else {
assert!(level > 0);
let new_table_index = free_index;
assert!(new_table_index < size);
free_index += 1;
let new_table_gpa = PageTableRelocationBuilder::calculate_page_table_addr(
cr3,
new_table_index,
);
entry.set_entry(PageTableEntryType::Pde(new_table_gpa));
page_table_gpa = new_table_gpa;
}
level -= 1;
}
}
}
page_tables.truncate(free_index);
flatten_page_table(page_tables)
}
#[test]
fn builder_test_relocation() {
let original_entries = vec![
PteInfo {
va: 0,
value: PageTableEntryType::Leaf4kPage(0),
},
PteInfo {
va: X64_PAGE_SIZE,
value: PageTableEntryType::Leaf4kPage(X64_PAGE_SIZE),
},
PteInfo {
va: X64_LARGE_PAGE_SIZE,
value: PageTableEntryType::Leaf2MbPage(X64_LARGE_PAGE_SIZE),
},
PteInfo {
va: 2 * X64_LARGE_PAGE_SIZE,
value: PageTableEntryType::Leaf2MbPage(2 * X64_LARGE_PAGE_SIZE),
},
PteInfo {
va: X64_1GB_PAGE_SIZE,
value: PageTableEntryType::Leaf1GbPage(X64_1GB_PAGE_SIZE),
},
PteInfo {
va: 2 * X64_1GB_PAGE_SIZE,
value: PageTableEntryType::Leaf1GbPage(2 * X64_1GB_PAGE_SIZE),
},
];
        let small_reloc = 0x100000;
        let med_reloc = 0x100000 * 8;
        let large_reloc = X64_1GB_PAGE_SIZE * 5;
        let reloc_entries = vec![
PteInfo {
va: X64_PAGE_SIZE,
value: PageTableEntryType::Leaf4kPage(X64_PAGE_SIZE),
},
PteInfo {
va: small_reloc,
value: PageTableEntryType::Leaf4kPage(small_reloc),
},
PteInfo {
va: X64_LARGE_PAGE_SIZE + med_reloc,
value: PageTableEntryType::Leaf2MbPage(X64_LARGE_PAGE_SIZE + med_reloc),
},
PteInfo {
va: 2 * X64_LARGE_PAGE_SIZE,
value: PageTableEntryType::Leaf2MbPage(2 * X64_LARGE_PAGE_SIZE),
},
PteInfo {
va: X64_1GB_PAGE_SIZE + large_reloc,
value: PageTableEntryType::Leaf1GbPage(X64_1GB_PAGE_SIZE + large_reloc),
},
PteInfo {
va: 2 * X64_1GB_PAGE_SIZE,
value: PageTableEntryType::Leaf1GbPage(2 * X64_1GB_PAGE_SIZE),
},
];
let cr3 = 1024 * X64_1GB_PAGE_SIZE;
let original_tables = build_page_table(cr3, 4, original_entries);
let cr3_offset = 1024 * X64_1GB_PAGE_SIZE;
let new_tables = build_page_table(cr3 + cr3_offset, 4, reloc_entries);
let mut builder = PageTableRelocationBuilder::new(
cr3,
            (original_tables.len() * 4) as u64,
            original_tables.len() as u64,
0,
Vtl::Vtl0,
);
original_tables
.as_slice()
.chunks_exact(X64_PAGE_SIZE as usize)
.enumerate()
.for_each(|(index, chunk)| {
builder
.set_page_data(cr3 + index as u64 * X64_PAGE_SIZE, chunk)
.unwrap()
});
let mut reloc_map = RangeMap::new();
reloc_map.insert(0..=X64_PAGE_SIZE - 1, small_reloc as i64);
reloc_map.insert(
X64_LARGE_PAGE_SIZE..=X64_LARGE_PAGE_SIZE * 2 - 1,
med_reloc as i64,
);
reloc_map.insert(
X64_1GB_PAGE_SIZE..=X64_1GB_PAGE_SIZE * 2 - 1,
large_reloc as i64,
);
let built_tables = builder
.build(cr3_offset as i64, reloc_map, CpuPagingState { cr3, cr4: 0 })
.unwrap();
let expected: Vec<PageTable> =
<[Unalign<PageTable>]>::ref_from_bytes(new_tables.as_slice())
.expect("page data is a valid list of page tables")
.iter()
.map(|v| v.into_inner())
.collect();
let actual: Vec<PageTable> =
<[Unalign<PageTable>]>::ref_from_bytes(built_tables.as_slice())
.expect("page data is a valid list of page tables")
.iter()
.map(|v| v.into_inner())
.collect();
assert_eq!(expected.len(), actual.len());
compare_page_tables(&expected, &actual);
}
fn compare_page_tables(left: &[PageTable], right: &[PageTable]) {
for (table_index, (left, right)) in left.iter().zip(right.iter()).enumerate() {
for (pte_index, (left, right)) in
left.entries.iter().zip(right.entries.iter()).enumerate()
{
assert_eq!(left, right, "table {} pte {}", table_index, pte_index);
}
}
}
#[test]
fn builder_illegal_reloc() {
let original_entries = vec![
PteInfo {
va: 0,
value: PageTableEntryType::Leaf4kPage(0),
},
PteInfo {
va: X64_PAGE_SIZE,
value: PageTableEntryType::Leaf4kPage(X64_PAGE_SIZE),
},
PteInfo {
va: X64_LARGE_PAGE_SIZE,
value: PageTableEntryType::Leaf2MbPage(X64_LARGE_PAGE_SIZE),
},
PteInfo {
va: 2 * X64_LARGE_PAGE_SIZE,
value: PageTableEntryType::Leaf2MbPage(2 * X64_LARGE_PAGE_SIZE),
},
];
        let small_reloc = 0x100000;
        let med_reloc = 3 * 0x100000;
        let cr3 = 1024 * X64_1GB_PAGE_SIZE;
let original_tables = build_page_table(cr3, 4, original_entries);
let cr3_offset = 1024 * X64_1GB_PAGE_SIZE;
let mut builder = PageTableRelocationBuilder::new(
cr3,
            (original_tables.len() * 4) as u64,
            original_tables.len() as u64,
0,
Vtl::Vtl0,
);
original_tables
.as_slice()
.chunks_exact(X64_PAGE_SIZE as usize)
.enumerate()
.for_each(|(index, chunk)| {
builder
.set_page_data(cr3 + index as u64 * X64_PAGE_SIZE, chunk)
.unwrap()
});
let mut reloc_map = RangeMap::new();
reloc_map.insert(0..=X64_PAGE_SIZE - 1, small_reloc as i64);
reloc_map.insert(
X64_LARGE_PAGE_SIZE..=X64_LARGE_PAGE_SIZE * 2 - 1,
med_reloc as i64,
);
let built_tables =
builder.build(cr3_offset as i64, reloc_map, CpuPagingState { cr3, cr4: 0 });
assert!(built_tables.is_err());
}
#[test]
fn builder_test_allocation() {
let original_entries = vec![
PteInfo {
va: 0,
value: PageTableEntryType::Leaf4kPage(0),
},
PteInfo {
va: X64_LARGE_PAGE_SIZE,
value: PageTableEntryType::Leaf2MbPage(X64_LARGE_PAGE_SIZE),
},
PteInfo {
va: X64_1GB_PAGE_SIZE,
value: PageTableEntryType::Leaf1GbPage(X64_1GB_PAGE_SIZE),
},
];
let reloc = X64_1GB_PAGE_SIZE * 512;
let reloc_entries = vec![
PteInfo {
va: 0,
value: PageTableEntryType::Pde(0),
},
PteInfo {
va: reloc,
value: PageTableEntryType::Leaf4kPage(reloc),
},
PteInfo {
va: X64_LARGE_PAGE_SIZE + reloc,
value: PageTableEntryType::Leaf2MbPage(X64_LARGE_PAGE_SIZE + reloc),
},
PteInfo {
va: X64_1GB_PAGE_SIZE + reloc,
value: PageTableEntryType::Leaf1GbPage(X64_1GB_PAGE_SIZE + reloc),
},
];
let cr3 = 2048 * X64_1GB_PAGE_SIZE;
let original_tables = build_page_table(cr3, 4, original_entries);
let new_tables = build_page_table(cr3, 8, reloc_entries);
let mut builder = PageTableRelocationBuilder::new(
cr3,
original_tables.len() as u64 * 2,
original_tables.len() as u64,
0,
Vtl::Vtl0,
);
original_tables
.as_slice()
.chunks_exact(X64_PAGE_SIZE as usize)
.enumerate()
.for_each(|(index, chunk)| {
builder
.set_page_data(cr3 + index as u64 * X64_PAGE_SIZE, chunk)
.unwrap()
});
let mut reloc_map = RangeMap::new();
reloc_map.insert(0..=2 * X64_1GB_PAGE_SIZE - 1, reloc as i64);
let built_tables = builder
.build(0, reloc_map, CpuPagingState { cr3, cr4: 0 })
.unwrap();
let expected: Vec<PageTable> =
<[Unalign<PageTable>]>::ref_from_bytes(new_tables.as_slice())
.expect("page data is a valid list of page tables")
.iter()
.map(|v| v.into_inner())
.collect();
let actual: Vec<PageTable> =
<[Unalign<PageTable>]>::ref_from_bytes(built_tables.as_slice())
.expect("page data is a valid list of page tables")
.iter()
.map(|v| v.into_inner())
.collect();
compare_page_tables(&expected, &actual);
}
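    // Added sanity checks (not part of the original suite): they pin down
    // the behavior of the private index and relocation helpers, using values
    // derived from the 9-bits-per-level x64 paging layout.
    #[test]
    fn pte_index_math() {
        use super::get_amd64_pte_index;
        // The first 2 MiB boundary selects PDE index 1 (level 1) and PTE
        // index 0 (level 0).
        assert_eq!(get_amd64_pte_index(X64_LARGE_PAGE_SIZE, 1), 1);
        assert_eq!(get_amd64_pte_index(X64_LARGE_PAGE_SIZE, 0), 0);
        // The first 1 GiB boundary selects PDPTE index 1 (level 2).
        assert_eq!(get_amd64_pte_index(X64_1GB_PAGE_SIZE, 2), 1);
        // The top of the 48-bit VA space selects the last PML4 entry.
        assert_eq!(get_amd64_pte_index((1u64 << 48) - 1, 3), 511);
    }
    #[test]
    fn relocate_address_signed_offsets() {
        // Positive offsets move the address up; negative offsets subtract
        // the magnitude without wrapping.
        assert_eq!(
            PageTableRelocationBuilder::relocate_address(0x2000, 0x1000),
            0x3000
        );
        assert_eq!(
            PageTableRelocationBuilder::relocate_address(0x2000, -0x1000),
            0x1000
        );
    }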
}