use std::collections::{HashMap, HashSet};
use vmi_arch_amd64::{Amd64, PageTableEntry, PageTableLevel};
use vmi_core::{
AddressContext, Architecture as _, Gfn, MemoryAccess, MemoryAccessOptions, Pa, Va, VcpuId,
View, VmiCore, VmiError,
driver::{VmiDriver, VmiRead, VmiSetProtection},
};
use super::super::{
ArchAdapter, PageEntryUpdate, PageTableMonitorAdapter, PageTableMonitorEvent, TagType,
};
/// Identifies a monitored virtual address: the view it is monitored in plus
/// its address context (the code reads `ctx.va` and `ctx.root` from it).
type VaKey = (View, AddressContext);
/// Identifies a monitored page table entry: the view plus the physical
/// address of the entry itself.
type EntryKey = (View, Pa);
/// Identifies a page table page: the view plus the guest frame number that
/// holds the table.
type TableKey = (View, Gfn);
/// Bookkeeping for a single monitored virtual address.
struct MonitoredVa<Tag> {
/// Caller-supplied tag stored when the VA was registered via `monitor`.
tag: Tag,
/// `true` once a walk reached a present leaf entry for this VA.
paged_in: bool,
/// Physical address the VA resolved to; `Some` only while `paged_in`.
resolved_pa: Option<Pa>,
/// Page table entries traversed for this VA, in walk order
/// (the walk in `monitor` starts at `Pml4` and descends).
entry_keys: Vec<EntryKey>,
}
/// Bookkeeping for a single monitored page table entry.
struct MonitoredEntry {
/// Value of the entry as last recorded by the monitor; compared against
/// a fresh read when the entry is processed as dirty.
cached_pte: PageTableEntry,
/// Monitored VAs whose walk passes through this entry, mapped to the
/// page table level at which they use it.
va_levels: HashMap<VaKey, PageTableLevel>,
}
/// Tells whether `pte`, found at `level`, ends the page walk.
///
/// An entry is treated as a leaf when it lives at the `Pt` level or when
/// `pte.large()` reports `true`.
fn is_leaf(level: PageTableLevel, pte: PageTableEntry) -> bool {
    if level == PageTableLevel::Pt {
        return true;
    }
    pte.large()
}
/// Computes the physical address `va` resolves to through a leaf entry.
///
/// The result is the base address of frame `pfn` plus the offset of `va`
/// within a mapping of the given `level`.
fn leaf_pa(va: Va, level: PageTableLevel, pfn: Gfn) -> Pa {
    let frame_base = Amd64::pa_from_gfn(pfn);
    let offset_in_frame = Amd64::va_offset_for(va, level);
    frame_base + offset_in_frame
}
/// Reads the page table entry stored at physical address `pa`.
///
/// # Errors
///
/// Propagates whatever error `VmiCore::read_struct` reports.
fn read_pte<Driver: VmiRead>(
    vmi: &VmiCore<Driver>,
    pa: Pa,
) -> Result<PageTableEntry, VmiError> {
    vmi.read_struct(pa)
}
/// Selects [`PageTableMonitorAmd64`] as the page table monitor
/// implementation for the AMD64 architecture.
impl<Driver, Tag> ArchAdapter<Driver, Tag> for Amd64
where
Driver: VmiDriver<Architecture = Amd64> + VmiRead + VmiSetProtection,
Tag: TagType,
{
type Monitor = PageTableMonitorAmd64<Tag>;
}
/// Page table monitor for AMD64.
///
/// Tracks, per view, the page table entries that translate each monitored
/// virtual address and restricts the page table pages holding them to
/// read-only access (see `add_table_ref`).
pub struct PageTableMonitorAmd64<Tag>
where
Tag: TagType,
{
/// All monitored virtual addresses.
vas: HashMap<VaKey, MonitoredVa<Tag>>,
/// All monitored page table entries, keyed by view and entry address.
entries: HashMap<EntryKey, MonitoredEntry>,
/// Reference count per protected page table page; one reference is taken
/// for each monitored entry created in that page.
tables: HashMap<TableKey, usize>,
/// Entries flagged by `mark_dirty_entry`, grouped by vCPU, waiting for
/// `process_dirty_entries`.
dirty: HashMap<VcpuId, HashSet<EntryKey>>,
}
impl<Tag> PageTableMonitorAmd64<Tag>
where
Tag: TagType,
{
/// Takes one reference on the page table page `gfn` in `view`.
///
/// The first reference restricts the page to read-only access (with
/// `IGNORE_PAGE_WALK_UPDATES`); later references only bump the counter.
///
/// # Errors
///
/// Returns the driver error if the access restriction cannot be applied.
/// In that case no bookkeeping entry is left behind, so the page is not
/// reported as monitored and is not "restored" later.
fn add_table_ref<Driver>(
    &mut self,
    vmi: &VmiCore<Driver>,
    gfn: Gfn,
    view: View,
) -> Result<(), VmiError>
where
    Driver: VmiRead + VmiSetProtection,
{
    let table_key = (view, gfn);

    // Already protected: just account for the additional user.
    if let Some(refcount) = self.tables.get_mut(&table_key) {
        *refcount += 1;
        return Ok(());
    }

    // Protect first and record afterwards, so that a failure here cannot
    // leave a zero-refcount entry in `tables`.
    vmi.set_memory_access_with_options(
        gfn,
        view,
        MemoryAccess::R,
        MemoryAccessOptions::IGNORE_PAGE_WALK_UPDATES,
    )?;
    self.tables.insert(table_key, 1);
    Ok(())
}
/// Drops one reference on the page table page `gfn` in `view`.
///
/// When the last reference goes away the page is forgotten and its access
/// is set back to read-write. A failure to do so is logged as a warning,
/// except for `VmiError::ViewNotFound`, which is ignored. Unknown pages
/// are a no-op.
fn remove_table_ref<Driver>(&mut self, vmi: &VmiCore<Driver>, gfn: Gfn, view: View)
where
    Driver: VmiRead + VmiSetProtection,
{
    let key = (view, gfn);
    let Some(count) = self.tables.get_mut(&key) else {
        return;
    };

    *count -= 1;
    if *count != 0 {
        return;
    }

    // Last user is gone: stop tracking the page and lift the restriction.
    self.tables.remove(&key);
    if let Err(err) = vmi.set_memory_access(gfn, view, MemoryAccess::RW) {
        if !matches!(err, VmiError::ViewNotFound) {
            tracing::warn!(%gfn, %view, %err, "failed to restore memory access");
        }
    }
}
/// Removes `va_key` from the set of VAs using the given entry.
///
/// If no VA uses the entry afterwards, the entry is dropped and the
/// reference it held on its page table page is released. Unknown entries
/// are ignored.
fn detach_va_from_entry<Driver>(
    &mut self,
    vmi: &VmiCore<Driver>,
    va_key: VaKey,
    (view, pa): EntryKey,
) where
    Driver: VmiRead + VmiSetProtection,
{
    let key = (view, pa);
    let Some(monitored) = self.entries.get_mut(&key) else {
        return;
    };

    monitored.va_levels.remove(&va_key);
    if !monitored.va_levels.is_empty() {
        // Still needed by at least one other VA.
        return;
    }

    self.entries.remove(&key);
    self.remove_table_ref(vmi, Amd64::gfn_from_pa(pa), view);
}
/// Detaches `va_key` from every entry that follows `anchor_key` in its
/// walk chain, keeping the anchor itself and everything before it.
///
/// Does nothing when the VA is unknown. A missing anchor trips a debug
/// assertion and is otherwise ignored.
fn tear_down_subtree<Driver>(
    &mut self,
    vmi: &VmiCore<Driver>,
    va_key: VaKey,
    anchor_key: EntryKey,
) where
    Driver: VmiRead + VmiSetProtection,
{
    let Some(monitored) = self.vas.get_mut(&va_key) else {
        return;
    };

    let anchor_index = monitored
        .entry_keys
        .iter()
        .position(|key| *key == anchor_key);
    let Some(anchor_index) = anchor_index else {
        debug_assert!(
            false,
            "tear_down_subtree: anchor {anchor_key:?} not found in VA {va_key:?} entry_keys"
        );
        return;
    };

    // Everything past the anchor belongs to the subtree being torn down.
    let below_anchor = monitored.entry_keys.split_off(anchor_index + 1);
    for key in below_anchor {
        self.detach_va_from_entry(vmi, va_key, key);
    }
}
/// Walks the page tables for a monitored VA below `parent_level`, starting
/// in the table at `start_gfn`, and registers every entry it visits.
///
/// Each visited entry is recorded for the VA and appended to its
/// `entry_keys`. The walk stops at the first non-present entry or at a
/// leaf; on a leaf the VA is marked paged-in and a `PageIn` event is
/// pushed to `events`.
///
/// # Errors
///
/// Returns the driver error if protecting a table page or reading an
/// entry fails. A table reference taken for an entry whose read then
/// fails is released again, so it does not leak. Entries registered
/// earlier in the walk stay attached to the VA.
fn walk_subtree<Driver>(
    &mut self,
    vmi: &VmiCore<Driver>,
    (view, ctx): VaKey,
    start_gfn: Gfn,
    parent_level: PageTableLevel,
    events: &mut Vec<PageTableMonitorEvent>,
) -> Result<(), VmiError>
where
    Driver: VmiRead + VmiSetProtection,
{
    let va_key = (view, ctx);
    let mut current_gfn = start_gfn;
    let mut level_opt = parent_level.next();

    while let Some(level) = level_opt {
        // Each entry is 8 bytes wide.
        let index = Amd64::va_index_for(ctx.va, level);
        let entry_pa = Amd64::pa_from_gfn(current_gfn) + index * 8;
        let entry_key = (view, entry_pa);

        // Only a newly tracked entry takes a reference on its table page.
        let newly_tracked = !self.entries.contains_key(&entry_key);
        if newly_tracked {
            self.add_table_ref(vmi, current_gfn, view)?;
        }

        let pte = match read_pte(vmi, entry_pa) {
            Ok(pte) => pte,
            Err(err) => {
                // No entry will own the reference taken above; give it
                // back so the page does not stay protected forever.
                if newly_tracked {
                    self.remove_table_ref(vmi, current_gfn, view);
                }
                return Err(err);
            }
        };

        // NOTE(review): unlike `monitor`, an already-tracked entry keeps
        // its previous `cached_pte` here - confirm this is intentional.
        let entry = self.entries.entry(entry_key).or_insert(MonitoredEntry {
            cached_pte: pte,
            va_levels: HashMap::new(),
        });
        entry.va_levels.insert(va_key, level);

        if let Some(va) = self.vas.get_mut(&va_key) {
            va.entry_keys.push(entry_key);
        }

        if !pte.present() {
            break;
        }

        if is_leaf(level, pte) {
            let pa = leaf_pa(ctx.va, level, pte.pfn());
            if let Some(va) = self.vas.get_mut(&va_key) {
                va.paged_in = true;
                va.resolved_pa = Some(pa);
            }
            events.push(PageTableMonitorEvent::PageIn(PageEntryUpdate {
                view,
                ctx,
                pa,
            }));
            break;
        }

        current_gfn = pte.pfn();
        level_opt = level.next();
    }

    Ok(())
}
}
impl<Driver, Tag> PageTableMonitorAdapter<Driver, Tag> for PageTableMonitorAmd64<Tag>
where
Driver: VmiDriver<Architecture = Amd64> + VmiRead + VmiSetProtection,
Tag: TagType,
{
/// Creates a monitor that tracks nothing yet.
fn new() -> Self {
    let vas = HashMap::new();
    let entries = HashMap::new();
    let tables = HashMap::new();
    let dirty = HashMap::new();
    Self {
        vas,
        entries,
        tables,
        dirty,
    }
}
/// Returns the number of page table pages currently tracked in `tables`.
fn monitored_tables(&self) -> usize {
self.tables.len()
}
/// Returns the number of page table entries currently tracked in `entries`.
fn monitored_entries(&self) -> usize {
self.entries.len()
}
/// Returns how many monitored VAs are currently marked as paged-in.
fn paged_in_entries(&self) -> usize {
    self.vas
        .values()
        .map(|monitored| usize::from(monitored.paged_in))
        .sum()
}
/// Emits the monitor state at `trace` level: one summary record followed
/// by one record per monitored VA.
fn dump(&self) {
    tracing::trace!(
        tables = self.tables.len(),
        entries = self.entries.len(),
        vas = self.vas.len(),
        paged_in = self.vas.values().filter(|monitored| monitored.paged_in).count(),
        "page table monitor state"
    );

    self.vas.iter().for_each(|(key, monitored)| {
        let (view, ctx) = *key;
        tracing::trace!(
            va = %ctx.va,
            root = %ctx.root,
            view = %view,
            tag = ?monitored.tag,
            paged_in = monitored.paged_in,
            resolved_pa = ?monitored.resolved_pa,
            chain_len = monitored.entry_keys.len(),
            " monitored VA"
        );
    });
}
fn monitor(
&mut self,
vmi: &VmiCore<Driver>,
ctx: impl Into<AddressContext>,
view: View,
tag: Tag,
) -> Result<(), VmiError> {
let ctx = ctx.into();
let va_key = (view, ctx);
if self.vas.contains_key(&va_key) {
self.unmonitor(vmi, ctx, view)?;
}
let mut entry_keys = Vec::new();
let mut current_gfn = Amd64::gfn_from_pa(ctx.root);
let mut paged_in = false;
let mut resolved_pa = None;
let mut level_opt = Some(PageTableLevel::Pml4);
while let Some(level) = level_opt {
let index = Amd64::va_index_for(ctx.va, level);
let entry_pa = Amd64::pa_from_gfn(current_gfn) + index * 8;
let entry_key = (view, entry_pa);
if !self.entries.contains_key(&entry_key) {
self.add_table_ref(vmi, current_gfn, view)?;
}
let pte = read_pte(vmi, entry_pa)?;
let entry = self.entries.entry(entry_key).or_insert(MonitoredEntry {
cached_pte: pte,
va_levels: HashMap::new(),
});
entry.cached_pte = pte;
entry.va_levels.insert(va_key, level);
entry_keys.push(entry_key);
if !pte.present() {
break;
}
if is_leaf(level, pte) {
resolved_pa = Some(leaf_pa(ctx.va, level, pte.pfn()));
paged_in = true;
break;
}
current_gfn = pte.pfn();
level_opt = level.next();
}
self.vas.insert(
va_key,
MonitoredVa {
tag,
paged_in,
resolved_pa,
entry_keys,
},
);
Ok(())
}
/// Stops monitoring the virtual address described by `ctx` in `view`.
///
/// The VA is detached from every entry in its walk chain; entries and
/// table pages that are no longer needed are released. Unknown VAs are a
/// no-op. The visible implementation always returns `Ok(())`.
fn unmonitor(
    &mut self,
    vmi: &VmiCore<Driver>,
    ctx: impl Into<AddressContext>,
    view: View,
) -> Result<(), VmiError> {
    let ctx = ctx.into();
    let va_key = (view, ctx);

    if let Some(monitored) = self.vas.remove(&va_key) {
        for key in monitored.entry_keys {
            self.detach_va_from_entry(vmi, va_key, key);
        }
    }

    Ok(())
}
fn unmonitor_all(&mut self, vmi: &VmiCore<Driver>) {
for &(view, gfn) in self.tables.keys() {
let _ = vmi.set_memory_access(gfn, view, MemoryAccess::RW);
}
self.tables.clear();
self.entries.clear();
self.vas.clear();
self.dirty.clear();
}
/// Stops monitoring every virtual address registered in `view`.
///
/// The result of each individual `unmonitor` call is discarded.
fn unmonitor_view(&mut self, vmi: &VmiCore<Driver>, view: View) {
    // Collect first: `unmonitor` mutates `vas` while we go through it.
    let mut targets: Vec<VaKey> = Vec::new();
    for &key in self.vas.keys() {
        if key.0 == view {
            targets.push(key);
        }
    }

    for (v, ctx) in targets {
        debug_assert_eq!(v, view);
        let _ = self.unmonitor(vmi, ctx, view);
    }
}
/// Flags the entry at `entry_pa` in `view` as dirty for `vcpu_id`.
///
/// Returns `true` only if the entry is monitored and was not already
/// flagged for that vCPU. Entries that are not monitored are ignored and
/// yield `false`.
fn mark_dirty_entry(&mut self, entry_pa: Pa, view: View, vcpu_id: VcpuId) -> bool {
    let key = (view, entry_pa);
    self.entries.contains_key(&key) && self.dirty.entry(vcpu_id).or_default().insert(key)
}
/// Re-reads every entry flagged dirty for `vcpu_id` and reconciles the
/// monitor state with the new page table contents.
///
/// For each entry whose value changed, every VA that walks through it is
/// updated: mappings that went away produce `PageOut` events and have the
/// part of their chain below the entry torn down; mappings that appeared
/// produce `PageIn` events, either directly (new leaf) or by walking the
/// new subtree.
///
/// # Errors
///
/// Returns the driver error if an entry cannot be read or a subtree walk
/// fails.
///
/// NOTE(review): the dirty set is removed from `self.dirty` up front, so
/// on an early error return the not-yet-processed keys and the events
/// collected so far are dropped - confirm callers can tolerate that.
fn process_dirty_entries(
&mut self,
vmi: &VmiCore<Driver>,
vcpu_id: VcpuId,
) -> Result<Vec<PageTableMonitorEvent>, VmiError> {
// Take ownership of this vCPU's dirty set; it is empty if none exists.
let dirty_keys = self.dirty.remove(&vcpu_id).unwrap_or_default();
// Pair each still-monitored dirty entry with the greatest level at which
// any VA uses it; entries that vanished in the meantime are skipped.
let mut to_process = Vec::new();
for key in dirty_keys {
if let Some(entry) = self.entries.get(&key) {
let max_level = match entry.va_levels.values().copied().max() {
Some(level) => level,
None => continue,
};
to_process.push((key, max_level));
}
}
// Greatest level first - presumably so upper-level tables are handled
// before the tables below them; confirm against `PageTableLevel`'s ordering.
to_process.sort_by_key(|b| std::cmp::Reverse(b.1));
let mut events = Vec::new();
for (entry_key, _) in to_process {
// Processing an earlier entry may have torn this one down.
let entry = match self.entries.get(&entry_key) {
Some(entry) => entry,
None => continue,
};
let old_pte = entry.cached_pte;
let (_, entry_pa) = entry_key;
let new_pte = read_pte(vmi, entry_pa)?;
// Nothing changed relative to the cached value.
if old_pte == new_pte {
continue;
}
if let Some(entry) = self.entries.get_mut(&entry_key) {
entry.cached_pte = new_pte;
}
let old_present = old_pte.present();
let new_present = new_pte.present();
// Snapshot the users of this entry: the loop body mutates `self`.
let va_levels = self.entries[&entry_key]
.va_levels
.iter()
.map(|(&k, &v)| (k, v))
.collect::<Vec<_>>();
for ((view, ctx), level) in va_levels {
let va_key = (view, ctx);
let old_leaf = old_present && is_leaf(level, old_pte);
let new_leaf = new_present && is_leaf(level, new_pte);
// The old mapping must be dismantled if it existed and either
// disappeared, points to a different frame, or changed leaf-ness.
let need_teardown = old_present
&& (!new_present || old_pte.pfn() != new_pte.pfn() || old_leaf != new_leaf);
// A new mapping must be established under the mirrored conditions.
let need_setup = new_present
&& (!old_present || old_pte.pfn() != new_pte.pfn() || old_leaf != new_leaf);
// E.g. only flag bits changed: nothing to do for this VA.
if !need_teardown && !need_setup {
continue;
}
if need_teardown {
// Report the page-out before forgetting the resolved address.
if let Some(va) = self.vas.get_mut(&va_key)
&& va.paged_in
{
if let Some(pa) = va.resolved_pa {
events.push(PageTableMonitorEvent::PageOut(PageEntryUpdate {
view,
ctx,
pa,
}));
}
va.paged_in = false;
va.resolved_pa = None;
}
// A former leaf has no entries below it to detach.
if !old_leaf {
self.tear_down_subtree(vmi, va_key, entry_key);
}
}
if need_setup {
if new_leaf {
// The entry itself now maps the page.
let pa = leaf_pa(ctx.va, level, new_pte.pfn());
if let Some(va) = self.vas.get_mut(&va_key) {
va.paged_in = true;
va.resolved_pa = Some(pa);
}
events.push(PageTableMonitorEvent::PageIn(PageEntryUpdate {
view,
ctx,
pa,
}));
}
else {
// The entry points to a lower table: register the new chain.
self.walk_subtree(vmi, va_key, new_pte.pfn(), level, &mut events)?;
}
}
}
}
Ok(events)
}
}