ostd 0.17.2

Rust OS framework that facilitates the development of and innovation in OS kernels
Documentation
// SPDX-License-Identifier: MPL-2.0

//! This module provides accessors to the page table entries in a node.

use super::{PageTableGuard, PageTableNode, PteState, PteStateRef, PteTrait};
use crate::{
    mm::{
        HasPaddr, nr_subpage_per_huge,
        page_prop::PageProperty,
        page_size,
        page_table::{PageTableConfig, PageTableNodeRef, PteScalar},
    },
    panic::PanicGuard,
    sync::RcuDrop,
    task::atomic_mode::InAtomicMode,
};

/// A view of an entry in a page table node.
///
/// It can be borrowed from a node using the [`PageTableGuard::entry`] method.
///
/// This is a static reference to an entry in a node that does not account for
/// a dynamic reference count to the child. It can be used to create an owned
/// handle, which is a [`PteState`].
pub(in crate::mm) struct Entry<'a, 'rcu, C: PageTableConfig> {
    /// The page table entry.
    ///
    /// We store a copy of the page table entry here to reduce the number of
    /// reads from the node. We cannot hold a `&mut E` reference to the entry
    /// because other CPUs may modify the memory location for accessed/dirty
    /// bits. Such accesses would violate the aliasing rules of Rust and cause
    /// undefined behavior.
    pte: C::E,
    /// The index of the entry in the node.
    idx: usize,
    /// The node that contains the entry.
    node: &'a mut PageTableGuard<'rcu, C>,
}

impl<'a, 'rcu, C: PageTableConfig> Entry<'a, 'rcu, C> {
    /// Gets a reference to the child.
    pub(in crate::mm) fn to_ref(&self) -> PteStateRef<'rcu, C> {
        // SAFETY:
        //  - The child pointed to by the PTE outlives the reference, since
        //    either PTs and mapped items outlive `'rcu`.
        //  - The level matches the current node.
        unsafe { PteStateRef::from_pte(&self.pte, self.node.level()) }
    }

    /// Operates on the mapping properties of the entry.
    ///
    /// It only modifies the properties if the entry is present.
    pub(in crate::mm) fn protect(&mut self, op: &mut impl FnMut(&mut PageProperty)) {
        let level = self.node.level();
        let PteScalar::Mapped(pa, prop) = self.pte.to_repr(level) else {
            return;
        };

        let mut new_prop = prop;
        op(&mut new_prop);

        if prop == new_prop {
            return;
        }

        self.pte = C::E::from_repr(&PteScalar::Mapped(pa, new_prop), level);

        // SAFETY:
        //  1. The index is within the bounds.
        //  2. We replace the PTE with a new one, which differs only in
        //     `PageProperty`, so it's in `C` and at the correct paging level.
        //  3. The child is still owned by the page table node.
        unsafe { self.node.write_pte(self.idx, self.pte) };
    }

    /// Replaces the entry with a new child.
    ///
    /// The old child is returned.
    ///
    /// # Panics
    ///
    /// The method panics if the level of the new child does not match the
    /// current node.
    pub(in crate::mm) fn replace(&mut self, new_child: PteState<C>) -> PteState<C> {
        match &new_child {
            PteState::PageTable(node) => {
                assert_eq!(node.level(), self.node.level() - 1);
            }
            PteState::Mapped(item) => {
                assert_eq!(C::item_raw_info(&**item).1, self.node.level());
            }
            PteState::Absent => {}
        }

        // SAFETY:
        //  - The PTE is not referenced by other `PteStateRef`s (since we have `&mut self`).
        //  - The level matches the current node.
        let old_child = unsafe { PteState::from_pte(self.pte, self.node.level()) };

        if old_child.is_absent() && !new_child.is_absent() {
            *self.node.nr_children_mut() += 1;
        } else if !old_child.is_absent() && new_child.is_absent() {
            *self.node.nr_children_mut() -= 1;
        }

        self.pte = new_child.into_pte();

        // SAFETY:
        //  1. The index is within the bounds.
        //  2. The new PTE is a child in `C` and at the correct paging level.
        //  3. The ownership of the child is passed to the page table node.
        unsafe { self.node.write_pte(self.idx, self.pte) };

        old_child
    }

    /// Allocates a new child page table node and replaces the entry with it.
    ///
    /// If the old entry is not none, the operation will fail and return `None`.
    /// Otherwise, the lock guard of the new child page table node is returned.
    pub(in crate::mm::page_table) fn alloc_if_none(
        &mut self,
        guard: &'rcu dyn InAtomicMode,
    ) -> Option<PageTableGuard<'rcu, C>> {
        if !matches!(self.to_ref(), PteStateRef::Absent) || self.node.level() == 1 {
            return None;
        }

        let level = self.node.level();
        let new_page = RcuDrop::new(PageTableNode::<C>::alloc(level - 1));

        let paddr = new_page.paddr();
        // SAFETY: The page table won't be dropped before the RCU grace period
        // ends, so it outlives `'rcu`.
        let pt_ref = unsafe { PageTableNodeRef::borrow_paddr(paddr) };

        // Lock before writing the PTE, so no one else can operate on it.
        let pt_lock_guard = pt_ref.lock(guard);

        self.pte = PteState::PageTable(new_page).into_pte();

        // SAFETY:
        //  1. The index is within the bounds.
        //  2. The new PTE is a child in `C` and at the correct paging level.
        //  3. The ownership of the child is passed to the page table node.
        unsafe { self.node.write_pte(self.idx, self.pte) };

        *self.node.nr_children_mut() += 1;

        Some(pt_lock_guard)
    }

    /// Splits the entry to smaller pages if it maps to a huge page.
    ///
    /// If the entry does map to a huge page, it is split into smaller pages
    /// mapped by a child page table node. The new child page table node
    /// is returned.
    ///
    /// If the entry does not map to a untracked huge page, the method returns
    /// `None`.
    pub(in crate::mm::page_table) fn split_if_mapped_huge(
        &mut self,
        guard: &'rcu dyn InAtomicMode,
    ) -> Option<PageTableGuard<'rcu, C>> {
        let level = self.node.level();
        let PteScalar::Mapped(pa, prop) = self.pte.to_repr(level) else {
            return None;
        };

        let new_page = RcuDrop::new(PageTableNode::<C>::alloc(level - 1));

        let paddr = new_page.paddr();
        // SAFETY: The page table won't be dropped before the RCU grace period
        // ends, so it outlives `'rcu`.
        let pt_ref = unsafe { PageTableNodeRef::borrow_paddr(paddr) };

        // Lock before writing the PTE, so no one else can operate on it.
        let mut pt_lock_guard = pt_ref.lock(guard);

        // Prevent double-dropping the small items when panicking (e.g., debug assertion fails).
        let panic_guard = PanicGuard::new();

        for i in 0..nr_subpage_per_huge::<C>() {
            let small_pa = pa + i * page_size::<C>(level - 1);
            let mut entry = pt_lock_guard.entry(i);
            // SAFETY: It's a part of the mapped item, and the ownership is
            // properly transferred to the new sub-entry.
            let small_item = unsafe { C::item_from_raw(small_pa, level - 1, prop) };
            let old = entry.replace(PteState::Mapped(RcuDrop::new(small_item)));
            debug_assert!(old.is_absent());
        }

        self.pte = PteState::PageTable(new_page).into_pte();

        // SAFETY:
        //  1. The index is within the bounds.
        //  2. The new PTE is a child in `C` and at the correct paging level.
        //  3. The ownership of the child is passed to the page table node.
        unsafe { self.node.write_pte(self.idx, self.pte) };

        panic_guard.forget();

        Some(pt_lock_guard)
    }

    /// Create a new entry at the node with guard.
    ///
    /// # Safety
    ///
    /// The caller must ensure that the index is within the bounds of the node.
    pub(super) unsafe fn new_at(guard: &'a mut PageTableGuard<'rcu, C>, idx: usize) -> Self {
        // SAFETY: The index is within the bound.
        let pte = unsafe { guard.read_pte(idx) };
        Self {
            pte,
            idx,
            node: guard,
        }
    }
}