starry-kernel 0.5.10

A Linux-compatible OS kernel built on ArceOS unikernel
Documentation
use alloc::{
    boxed::Box,
    string::ToString,
    sync::{Arc, Weak},
    vec::Vec,
};
use core::sync::atomic::{AtomicUsize, Ordering};

use ax_errno::{AxError, AxResult};
use ax_fs::{CachedFile, FileFlags};
use ax_hal::paging::{MappingFlags, PageSize, PageTableCursor, PagingError};
use ax_memory_addr::{PAGE_SIZE_4K, VirtAddr, VirtAddrRange};
use ax_sync::Mutex;
use weak_map::StrongRef;

use super::{AddrSpace, Backend, BackendFileInfo, BackendOps, PopulateCallback, pages_in};

/// Shared state behind a [`FileBackend`].
///
/// Held in an `Arc` so that the cache evict listener and the populate callback
/// can refer to the same mapping state as the backend itself.
#[doc(hidden)]
pub struct FileBackendInner {
    /// Whether the mapping is shared; reported by `FileBackend::is_shared` and
    /// `FileBackend::file_info`.
    shared: bool,
    /// Mutable placement data (start address and file page offset).
    file_data: Mutex<FileBackendInnerData>,
    /// Cached file whose pages are mapped into the address space.
    cache: CachedFile,
    /// Flags the file was opened with; mapping permissions are validated
    /// against them in `FileBackend::check_flags`.
    flags: FileFlags,
    /// Handle returned by `cache.add_evict_listener`; `0` means no listener
    /// has been registered (see `register_listener` and `Drop`).
    handle: AtomicUsize,
    /// Token cloned into backends derived via `with_start` and `split`, and
    /// exposed as a `Weak<()>` by `FileBackend::futex_handle`.
    // NOTE(review): presumably used as an identity key for futexes on this
    // mapping — confirm against the callers of `futex_handle`.
    futex_handle: Arc<()>,
}

/// Placement of a file mapping: where it starts in virtual memory and which
/// file page that start address corresponds to.
#[derive(Clone)]
struct FileBackendInnerData {
    /// Virtual address at which the mapping begins.
    start: VirtAddr,
    /// File page number (in 4 KiB pages) that is mapped at `start`.
    offset_page: u32,
}

impl Drop for FileBackendInner {
    /// Removes this backend's evict listener from the cache, if one was ever
    /// registered.
    fn drop(&mut self) {
        match self.handle.load(Ordering::Acquire) {
            // Zero is the "never registered" sentinel stored at construction.
            0 => {}
            // SAFETY(review): `registered` is the value stored by
            // `register_listener`, i.e. the handle returned by
            // `add_evict_listener` on this same `cache` — confirm that this is
            // the full contract `remove_evict_listener` requires.
            registered => unsafe {
                self.cache.remove_evict_listener(registered);
            },
        }
    }
}
impl FileBackendInner {
    /// Registers an evict listener on the cache that unmaps evicted pages of
    /// this mapping from `aspace`.
    ///
    /// The listener captures only weak references to `self` and `aspace`, and
    /// silently does nothing when either is gone or when the address space
    /// lock cannot be taken immediately.
    ///
    /// # Panics
    ///
    /// Panics if a listener handle has already been stored for this backend.
    pub fn register_listener(self: &Arc<Self>, aspace: &Arc<Mutex<AddrSpace>>) {
        assert!(
            self.handle.load(Ordering::Acquire) == 0,
            "Listener already registered"
        );

        let weak_aspace = Arc::downgrade(aspace);
        let weak_self = Arc::downgrade(self);
        let handle = self.cache.add_evict_listener(move |pn, _page| {
            if let Some(this) = weak_self.upgrade() {
                // A dropped address space has nothing left to unmap.
                if let Some(aspace) = weak_aspace.upgrade() {
                    // A failed `try_lock` can happen during the populate
                    // process, when new pages are being populated and old
                    // pages are being evicted. In that case the unmapping is
                    // delegated to the populate process.
                    if let Some(mut locked) = aspace.try_lock() {
                        this.on_evict(pn, &mut locked);
                    }
                }
            }
        });
        self.handle.store(handle, Ordering::Release);
    }

    /// Unmaps the virtual page of this mapping that corresponds to the evicted
    /// file page `pn`, provided that address still belongs to this mapping.
    ///
    /// An already-unmapped page is not an error; any other unmap failure is
    /// logged and otherwise ignored.
    fn on_evict(self: &Arc<Self>, pn: u32, aspace: &mut AddrSpace) {
        let data = self.file_data.lock();

        // File pages before the mapping's first page have no address here.
        let Some(rel_pn) = pn.checked_sub(data.offset_page) else {
            return;
        };
        let vaddr = data.start + rel_pn as usize * PageSize::Size4K as usize;

        // Only touch the page if the area at `vaddr` is backed by this very
        // `FileBackendInner` (pointer identity, not just the same file).
        let owned_by_self = match aspace.find_area(vaddr) {
            Some(area) => {
                matches!(area.backend(), Backend::File(file) if Arc::ptr_eq(&file.0, self))
            }
            None => false,
        };
        if !owned_by_self {
            return;
        }

        let pt = aspace.page_table_mut();
        if let Err(err) = pt.cursor().unmap(vaddr) {
            if !matches!(err, PagingError::NotMapped) {
                warn!("Failed to unmap page {:?}: {:?}", vaddr, err);
            }
        }
    }
}

/// File-backed mapping backend.
///
/// Field `0` is the shared per-mapping state. Field `1` is a weak reference to
/// the address space the mapping lives in; `split` upgrades it to register the
/// evict listener for the newly created backend.
#[derive(Clone)]
pub struct FileBackend(Arc<FileBackendInner>, Weak<Mutex<AddrSpace>>);
impl FileBackend {
    /// Verifies that the file's open flags permit a mapping with `flags`.
    ///
    /// `READ` and `WRITE` mapping permissions require the corresponding
    /// `FileFlags`; other mapping bits are not checked.
    ///
    /// # Errors
    ///
    /// Returns `AxError::PermissionDenied` when a required file flag is missing.
    fn check_flags(&self, flags: MappingFlags) -> AxResult {
        let requirements = [
            (MappingFlags::READ, FileFlags::READ),
            (MappingFlags::WRITE, FileFlags::WRITE),
        ];

        let mut needed = FileFlags::empty();
        for (mapping_bit, file_bit) in requirements {
            if flags.contains(mapping_bit) {
                needed |= file_bit;
            }
        }

        if self.0.flags.contains(needed) {
            Ok(())
        } else {
            Err(AxError::PermissionDenied)
        }
    }

    /// Clone with a different start address and a fresh evict listener.
    pub fn with_start(&self, new_start: VirtAddr, aspace: &Arc<Mutex<AddrSpace>>) -> Self {
        let mut file_data = self.0.file_data.lock().clone();
        file_data.start = new_start;
        let inner = Arc::new(FileBackendInner {
            shared: self.0.shared,
            file_data: Mutex::new(file_data),
            cache: self.0.cache.clone(),
            flags: self.0.flags,
            handle: AtomicUsize::new(0),
            futex_handle: self.0.futex_handle.clone(),
        });
        inner.register_listener(aspace);
        Self(inner, aspace.downgrade())
    }

    /// Returns a weak reference to the futex handle token of this mapping.
    ///
    /// Backends derived through `with_start` or `split` clone the same token.
    pub fn futex_handle(&self) -> Weak<()> {
        let token = &self.0.futex_handle;
        Arc::downgrade(token)
    }

    pub fn is_shared(&self) -> bool {
        self.0.shared
    }

    pub fn cache(&self) -> &CachedFile {
        &self.0.cache
    }

    /// Writes back the dirty cached pages that fall inside
    /// `[range_start, range_end)` of this mapping.
    ///
    /// The virtual range is clamped to the part at or after the mapping's
    /// start, converted to file page numbers (the end is rounded up to a whole
    /// 4 KiB page), and every dirty page in that page range is written back.
    ///
    /// `_aspace` and `_area_flags` are currently unused: despite the name, this
    /// method does not change any page-table protection itself.
    ///
    /// # Errors
    ///
    /// Returns `AxError::Io` if the cache fails to write the pages back.
    pub fn writeback_and_protect(
        &self,
        _aspace: &mut AddrSpace,
        range_start: VirtAddr,
        range_end: VirtAddr,
        _area_flags: MappingFlags,
    ) -> AxResult {
        // Copy the placement out and release the lock right away, so that the
        // mutex is not held across the cache scan and the writeback I/O below.
        let (offset_page, mapping_start) = {
            let file_data = self.0.file_data.lock();
            (file_data.offset_page, file_data.start)
        };

        // Clamp the requested range to the portion that lies at or after the
        // start of the mapping.
        let mapping_size = (range_end - range_start).min(
            range_end
                .as_usize()
                .saturating_sub(mapping_start.as_usize()),
        );
        let local_start = range_start
            .as_usize()
            .saturating_sub(mapping_start.as_usize());
        let local_end = local_start + mapping_size;

        // Translate mapping-relative byte offsets into file page numbers.
        let start_pn = offset_page + (local_start / PAGE_SIZE_4K) as u32;
        let end_pn = offset_page + local_end.div_ceil(PAGE_SIZE_4K) as u32;

        let dirty_pns = self.0.cache.dirty_pages_in_range(start_pn, end_pn);
        if dirty_pns.is_empty() {
            return Ok(());
        }

        self.0
            .cache
            .writeback_pages(&dirty_pns)
            .map_err(|_| AxError::Io)?;

        Ok(())
    }

    /// Collects descriptive information about the mapped file: absolute path,
    /// byte offset of the mapping within the file, inode, device, and the
    /// shared flag.
    ///
    /// # Errors
    ///
    /// Propagates failures from resolving the absolute path or from reading
    /// the file's metadata.
    pub fn file_info(&self) -> AxResult<BackendFileInfo> {
        let location = self.0.cache.location();
        let path = location.absolute_path()?.to_string();

        // The offset is stored in 4 KiB pages; report it in bytes.
        let offset_bytes = u64::from(self.0.file_data.lock().offset_page) * PAGE_SIZE_4K as u64;

        let inode = location.inode();
        let dev = location.metadata()?.device;

        Ok(BackendFileInfo {
            path,
            offset: Some(offset_bytes),
            inode: Some(inode),
            dev: Some(dev),
            shared: self.0.shared,
        })
    }
}

impl BackendOps for FileBackend {
    /// File mappings always use 4 KiB pages.
    fn page_size(&self) -> PageSize {
        PageSize::Size4K
    }

    /// Validates the requested permissions against the file's open flags.
    ///
    /// No page-table entries are created here; `_range` and `_pt` are unused.
    ///
    /// # Errors
    ///
    /// Returns `AxError::PermissionDenied` if the file flags do not allow `flags`.
    fn map(
        &self,
        _range: VirtAddrRange,
        flags: MappingFlags,
        _pt: &mut PageTableCursor,
    ) -> AxResult {
        self.check_flags(flags)?;
        Ok(())
    }

    /// Removes every 4 KiB page of `range` from the page table.
    ///
    /// Pages that are not currently mapped are skipped.
    ///
    /// # Errors
    ///
    /// Stops at the first page that fails to unmap for any other reason, logs
    /// it, and returns the converted paging error.
    fn unmap(&self, range: VirtAddrRange, pt: &mut PageTableCursor) -> AxResult {
        for addr in pages_in(range, PageSize::Size4K)? {
            let Err(err) = pt.unmap(addr) else {
                continue;
            };
            if matches!(err, PagingError::NotMapped) {
                continue;
            }
            warn!("Failed to unmap page {:?}: {:?}", addr, err);
            return Err(err.into());
        }
        Ok(())
    }

    /// Validates a protection change against the file's open flags.
    ///
    /// Only the permission check is performed here; `_range` and `_pt` are
    /// unused.
    ///
    /// # Errors
    ///
    /// Returns `AxError::PermissionDenied` if the file flags do not allow
    /// `new_flags`.
    fn on_protect(
        &self,
        _range: VirtAddrRange,
        new_flags: MappingFlags,
        _pt: &mut PageTableCursor,
    ) -> AxResult {
        self.check_flags(new_flags)?;
        Ok(())
    }

    /// Makes the 4 KiB pages of `range` accessible by mapping them to the
    /// file's cached pages.
    ///
    /// For each page in `range`:
    /// - If it is already mapped and the access needs `WRITE` but the page is
    ///   not writable, the cached page is marked dirty (unless the file is an
    ///   in-memory file) and the page is remapped with `flags`.
    /// - If it is already mapped and its flags already cover `access_flags`,
    ///   it is just counted.
    /// - If it is not mapped, the cached page is looked up (or inserted) and
    ///   mapped; for non-in-memory files `WRITE` is stripped from the flags.
    ///
    /// Returns the number of pages counted, plus an optional callback. The
    /// callback is present when inserting pages evicted other pages from the
    /// cache; running it calls `on_evict` for each evicted page number.
    ///
    /// # Errors
    ///
    /// Returns `AxError::BadAddress` if a page-table query fails with anything
    /// other than "not mapped", and propagates errors from `pages_in`, the
    /// cache, and the page-table `map`/`remap` calls.
    fn populate(
        &self,
        range: VirtAddrRange,
        flags: MappingFlags,
        access_flags: MappingFlags,
        pt: &mut PageTableCursor,
    ) -> AxResult<(usize, Option<PopulateCallback>)> {
        let mut pages = 0;
        // File page numbers evicted from the cache while inserting new pages.
        let mut to_be_evicted = Vec::new();
        // NOTE(review): this guard stays alive for the rest of the function,
        // including the cache calls below — confirm that is intended.
        let file_data = self.0.file_data.lock();
        // File page number that corresponds to the first page of `range`.
        let start_page =
            ((range.start - file_data.start) / PAGE_SIZE_4K) as u32 + file_data.offset_page;
        for (i, addr) in pages_in(range, PageSize::Size4K)?.enumerate() {
            let pn = start_page + i as u32;
            match pt.query(addr) {
                Ok((paddr, page_flags, _)) => {
                    if access_flags.contains(MappingFlags::WRITE)
                        && !page_flags.contains(MappingFlags::WRITE)
                    {
                        // Write access to a page that is mapped without WRITE:
                        // mark the cached page dirty and upgrade the mapping.
                        let in_memory = self.0.cache.in_memory();
                        self.0.cache.with_page(pn, |page| {
                            if !in_memory {
                                page.expect("page should be present").mark_dirty();
                            }
                            pt.remap(addr, paddr, flags)?;
                            pages += 1;
                            AxResult::Ok(())
                        })?;
                    } else if page_flags.contains(access_flags) {
                        // Already mapped with sufficient permissions.
                        pages += 1;
                    }
                }
                // If the page is not mapped, try map it.
                Err(PagingError::NotMapped) => {
                    let map_flags = if self.0.cache.in_memory() {
                        // For in memory files, we don't need to (and also
                        // mustn't) mark them dirty, so we can use the original
                        // flags.
                        flags
                    } else {
                        // NOTE(review): presumably WRITE is withheld so that
                        // the first write faults and takes the dirty-marking
                        // path above — confirm.
                        flags - MappingFlags::WRITE
                    };
                    self.0.cache.with_page_or_insert(pn, |page, evicted| {
                        // Remember the evicted page; it is handled by the
                        // callback returned below.
                        if let Some((pn, _)) = evicted {
                            to_be_evicted.push(pn);
                        }
                        pt.map(addr, page.paddr(), PageSize::Size4K, map_flags)?;
                        pages += 1;
                        Ok(())
                    })?;
                }
                Err(_) => return Err(AxError::BadAddress),
            }
        }
        Ok((
            pages,
            if to_be_evicted.is_empty() {
                None
            } else {
                // The evict listener skips its work when it cannot take the
                // address space lock (see `register_listener`), so the
                // unmapping of evicted pages is deferred to this callback.
                // NOTE(review): `on_evict` only unmaps pages whose area is
                // backed by this exact `FileBackendInner`; evicted pages
                // mapped through other backends of the same cache are not
                // handled here — verify that this is covered elsewhere.
                let inner = self.0.clone();
                Some(Box::new(move |aspace: &mut AddrSpace| {
                    for pn in to_be_evicted {
                        inner.on_evict(pn, aspace);
                    }
                }))
            },
        ))
    }

    /// Produces the backend for this mapping in a cloned address space.
    ///
    /// The new backend keeps the same start address and file placement but
    /// registers its own evict listener against `new_aspace`. No page-table
    /// entries are copied here; the range, flags and cursors are unused.
    fn clone_map(
        &self,
        _range: VirtAddrRange,
        _flags: MappingFlags,
        _old_pt: &mut PageTableCursor,
        _new_pt: &mut PageTableCursor,
        new_aspace: &Arc<Mutex<AddrSpace>>,
    ) -> AxResult<Backend> {
        // The lock is released at the end of this statement, before
        // `with_start` locks `file_data` again.
        let current_start = self.0.file_data.lock().start;
        let cloned = self.with_start(current_start, new_aspace);
        Ok(Backend::File(cloned))
    }

    /// Creates the backend for the part of this mapping that begins
    /// `align_diff` bytes after its current start.
    ///
    /// The new backend's start address and file page offset are both advanced
    /// by `align_diff`, and it gets its own evict listener. `self` is not
    /// modified.
    ///
    /// Returns `None` when `align_diff` is zero, or when the owning address
    /// space has already been dropped.
    ///
    /// # Panics
    ///
    /// Panics if `align_diff` is not a multiple of 4 KiB.
    fn split(&mut self, align_diff: usize) -> Option<Backend> {
        assert!(align_diff.is_multiple_of(PAGE_SIZE_4K));
        if align_diff == 0 {
            return None;
        }

        let file_data = self.0.file_data.lock();
        let tail_data = FileBackendInnerData {
            start: file_data.start + align_diff,
            offset_page: file_data.offset_page + (align_diff / PAGE_SIZE_4K) as u32,
        };
        let tail = Arc::new(FileBackendInner {
            shared: self.0.shared,
            file_data: Mutex::new(tail_data),
            cache: self.0.cache.clone(),
            flags: self.0.flags,
            handle: AtomicUsize::new(0),
            futex_handle: self.0.futex_handle.clone(),
        });

        // Register the listener, then release the strong reference to the
        // address space before the result is built.
        let aspace = self.1.upgrade()?;
        tail.register_listener(&aspace);
        drop(aspace);

        let tail_backend = FileBackend(tail, self.1.clone());
        Some(Backend::File(tail_backend))
    }

    /// Drops `shrink_size` bytes from the start of the mapping by advancing
    /// both the start address and the file page offset.
    ///
    /// # Panics
    ///
    /// Panics if `shrink_size` is not a multiple of 4 KiB.
    fn shrink_left(&mut self, shrink_size: usize) {
        assert!(shrink_size.is_multiple_of(PAGE_SIZE_4K));
        let skipped_pages = (shrink_size / PAGE_SIZE_4K) as u32;

        let mut placement = self.0.file_data.lock();
        placement.start += shrink_size;
        placement.offset_page += skipped_pages;
    }

    /// Shrinks the mapping from its end; a no-op for the file backend.
    fn shrink_right(&mut self, _shrink_size: usize) {
        // Nothing to update: the backend only tracks the mapping's start
        // address and the file page mapped at that start (see
        // `FileBackendInnerData`), and this method leaves both untouched.
    }
}

impl Backend {
    /// Creates a file-backed [`Backend`] for a mapping that starts at `start`
    /// and maps `cache` beginning at byte `offset`.
    ///
    /// `offset` is rounded down to a whole 4 KiB page and stored as a `u32`
    /// page number. An evict listener is registered against `aspace` before
    /// the backend is returned.
    pub fn new_file(
        start: VirtAddr,
        cache: CachedFile,
        flags: FileFlags,
        offset: usize,
        aspace: &Arc<Mutex<AddrSpace>>,
        shared: bool,
    ) -> Self {
        let placement = FileBackendInnerData {
            start,
            offset_page: (offset / PAGE_SIZE_4K) as u32,
        };
        let inner = Arc::new(FileBackendInner {
            shared,
            file_data: Mutex::new(placement),
            cache,
            flags,
            // No listener registered yet; `register_listener` fills this in.
            handle: AtomicUsize::new(0),
            futex_handle: Arc::new(()),
        });
        inner.register_listener(aspace);

        let backend = FileBackend(inner, aspace.downgrade());
        Self::File(backend)
    }
}