Skip to main content

secure_exec_kernel/
kernel.rs

1use crate::bridge::LifecycleState;
2use crate::command_registry::{CommandDriver, CommandRegistry};
3use crate::device_layer::{create_device_layer, DeviceLayer};
4use crate::dns::{
5    format_dns_resource, resolve_dns, resolve_dns_records, DnsConfig, DnsLookupPolicy,
6    DnsRecordResolution, DnsResolution, DnsResolverErrorKind, HickoryDnsResolver,
7    SharedDnsResolver,
8};
9use crate::fd_table::{
10    FdEntry, FdStat, FdTableError, FdTableManager, FileDescription, FileLockManager,
11    FileLockTarget, FlockOperation, ProcessFdTable, FILETYPE_CHARACTER_DEVICE, FILETYPE_DIRECTORY,
12    FILETYPE_PIPE, FILETYPE_REGULAR_FILE, FILETYPE_SYMBOLIC_LINK, F_DUPFD, O_APPEND, O_CREAT,
13    O_EXCL, O_NONBLOCK, O_TRUNC,
14};
15use crate::mount_table::{MountEntry, MountOptions, MountTable, MountedFileSystem};
16use crate::permissions::{
17    check_command_execution, check_network_access, FsOperation, NetworkOperation, PermissionError,
18    PermissionedFileSystem, Permissions,
19};
20use crate::pipe_manager::{PipeError, PipeManager};
21use crate::poll::{
22    PollEvents, PollFd, PollNotifier, PollResult, PollTarget, PollTargetEntry, PollTargetResult,
23    POLLERR, POLLHUP, POLLIN, POLLNVAL, POLLOUT,
24};
25use crate::process_table::{
26    DriverProcess, ProcessContext, ProcessExitCallback, ProcessInfo, ProcessStatus, ProcessTable,
27    ProcessTableError, ProcessWaitResult, SigmaskHow, SignalSet, DEFAULT_PROCESS_UMASK, SIGCONT,
28    SIGPIPE, SIGSTOP, SIGTSTP, SIGWINCH,
29};
30use crate::pty::{LineDisciplineConfig, PartialTermios, PtyError, PtyManager, Termios};
31use crate::resource_accounting::{
32    measure_filesystem_usage, FileSystemUsage, ResourceAccountant, ResourceError, ResourceLimits,
33    ResourceSnapshot, DEFAULT_MAX_OPEN_FDS,
34};
35use crate::root_fs::{RootFileSystem, RootFilesystemError, RootFilesystemSnapshot};
36use crate::socket_table::{
37    DatagramSocketOption, InetSocketAddress, ReceivedDatagram, SocketId, SocketMulticastMembership,
38    SocketRecord, SocketShutdown, SocketSpec, SocketState, SocketTable, SocketTableError,
39    SocketType,
40};
41use crate::user::{ProcessIdentity, UserConfig, UserManager};
42use crate::vfs::{
43    normalize_path, VfsError, VfsResult, VirtualFileSystem, VirtualStat, VirtualTimeSpec,
44    VirtualUtimeSpec,
45};
46use hickory_resolver::proto::rr::RecordType;
47use std::any::Any;
48use std::collections::{BTreeMap, BTreeSet};
49use std::error::Error;
50use std::fmt;
51#[cfg(test)]
52use std::sync::OnceLock;
53use std::sync::{Arc, Condvar, Mutex, MutexGuard, WaitTimeoutResult};
54use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
55
/// Result type returned by kernel operations; failures are reported as [`KernelError`].
pub type KernelResult<T> = Result<T, KernelError>;
// Re-exported from the process table; `ProcessWaitEvent` is exposed here as `WaitPidEvent`.
pub use crate::process_table::{ProcessWaitEvent as WaitPidEvent, WaitPidFlags};

// Seek-origin selectors.
// NOTE(review): the numbering matches the usual SEEK_SET/SEEK_CUR/SEEK_END values; the code
// that consumes these constants is outside this view.
pub const SEEK_SET: u8 = 0;
pub const SEEK_CUR: u8 = 1;
pub const SEEK_END: u8 = 2;
/// Octal mask `0o111`: the execute bit for owner, group and other.
const EXECUTABLE_PERMISSION_BITS: u32 = 0o111;
/// NOTE(review): presumably the maximum number of bytes inspected when parsing a `#!` line —
/// confirm at the use site, which is outside this view.
const SHEBANG_LINE_MAX_BYTES: usize = 256;
64
/// Error returned by kernel operations: a static errno-style code (for example
/// `"ESRCH"`) paired with a human-readable message.
///
/// The `Display` form is `"<code>: <message>"`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct KernelError {
    code: &'static str,
    message: String,
}

impl KernelError {
    /// Returns the static code string attached to this error.
    pub fn code(&self) -> &'static str {
        self.code
    }

    /// Builds an error from a code and anything convertible into a `String`.
    fn new(code: &'static str, message: impl Into<String>) -> Self {
        let message = message.into();
        KernelError { code, message }
    }

    /// `EINVAL`: the kernel VM has been disposed.
    fn disposed() -> Self {
        KernelError::new("EINVAL", "kernel VM is disposed")
    }

    /// `ESRCH`: no process with the given pid.
    fn no_such_process(pid: u32) -> Self {
        let message = format!("no such process {}", pid);
        KernelError::new("ESRCH", message)
    }

    /// `EBADF`: the given descriptor number is not valid.
    fn bad_file_descriptor(fd: u32) -> Self {
        let message = format!("bad file descriptor {}", fd);
        KernelError::new("EBADF", message)
    }

    /// `EPERM` with a caller-supplied message.
    fn permission_denied(message: impl Into<String>) -> Self {
        KernelError::new("EPERM", message)
    }

    /// `ENOENT`: the named command could not be found.
    fn command_not_found(command: &str) -> Self {
        let message = format!("command not found: {}", command);
        KernelError::new("ENOENT", message)
    }
}

impl fmt::Display for KernelError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Written piecewise; the output is exactly "<code>: <message>".
        f.write_str(self.code)?;
        f.write_str(": ")?;
        f.write_str(&self.message)
    }
}

impl Error for KernelError {}
111
112#[derive(Clone)]
113pub struct KernelVmConfig {
114    pub vm_id: String,
115    pub env: BTreeMap<String, String>,
116    pub cwd: String,
117    pub user: UserConfig,
118    pub permissions: Permissions,
119    pub dns: DnsConfig,
120    pub dns_resolver: SharedDnsResolver,
121    pub resources: ResourceLimits,
122    pub zombie_ttl: Duration,
123}
124
125impl KernelVmConfig {
126    pub fn new(vm_id: impl Into<String>) -> Self {
127        Self {
128            vm_id: vm_id.into(),
129            env: BTreeMap::new(),
130            cwd: String::from("/workspace"),
131            user: UserConfig::default(),
132            permissions: Permissions::default(),
133            dns: DnsConfig::default(),
134            dns_resolver: Arc::new(HickoryDnsResolver),
135            resources: ResourceLimits::default(),
136            zombie_ttl: Duration::from_secs(60),
137        }
138    }
139}
140
/// Optional parameters passed to the process-spawn path. Every field defaults to
/// empty / `None` through the derived `Default`.
#[derive(Debug, Clone, Default)]
pub struct SpawnOptions {
    /// NOTE(review): presumably the name of the driver making the request — confirm in
    /// `spawn_process`, which is outside this view.
    pub requester_driver: Option<String>,
    /// Optional parent pid for the new process.
    pub parent_pid: Option<u32>,
    /// Environment variables supplied for the spawn.
    pub env: BTreeMap<String, String>,
    /// Optional working directory for the spawn.
    pub cwd: Option<String>,
}
148
/// Optional parameters for a virtual process.
/// NOTE(review): the consumer of this type is outside this view; field meanings
/// below follow the field names only.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct VirtualProcessOptions {
    /// Optional parent pid.
    pub parent_pid: Option<u32>,
    /// Environment variables supplied for the process.
    pub env: BTreeMap<String, String>,
    /// Optional working directory.
    pub cwd: Option<String>,
}
155
/// Options accepted by `KernelVm::exec`; each field is forwarded unchanged into
/// the matching field of `SpawnOptions`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ExecOptions {
    /// Forwarded to `SpawnOptions::requester_driver`.
    pub requester_driver: Option<String>,
    /// Forwarded to `SpawnOptions::parent_pid`.
    pub parent_pid: Option<u32>,
    /// Forwarded to `SpawnOptions::env`.
    pub env: BTreeMap<String, String>,
    /// Forwarded to `SpawnOptions::cwd`.
    pub cwd: Option<String>,
}
163
/// Options accepted by `KernelVm::open_shell`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct OpenShellOptions {
    /// When set, used as the owner for the PTY calls made by `open_shell`;
    /// otherwise the spawned process's driver name is used.
    pub requester_driver: Option<String>,
    /// Command to spawn; `open_shell` falls back to `"sh"` when `None`.
    pub command: Option<String>,
    /// Arguments passed to the spawned command.
    pub args: Vec<String>,
    /// Environment variables forwarded to the spawn.
    pub env: BTreeMap<String, String>,
    /// Optional working directory forwarded to the spawn.
    pub cwd: Option<String>,
}
172
/// A pid paired with a status value.
/// NOTE(review): producer is outside this view; presumably the result of a
/// waitpid-style call — confirm the encoding of `status` there.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WaitPidResult {
    pub pid: u32,
    pub status: i32,
}
178
/// A pid and status value together with the `WaitPidEvent` that was observed.
/// NOTE(review): producer is outside this view; confirm the encoding of `status` there.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WaitPidEventResult {
    pub pid: u32,
    pub status: i32,
    pub event: WaitPidEvent,
}
185
/// A command, its arguments, and the `CommandDriver` associated with it.
/// NOTE(review): built by the spawn path, which is outside this view.
#[derive(Debug, Clone)]
struct ResolvedSpawnCommand {
    command: String,
    args: Vec<String>,
    driver: CommandDriver,
}
192
/// An interpreter path plus arguments.
/// NOTE(review): presumably parsed from a script's `#!` line — the parser is
/// outside this view.
#[derive(Debug, Clone)]
struct ShebangCommand {
    interpreter: String,
    args: Vec<String>,
}
198
199#[derive(Clone)]
200pub struct KernelProcessHandle {
201    pid: u32,
202    driver: String,
203    process: Arc<StubDriverProcess>,
204}
205
206impl fmt::Debug for KernelProcessHandle {
207    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
208        f.debug_struct("KernelProcessHandle")
209            .field("pid", &self.pid)
210            .field("driver", &self.driver)
211            .finish_non_exhaustive()
212    }
213}
214
215impl KernelProcessHandle {
216    pub fn pid(&self) -> u32 {
217        self.pid
218    }
219
220    pub fn driver(&self) -> &str {
221        &self.driver
222    }
223
224    pub fn finish(&self, exit_code: i32) {
225        self.process.finish(exit_code);
226    }
227
228    pub fn kill(&self, signal: i32) {
229        self.process.kill(signal);
230    }
231
232    pub fn wait(&self, timeout: Duration) -> Option<i32> {
233        self.process.wait(timeout)
234    }
235
236    pub fn kill_signals(&self) -> Vec<i32> {
237        self.process.kill_signals()
238    }
239}
240
241#[derive(Debug, Clone)]
242pub struct OpenShellHandle {
243    process: KernelProcessHandle,
244    master_fd: u32,
245    slave_fd: u32,
246    pty_path: String,
247}
248
249impl OpenShellHandle {
250    pub fn process(&self) -> &KernelProcessHandle {
251        &self.process
252    }
253
254    pub fn pid(&self) -> u32 {
255        self.process.pid()
256    }
257
258    pub fn master_fd(&self) -> u32 {
259        self.master_fd
260    }
261
262    pub fn slave_fd(&self) -> u32 {
263        self.slave_fd
264    }
265
266    pub fn pty_path(&self) -> &str {
267        &self.pty_path
268    }
269}
270
/// A kernel VM instance layered over a filesystem of type `F`.
pub struct KernelVm<F> {
    /// Identifier copied from the configuration.
    vm_id: String,
    /// Value of `now_ms()` captured at construction.
    boot_time_ms: u64,
    /// `Instant::now()` captured at construction.
    boot_instant: Instant,
    /// The supplied filesystem wrapped in a device layer and a permission layer.
    filesystem: PermissionedFileSystem<DeviceLayer<F>>,
    /// Permission policy (also given to the filesystem wrapper); used for network checks.
    permissions: Permissions,
    dns: DnsConfig,
    dns_resolver: SharedDnsResolver,
    /// VM-level environment from the configuration.
    env: BTreeMap<String, String>,
    /// VM-level working directory from the configuration.
    cwd: String,
    commands: CommandRegistry,
    /// Per-process descriptor tables; shared with the process-exit callback.
    fd_tables: Arc<Mutex<FdTableManager>>,
    processes: ProcessTable,
    pipes: PipeManager,
    ptys: PtyManager,
    sockets: SocketTable,
    /// Notifier shared with the pipe and PTY managers.
    poll_notifier: PollNotifier,
    users: UserManager,
    resources: ResourceAccountant,
    file_locks: FileLockManager,
    /// Driver name -> set of pids. `register_driver` creates an empty entry per
    /// driver; process cleanup removes the pid from every set.
    driver_pids: Arc<Mutex<BTreeMap<String, BTreeSet<u32>>>>,
    /// Set by `dispose_kernel_vm_resources`; `state()` reports `Terminated` once set.
    terminated: bool,
}
294
295fn cleanup_process_resources(
296    fd_tables: &Mutex<FdTableManager>,
297    file_locks: &FileLockManager,
298    pipes: &PipeManager,
299    ptys: &PtyManager,
300    sockets: &SocketTable,
301    driver_pids: &Mutex<BTreeMap<String, BTreeSet<u32>>>,
302    pid: u32,
303) {
304    let mut cleanup = Vec::new();
305    {
306        let mut tables = lock_or_recover(fd_tables);
307        let descriptors = tables
308            .get(pid)
309            .map(|table| {
310                table
311                    .iter()
312                    .map(|entry| (entry.fd, Arc::clone(&entry.description), entry.filetype))
313                    .collect::<Vec<_>>()
314            })
315            .unwrap_or_default();
316
317        cleanup_process_resources_test_hook();
318
319        if let Some(table) = tables.get_mut(pid) {
320            for (fd, description, filetype) in &descriptors {
321                table.close(*fd);
322                cleanup.push((Arc::clone(description), *filetype));
323            }
324        }
325        tables.remove(pid);
326    }
327
328    for (description, filetype) in cleanup {
329        close_special_resource_if_needed(file_locks, pipes, ptys, &description, filetype);
330    }
331
332    sockets.remove_all_for_pid(pid);
333
334    let mut owners = lock_or_recover(driver_pids);
335    for pids in owners.values_mut() {
336        pids.remove(&pid);
337    }
338}
339
340fn dispose_kernel_vm_resources<F>(kernel: &mut KernelVm<F>) {
341    kernel.processes.terminate_all();
342    let pids = lock_or_recover(&kernel.fd_tables).pids();
343    for pid in pids {
344        cleanup_process_resources(
345            kernel.fd_tables.as_ref(),
346            &kernel.file_locks,
347            &kernel.pipes,
348            &kernel.ptys,
349            &kernel.sockets,
350            kernel.driver_pids.as_ref(),
351            pid,
352        );
353    }
354    lock_or_recover(&kernel.driver_pids).clear();
355    kernel.terminated = true;
356}
357
/// Callback type for the test-only hook invoked from `cleanup_process_resources`.
#[cfg(test)]
type CleanupProcessResourcesHook = Arc<dyn Fn() + Send + Sync + 'static>;

/// Test builds: runs the installed hook, if any. The hook is cloned out of the
/// slot and the slot lock released before the hook is called.
#[cfg(test)]
fn cleanup_process_resources_test_hook() {
    let slot = lock_or_recover(cleanup_process_resources_test_hook_slot());
    let installed = slot.clone();
    drop(slot);

    if let Some(run) = installed {
        run();
    }
}

/// Non-test builds: the hook is a no-op.
#[cfg(not(test))]
fn cleanup_process_resources_test_hook() {}

/// Process-wide storage for the test hook, created on first use.
#[cfg(test)]
fn cleanup_process_resources_test_hook_slot() -> &'static Mutex<Option<CleanupProcessResourcesHook>>
{
    static HOOK: OnceLock<Mutex<Option<CleanupProcessResourcesHook>>> = OnceLock::new();
    HOOK.get_or_init(Mutex::default)
}

/// Installs (`Some`) or clears (`None`) the test hook.
#[cfg(test)]
fn set_cleanup_process_resources_test_hook(hook: Option<CleanupProcessResourcesHook>) {
    let mut slot = lock_or_recover(cleanup_process_resources_test_hook_slot());
    *slot = hook;
}
383
384fn close_special_resource_if_needed(
385    file_locks: &FileLockManager,
386    pipes: &PipeManager,
387    ptys: &PtyManager,
388    description: &Arc<FileDescription>,
389    filetype: u8,
390) {
391    if description.ref_count() != 0 {
392        return;
393    }
394
395    file_locks.release_owner(description.id());
396
397    if filetype == FILETYPE_PIPE && pipes.is_pipe(description.id()) {
398        pipes.close(description.id());
399    }
400
401    if ptys.is_pty(description.id()) {
402        ptys.close(description.id());
403    }
404}
405
/// Kinds of node in the kernel's virtual proc tree.
/// NOTE(review): the path-to-variant mapping lives outside this view; the variant
/// names suggest `/proc`-style entries (global files plus per-pid directories,
/// files and links) — confirm against the resolver.
#[derive(Debug, Clone, PartialEq, Eq)]
enum ProcNode {
    RootDir,
    MountsFile,
    CpuInfoFile,
    MemInfoFile,
    LoadAvgFile,
    UptimeFile,
    VersionFile,
    SelfLink { pid: u32 },
    PidDir { pid: u32 },
    PidFdDir { pid: u32 },
    PidCmdline { pid: u32 },
    PidEnviron { pid: u32 },
    PidCwdLink { pid: u32 },
    PidStatFile { pid: u32 },
    PidStatusFile { pid: u32 },
    // Per-descriptor link: carries both the pid and the descriptor number.
    PidFdLink { pid: u32, fd: u32 },
}
425
426impl<F: VirtualFileSystem + 'static> KernelVm<F> {
427    pub fn new(filesystem: F, config: KernelVmConfig) -> Self {
428        let vm_id = config.vm_id;
429        let boot_time_ms = now_ms();
430        let boot_instant = Instant::now();
431        let permissions = config.permissions.clone();
432        let users = UserManager::from_config(config.user);
433        let process_table = ProcessTable::with_zombie_ttl(config.zombie_ttl);
434        let process_table_for_pty = process_table.clone();
435        let fd_tables = Arc::new(Mutex::new(FdTableManager::with_max_fds(
436            config
437                .resources
438                .max_open_fds
439                .unwrap_or(DEFAULT_MAX_OPEN_FDS),
440        )));
441        let file_locks = FileLockManager::new();
442        let driver_pids = Arc::new(Mutex::new(BTreeMap::new()));
443        let poll_notifier = PollNotifier::default();
444        let pipes = PipeManager::with_notifier(poll_notifier.clone());
445        let ptys = PtyManager::with_signal_handler_and_notifier(
446            Arc::new(move |pgid, signal| {
447                let _ = process_table_for_pty.kill(-(pgid as i32), signal);
448            }),
449            poll_notifier.clone(),
450        );
451        let sockets = SocketTable::new();
452
453        let fd_tables_for_exit = Arc::clone(&fd_tables);
454        let file_locks_for_exit = file_locks.clone();
455        let driver_pids_for_exit = Arc::clone(&driver_pids);
456        let pipes_for_exit = pipes.clone();
457        let ptys_for_exit = ptys.clone();
458        let sockets_for_exit = sockets.clone();
459        process_table.set_on_process_exit(Some(Arc::new(move |pid| {
460            cleanup_process_resources(
461                fd_tables_for_exit.as_ref(),
462                &file_locks_for_exit,
463                &pipes_for_exit,
464                &ptys_for_exit,
465                &sockets_for_exit,
466                driver_pids_for_exit.as_ref(),
467                pid,
468            );
469        })));
470
471        Self {
472            vm_id: vm_id.clone(),
473            boot_time_ms,
474            boot_instant,
475            filesystem: PermissionedFileSystem::new(
476                create_device_layer(filesystem),
477                vm_id,
478                permissions.clone(),
479            ),
480            permissions,
481            dns: config.dns,
482            dns_resolver: config.dns_resolver,
483            env: config.env,
484            cwd: config.cwd,
485            commands: CommandRegistry::new(),
486            fd_tables,
487            processes: process_table,
488            pipes,
489            ptys,
490            sockets,
491            poll_notifier,
492            users,
493            resources: ResourceAccountant::new(config.resources),
494            file_locks,
495            driver_pids,
496            terminated: false,
497        }
498    }
499
500    pub fn vm_id(&self) -> &str {
501        &self.vm_id
502    }
503
504    pub fn state(&self) -> LifecycleState {
505        if self.terminated {
506            LifecycleState::Terminated
507        } else if self.processes.running_count() > 0 {
508            LifecycleState::Busy
509        } else {
510            LifecycleState::Ready
511        }
512    }
513
    /// Returns the command registry's `list()` output.
    /// NOTE(review): presumably command name -> driver name; confirm in `CommandRegistry`.
    pub fn commands(&self) -> BTreeMap<String, String> {
        self.commands.list()
    }
517
    /// Shared access to the permission-wrapped filesystem.
    pub fn filesystem(&self) -> &PermissionedFileSystem<DeviceLayer<F>> {
        &self.filesystem
    }
521
    /// Mutable access to the permission-wrapped filesystem.
    pub fn filesystem_mut(&mut self) -> &mut PermissionedFileSystem<DeviceLayer<F>> {
        &mut self.filesystem
    }
525
    /// Borrows the VM's `UserManager`.
    pub fn user_manager(&self) -> &UserManager {
        &self.users
    }
529
530    pub fn process_identity(
531        &self,
532        requester_driver: &str,
533        pid: u32,
534    ) -> KernelResult<ProcessIdentity> {
535        self.assert_driver_owns(requester_driver, pid)?;
536        Ok(self
537            .processes
538            .get(pid)
539            .ok_or_else(|| KernelError::no_such_process(pid))?
540            .identity)
541    }
542
    /// Returns a clone of the VM's `UserManager`.
    pub fn user_profile(&self) -> UserManager {
        self.users.clone()
    }
546
547    pub fn getuid(&self, requester_driver: &str, pid: u32) -> KernelResult<u32> {
548        Ok(self.process_identity(requester_driver, pid)?.uid)
549    }
550
551    pub fn getgid(&self, requester_driver: &str, pid: u32) -> KernelResult<u32> {
552        Ok(self.process_identity(requester_driver, pid)?.gid)
553    }
554
555    pub fn geteuid(&self, requester_driver: &str, pid: u32) -> KernelResult<u32> {
556        Ok(self.process_identity(requester_driver, pid)?.euid)
557    }
558
559    pub fn getegid(&self, requester_driver: &str, pid: u32) -> KernelResult<u32> {
560        Ok(self.process_identity(requester_driver, pid)?.egid)
561    }
562
563    pub fn getgroups(&self, requester_driver: &str, pid: u32) -> KernelResult<Vec<u32>> {
564        Ok(self
565            .process_identity(requester_driver, pid)?
566            .supplementary_gids)
567    }
568
569    pub fn getpwuid(&self, uid: u32) -> KernelResult<String> {
570        self.users
571            .getpwuid(uid)
572            .ok_or_else(|| KernelError::new("ENOENT", format!("unknown uid {uid}")))
573    }
574
575    pub fn getgrgid(&self, gid: u32) -> KernelResult<String> {
576        self.users
577            .getgrgid(gid)
578            .ok_or_else(|| KernelError::new("ENOENT", format!("unknown gid {gid}")))
579    }
580
581    pub fn resource_snapshot(&self) -> ResourceSnapshot {
582        let fd_tables = lock_or_recover(&self.fd_tables);
583        self.resources.snapshot(
584            &self.processes,
585            &fd_tables,
586            &self.pipes,
587            &self.ptys,
588            &self.sockets,
589        )
590    }
591
    /// Borrows the limits held by the resource accountant.
    pub fn resource_limits(&self) -> &ResourceLimits {
        self.resources.limits()
    }
595
596    pub fn resolve_dns(
597        &self,
598        hostname: &str,
599        policy: DnsLookupPolicy,
600    ) -> KernelResult<DnsResolution> {
601        self.assert_not_terminated()?;
602        if matches!(policy, DnsLookupPolicy::CheckPermissions) {
603            let resource = format_dns_resource(hostname).map_err(map_dns_resolver_error)?;
604            check_network_access(
605                &self.vm_id,
606                &self.permissions,
607                NetworkOperation::Dns,
608                &resource,
609            )?;
610        }
611
612        resolve_dns(&self.dns, self.dns_resolver.as_ref(), hostname).map_err(map_dns_resolver_error)
613    }
614
615    pub fn resolve_dns_records(
616        &self,
617        hostname: &str,
618        record_type: RecordType,
619        policy: DnsLookupPolicy,
620    ) -> KernelResult<DnsRecordResolution> {
621        self.assert_not_terminated()?;
622        if matches!(policy, DnsLookupPolicy::CheckPermissions) {
623            let resource = format_dns_resource(hostname).map_err(map_dns_resolver_error)?;
624            check_network_access(
625                &self.vm_id,
626                &self.permissions,
627                NetworkOperation::Dns,
628                &resource,
629            )?;
630        }
631
632        resolve_dns_records(&self.dns, self.dns_resolver.as_ref(), hostname, record_type)
633            .map_err(map_dns_resolver_error)
634    }
635
636    pub fn register_driver(&mut self, driver: CommandDriver) -> KernelResult<()> {
637        self.assert_not_terminated()?;
638        let driver_name = driver.name().to_owned();
639        let populate_driver = driver.clone();
640        self.commands.register(driver)?;
641        lock_or_recover(&self.driver_pids)
642            .entry(driver_name)
643            .or_default();
644        self.commands
645            .populate_driver_bin(&mut self.filesystem, &populate_driver)?;
646        Ok(())
647    }
648
649    pub fn exec(
650        &mut self,
651        command: &str,
652        options: ExecOptions,
653    ) -> KernelResult<KernelProcessHandle> {
654        self.spawn_process(
655            "sh",
656            vec![String::from("-c"), String::from(command)],
657            SpawnOptions {
658                requester_driver: options.requester_driver,
659                parent_pid: options.parent_pid,
660                env: options.env,
661                cwd: options.cwd,
662            },
663        )
664    }
665
666    pub fn open_shell(&mut self, options: OpenShellOptions) -> KernelResult<OpenShellHandle> {
667        let command = options.command.unwrap_or_else(|| String::from("sh"));
668        let requester_driver = options.requester_driver.clone();
669        let process = self.spawn_process(
670            &command,
671            options.args,
672            SpawnOptions {
673                requester_driver: requester_driver.clone(),
674                parent_pid: None,
675                env: options.env,
676                cwd: options.cwd,
677            },
678        )?;
679        let owner = requester_driver.as_deref().unwrap_or(process.driver());
680        let (master_fd, slave_fd, pty_path) = self.open_pty(owner, process.pid())?;
681        self.setpgid(owner, process.pid(), process.pid())?;
682        self.pty_set_foreground_pgid(owner, process.pid(), master_fd, process.pid())?;
683        Ok(OpenShellHandle {
684            process,
685            master_fd,
686            slave_fd,
687            pty_path,
688        })
689    }
690
    /// Reads the whole file at `path` with no process context (`None` is passed
    /// to `read_file_internal`). Fails if the VM is terminated.
    pub fn read_file(&mut self, path: &str) -> KernelResult<Vec<u8>> {
        self.assert_not_terminated()?;
        self.read_file_internal(None, path)
    }
695
696    pub fn pread_file(&mut self, path: &str, offset: u64, length: usize) -> KernelResult<Vec<u8>> {
697        self.assert_not_terminated()?;
698        self.resources.check_pread_length(length)?;
699        Ok(VirtualFileSystem::pread(
700            &mut self.filesystem,
701            path,
702            offset,
703            length,
704        )?)
705    }
706
    /// Reads the whole file at `path` in the context of process `pid`.
    /// Fails if the VM is terminated or `assert_driver_owns(requester_driver, pid)` fails.
    pub fn read_file_for_process(
        &mut self,
        requester_driver: &str,
        pid: u32,
        path: &str,
    ) -> KernelResult<Vec<u8>> {
        self.assert_not_terminated()?;
        self.assert_driver_owns(requester_driver, pid)?;
        self.read_file_internal(Some(pid), path)
    }
717
718    pub fn write_file(&mut self, path: &str, content: impl Into<Vec<u8>>) -> KernelResult<()> {
719        self.assert_not_terminated()?;
720        self.reject_read_only_resolved_write_path(path)?;
721        let content = content.into();
722        self.check_write_file_limits(path, content.len() as u64)?;
723        Ok(self.filesystem.write_file(path, content)?)
724    }
725
726    pub fn write_file_for_process(
727        &mut self,
728        requester_driver: &str,
729        pid: u32,
730        path: &str,
731        content: impl Into<Vec<u8>>,
732        mode: Option<u32>,
733    ) -> KernelResult<()> {
734        self.assert_not_terminated()?;
735        self.assert_driver_owns(requester_driver, pid)?;
736        let existed = self.exists_internal(Some(pid), path)?;
737        let content = content.into();
738        self.reject_read_only_resolved_write_path(path)?;
739        self.check_write_file_limits(path, content.len() as u64)?;
740        VirtualFileSystem::write_file_with_mode(&mut self.filesystem, path, content, mode)?;
741        if !existed {
742            let umask = self.processes.get_umask(pid)?;
743            self.apply_creation_mode(path, mode.unwrap_or(0o666), umask)?;
744        }
745        Ok(())
746    }
747
748    pub fn create_dir(&mut self, path: &str) -> KernelResult<()> {
749        self.assert_not_terminated()?;
750        self.reject_read_only_entry_write_path(path)?;
751        self.check_create_dir_limits(path)?;
752        Ok(self.filesystem.create_dir(path)?)
753    }
754
755    pub fn create_dir_for_process(
756        &mut self,
757        requester_driver: &str,
758        pid: u32,
759        path: &str,
760        mode: Option<u32>,
761    ) -> KernelResult<()> {
762        self.assert_not_terminated()?;
763        self.assert_driver_owns(requester_driver, pid)?;
764        let existed = self.exists_internal(Some(pid), path)?;
765        self.reject_read_only_entry_write_path(path)?;
766        self.check_create_dir_limits(path)?;
767        VirtualFileSystem::create_dir_with_mode(&mut self.filesystem, path, mode)?;
768        if !existed {
769            let umask = self.processes.get_umask(pid)?;
770            self.apply_creation_mode(path, mode.unwrap_or(0o777), umask)?;
771        }
772        Ok(())
773    }
774
775    pub fn mkdir(&mut self, path: &str, recursive: bool) -> KernelResult<()> {
776        self.assert_not_terminated()?;
777        self.reject_read_only_entry_write_path(path)?;
778        self.check_mkdir_limits(path, recursive)?;
779        Ok(self.filesystem.mkdir(path, recursive)?)
780    }
781
782    pub fn mkdir_for_process(
783        &mut self,
784        requester_driver: &str,
785        pid: u32,
786        path: &str,
787        recursive: bool,
788        mode: Option<u32>,
789    ) -> KernelResult<()> {
790        self.assert_not_terminated()?;
791        self.assert_driver_owns(requester_driver, pid)?;
792        let created_paths = self.missing_directory_paths(path, recursive)?;
793        self.reject_read_only_entry_write_path(path)?;
794        self.check_mkdir_limits(path, recursive)?;
795        VirtualFileSystem::mkdir_with_mode(&mut self.filesystem, path, recursive, mode)?;
796        if !created_paths.is_empty() {
797            let umask = self.processes.get_umask(pid)?;
798            let mode = mode.unwrap_or(0o777);
799            for created_path in created_paths {
800                self.apply_creation_mode(&created_path, mode, umask)?;
801            }
802        }
803        Ok(())
804    }
805
806    pub fn umask(
807        &self,
808        requester_driver: &str,
809        pid: u32,
810        new_mask: Option<u32>,
811    ) -> KernelResult<u32> {
812        self.assert_driver_owns(requester_driver, pid)?;
813        match new_mask {
814            Some(mask) => Ok(self.processes.set_umask(pid, mask)?),
815            None => Ok(self.processes.get_umask(pid)?),
816        }
817    }
818
    /// Reports whether `path` exists, with no process context (`None` is passed
    /// to `exists_internal`). Fails if the VM is terminated.
    pub fn exists(&self, path: &str) -> KernelResult<bool> {
        self.assert_not_terminated()?;
        self.exists_internal(None, path)
    }
823
    /// Reports whether `path` exists in the context of process `pid`.
    /// Fails if the VM is terminated or `assert_driver_owns(requester_driver, pid)` fails.
    pub fn exists_for_process(
        &self,
        requester_driver: &str,
        pid: u32,
        path: &str,
    ) -> KernelResult<bool> {
        self.assert_not_terminated()?;
        self.assert_driver_owns(requester_driver, pid)?;
        self.exists_internal(Some(pid), path)
    }
834
    /// Returns the stat record for `path`, with no process context (`None` is
    /// passed to `stat_internal`). Fails if the VM is terminated.
    pub fn stat(&mut self, path: &str) -> KernelResult<VirtualStat> {
        self.assert_not_terminated()?;
        self.stat_internal(None, path)
    }
839
    /// Returns the stat record for `path` in the context of process `pid`.
    /// Fails if the VM is terminated or `assert_driver_owns(requester_driver, pid)` fails.
    pub fn stat_for_process(
        &mut self,
        requester_driver: &str,
        pid: u32,
        path: &str,
    ) -> KernelResult<VirtualStat> {
        self.assert_not_terminated()?;
        self.assert_driver_owns(requester_driver, pid)?;
        self.stat_internal(Some(pid), path)
    }
850
    /// Returns the `lstat_internal` record for `path`, with no process context.
    /// NOTE(review): presumably does not follow a final symlink, as with POSIX
    /// `lstat` — confirm in `lstat_internal`. Fails if the VM is terminated.
    pub fn lstat(&self, path: &str) -> KernelResult<VirtualStat> {
        self.assert_not_terminated()?;
        self.lstat_internal(None, path)
    }
855
    /// Returns the `lstat_internal` record for `path` in the context of process `pid`.
    /// Fails if the VM is terminated or `assert_driver_owns(requester_driver, pid)` fails.
    pub fn lstat_for_process(
        &self,
        requester_driver: &str,
        pid: u32,
        path: &str,
    ) -> KernelResult<VirtualStat> {
        self.assert_not_terminated()?;
        self.assert_driver_owns(requester_driver, pid)?;
        self.lstat_internal(Some(pid), path)
    }
866
    /// Returns the result of `read_link_internal` for `path`, with no process
    /// context. Fails if the VM is terminated.
    pub fn read_link(&self, path: &str) -> KernelResult<String> {
        self.assert_not_terminated()?;
        self.read_link_internal(None, path)
    }
871
    /// Returns the result of `read_link_internal` for `path` in the context of process `pid`.
    /// Fails if the VM is terminated or `assert_driver_owns(requester_driver, pid)` fails.
    pub fn read_link_for_process(
        &self,
        requester_driver: &str,
        pid: u32,
        path: &str,
    ) -> KernelResult<String> {
        self.assert_not_terminated()?;
        self.assert_driver_owns(requester_driver, pid)?;
        self.read_link_internal(Some(pid), path)
    }
882
883    pub fn read_dir(&mut self, path: &str) -> KernelResult<Vec<String>> {
884        self.assert_not_terminated()?;
885        let entries = self.read_dir_internal(None, path)?;
886        self.resources.check_readdir_entries(entries.len())?;
887        Ok(entries)
888    }
889
890    pub fn read_dir_for_process(
891        &mut self,
892        requester_driver: &str,
893        pid: u32,
894        path: &str,
895    ) -> KernelResult<Vec<String>> {
896        self.assert_not_terminated()?;
897        self.assert_driver_owns(requester_driver, pid)?;
898        let entries = self.read_dir_internal(Some(pid), path)?;
899        self.resources.check_readdir_entries(entries.len())?;
900        Ok(entries)
901    }
902
903    pub fn remove_file(&mut self, path: &str) -> KernelResult<()> {
904        self.assert_not_terminated()?;
905        self.reject_read_only_entry_write_path(path)?;
906        Ok(self.filesystem.remove_file(path)?)
907    }
908
909    pub fn remove_dir(&mut self, path: &str) -> KernelResult<()> {
910        self.assert_not_terminated()?;
911        self.reject_read_only_entry_write_path(path)?;
912        Ok(self.filesystem.remove_dir(path)?)
913    }
914
915    pub fn rename(&mut self, old_path: &str, new_path: &str) -> KernelResult<()> {
916        self.assert_not_terminated()?;
917        self.reject_read_only_entry_write_path(old_path)?;
918        self.reject_read_only_entry_write_path(new_path)?;
919        self.check_rename_copy_up_limits(old_path, new_path)?;
920        Ok(self.filesystem.rename(old_path, new_path)?)
921    }
922
    /// Returns the result of `realpath_internal` for `path`, with no process
    /// context. Fails if the VM is terminated.
    pub fn realpath(&self, path: &str) -> KernelResult<String> {
        self.assert_not_terminated()?;
        self.realpath_internal(None, path)
    }
927
    /// Returns the result of `realpath_internal` for `path` in the context of process `pid`.
    /// Fails if the VM is terminated or `assert_driver_owns(requester_driver, pid)` fails.
    pub fn realpath_for_process(
        &self,
        requester_driver: &str,
        pid: u32,
        path: &str,
    ) -> KernelResult<String> {
        self.assert_not_terminated()?;
        self.assert_driver_owns(requester_driver, pid)?;
        self.realpath_internal(Some(pid), path)
    }
938
939    pub fn symlink(&mut self, target: &str, link_path: &str) -> KernelResult<()> {
940        self.assert_not_terminated()?;
941        if is_proc_path(target) {
942            self.filesystem
943                .check_virtual_path(FsOperation::Write, link_path)
944                .map_err(KernelError::from)?;
945            return Err(read_only_filesystem_error(link_path));
946        }
947        self.reject_read_only_entry_write_path(link_path)?;
948        self.check_symlink_limits(target, link_path)?;
949        Ok(self.filesystem.symlink(target, link_path)?)
950    }
951
952    pub fn chmod(&mut self, path: &str, mode: u32) -> KernelResult<()> {
953        self.assert_not_terminated()?;
954        self.reject_read_only_resolved_write_path(path)?;
955        Ok(self.filesystem.chmod(path, mode)?)
956    }
957
958    pub fn link(&mut self, old_path: &str, new_path: &str) -> KernelResult<()> {
959        self.assert_not_terminated()?;
960        if is_proc_path(old_path) {
961            self.filesystem
962                .check_virtual_path(FsOperation::Write, new_path)
963                .map_err(KernelError::from)?;
964            return Err(read_only_filesystem_error(new_path));
965        }
966        self.reject_read_only_resolved_write_path(old_path)?;
967        self.reject_read_only_entry_write_path(new_path)?;
968        Ok(self.filesystem.link(old_path, new_path)?)
969    }
970
971    pub fn chown(&mut self, path: &str, uid: u32, gid: u32) -> KernelResult<()> {
972        self.assert_not_terminated()?;
973        self.reject_read_only_resolved_write_path(path)?;
974        Ok(self.filesystem.chown(path, uid, gid)?)
975    }
976
977    pub fn utimes(&mut self, path: &str, atime_ms: u64, mtime_ms: u64) -> KernelResult<()> {
978        self.utimes_spec(
979            path,
980            VirtualUtimeSpec::Set(VirtualTimeSpec::from_millis(atime_ms)),
981            VirtualUtimeSpec::Set(VirtualTimeSpec::from_millis(mtime_ms)),
982        )
983    }
984
985    pub fn utimes_spec(
986        &mut self,
987        path: &str,
988        atime: VirtualUtimeSpec,
989        mtime: VirtualUtimeSpec,
990    ) -> KernelResult<()> {
991        self.assert_not_terminated()?;
992        self.reject_read_only_resolved_write_path(path)?;
993        Ok(self.filesystem.utimes_spec(path, atime, mtime, true)?)
994    }
995
996    pub fn lutimes(
997        &mut self,
998        path: &str,
999        atime: VirtualUtimeSpec,
1000        mtime: VirtualUtimeSpec,
1001    ) -> KernelResult<()> {
1002        self.assert_not_terminated()?;
1003        self.reject_read_only_entry_write_path(path)?;
1004        Ok(self.filesystem.utimes_spec(path, atime, mtime, false)?)
1005    }
1006
1007    pub fn futimes(
1008        &mut self,
1009        requester_driver: &str,
1010        pid: u32,
1011        fd: u32,
1012        atime: VirtualUtimeSpec,
1013        mtime: VirtualUtimeSpec,
1014    ) -> KernelResult<()> {
1015        self.assert_not_terminated()?;
1016        let path = self
1017            .description_for_fd(requester_driver, pid, fd)?
1018            .path()
1019            .to_owned();
1020        self.reject_read_only_resolved_write_path(&path)?;
1021        Ok(self.filesystem.utimes_spec(&path, atime, mtime, true)?)
1022    }
1023
1024    pub fn truncate(&mut self, path: &str, length: u64) -> KernelResult<()> {
1025        self.assert_not_terminated()?;
1026        self.reject_read_only_resolved_write_path(path)?;
1027        self.check_truncate_limits(path, length)?;
1028        Ok(self.filesystem.truncate(path, length)?)
1029    }
1030
1031    pub fn list_processes(&self) -> BTreeMap<u32, ProcessInfo> {
1032        self.processes.list_processes()
1033    }
1034
1035    pub fn zombie_timer_count(&self) -> usize {
1036        self.processes.zombie_timer_count()
1037    }
1038
1039    pub fn spawn_process(
1040        &mut self,
1041        command: &str,
1042        args: Vec<String>,
1043        options: SpawnOptions,
1044    ) -> KernelResult<KernelProcessHandle> {
1045        self.assert_not_terminated()?;
1046        if let (Some(requester), Some(parent_pid)) =
1047            (options.requester_driver.as_deref(), options.parent_pid)
1048        {
1049            self.assert_driver_owns(requester, parent_pid)?;
1050        }
1051
1052        let cwd = options.cwd.clone().unwrap_or_else(|| self.cwd.clone());
1053        let resolved = self.resolve_spawn_command(command, &args, &cwd)?;
1054
1055        self.resources
1056            .check_process_argv_bytes(&resolved.command, &resolved.args)?;
1057        self.resources
1058            .check_process_env_bytes(&self.env, &options.env)?;
1059
1060        let mut env = self.env.clone();
1061        env.extend(options.env.clone());
1062        check_command_execution(
1063            &self.vm_id,
1064            &self.permissions,
1065            &resolved.command,
1066            &resolved.args,
1067            Some(&cwd),
1068            &env,
1069        )?;
1070
1071        let inherited_fds = {
1072            let tables = lock_or_recover(&self.fd_tables);
1073            options
1074                .parent_pid
1075                .and_then(|pid| tables.get(pid).map(ProcessFdTable::len))
1076                .unwrap_or(3)
1077        };
1078        self.resources
1079            .check_process_spawn(&self.resource_snapshot(), inherited_fds)?;
1080
1081        self.register_process(
1082            resolved.driver.name().to_owned(),
1083            resolved.command,
1084            resolved.args,
1085            ProcessContext {
1086                pid: 0,
1087                ppid: options.parent_pid.unwrap_or(0),
1088                env,
1089                cwd,
1090                umask: DEFAULT_PROCESS_UMASK,
1091                fds: Default::default(),
1092                identity: self.users.identity(),
1093                blocked_signals: SignalSet::empty(),
1094                pending_signals: SignalSet::empty(),
1095            },
1096            options.requester_driver.as_deref(),
1097        )
1098    }
1099
1100    pub fn create_virtual_process(
1101        &mut self,
1102        requester_driver: &str,
1103        driver: &str,
1104        command: &str,
1105        args: Vec<String>,
1106        options: VirtualProcessOptions,
1107    ) -> KernelResult<KernelProcessHandle> {
1108        self.assert_not_terminated()?;
1109        if let Some(parent_pid) = options.parent_pid {
1110            self.assert_driver_owns(requester_driver, parent_pid)?;
1111        }
1112
1113        let cwd = options.cwd.clone().unwrap_or_else(|| self.cwd.clone());
1114        self.resources.check_process_argv_bytes(command, &args)?;
1115        self.resources
1116            .check_process_env_bytes(&self.env, &options.env)?;
1117
1118        let mut env = self.env.clone();
1119        env.extend(options.env.clone());
1120        check_command_execution(
1121            &self.vm_id,
1122            &self.permissions,
1123            command,
1124            &args,
1125            Some(&cwd),
1126            &env,
1127        )?;
1128
1129        let inherited_fds = {
1130            let tables = lock_or_recover(&self.fd_tables);
1131            options
1132                .parent_pid
1133                .and_then(|pid| tables.get(pid).map(ProcessFdTable::len))
1134                .unwrap_or(3)
1135        };
1136        self.resources
1137            .check_process_spawn(&self.resource_snapshot(), inherited_fds)?;
1138
1139        self.register_process(
1140            String::from(driver),
1141            String::from(command),
1142            args,
1143            ProcessContext {
1144                pid: 0,
1145                ppid: options.parent_pid.unwrap_or(0),
1146                env,
1147                cwd,
1148                umask: DEFAULT_PROCESS_UMASK,
1149                fds: Default::default(),
1150                identity: self.users.identity(),
1151                blocked_signals: SignalSet::empty(),
1152                pending_signals: SignalSet::empty(),
1153            },
1154            Some(requester_driver),
1155        )
1156    }
1157
1158    pub fn read_process_stdin(
1159        &mut self,
1160        requester_driver: &str,
1161        pid: u32,
1162        length: usize,
1163        timeout: Option<Duration>,
1164    ) -> KernelResult<Option<Vec<u8>>> {
1165        self.fd_read_with_timeout_result(requester_driver, pid, 0, length, timeout)
1166    }
1167
1168    pub fn write_process_stdout(
1169        &mut self,
1170        requester_driver: &str,
1171        pid: u32,
1172        data: &[u8],
1173    ) -> KernelResult<usize> {
1174        self.fd_write(requester_driver, pid, 1, data)
1175    }
1176
1177    pub fn write_process_stderr(
1178        &mut self,
1179        requester_driver: &str,
1180        pid: u32,
1181        data: &[u8],
1182    ) -> KernelResult<usize> {
1183        self.fd_write(requester_driver, pid, 2, data)
1184    }
1185
1186    pub fn exit_process(
1187        &mut self,
1188        requester_driver: &str,
1189        pid: u32,
1190        exit_code: i32,
1191    ) -> KernelResult<()> {
1192        self.assert_driver_owns(requester_driver, pid)?;
1193        self.processes.mark_exited(pid, exit_code);
1194        Ok(())
1195    }
1196
1197    fn register_process(
1198        &mut self,
1199        driver_name: String,
1200        command: String,
1201        args: Vec<String>,
1202        mut ctx: ProcessContext,
1203        requester_driver: Option<&str>,
1204    ) -> KernelResult<KernelProcessHandle> {
1205        let pid = self.processes.allocate_pid()?;
1206        ctx.pid = pid;
1207
1208        {
1209            let mut tables = lock_or_recover(&self.fd_tables);
1210            if ctx.ppid != 0 {
1211                let parent_pid = ctx.ppid;
1212                tables.fork(parent_pid, pid);
1213            } else {
1214                tables.create(pid);
1215            }
1216        }
1217
1218        let process = Arc::new(StubDriverProcess::default());
1219        self.processes.register(
1220            pid,
1221            driver_name.clone(),
1222            command,
1223            args,
1224            ctx,
1225            process.clone(),
1226        );
1227
1228        let mut owners = lock_or_recover(&self.driver_pids);
1229        owners.entry(driver_name.clone()).or_default().insert(pid);
1230        if let Some(requester) = requester_driver {
1231            owners
1232                .entry(String::from(requester))
1233                .or_default()
1234                .insert(pid);
1235        }
1236
1237        Ok(KernelProcessHandle {
1238            pid,
1239            driver: driver_name,
1240            process,
1241        })
1242    }
1243
1244    pub fn waitpid(&mut self, pid: u32) -> KernelResult<WaitPidResult> {
1245        let (pid, status) = self.processes.waitpid(pid)?;
1246        self.cleanup_process_resources(pid);
1247        Ok(WaitPidResult { pid, status })
1248    }
1249
1250    pub fn waitpid_with_options(
1251        &mut self,
1252        requester_driver: &str,
1253        waiter_pid: u32,
1254        pid: i32,
1255        flags: WaitPidFlags,
1256    ) -> KernelResult<Option<WaitPidEventResult>> {
1257        self.assert_driver_owns(requester_driver, waiter_pid)?;
1258        let result = self.processes.waitpid_for(waiter_pid, pid, flags)?;
1259        Ok(result.map(|result| self.finish_waitpid_event(result)))
1260    }
1261
1262    pub fn wait_and_reap(&mut self, pid: u32) -> KernelResult<(u32, i32)> {
1263        let result = self.waitpid(pid)?;
1264        Ok((result.pid, result.status))
1265    }
1266
1267    pub fn open_pipe(&mut self, requester_driver: &str, pid: u32) -> KernelResult<(u32, u32)> {
1268        self.assert_not_terminated()?;
1269        self.assert_driver_owns(requester_driver, pid)?;
1270        self.resources
1271            .check_pipe_allocation(&self.resource_snapshot())?;
1272        let mut tables = lock_or_recover(&self.fd_tables);
1273        let table = tables
1274            .get_mut(pid)
1275            .ok_or_else(|| KernelError::no_such_process(pid))?;
1276        Ok(self.pipes.create_pipe_fds(table)?)
1277    }
1278
1279    pub fn open_pty(
1280        &mut self,
1281        requester_driver: &str,
1282        pid: u32,
1283    ) -> KernelResult<(u32, u32, String)> {
1284        self.assert_not_terminated()?;
1285        self.assert_driver_owns(requester_driver, pid)?;
1286        self.resources
1287            .check_pty_allocation(&self.resource_snapshot())?;
1288        let mut tables = lock_or_recover(&self.fd_tables);
1289        let table = tables
1290            .get_mut(pid)
1291            .ok_or_else(|| KernelError::no_such_process(pid))?;
1292        Ok(self.ptys.create_pty_fds(table)?)
1293    }
1294
1295    pub fn socket_create(
1296        &mut self,
1297        requester_driver: &str,
1298        pid: u32,
1299        spec: SocketSpec,
1300    ) -> KernelResult<SocketId> {
1301        self.assert_not_terminated()?;
1302        self.assert_driver_owns(requester_driver, pid)?;
1303        self.resources
1304            .check_socket_allocation(&self.resource_snapshot())?;
1305        Ok(self.sockets.allocate(pid, spec).id())
1306    }
1307
1308    pub fn socket_get(&self, socket_id: SocketId) -> Option<SocketRecord> {
1309        self.sockets.get(socket_id)
1310    }
1311
1312    pub fn socket_bind_inet(
1313        &mut self,
1314        requester_driver: &str,
1315        pid: u32,
1316        socket_id: SocketId,
1317        address: InetSocketAddress,
1318    ) -> KernelResult<()> {
1319        self.assert_not_terminated()?;
1320        self.assert_driver_owns(requester_driver, pid)?;
1321        let existing = self
1322            .sockets
1323            .get(socket_id)
1324            .ok_or_else(|| KernelError::new("ENOENT", format!("no such socket {socket_id}")))?;
1325        if existing.owner_pid() != pid {
1326            return Err(KernelError::permission_denied(format!(
1327                "process {pid} does not own socket {socket_id}"
1328            )));
1329        }
1330
1331        self.sockets.bind_inet(socket_id, address)?;
1332        self.poll_notifier.notify();
1333        Ok(())
1334    }
1335
1336    pub fn socket_bind_unix(
1337        &mut self,
1338        requester_driver: &str,
1339        pid: u32,
1340        socket_id: SocketId,
1341        path: impl Into<String>,
1342    ) -> KernelResult<()> {
1343        self.assert_not_terminated()?;
1344        self.assert_driver_owns(requester_driver, pid)?;
1345        let existing = self
1346            .sockets
1347            .get(socket_id)
1348            .ok_or_else(|| KernelError::new("ENOENT", format!("no such socket {socket_id}")))?;
1349        if existing.owner_pid() != pid {
1350            return Err(KernelError::permission_denied(format!(
1351                "process {pid} does not own socket {socket_id}"
1352            )));
1353        }
1354
1355        self.sockets
1356            .bind_unix(socket_id, normalize_path(&path.into()))?;
1357        self.poll_notifier.notify();
1358        Ok(())
1359    }
1360
1361    pub fn socket_listen(
1362        &mut self,
1363        requester_driver: &str,
1364        pid: u32,
1365        socket_id: SocketId,
1366        backlog: usize,
1367    ) -> KernelResult<()> {
1368        self.assert_not_terminated()?;
1369        self.assert_driver_owns(requester_driver, pid)?;
1370        let existing = self
1371            .sockets
1372            .get(socket_id)
1373            .ok_or_else(|| KernelError::new("ENOENT", format!("no such socket {socket_id}")))?;
1374        if existing.owner_pid() != pid {
1375            return Err(KernelError::permission_denied(format!(
1376                "process {pid} does not own socket {socket_id}"
1377            )));
1378        }
1379
1380        self.sockets.listen(socket_id, backlog)?;
1381        self.poll_notifier.notify();
1382        Ok(())
1383    }
1384
1385    pub fn socket_queue_incoming_tcp_connection(
1386        &mut self,
1387        requester_driver: &str,
1388        pid: u32,
1389        listener_socket_id: SocketId,
1390        peer_address: InetSocketAddress,
1391    ) -> KernelResult<()> {
1392        self.assert_not_terminated()?;
1393        self.assert_driver_owns(requester_driver, pid)?;
1394        let existing = self.sockets.get(listener_socket_id).ok_or_else(|| {
1395            KernelError::new("ENOENT", format!("no such socket {listener_socket_id}"))
1396        })?;
1397        if existing.owner_pid() != pid {
1398            return Err(KernelError::permission_denied(format!(
1399                "process {pid} does not own socket {listener_socket_id}"
1400            )));
1401        }
1402
1403        self.sockets
1404            .enqueue_incoming_tcp_connection(listener_socket_id, peer_address)?;
1405        self.poll_notifier.notify();
1406        Ok(())
1407    }
1408
1409    pub fn socket_accept(
1410        &mut self,
1411        requester_driver: &str,
1412        pid: u32,
1413        listener_socket_id: SocketId,
1414    ) -> KernelResult<SocketId> {
1415        self.assert_not_terminated()?;
1416        self.assert_driver_owns(requester_driver, pid)?;
1417        let existing = self.sockets.get(listener_socket_id).ok_or_else(|| {
1418            KernelError::new("ENOENT", format!("no such socket {listener_socket_id}"))
1419        })?;
1420        if existing.owner_pid() != pid {
1421            return Err(KernelError::permission_denied(format!(
1422                "process {pid} does not own socket {listener_socket_id}"
1423            )));
1424        }
1425
1426        let snapshot = self.resource_snapshot();
1427        self.resources.check_socket_allocation(&snapshot)?;
1428        self.resources.check_socket_state_transition(
1429            &snapshot,
1430            SocketState::Created,
1431            SocketState::Connected,
1432        )?;
1433
1434        let socket_id = self.sockets.accept(listener_socket_id)?.id();
1435        self.poll_notifier.notify();
1436        Ok(socket_id)
1437    }
1438
1439    pub fn socket_connect_pair(
1440        &mut self,
1441        requester_driver: &str,
1442        pid: u32,
1443        socket_id: SocketId,
1444        peer_socket_id: SocketId,
1445    ) -> KernelResult<()> {
1446        self.assert_not_terminated()?;
1447        self.assert_driver_owns(requester_driver, pid)?;
1448        let existing = self
1449            .sockets
1450            .get(socket_id)
1451            .ok_or_else(|| KernelError::new("ENOENT", format!("no such socket {socket_id}")))?;
1452        if existing.owner_pid() != pid {
1453            return Err(KernelError::permission_denied(format!(
1454                "process {pid} does not own socket {socket_id}"
1455            )));
1456        }
1457
1458        let peer = self.sockets.get(peer_socket_id).ok_or_else(|| {
1459            KernelError::new("ENOENT", format!("no such socket {peer_socket_id}"))
1460        })?;
1461        self.assert_driver_owns(requester_driver, peer.owner_pid())?;
1462
1463        let mut snapshot = self.resource_snapshot();
1464        for current_state in [existing.state(), peer.state()] {
1465            self.resources.check_socket_state_transition(
1466                &snapshot,
1467                current_state,
1468                SocketState::Connected,
1469            )?;
1470            if !current_state.counts_as_connection() {
1471                snapshot.socket_connections = snapshot.socket_connections.saturating_add(1);
1472            }
1473        }
1474
1475        self.sockets.connect_pair(socket_id, peer_socket_id)?;
1476        self.poll_notifier.notify();
1477        Ok(())
1478    }
1479
1480    pub fn socket_connect_unix(
1481        &mut self,
1482        requester_driver: &str,
1483        pid: u32,
1484        socket_id: SocketId,
1485        target_path: impl Into<String>,
1486    ) -> KernelResult<()> {
1487        self.assert_not_terminated()?;
1488        self.assert_driver_owns(requester_driver, pid)?;
1489        let existing = self
1490            .sockets
1491            .get(socket_id)
1492            .ok_or_else(|| KernelError::new("ENOENT", format!("no such socket {socket_id}")))?;
1493        if existing.owner_pid() != pid {
1494            return Err(KernelError::permission_denied(format!(
1495                "process {pid} does not own socket {socket_id}"
1496            )));
1497        }
1498
1499        let target_path = normalize_path(&target_path.into());
1500        self.sockets
1501            .find_bound_unix_socket(&target_path)
1502            .ok_or_else(|| {
1503                KernelError::new(
1504                    "ECONNREFUSED",
1505                    format!("no listening socket bound at path {target_path}"),
1506                )
1507            })?;
1508
1509        let mut snapshot = self.resource_snapshot();
1510        self.resources.check_socket_allocation(&snapshot)?;
1511        for current_state in [existing.state(), SocketState::Created] {
1512            self.resources.check_socket_state_transition(
1513                &snapshot,
1514                current_state,
1515                SocketState::Connected,
1516            )?;
1517            if !current_state.counts_as_connection() {
1518                snapshot.socket_connections = snapshot.socket_connections.saturating_add(1);
1519            }
1520        }
1521
1522        self.sockets
1523            .connect_to_bound_unix_stream(socket_id, target_path)?;
1524        self.poll_notifier.notify();
1525        Ok(())
1526    }
1527
1528    pub fn socket_connect_inet_loopback(
1529        &mut self,
1530        requester_driver: &str,
1531        pid: u32,
1532        socket_id: SocketId,
1533        target_address: InetSocketAddress,
1534    ) -> KernelResult<()> {
1535        self.assert_not_terminated()?;
1536        self.assert_driver_owns(requester_driver, pid)?;
1537        let existing = self
1538            .sockets
1539            .get(socket_id)
1540            .ok_or_else(|| KernelError::new("ENOENT", format!("no such socket {socket_id}")))?;
1541        if existing.owner_pid() != pid {
1542            return Err(KernelError::permission_denied(format!(
1543                "process {pid} does not own socket {socket_id}"
1544            )));
1545        }
1546
1547        self.sockets
1548            .find_bound_inet_socket(SocketSpec::tcp(), &target_address)
1549            .ok_or_else(|| {
1550                KernelError::new(
1551                    "ECONNREFUSED",
1552                    format!(
1553                        "no listening socket bound at {}:{}",
1554                        target_address.host(),
1555                        target_address.port()
1556                    ),
1557                )
1558            })?;
1559
1560        let mut snapshot = self.resource_snapshot();
1561        self.resources.check_socket_allocation(&snapshot)?;
1562        for current_state in [existing.state(), SocketState::Created] {
1563            self.resources.check_socket_state_transition(
1564                &snapshot,
1565                current_state,
1566                SocketState::Connected,
1567            )?;
1568            if !current_state.counts_as_connection() {
1569                snapshot.socket_connections = snapshot.socket_connections.saturating_add(1);
1570            }
1571        }
1572
1573        self.sockets
1574            .connect_to_bound_inet_stream(socket_id, target_address)?;
1575        self.poll_notifier.notify();
1576        Ok(())
1577    }
1578
1579    pub fn socket_send_to_inet_loopback(
1580        &mut self,
1581        requester_driver: &str,
1582        pid: u32,
1583        socket_id: SocketId,
1584        target_address: InetSocketAddress,
1585        data: &[u8],
1586    ) -> KernelResult<usize> {
1587        self.assert_not_terminated()?;
1588        self.assert_driver_owns(requester_driver, pid)?;
1589        let existing = self
1590            .sockets
1591            .get(socket_id)
1592            .ok_or_else(|| KernelError::new("ENOENT", format!("no such socket {socket_id}")))?;
1593        if existing.owner_pid() != pid {
1594            return Err(KernelError::permission_denied(format!(
1595                "process {pid} does not own socket {socket_id}"
1596            )));
1597        }
1598
1599        self.sockets
1600            .check_send_to_bound_udp_socket(socket_id, target_address.clone())?;
1601        self.resources
1602            .check_socket_datagram_enqueue(&self.resource_snapshot(), data.len())?;
1603        let written = self
1604            .sockets
1605            .send_to_bound_udp_socket(socket_id, target_address, data)?;
1606        if written > 0 {
1607            self.poll_notifier.notify();
1608        }
1609        Ok(written)
1610    }
1611
1612    pub fn socket_recv_datagram(
1613        &mut self,
1614        requester_driver: &str,
1615        pid: u32,
1616        socket_id: SocketId,
1617        max_bytes: usize,
1618    ) -> KernelResult<Option<ReceivedDatagram>> {
1619        self.assert_not_terminated()?;
1620        self.assert_driver_owns(requester_driver, pid)?;
1621        let existing = self
1622            .sockets
1623            .get(socket_id)
1624            .ok_or_else(|| KernelError::new("ENOENT", format!("no such socket {socket_id}")))?;
1625        if existing.owner_pid() != pid {
1626            return Err(KernelError::permission_denied(format!(
1627                "process {pid} does not own socket {socket_id}"
1628            )));
1629        }
1630
1631        let result = self.sockets.recv_datagram(socket_id, max_bytes)?;
1632        if result.is_some() {
1633            self.poll_notifier.notify();
1634        }
1635        Ok(result)
1636    }
1637
1638    pub fn socket_set_datagram_option(
1639        &mut self,
1640        requester_driver: &str,
1641        pid: u32,
1642        socket_id: SocketId,
1643        option: DatagramSocketOption,
1644        enabled: bool,
1645    ) -> KernelResult<()> {
1646        self.assert_not_terminated()?;
1647        self.assert_driver_owns(requester_driver, pid)?;
1648        let existing = self
1649            .sockets
1650            .get(socket_id)
1651            .ok_or_else(|| KernelError::new("ENOENT", format!("no such socket {socket_id}")))?;
1652        if existing.owner_pid() != pid {
1653            return Err(KernelError::permission_denied(format!(
1654                "process {pid} does not own socket {socket_id}"
1655            )));
1656        }
1657
1658        self.sockets
1659            .set_datagram_socket_option(socket_id, option, enabled)?;
1660        self.poll_notifier.notify();
1661        Ok(())
1662    }
1663
1664    pub fn socket_add_membership(
1665        &mut self,
1666        requester_driver: &str,
1667        pid: u32,
1668        socket_id: SocketId,
1669        membership: SocketMulticastMembership,
1670    ) -> KernelResult<()> {
1671        self.assert_not_terminated()?;
1672        self.assert_driver_owns(requester_driver, pid)?;
1673        let existing = self
1674            .sockets
1675            .get(socket_id)
1676            .ok_or_else(|| KernelError::new("ENOENT", format!("no such socket {socket_id}")))?;
1677        if existing.owner_pid() != pid {
1678            return Err(KernelError::permission_denied(format!(
1679                "process {pid} does not own socket {socket_id}"
1680            )));
1681        }
1682
1683        self.sockets
1684            .add_multicast_membership(socket_id, membership)?;
1685        self.poll_notifier.notify();
1686        Ok(())
1687    }
1688
1689    pub fn socket_drop_membership(
1690        &mut self,
1691        requester_driver: &str,
1692        pid: u32,
1693        socket_id: SocketId,
1694        membership: SocketMulticastMembership,
1695    ) -> KernelResult<()> {
1696        self.assert_not_terminated()?;
1697        self.assert_driver_owns(requester_driver, pid)?;
1698        let existing = self
1699            .sockets
1700            .get(socket_id)
1701            .ok_or_else(|| KernelError::new("ENOENT", format!("no such socket {socket_id}")))?;
1702        if existing.owner_pid() != pid {
1703            return Err(KernelError::permission_denied(format!(
1704                "process {pid} does not own socket {socket_id}"
1705            )));
1706        }
1707
1708        self.sockets
1709            .drop_multicast_membership(socket_id, membership)?;
1710        self.poll_notifier.notify();
1711        Ok(())
1712    }
1713
1714    pub fn socket_set_state(
1715        &mut self,
1716        requester_driver: &str,
1717        pid: u32,
1718        socket_id: SocketId,
1719        state: SocketState,
1720    ) -> KernelResult<()> {
1721        self.assert_not_terminated()?;
1722        self.assert_driver_owns(requester_driver, pid)?;
1723        let existing = self
1724            .sockets
1725            .get(socket_id)
1726            .ok_or_else(|| KernelError::new("ENOENT", format!("no such socket {socket_id}")))?;
1727        if existing.owner_pid() != pid {
1728            return Err(KernelError::permission_denied(format!(
1729                "process {pid} does not own socket {socket_id}"
1730            )));
1731        }
1732
1733        self.resources.check_socket_state_transition(
1734            &self.resource_snapshot(),
1735            existing.state(),
1736            state,
1737        )?;
1738        self.sockets.update_state(socket_id, state)?;
1739        self.poll_notifier.notify();
1740        Ok(())
1741    }
1742
1743    pub fn socket_write(
1744        &mut self,
1745        requester_driver: &str,
1746        pid: u32,
1747        socket_id: SocketId,
1748        data: &[u8],
1749    ) -> KernelResult<usize> {
1750        self.assert_not_terminated()?;
1751        self.assert_driver_owns(requester_driver, pid)?;
1752        let existing = self
1753            .sockets
1754            .get(socket_id)
1755            .ok_or_else(|| KernelError::new("ENOENT", format!("no such socket {socket_id}")))?;
1756        if existing.owner_pid() != pid {
1757            return Err(KernelError::permission_denied(format!(
1758                "process {pid} does not own socket {socket_id}"
1759            )));
1760        }
1761
1762        self.sockets.check_write(socket_id)?;
1763        self.resources
1764            .check_socket_buffer_growth(&self.resource_snapshot(), data.len())?;
1765        let written = self.sockets.write(socket_id, data)?;
1766        if written > 0 {
1767            self.poll_notifier.notify();
1768        }
1769        Ok(written)
1770    }
1771
1772    pub fn socket_read(
1773        &mut self,
1774        requester_driver: &str,
1775        pid: u32,
1776        socket_id: SocketId,
1777        max_bytes: usize,
1778    ) -> KernelResult<Option<Vec<u8>>> {
1779        self.assert_not_terminated()?;
1780        self.assert_driver_owns(requester_driver, pid)?;
1781        let existing = self
1782            .sockets
1783            .get(socket_id)
1784            .ok_or_else(|| KernelError::new("ENOENT", format!("no such socket {socket_id}")))?;
1785        if existing.owner_pid() != pid {
1786            return Err(KernelError::permission_denied(format!(
1787                "process {pid} does not own socket {socket_id}"
1788            )));
1789        }
1790
1791        let result = self.sockets.read(socket_id, max_bytes)?;
1792        if result.is_some() {
1793            self.poll_notifier.notify();
1794        }
1795        Ok(result)
1796    }
1797
1798    pub fn socket_shutdown(
1799        &mut self,
1800        requester_driver: &str,
1801        pid: u32,
1802        socket_id: SocketId,
1803        how: SocketShutdown,
1804    ) -> KernelResult<()> {
1805        self.assert_not_terminated()?;
1806        self.assert_driver_owns(requester_driver, pid)?;
1807        let existing = self
1808            .sockets
1809            .get(socket_id)
1810            .ok_or_else(|| KernelError::new("ENOENT", format!("no such socket {socket_id}")))?;
1811        if existing.owner_pid() != pid {
1812            return Err(KernelError::permission_denied(format!(
1813                "process {pid} does not own socket {socket_id}"
1814            )));
1815        }
1816
1817        self.sockets.shutdown(socket_id, how)?;
1818        self.poll_notifier.notify();
1819        Ok(())
1820    }
1821
1822    pub fn socket_close(
1823        &mut self,
1824        requester_driver: &str,
1825        pid: u32,
1826        socket_id: SocketId,
1827    ) -> KernelResult<()> {
1828        self.assert_not_terminated()?;
1829        self.assert_driver_owns(requester_driver, pid)?;
1830        let existing = self
1831            .sockets
1832            .get(socket_id)
1833            .ok_or_else(|| KernelError::new("ENOENT", format!("no such socket {socket_id}")))?;
1834        if existing.owner_pid() != pid {
1835            return Err(KernelError::permission_denied(format!(
1836                "process {pid} does not own socket {socket_id}"
1837            )));
1838        }
1839
1840        self.sockets.remove(socket_id)?;
1841        self.poll_notifier.notify();
1842        Ok(())
1843    }
1844
1845    pub fn fd_open(
1846        &mut self,
1847        requester_driver: &str,
1848        pid: u32,
1849        path: &str,
1850        flags: u32,
1851        mode: Option<u32>,
1852    ) -> KernelResult<u32> {
1853        self.assert_not_terminated()?;
1854        self.assert_driver_owns(requester_driver, pid)?;
1855        if let Some(existing_fd) = parse_dev_fd_path(path)? {
1856            {
1857                let tables = lock_or_recover(&self.fd_tables);
1858                let table = tables
1859                    .get(pid)
1860                    .ok_or_else(|| KernelError::no_such_process(pid))?;
1861                table
1862                    .get(existing_fd)
1863                    .ok_or_else(|| KernelError::bad_file_descriptor(existing_fd))?;
1864            }
1865            self.resources
1866                .check_fd_allocation(&self.resource_snapshot(), 1)?;
1867            let mut tables = lock_or_recover(&self.fd_tables);
1868            let table = tables
1869                .get_mut(pid)
1870                .ok_or_else(|| KernelError::no_such_process(pid))?;
1871            let entry = table
1872                .get(existing_fd)
1873                .cloned()
1874                .ok_or_else(|| KernelError::bad_file_descriptor(existing_fd))?;
1875            return Ok(table.dup_with_status_flags(
1876                existing_fd,
1877                Some(entry.status_flags | (flags & O_NONBLOCK)),
1878            )?);
1879        }
1880
1881        if let Some(proc_node) = self.resolve_proc_node(path, Some(pid))? {
1882            if open_requires_write_access(flags) {
1883                self.filesystem
1884                    .check_virtual_path(FsOperation::Write, path)
1885                    .map_err(KernelError::from)?;
1886                return Err(read_only_filesystem_error(path));
1887            }
1888
1889            if matches!(
1890                proc_node,
1891                ProcNode::SelfLink { .. }
1892                    | ProcNode::PidCwdLink { .. }
1893                    | ProcNode::PidFdLink { .. }
1894            ) {
1895                let target = self.proc_symlink_target(&proc_node)?;
1896                return self.fd_open(requester_driver, pid, &target, flags, mode);
1897            }
1898
1899            self.filesystem
1900                .check_virtual_path(FsOperation::Read, path)
1901                .map_err(KernelError::from)?;
1902            self.resources
1903                .check_fd_allocation(&self.resource_snapshot(), 1)?;
1904            let mut tables = lock_or_recover(&self.fd_tables);
1905            let table = tables
1906                .get_mut(pid)
1907                .ok_or_else(|| KernelError::no_such_process(pid))?;
1908            return Ok(table.open_with_details(
1909                &self.proc_canonical_path(&proc_node),
1910                flags,
1911                proc_filetype(&proc_node),
1912                None,
1913            )?);
1914        }
1915
1916        if open_requires_write_access(flags) {
1917            self.reject_read_only_resolved_write_path(path)?;
1918        }
1919        let existed = if flags & O_CREAT != 0 {
1920            self.exists_internal(Some(pid), path)?
1921        } else {
1922            false
1923        };
1924        let (filetype, lock_target) = self.prepare_fd_open(path, flags, mode)?;
1925        if flags & O_CREAT != 0 && !existed {
1926            let umask = self.processes.get_umask(pid)?;
1927            self.apply_creation_mode(path, mode.unwrap_or(0o666), umask)?;
1928        }
1929        self.resources
1930            .check_fd_allocation(&self.resource_snapshot(), 1)?;
1931        let mut tables = lock_or_recover(&self.fd_tables);
1932        let table = tables
1933            .get_mut(pid)
1934            .ok_or_else(|| KernelError::no_such_process(pid))?;
1935        Ok(table.open_with_details(path, flags, filetype, lock_target)?)
1936    }
1937
1938    pub fn fd_read(
1939        &mut self,
1940        requester_driver: &str,
1941        pid: u32,
1942        fd: u32,
1943        length: usize,
1944    ) -> KernelResult<Vec<u8>> {
1945        Ok(self
1946            .fd_read_with_timeout_result(requester_driver, pid, fd, length, None)?
1947            .unwrap_or_default())
1948    }
1949
1950    pub fn fd_read_with_timeout_result(
1951        &mut self,
1952        requester_driver: &str,
1953        pid: u32,
1954        fd: u32,
1955        length: usize,
1956        timeout: Option<Duration>,
1957    ) -> KernelResult<Option<Vec<u8>>> {
1958        self.assert_driver_owns(requester_driver, pid)?;
1959        let entry = {
1960            let tables = lock_or_recover(&self.fd_tables);
1961            tables
1962                .get(pid)
1963                .and_then(|table| table.get(fd))
1964                .cloned()
1965                .ok_or_else(|| KernelError::bad_file_descriptor(fd))?
1966        };
1967
1968        if self.pipes.is_pipe(entry.description.id()) {
1969            return Ok(self.pipes.read_with_timeout(
1970                entry.description.id(),
1971                length,
1972                if entry.status_flags & O_NONBLOCK != 0 {
1973                    Some(Duration::ZERO)
1974                } else {
1975                    timeout.or_else(|| self.blocking_read_timeout())
1976                },
1977            )?);
1978        }
1979
1980        if self.ptys.is_pty(entry.description.id()) {
1981            return Ok(self.ptys.read_with_timeout(
1982                entry.description.id(),
1983                length,
1984                if entry.status_flags & O_NONBLOCK != 0 {
1985                    Some(Duration::ZERO)
1986                } else {
1987                    timeout.or_else(|| self.blocking_read_timeout())
1988                },
1989            )?);
1990        }
1991
1992        self.resources.check_pread_length(length)?;
1993
1994        if is_proc_path(entry.description.path()) {
1995            let bytes = self.proc_read_file_from_open_path(Some(pid), entry.description.path())?;
1996            let start = entry.description.cursor() as usize;
1997            let end = start.saturating_add(length).min(bytes.len());
1998            let chunk = if start >= bytes.len() {
1999                Vec::new()
2000            } else {
2001                bytes[start..end].to_vec()
2002            };
2003            entry.description.set_cursor(
2004                entry
2005                    .description
2006                    .cursor()
2007                    .saturating_add(chunk.len() as u64),
2008            );
2009            return Ok(Some(chunk));
2010        }
2011
2012        let cursor = entry.description.cursor();
2013        let bytes = VirtualFileSystem::pread(
2014            &mut self.filesystem,
2015            entry.description.path(),
2016            cursor,
2017            length,
2018        )?;
2019        entry
2020            .description
2021            .set_cursor(cursor.saturating_add(bytes.len() as u64));
2022        Ok(Some(bytes))
2023    }
2024
2025    pub fn fd_write(
2026        &mut self,
2027        requester_driver: &str,
2028        pid: u32,
2029        fd: u32,
2030        data: &[u8],
2031    ) -> KernelResult<usize> {
2032        self.assert_driver_owns(requester_driver, pid)?;
2033        self.resources.check_fd_write_size(data.len())?;
2034        let entry = {
2035            let tables = lock_or_recover(&self.fd_tables);
2036            tables
2037                .get(pid)
2038                .and_then(|table| table.get(fd))
2039                .cloned()
2040                .ok_or_else(|| KernelError::bad_file_descriptor(fd))?
2041        };
2042
2043        if self.pipes.is_pipe(entry.description.id()) {
2044            return match self.pipes.write_with_mode(
2045                entry.description.id(),
2046                data,
2047                entry.status_flags & O_NONBLOCK != 0,
2048            ) {
2049                Ok(bytes) => Ok(bytes),
2050                Err(error) => {
2051                    if error.code() == "EPIPE" {
2052                        self.processes.kill(pid as i32, SIGPIPE)?;
2053                    }
2054                    Err(error.into())
2055                }
2056            };
2057        }
2058
2059        if self.ptys.is_pty(entry.description.id()) {
2060            return Ok(self.ptys.write(entry.description.id(), data)?);
2061        }
2062
2063        self.reject_read_only_resolved_write_path(entry.description.path())?;
2064
2065        let path = entry.description.path().to_owned();
2066        if is_virtual_device_storage_path(&path) {
2067            VirtualFileSystem::write_file(&mut self.filesystem, &path, data.to_vec())?;
2068            let cursor = entry.description.cursor();
2069            entry
2070                .description
2071                .set_cursor(cursor.saturating_add(data.len() as u64));
2072            return Ok(data.len());
2073        }
2074        let current_size = self.current_storage_file_size(&path)?;
2075        let cursor = entry.description.cursor();
2076        if entry.description.flags() & O_APPEND != 0 {
2077            let required_size = current_size.max(checked_write_end(current_size, data.len())?);
2078            self.check_path_resize_limits(&path, required_size)?;
2079            let new_len = VirtualFileSystem::append_file(&mut self.filesystem, &path, data)?;
2080            entry.description.set_cursor(new_len);
2081            return Ok(data.len());
2082        }
2083
2084        let required_size = current_size.max(checked_write_end(cursor, data.len())?);
2085        self.check_path_resize_limits(&path, required_size)?;
2086        VirtualFileSystem::pwrite(&mut self.filesystem, &path, data, cursor)?;
2087        entry
2088            .description
2089            .set_cursor(cursor.saturating_add(data.len() as u64));
2090        Ok(data.len())
2091    }
2092
2093    pub fn poll_fds(
2094        &self,
2095        requester_driver: &str,
2096        pid: u32,
2097        fds: Vec<PollFd>,
2098        timeout_ms: i32,
2099    ) -> KernelResult<PollResult> {
2100        let targets = fds
2101            .into_iter()
2102            .map(|poll_fd| PollTargetEntry::fd(poll_fd.fd, poll_fd.events))
2103            .collect::<Vec<_>>();
2104        let result = self.poll_targets(requester_driver, pid, targets, timeout_ms)?;
2105        Ok(PollResult {
2106            ready_count: result.ready_count,
2107            fds: result
2108                .targets
2109                .into_iter()
2110                .map(|target| match target.target {
2111                    PollTarget::Fd(fd) => PollFd {
2112                        fd,
2113                        events: target.events,
2114                        revents: target.revents,
2115                    },
2116                    PollTarget::Socket(_) => unreachable!("fd poll should only include fd targets"),
2117                })
2118                .collect(),
2119        })
2120    }
2121
2122    pub fn poll_targets(
2123        &self,
2124        requester_driver: &str,
2125        pid: u32,
2126        mut targets: Vec<PollTargetEntry>,
2127        timeout_ms: i32,
2128    ) -> KernelResult<PollTargetResult> {
2129        self.assert_driver_owns(requester_driver, pid)?;
2130        if timeout_ms < -1 {
2131            return Err(KernelError::new(
2132                "EINVAL",
2133                format!("invalid poll timeout {timeout_ms}"),
2134            ));
2135        }
2136
2137        let timeout = if timeout_ms < 0 {
2138            None
2139        } else {
2140            Some(Duration::from_millis(timeout_ms as u64))
2141        };
2142        let deadline = timeout.map(|duration| Instant::now() + duration);
2143
2144        loop {
2145            let observed_generation = self.poll_notifier.snapshot();
2146            let ready_count = self.populate_poll_target_revents(pid, &mut targets)?;
2147            if ready_count > 0 || matches!(timeout, Some(duration) if duration.is_zero()) {
2148                return Ok(PollTargetResult {
2149                    ready_count,
2150                    targets,
2151                });
2152            }
2153
2154            let remaining = deadline.map(|target| target.saturating_duration_since(Instant::now()));
2155            if matches!(remaining, Some(duration) if duration.is_zero()) {
2156                return Ok(PollTargetResult {
2157                    ready_count,
2158                    targets,
2159                });
2160            }
2161
2162            if !self
2163                .poll_notifier
2164                .wait_for_change(observed_generation, remaining)
2165            {
2166                return Ok(PollTargetResult {
2167                    ready_count,
2168                    targets,
2169                });
2170            }
2171        }
2172    }
2173
2174    pub fn fd_seek(
2175        &mut self,
2176        requester_driver: &str,
2177        pid: u32,
2178        fd: u32,
2179        offset: i64,
2180        whence: u8,
2181    ) -> KernelResult<u64> {
2182        self.assert_driver_owns(requester_driver, pid)?;
2183        let entry = {
2184            let tables = lock_or_recover(&self.fd_tables);
2185            tables
2186                .get(pid)
2187                .and_then(|table| table.get(fd))
2188                .cloned()
2189                .ok_or_else(|| KernelError::bad_file_descriptor(fd))?
2190        };
2191
2192        if self.pipes.is_pipe(entry.description.id()) || self.ptys.is_pty(entry.description.id()) {
2193            return Err(KernelError::new("ESPIPE", "illegal seek"));
2194        }
2195
2196        let base = match whence {
2197            SEEK_SET => 0_i128,
2198            SEEK_CUR => i128::from(entry.description.cursor()),
2199            SEEK_END => {
2200                let size = if is_proc_path(entry.description.path()) {
2201                    self.proc_stat_from_open_path(Some(pid), entry.description.path())?
2202                        .size
2203                } else {
2204                    self.filesystem.stat(entry.description.path())?.size
2205                };
2206                i128::from(size)
2207            }
2208            _ => {
2209                return Err(KernelError::new(
2210                    "EINVAL",
2211                    format!("invalid whence {whence}"),
2212                ))
2213            }
2214        };
2215        let next = base + i128::from(offset);
2216        if next < 0 {
2217            return Err(KernelError::new("EINVAL", "negative seek position"));
2218        }
2219        let next = u64::try_from(next)
2220            .map_err(|_| KernelError::new("EINVAL", "seek position out of range"))?;
2221        entry.description.set_cursor(next);
2222        Ok(next)
2223    }
2224
2225    pub fn fd_pread(
2226        &mut self,
2227        requester_driver: &str,
2228        pid: u32,
2229        fd: u32,
2230        length: usize,
2231        offset: u64,
2232    ) -> KernelResult<Vec<u8>> {
2233        self.assert_driver_owns(requester_driver, pid)?;
2234        self.resources.check_pread_length(length)?;
2235        let entry = {
2236            let tables = lock_or_recover(&self.fd_tables);
2237            tables
2238                .get(pid)
2239                .and_then(|table| table.get(fd))
2240                .cloned()
2241                .ok_or_else(|| KernelError::bad_file_descriptor(fd))?
2242        };
2243
2244        if self.pipes.is_pipe(entry.description.id()) || self.ptys.is_pty(entry.description.id()) {
2245            return Err(KernelError::new("ESPIPE", "illegal seek"));
2246        }
2247
2248        if is_proc_path(entry.description.path()) {
2249            let bytes = self.proc_read_file_from_open_path(Some(pid), entry.description.path())?;
2250            let start = usize::try_from(offset)
2251                .map_err(|_| KernelError::new("EINVAL", "pread offset out of range"))?;
2252            let end = start.saturating_add(length).min(bytes.len());
2253            return Ok(if start >= bytes.len() {
2254                Vec::new()
2255            } else {
2256                bytes[start..end].to_vec()
2257            });
2258        }
2259
2260        Ok(VirtualFileSystem::pread(
2261            &mut self.filesystem,
2262            entry.description.path(),
2263            offset,
2264            length,
2265        )?)
2266    }
2267
2268    pub fn fd_pwrite(
2269        &mut self,
2270        requester_driver: &str,
2271        pid: u32,
2272        fd: u32,
2273        data: &[u8],
2274        offset: u64,
2275    ) -> KernelResult<usize> {
2276        self.assert_driver_owns(requester_driver, pid)?;
2277        self.resources.check_fd_write_size(data.len())?;
2278        let entry = {
2279            let tables = lock_or_recover(&self.fd_tables);
2280            tables
2281                .get(pid)
2282                .and_then(|table| table.get(fd))
2283                .cloned()
2284                .ok_or_else(|| KernelError::bad_file_descriptor(fd))?
2285        };
2286
2287        if self.pipes.is_pipe(entry.description.id()) || self.ptys.is_pty(entry.description.id()) {
2288            return Err(KernelError::new("ESPIPE", "illegal seek"));
2289        }
2290
2291        self.reject_read_only_resolved_write_path(entry.description.path())?;
2292
2293        let required_size = self
2294            .current_storage_file_size(entry.description.path())?
2295            .max(checked_write_end(offset, data.len())?);
2296        self.check_path_resize_limits(entry.description.path(), required_size)?;
2297        VirtualFileSystem::pwrite(
2298            &mut self.filesystem,
2299            entry.description.path(),
2300            data.to_vec(),
2301            offset,
2302        )?;
2303        Ok(data.len())
2304    }
2305
2306    pub fn fd_dup(&mut self, requester_driver: &str, pid: u32, fd: u32) -> KernelResult<u32> {
2307        self.assert_driver_owns(requester_driver, pid)?;
2308        {
2309            let tables = lock_or_recover(&self.fd_tables);
2310            let table = tables
2311                .get(pid)
2312                .ok_or_else(|| KernelError::no_such_process(pid))?;
2313            table
2314                .get(fd)
2315                .ok_or_else(|| KernelError::bad_file_descriptor(fd))?;
2316        }
2317        self.resources
2318            .check_fd_allocation(&self.resource_snapshot(), 1)?;
2319        let mut tables = lock_or_recover(&self.fd_tables);
2320        let table = tables
2321            .get_mut(pid)
2322            .ok_or_else(|| KernelError::no_such_process(pid))?;
2323        Ok(table.dup(fd)?)
2324    }
2325
2326    pub fn fd_dup2(
2327        &mut self,
2328        requester_driver: &str,
2329        pid: u32,
2330        old_fd: u32,
2331        new_fd: u32,
2332    ) -> KernelResult<()> {
2333        self.assert_driver_owns(requester_driver, pid)?;
2334        let (replaced, needs_fd_growth) = {
2335            let tables = lock_or_recover(&self.fd_tables);
2336            let table = tables
2337                .get(pid)
2338                .ok_or_else(|| KernelError::no_such_process(pid))?;
2339            table
2340                .get(old_fd)
2341                .ok_or_else(|| KernelError::bad_file_descriptor(old_fd))?;
2342            let replaced = if old_fd == new_fd {
2343                None
2344            } else {
2345                table.get(new_fd).cloned()
2346            };
2347            if new_fd as usize >= table.max_fds() {
2348                return Err(KernelError::bad_file_descriptor(new_fd));
2349            }
2350            let needs_fd_growth = old_fd != new_fd && replaced.is_none();
2351            (replaced, needs_fd_growth)
2352        };
2353        if needs_fd_growth {
2354            self.resources
2355                .check_fd_allocation(&self.resource_snapshot(), 1)?;
2356        }
2357        {
2358            let mut tables = lock_or_recover(&self.fd_tables);
2359            let table = tables
2360                .get_mut(pid)
2361                .ok_or_else(|| KernelError::no_such_process(pid))?;
2362            table.dup2(old_fd, new_fd)?;
2363        }
2364
2365        if let Some(entry) = replaced {
2366            self.close_special_resource_if_needed(&entry.description, entry.filetype);
2367        }
2368        Ok(())
2369    }
2370
2371    pub fn fd_close(&mut self, requester_driver: &str, pid: u32, fd: u32) -> KernelResult<()> {
2372        self.assert_driver_owns(requester_driver, pid)?;
2373        let (description, filetype) = {
2374            let mut tables = lock_or_recover(&self.fd_tables);
2375            let table = tables
2376                .get_mut(pid)
2377                .ok_or_else(|| KernelError::no_such_process(pid))?;
2378            let entry = table
2379                .get(fd)
2380                .cloned()
2381                .ok_or_else(|| KernelError::bad_file_descriptor(fd))?;
2382            table.close(fd);
2383            (entry.description, entry.filetype)
2384        };
2385        self.close_special_resource_if_needed(&description, filetype);
2386        Ok(())
2387    }
2388
2389    pub fn fd_fcntl(
2390        &mut self,
2391        requester_driver: &str,
2392        pid: u32,
2393        fd: u32,
2394        command: u32,
2395        arg: u32,
2396    ) -> KernelResult<u32> {
2397        self.assert_driver_owns(requester_driver, pid)?;
2398        if command == F_DUPFD {
2399            {
2400                let tables = lock_or_recover(&self.fd_tables);
2401                let table = tables
2402                    .get(pid)
2403                    .ok_or_else(|| KernelError::no_such_process(pid))?;
2404                table
2405                    .get(fd)
2406                    .ok_or_else(|| KernelError::bad_file_descriptor(fd))?;
2407                if arg as usize >= table.max_fds() {
2408                    return Err(KernelError::new(
2409                        "EINVAL",
2410                        format!("fd {arg} exceeds process fd limit"),
2411                    ));
2412                }
2413            }
2414            self.resources
2415                .check_fd_allocation(&self.resource_snapshot(), 1)?;
2416        }
2417        let mut tables = lock_or_recover(&self.fd_tables);
2418        let table = tables
2419            .get_mut(pid)
2420            .ok_or_else(|| KernelError::no_such_process(pid))?;
2421        let result = table.fcntl(fd, command, arg)?;
2422        if command == F_DUPFD {
2423            self.poll_notifier.notify();
2424        }
2425        Ok(result)
2426    }
2427
2428    pub fn fd_flock(
2429        &self,
2430        requester_driver: &str,
2431        pid: u32,
2432        fd: u32,
2433        operation: u32,
2434    ) -> KernelResult<()> {
2435        self.assert_driver_owns(requester_driver, pid)?;
2436        let entry = {
2437            let tables = lock_or_recover(&self.fd_tables);
2438            tables
2439                .get(pid)
2440                .and_then(|table| table.get(fd))
2441                .cloned()
2442                .ok_or_else(|| KernelError::bad_file_descriptor(fd))?
2443        };
2444
2445        if entry.filetype != FILETYPE_REGULAR_FILE {
2446            return Err(KernelError::new(
2447                "EBADF",
2448                format!("file descriptor {fd} does not support advisory locking"),
2449            ));
2450        }
2451
2452        let target = entry.description.lock_target().ok_or_else(|| {
2453            KernelError::new(
2454                "EBADF",
2455                format!("file descriptor {fd} is missing advisory lock metadata"),
2456            )
2457        })?;
2458        let operation = FlockOperation::from_bits(operation)?;
2459        self.file_locks
2460            .apply(entry.description.id(), target, operation)?;
2461        Ok(())
2462    }
2463
2464    pub fn fd_stat(&self, requester_driver: &str, pid: u32, fd: u32) -> KernelResult<FdStat> {
2465        self.assert_driver_owns(requester_driver, pid)?;
2466        let tables = lock_or_recover(&self.fd_tables);
2467        Ok(tables
2468            .get(pid)
2469            .ok_or_else(|| KernelError::no_such_process(pid))?
2470            .stat(fd)?)
2471    }
2472
2473    pub fn fd_path(&self, requester_driver: &str, pid: u32, fd: u32) -> KernelResult<String> {
2474        let description = self.description_for_fd(requester_driver, pid, fd)?;
2475        Ok(description.path().to_owned())
2476    }
2477
2478    pub fn isatty(&self, requester_driver: &str, pid: u32, fd: u32) -> KernelResult<bool> {
2479        self.assert_driver_owns(requester_driver, pid)?;
2480        let entry = {
2481            let tables = lock_or_recover(&self.fd_tables);
2482            tables
2483                .get(pid)
2484                .and_then(|table| table.get(fd))
2485                .cloned()
2486                .ok_or_else(|| KernelError::bad_file_descriptor(fd))?
2487        };
2488        Ok(self.ptys.is_slave(entry.description.id()))
2489    }
2490
2491    pub fn pty_set_discipline(
2492        &self,
2493        requester_driver: &str,
2494        pid: u32,
2495        fd: u32,
2496        config: LineDisciplineConfig,
2497    ) -> KernelResult<()> {
2498        let description = self.description_for_fd(requester_driver, pid, fd)?;
2499        self.ptys.set_discipline(description.id(), config)?;
2500        Ok(())
2501    }
2502
2503    pub fn pty_set_foreground_pgid(
2504        &self,
2505        requester_driver: &str,
2506        pid: u32,
2507        fd: u32,
2508        pgid: u32,
2509    ) -> KernelResult<()> {
2510        let description = self.description_for_fd(requester_driver, pid, fd)?;
2511        let requester_sid = self.processes.getsid(pid)?;
2512        let group = self
2513            .processes
2514            .list_processes()
2515            .into_values()
2516            .find(|process| process.pgid == pgid && process.status != ProcessStatus::Exited)
2517            .ok_or_else(|| KernelError::new("ESRCH", format!("no such process group {pgid}")))?;
2518        if group.sid != requester_sid {
2519            return Err(KernelError::permission_denied(
2520                "cannot set foreground process group in different session",
2521            ));
2522        }
2523        self.ptys.set_foreground_pgid(description.id(), pgid)?;
2524        Ok(())
2525    }
2526
2527    pub fn tcgetattr(&self, requester_driver: &str, pid: u32, fd: u32) -> KernelResult<Termios> {
2528        let description = self.description_for_fd(requester_driver, pid, fd)?;
2529        Ok(self.ptys.get_termios(description.id())?)
2530    }
2531
2532    pub fn tcsetattr(
2533        &self,
2534        requester_driver: &str,
2535        pid: u32,
2536        fd: u32,
2537        termios: PartialTermios,
2538    ) -> KernelResult<()> {
2539        let description = self.description_for_fd(requester_driver, pid, fd)?;
2540        self.ptys.set_termios(description.id(), termios)?;
2541        Ok(())
2542    }
2543
2544    pub fn tcgetpgrp(&self, requester_driver: &str, pid: u32, fd: u32) -> KernelResult<u32> {
2545        let description = self.description_for_fd(requester_driver, pid, fd)?;
2546        Ok(self.ptys.get_foreground_pgid(description.id())?)
2547    }
2548
2549    pub fn pty_resize(
2550        &self,
2551        requester_driver: &str,
2552        pid: u32,
2553        fd: u32,
2554        cols: u16,
2555        rows: u16,
2556    ) -> KernelResult<()> {
2557        let description = self.description_for_fd(requester_driver, pid, fd)?;
2558        let target_pgid = self.ptys.resize(description.id(), cols, rows)?;
2559        if let Some(pgid) = target_pgid {
2560            match self.processes.kill(-(pgid as i32), SIGWINCH) {
2561                Ok(()) => {}
2562                Err(error) if error.code() == "ESRCH" => {}
2563                Err(error) => return Err(error.into()),
2564            }
2565        }
2566        Ok(())
2567    }
2568
2569    pub fn signal_process(
2570        &self,
2571        requester_driver: &str,
2572        pid: i32,
2573        signal: i32,
2574    ) -> KernelResult<()> {
2575        if pid < 0 {
2576            let pgid = pid.unsigned_abs();
2577            let members = self
2578                .processes
2579                .list_processes()
2580                .into_values()
2581                .filter(|process| process.pgid == pgid && process.status != ProcessStatus::Exited)
2582                .collect::<Vec<_>>();
2583            if members.is_empty() {
2584                self.processes.kill(pid, signal)?;
2585                return Ok(());
2586            }
2587            if let Some(process) = members
2588                .iter()
2589                .find(|process| process.driver != requester_driver)
2590            {
2591                return Err(KernelError::permission_denied(format!(
2592                    "driver \"{requester_driver}\" does not own process group {pgid} containing PID {}",
2593                    process.pid
2594                )));
2595            }
2596            self.processes.kill(pid, signal)?;
2597            return Ok(());
2598        }
2599
2600        let pid = u32::try_from(pid)
2601            .map_err(|_| KernelError::new("EINVAL", format!("invalid pid {pid}")))?;
2602        self.assert_driver_owns(requester_driver, pid)?;
2603        self.processes.kill(pid as i32, signal)?;
2604        Ok(())
2605    }
2606
2607    pub fn kill_process(&self, requester_driver: &str, pid: u32, signal: i32) -> KernelResult<()> {
2608        let pid = i32::try_from(pid)
2609            .map_err(|_| KernelError::new("EINVAL", format!("pid {pid} exceeds i32::MAX")))?;
2610        self.signal_process(requester_driver, pid, signal)
2611    }
2612
2613    pub fn setpgid(&self, requester_driver: &str, pid: u32, pgid: u32) -> KernelResult<()> {
2614        self.assert_driver_owns(requester_driver, pid)?;
2615        let target_pgid = if pgid == 0 { pid } else { pgid };
2616        if target_pgid != pid {
2617            if let Some(group_owner) =
2618                self.processes
2619                    .list_processes()
2620                    .into_values()
2621                    .find(|process| {
2622                        process.pgid == target_pgid && process.status == ProcessStatus::Running
2623                    })
2624            {
2625                if group_owner.driver != requester_driver {
2626                    return Err(KernelError::permission_denied(format!(
2627                        "driver \"{requester_driver}\" cannot join process group {target_pgid} owned by \"{}\"",
2628                        group_owner.driver
2629                    )));
2630                }
2631            }
2632        }
2633        self.processes.setpgid(pid, pgid)?;
2634        Ok(())
2635    }
2636
2637    pub fn getpgid(&self, requester_driver: &str, pid: u32) -> KernelResult<u32> {
2638        self.assert_driver_owns(requester_driver, pid)?;
2639        Ok(self.processes.getpgid(pid)?)
2640    }
2641
2642    pub fn getpid(&self, requester_driver: &str, pid: u32) -> KernelResult<u32> {
2643        self.assert_driver_owns(requester_driver, pid)?;
2644        Ok(pid)
2645    }
2646
2647    pub fn sigprocmask(
2648        &self,
2649        requester_driver: &str,
2650        pid: u32,
2651        how: SigmaskHow,
2652        set: SignalSet,
2653    ) -> KernelResult<SignalSet> {
2654        self.assert_driver_owns(requester_driver, pid)?;
2655        Ok(self.processes.sigprocmask(pid, how, set)?)
2656    }
2657
2658    pub fn sigpending(&self, requester_driver: &str, pid: u32) -> KernelResult<SignalSet> {
2659        self.assert_driver_owns(requester_driver, pid)?;
2660        Ok(self.processes.sigpending(pid)?)
2661    }
2662
2663    pub fn getppid(&self, requester_driver: &str, pid: u32) -> KernelResult<u32> {
2664        self.assert_driver_owns(requester_driver, pid)?;
2665        Ok(self.processes.getppid(pid)?)
2666    }
2667
2668    pub fn setsid(&self, requester_driver: &str, pid: u32) -> KernelResult<u32> {
2669        self.assert_driver_owns(requester_driver, pid)?;
2670        Ok(self.processes.setsid(pid)?)
2671    }
2672
2673    pub fn getsid(&self, requester_driver: &str, pid: u32) -> KernelResult<u32> {
2674        self.assert_driver_owns(requester_driver, pid)?;
2675        Ok(self.processes.getsid(pid)?)
2676    }
2677
2678    pub fn dev_fd_read_dir(&self, requester_driver: &str, pid: u32) -> KernelResult<Vec<String>> {
2679        self.assert_driver_owns(requester_driver, pid)?;
2680        let tables = lock_or_recover(&self.fd_tables);
2681        let table = tables
2682            .get(pid)
2683            .ok_or_else(|| KernelError::no_such_process(pid))?;
2684        let entry_count = table.len();
2685        self.resources.check_readdir_entries(entry_count)?;
2686        Ok(table.iter().map(|entry| entry.fd.to_string()).collect())
2687    }
2688
2689    pub fn dev_fd_stat(
2690        &mut self,
2691        requester_driver: &str,
2692        pid: u32,
2693        fd: u32,
2694    ) -> KernelResult<VirtualStat> {
2695        self.assert_driver_owns(requester_driver, pid)?;
2696        let entry = {
2697            let tables = lock_or_recover(&self.fd_tables);
2698            tables
2699                .get(pid)
2700                .and_then(|table| table.get(fd))
2701                .cloned()
2702                .ok_or_else(|| KernelError::bad_file_descriptor(fd))?
2703        };
2704
2705        if self.pipes.is_pipe(entry.description.id()) || self.ptys.is_pty(entry.description.id()) {
2706            return Ok(synthetic_character_device_stat(entry.description.id()));
2707        }
2708
2709        if is_proc_path(entry.description.path()) {
2710            return self.proc_stat_from_open_path(Some(pid), entry.description.path());
2711        }
2712
2713        Ok(self.filesystem.stat(entry.description.path())?)
2714    }
2715
2716    pub fn dispose(&mut self) -> KernelResult<()> {
2717        if self.terminated {
2718            return Ok(());
2719        }
2720
2721        dispose_kernel_vm_resources(self);
2722        Ok(())
2723    }
2724
2725    fn prepare_fd_open(
2726        &mut self,
2727        path: &str,
2728        flags: u32,
2729        mode: Option<u32>,
2730    ) -> KernelResult<(u8, Option<FileLockTarget>)> {
2731        if open_requires_write_access(flags) {
2732            self.reject_read_only_resolved_write_path(path)?;
2733        }
2734
2735        if flags & O_CREAT != 0 && flags & O_EXCL != 0 {
2736            self.check_write_file_limits(path, 0)?;
2737            VirtualFileSystem::create_file_exclusive_with_mode(
2738                &mut self.filesystem,
2739                path,
2740                Vec::new(),
2741                mode,
2742            )?;
2743            let stat = VirtualFileSystem::stat(&mut self.filesystem, path)?;
2744            return Ok((
2745                filetype_for_path(path, &stat),
2746                Some(FileLockTarget::new(stat.ino)),
2747            ));
2748        }
2749
2750        let exists = self.filesystem.exists(path)?;
2751        if exists {
2752            if flags & O_TRUNC != 0 {
2753                self.check_truncate_limits(path, 0)?;
2754                VirtualFileSystem::truncate(&mut self.filesystem, path, 0)?;
2755            }
2756        } else if flags & O_CREAT != 0 {
2757            self.check_write_file_limits(path, 0)?;
2758            VirtualFileSystem::write_file_with_mode(&mut self.filesystem, path, Vec::new(), mode)?;
2759        } else {
2760            let _ = VirtualFileSystem::stat(&mut self.filesystem, path)?;
2761            unreachable!("stat should return an error when opening a missing path");
2762        }
2763
2764        let stat = VirtualFileSystem::stat(&mut self.filesystem, path)?;
2765        Ok((
2766            filetype_for_path(path, &stat),
2767            Some(FileLockTarget::new(stat.ino)),
2768        ))
2769    }
2770
2771    fn reject_read_only_write_path(&mut self, path: &str) -> KernelResult<()> {
2772        if is_proc_path(path) {
2773            self.filesystem
2774                .check_virtual_path(FsOperation::Write, path)
2775                .map_err(KernelError::from)?;
2776            return Err(read_only_filesystem_error(path));
2777        }
2778
2779        if is_agentos_path(path) {
2780            return Err(read_only_filesystem_error(path));
2781        }
2782
2783        Ok(())
2784    }
2785
2786    fn reject_read_only_resolved_write_path(&mut self, path: &str) -> KernelResult<()> {
2787        self.reject_read_only_write_path(path)?;
2788
2789        if let Some(resolved) = self.resolve_write_guard_path(path, true)? {
2790            if is_agentos_path(&resolved) {
2791                return Err(read_only_filesystem_error(&resolved));
2792            }
2793            if self.has_agentos_hardlink_alias(&resolved)? {
2794                return Err(read_only_filesystem_error(&resolved));
2795            }
2796        }
2797        if self.has_agentos_hardlink_alias(path)? {
2798            return Err(read_only_filesystem_error(path));
2799        }
2800
2801        Ok(())
2802    }
2803
2804    fn reject_read_only_entry_write_path(&mut self, path: &str) -> KernelResult<()> {
2805        self.reject_read_only_write_path(path)?;
2806
2807        if let Some(resolved) = self.resolve_write_guard_path(path, false)? {
2808            if is_agentos_path(&resolved) {
2809                return Err(read_only_filesystem_error(&resolved));
2810            }
2811            if self.has_agentos_hardlink_alias(&resolved)? {
2812                return Err(read_only_filesystem_error(&resolved));
2813            }
2814        }
2815        if self.has_agentos_hardlink_alias(path)? {
2816            return Err(read_only_filesystem_error(path));
2817        }
2818
2819        Ok(())
2820    }
2821
2822    fn has_agentos_hardlink_alias(&mut self, path: &str) -> KernelResult<bool> {
2823        let Some(target) = self.storage_lstat(path)? else {
2824            return Ok(false);
2825        };
2826        if target.is_directory || target.is_symbolic_link {
2827            return Ok(false);
2828        }
2829
2830        self.agentos_subtree_contains_inode("/etc/agentos", target.dev, target.ino)
2831    }
2832
2833    fn agentos_subtree_contains_inode(
2834        &mut self,
2835        path: &str,
2836        target_dev: u64,
2837        target_ino: u64,
2838    ) -> KernelResult<bool> {
2839        let Some(stat) = self.storage_lstat(path)? else {
2840            return Ok(false);
2841        };
2842        if !stat.is_directory && !stat.is_symbolic_link {
2843            return Ok(stat.dev == target_dev && stat.ino == target_ino);
2844        }
2845        if !stat.is_directory {
2846            return Ok(false);
2847        }
2848
2849        let children = self.raw_filesystem_mut().read_dir_with_types(path)?;
2850        for child in children {
2851            if child.name == "." || child.name == ".." {
2852                continue;
2853            }
2854            let child_path = join_absolute_path(path, &child.name);
2855            if self.agentos_subtree_contains_inode(&child_path, target_dev, target_ino)? {
2856                return Ok(true);
2857            }
2858        }
2859
2860        Ok(false)
2861    }
2862
2863    fn resolve_write_guard_path(
2864        &mut self,
2865        path: &str,
2866        follow_final_symlink: bool,
2867    ) -> KernelResult<Option<String>> {
2868        let normalized = normalize_path(path);
2869        if normalized == "/" {
2870            return Ok(Some(normalized));
2871        }
2872
2873        if follow_final_symlink {
2874            if let Ok(resolved) = self.filesystem.realpath(&normalized) {
2875                return Ok(Some(resolved));
2876            }
2877        }
2878
2879        let components: Vec<&str> = normalized
2880            .split('/')
2881            .filter(|component| !component.is_empty())
2882            .collect();
2883        let mut resolved_prefix = String::from("/");
2884        let mut raw_prefix = String::from("/");
2885
2886        for (index, component) in components.iter().enumerate() {
2887            let is_final = index + 1 == components.len();
2888            if is_final && !follow_final_symlink {
2889                return Ok(Some(join_absolute_path(&resolved_prefix, component)));
2890            }
2891
2892            raw_prefix = join_absolute_path(&raw_prefix, component);
2893            match self.filesystem.realpath(&raw_prefix) {
2894                Ok(resolved) => {
2895                    resolved_prefix = resolved;
2896                }
2897                Err(error) if error.code() == "ENOENT" => {
2898                    let mut resolved = resolved_prefix;
2899                    for remaining in &components[index..] {
2900                        resolved = join_absolute_path(&resolved, remaining);
2901                    }
2902                    return Ok(Some(resolved));
2903                }
2904                Err(error) => return Err(error.into()),
2905            }
2906        }
2907
2908        Ok(Some(resolved_prefix))
2909    }
2910
2911    fn populate_poll_target_revents(
2912        &self,
2913        pid: u32,
2914        targets: &mut [PollTargetEntry],
2915    ) -> KernelResult<usize> {
2916        let mut ready_count = 0;
2917        for target in targets.iter_mut() {
2918            target.revents = self.poll_target_entry(pid, target.target, target.events)?;
2919            if !target.revents.is_empty() {
2920                ready_count += 1;
2921            }
2922        }
2923
2924        Ok(ready_count)
2925    }
2926
2927    fn poll_target_entry(
2928        &self,
2929        pid: u32,
2930        target: PollTarget,
2931        requested: PollEvents,
2932    ) -> KernelResult<PollEvents> {
2933        match target {
2934            PollTarget::Fd(fd) => {
2935                let entry = {
2936                    let tables = lock_or_recover(&self.fd_tables);
2937                    tables
2938                        .get(pid)
2939                        .ok_or_else(|| KernelError::no_such_process(pid))?
2940                        .get(fd)
2941                        .cloned()
2942                };
2943                if let Some(entry) = entry {
2944                    self.poll_entry(&entry, requested)
2945                } else {
2946                    Ok(POLLNVAL)
2947                }
2948            }
2949            PollTarget::Socket(socket_id) => {
2950                let socket = self.sockets.get(socket_id);
2951                if let Some(socket) = socket {
2952                    if socket.owner_pid() != pid {
2953                        return Err(KernelError::permission_denied(format!(
2954                            "process {pid} does not own socket {socket_id}"
2955                        )));
2956                    }
2957                    let mut events = self.sockets.poll(socket_id, requested)?;
2958                    if events.intersects(POLLOUT)
2959                        && !self.socket_pollout_has_resource_capacity(&socket)
2960                    {
2961                        events = PollEvents::from_bits(events.bits() & !POLLOUT.bits());
2962                    }
2963                    Ok(events)
2964                } else {
2965                    Ok(POLLNVAL)
2966                }
2967            }
2968        }
2969    }
2970
2971    fn socket_pollout_has_resource_capacity(&self, socket: &SocketRecord) -> bool {
2972        let snapshot = self.resource_snapshot();
2973        if self
2974            .resources
2975            .limits()
2976            .max_socket_buffered_bytes
2977            .is_some_and(|limit| snapshot.socket_buffered_bytes >= limit)
2978        {
2979            return false;
2980        }
2981
2982        if socket.spec().socket_type == SocketType::Datagram
2983            && self
2984                .resources
2985                .limits()
2986                .max_socket_datagram_queue_len
2987                .is_some_and(|limit| snapshot.socket_datagram_queue_len >= limit)
2988        {
2989            return false;
2990        }
2991
2992        true
2993    }
2994
2995    fn poll_entry(
2996        &self,
2997        entry: &crate::fd_table::FdEntry,
2998        requested: PollEvents,
2999    ) -> KernelResult<PollEvents> {
3000        if self.pipes.is_pipe(entry.description.id()) {
3001            return Ok(self.pipes.poll(entry.description.id(), requested)?);
3002        }
3003
3004        if self.ptys.is_pty(entry.description.id()) {
3005            return Ok(self.ptys.poll(entry.description.id(), requested)?);
3006        }
3007
3008        let access_mode = entry.description.flags() & 0b11;
3009        let mut events = PollEvents::empty();
3010        if requested.intersects(POLLIN) && access_mode != crate::fd_table::O_WRONLY {
3011            events |= POLLIN;
3012        }
3013        if requested.intersects(POLLOUT) && access_mode != crate::fd_table::O_RDONLY {
3014            events |= POLLOUT;
3015        }
3016        if entry.filetype == FILETYPE_DIRECTORY && requested.intersects(POLLOUT) {
3017            events |= POLLERR;
3018        }
3019        if self.terminated {
3020            events |= POLLHUP;
3021        }
3022        Ok(events)
3023    }
3024
3025    fn description_for_fd(
3026        &self,
3027        requester_driver: &str,
3028        pid: u32,
3029        fd: u32,
3030    ) -> KernelResult<Arc<FileDescription>> {
3031        self.assert_driver_owns(requester_driver, pid)?;
3032        lock_or_recover(&self.fd_tables)
3033            .get(pid)
3034            .and_then(|table| table.get(fd))
3035            .map(|entry| Arc::clone(&entry.description))
3036            .ok_or_else(|| KernelError::bad_file_descriptor(fd))
3037    }
3038
3039    fn assert_not_terminated(&self) -> KernelResult<()> {
3040        if self.terminated {
3041            Err(KernelError::disposed())
3042        } else {
3043            Ok(())
3044        }
3045    }
3046
3047    fn assert_driver_owns(&self, requester_driver: &str, pid: u32) -> KernelResult<()> {
3048        let driver_pids = lock_or_recover(&self.driver_pids);
3049        if driver_pids
3050            .get(requester_driver)
3051            .map(|pids| pids.contains(&pid))
3052            .unwrap_or(false)
3053        {
3054            return Ok(());
3055        }
3056
3057        if driver_pids.values().any(|pids| pids.contains(&pid)) {
3058            return Err(KernelError::permission_denied(format!(
3059                "driver \"{requester_driver}\" does not own PID {pid}"
3060            )));
3061        }
3062
3063        Err(KernelError::no_such_process(pid))
3064    }
3065
3066    fn cleanup_process_resources(&self, pid: u32) {
3067        cleanup_process_resources(
3068            self.fd_tables.as_ref(),
3069            &self.file_locks,
3070            &self.pipes,
3071            &self.ptys,
3072            &self.sockets,
3073            self.driver_pids.as_ref(),
3074            pid,
3075        );
3076    }
3077
3078    fn resolve_spawn_command(
3079        &mut self,
3080        command: &str,
3081        args: &[String],
3082        cwd: &str,
3083    ) -> KernelResult<ResolvedSpawnCommand> {
3084        if let Some(driver) = self.commands.resolve(command).cloned() {
3085            return Ok(ResolvedSpawnCommand {
3086                command: command.to_owned(),
3087                args: args.to_vec(),
3088                driver,
3089            });
3090        }
3091
3092        let Some(path) = self.resolve_executable_path(command, cwd)? else {
3093            return Err(KernelError::command_not_found(command));
3094        };
3095
3096        if let Some(registered_command) = self.resolve_registered_command_path(&path) {
3097            let driver = self
3098                .commands
3099                .resolve(&registered_command)
3100                .cloned()
3101                .ok_or_else(|| KernelError::command_not_found(&registered_command))?;
3102            return Ok(ResolvedSpawnCommand {
3103                command: registered_command,
3104                args: args.to_vec(),
3105                driver,
3106            });
3107        }
3108
3109        let shebang = self
3110            .parse_shebang_command(&path)?
3111            .ok_or_else(|| KernelError::new("ENOEXEC", format!("exec format error: {path}")))?;
3112        self.resolve_shebang_command(&path, args, shebang)
3113    }
3114
3115    fn resolve_executable_path(
3116        &mut self,
3117        command: &str,
3118        cwd: &str,
3119    ) -> KernelResult<Option<String>> {
3120        if !command.contains('/') {
3121            return Ok(None);
3122        }
3123
3124        let path = if command.starts_with('/') {
3125            normalize_path(command)
3126        } else {
3127            normalize_path(&format!("{cwd}/{command}"))
3128        };
3129        let stat = self.filesystem.stat(&path)?;
3130        if stat.is_directory {
3131            return Err(KernelError::new(
3132                "EACCES",
3133                format!("permission denied, execute '{path}'"),
3134            ));
3135        }
3136        if stat.mode & EXECUTABLE_PERMISSION_BITS == 0 {
3137            return Err(KernelError::new(
3138                "EACCES",
3139                format!("permission denied, execute '{path}'"),
3140            ));
3141        }
3142        Ok(Some(path))
3143    }
3144
3145    fn resolve_registered_command_path(&self, path: &str) -> Option<String> {
3146        let normalized = normalize_path(path);
3147        for prefix in ["/bin/", "/usr/bin/", "/usr/local/bin/"] {
3148            let Some(name) = normalized.strip_prefix(prefix) else {
3149                continue;
3150            };
3151            if !name.is_empty() && !name.contains('/') && self.commands.resolve(name).is_some() {
3152                return Some(name.to_owned());
3153            }
3154        }
3155
3156        if let Some(name) = normalized
3157            .strip_prefix("/__secure_exec/commands/")
3158            .and_then(|suffix| suffix.rsplit('/').next())
3159        {
3160            if !name.is_empty() && !name.contains('/') && self.commands.resolve(name).is_some() {
3161                return Some(name.to_owned());
3162            }
3163        }
3164
3165        None
3166    }
3167
3168    fn parse_shebang_command(&mut self, path: &str) -> KernelResult<Option<ShebangCommand>> {
3169        let header = self.filesystem.pread(path, 0, SHEBANG_LINE_MAX_BYTES + 1)?;
3170        if !header.starts_with(b"#!") {
3171            return Ok(None);
3172        }
3173
3174        let line_end = match header.iter().position(|byte| *byte == b'\n') {
3175            Some(index) => index,
3176            None if header.len() <= SHEBANG_LINE_MAX_BYTES => header.len(),
3177            None => {
3178                return Err(KernelError::new(
3179                    "ENOEXEC",
3180                    format!("shebang line exceeds {SHEBANG_LINE_MAX_BYTES} bytes: {path}"),
3181                ))
3182            }
3183        };
3184        let line = header[2..line_end]
3185            .strip_suffix(b"\r")
3186            .unwrap_or(&header[2..line_end]);
3187        let text = std::str::from_utf8(line)
3188            .map_err(|_| KernelError::new("ENOEXEC", format!("invalid shebang line: {path}")))?;
3189        let mut parts = text.split_ascii_whitespace();
3190        let interpreter = parts
3191            .next()
3192            .ok_or_else(|| KernelError::new("ENOEXEC", format!("invalid shebang line: {path}")))?;
3193        Ok(Some(ShebangCommand {
3194            interpreter: interpreter.to_owned(),
3195            args: parts.map(ToOwned::to_owned).collect(),
3196        }))
3197    }
3198
3199    fn resolve_shebang_command(
3200        &self,
3201        path: &str,
3202        args: &[String],
3203        shebang: ShebangCommand,
3204    ) -> KernelResult<ResolvedSpawnCommand> {
3205        let mut interpreter_args = shebang.args;
3206        let interpreter = normalize_path(&shebang.interpreter);
3207        let command = if interpreter == "/usr/bin/env" || interpreter == "/bin/env" {
3208            if interpreter_args.is_empty() {
3209                return Err(KernelError::new(
3210                    "ENOENT",
3211                    format!("missing interpreter after /usr/bin/env in shebang: {path}"),
3212                ));
3213            }
3214            interpreter_args.remove(0)
3215        } else if let Some(command) = self.resolve_registered_command_path(&interpreter) {
3216            command
3217        } else if self.commands.resolve(&shebang.interpreter).is_some() {
3218            shebang.interpreter
3219        } else {
3220            return Err(KernelError::command_not_found(&shebang.interpreter));
3221        };
3222
3223        let driver = self
3224            .commands
3225            .resolve(&command)
3226            .cloned()
3227            .ok_or_else(|| KernelError::command_not_found(&command))?;
3228        let mut resolved_args = interpreter_args;
3229        resolved_args.push(path.to_owned());
3230        resolved_args.extend(args.iter().cloned());
3231        Ok(ResolvedSpawnCommand {
3232            command,
3233            args: resolved_args,
3234            driver,
3235        })
3236    }
3237
3238    fn finish_waitpid_event(&mut self, result: ProcessWaitResult) -> WaitPidEventResult {
3239        if result.event == WaitPidEvent::Exited {
3240            self.cleanup_process_resources(result.pid);
3241        }
3242        WaitPidEventResult {
3243            pid: result.pid,
3244            status: result.status,
3245            event: result.event,
3246        }
3247    }
3248
3249    fn raw_filesystem_mut(&mut self) -> &mut F {
3250        self.filesystem.inner_mut().inner_mut()
3251    }
3252
3253    fn read_file_internal(
3254        &mut self,
3255        current_pid: Option<u32>,
3256        path: &str,
3257    ) -> KernelResult<Vec<u8>> {
3258        if let Some(proc_node) = self.resolve_proc_node(path, current_pid)? {
3259            self.filesystem
3260                .check_virtual_path(FsOperation::Read, path)
3261                .map_err(KernelError::from)?;
3262            return self.proc_read_file(current_pid, &proc_node);
3263        }
3264
3265        Ok(self.filesystem.read_file(path)?)
3266    }
3267
3268    fn exists_internal(&self, current_pid: Option<u32>, path: &str) -> KernelResult<bool> {
3269        match self.resolve_proc_node(path, current_pid) {
3270            Ok(Some(_)) => {
3271                self.filesystem
3272                    .check_virtual_path(FsOperation::Read, path)
3273                    .map_err(KernelError::from)?;
3274                Ok(true)
3275            }
3276            Ok(None) => Ok(self.filesystem.exists(path)?),
3277            Err(error) if error.code() == "ENOENT" => Ok(false),
3278            Err(error) => Err(error),
3279        }
3280    }
3281
3282    fn stat_internal(&mut self, current_pid: Option<u32>, path: &str) -> KernelResult<VirtualStat> {
3283        if let Some(proc_node) = self.resolve_proc_node(path, current_pid)? {
3284            self.filesystem
3285                .check_virtual_path(FsOperation::Read, path)
3286                .map_err(KernelError::from)?;
3287            return self.proc_stat(current_pid, &proc_node);
3288        }
3289
3290        Ok(self.filesystem.stat(path)?)
3291    }
3292
3293    fn lstat_internal(&self, current_pid: Option<u32>, path: &str) -> KernelResult<VirtualStat> {
3294        if let Some(proc_node) = self.resolve_proc_node(path, current_pid)? {
3295            self.filesystem
3296                .check_virtual_path(FsOperation::Read, path)
3297                .map_err(KernelError::from)?;
3298            return self.proc_lstat(&proc_node);
3299        }
3300
3301        Ok(self.filesystem.lstat(path)?)
3302    }
3303
3304    fn read_link_internal(&self, current_pid: Option<u32>, path: &str) -> KernelResult<String> {
3305        if let Some(proc_node) = self.resolve_proc_node(path, current_pid)? {
3306            self.filesystem
3307                .check_virtual_path(FsOperation::Read, path)
3308                .map_err(KernelError::from)?;
3309            return self.proc_read_link(&proc_node);
3310        }
3311
3312        Ok(self.filesystem.read_link(path)?)
3313    }
3314
3315    fn read_dir_internal(
3316        &mut self,
3317        current_pid: Option<u32>,
3318        path: &str,
3319    ) -> KernelResult<Vec<String>> {
3320        if let Some(proc_node) = self.resolve_proc_node(path, current_pid)? {
3321            self.filesystem
3322                .check_virtual_path(FsOperation::Read, path)
3323                .map_err(KernelError::from)?;
3324            return self.proc_read_dir(current_pid, &proc_node);
3325        }
3326
3327        if let Some(limit) = self.resources.max_readdir_entries() {
3328            Ok(self.filesystem.read_dir_limited(path, limit)?)
3329        } else {
3330            Ok(self.filesystem.read_dir(path)?)
3331        }
3332    }
3333
3334    fn realpath_internal(&self, current_pid: Option<u32>, path: &str) -> KernelResult<String> {
3335        if let Some(proc_node) = self.resolve_proc_node(path, current_pid)? {
3336            self.filesystem
3337                .check_virtual_path(FsOperation::Read, path)
3338                .map_err(KernelError::from)?;
3339            return self.proc_realpath(current_pid, &proc_node);
3340        }
3341
3342        Ok(self.filesystem.realpath(path)?)
3343    }
3344
3345    fn resolve_proc_node(
3346        &self,
3347        path: &str,
3348        current_pid: Option<u32>,
3349    ) -> KernelResult<Option<ProcNode>> {
3350        let normalized = normalize_path(path);
3351        if !is_proc_path(&normalized) {
3352            return Ok(None);
3353        }
3354
3355        if normalized == "/proc" {
3356            return Ok(Some(ProcNode::RootDir));
3357        }
3358
3359        let suffix = normalized
3360            .strip_prefix("/proc/")
3361            .expect("proc path should have /proc prefix");
3362        let parts = suffix.split('/').collect::<Vec<_>>();
3363        if parts.is_empty() {
3364            return Ok(Some(ProcNode::RootDir));
3365        }
3366
3367        let root_node = match parts.as_slice() {
3368            ["mounts"] => Some(ProcNode::MountsFile),
3369            ["cpuinfo"] => Some(ProcNode::CpuInfoFile),
3370            ["meminfo"] => Some(ProcNode::MemInfoFile),
3371            ["loadavg"] => Some(ProcNode::LoadAvgFile),
3372            ["uptime"] => Some(ProcNode::UptimeFile),
3373            ["version"] => Some(ProcNode::VersionFile),
3374            _ => None,
3375        };
3376        if let Some(node) = root_node {
3377            return Ok(Some(node));
3378        }
3379
3380        let pid = match parts[0] {
3381            "self" => current_pid.ok_or_else(|| proc_not_found_error(&normalized))?,
3382            raw => raw
3383                .parse::<u32>()
3384                .map_err(|_| proc_not_found_error(&normalized))?,
3385        };
3386        self.proc_entry(pid)?;
3387
3388        let node = match parts.as_slice() {
3389            ["self"] => ProcNode::SelfLink { pid },
3390            [_pid] => ProcNode::PidDir { pid },
3391            [_pid, "fd"] => ProcNode::PidFdDir { pid },
3392            [_pid, "cmdline"] => ProcNode::PidCmdline { pid },
3393            [_pid, "environ"] => ProcNode::PidEnviron { pid },
3394            [_pid, "cwd"] => ProcNode::PidCwdLink { pid },
3395            [_pid, "stat"] => ProcNode::PidStatFile { pid },
3396            [_pid, "status"] => ProcNode::PidStatusFile { pid },
3397            [_pid, "fd", fd] => {
3398                let fd = fd
3399                    .parse::<u32>()
3400                    .map_err(|_| proc_not_found_error(&normalized))?;
3401                self.proc_fd_entry(pid, fd)?;
3402                ProcNode::PidFdLink { pid, fd }
3403            }
3404            _ => return Err(proc_not_found_error(&normalized)),
3405        };
3406
3407        Ok(Some(node))
3408    }
3409
3410    fn proc_entry(&self, pid: u32) -> KernelResult<crate::process_table::ProcessEntry> {
3411        self.processes
3412            .get(pid)
3413            .ok_or_else(|| proc_not_found_error(&format!("/proc/{pid}")))
3414    }
3415
3416    fn proc_fd_entry(&self, pid: u32, fd: u32) -> KernelResult<FdEntry> {
3417        lock_or_recover(&self.fd_tables)
3418            .get(pid)
3419            .and_then(|table| table.get(fd))
3420            .cloned()
3421            .ok_or_else(|| proc_not_found_error(&format!("/proc/{pid}/fd/{fd}")))
3422    }
3423
3424    fn proc_read_file(
3425        &mut self,
3426        current_pid: Option<u32>,
3427        node: &ProcNode,
3428    ) -> KernelResult<Vec<u8>> {
3429        match node {
3430            ProcNode::SelfLink { .. }
3431            | ProcNode::PidCwdLink { .. }
3432            | ProcNode::PidFdLink { .. } => {
3433                let target = self.proc_symlink_target(node)?;
3434                self.read_file_internal(current_pid, &target)
3435            }
3436            ProcNode::MountsFile => Ok(self.proc_mounts_bytes()),
3437            ProcNode::CpuInfoFile => Ok(self.proc_cpuinfo_bytes()),
3438            ProcNode::MemInfoFile => Ok(self.proc_meminfo_bytes()),
3439            ProcNode::LoadAvgFile => Ok(self.proc_loadavg_bytes()),
3440            ProcNode::UptimeFile => Ok(self.proc_uptime_bytes()),
3441            ProcNode::VersionFile => Ok(self.proc_version_bytes()),
3442            ProcNode::PidCmdline { pid } => Ok(self.proc_cmdline_bytes(*pid)),
3443            ProcNode::PidEnviron { pid } => Ok(self.proc_environ_bytes(*pid)),
3444            ProcNode::PidStatFile { pid } => Ok(self.proc_stat_bytes(*pid)),
3445            ProcNode::PidStatusFile { pid } => Ok(self.proc_status_bytes(*pid)),
3446            ProcNode::RootDir | ProcNode::PidDir { .. } | ProcNode::PidFdDir { .. } => {
3447                Err(KernelError::new(
3448                    "EISDIR",
3449                    format!(
3450                        "illegal operation on a directory, read '{}'",
3451                        self.proc_canonical_path(node)
3452                    ),
3453                ))
3454            }
3455        }
3456    }
3457
3458    fn proc_stat(
3459        &mut self,
3460        current_pid: Option<u32>,
3461        node: &ProcNode,
3462    ) -> KernelResult<VirtualStat> {
3463        match node {
3464            ProcNode::SelfLink { .. }
3465            | ProcNode::PidCwdLink { .. }
3466            | ProcNode::PidFdLink { .. } => {
3467                let target = self.proc_symlink_target(node)?;
3468                self.stat_internal(current_pid, &target)
3469            }
3470            _ => self.proc_lstat(node),
3471        }
3472    }
3473
3474    fn proc_lstat(&self, node: &ProcNode) -> KernelResult<VirtualStat> {
3475        match node {
3476            ProcNode::RootDir | ProcNode::PidDir { .. } | ProcNode::PidFdDir { .. } => {
3477                Ok(proc_dir_stat(proc_inode(node)))
3478            }
3479            ProcNode::MountsFile => Ok(proc_file_stat(
3480                proc_inode(node),
3481                self.proc_mounts_bytes().len() as u64,
3482            )),
3483            ProcNode::CpuInfoFile => Ok(proc_file_stat(
3484                proc_inode(node),
3485                self.proc_cpuinfo_bytes().len() as u64,
3486            )),
3487            ProcNode::MemInfoFile => Ok(proc_file_stat(
3488                proc_inode(node),
3489                self.proc_meminfo_bytes().len() as u64,
3490            )),
3491            ProcNode::LoadAvgFile => Ok(proc_file_stat(
3492                proc_inode(node),
3493                self.proc_loadavg_bytes().len() as u64,
3494            )),
3495            ProcNode::UptimeFile => Ok(proc_file_stat(
3496                proc_inode(node),
3497                self.proc_uptime_bytes().len() as u64,
3498            )),
3499            ProcNode::VersionFile => Ok(proc_file_stat(
3500                proc_inode(node),
3501                self.proc_version_bytes().len() as u64,
3502            )),
3503            ProcNode::PidCmdline { pid } => Ok(proc_file_stat(
3504                proc_inode(node),
3505                self.proc_cmdline_bytes(*pid).len() as u64,
3506            )),
3507            ProcNode::PidEnviron { pid } => Ok(proc_file_stat(
3508                proc_inode(node),
3509                self.proc_environ_bytes(*pid).len() as u64,
3510            )),
3511            ProcNode::PidStatFile { pid } => Ok(proc_file_stat(
3512                proc_inode(node),
3513                self.proc_stat_bytes(*pid).len() as u64,
3514            )),
3515            ProcNode::PidStatusFile { pid } => Ok(proc_file_stat(
3516                proc_inode(node),
3517                self.proc_status_bytes(*pid).len() as u64,
3518            )),
3519            ProcNode::SelfLink { .. }
3520            | ProcNode::PidCwdLink { .. }
3521            | ProcNode::PidFdLink { .. } => Ok(proc_symlink_stat(
3522                proc_inode(node),
3523                self.proc_read_link(node)?.len() as u64,
3524            )),
3525        }
3526    }
3527
3528    fn proc_read_link(&self, node: &ProcNode) -> KernelResult<String> {
3529        match node {
3530            ProcNode::SelfLink { .. }
3531            | ProcNode::PidCwdLink { .. }
3532            | ProcNode::PidFdLink { .. } => self.proc_symlink_target(node),
3533            _ => Err(KernelError::new(
3534                "EINVAL",
3535                format!(
3536                    "invalid argument, readlink '{}'",
3537                    self.proc_canonical_path(node)
3538                ),
3539            )),
3540        }
3541    }
3542
3543    fn proc_read_dir(
3544        &mut self,
3545        current_pid: Option<u32>,
3546        node: &ProcNode,
3547    ) -> KernelResult<Vec<String>> {
3548        match node {
3549            ProcNode::SelfLink { .. }
3550            | ProcNode::PidCwdLink { .. }
3551            | ProcNode::PidFdLink { .. } => {
3552                let target = self.proc_symlink_target(node)?;
3553                self.read_dir_internal(current_pid, &target)
3554            }
3555            ProcNode::RootDir => {
3556                let mut entries = self
3557                    .processes
3558                    .list_processes()
3559                    .keys()
3560                    .map(|pid| pid.to_string())
3561                    .collect::<Vec<_>>();
3562                entries.push(String::from("cpuinfo"));
3563                entries.push(String::from("loadavg"));
3564                entries.push(String::from("meminfo"));
3565                entries.push(String::from("mounts"));
3566                entries.push(String::from("self"));
3567                entries.push(String::from("uptime"));
3568                entries.push(String::from("version"));
3569                entries.sort();
3570                Ok(entries)
3571            }
3572            ProcNode::PidDir { .. } => Ok(vec![
3573                String::from("cmdline"),
3574                String::from("cwd"),
3575                String::from("environ"),
3576                String::from("fd"),
3577                String::from("stat"),
3578                String::from("status"),
3579            ]),
3580            ProcNode::PidFdDir { pid } => {
3581                let tables = lock_or_recover(&self.fd_tables);
3582                let table = tables
3583                    .get(*pid)
3584                    .ok_or_else(|| proc_not_found_error(&format!("/proc/{pid}/fd")))?;
3585                Ok(table.iter().map(|entry| entry.fd.to_string()).collect())
3586            }
3587            _ => Err(KernelError::new(
3588                "ENOTDIR",
3589                format!(
3590                    "not a directory, scandir '{}'",
3591                    self.proc_canonical_path(node)
3592                ),
3593            )),
3594        }
3595    }
3596
3597    fn proc_realpath(&self, current_pid: Option<u32>, node: &ProcNode) -> KernelResult<String> {
3598        match node {
3599            ProcNode::SelfLink { .. }
3600            | ProcNode::PidCwdLink { .. }
3601            | ProcNode::PidFdLink { .. } => {
3602                let target = self.proc_symlink_target(node)?;
3603                self.realpath_internal(current_pid, &target)
3604            }
3605            _ => Ok(self.proc_canonical_path(node)),
3606        }
3607    }
3608
3609    fn proc_symlink_target(&self, node: &ProcNode) -> KernelResult<String> {
3610        match node {
3611            ProcNode::SelfLink { pid } => Ok(format!("/proc/{pid}")),
3612            ProcNode::PidCwdLink { pid } => Ok(self.proc_entry(*pid)?.cwd),
3613            ProcNode::PidFdLink { pid, fd } => {
3614                Ok(self.proc_fd_entry(*pid, *fd)?.description.path().to_owned())
3615            }
3616            _ => Err(KernelError::new(
3617                "EINVAL",
3618                format!(
3619                    "'{}' is not a symbolic link",
3620                    self.proc_canonical_path(node)
3621                ),
3622            )),
3623        }
3624    }
3625
3626    fn proc_canonical_path(&self, node: &ProcNode) -> String {
3627        match node {
3628            ProcNode::RootDir => String::from("/proc"),
3629            ProcNode::MountsFile => String::from("/proc/mounts"),
3630            ProcNode::CpuInfoFile => String::from("/proc/cpuinfo"),
3631            ProcNode::MemInfoFile => String::from("/proc/meminfo"),
3632            ProcNode::LoadAvgFile => String::from("/proc/loadavg"),
3633            ProcNode::UptimeFile => String::from("/proc/uptime"),
3634            ProcNode::VersionFile => String::from("/proc/version"),
3635            ProcNode::SelfLink { pid } => format!("/proc/{pid}"),
3636            ProcNode::PidDir { pid } => format!("/proc/{pid}"),
3637            ProcNode::PidFdDir { pid } => format!("/proc/{pid}/fd"),
3638            ProcNode::PidCmdline { pid } => format!("/proc/{pid}/cmdline"),
3639            ProcNode::PidEnviron { pid } => format!("/proc/{pid}/environ"),
3640            ProcNode::PidCwdLink { pid } => format!("/proc/{pid}/cwd"),
3641            ProcNode::PidStatFile { pid } => format!("/proc/{pid}/stat"),
3642            ProcNode::PidStatusFile { pid } => format!("/proc/{pid}/status"),
3643            ProcNode::PidFdLink { pid, fd } => format!("/proc/{pid}/fd/{fd}"),
3644        }
3645    }
3646
3647    fn proc_cmdline_bytes(&self, pid: u32) -> Vec<u8> {
3648        let entry = self
3649            .processes
3650            .get(pid)
3651            .expect("process must exist while procfs path is resolved");
3652        let mut argv = vec![entry.command];
3653        argv.extend(entry.args);
3654        null_separated_bytes(argv)
3655    }
3656
3657    fn proc_environ_bytes(&self, pid: u32) -> Vec<u8> {
3658        let entry = self
3659            .processes
3660            .get(pid)
3661            .expect("process must exist while procfs path is resolved");
3662        null_separated_bytes(
3663            entry
3664                .env
3665                .into_iter()
3666                .map(|(key, value)| format!("{key}={value}"))
3667                .collect(),
3668        )
3669    }
3670
3671    fn proc_stat_bytes(&self, pid: u32) -> Vec<u8> {
3672        let entry = self
3673            .processes
3674            .get(pid)
3675            .expect("process must exist while procfs path is resolved");
3676        let command = entry.command.replace(')', "]");
3677        let state = match entry.status {
3678            ProcessStatus::Running => 'R',
3679            ProcessStatus::Stopped => 'T',
3680            ProcessStatus::Exited => 'Z',
3681        };
3682        format!(
3683            "{pid} ({command}) {state} {ppid} {pgid} {sid} 0 0 0 0 0 0 0 0 0 0 20 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0",
3684            ppid = entry.ppid,
3685            pgid = entry.pgid,
3686            sid = entry.sid,
3687        )
3688        .into_bytes()
3689    }
3690
3691    fn proc_mounts_bytes(&self) -> Vec<u8> {
3692        let mounts = if let Some(table) =
3693            (self.filesystem.inner().inner() as &dyn Any).downcast_ref::<MountTable>()
3694        {
3695            table.get_mounts()
3696        } else {
3697            vec![MountEntry {
3698                path: String::from("/"),
3699                plugin_id: String::from("root"),
3700                read_only: false,
3701            }]
3702        };
3703
3704        mounts
3705            .into_iter()
3706            .map(|mount| {
3707                let options = if mount.read_only { "ro" } else { "rw" };
3708                format!(
3709                    "{source} {target} {fstype} {options} 0 0\n",
3710                    source = mount.plugin_id,
3711                    target = mount.path,
3712                    fstype = mount.plugin_id,
3713                )
3714            })
3715            .collect::<String>()
3716            .into_bytes()
3717    }
3718
3719    fn proc_cpu_count(&self) -> usize {
3720        self.resource_limits().virtual_cpu_count.unwrap_or(1)
3721    }
3722
3723    fn proc_cpuinfo_bytes(&self) -> Vec<u8> {
3724        let mut body = String::new();
3725        for processor in 0..self.proc_cpu_count() {
3726            body.push_str(&format!(
3727                "processor\t: {processor}\nmodel name\t: secure-exec Virtual CPU\ncpu MHz\t\t: 1000.000\nsiblings\t: 1\ncpu cores\t: 1\n\n"
3728            ));
3729        }
3730        body.into_bytes()
3731    }
3732
3733    fn proc_mem_total_bytes(&self) -> u64 {
3734        self.resource_limits()
3735            .max_wasm_memory_bytes
3736            .or(self.resource_limits().max_filesystem_bytes)
3737            .unwrap_or(DEFAULT_MAX_OPEN_FDS as u64 * 1024 * 1024)
3738    }
3739
3740    fn proc_meminfo_bytes(&self) -> Vec<u8> {
3741        let total_kb = self.proc_mem_total_bytes().div_ceil(1024);
3742        let zero_kb = 0;
3743        format!(
3744            "MemTotal:{total_kb:>8} kB\nMemFree:{total_kb:>9} kB\nMemAvailable:{total_kb:>4} kB\nBuffers:{zero_kb:>9} kB\nCached:{zero_kb:>10} kB\n"
3745        )
3746        .into_bytes()
3747    }
3748
3749    fn proc_loadavg_bytes(&self) -> Vec<u8> {
3750        let processes = self.processes.list_processes();
3751        let running = processes
3752            .values()
3753            .filter(|process| process.status == ProcessStatus::Running)
3754            .count();
3755        let total = processes.len().max(1);
3756        let last_pid = processes.keys().next_back().copied().unwrap_or(0);
3757        format!("0.00 0.00 0.00 {running}/{total} {last_pid}\n").into_bytes()
3758    }
3759
3760    fn proc_uptime_bytes(&self) -> Vec<u8> {
3761        let uptime = self.boot_instant.elapsed().as_secs_f64();
3762        format!("{uptime:.2} {uptime:.2}\n").into_bytes()
3763    }
3764
3765    fn proc_version_bytes(&self) -> Vec<u8> {
3766        format!(
3767            "Linux version 6.8.0-agentos (agentos@localhost) #1 SMP boot={}\n",
3768            self.boot_time_ms
3769        )
3770        .into_bytes()
3771    }
3772
3773    fn proc_status_bytes(&self, pid: u32) -> Vec<u8> {
3774        let entry = self
3775            .processes
3776            .get(pid)
3777            .expect("process must exist while procfs path is resolved");
3778        let (state_code, state_name) = match entry.status {
3779            ProcessStatus::Running => ('R', "running"),
3780            ProcessStatus::Stopped => ('T', "stopped"),
3781            ProcessStatus::Exited => ('Z', "zombie"),
3782        };
3783        format!(
3784            "Name:\t{name}\nState:\t{state_code} ({state_name})\nPid:\t{pid}\nPPid:\t{ppid}\nUid:\t{uid}\t{euid}\t{euid}\t{euid}\nGid:\t{gid}\t{egid}\t{egid}\t{egid}\nVmSize:\t{:>8} kB\nVmRSS:\t{:>9} kB\nThreads:\t1\n",
3785            0,
3786            0,
3787            name = entry.command,
3788            ppid = entry.ppid,
3789            uid = entry.identity.uid,
3790            euid = entry.identity.euid,
3791            gid = entry.identity.gid,
3792            egid = entry.identity.egid,
3793        )
3794        .into_bytes()
3795    }
3796
3797    fn proc_read_file_from_open_path(
3798        &mut self,
3799        current_pid: Option<u32>,
3800        path: &str,
3801    ) -> KernelResult<Vec<u8>> {
3802        let node = self
3803            .resolve_proc_node(path, current_pid)?
3804            .ok_or_else(|| proc_not_found_error(path))?;
3805        self.proc_read_file(current_pid, &node)
3806    }
3807
3808    fn proc_stat_from_open_path(
3809        &mut self,
3810        current_pid: Option<u32>,
3811        path: &str,
3812    ) -> KernelResult<VirtualStat> {
3813        let node = self
3814            .resolve_proc_node(path, current_pid)?
3815            .ok_or_else(|| proc_not_found_error(path))?;
3816        self.proc_stat(current_pid, &node)
3817    }
3818
3819    fn filesystem_usage(&mut self) -> KernelResult<FileSystemUsage> {
3820        let filesystem = self.raw_filesystem_mut();
3821        let filesystem_any = filesystem as &mut dyn Any;
3822        if let Some(mount_table) = filesystem_any.downcast_mut::<MountTable>() {
3823            return Ok(mount_table.root_usage()?);
3824        }
3825        Ok(measure_filesystem_usage(filesystem)?)
3826    }
3827
3828    fn storage_stat(&mut self, path: &str) -> KernelResult<Option<VirtualStat>> {
3829        if is_virtual_device_storage_path(path) {
3830            return Ok(None);
3831        }
3832
3833        match self.raw_filesystem_mut().stat(path) {
3834            Ok(stat) => Ok(Some(stat)),
3835            Err(error) if error.code() == "ENOENT" => Ok(None),
3836            Err(error) => Err(error.into()),
3837        }
3838    }
3839
3840    fn storage_lstat(&mut self, path: &str) -> KernelResult<Option<VirtualStat>> {
3841        if is_virtual_device_storage_path(path) {
3842            return Ok(None);
3843        }
3844
3845        match self.raw_filesystem_mut().lstat(path) {
3846            Ok(stat) => Ok(Some(stat)),
3847            Err(error) if error.code() == "ENOENT" => Ok(None),
3848            Err(error) => Err(error.into()),
3849        }
3850    }
3851
3852    fn current_storage_file_size(&mut self, path: &str) -> KernelResult<u64> {
3853        Ok(self
3854            .storage_stat(path)?
3855            .filter(|stat| !stat.is_directory)
3856            .map(|stat| stat.size)
3857            .unwrap_or(0))
3858    }
3859
3860    fn apply_creation_mode(&mut self, path: &str, mode: u32, umask: u32) -> KernelResult<()> {
3861        let masked_mode = (mode & !0o777) | ((mode & 0o777) & !(umask & 0o777));
3862        Ok(self.filesystem.chmod(path, masked_mode)?)
3863    }
3864
3865    fn missing_directory_paths(
3866        &mut self,
3867        path: &str,
3868        recursive: bool,
3869    ) -> KernelResult<Vec<String>> {
3870        let normalized = normalize_path(path);
3871        if normalized == "/" {
3872            return Ok(Vec::new());
3873        }
3874
3875        if !recursive {
3876            return Ok(if self.storage_lstat(&normalized)?.is_none() {
3877                vec![normalized]
3878            } else {
3879                Vec::new()
3880            });
3881        }
3882
3883        let mut created = Vec::new();
3884        let mut current = String::from("/");
3885        for component in normalized
3886            .split('/')
3887            .filter(|component| !component.is_empty())
3888        {
3889            current = if current == "/" {
3890                format!("/{component}")
3891            } else {
3892                format!("{current}/{component}")
3893            };
3894            if self.storage_lstat(&current)?.is_none() {
3895                created.push(current.clone());
3896            }
3897        }
3898        Ok(created)
3899    }
3900
3901    fn check_write_file_limits(&mut self, path: &str, new_size: u64) -> KernelResult<()> {
3902        if is_virtual_device_storage_path(path) {
3903            return Ok(());
3904        }
3905
3906        let usage = self.filesystem_usage()?;
3907        if let Some(existing) = self.storage_stat(path)? {
3908            if existing.is_directory {
3909                return Ok(());
3910            }
3911
3912            self.resources.check_filesystem_usage(
3913                &usage,
3914                usage
3915                    .total_bytes
3916                    .saturating_sub(existing.size)
3917                    .saturating_add(new_size),
3918                usage.inode_count,
3919            )?;
3920            return Ok(());
3921        }
3922
3923        let new_inodes =
3924            count_missing_directory_components(self.raw_filesystem_mut(), path, false)?
3925                .saturating_add(1);
3926        self.resources.check_filesystem_usage(
3927            &usage,
3928            usage.total_bytes.saturating_add(new_size),
3929            usage.inode_count.saturating_add(new_inodes),
3930        )?;
3931        Ok(())
3932    }
3933
3934    fn check_create_dir_limits(&mut self, path: &str) -> KernelResult<()> {
3935        if is_virtual_device_storage_path(path) || self.storage_lstat(path)?.is_some() {
3936            return Ok(());
3937        }
3938
3939        let parent = parent_path(path);
3940        let Some(parent_stat) = self.storage_stat(&parent)? else {
3941            return Ok(());
3942        };
3943        if !parent_stat.is_directory {
3944            return Ok(());
3945        }
3946
3947        let usage = self.filesystem_usage()?;
3948        self.resources.check_filesystem_usage(
3949            &usage,
3950            usage.total_bytes,
3951            usage.inode_count.saturating_add(1),
3952        )?;
3953        Ok(())
3954    }
3955
3956    fn check_mkdir_limits(&mut self, path: &str, recursive: bool) -> KernelResult<()> {
3957        if is_virtual_device_storage_path(path) {
3958            return Ok(());
3959        }
3960
3961        if !recursive {
3962            return self.check_create_dir_limits(path);
3963        }
3964
3965        let usage = self.filesystem_usage()?;
3966        let new_inodes = count_missing_directory_components(self.raw_filesystem_mut(), path, true)?;
3967        self.resources.check_filesystem_usage(
3968            &usage,
3969            usage.total_bytes,
3970            usage.inode_count.saturating_add(new_inodes),
3971        )?;
3972        Ok(())
3973    }
3974
3975    fn check_symlink_limits(&mut self, target: &str, link_path: &str) -> KernelResult<()> {
3976        if is_virtual_device_storage_path(link_path) || self.storage_lstat(link_path)?.is_some() {
3977            return Ok(());
3978        }
3979
3980        let parent = parent_path(link_path);
3981        let Some(parent_stat) = self.storage_stat(&parent)? else {
3982            return Ok(());
3983        };
3984        if !parent_stat.is_directory {
3985            return Ok(());
3986        }
3987
3988        let usage = self.filesystem_usage()?;
3989        self.resources.check_filesystem_usage(
3990            &usage,
3991            usage.total_bytes.saturating_add(target.len() as u64),
3992            usage.inode_count.saturating_add(1),
3993        )?;
3994        Ok(())
3995    }
3996
3997    fn check_truncate_limits(&mut self, path: &str, length: u64) -> KernelResult<()> {
3998        self.check_path_resize_limits(path, length)
3999    }
4000
4001    fn check_rename_copy_up_limits(&mut self, old_path: &str, new_path: &str) -> KernelResult<()> {
4002        let max_bytes = self.resource_limits().max_filesystem_bytes;
4003        let max_inodes = self.resource_limits().max_inode_count;
4004        let filesystem_any = self.raw_filesystem_mut() as &mut dyn Any;
4005
4006        if let Some(root) = filesystem_any.downcast_mut::<RootFileSystem>() {
4007            root.check_rename_copy_up_limits(old_path, new_path, max_bytes, max_inodes)?;
4008            return Ok(());
4009        }
4010
4011        if let Some(mount_table) = filesystem_any.downcast_mut::<MountTable>() {
4012            mount_table.check_rename_copy_up_limits(old_path, new_path, max_bytes, max_inodes)?;
4013        }
4014
4015        Ok(())
4016    }
4017
4018    fn check_path_resize_limits(&mut self, path: &str, new_size: u64) -> KernelResult<()> {
4019        if is_virtual_device_storage_path(path) {
4020            return Ok(());
4021        }
4022
4023        let Some(existing) = self.storage_stat(path)? else {
4024            return Ok(());
4025        };
4026        if existing.is_directory {
4027            return Ok(());
4028        }
4029
4030        let usage = self.filesystem_usage()?;
4031        self.resources.check_filesystem_usage(
4032            &usage,
4033            usage
4034                .total_bytes
4035                .saturating_sub(existing.size)
4036                .saturating_add(new_size),
4037            usage.inode_count,
4038        )?;
4039        Ok(())
4040    }
4041
4042    fn blocking_read_timeout(&self) -> Option<Duration> {
4043        self.resources
4044            .limits()
4045            .max_blocking_read_ms
4046            .map(Duration::from_millis)
4047    }
4048
4049    fn close_special_resource_if_needed(&self, description: &Arc<FileDescription>, filetype: u8) {
4050        close_special_resource_if_needed(
4051            &self.file_locks,
4052            &self.pipes,
4053            &self.ptys,
4054            description,
4055            filetype,
4056        );
4057    }
4058}
4059
4060impl KernelVm<MountTable> {
4061    fn check_mount_permissions(&self, path: &str) -> KernelResult<()> {
4062        self.filesystem
4063            .check_path(FsOperation::Write, path)
4064            .map_err(KernelError::from)?;
4065        if is_sensitive_mount_path(path) {
4066            self.filesystem
4067                .check_path(FsOperation::MountSensitive, path)
4068                .map_err(KernelError::from)?;
4069        }
4070        Ok(())
4071    }
4072
4073    pub fn mount_filesystem(
4074        &mut self,
4075        path: &str,
4076        filesystem: impl VirtualFileSystem + 'static,
4077        options: MountOptions,
4078    ) -> KernelResult<()> {
4079        self.assert_not_terminated()?;
4080        self.check_mount_permissions(path)?;
4081        self.filesystem
4082            .inner_mut()
4083            .inner_mut()
4084            .mount(path, filesystem, options)
4085            .map_err(KernelError::from)
4086    }
4087
4088    pub fn mount_boxed_filesystem(
4089        &mut self,
4090        path: &str,
4091        filesystem: Box<dyn MountedFileSystem>,
4092        options: MountOptions,
4093    ) -> KernelResult<()> {
4094        self.assert_not_terminated()?;
4095        self.check_mount_permissions(path)?;
4096        self.filesystem
4097            .inner_mut()
4098            .inner_mut()
4099            .mount_boxed(path, filesystem, options)
4100            .map_err(KernelError::from)
4101    }
4102
4103    pub fn unmount_filesystem(&mut self, path: &str) -> KernelResult<()> {
4104        self.assert_not_terminated()?;
4105        self.check_mount_permissions(path)?;
4106        self.filesystem
4107            .inner_mut()
4108            .inner_mut()
4109            .unmount(path)
4110            .map_err(KernelError::from)
4111    }
4112
4113    pub fn mounted_filesystems(&self) -> Vec<MountEntry> {
4114        self.filesystem.inner().inner().get_mounts()
4115    }
4116
4117    pub fn root_filesystem_mut(&mut self) -> Option<&mut RootFileSystem> {
4118        self.filesystem
4119            .inner_mut()
4120            .inner_mut()
4121            .root_virtual_filesystem_mut::<RootFileSystem>()
4122    }
4123
4124    pub fn snapshot_root_filesystem(&mut self) -> KernelResult<RootFilesystemSnapshot> {
4125        let usage = self.filesystem_usage()?;
4126        self.resources
4127            .check_filesystem_usage(&usage, usage.total_bytes, usage.inode_count)?;
4128        let root = self
4129            .root_filesystem_mut()
4130            .ok_or_else(|| KernelError::new("EINVAL", "native root filesystem is not available"))?;
4131        root.snapshot().map_err(KernelError::from)
4132    }
4133}
4134
/// Mutable bookkeeping for a `StubDriverProcess`, kept behind its mutex.
#[derive(Default)]
struct StubDriverState {
    /// Exit code once the process has finished; `None` until then.
    exit_code: Option<i32>,
    /// Callback registered through `set_on_exit`, if any.
    on_exit: Option<ProcessExitCallback>,
    /// Every signal passed to `kill`, in call order.
    kill_signals: Vec<i32>,
}
4141
/// Stub `DriverProcess` whose lifecycle is driven entirely by `kill` and
/// `finish` calls.
#[derive(Default)]
struct StubDriverProcess {
    /// Shared state guarded by a mutex.
    state: Mutex<StubDriverState>,
    /// Notified when the exit code is set so blocked `wait` calls wake up.
    waiters: Condvar,
}
4147
4148impl StubDriverProcess {
4149    fn finish(&self, exit_code: i32) {
4150        let callback = {
4151            let mut state = lock_or_recover(&self.state);
4152            if state.exit_code.is_some() {
4153                return;
4154            }
4155            state.exit_code = Some(exit_code);
4156            self.waiters.notify_all();
4157            state.on_exit.clone()
4158        };
4159
4160        if let Some(callback) = callback {
4161            callback(exit_code);
4162        }
4163    }
4164
4165    fn kill_signals(&self) -> Vec<i32> {
4166        lock_or_recover(&self.state).kill_signals.clone()
4167    }
4168}
4169
4170impl DriverProcess for StubDriverProcess {
4171    fn kill(&self, signal: i32) {
4172        {
4173            let mut state = lock_or_recover(&self.state);
4174            state.kill_signals.push(signal);
4175        }
4176        if matches!(
4177            signal,
4178            crate::process_table::SIGCHLD | SIGCONT | SIGSTOP | SIGTSTP | SIGWINCH
4179        ) {
4180            return;
4181        }
4182        self.finish(128 + signal);
4183    }
4184
4185    fn wait(&self, timeout: Duration) -> Option<i32> {
4186        let state = lock_or_recover(&self.state);
4187        if let Some(code) = state.exit_code {
4188            return Some(code);
4189        }
4190
4191        let (state, _) = wait_timeout_or_recover(&self.waiters, state, timeout);
4192        state.exit_code
4193    }
4194
4195    fn set_on_exit(&self, callback: ProcessExitCallback) {
4196        let maybe_exit = {
4197            let mut state = lock_or_recover(&self.state);
4198            state.on_exit = Some(callback.clone());
4199            state.exit_code
4200        };
4201
4202        if let Some(code) = maybe_exit {
4203            callback(code);
4204        }
4205    }
4206}
4207
4208impl From<VfsError> for KernelError {
4209    fn from(error: VfsError) -> Self {
4210        map_error(error.code(), error.to_string())
4211    }
4212}
4213
/// Locks `mutex`, ignoring poisoning: if a previous holder panicked, the
/// guard is still handed back.
fn lock_or_recover<'a, T>(mutex: &'a Mutex<T>) -> MutexGuard<'a, T> {
    mutex
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner())
}
4220
/// Waits on `condvar` for at most `timeout`, ignoring mutex poisoning.
///
/// Returns the re-acquired guard together with the wait result.
fn wait_timeout_or_recover<'a, T>(
    condvar: &Condvar,
    guard: MutexGuard<'a, T>,
    timeout: Duration,
) -> (MutexGuard<'a, T>, WaitTimeoutResult) {
    condvar
        .wait_timeout(guard, timeout)
        .unwrap_or_else(|poisoned| poisoned.into_inner())
}
4231
4232fn is_sensitive_mount_path(path: &str) -> bool {
4233    let normalized = crate::vfs::normalize_path(path);
4234    normalized == "/"
4235        || normalized == "/etc"
4236        || normalized.starts_with("/etc/")
4237        || normalized == "/proc"
4238        || normalized.starts_with("/proc/")
4239}
4240
4241impl From<FdTableError> for KernelError {
4242    fn from(error: FdTableError) -> Self {
4243        map_error(error.code(), error.to_string())
4244    }
4245}
4246
4247impl From<PipeError> for KernelError {
4248    fn from(error: PipeError) -> Self {
4249        map_error(error.code(), error.to_string())
4250    }
4251}
4252
4253impl From<PtyError> for KernelError {
4254    fn from(error: PtyError) -> Self {
4255        map_error(error.code(), error.to_string())
4256    }
4257}
4258
4259impl From<ProcessTableError> for KernelError {
4260    fn from(error: ProcessTableError) -> Self {
4261        map_error(error.code(), error.to_string())
4262    }
4263}
4264
4265impl From<PermissionError> for KernelError {
4266    fn from(error: PermissionError) -> Self {
4267        map_error(error.code(), error.to_string())
4268    }
4269}
4270
4271impl From<ResourceError> for KernelError {
4272    fn from(error: ResourceError) -> Self {
4273        map_error(error.code(), error.to_string())
4274    }
4275}
4276
4277impl From<SocketTableError> for KernelError {
4278    fn from(error: SocketTableError) -> Self {
4279        map_error(error.code(), error.to_string())
4280    }
4281}
4282
4283impl From<RootFilesystemError> for KernelError {
4284    fn from(error: RootFilesystemError) -> Self {
4285        map_error("EINVAL", error.to_string())
4286    }
4287}
4288
4289fn map_dns_resolver_error(error: crate::dns::DnsResolverError) -> KernelError {
4290    let code = match error.kind() {
4291        DnsResolverErrorKind::InvalidInput => "EINVAL",
4292        DnsResolverErrorKind::LookupFailed => "EHOSTUNREACH",
4293    };
4294    map_error(code, error.to_string())
4295}
4296
4297fn map_error(code: &'static str, message: String) -> KernelError {
4298    let trimmed = strip_error_prefix(code, &message)
4299        .map(ToOwned::to_owned)
4300        .unwrap_or(message);
4301    KernelError::new(code, trimmed)
4302}
4303
/// Returns the part of `message` after a leading `"<code>: "`, or `None`
/// when the message does not start with that prefix.
fn strip_error_prefix<'a>(code: &str, message: &'a str) -> Option<&'a str> {
    let after_code = message.strip_prefix(code)?;
    after_code.strip_prefix(": ")
}
4308
4309fn parse_dev_fd_path(path: &str) -> KernelResult<Option<u32>> {
4310    let Some(raw_fd) = path.strip_prefix("/dev/fd/") else {
4311        return Ok(None);
4312    };
4313    if raw_fd.is_empty() {
4314        return Err(KernelError::new(
4315            "EBADF",
4316            format!("bad file descriptor: {path}"),
4317        ));
4318    }
4319    let fd = raw_fd
4320        .parse::<u32>()
4321        .map_err(|_| KernelError::new("EBADF", format!("bad file descriptor: {path}")))?;
4322    Ok(Some(fd))
4323}
4324
4325fn count_missing_directory_components<F: VirtualFileSystem>(
4326    filesystem: &mut F,
4327    path: &str,
4328    include_final: bool,
4329) -> VfsResult<usize> {
4330    let normalized = normalize_path(path);
4331    let parts = normalized
4332        .split('/')
4333        .filter(|part| !part.is_empty())
4334        .collect::<Vec<_>>();
4335    let limit = if include_final {
4336        parts.len()
4337    } else {
4338        parts.len().saturating_sub(1)
4339    };
4340
4341    let mut current = String::from("/");
4342    for (index, part) in parts.iter().take(limit).enumerate() {
4343        let candidate = if current == "/" {
4344            format!("/{}", part)
4345        } else {
4346            format!("{current}/{}", part)
4347        };
4348
4349        match filesystem.stat(&candidate) {
4350            Ok(stat) => {
4351                if !stat.is_directory {
4352                    return Err(VfsError::new(
4353                        "ENOTDIR",
4354                        format!("not a directory, mkdir '{candidate}'"),
4355                    ));
4356                }
4357                current = candidate;
4358            }
4359            Err(error) if error.code() == "ENOENT" => {
4360                return Ok(limit.saturating_sub(index));
4361            }
4362            Err(error) => return Err(error),
4363        }
4364    }
4365
4366    Ok(0)
4367}
4368
4369fn parent_path(path: &str) -> String {
4370    let normalized = normalize_path(path);
4371    let Some((head, _)) = normalized.rsplit_once('/') else {
4372        return String::from("/");
4373    };
4374
4375    if head.is_empty() {
4376        String::from("/")
4377    } else {
4378        String::from(head)
4379    }
4380}
4381
/// Joins `child` onto `parent` with a single `/` separator.
///
/// A `parent` of exactly `/` contributes no text of its own, so the result is
/// `/<child>` rather than `//<child>`.
fn join_absolute_path(parent: &str, child: &str) -> String {
    let base = if parent == "/" { "" } else { parent };
    let mut joined = String::with_capacity(base.len() + 1 + child.len());
    joined.push_str(base);
    joined.push('/');
    joined.push_str(child);
    joined
}
4389
/// Reports whether `path` is one of the recognised `/dev` locations.
///
/// Matches the listed device nodes and directories exactly, plus anything
/// below `/dev/fd/` or `/dev/pts/`. The path is compared as given, without
/// normalization.
fn is_virtual_device_storage_path(path: &str) -> bool {
    const EXACT_PATHS: [&str; 9] = [
        "/dev",
        "/dev/fd",
        "/dev/pts",
        "/dev/null",
        "/dev/zero",
        "/dev/stdin",
        "/dev/stdout",
        "/dev/stderr",
        "/dev/urandom",
    ];
    const SUBTREE_PREFIXES: [&str; 2] = ["/dev/fd/", "/dev/pts/"];

    EXACT_PATHS.iter().any(|exact| *exact == path)
        || SUBTREE_PREFIXES
            .iter()
            .any(|prefix| path.starts_with(prefix))
}
4400
4401fn is_proc_path(path: &str) -> bool {
4402    let normalized = normalize_path(path);
4403    normalized == "/proc" || normalized.starts_with("/proc/")
4404}
4405
4406fn is_agentos_path(path: &str) -> bool {
4407    let normalized = normalize_path(path);
4408    normalized == "/etc/agentos" || normalized.starts_with("/etc/agentos/")
4409}
4410
4411fn open_requires_write_access(flags: u32) -> bool {
4412    flags & (O_CREAT | O_EXCL | O_TRUNC) != 0 || (flags & 0b11) != crate::fd_table::O_RDONLY
4413}
4414
4415fn checked_write_end(offset: u64, len: usize) -> KernelResult<u64> {
4416    offset
4417        .checked_add(len as u64)
4418        .ok_or_else(|| KernelError::new("EINVAL", "write offset out of range"))
4419}
4420
4421fn filetype_for_path(path: &str, stat: &VirtualStat) -> u8 {
4422    if stat.is_directory {
4423        FILETYPE_DIRECTORY
4424    } else if path.starts_with("/dev/") {
4425        FILETYPE_CHARACTER_DEVICE
4426    } else if stat.is_symbolic_link {
4427        FILETYPE_SYMBOLIC_LINK
4428    } else {
4429        FILETYPE_REGULAR_FILE
4430    }
4431}
4432
4433fn synthetic_character_device_stat(ino: u64) -> VirtualStat {
4434    let now = now_ms();
4435    VirtualStat {
4436        mode: 0o666,
4437        size: 0,
4438        blocks: 0,
4439        dev: 2,
4440        rdev: 0,
4441        is_directory: false,
4442        is_symbolic_link: false,
4443        atime_ms: now,
4444        atime_nsec: 0,
4445        mtime_ms: now,
4446        mtime_nsec: 0,
4447        ctime_ms: now,
4448        ctime_nsec: 0,
4449        birthtime_ms: now,
4450        ino,
4451        nlink: 1,
4452        uid: 0,
4453        gid: 0,
4454    }
4455}
4456
4457fn proc_dir_stat(ino: u64) -> VirtualStat {
4458    let now = now_ms();
4459    VirtualStat {
4460        mode: 0o555,
4461        size: 0,
4462        blocks: 0,
4463        dev: 3,
4464        rdev: 0,
4465        is_directory: true,
4466        is_symbolic_link: false,
4467        atime_ms: now,
4468        atime_nsec: 0,
4469        mtime_ms: now,
4470        mtime_nsec: 0,
4471        ctime_ms: now,
4472        ctime_nsec: 0,
4473        birthtime_ms: now,
4474        ino,
4475        nlink: 2,
4476        uid: 0,
4477        gid: 0,
4478    }
4479}
4480
4481fn proc_file_stat(ino: u64, size: u64) -> VirtualStat {
4482    let now = now_ms();
4483    VirtualStat {
4484        mode: 0o444,
4485        size,
4486        blocks: if size == 0 { 0 } else { size.div_ceil(512) },
4487        dev: 3,
4488        rdev: 0,
4489        is_directory: false,
4490        is_symbolic_link: false,
4491        atime_ms: now,
4492        atime_nsec: 0,
4493        mtime_ms: now,
4494        mtime_nsec: 0,
4495        ctime_ms: now,
4496        ctime_nsec: 0,
4497        birthtime_ms: now,
4498        ino,
4499        nlink: 1,
4500        uid: 0,
4501        gid: 0,
4502    }
4503}
4504
4505fn proc_symlink_stat(ino: u64, size: u64) -> VirtualStat {
4506    let now = now_ms();
4507    VirtualStat {
4508        mode: 0o777,
4509        size,
4510        blocks: if size == 0 { 0 } else { size.div_ceil(512) },
4511        dev: 3,
4512        rdev: 0,
4513        is_directory: false,
4514        is_symbolic_link: true,
4515        atime_ms: now,
4516        atime_nsec: 0,
4517        mtime_ms: now,
4518        mtime_nsec: 0,
4519        ctime_ms: now,
4520        ctime_nsec: 0,
4521        birthtime_ms: now,
4522        ino,
4523        nlink: 1,
4524        uid: 0,
4525        gid: 0,
4526    }
4527}
4528
4529fn proc_filetype(node: &ProcNode) -> u8 {
4530    match node {
4531        ProcNode::RootDir | ProcNode::PidDir { .. } | ProcNode::PidFdDir { .. } => {
4532            FILETYPE_DIRECTORY
4533        }
4534        ProcNode::SelfLink { .. } | ProcNode::PidCwdLink { .. } | ProcNode::PidFdLink { .. } => {
4535            FILETYPE_SYMBOLIC_LINK
4536        }
4537        ProcNode::MountsFile
4538        | ProcNode::CpuInfoFile
4539        | ProcNode::MemInfoFile
4540        | ProcNode::LoadAvgFile
4541        | ProcNode::UptimeFile
4542        | ProcNode::VersionFile
4543        | ProcNode::PidCmdline { .. }
4544        | ProcNode::PidEnviron { .. }
4545        | ProcNode::PidStatFile { .. }
4546        | ProcNode::PidStatusFile { .. } => FILETYPE_REGULAR_FILE,
4547    }
4548}
4549
4550fn proc_inode(node: &ProcNode) -> u64 {
4551    match node {
4552        ProcNode::RootDir => 0xfffe_0001,
4553        ProcNode::MountsFile => 0xfffe_0002,
4554        ProcNode::CpuInfoFile => 0xfffe_0003,
4555        ProcNode::MemInfoFile => 0xfffe_0004,
4556        ProcNode::LoadAvgFile => 0xfffe_0005,
4557        ProcNode::UptimeFile => 0xfffe_0006,
4558        ProcNode::VersionFile => 0xfffe_0007,
4559        ProcNode::SelfLink { pid } => 0xfffe_1000 + u64::from(*pid),
4560        ProcNode::PidDir { pid } => 0xfffe_2000 + u64::from(*pid),
4561        ProcNode::PidFdDir { pid } => 0xfffe_3000 + u64::from(*pid),
4562        ProcNode::PidCmdline { pid } => 0xfffe_4000 + u64::from(*pid),
4563        ProcNode::PidEnviron { pid } => 0xfffe_5000 + u64::from(*pid),
4564        ProcNode::PidCwdLink { pid } => 0xfffe_6000 + u64::from(*pid),
4565        ProcNode::PidStatFile { pid } => 0xfffe_7000 + u64::from(*pid),
4566        ProcNode::PidStatusFile { pid } => 0xfffe_8000 + u64::from(*pid),
4567        ProcNode::PidFdLink { pid, fd } => 0xffff_0000 + ((u64::from(*pid)) << 8) + u64::from(*fd),
4568    }
4569}
4570
/// Serializes `parts` as bytes with a NUL byte after every entry.
///
/// An empty list produces an empty buffer; otherwise each entry's UTF-8 bytes
/// are emitted in order, each followed by a single `0` byte.
fn null_separated_bytes(parts: Vec<String>) -> Vec<u8> {
    let total_len: usize = parts.iter().map(|part| part.len() + 1).sum();
    let mut bytes = Vec::with_capacity(total_len);
    for part in &parts {
        bytes.extend_from_slice(part.as_bytes());
        bytes.push(0);
    }
    bytes
}
4580
4581fn proc_not_found_error(path: &str) -> KernelError {
4582    KernelError::new(
4583        "ENOENT",
4584        format!("no such file or directory, stat '{path}'"),
4585    )
4586}
4587
4588fn read_only_filesystem_error(path: &str) -> KernelError {
4589    KernelError::new("EROFS", format!("read-only filesystem: {path}"))
4590}
4591
/// Returns the current wall-clock time as milliseconds since the Unix epoch.
///
/// If the system clock reports a time before the epoch, the result is `0`.
fn now_ms() -> u64 {
    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map_or(0, |since_epoch| since_epoch.as_millis() as u64)
}
4598
4599impl<F> Drop for KernelVm<F> {
4600    fn drop(&mut self) {
4601        if !self.terminated {
4602            dispose_kernel_vm_resources(self);
4603        }
4604    }
4605}
4606
// Unit tests for process-ownership checks, per-process resource cleanup, and
// the `Drop` behaviour of `KernelVm`.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::vfs::MemoryFileSystem;
    use std::panic::{catch_unwind, AssertUnwindSafe};
    use std::thread;

    /// Handles that outlive a dropped `KernelVm`, so a test can inspect the
    /// shared resource managers after the kernel itself is gone.
    struct RetainedKernelResources {
        process: KernelProcessHandle,
        fd_tables: Arc<Mutex<FdTableManager>>,
        pipes: PipeManager,
        ptys: PtyManager,
        sockets: SocketTable,
        driver_pids: Arc<Mutex<BTreeMap<String, BTreeSet<u32>>>>,
    }

    /// Builds a kernel that owns one live process with an open pipe, an open
    /// PTY and a listening TCP socket, and returns it together with retained
    /// handles to its resource managers.
    fn kernel_with_live_resources() -> (KernelVm<MemoryFileSystem>, RetainedKernelResources) {
        let mut config = KernelVmConfig::new("vm-drop-resources");
        config.permissions = Permissions::allow_all();
        let mut kernel = KernelVm::new(MemoryFileSystem::new(), config);
        kernel
            .register_driver(CommandDriver::new("shell", ["sh"]))
            .expect("register shell");

        let process = kernel
            .spawn_process(
                "sh",
                Vec::new(),
                SpawnOptions {
                    requester_driver: Some(String::from("shell")),
                    ..SpawnOptions::default()
                },
            )
            .expect("spawn shell");
        let _ = kernel.open_pipe("shell", process.pid()).expect("open pipe");
        let _ = kernel.open_pty("shell", process.pid()).expect("open pty");
        let socket = kernel
            .socket_create("shell", process.pid(), SocketSpec::tcp())
            .expect("create socket");
        kernel
            .socket_set_state("shell", process.pid(), socket, SocketState::Listening)
            .expect("mark listener");

        // Clone the shared managers so they can still be queried after the kernel is dropped.
        let retained = RetainedKernelResources {
            process: process.clone(),
            fd_tables: Arc::clone(&kernel.fd_tables),
            pipes: kernel.pipes.clone(),
            ptys: kernel.ptys.clone(),
            sockets: kernel.sockets.clone(),
            driver_pids: Arc::clone(&kernel.driver_pids),
        };

        // Sanity-check the starting state: exactly one of each resource is live.
        assert_eq!(lock_or_recover(retained.fd_tables.as_ref()).len(), 1);
        assert_eq!(retained.pipes.pipe_count(), 1);
        assert_eq!(retained.ptys.pty_count(), 1);
        assert_eq!(retained.sockets.snapshot().sockets, 1);

        (kernel, retained)
    }

    /// Asserts that dropping the kernel terminated the process and released
    /// every resource tracked in `retained`.
    fn assert_kernel_drop_released_resources(retained: &RetainedKernelResources) {
        // NOTE(review): 143 presumably encodes 128 + signal 15 (SIGTERM) — confirm
        // against the process table's exit-status convention.
        assert_eq!(retained.process.wait(Duration::from_millis(50)), Some(143));
        assert_eq!(retained.process.kill_signals(), vec![15]);
        assert!(
            lock_or_recover(retained.fd_tables.as_ref()).is_empty(),
            "kernel drop should remove fd tables"
        );
        assert_eq!(
            retained.pipes.pipe_count(),
            0,
            "kernel drop should close pipes"
        );
        assert_eq!(
            retained.ptys.pty_count(),
            0,
            "kernel drop should close PTYs"
        );
        assert_eq!(
            retained.sockets.snapshot().sockets,
            0,
            "kernel drop should reclaim sockets"
        );
        assert!(
            lock_or_recover(retained.driver_pids.as_ref()).is_empty(),
            "kernel drop should clear driver-owned pid tracking"
        );
    }

    /// A process owned by one driver must not be able to join a process group
    /// whose leader is owned by a different driver.
    #[test]
    fn setpgid_rejects_joining_a_process_group_owned_by_another_driver() {
        let kernel = KernelVm::new(MemoryFileSystem::new(), KernelVmConfig::new("vm-setpgid"));

        // Group leader, owned by `driver-a`.
        let leader_pid = kernel.processes.allocate_pid().expect("allocate pid");
        kernel.processes.register(
            leader_pid,
            String::from("driver-a"),
            String::from("sh"),
            Vec::new(),
            ProcessContext {
                pid: leader_pid,
                ppid: 0,
                env: BTreeMap::new(),
                cwd: String::from("/"),
                umask: DEFAULT_PROCESS_UMASK,
                fds: Default::default(),
                identity: ProcessIdentity::default(),
                blocked_signals: SignalSet::empty(),
                pending_signals: SignalSet::empty(),
            },
            Arc::new(StubDriverProcess::default()),
        );

        // Child of the leader, but owned by `driver-b`.
        let peer_pid = kernel.processes.allocate_pid().expect("allocate pid");
        kernel.processes.register(
            peer_pid,
            String::from("driver-b"),
            String::from("sh"),
            Vec::new(),
            ProcessContext {
                pid: peer_pid,
                ppid: leader_pid,
                env: BTreeMap::new(),
                cwd: String::from("/"),
                umask: DEFAULT_PROCESS_UMASK,
                fds: Default::default(),
                identity: ProcessIdentity::default(),
                blocked_signals: SignalSet::empty(),
                pending_signals: SignalSet::empty(),
            },
            Arc::new(StubDriverProcess::default()),
        );

        // Record driver ownership by hand, since the processes were registered
        // directly on the process table rather than spawned through the kernel.
        lock_or_recover(&kernel.driver_pids)
            .entry(String::from("driver-a"))
            .or_default()
            .insert(leader_pid);
        lock_or_recover(&kernel.driver_pids)
            .entry(String::from("driver-b"))
            .or_default()
            .insert(peer_pid);

        let error = kernel
            .setpgid("driver-b", peer_pid, leader_pid)
            .expect_err("cross-driver process-group join should be denied");
        assert_eq!(error.code(), "EPERM");
    }

    /// Only the owning driver may change a process's signal mask or read its
    /// pending signals; any other driver gets `EPERM`.
    #[test]
    fn sigprocmask_and_sigpending_require_process_ownership() {
        let mut kernel = KernelVm::new(MemoryFileSystem::new(), KernelVmConfig::new("vm-sigmask"));
        let process = kernel
            .register_process(
                String::from("driver-a"),
                String::from("sleep"),
                Vec::new(),
                ProcessContext {
                    pid: 0,
                    ppid: 0,
                    env: BTreeMap::new(),
                    cwd: String::from("/"),
                    umask: DEFAULT_PROCESS_UMASK,
                    fds: Default::default(),
                    identity: ProcessIdentity::default(),
                    blocked_signals: SignalSet::empty(),
                    pending_signals: SignalSet::empty(),
                },
                None,
            )
            .expect("create virtual process");
        let mask =
            SignalSet::from_signal(crate::process_table::SIGCHLD).expect("SIGCHLD should be valid");

        // The owner succeeds; the previous mask and the pending set are both empty.
        let previous = kernel
            .sigprocmask("driver-a", process.pid(), SigmaskHow::Block, mask)
            .expect("owner should update signal mask");
        assert_eq!(previous, SignalSet::empty());
        assert_eq!(
            kernel
                .sigpending("driver-a", process.pid())
                .expect("owner should read pending signals"),
            SignalSet::empty()
        );

        // A different driver is rejected for both calls.
        let error = kernel
            .sigprocmask("driver-b", process.pid(), SigmaskHow::Block, mask)
            .expect_err("foreign driver should be rejected");
        assert_eq!(error.code(), "EPERM");
        let error = kernel
            .sigpending("driver-b", process.pid())
            .expect_err("foreign driver should be rejected");
        assert_eq!(error.code(), "EPERM");
    }

    /// A `dup2` issued while `cleanup_process_resources` is paused inside its
    /// test hook must observe the FD table as already removed, and the pipe
    /// must not leak.
    #[test]
    fn cleanup_process_resources_blocks_concurrent_dup2_until_pipe_cleanup_finishes() {
        let fd_tables = Arc::new(Mutex::new(FdTableManager::new()));
        let file_locks = FileLockManager::new();
        let pipes = PipeManager::new();
        let ptys = PtyManager::new();
        let sockets = SocketTable::new();
        let driver_pids = Arc::new(Mutex::new(BTreeMap::from([(
            String::from("driver"),
            BTreeSet::from([41]),
        )])));
        let pipe = pipes.create_pipe();

        // Give pid 41 an FD table holding both ends of the pipe at fds 10 and 11.
        {
            let mut tables = lock_or_recover(fd_tables.as_ref());
            let table = tables.create(41);
            table
                .open_with(
                    Arc::clone(&pipe.read.description),
                    pipe.read.filetype,
                    Some(10),
                )
                .expect("open pipe read end");
            table
                .open_with(
                    Arc::clone(&pipe.write.description),
                    pipe.write.filetype,
                    Some(11),
                )
                .expect("open pipe write end");
        }

        // Shared flags: `.0` is set by the hook once cleanup has reached it,
        // `.1` is set by this test to let the hook return.
        let hook_state = Arc::new((Mutex::new((false, false)), Condvar::new()));
        let hook_state_for_cleanup = Arc::clone(&hook_state);
        set_cleanup_process_resources_test_hook(Some(Arc::new(move || {
            let (state, wake) = &*hook_state_for_cleanup;
            let mut state = lock_or_recover(state);
            state.0 = true;
            wake.notify_all();
            while !state.1 {
                state = wake.wait(state).expect("wait for cleanup release");
            }
        })));

        let fd_tables_for_cleanup = Arc::clone(&fd_tables);
        let pipes_for_cleanup = pipes.clone();
        let driver_pids_for_cleanup = Arc::clone(&driver_pids);
        let cleanup_thread = thread::spawn(move || {
            cleanup_process_resources(
                fd_tables_for_cleanup.as_ref(),
                &file_locks,
                &pipes_for_cleanup,
                &ptys,
                &sockets,
                driver_pids_for_cleanup.as_ref(),
                41,
            );
        });

        // Wait until cleanup is parked inside the hook before racing it.
        {
            let (state, wake) = &*hook_state;
            let mut state = lock_or_recover(state);
            while !state.0 {
                state = wake.wait(state).expect("wait for cleanup hook");
            }
        }

        // NOTE(review): this relies on the hook being invoked while cleanup still
        // holds the fd-table lock, so the dup thread blocks on that lock — confirm
        // against `cleanup_process_resources`.
        let fd_tables_for_dup = Arc::clone(&fd_tables);
        let dup_thread = thread::spawn(move || {
            let mut tables = lock_or_recover(fd_tables_for_dup.as_ref());
            let Some(table) = tables.get_mut(41) else {
                return Err(String::from("ESRCH"));
            };
            table.dup2(10, 12).map_err(|error| error.code().to_string())
        });

        // Release the hook so cleanup can finish.
        {
            let (state, wake) = &*hook_state;
            let mut state = lock_or_recover(state);
            state.1 = true;
            wake.notify_all();
        }

        cleanup_thread.join().expect("cleanup thread should finish");
        let dup_result = dup_thread.join().expect("dup thread should finish");
        // Clear the hook again before asserting.
        set_cleanup_process_resources_test_hook(None);

        assert_eq!(dup_result, Err(String::from("ESRCH")));
        assert!(
            lock_or_recover(fd_tables.as_ref()).get(41).is_none(),
            "cleanup should remove the process FD table"
        );
        assert_eq!(pipes.pipe_count(), 0, "pipe cleanup should not leak");
        assert!(
            lock_or_recover(driver_pids.as_ref())
                .get("driver")
                .is_none_or(|pids| pids.is_empty()),
            "driver ownership should be cleared"
        );
    }

    /// Dropping a kernel that was never explicitly terminated releases all of
    /// its live resources.
    #[test]
    fn drop_disposes_live_kernel_vm_resources() {
        let (kernel, retained) = kernel_with_live_resources();
        drop(kernel);
        assert_kernel_drop_released_resources(&retained);
    }

    /// The same release happens when the kernel is dropped by stack unwinding
    /// after a panic.
    #[test]
    fn drop_during_panic_still_disposes_live_kernel_vm_resources() {
        // The retained handles are passed out of the panicking closure through
        // this shared slot, because the closure's return value is lost on panic.
        let retained = Arc::new(Mutex::new(None::<RetainedKernelResources>));
        let retained_for_panic = Arc::clone(&retained);

        let panic_result = catch_unwind(AssertUnwindSafe(move || {
            let (kernel, resources) = kernel_with_live_resources();
            *lock_or_recover(retained_for_panic.as_ref()) = Some(resources);
            // Keep the kernel alive in a local so it is dropped during unwinding.
            let _kernel = kernel;
            panic!("intentional panic to exercise KernelVm::drop");
        }));

        assert!(panic_result.is_err(), "panic should be observed");
        let retained = lock_or_recover(retained.as_ref())
            .take()
            .expect("panic path should retain resources for assertions");
        assert_kernel_drop_released_resources(&retained);
    }
}