1use crate::bridge::LifecycleState;
2use crate::command_registry::{CommandDriver, CommandRegistry};
3use crate::device_layer::{create_device_layer, DeviceLayer};
4use crate::dns::{
5 format_dns_resource, resolve_dns, resolve_dns_records, DnsConfig, DnsLookupPolicy,
6 DnsRecordResolution, DnsResolution, DnsResolverErrorKind, HickoryDnsResolver,
7 SharedDnsResolver,
8};
9use crate::fd_table::{
10 FdEntry, FdStat, FdTableError, FdTableManager, FileDescription, FileLockManager,
11 FileLockTarget, FlockOperation, ProcessFdTable, FILETYPE_CHARACTER_DEVICE, FILETYPE_DIRECTORY,
12 FILETYPE_PIPE, FILETYPE_REGULAR_FILE, FILETYPE_SYMBOLIC_LINK, F_DUPFD, O_APPEND, O_CREAT,
13 O_EXCL, O_NONBLOCK, O_TRUNC,
14};
15use crate::mount_table::{MountEntry, MountOptions, MountTable, MountedFileSystem};
16use crate::permissions::{
17 check_command_execution, check_network_access, FsOperation, NetworkOperation, PermissionError,
18 PermissionedFileSystem, Permissions,
19};
20use crate::pipe_manager::{PipeError, PipeManager};
21use crate::poll::{
22 PollEvents, PollFd, PollNotifier, PollResult, PollTarget, PollTargetEntry, PollTargetResult,
23 POLLERR, POLLHUP, POLLIN, POLLNVAL, POLLOUT,
24};
25use crate::process_table::{
26 DriverProcess, ProcessContext, ProcessExitCallback, ProcessInfo, ProcessStatus, ProcessTable,
27 ProcessTableError, ProcessWaitResult, SigmaskHow, SignalSet, DEFAULT_PROCESS_UMASK, SIGCONT,
28 SIGPIPE, SIGSTOP, SIGTSTP, SIGWINCH,
29};
30use crate::pty::{LineDisciplineConfig, PartialTermios, PtyError, PtyManager, Termios};
31use crate::resource_accounting::{
32 measure_filesystem_usage, FileSystemUsage, ResourceAccountant, ResourceError, ResourceLimits,
33 ResourceSnapshot, DEFAULT_MAX_OPEN_FDS,
34};
35use crate::root_fs::{RootFileSystem, RootFilesystemError, RootFilesystemSnapshot};
36use crate::socket_table::{
37 DatagramSocketOption, InetSocketAddress, ReceivedDatagram, SocketId, SocketMulticastMembership,
38 SocketRecord, SocketShutdown, SocketSpec, SocketState, SocketTable, SocketTableError,
39 SocketType,
40};
41use crate::user::{ProcessIdentity, UserConfig, UserManager};
42use crate::vfs::{
43 normalize_path, VfsError, VfsResult, VirtualFileSystem, VirtualStat, VirtualTimeSpec,
44 VirtualUtimeSpec,
45};
46use hickory_resolver::proto::rr::RecordType;
47use std::any::Any;
48use std::collections::{BTreeMap, BTreeSet};
49use std::error::Error;
50use std::fmt;
51#[cfg(test)]
52use std::sync::OnceLock;
53use std::sync::{Arc, Condvar, Mutex, MutexGuard, WaitTimeoutResult};
54use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
55
/// Result alias used by kernel operations; the error type is always [`KernelError`].
pub type KernelResult<T> = Result<T, KernelError>;
// Re-export the process-table wait types under the kernel's `WaitPid*` naming.
pub use crate::process_table::{ProcessWaitEvent as WaitPidEvent, WaitPidFlags};

// Seek `whence` selectors. NOTE(review): the names mirror POSIX `lseek`
// (start / current / end); the code that interprets them is outside this
// excerpt — confirm there.
pub const SEEK_SET: u8 = 0;
pub const SEEK_CUR: u8 = 1;
pub const SEEK_END: u8 = 2;
// Octal mask 0o111. Presumably the user/group/other execute bits of a file
// mode — its use site is outside this excerpt.
const EXECUTABLE_PERMISSION_BITS: u32 = 0o111;
// Byte limit applied to shebang lines (use site is outside this excerpt).
const SHEBANG_LINE_MAX_BYTES: usize = 256;
64
/// Error returned by kernel operations: a static errno-style code paired
/// with a human-readable message.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct KernelError {
    code: &'static str,
    message: String,
}

impl KernelError {
    /// Returns the static code string, e.g. `"ESRCH"`.
    pub fn code(&self) -> &'static str {
        self.code
    }

    /// Builds an error from a code and anything convertible into a message.
    fn new(code: &'static str, message: impl Into<String>) -> Self {
        let message = message.into();
        Self { code, message }
    }

    /// `EINVAL` error reporting that the VM is disposed.
    fn disposed() -> Self {
        Self::new("EINVAL", "kernel VM is disposed")
    }

    /// `ESRCH` error naming the missing process id.
    fn no_such_process(pid: u32) -> Self {
        let message = format!("no such process {pid}");
        Self::new("ESRCH", message)
    }

    /// `EBADF` error naming the offending descriptor.
    fn bad_file_descriptor(fd: u32) -> Self {
        let message = format!("bad file descriptor {fd}");
        Self::new("EBADF", message)
    }

    /// `EPERM` error carrying the caller-supplied message.
    fn permission_denied(message: impl Into<String>) -> Self {
        Self::new("EPERM", message)
    }

    /// `ENOENT` error naming the command that could not be found.
    fn command_not_found(command: &str) -> Self {
        let message = format!("command not found: {command}");
        Self::new("ENOENT", message)
    }
}

impl fmt::Display for KernelError {
    /// Renders the error as `<code>: <message>`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { code, message } = self;
        write!(f, "{code}: {message}")
    }
}

impl Error for KernelError {}
111
112#[derive(Clone)]
113pub struct KernelVmConfig {
114 pub vm_id: String,
115 pub env: BTreeMap<String, String>,
116 pub cwd: String,
117 pub user: UserConfig,
118 pub permissions: Permissions,
119 pub dns: DnsConfig,
120 pub dns_resolver: SharedDnsResolver,
121 pub resources: ResourceLimits,
122 pub zombie_ttl: Duration,
123}
124
125impl KernelVmConfig {
126 pub fn new(vm_id: impl Into<String>) -> Self {
127 Self {
128 vm_id: vm_id.into(),
129 env: BTreeMap::new(),
130 cwd: String::from("/workspace"),
131 user: UserConfig::default(),
132 permissions: Permissions::default(),
133 dns: DnsConfig::default(),
134 dns_resolver: Arc::new(HickoryDnsResolver),
135 resources: ResourceLimits::default(),
136 zombie_ttl: Duration::from_secs(60),
137 }
138 }
139}
140
/// Options accepted by `KernelVm::spawn_process`.
#[derive(Debug, Clone, Default)]
pub struct SpawnOptions {
    /// Driver making the request. When this and `parent_pid` are both set,
    /// `spawn_process` first checks that the driver owns the parent process.
    pub requester_driver: Option<String>,
    /// Parent process id; the new process gets ppid `0` when this is `None`.
    pub parent_pid: Option<u32>,
    /// Extra variables layered over the VM-level environment.
    pub env: BTreeMap<String, String>,
    /// Working directory; the VM-level cwd is used when this is `None`.
    pub cwd: Option<String>,
}
148
/// Options accepted by `KernelVm::create_virtual_process`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct VirtualProcessOptions {
    /// Parent process id; when set, the requesting driver must own it. The
    /// new process gets ppid `0` when this is `None`.
    pub parent_pid: Option<u32>,
    /// Extra variables layered over the VM-level environment.
    pub env: BTreeMap<String, String>,
    /// Working directory; the VM-level cwd is used when this is `None`.
    pub cwd: Option<String>,
}
155
/// Options accepted by `KernelVm::exec`; each field is forwarded unchanged
/// into the `SpawnOptions` used for the underlying spawn.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ExecOptions {
    /// Driver making the request, if any.
    pub requester_driver: Option<String>,
    /// Parent process id, if any.
    pub parent_pid: Option<u32>,
    /// Extra environment variables for the spawned process.
    pub env: BTreeMap<String, String>,
    /// Working directory override for the spawned process.
    pub cwd: Option<String>,
}
163
/// Options accepted by `KernelVm::open_shell`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct OpenShellOptions {
    /// Driver making the request. When absent, the spawned process's own
    /// driver is used as the owner for the PTY setup calls.
    pub requester_driver: Option<String>,
    /// Command to run; `open_shell` falls back to `"sh"` when `None`.
    pub command: Option<String>,
    /// Arguments passed to the command.
    pub args: Vec<String>,
    /// Extra environment variables for the shell process.
    pub env: BTreeMap<String, String>,
    /// Working directory override for the shell process.
    pub cwd: Option<String>,
}
172
/// A process id paired with a status value.
// NOTE(review): presumably the result of a `waitpid`-style call; the producer
// is outside this excerpt, so the exact meaning of `status` is unconfirmed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WaitPidResult {
    pub pid: u32,
    pub status: i32,
}
178
/// A process id and status value together with the wait event that was
/// observed.
// NOTE(review): the producer is outside this excerpt; confirm how `status`
// relates to each `WaitPidEvent` variant.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct WaitPidEventResult {
    pub pid: u32,
    pub status: i32,
    pub event: WaitPidEvent,
}
185
/// Outcome of resolving a spawn request: the command and arguments to
/// register, and the driver that will run them. `spawn_process` reads all
/// three fields.
#[derive(Debug, Clone)]
struct ResolvedSpawnCommand {
    command: String,
    args: Vec<String>,
    driver: CommandDriver,
}
192
/// An interpreter plus its arguments.
// NOTE(review): presumably parsed from a script's `#!` line; the parser is
// outside this excerpt.
#[derive(Debug, Clone)]
struct ShebangCommand {
    interpreter: String,
    args: Vec<String>,
}
198
199#[derive(Clone)]
200pub struct KernelProcessHandle {
201 pid: u32,
202 driver: String,
203 process: Arc<StubDriverProcess>,
204}
205
206impl fmt::Debug for KernelProcessHandle {
207 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
208 f.debug_struct("KernelProcessHandle")
209 .field("pid", &self.pid)
210 .field("driver", &self.driver)
211 .finish_non_exhaustive()
212 }
213}
214
215impl KernelProcessHandle {
216 pub fn pid(&self) -> u32 {
217 self.pid
218 }
219
220 pub fn driver(&self) -> &str {
221 &self.driver
222 }
223
224 pub fn finish(&self, exit_code: i32) {
225 self.process.finish(exit_code);
226 }
227
228 pub fn kill(&self, signal: i32) {
229 self.process.kill(signal);
230 }
231
232 pub fn wait(&self, timeout: Duration) -> Option<i32> {
233 self.process.wait(timeout)
234 }
235
236 pub fn kill_signals(&self) -> Vec<i32> {
237 self.process.kill_signals()
238 }
239}
240
241#[derive(Debug, Clone)]
242pub struct OpenShellHandle {
243 process: KernelProcessHandle,
244 master_fd: u32,
245 slave_fd: u32,
246 pty_path: String,
247}
248
249impl OpenShellHandle {
250 pub fn process(&self) -> &KernelProcessHandle {
251 &self.process
252 }
253
254 pub fn pid(&self) -> u32 {
255 self.process.pid()
256 }
257
258 pub fn master_fd(&self) -> u32 {
259 self.master_fd
260 }
261
262 pub fn slave_fd(&self) -> u32 {
263 self.slave_fd
264 }
265
266 pub fn pty_path(&self) -> &str {
267 &self.pty_path
268 }
269}
270
/// In-process kernel for one VM: owns the permission-wrapped filesystem, the
/// process/FD/pipe/PTY/socket tables, and the per-VM configuration.
pub struct KernelVm<F> {
    // Identifier supplied via the config.
    vm_id: String,
    // `now_ms()` and `Instant::now()` captured at construction.
    boot_time_ms: u64,
    boot_instant: Instant,
    // Caller's filesystem wrapped in the device layer, then in the
    // permission layer.
    filesystem: PermissionedFileSystem<DeviceLayer<F>>,
    // Policy used for network and command-execution checks.
    permissions: Permissions,
    dns: DnsConfig,
    dns_resolver: SharedDnsResolver,
    // VM-level environment and cwd; per-spawn options are layered on top.
    env: BTreeMap<String, String>,
    cwd: String,
    commands: CommandRegistry,
    // Shared with the process-exit callback, hence `Arc<Mutex<..>>`.
    fd_tables: Arc<Mutex<FdTableManager>>,
    processes: ProcessTable,
    pipes: PipeManager,
    ptys: PtyManager,
    sockets: SocketTable,
    // Clones of this notifier are handed to the pipe and PTY managers.
    poll_notifier: PollNotifier,
    users: UserManager,
    resources: ResourceAccountant,
    file_locks: FileLockManager,
    // Driver name -> pids; also shared with the process-exit callback.
    driver_pids: Arc<Mutex<BTreeMap<String, BTreeSet<u32>>>>,
    // Set once `dispose_kernel_vm_resources` has run.
    terminated: bool,
}
294
295fn cleanup_process_resources(
296 fd_tables: &Mutex<FdTableManager>,
297 file_locks: &FileLockManager,
298 pipes: &PipeManager,
299 ptys: &PtyManager,
300 sockets: &SocketTable,
301 driver_pids: &Mutex<BTreeMap<String, BTreeSet<u32>>>,
302 pid: u32,
303) {
304 let mut cleanup = Vec::new();
305 {
306 let mut tables = lock_or_recover(fd_tables);
307 let descriptors = tables
308 .get(pid)
309 .map(|table| {
310 table
311 .iter()
312 .map(|entry| (entry.fd, Arc::clone(&entry.description), entry.filetype))
313 .collect::<Vec<_>>()
314 })
315 .unwrap_or_default();
316
317 cleanup_process_resources_test_hook();
318
319 if let Some(table) = tables.get_mut(pid) {
320 for (fd, description, filetype) in &descriptors {
321 table.close(*fd);
322 cleanup.push((Arc::clone(description), *filetype));
323 }
324 }
325 tables.remove(pid);
326 }
327
328 for (description, filetype) in cleanup {
329 close_special_resource_if_needed(file_locks, pipes, ptys, &description, filetype);
330 }
331
332 sockets.remove_all_for_pid(pid);
333
334 let mut owners = lock_or_recover(driver_pids);
335 for pids in owners.values_mut() {
336 pids.remove(&pid);
337 }
338}
339
340fn dispose_kernel_vm_resources<F>(kernel: &mut KernelVm<F>) {
341 kernel.processes.terminate_all();
342 let pids = lock_or_recover(&kernel.fd_tables).pids();
343 for pid in pids {
344 cleanup_process_resources(
345 kernel.fd_tables.as_ref(),
346 &kernel.file_locks,
347 &kernel.pipes,
348 &kernel.ptys,
349 &kernel.sockets,
350 kernel.driver_pids.as_ref(),
351 pid,
352 );
353 }
354 lock_or_recover(&kernel.driver_pids).clear();
355 kernel.terminated = true;
356}
357
/// Test-only callback type invoked from inside `cleanup_process_resources`.
#[cfg(test)]
type CleanupProcessResourcesHook = Arc<dyn Fn() + Send + Sync + 'static>;
360
/// Test build: invokes the installed cleanup hook, if there is one.
#[cfg(test)]
fn cleanup_process_resources_test_hook() {
    // Clone the hook out in a single statement so the slot's lock is released
    // before the callback runs.
    let installed = lock_or_recover(cleanup_process_resources_test_hook_slot()).clone();
    match installed {
        Some(callback) => callback(),
        None => {}
    }
}
368
/// Non-test build: the cleanup hook is a no-op.
#[cfg(not(test))]
fn cleanup_process_resources_test_hook() {}
371
/// Test build: returns the process-wide slot holding the optional cleanup
/// hook, creating it empty on first use.
#[cfg(test)]
fn cleanup_process_resources_test_hook_slot() -> &'static Mutex<Option<CleanupProcessResourcesHook>>
{
    static HOOK: OnceLock<Mutex<Option<CleanupProcessResourcesHook>>> = OnceLock::new();
    // `Mutex::default()` wraps `Option::default()`, i.e. `None`.
    HOOK.get_or_init(Mutex::default)
}
378
/// Test build: installs `hook` as the cleanup hook, or clears it when `None`.
#[cfg(test)]
fn set_cleanup_process_resources_test_hook(hook: Option<CleanupProcessResourcesHook>) {
    let mut slot = lock_or_recover(cleanup_process_resources_test_hook_slot());
    *slot = hook;
}
383
384fn close_special_resource_if_needed(
385 file_locks: &FileLockManager,
386 pipes: &PipeManager,
387 ptys: &PtyManager,
388 description: &Arc<FileDescription>,
389 filetype: u8,
390) {
391 if description.ref_count() != 0 {
392 return;
393 }
394
395 file_locks.release_owner(description.id());
396
397 if filetype == FILETYPE_PIPE && pipes.is_pipe(description.id()) {
398 pipes.close(description.id());
399 }
400
401 if ptys.is_pty(description.id()) {
402 ptys.close(description.id());
403 }
404}
405
/// Kinds of node the kernel distinguishes under its virtual proc tree.
// NOTE(review): the variant names suggest `/proc`-style entries (global files,
// a `self` link, and per-pid directories/files/links); the path resolution
// that produces these values is outside this excerpt.
#[derive(Debug, Clone, PartialEq, Eq)]
enum ProcNode {
    RootDir,
    MountsFile,
    CpuInfoFile,
    MemInfoFile,
    LoadAvgFile,
    UptimeFile,
    VersionFile,
    SelfLink { pid: u32 },
    PidDir { pid: u32 },
    PidFdDir { pid: u32 },
    PidCmdline { pid: u32 },
    PidEnviron { pid: u32 },
    PidCwdLink { pid: u32 },
    PidStatFile { pid: u32 },
    PidStatusFile { pid: u32 },
    PidFdLink { pid: u32, fd: u32 },
}
425
426impl<F: VirtualFileSystem + 'static> KernelVm<F> {
427 pub fn new(filesystem: F, config: KernelVmConfig) -> Self {
428 let vm_id = config.vm_id;
429 let boot_time_ms = now_ms();
430 let boot_instant = Instant::now();
431 let permissions = config.permissions.clone();
432 let users = UserManager::from_config(config.user);
433 let process_table = ProcessTable::with_zombie_ttl(config.zombie_ttl);
434 let process_table_for_pty = process_table.clone();
435 let fd_tables = Arc::new(Mutex::new(FdTableManager::with_max_fds(
436 config
437 .resources
438 .max_open_fds
439 .unwrap_or(DEFAULT_MAX_OPEN_FDS),
440 )));
441 let file_locks = FileLockManager::new();
442 let driver_pids = Arc::new(Mutex::new(BTreeMap::new()));
443 let poll_notifier = PollNotifier::default();
444 let pipes = PipeManager::with_notifier(poll_notifier.clone());
445 let ptys = PtyManager::with_signal_handler_and_notifier(
446 Arc::new(move |pgid, signal| {
447 let _ = process_table_for_pty.kill(-(pgid as i32), signal);
448 }),
449 poll_notifier.clone(),
450 );
451 let sockets = SocketTable::new();
452
453 let fd_tables_for_exit = Arc::clone(&fd_tables);
454 let file_locks_for_exit = file_locks.clone();
455 let driver_pids_for_exit = Arc::clone(&driver_pids);
456 let pipes_for_exit = pipes.clone();
457 let ptys_for_exit = ptys.clone();
458 let sockets_for_exit = sockets.clone();
459 process_table.set_on_process_exit(Some(Arc::new(move |pid| {
460 cleanup_process_resources(
461 fd_tables_for_exit.as_ref(),
462 &file_locks_for_exit,
463 &pipes_for_exit,
464 &ptys_for_exit,
465 &sockets_for_exit,
466 driver_pids_for_exit.as_ref(),
467 pid,
468 );
469 })));
470
471 Self {
472 vm_id: vm_id.clone(),
473 boot_time_ms,
474 boot_instant,
475 filesystem: PermissionedFileSystem::new(
476 create_device_layer(filesystem),
477 vm_id,
478 permissions.clone(),
479 ),
480 permissions,
481 dns: config.dns,
482 dns_resolver: config.dns_resolver,
483 env: config.env,
484 cwd: config.cwd,
485 commands: CommandRegistry::new(),
486 fd_tables,
487 processes: process_table,
488 pipes,
489 ptys,
490 sockets,
491 poll_notifier,
492 users,
493 resources: ResourceAccountant::new(config.resources),
494 file_locks,
495 driver_pids,
496 terminated: false,
497 }
498 }
499
500 pub fn vm_id(&self) -> &str {
501 &self.vm_id
502 }
503
504 pub fn state(&self) -> LifecycleState {
505 if self.terminated {
506 LifecycleState::Terminated
507 } else if self.processes.running_count() > 0 {
508 LifecycleState::Busy
509 } else {
510 LifecycleState::Ready
511 }
512 }
513
514 pub fn commands(&self) -> BTreeMap<String, String> {
515 self.commands.list()
516 }
517
518 pub fn filesystem(&self) -> &PermissionedFileSystem<DeviceLayer<F>> {
519 &self.filesystem
520 }
521
522 pub fn filesystem_mut(&mut self) -> &mut PermissionedFileSystem<DeviceLayer<F>> {
523 &mut self.filesystem
524 }
525
526 pub fn user_manager(&self) -> &UserManager {
527 &self.users
528 }
529
530 pub fn process_identity(
531 &self,
532 requester_driver: &str,
533 pid: u32,
534 ) -> KernelResult<ProcessIdentity> {
535 self.assert_driver_owns(requester_driver, pid)?;
536 Ok(self
537 .processes
538 .get(pid)
539 .ok_or_else(|| KernelError::no_such_process(pid))?
540 .identity)
541 }
542
543 pub fn user_profile(&self) -> UserManager {
544 self.users.clone()
545 }
546
547 pub fn getuid(&self, requester_driver: &str, pid: u32) -> KernelResult<u32> {
548 Ok(self.process_identity(requester_driver, pid)?.uid)
549 }
550
551 pub fn getgid(&self, requester_driver: &str, pid: u32) -> KernelResult<u32> {
552 Ok(self.process_identity(requester_driver, pid)?.gid)
553 }
554
555 pub fn geteuid(&self, requester_driver: &str, pid: u32) -> KernelResult<u32> {
556 Ok(self.process_identity(requester_driver, pid)?.euid)
557 }
558
559 pub fn getegid(&self, requester_driver: &str, pid: u32) -> KernelResult<u32> {
560 Ok(self.process_identity(requester_driver, pid)?.egid)
561 }
562
563 pub fn getgroups(&self, requester_driver: &str, pid: u32) -> KernelResult<Vec<u32>> {
564 Ok(self
565 .process_identity(requester_driver, pid)?
566 .supplementary_gids)
567 }
568
569 pub fn getpwuid(&self, uid: u32) -> KernelResult<String> {
570 self.users
571 .getpwuid(uid)
572 .ok_or_else(|| KernelError::new("ENOENT", format!("unknown uid {uid}")))
573 }
574
575 pub fn getgrgid(&self, gid: u32) -> KernelResult<String> {
576 self.users
577 .getgrgid(gid)
578 .ok_or_else(|| KernelError::new("ENOENT", format!("unknown gid {gid}")))
579 }
580
581 pub fn resource_snapshot(&self) -> ResourceSnapshot {
582 let fd_tables = lock_or_recover(&self.fd_tables);
583 self.resources.snapshot(
584 &self.processes,
585 &fd_tables,
586 &self.pipes,
587 &self.ptys,
588 &self.sockets,
589 )
590 }
591
592 pub fn resource_limits(&self) -> &ResourceLimits {
593 self.resources.limits()
594 }
595
596 pub fn resolve_dns(
597 &self,
598 hostname: &str,
599 policy: DnsLookupPolicy,
600 ) -> KernelResult<DnsResolution> {
601 self.assert_not_terminated()?;
602 if matches!(policy, DnsLookupPolicy::CheckPermissions) {
603 let resource = format_dns_resource(hostname).map_err(map_dns_resolver_error)?;
604 check_network_access(
605 &self.vm_id,
606 &self.permissions,
607 NetworkOperation::Dns,
608 &resource,
609 )?;
610 }
611
612 resolve_dns(&self.dns, self.dns_resolver.as_ref(), hostname).map_err(map_dns_resolver_error)
613 }
614
615 pub fn resolve_dns_records(
616 &self,
617 hostname: &str,
618 record_type: RecordType,
619 policy: DnsLookupPolicy,
620 ) -> KernelResult<DnsRecordResolution> {
621 self.assert_not_terminated()?;
622 if matches!(policy, DnsLookupPolicy::CheckPermissions) {
623 let resource = format_dns_resource(hostname).map_err(map_dns_resolver_error)?;
624 check_network_access(
625 &self.vm_id,
626 &self.permissions,
627 NetworkOperation::Dns,
628 &resource,
629 )?;
630 }
631
632 resolve_dns_records(&self.dns, self.dns_resolver.as_ref(), hostname, record_type)
633 .map_err(map_dns_resolver_error)
634 }
635
636 pub fn register_driver(&mut self, driver: CommandDriver) -> KernelResult<()> {
637 self.assert_not_terminated()?;
638 let driver_name = driver.name().to_owned();
639 let populate_driver = driver.clone();
640 self.commands.register(driver)?;
641 lock_or_recover(&self.driver_pids)
642 .entry(driver_name)
643 .or_default();
644 self.commands
645 .populate_driver_bin(&mut self.filesystem, &populate_driver)?;
646 Ok(())
647 }
648
649 pub fn exec(
650 &mut self,
651 command: &str,
652 options: ExecOptions,
653 ) -> KernelResult<KernelProcessHandle> {
654 self.spawn_process(
655 "sh",
656 vec![String::from("-c"), String::from(command)],
657 SpawnOptions {
658 requester_driver: options.requester_driver,
659 parent_pid: options.parent_pid,
660 env: options.env,
661 cwd: options.cwd,
662 },
663 )
664 }
665
666 pub fn open_shell(&mut self, options: OpenShellOptions) -> KernelResult<OpenShellHandle> {
667 let command = options.command.unwrap_or_else(|| String::from("sh"));
668 let requester_driver = options.requester_driver.clone();
669 let process = self.spawn_process(
670 &command,
671 options.args,
672 SpawnOptions {
673 requester_driver: requester_driver.clone(),
674 parent_pid: None,
675 env: options.env,
676 cwd: options.cwd,
677 },
678 )?;
679 let owner = requester_driver.as_deref().unwrap_or(process.driver());
680 let (master_fd, slave_fd, pty_path) = self.open_pty(owner, process.pid())?;
681 self.setpgid(owner, process.pid(), process.pid())?;
682 self.pty_set_foreground_pgid(owner, process.pid(), master_fd, process.pid())?;
683 Ok(OpenShellHandle {
684 process,
685 master_fd,
686 slave_fd,
687 pty_path,
688 })
689 }
690
691 pub fn read_file(&mut self, path: &str) -> KernelResult<Vec<u8>> {
692 self.assert_not_terminated()?;
693 self.read_file_internal(None, path)
694 }
695
696 pub fn pread_file(&mut self, path: &str, offset: u64, length: usize) -> KernelResult<Vec<u8>> {
697 self.assert_not_terminated()?;
698 self.resources.check_pread_length(length)?;
699 Ok(VirtualFileSystem::pread(
700 &mut self.filesystem,
701 path,
702 offset,
703 length,
704 )?)
705 }
706
707 pub fn read_file_for_process(
708 &mut self,
709 requester_driver: &str,
710 pid: u32,
711 path: &str,
712 ) -> KernelResult<Vec<u8>> {
713 self.assert_not_terminated()?;
714 self.assert_driver_owns(requester_driver, pid)?;
715 self.read_file_internal(Some(pid), path)
716 }
717
718 pub fn write_file(&mut self, path: &str, content: impl Into<Vec<u8>>) -> KernelResult<()> {
719 self.assert_not_terminated()?;
720 self.reject_read_only_resolved_write_path(path)?;
721 let content = content.into();
722 self.check_write_file_limits(path, content.len() as u64)?;
723 Ok(self.filesystem.write_file(path, content)?)
724 }
725
726 pub fn write_file_for_process(
727 &mut self,
728 requester_driver: &str,
729 pid: u32,
730 path: &str,
731 content: impl Into<Vec<u8>>,
732 mode: Option<u32>,
733 ) -> KernelResult<()> {
734 self.assert_not_terminated()?;
735 self.assert_driver_owns(requester_driver, pid)?;
736 let existed = self.exists_internal(Some(pid), path)?;
737 let content = content.into();
738 self.reject_read_only_resolved_write_path(path)?;
739 self.check_write_file_limits(path, content.len() as u64)?;
740 VirtualFileSystem::write_file_with_mode(&mut self.filesystem, path, content, mode)?;
741 if !existed {
742 let umask = self.processes.get_umask(pid)?;
743 self.apply_creation_mode(path, mode.unwrap_or(0o666), umask)?;
744 }
745 Ok(())
746 }
747
748 pub fn create_dir(&mut self, path: &str) -> KernelResult<()> {
749 self.assert_not_terminated()?;
750 self.reject_read_only_entry_write_path(path)?;
751 self.check_create_dir_limits(path)?;
752 Ok(self.filesystem.create_dir(path)?)
753 }
754
755 pub fn create_dir_for_process(
756 &mut self,
757 requester_driver: &str,
758 pid: u32,
759 path: &str,
760 mode: Option<u32>,
761 ) -> KernelResult<()> {
762 self.assert_not_terminated()?;
763 self.assert_driver_owns(requester_driver, pid)?;
764 let existed = self.exists_internal(Some(pid), path)?;
765 self.reject_read_only_entry_write_path(path)?;
766 self.check_create_dir_limits(path)?;
767 VirtualFileSystem::create_dir_with_mode(&mut self.filesystem, path, mode)?;
768 if !existed {
769 let umask = self.processes.get_umask(pid)?;
770 self.apply_creation_mode(path, mode.unwrap_or(0o777), umask)?;
771 }
772 Ok(())
773 }
774
775 pub fn mkdir(&mut self, path: &str, recursive: bool) -> KernelResult<()> {
776 self.assert_not_terminated()?;
777 self.reject_read_only_entry_write_path(path)?;
778 self.check_mkdir_limits(path, recursive)?;
779 Ok(self.filesystem.mkdir(path, recursive)?)
780 }
781
782 pub fn mkdir_for_process(
783 &mut self,
784 requester_driver: &str,
785 pid: u32,
786 path: &str,
787 recursive: bool,
788 mode: Option<u32>,
789 ) -> KernelResult<()> {
790 self.assert_not_terminated()?;
791 self.assert_driver_owns(requester_driver, pid)?;
792 let created_paths = self.missing_directory_paths(path, recursive)?;
793 self.reject_read_only_entry_write_path(path)?;
794 self.check_mkdir_limits(path, recursive)?;
795 VirtualFileSystem::mkdir_with_mode(&mut self.filesystem, path, recursive, mode)?;
796 if !created_paths.is_empty() {
797 let umask = self.processes.get_umask(pid)?;
798 let mode = mode.unwrap_or(0o777);
799 for created_path in created_paths {
800 self.apply_creation_mode(&created_path, mode, umask)?;
801 }
802 }
803 Ok(())
804 }
805
806 pub fn umask(
807 &self,
808 requester_driver: &str,
809 pid: u32,
810 new_mask: Option<u32>,
811 ) -> KernelResult<u32> {
812 self.assert_driver_owns(requester_driver, pid)?;
813 match new_mask {
814 Some(mask) => Ok(self.processes.set_umask(pid, mask)?),
815 None => Ok(self.processes.get_umask(pid)?),
816 }
817 }
818
819 pub fn exists(&self, path: &str) -> KernelResult<bool> {
820 self.assert_not_terminated()?;
821 self.exists_internal(None, path)
822 }
823
824 pub fn exists_for_process(
825 &self,
826 requester_driver: &str,
827 pid: u32,
828 path: &str,
829 ) -> KernelResult<bool> {
830 self.assert_not_terminated()?;
831 self.assert_driver_owns(requester_driver, pid)?;
832 self.exists_internal(Some(pid), path)
833 }
834
835 pub fn stat(&mut self, path: &str) -> KernelResult<VirtualStat> {
836 self.assert_not_terminated()?;
837 self.stat_internal(None, path)
838 }
839
840 pub fn stat_for_process(
841 &mut self,
842 requester_driver: &str,
843 pid: u32,
844 path: &str,
845 ) -> KernelResult<VirtualStat> {
846 self.assert_not_terminated()?;
847 self.assert_driver_owns(requester_driver, pid)?;
848 self.stat_internal(Some(pid), path)
849 }
850
851 pub fn lstat(&self, path: &str) -> KernelResult<VirtualStat> {
852 self.assert_not_terminated()?;
853 self.lstat_internal(None, path)
854 }
855
856 pub fn lstat_for_process(
857 &self,
858 requester_driver: &str,
859 pid: u32,
860 path: &str,
861 ) -> KernelResult<VirtualStat> {
862 self.assert_not_terminated()?;
863 self.assert_driver_owns(requester_driver, pid)?;
864 self.lstat_internal(Some(pid), path)
865 }
866
867 pub fn read_link(&self, path: &str) -> KernelResult<String> {
868 self.assert_not_terminated()?;
869 self.read_link_internal(None, path)
870 }
871
872 pub fn read_link_for_process(
873 &self,
874 requester_driver: &str,
875 pid: u32,
876 path: &str,
877 ) -> KernelResult<String> {
878 self.assert_not_terminated()?;
879 self.assert_driver_owns(requester_driver, pid)?;
880 self.read_link_internal(Some(pid), path)
881 }
882
883 pub fn read_dir(&mut self, path: &str) -> KernelResult<Vec<String>> {
884 self.assert_not_terminated()?;
885 let entries = self.read_dir_internal(None, path)?;
886 self.resources.check_readdir_entries(entries.len())?;
887 Ok(entries)
888 }
889
890 pub fn read_dir_for_process(
891 &mut self,
892 requester_driver: &str,
893 pid: u32,
894 path: &str,
895 ) -> KernelResult<Vec<String>> {
896 self.assert_not_terminated()?;
897 self.assert_driver_owns(requester_driver, pid)?;
898 let entries = self.read_dir_internal(Some(pid), path)?;
899 self.resources.check_readdir_entries(entries.len())?;
900 Ok(entries)
901 }
902
903 pub fn remove_file(&mut self, path: &str) -> KernelResult<()> {
904 self.assert_not_terminated()?;
905 self.reject_read_only_entry_write_path(path)?;
906 Ok(self.filesystem.remove_file(path)?)
907 }
908
909 pub fn remove_dir(&mut self, path: &str) -> KernelResult<()> {
910 self.assert_not_terminated()?;
911 self.reject_read_only_entry_write_path(path)?;
912 Ok(self.filesystem.remove_dir(path)?)
913 }
914
915 pub fn rename(&mut self, old_path: &str, new_path: &str) -> KernelResult<()> {
916 self.assert_not_terminated()?;
917 self.reject_read_only_entry_write_path(old_path)?;
918 self.reject_read_only_entry_write_path(new_path)?;
919 self.check_rename_copy_up_limits(old_path, new_path)?;
920 Ok(self.filesystem.rename(old_path, new_path)?)
921 }
922
923 pub fn realpath(&self, path: &str) -> KernelResult<String> {
924 self.assert_not_terminated()?;
925 self.realpath_internal(None, path)
926 }
927
928 pub fn realpath_for_process(
929 &self,
930 requester_driver: &str,
931 pid: u32,
932 path: &str,
933 ) -> KernelResult<String> {
934 self.assert_not_terminated()?;
935 self.assert_driver_owns(requester_driver, pid)?;
936 self.realpath_internal(Some(pid), path)
937 }
938
939 pub fn symlink(&mut self, target: &str, link_path: &str) -> KernelResult<()> {
940 self.assert_not_terminated()?;
941 if is_proc_path(target) {
942 self.filesystem
943 .check_virtual_path(FsOperation::Write, link_path)
944 .map_err(KernelError::from)?;
945 return Err(read_only_filesystem_error(link_path));
946 }
947 self.reject_read_only_entry_write_path(link_path)?;
948 self.check_symlink_limits(target, link_path)?;
949 Ok(self.filesystem.symlink(target, link_path)?)
950 }
951
952 pub fn chmod(&mut self, path: &str, mode: u32) -> KernelResult<()> {
953 self.assert_not_terminated()?;
954 self.reject_read_only_resolved_write_path(path)?;
955 Ok(self.filesystem.chmod(path, mode)?)
956 }
957
958 pub fn link(&mut self, old_path: &str, new_path: &str) -> KernelResult<()> {
959 self.assert_not_terminated()?;
960 if is_proc_path(old_path) {
961 self.filesystem
962 .check_virtual_path(FsOperation::Write, new_path)
963 .map_err(KernelError::from)?;
964 return Err(read_only_filesystem_error(new_path));
965 }
966 self.reject_read_only_resolved_write_path(old_path)?;
967 self.reject_read_only_entry_write_path(new_path)?;
968 Ok(self.filesystem.link(old_path, new_path)?)
969 }
970
971 pub fn chown(&mut self, path: &str, uid: u32, gid: u32) -> KernelResult<()> {
972 self.assert_not_terminated()?;
973 self.reject_read_only_resolved_write_path(path)?;
974 Ok(self.filesystem.chown(path, uid, gid)?)
975 }
976
977 pub fn utimes(&mut self, path: &str, atime_ms: u64, mtime_ms: u64) -> KernelResult<()> {
978 self.utimes_spec(
979 path,
980 VirtualUtimeSpec::Set(VirtualTimeSpec::from_millis(atime_ms)),
981 VirtualUtimeSpec::Set(VirtualTimeSpec::from_millis(mtime_ms)),
982 )
983 }
984
985 pub fn utimes_spec(
986 &mut self,
987 path: &str,
988 atime: VirtualUtimeSpec,
989 mtime: VirtualUtimeSpec,
990 ) -> KernelResult<()> {
991 self.assert_not_terminated()?;
992 self.reject_read_only_resolved_write_path(path)?;
993 Ok(self.filesystem.utimes_spec(path, atime, mtime, true)?)
994 }
995
996 pub fn lutimes(
997 &mut self,
998 path: &str,
999 atime: VirtualUtimeSpec,
1000 mtime: VirtualUtimeSpec,
1001 ) -> KernelResult<()> {
1002 self.assert_not_terminated()?;
1003 self.reject_read_only_entry_write_path(path)?;
1004 Ok(self.filesystem.utimes_spec(path, atime, mtime, false)?)
1005 }
1006
1007 pub fn futimes(
1008 &mut self,
1009 requester_driver: &str,
1010 pid: u32,
1011 fd: u32,
1012 atime: VirtualUtimeSpec,
1013 mtime: VirtualUtimeSpec,
1014 ) -> KernelResult<()> {
1015 self.assert_not_terminated()?;
1016 let path = self
1017 .description_for_fd(requester_driver, pid, fd)?
1018 .path()
1019 .to_owned();
1020 self.reject_read_only_resolved_write_path(&path)?;
1021 Ok(self.filesystem.utimes_spec(&path, atime, mtime, true)?)
1022 }
1023
1024 pub fn truncate(&mut self, path: &str, length: u64) -> KernelResult<()> {
1025 self.assert_not_terminated()?;
1026 self.reject_read_only_resolved_write_path(path)?;
1027 self.check_truncate_limits(path, length)?;
1028 Ok(self.filesystem.truncate(path, length)?)
1029 }
1030
1031 pub fn list_processes(&self) -> BTreeMap<u32, ProcessInfo> {
1032 self.processes.list_processes()
1033 }
1034
1035 pub fn zombie_timer_count(&self) -> usize {
1036 self.processes.zombie_timer_count()
1037 }
1038
1039 pub fn spawn_process(
1040 &mut self,
1041 command: &str,
1042 args: Vec<String>,
1043 options: SpawnOptions,
1044 ) -> KernelResult<KernelProcessHandle> {
1045 self.assert_not_terminated()?;
1046 if let (Some(requester), Some(parent_pid)) =
1047 (options.requester_driver.as_deref(), options.parent_pid)
1048 {
1049 self.assert_driver_owns(requester, parent_pid)?;
1050 }
1051
1052 let cwd = options.cwd.clone().unwrap_or_else(|| self.cwd.clone());
1053 let resolved = self.resolve_spawn_command(command, &args, &cwd)?;
1054
1055 self.resources
1056 .check_process_argv_bytes(&resolved.command, &resolved.args)?;
1057 self.resources
1058 .check_process_env_bytes(&self.env, &options.env)?;
1059
1060 let mut env = self.env.clone();
1061 env.extend(options.env.clone());
1062 check_command_execution(
1063 &self.vm_id,
1064 &self.permissions,
1065 &resolved.command,
1066 &resolved.args,
1067 Some(&cwd),
1068 &env,
1069 )?;
1070
1071 let inherited_fds = {
1072 let tables = lock_or_recover(&self.fd_tables);
1073 options
1074 .parent_pid
1075 .and_then(|pid| tables.get(pid).map(ProcessFdTable::len))
1076 .unwrap_or(3)
1077 };
1078 self.resources
1079 .check_process_spawn(&self.resource_snapshot(), inherited_fds)?;
1080
1081 self.register_process(
1082 resolved.driver.name().to_owned(),
1083 resolved.command,
1084 resolved.args,
1085 ProcessContext {
1086 pid: 0,
1087 ppid: options.parent_pid.unwrap_or(0),
1088 env,
1089 cwd,
1090 umask: DEFAULT_PROCESS_UMASK,
1091 fds: Default::default(),
1092 identity: self.users.identity(),
1093 blocked_signals: SignalSet::empty(),
1094 pending_signals: SignalSet::empty(),
1095 },
1096 options.requester_driver.as_deref(),
1097 )
1098 }
1099
1100 pub fn create_virtual_process(
1101 &mut self,
1102 requester_driver: &str,
1103 driver: &str,
1104 command: &str,
1105 args: Vec<String>,
1106 options: VirtualProcessOptions,
1107 ) -> KernelResult<KernelProcessHandle> {
1108 self.assert_not_terminated()?;
1109 if let Some(parent_pid) = options.parent_pid {
1110 self.assert_driver_owns(requester_driver, parent_pid)?;
1111 }
1112
1113 let cwd = options.cwd.clone().unwrap_or_else(|| self.cwd.clone());
1114 self.resources.check_process_argv_bytes(command, &args)?;
1115 self.resources
1116 .check_process_env_bytes(&self.env, &options.env)?;
1117
1118 let mut env = self.env.clone();
1119 env.extend(options.env.clone());
1120 check_command_execution(
1121 &self.vm_id,
1122 &self.permissions,
1123 command,
1124 &args,
1125 Some(&cwd),
1126 &env,
1127 )?;
1128
1129 let inherited_fds = {
1130 let tables = lock_or_recover(&self.fd_tables);
1131 options
1132 .parent_pid
1133 .and_then(|pid| tables.get(pid).map(ProcessFdTable::len))
1134 .unwrap_or(3)
1135 };
1136 self.resources
1137 .check_process_spawn(&self.resource_snapshot(), inherited_fds)?;
1138
1139 self.register_process(
1140 String::from(driver),
1141 String::from(command),
1142 args,
1143 ProcessContext {
1144 pid: 0,
1145 ppid: options.parent_pid.unwrap_or(0),
1146 env,
1147 cwd,
1148 umask: DEFAULT_PROCESS_UMASK,
1149 fds: Default::default(),
1150 identity: self.users.identity(),
1151 blocked_signals: SignalSet::empty(),
1152 pending_signals: SignalSet::empty(),
1153 },
1154 Some(requester_driver),
1155 )
1156 }
1157
1158 pub fn read_process_stdin(
1159 &mut self,
1160 requester_driver: &str,
1161 pid: u32,
1162 length: usize,
1163 timeout: Option<Duration>,
1164 ) -> KernelResult<Option<Vec<u8>>> {
1165 self.fd_read_with_timeout_result(requester_driver, pid, 0, length, timeout)
1166 }
1167
1168 pub fn write_process_stdout(
1169 &mut self,
1170 requester_driver: &str,
1171 pid: u32,
1172 data: &[u8],
1173 ) -> KernelResult<usize> {
1174 self.fd_write(requester_driver, pid, 1, data)
1175 }
1176
1177 pub fn write_process_stderr(
1178 &mut self,
1179 requester_driver: &str,
1180 pid: u32,
1181 data: &[u8],
1182 ) -> KernelResult<usize> {
1183 self.fd_write(requester_driver, pid, 2, data)
1184 }
1185
1186 pub fn exit_process(
1187 &mut self,
1188 requester_driver: &str,
1189 pid: u32,
1190 exit_code: i32,
1191 ) -> KernelResult<()> {
1192 self.assert_driver_owns(requester_driver, pid)?;
1193 self.processes.mark_exited(pid, exit_code);
1194 Ok(())
1195 }
1196
1197 fn register_process(
1198 &mut self,
1199 driver_name: String,
1200 command: String,
1201 args: Vec<String>,
1202 mut ctx: ProcessContext,
1203 requester_driver: Option<&str>,
1204 ) -> KernelResult<KernelProcessHandle> {
1205 let pid = self.processes.allocate_pid()?;
1206 ctx.pid = pid;
1207
1208 {
1209 let mut tables = lock_or_recover(&self.fd_tables);
1210 if ctx.ppid != 0 {
1211 let parent_pid = ctx.ppid;
1212 tables.fork(parent_pid, pid);
1213 } else {
1214 tables.create(pid);
1215 }
1216 }
1217
1218 let process = Arc::new(StubDriverProcess::default());
1219 self.processes.register(
1220 pid,
1221 driver_name.clone(),
1222 command,
1223 args,
1224 ctx,
1225 process.clone(),
1226 );
1227
1228 let mut owners = lock_or_recover(&self.driver_pids);
1229 owners.entry(driver_name.clone()).or_default().insert(pid);
1230 if let Some(requester) = requester_driver {
1231 owners
1232 .entry(String::from(requester))
1233 .or_default()
1234 .insert(pid);
1235 }
1236
1237 Ok(KernelProcessHandle {
1238 pid,
1239 driver: driver_name,
1240 process,
1241 })
1242 }
1243
1244 pub fn waitpid(&mut self, pid: u32) -> KernelResult<WaitPidResult> {
1245 let (pid, status) = self.processes.waitpid(pid)?;
1246 self.cleanup_process_resources(pid);
1247 Ok(WaitPidResult { pid, status })
1248 }
1249
1250 pub fn waitpid_with_options(
1251 &mut self,
1252 requester_driver: &str,
1253 waiter_pid: u32,
1254 pid: i32,
1255 flags: WaitPidFlags,
1256 ) -> KernelResult<Option<WaitPidEventResult>> {
1257 self.assert_driver_owns(requester_driver, waiter_pid)?;
1258 let result = self.processes.waitpid_for(waiter_pid, pid, flags)?;
1259 Ok(result.map(|result| self.finish_waitpid_event(result)))
1260 }
1261
1262 pub fn wait_and_reap(&mut self, pid: u32) -> KernelResult<(u32, i32)> {
1263 let result = self.waitpid(pid)?;
1264 Ok((result.pid, result.status))
1265 }
1266
1267 pub fn open_pipe(&mut self, requester_driver: &str, pid: u32) -> KernelResult<(u32, u32)> {
1268 self.assert_not_terminated()?;
1269 self.assert_driver_owns(requester_driver, pid)?;
1270 self.resources
1271 .check_pipe_allocation(&self.resource_snapshot())?;
1272 let mut tables = lock_or_recover(&self.fd_tables);
1273 let table = tables
1274 .get_mut(pid)
1275 .ok_or_else(|| KernelError::no_such_process(pid))?;
1276 Ok(self.pipes.create_pipe_fds(table)?)
1277 }
1278
1279 pub fn open_pty(
1280 &mut self,
1281 requester_driver: &str,
1282 pid: u32,
1283 ) -> KernelResult<(u32, u32, String)> {
1284 self.assert_not_terminated()?;
1285 self.assert_driver_owns(requester_driver, pid)?;
1286 self.resources
1287 .check_pty_allocation(&self.resource_snapshot())?;
1288 let mut tables = lock_or_recover(&self.fd_tables);
1289 let table = tables
1290 .get_mut(pid)
1291 .ok_or_else(|| KernelError::no_such_process(pid))?;
1292 Ok(self.ptys.create_pty_fds(table)?)
1293 }
1294
1295 pub fn socket_create(
1296 &mut self,
1297 requester_driver: &str,
1298 pid: u32,
1299 spec: SocketSpec,
1300 ) -> KernelResult<SocketId> {
1301 self.assert_not_terminated()?;
1302 self.assert_driver_owns(requester_driver, pid)?;
1303 self.resources
1304 .check_socket_allocation(&self.resource_snapshot())?;
1305 Ok(self.sockets.allocate(pid, spec).id())
1306 }
1307
1308 pub fn socket_get(&self, socket_id: SocketId) -> Option<SocketRecord> {
1309 self.sockets.get(socket_id)
1310 }
1311
1312 pub fn socket_bind_inet(
1313 &mut self,
1314 requester_driver: &str,
1315 pid: u32,
1316 socket_id: SocketId,
1317 address: InetSocketAddress,
1318 ) -> KernelResult<()> {
1319 self.assert_not_terminated()?;
1320 self.assert_driver_owns(requester_driver, pid)?;
1321 let existing = self
1322 .sockets
1323 .get(socket_id)
1324 .ok_or_else(|| KernelError::new("ENOENT", format!("no such socket {socket_id}")))?;
1325 if existing.owner_pid() != pid {
1326 return Err(KernelError::permission_denied(format!(
1327 "process {pid} does not own socket {socket_id}"
1328 )));
1329 }
1330
1331 self.sockets.bind_inet(socket_id, address)?;
1332 self.poll_notifier.notify();
1333 Ok(())
1334 }
1335
1336 pub fn socket_bind_unix(
1337 &mut self,
1338 requester_driver: &str,
1339 pid: u32,
1340 socket_id: SocketId,
1341 path: impl Into<String>,
1342 ) -> KernelResult<()> {
1343 self.assert_not_terminated()?;
1344 self.assert_driver_owns(requester_driver, pid)?;
1345 let existing = self
1346 .sockets
1347 .get(socket_id)
1348 .ok_or_else(|| KernelError::new("ENOENT", format!("no such socket {socket_id}")))?;
1349 if existing.owner_pid() != pid {
1350 return Err(KernelError::permission_denied(format!(
1351 "process {pid} does not own socket {socket_id}"
1352 )));
1353 }
1354
1355 self.sockets
1356 .bind_unix(socket_id, normalize_path(&path.into()))?;
1357 self.poll_notifier.notify();
1358 Ok(())
1359 }
1360
1361 pub fn socket_listen(
1362 &mut self,
1363 requester_driver: &str,
1364 pid: u32,
1365 socket_id: SocketId,
1366 backlog: usize,
1367 ) -> KernelResult<()> {
1368 self.assert_not_terminated()?;
1369 self.assert_driver_owns(requester_driver, pid)?;
1370 let existing = self
1371 .sockets
1372 .get(socket_id)
1373 .ok_or_else(|| KernelError::new("ENOENT", format!("no such socket {socket_id}")))?;
1374 if existing.owner_pid() != pid {
1375 return Err(KernelError::permission_denied(format!(
1376 "process {pid} does not own socket {socket_id}"
1377 )));
1378 }
1379
1380 self.sockets.listen(socket_id, backlog)?;
1381 self.poll_notifier.notify();
1382 Ok(())
1383 }
1384
1385 pub fn socket_queue_incoming_tcp_connection(
1386 &mut self,
1387 requester_driver: &str,
1388 pid: u32,
1389 listener_socket_id: SocketId,
1390 peer_address: InetSocketAddress,
1391 ) -> KernelResult<()> {
1392 self.assert_not_terminated()?;
1393 self.assert_driver_owns(requester_driver, pid)?;
1394 let existing = self.sockets.get(listener_socket_id).ok_or_else(|| {
1395 KernelError::new("ENOENT", format!("no such socket {listener_socket_id}"))
1396 })?;
1397 if existing.owner_pid() != pid {
1398 return Err(KernelError::permission_denied(format!(
1399 "process {pid} does not own socket {listener_socket_id}"
1400 )));
1401 }
1402
1403 self.sockets
1404 .enqueue_incoming_tcp_connection(listener_socket_id, peer_address)?;
1405 self.poll_notifier.notify();
1406 Ok(())
1407 }
1408
1409 pub fn socket_accept(
1410 &mut self,
1411 requester_driver: &str,
1412 pid: u32,
1413 listener_socket_id: SocketId,
1414 ) -> KernelResult<SocketId> {
1415 self.assert_not_terminated()?;
1416 self.assert_driver_owns(requester_driver, pid)?;
1417 let existing = self.sockets.get(listener_socket_id).ok_or_else(|| {
1418 KernelError::new("ENOENT", format!("no such socket {listener_socket_id}"))
1419 })?;
1420 if existing.owner_pid() != pid {
1421 return Err(KernelError::permission_denied(format!(
1422 "process {pid} does not own socket {listener_socket_id}"
1423 )));
1424 }
1425
1426 let snapshot = self.resource_snapshot();
1427 self.resources.check_socket_allocation(&snapshot)?;
1428 self.resources.check_socket_state_transition(
1429 &snapshot,
1430 SocketState::Created,
1431 SocketState::Connected,
1432 )?;
1433
1434 let socket_id = self.sockets.accept(listener_socket_id)?.id();
1435 self.poll_notifier.notify();
1436 Ok(socket_id)
1437 }
1438
1439 pub fn socket_connect_pair(
1440 &mut self,
1441 requester_driver: &str,
1442 pid: u32,
1443 socket_id: SocketId,
1444 peer_socket_id: SocketId,
1445 ) -> KernelResult<()> {
1446 self.assert_not_terminated()?;
1447 self.assert_driver_owns(requester_driver, pid)?;
1448 let existing = self
1449 .sockets
1450 .get(socket_id)
1451 .ok_or_else(|| KernelError::new("ENOENT", format!("no such socket {socket_id}")))?;
1452 if existing.owner_pid() != pid {
1453 return Err(KernelError::permission_denied(format!(
1454 "process {pid} does not own socket {socket_id}"
1455 )));
1456 }
1457
1458 let peer = self.sockets.get(peer_socket_id).ok_or_else(|| {
1459 KernelError::new("ENOENT", format!("no such socket {peer_socket_id}"))
1460 })?;
1461 self.assert_driver_owns(requester_driver, peer.owner_pid())?;
1462
1463 let mut snapshot = self.resource_snapshot();
1464 for current_state in [existing.state(), peer.state()] {
1465 self.resources.check_socket_state_transition(
1466 &snapshot,
1467 current_state,
1468 SocketState::Connected,
1469 )?;
1470 if !current_state.counts_as_connection() {
1471 snapshot.socket_connections = snapshot.socket_connections.saturating_add(1);
1472 }
1473 }
1474
1475 self.sockets.connect_pair(socket_id, peer_socket_id)?;
1476 self.poll_notifier.notify();
1477 Ok(())
1478 }
1479
1480 pub fn socket_connect_unix(
1481 &mut self,
1482 requester_driver: &str,
1483 pid: u32,
1484 socket_id: SocketId,
1485 target_path: impl Into<String>,
1486 ) -> KernelResult<()> {
1487 self.assert_not_terminated()?;
1488 self.assert_driver_owns(requester_driver, pid)?;
1489 let existing = self
1490 .sockets
1491 .get(socket_id)
1492 .ok_or_else(|| KernelError::new("ENOENT", format!("no such socket {socket_id}")))?;
1493 if existing.owner_pid() != pid {
1494 return Err(KernelError::permission_denied(format!(
1495 "process {pid} does not own socket {socket_id}"
1496 )));
1497 }
1498
1499 let target_path = normalize_path(&target_path.into());
1500 self.sockets
1501 .find_bound_unix_socket(&target_path)
1502 .ok_or_else(|| {
1503 KernelError::new(
1504 "ECONNREFUSED",
1505 format!("no listening socket bound at path {target_path}"),
1506 )
1507 })?;
1508
1509 let mut snapshot = self.resource_snapshot();
1510 self.resources.check_socket_allocation(&snapshot)?;
1511 for current_state in [existing.state(), SocketState::Created] {
1512 self.resources.check_socket_state_transition(
1513 &snapshot,
1514 current_state,
1515 SocketState::Connected,
1516 )?;
1517 if !current_state.counts_as_connection() {
1518 snapshot.socket_connections = snapshot.socket_connections.saturating_add(1);
1519 }
1520 }
1521
1522 self.sockets
1523 .connect_to_bound_unix_stream(socket_id, target_path)?;
1524 self.poll_notifier.notify();
1525 Ok(())
1526 }
1527
/// Connects `socket_id` to the TCP socket bound at `target_address` inside
/// the kernel and wakes pollers.
///
/// The resource checks cover one new socket allocation plus two transitions
/// to `Connected`: one from the caller socket's current state and one from
/// `Created`. The running snapshot's `socket_connections` is bumped after
/// each transition whose source state does not count as a connection.
///
/// # Errors
///
/// * Kernel terminated, or `requester_driver` does not own `pid`.
/// * `ENOENT` when the socket does not exist.
/// * Permission denied when the socket is owned by a different process.
/// * `ECONNREFUSED` when no TCP socket is bound at the target address.
/// * Any resource-check failure or error from the socket table's connect.
pub fn socket_connect_inet_loopback(
    &mut self,
    requester_driver: &str,
    pid: u32,
    socket_id: SocketId,
    target_address: InetSocketAddress,
) -> KernelResult<()> {
    self.assert_not_terminated()?;
    self.assert_driver_owns(requester_driver, pid)?;

    let record = match self.sockets.get(socket_id) {
        Some(record) => record,
        None => {
            return Err(KernelError::new(
                "ENOENT",
                format!("no such socket {socket_id}"),
            ))
        }
    };
    let owned_by_caller = record.owner_pid() == pid;
    if !owned_by_caller {
        let message = format!("process {pid} does not own socket {socket_id}");
        return Err(KernelError::permission_denied(message));
    }

    let listener = self
        .sockets
        .find_bound_inet_socket(SocketSpec::tcp(), &target_address);
    if listener.is_none() {
        let message = format!(
            "no listening socket bound at {}:{}",
            target_address.host(),
            target_address.port()
        );
        return Err(KernelError::new("ECONNREFUSED", message));
    }

    let mut usage = self.resource_snapshot();
    self.resources.check_socket_allocation(&usage)?;
    for endpoint_state in [record.state(), SocketState::Created] {
        self.resources.check_socket_state_transition(
            &usage,
            endpoint_state,
            SocketState::Connected,
        )?;
        if !endpoint_state.counts_as_connection() {
            usage.socket_connections = usage.socket_connections.saturating_add(1);
        }
    }

    self.sockets
        .connect_to_bound_inet_stream(socket_id, target_address)?;
    self.poll_notifier.notify();
    Ok(())
}
1578
1579 pub fn socket_send_to_inet_loopback(
1580 &mut self,
1581 requester_driver: &str,
1582 pid: u32,
1583 socket_id: SocketId,
1584 target_address: InetSocketAddress,
1585 data: &[u8],
1586 ) -> KernelResult<usize> {
1587 self.assert_not_terminated()?;
1588 self.assert_driver_owns(requester_driver, pid)?;
1589 let existing = self
1590 .sockets
1591 .get(socket_id)
1592 .ok_or_else(|| KernelError::new("ENOENT", format!("no such socket {socket_id}")))?;
1593 if existing.owner_pid() != pid {
1594 return Err(KernelError::permission_denied(format!(
1595 "process {pid} does not own socket {socket_id}"
1596 )));
1597 }
1598
1599 self.sockets
1600 .check_send_to_bound_udp_socket(socket_id, target_address.clone())?;
1601 self.resources
1602 .check_socket_datagram_enqueue(&self.resource_snapshot(), data.len())?;
1603 let written = self
1604 .sockets
1605 .send_to_bound_udp_socket(socket_id, target_address, data)?;
1606 if written > 0 {
1607 self.poll_notifier.notify();
1608 }
1609 Ok(written)
1610 }
1611
1612 pub fn socket_recv_datagram(
1613 &mut self,
1614 requester_driver: &str,
1615 pid: u32,
1616 socket_id: SocketId,
1617 max_bytes: usize,
1618 ) -> KernelResult<Option<ReceivedDatagram>> {
1619 self.assert_not_terminated()?;
1620 self.assert_driver_owns(requester_driver, pid)?;
1621 let existing = self
1622 .sockets
1623 .get(socket_id)
1624 .ok_or_else(|| KernelError::new("ENOENT", format!("no such socket {socket_id}")))?;
1625 if existing.owner_pid() != pid {
1626 return Err(KernelError::permission_denied(format!(
1627 "process {pid} does not own socket {socket_id}"
1628 )));
1629 }
1630
1631 let result = self.sockets.recv_datagram(socket_id, max_bytes)?;
1632 if result.is_some() {
1633 self.poll_notifier.notify();
1634 }
1635 Ok(result)
1636 }
1637
1638 pub fn socket_set_datagram_option(
1639 &mut self,
1640 requester_driver: &str,
1641 pid: u32,
1642 socket_id: SocketId,
1643 option: DatagramSocketOption,
1644 enabled: bool,
1645 ) -> KernelResult<()> {
1646 self.assert_not_terminated()?;
1647 self.assert_driver_owns(requester_driver, pid)?;
1648 let existing = self
1649 .sockets
1650 .get(socket_id)
1651 .ok_or_else(|| KernelError::new("ENOENT", format!("no such socket {socket_id}")))?;
1652 if existing.owner_pid() != pid {
1653 return Err(KernelError::permission_denied(format!(
1654 "process {pid} does not own socket {socket_id}"
1655 )));
1656 }
1657
1658 self.sockets
1659 .set_datagram_socket_option(socket_id, option, enabled)?;
1660 self.poll_notifier.notify();
1661 Ok(())
1662 }
1663
1664 pub fn socket_add_membership(
1665 &mut self,
1666 requester_driver: &str,
1667 pid: u32,
1668 socket_id: SocketId,
1669 membership: SocketMulticastMembership,
1670 ) -> KernelResult<()> {
1671 self.assert_not_terminated()?;
1672 self.assert_driver_owns(requester_driver, pid)?;
1673 let existing = self
1674 .sockets
1675 .get(socket_id)
1676 .ok_or_else(|| KernelError::new("ENOENT", format!("no such socket {socket_id}")))?;
1677 if existing.owner_pid() != pid {
1678 return Err(KernelError::permission_denied(format!(
1679 "process {pid} does not own socket {socket_id}"
1680 )));
1681 }
1682
1683 self.sockets
1684 .add_multicast_membership(socket_id, membership)?;
1685 self.poll_notifier.notify();
1686 Ok(())
1687 }
1688
1689 pub fn socket_drop_membership(
1690 &mut self,
1691 requester_driver: &str,
1692 pid: u32,
1693 socket_id: SocketId,
1694 membership: SocketMulticastMembership,
1695 ) -> KernelResult<()> {
1696 self.assert_not_terminated()?;
1697 self.assert_driver_owns(requester_driver, pid)?;
1698 let existing = self
1699 .sockets
1700 .get(socket_id)
1701 .ok_or_else(|| KernelError::new("ENOENT", format!("no such socket {socket_id}")))?;
1702 if existing.owner_pid() != pid {
1703 return Err(KernelError::permission_denied(format!(
1704 "process {pid} does not own socket {socket_id}"
1705 )));
1706 }
1707
1708 self.sockets
1709 .drop_multicast_membership(socket_id, membership)?;
1710 self.poll_notifier.notify();
1711 Ok(())
1712 }
1713
1714 pub fn socket_set_state(
1715 &mut self,
1716 requester_driver: &str,
1717 pid: u32,
1718 socket_id: SocketId,
1719 state: SocketState,
1720 ) -> KernelResult<()> {
1721 self.assert_not_terminated()?;
1722 self.assert_driver_owns(requester_driver, pid)?;
1723 let existing = self
1724 .sockets
1725 .get(socket_id)
1726 .ok_or_else(|| KernelError::new("ENOENT", format!("no such socket {socket_id}")))?;
1727 if existing.owner_pid() != pid {
1728 return Err(KernelError::permission_denied(format!(
1729 "process {pid} does not own socket {socket_id}"
1730 )));
1731 }
1732
1733 self.resources.check_socket_state_transition(
1734 &self.resource_snapshot(),
1735 existing.state(),
1736 state,
1737 )?;
1738 self.sockets.update_state(socket_id, state)?;
1739 self.poll_notifier.notify();
1740 Ok(())
1741 }
1742
1743 pub fn socket_write(
1744 &mut self,
1745 requester_driver: &str,
1746 pid: u32,
1747 socket_id: SocketId,
1748 data: &[u8],
1749 ) -> KernelResult<usize> {
1750 self.assert_not_terminated()?;
1751 self.assert_driver_owns(requester_driver, pid)?;
1752 let existing = self
1753 .sockets
1754 .get(socket_id)
1755 .ok_or_else(|| KernelError::new("ENOENT", format!("no such socket {socket_id}")))?;
1756 if existing.owner_pid() != pid {
1757 return Err(KernelError::permission_denied(format!(
1758 "process {pid} does not own socket {socket_id}"
1759 )));
1760 }
1761
1762 self.sockets.check_write(socket_id)?;
1763 self.resources
1764 .check_socket_buffer_growth(&self.resource_snapshot(), data.len())?;
1765 let written = self.sockets.write(socket_id, data)?;
1766 if written > 0 {
1767 self.poll_notifier.notify();
1768 }
1769 Ok(written)
1770 }
1771
1772 pub fn socket_read(
1773 &mut self,
1774 requester_driver: &str,
1775 pid: u32,
1776 socket_id: SocketId,
1777 max_bytes: usize,
1778 ) -> KernelResult<Option<Vec<u8>>> {
1779 self.assert_not_terminated()?;
1780 self.assert_driver_owns(requester_driver, pid)?;
1781 let existing = self
1782 .sockets
1783 .get(socket_id)
1784 .ok_or_else(|| KernelError::new("ENOENT", format!("no such socket {socket_id}")))?;
1785 if existing.owner_pid() != pid {
1786 return Err(KernelError::permission_denied(format!(
1787 "process {pid} does not own socket {socket_id}"
1788 )));
1789 }
1790
1791 let result = self.sockets.read(socket_id, max_bytes)?;
1792 if result.is_some() {
1793 self.poll_notifier.notify();
1794 }
1795 Ok(result)
1796 }
1797
1798 pub fn socket_shutdown(
1799 &mut self,
1800 requester_driver: &str,
1801 pid: u32,
1802 socket_id: SocketId,
1803 how: SocketShutdown,
1804 ) -> KernelResult<()> {
1805 self.assert_not_terminated()?;
1806 self.assert_driver_owns(requester_driver, pid)?;
1807 let existing = self
1808 .sockets
1809 .get(socket_id)
1810 .ok_or_else(|| KernelError::new("ENOENT", format!("no such socket {socket_id}")))?;
1811 if existing.owner_pid() != pid {
1812 return Err(KernelError::permission_denied(format!(
1813 "process {pid} does not own socket {socket_id}"
1814 )));
1815 }
1816
1817 self.sockets.shutdown(socket_id, how)?;
1818 self.poll_notifier.notify();
1819 Ok(())
1820 }
1821
1822 pub fn socket_close(
1823 &mut self,
1824 requester_driver: &str,
1825 pid: u32,
1826 socket_id: SocketId,
1827 ) -> KernelResult<()> {
1828 self.assert_not_terminated()?;
1829 self.assert_driver_owns(requester_driver, pid)?;
1830 let existing = self
1831 .sockets
1832 .get(socket_id)
1833 .ok_or_else(|| KernelError::new("ENOENT", format!("no such socket {socket_id}")))?;
1834 if existing.owner_pid() != pid {
1835 return Err(KernelError::permission_denied(format!(
1836 "process {pid} does not own socket {socket_id}"
1837 )));
1838 }
1839
1840 self.sockets.remove(socket_id)?;
1841 self.poll_notifier.notify();
1842 Ok(())
1843 }
1844
1845 pub fn fd_open(
1846 &mut self,
1847 requester_driver: &str,
1848 pid: u32,
1849 path: &str,
1850 flags: u32,
1851 mode: Option<u32>,
1852 ) -> KernelResult<u32> {
1853 self.assert_not_terminated()?;
1854 self.assert_driver_owns(requester_driver, pid)?;
1855 if let Some(existing_fd) = parse_dev_fd_path(path)? {
1856 {
1857 let tables = lock_or_recover(&self.fd_tables);
1858 let table = tables
1859 .get(pid)
1860 .ok_or_else(|| KernelError::no_such_process(pid))?;
1861 table
1862 .get(existing_fd)
1863 .ok_or_else(|| KernelError::bad_file_descriptor(existing_fd))?;
1864 }
1865 self.resources
1866 .check_fd_allocation(&self.resource_snapshot(), 1)?;
1867 let mut tables = lock_or_recover(&self.fd_tables);
1868 let table = tables
1869 .get_mut(pid)
1870 .ok_or_else(|| KernelError::no_such_process(pid))?;
1871 let entry = table
1872 .get(existing_fd)
1873 .cloned()
1874 .ok_or_else(|| KernelError::bad_file_descriptor(existing_fd))?;
1875 return Ok(table.dup_with_status_flags(
1876 existing_fd,
1877 Some(entry.status_flags | (flags & O_NONBLOCK)),
1878 )?);
1879 }
1880
1881 if let Some(proc_node) = self.resolve_proc_node(path, Some(pid))? {
1882 if open_requires_write_access(flags) {
1883 self.filesystem
1884 .check_virtual_path(FsOperation::Write, path)
1885 .map_err(KernelError::from)?;
1886 return Err(read_only_filesystem_error(path));
1887 }
1888
1889 if matches!(
1890 proc_node,
1891 ProcNode::SelfLink { .. }
1892 | ProcNode::PidCwdLink { .. }
1893 | ProcNode::PidFdLink { .. }
1894 ) {
1895 let target = self.proc_symlink_target(&proc_node)?;
1896 return self.fd_open(requester_driver, pid, &target, flags, mode);
1897 }
1898
1899 self.filesystem
1900 .check_virtual_path(FsOperation::Read, path)
1901 .map_err(KernelError::from)?;
1902 self.resources
1903 .check_fd_allocation(&self.resource_snapshot(), 1)?;
1904 let mut tables = lock_or_recover(&self.fd_tables);
1905 let table = tables
1906 .get_mut(pid)
1907 .ok_or_else(|| KernelError::no_such_process(pid))?;
1908 return Ok(table.open_with_details(
1909 &self.proc_canonical_path(&proc_node),
1910 flags,
1911 proc_filetype(&proc_node),
1912 None,
1913 )?);
1914 }
1915
1916 if open_requires_write_access(flags) {
1917 self.reject_read_only_resolved_write_path(path)?;
1918 }
1919 let existed = if flags & O_CREAT != 0 {
1920 self.exists_internal(Some(pid), path)?
1921 } else {
1922 false
1923 };
1924 let (filetype, lock_target) = self.prepare_fd_open(path, flags, mode)?;
1925 if flags & O_CREAT != 0 && !existed {
1926 let umask = self.processes.get_umask(pid)?;
1927 self.apply_creation_mode(path, mode.unwrap_or(0o666), umask)?;
1928 }
1929 self.resources
1930 .check_fd_allocation(&self.resource_snapshot(), 1)?;
1931 let mut tables = lock_or_recover(&self.fd_tables);
1932 let table = tables
1933 .get_mut(pid)
1934 .ok_or_else(|| KernelError::no_such_process(pid))?;
1935 Ok(table.open_with_details(path, flags, filetype, lock_target)?)
1936 }
1937
1938 pub fn fd_read(
1939 &mut self,
1940 requester_driver: &str,
1941 pid: u32,
1942 fd: u32,
1943 length: usize,
1944 ) -> KernelResult<Vec<u8>> {
1945 Ok(self
1946 .fd_read_with_timeout_result(requester_driver, pid, fd, length, None)?
1947 .unwrap_or_default())
1948 }
1949
1950 pub fn fd_read_with_timeout_result(
1951 &mut self,
1952 requester_driver: &str,
1953 pid: u32,
1954 fd: u32,
1955 length: usize,
1956 timeout: Option<Duration>,
1957 ) -> KernelResult<Option<Vec<u8>>> {
1958 self.assert_driver_owns(requester_driver, pid)?;
1959 let entry = {
1960 let tables = lock_or_recover(&self.fd_tables);
1961 tables
1962 .get(pid)
1963 .and_then(|table| table.get(fd))
1964 .cloned()
1965 .ok_or_else(|| KernelError::bad_file_descriptor(fd))?
1966 };
1967
1968 if self.pipes.is_pipe(entry.description.id()) {
1969 return Ok(self.pipes.read_with_timeout(
1970 entry.description.id(),
1971 length,
1972 if entry.status_flags & O_NONBLOCK != 0 {
1973 Some(Duration::ZERO)
1974 } else {
1975 timeout.or_else(|| self.blocking_read_timeout())
1976 },
1977 )?);
1978 }
1979
1980 if self.ptys.is_pty(entry.description.id()) {
1981 return Ok(self.ptys.read_with_timeout(
1982 entry.description.id(),
1983 length,
1984 if entry.status_flags & O_NONBLOCK != 0 {
1985 Some(Duration::ZERO)
1986 } else {
1987 timeout.or_else(|| self.blocking_read_timeout())
1988 },
1989 )?);
1990 }
1991
1992 self.resources.check_pread_length(length)?;
1993
1994 if is_proc_path(entry.description.path()) {
1995 let bytes = self.proc_read_file_from_open_path(Some(pid), entry.description.path())?;
1996 let start = entry.description.cursor() as usize;
1997 let end = start.saturating_add(length).min(bytes.len());
1998 let chunk = if start >= bytes.len() {
1999 Vec::new()
2000 } else {
2001 bytes[start..end].to_vec()
2002 };
2003 entry.description.set_cursor(
2004 entry
2005 .description
2006 .cursor()
2007 .saturating_add(chunk.len() as u64),
2008 );
2009 return Ok(Some(chunk));
2010 }
2011
2012 let cursor = entry.description.cursor();
2013 let bytes = VirtualFileSystem::pread(
2014 &mut self.filesystem,
2015 entry.description.path(),
2016 cursor,
2017 length,
2018 )?;
2019 entry
2020 .description
2021 .set_cursor(cursor.saturating_add(bytes.len() as u64));
2022 Ok(Some(bytes))
2023 }
2024
2025 pub fn fd_write(
2026 &mut self,
2027 requester_driver: &str,
2028 pid: u32,
2029 fd: u32,
2030 data: &[u8],
2031 ) -> KernelResult<usize> {
2032 self.assert_driver_owns(requester_driver, pid)?;
2033 self.resources.check_fd_write_size(data.len())?;
2034 let entry = {
2035 let tables = lock_or_recover(&self.fd_tables);
2036 tables
2037 .get(pid)
2038 .and_then(|table| table.get(fd))
2039 .cloned()
2040 .ok_or_else(|| KernelError::bad_file_descriptor(fd))?
2041 };
2042
2043 if self.pipes.is_pipe(entry.description.id()) {
2044 return match self.pipes.write_with_mode(
2045 entry.description.id(),
2046 data,
2047 entry.status_flags & O_NONBLOCK != 0,
2048 ) {
2049 Ok(bytes) => Ok(bytes),
2050 Err(error) => {
2051 if error.code() == "EPIPE" {
2052 self.processes.kill(pid as i32, SIGPIPE)?;
2053 }
2054 Err(error.into())
2055 }
2056 };
2057 }
2058
2059 if self.ptys.is_pty(entry.description.id()) {
2060 return Ok(self.ptys.write(entry.description.id(), data)?);
2061 }
2062
2063 self.reject_read_only_resolved_write_path(entry.description.path())?;
2064
2065 let path = entry.description.path().to_owned();
2066 if is_virtual_device_storage_path(&path) {
2067 VirtualFileSystem::write_file(&mut self.filesystem, &path, data.to_vec())?;
2068 let cursor = entry.description.cursor();
2069 entry
2070 .description
2071 .set_cursor(cursor.saturating_add(data.len() as u64));
2072 return Ok(data.len());
2073 }
2074 let current_size = self.current_storage_file_size(&path)?;
2075 let cursor = entry.description.cursor();
2076 if entry.description.flags() & O_APPEND != 0 {
2077 let required_size = current_size.max(checked_write_end(current_size, data.len())?);
2078 self.check_path_resize_limits(&path, required_size)?;
2079 let new_len = VirtualFileSystem::append_file(&mut self.filesystem, &path, data)?;
2080 entry.description.set_cursor(new_len);
2081 return Ok(data.len());
2082 }
2083
2084 let required_size = current_size.max(checked_write_end(cursor, data.len())?);
2085 self.check_path_resize_limits(&path, required_size)?;
2086 VirtualFileSystem::pwrite(&mut self.filesystem, &path, data, cursor)?;
2087 entry
2088 .description
2089 .set_cursor(cursor.saturating_add(data.len() as u64));
2090 Ok(data.len())
2091 }
2092
2093 pub fn poll_fds(
2094 &self,
2095 requester_driver: &str,
2096 pid: u32,
2097 fds: Vec<PollFd>,
2098 timeout_ms: i32,
2099 ) -> KernelResult<PollResult> {
2100 let targets = fds
2101 .into_iter()
2102 .map(|poll_fd| PollTargetEntry::fd(poll_fd.fd, poll_fd.events))
2103 .collect::<Vec<_>>();
2104 let result = self.poll_targets(requester_driver, pid, targets, timeout_ms)?;
2105 Ok(PollResult {
2106 ready_count: result.ready_count,
2107 fds: result
2108 .targets
2109 .into_iter()
2110 .map(|target| match target.target {
2111 PollTarget::Fd(fd) => PollFd {
2112 fd,
2113 events: target.events,
2114 revents: target.revents,
2115 },
2116 PollTarget::Socket(_) => unreachable!("fd poll should only include fd targets"),
2117 })
2118 .collect(),
2119 })
2120 }
2121
2122 pub fn poll_targets(
2123 &self,
2124 requester_driver: &str,
2125 pid: u32,
2126 mut targets: Vec<PollTargetEntry>,
2127 timeout_ms: i32,
2128 ) -> KernelResult<PollTargetResult> {
2129 self.assert_driver_owns(requester_driver, pid)?;
2130 if timeout_ms < -1 {
2131 return Err(KernelError::new(
2132 "EINVAL",
2133 format!("invalid poll timeout {timeout_ms}"),
2134 ));
2135 }
2136
2137 let timeout = if timeout_ms < 0 {
2138 None
2139 } else {
2140 Some(Duration::from_millis(timeout_ms as u64))
2141 };
2142 let deadline = timeout.map(|duration| Instant::now() + duration);
2143
2144 loop {
2145 let observed_generation = self.poll_notifier.snapshot();
2146 let ready_count = self.populate_poll_target_revents(pid, &mut targets)?;
2147 if ready_count > 0 || matches!(timeout, Some(duration) if duration.is_zero()) {
2148 return Ok(PollTargetResult {
2149 ready_count,
2150 targets,
2151 });
2152 }
2153
2154 let remaining = deadline.map(|target| target.saturating_duration_since(Instant::now()));
2155 if matches!(remaining, Some(duration) if duration.is_zero()) {
2156 return Ok(PollTargetResult {
2157 ready_count,
2158 targets,
2159 });
2160 }
2161
2162 if !self
2163 .poll_notifier
2164 .wait_for_change(observed_generation, remaining)
2165 {
2166 return Ok(PollTargetResult {
2167 ready_count,
2168 targets,
2169 });
2170 }
2171 }
2172 }
2173
2174 pub fn fd_seek(
2175 &mut self,
2176 requester_driver: &str,
2177 pid: u32,
2178 fd: u32,
2179 offset: i64,
2180 whence: u8,
2181 ) -> KernelResult<u64> {
2182 self.assert_driver_owns(requester_driver, pid)?;
2183 let entry = {
2184 let tables = lock_or_recover(&self.fd_tables);
2185 tables
2186 .get(pid)
2187 .and_then(|table| table.get(fd))
2188 .cloned()
2189 .ok_or_else(|| KernelError::bad_file_descriptor(fd))?
2190 };
2191
2192 if self.pipes.is_pipe(entry.description.id()) || self.ptys.is_pty(entry.description.id()) {
2193 return Err(KernelError::new("ESPIPE", "illegal seek"));
2194 }
2195
2196 let base = match whence {
2197 SEEK_SET => 0_i128,
2198 SEEK_CUR => i128::from(entry.description.cursor()),
2199 SEEK_END => {
2200 let size = if is_proc_path(entry.description.path()) {
2201 self.proc_stat_from_open_path(Some(pid), entry.description.path())?
2202 .size
2203 } else {
2204 self.filesystem.stat(entry.description.path())?.size
2205 };
2206 i128::from(size)
2207 }
2208 _ => {
2209 return Err(KernelError::new(
2210 "EINVAL",
2211 format!("invalid whence {whence}"),
2212 ))
2213 }
2214 };
2215 let next = base + i128::from(offset);
2216 if next < 0 {
2217 return Err(KernelError::new("EINVAL", "negative seek position"));
2218 }
2219 let next = u64::try_from(next)
2220 .map_err(|_| KernelError::new("EINVAL", "seek position out of range"))?;
2221 entry.description.set_cursor(next);
2222 Ok(next)
2223 }
2224
2225 pub fn fd_pread(
2226 &mut self,
2227 requester_driver: &str,
2228 pid: u32,
2229 fd: u32,
2230 length: usize,
2231 offset: u64,
2232 ) -> KernelResult<Vec<u8>> {
2233 self.assert_driver_owns(requester_driver, pid)?;
2234 self.resources.check_pread_length(length)?;
2235 let entry = {
2236 let tables = lock_or_recover(&self.fd_tables);
2237 tables
2238 .get(pid)
2239 .and_then(|table| table.get(fd))
2240 .cloned()
2241 .ok_or_else(|| KernelError::bad_file_descriptor(fd))?
2242 };
2243
2244 if self.pipes.is_pipe(entry.description.id()) || self.ptys.is_pty(entry.description.id()) {
2245 return Err(KernelError::new("ESPIPE", "illegal seek"));
2246 }
2247
2248 if is_proc_path(entry.description.path()) {
2249 let bytes = self.proc_read_file_from_open_path(Some(pid), entry.description.path())?;
2250 let start = usize::try_from(offset)
2251 .map_err(|_| KernelError::new("EINVAL", "pread offset out of range"))?;
2252 let end = start.saturating_add(length).min(bytes.len());
2253 return Ok(if start >= bytes.len() {
2254 Vec::new()
2255 } else {
2256 bytes[start..end].to_vec()
2257 });
2258 }
2259
2260 Ok(VirtualFileSystem::pread(
2261 &mut self.filesystem,
2262 entry.description.path(),
2263 offset,
2264 length,
2265 )?)
2266 }
2267
2268 pub fn fd_pwrite(
2269 &mut self,
2270 requester_driver: &str,
2271 pid: u32,
2272 fd: u32,
2273 data: &[u8],
2274 offset: u64,
2275 ) -> KernelResult<usize> {
2276 self.assert_driver_owns(requester_driver, pid)?;
2277 self.resources.check_fd_write_size(data.len())?;
2278 let entry = {
2279 let tables = lock_or_recover(&self.fd_tables);
2280 tables
2281 .get(pid)
2282 .and_then(|table| table.get(fd))
2283 .cloned()
2284 .ok_or_else(|| KernelError::bad_file_descriptor(fd))?
2285 };
2286
2287 if self.pipes.is_pipe(entry.description.id()) || self.ptys.is_pty(entry.description.id()) {
2288 return Err(KernelError::new("ESPIPE", "illegal seek"));
2289 }
2290
2291 self.reject_read_only_resolved_write_path(entry.description.path())?;
2292
2293 let required_size = self
2294 .current_storage_file_size(entry.description.path())?
2295 .max(checked_write_end(offset, data.len())?);
2296 self.check_path_resize_limits(entry.description.path(), required_size)?;
2297 VirtualFileSystem::pwrite(
2298 &mut self.filesystem,
2299 entry.description.path(),
2300 data.to_vec(),
2301 offset,
2302 )?;
2303 Ok(data.len())
2304 }
2305
2306 pub fn fd_dup(&mut self, requester_driver: &str, pid: u32, fd: u32) -> KernelResult<u32> {
2307 self.assert_driver_owns(requester_driver, pid)?;
2308 {
2309 let tables = lock_or_recover(&self.fd_tables);
2310 let table = tables
2311 .get(pid)
2312 .ok_or_else(|| KernelError::no_such_process(pid))?;
2313 table
2314 .get(fd)
2315 .ok_or_else(|| KernelError::bad_file_descriptor(fd))?;
2316 }
2317 self.resources
2318 .check_fd_allocation(&self.resource_snapshot(), 1)?;
2319 let mut tables = lock_or_recover(&self.fd_tables);
2320 let table = tables
2321 .get_mut(pid)
2322 .ok_or_else(|| KernelError::no_such_process(pid))?;
2323 Ok(table.dup(fd)?)
2324 }
2325
2326 pub fn fd_dup2(
2327 &mut self,
2328 requester_driver: &str,
2329 pid: u32,
2330 old_fd: u32,
2331 new_fd: u32,
2332 ) -> KernelResult<()> {
2333 self.assert_driver_owns(requester_driver, pid)?;
2334 let (replaced, needs_fd_growth) = {
2335 let tables = lock_or_recover(&self.fd_tables);
2336 let table = tables
2337 .get(pid)
2338 .ok_or_else(|| KernelError::no_such_process(pid))?;
2339 table
2340 .get(old_fd)
2341 .ok_or_else(|| KernelError::bad_file_descriptor(old_fd))?;
2342 let replaced = if old_fd == new_fd {
2343 None
2344 } else {
2345 table.get(new_fd).cloned()
2346 };
2347 if new_fd as usize >= table.max_fds() {
2348 return Err(KernelError::bad_file_descriptor(new_fd));
2349 }
2350 let needs_fd_growth = old_fd != new_fd && replaced.is_none();
2351 (replaced, needs_fd_growth)
2352 };
2353 if needs_fd_growth {
2354 self.resources
2355 .check_fd_allocation(&self.resource_snapshot(), 1)?;
2356 }
2357 {
2358 let mut tables = lock_or_recover(&self.fd_tables);
2359 let table = tables
2360 .get_mut(pid)
2361 .ok_or_else(|| KernelError::no_such_process(pid))?;
2362 table.dup2(old_fd, new_fd)?;
2363 }
2364
2365 if let Some(entry) = replaced {
2366 self.close_special_resource_if_needed(&entry.description, entry.filetype);
2367 }
2368 Ok(())
2369 }
2370
2371 pub fn fd_close(&mut self, requester_driver: &str, pid: u32, fd: u32) -> KernelResult<()> {
2372 self.assert_driver_owns(requester_driver, pid)?;
2373 let (description, filetype) = {
2374 let mut tables = lock_or_recover(&self.fd_tables);
2375 let table = tables
2376 .get_mut(pid)
2377 .ok_or_else(|| KernelError::no_such_process(pid))?;
2378 let entry = table
2379 .get(fd)
2380 .cloned()
2381 .ok_or_else(|| KernelError::bad_file_descriptor(fd))?;
2382 table.close(fd);
2383 (entry.description, entry.filetype)
2384 };
2385 self.close_special_resource_if_needed(&description, filetype);
2386 Ok(())
2387 }
2388
2389 pub fn fd_fcntl(
2390 &mut self,
2391 requester_driver: &str,
2392 pid: u32,
2393 fd: u32,
2394 command: u32,
2395 arg: u32,
2396 ) -> KernelResult<u32> {
2397 self.assert_driver_owns(requester_driver, pid)?;
2398 if command == F_DUPFD {
2399 {
2400 let tables = lock_or_recover(&self.fd_tables);
2401 let table = tables
2402 .get(pid)
2403 .ok_or_else(|| KernelError::no_such_process(pid))?;
2404 table
2405 .get(fd)
2406 .ok_or_else(|| KernelError::bad_file_descriptor(fd))?;
2407 if arg as usize >= table.max_fds() {
2408 return Err(KernelError::new(
2409 "EINVAL",
2410 format!("fd {arg} exceeds process fd limit"),
2411 ));
2412 }
2413 }
2414 self.resources
2415 .check_fd_allocation(&self.resource_snapshot(), 1)?;
2416 }
2417 let mut tables = lock_or_recover(&self.fd_tables);
2418 let table = tables
2419 .get_mut(pid)
2420 .ok_or_else(|| KernelError::no_such_process(pid))?;
2421 let result = table.fcntl(fd, command, arg)?;
2422 if command == F_DUPFD {
2423 self.poll_notifier.notify();
2424 }
2425 Ok(result)
2426 }
2427
2428 pub fn fd_flock(
2429 &self,
2430 requester_driver: &str,
2431 pid: u32,
2432 fd: u32,
2433 operation: u32,
2434 ) -> KernelResult<()> {
2435 self.assert_driver_owns(requester_driver, pid)?;
2436 let entry = {
2437 let tables = lock_or_recover(&self.fd_tables);
2438 tables
2439 .get(pid)
2440 .and_then(|table| table.get(fd))
2441 .cloned()
2442 .ok_or_else(|| KernelError::bad_file_descriptor(fd))?
2443 };
2444
2445 if entry.filetype != FILETYPE_REGULAR_FILE {
2446 return Err(KernelError::new(
2447 "EBADF",
2448 format!("file descriptor {fd} does not support advisory locking"),
2449 ));
2450 }
2451
2452 let target = entry.description.lock_target().ok_or_else(|| {
2453 KernelError::new(
2454 "EBADF",
2455 format!("file descriptor {fd} is missing advisory lock metadata"),
2456 )
2457 })?;
2458 let operation = FlockOperation::from_bits(operation)?;
2459 self.file_locks
2460 .apply(entry.description.id(), target, operation)?;
2461 Ok(())
2462 }
2463
2464 pub fn fd_stat(&self, requester_driver: &str, pid: u32, fd: u32) -> KernelResult<FdStat> {
2465 self.assert_driver_owns(requester_driver, pid)?;
2466 let tables = lock_or_recover(&self.fd_tables);
2467 Ok(tables
2468 .get(pid)
2469 .ok_or_else(|| KernelError::no_such_process(pid))?
2470 .stat(fd)?)
2471 }
2472
2473 pub fn fd_path(&self, requester_driver: &str, pid: u32, fd: u32) -> KernelResult<String> {
2474 let description = self.description_for_fd(requester_driver, pid, fd)?;
2475 Ok(description.path().to_owned())
2476 }
2477
2478 pub fn isatty(&self, requester_driver: &str, pid: u32, fd: u32) -> KernelResult<bool> {
2479 self.assert_driver_owns(requester_driver, pid)?;
2480 let entry = {
2481 let tables = lock_or_recover(&self.fd_tables);
2482 tables
2483 .get(pid)
2484 .and_then(|table| table.get(fd))
2485 .cloned()
2486 .ok_or_else(|| KernelError::bad_file_descriptor(fd))?
2487 };
2488 Ok(self.ptys.is_slave(entry.description.id()))
2489 }
2490
2491 pub fn pty_set_discipline(
2492 &self,
2493 requester_driver: &str,
2494 pid: u32,
2495 fd: u32,
2496 config: LineDisciplineConfig,
2497 ) -> KernelResult<()> {
2498 let description = self.description_for_fd(requester_driver, pid, fd)?;
2499 self.ptys.set_discipline(description.id(), config)?;
2500 Ok(())
2501 }
2502
2503 pub fn pty_set_foreground_pgid(
2504 &self,
2505 requester_driver: &str,
2506 pid: u32,
2507 fd: u32,
2508 pgid: u32,
2509 ) -> KernelResult<()> {
2510 let description = self.description_for_fd(requester_driver, pid, fd)?;
2511 let requester_sid = self.processes.getsid(pid)?;
2512 let group = self
2513 .processes
2514 .list_processes()
2515 .into_values()
2516 .find(|process| process.pgid == pgid && process.status != ProcessStatus::Exited)
2517 .ok_or_else(|| KernelError::new("ESRCH", format!("no such process group {pgid}")))?;
2518 if group.sid != requester_sid {
2519 return Err(KernelError::permission_denied(
2520 "cannot set foreground process group in different session",
2521 ));
2522 }
2523 self.ptys.set_foreground_pgid(description.id(), pgid)?;
2524 Ok(())
2525 }
2526
2527 pub fn tcgetattr(&self, requester_driver: &str, pid: u32, fd: u32) -> KernelResult<Termios> {
2528 let description = self.description_for_fd(requester_driver, pid, fd)?;
2529 Ok(self.ptys.get_termios(description.id())?)
2530 }
2531
2532 pub fn tcsetattr(
2533 &self,
2534 requester_driver: &str,
2535 pid: u32,
2536 fd: u32,
2537 termios: PartialTermios,
2538 ) -> KernelResult<()> {
2539 let description = self.description_for_fd(requester_driver, pid, fd)?;
2540 self.ptys.set_termios(description.id(), termios)?;
2541 Ok(())
2542 }
2543
2544 pub fn tcgetpgrp(&self, requester_driver: &str, pid: u32, fd: u32) -> KernelResult<u32> {
2545 let description = self.description_for_fd(requester_driver, pid, fd)?;
2546 Ok(self.ptys.get_foreground_pgid(description.id())?)
2547 }
2548
2549 pub fn pty_resize(
2550 &self,
2551 requester_driver: &str,
2552 pid: u32,
2553 fd: u32,
2554 cols: u16,
2555 rows: u16,
2556 ) -> KernelResult<()> {
2557 let description = self.description_for_fd(requester_driver, pid, fd)?;
2558 let target_pgid = self.ptys.resize(description.id(), cols, rows)?;
2559 if let Some(pgid) = target_pgid {
2560 match self.processes.kill(-(pgid as i32), SIGWINCH) {
2561 Ok(()) => {}
2562 Err(error) if error.code() == "ESRCH" => {}
2563 Err(error) => return Err(error.into()),
2564 }
2565 }
2566 Ok(())
2567 }
2568
2569 pub fn signal_process(
2570 &self,
2571 requester_driver: &str,
2572 pid: i32,
2573 signal: i32,
2574 ) -> KernelResult<()> {
2575 if pid < 0 {
2576 let pgid = pid.unsigned_abs();
2577 let members = self
2578 .processes
2579 .list_processes()
2580 .into_values()
2581 .filter(|process| process.pgid == pgid && process.status != ProcessStatus::Exited)
2582 .collect::<Vec<_>>();
2583 if members.is_empty() {
2584 self.processes.kill(pid, signal)?;
2585 return Ok(());
2586 }
2587 if let Some(process) = members
2588 .iter()
2589 .find(|process| process.driver != requester_driver)
2590 {
2591 return Err(KernelError::permission_denied(format!(
2592 "driver \"{requester_driver}\" does not own process group {pgid} containing PID {}",
2593 process.pid
2594 )));
2595 }
2596 self.processes.kill(pid, signal)?;
2597 return Ok(());
2598 }
2599
2600 let pid = u32::try_from(pid)
2601 .map_err(|_| KernelError::new("EINVAL", format!("invalid pid {pid}")))?;
2602 self.assert_driver_owns(requester_driver, pid)?;
2603 self.processes.kill(pid as i32, signal)?;
2604 Ok(())
2605 }
2606
2607 pub fn kill_process(&self, requester_driver: &str, pid: u32, signal: i32) -> KernelResult<()> {
2608 let pid = i32::try_from(pid)
2609 .map_err(|_| KernelError::new("EINVAL", format!("pid {pid} exceeds i32::MAX")))?;
2610 self.signal_process(requester_driver, pid, signal)
2611 }
2612
2613 pub fn setpgid(&self, requester_driver: &str, pid: u32, pgid: u32) -> KernelResult<()> {
2614 self.assert_driver_owns(requester_driver, pid)?;
2615 let target_pgid = if pgid == 0 { pid } else { pgid };
2616 if target_pgid != pid {
2617 if let Some(group_owner) =
2618 self.processes
2619 .list_processes()
2620 .into_values()
2621 .find(|process| {
2622 process.pgid == target_pgid && process.status == ProcessStatus::Running
2623 })
2624 {
2625 if group_owner.driver != requester_driver {
2626 return Err(KernelError::permission_denied(format!(
2627 "driver \"{requester_driver}\" cannot join process group {target_pgid} owned by \"{}\"",
2628 group_owner.driver
2629 )));
2630 }
2631 }
2632 }
2633 self.processes.setpgid(pid, pgid)?;
2634 Ok(())
2635 }
2636
2637 pub fn getpgid(&self, requester_driver: &str, pid: u32) -> KernelResult<u32> {
2638 self.assert_driver_owns(requester_driver, pid)?;
2639 Ok(self.processes.getpgid(pid)?)
2640 }
2641
2642 pub fn getpid(&self, requester_driver: &str, pid: u32) -> KernelResult<u32> {
2643 self.assert_driver_owns(requester_driver, pid)?;
2644 Ok(pid)
2645 }
2646
2647 pub fn sigprocmask(
2648 &self,
2649 requester_driver: &str,
2650 pid: u32,
2651 how: SigmaskHow,
2652 set: SignalSet,
2653 ) -> KernelResult<SignalSet> {
2654 self.assert_driver_owns(requester_driver, pid)?;
2655 Ok(self.processes.sigprocmask(pid, how, set)?)
2656 }
2657
2658 pub fn sigpending(&self, requester_driver: &str, pid: u32) -> KernelResult<SignalSet> {
2659 self.assert_driver_owns(requester_driver, pid)?;
2660 Ok(self.processes.sigpending(pid)?)
2661 }
2662
2663 pub fn getppid(&self, requester_driver: &str, pid: u32) -> KernelResult<u32> {
2664 self.assert_driver_owns(requester_driver, pid)?;
2665 Ok(self.processes.getppid(pid)?)
2666 }
2667
2668 pub fn setsid(&self, requester_driver: &str, pid: u32) -> KernelResult<u32> {
2669 self.assert_driver_owns(requester_driver, pid)?;
2670 Ok(self.processes.setsid(pid)?)
2671 }
2672
2673 pub fn getsid(&self, requester_driver: &str, pid: u32) -> KernelResult<u32> {
2674 self.assert_driver_owns(requester_driver, pid)?;
2675 Ok(self.processes.getsid(pid)?)
2676 }
2677
2678 pub fn dev_fd_read_dir(&self, requester_driver: &str, pid: u32) -> KernelResult<Vec<String>> {
2679 self.assert_driver_owns(requester_driver, pid)?;
2680 let tables = lock_or_recover(&self.fd_tables);
2681 let table = tables
2682 .get(pid)
2683 .ok_or_else(|| KernelError::no_such_process(pid))?;
2684 let entry_count = table.len();
2685 self.resources.check_readdir_entries(entry_count)?;
2686 Ok(table.iter().map(|entry| entry.fd.to_string()).collect())
2687 }
2688
2689 pub fn dev_fd_stat(
2690 &mut self,
2691 requester_driver: &str,
2692 pid: u32,
2693 fd: u32,
2694 ) -> KernelResult<VirtualStat> {
2695 self.assert_driver_owns(requester_driver, pid)?;
2696 let entry = {
2697 let tables = lock_or_recover(&self.fd_tables);
2698 tables
2699 .get(pid)
2700 .and_then(|table| table.get(fd))
2701 .cloned()
2702 .ok_or_else(|| KernelError::bad_file_descriptor(fd))?
2703 };
2704
2705 if self.pipes.is_pipe(entry.description.id()) || self.ptys.is_pty(entry.description.id()) {
2706 return Ok(synthetic_character_device_stat(entry.description.id()));
2707 }
2708
2709 if is_proc_path(entry.description.path()) {
2710 return self.proc_stat_from_open_path(Some(pid), entry.description.path());
2711 }
2712
2713 Ok(self.filesystem.stat(entry.description.path())?)
2714 }
2715
2716 pub fn dispose(&mut self) -> KernelResult<()> {
2717 if self.terminated {
2718 return Ok(());
2719 }
2720
2721 dispose_kernel_vm_resources(self);
2722 Ok(())
2723 }
2724
2725 fn prepare_fd_open(
2726 &mut self,
2727 path: &str,
2728 flags: u32,
2729 mode: Option<u32>,
2730 ) -> KernelResult<(u8, Option<FileLockTarget>)> {
2731 if open_requires_write_access(flags) {
2732 self.reject_read_only_resolved_write_path(path)?;
2733 }
2734
2735 if flags & O_CREAT != 0 && flags & O_EXCL != 0 {
2736 self.check_write_file_limits(path, 0)?;
2737 VirtualFileSystem::create_file_exclusive_with_mode(
2738 &mut self.filesystem,
2739 path,
2740 Vec::new(),
2741 mode,
2742 )?;
2743 let stat = VirtualFileSystem::stat(&mut self.filesystem, path)?;
2744 return Ok((
2745 filetype_for_path(path, &stat),
2746 Some(FileLockTarget::new(stat.ino)),
2747 ));
2748 }
2749
2750 let exists = self.filesystem.exists(path)?;
2751 if exists {
2752 if flags & O_TRUNC != 0 {
2753 self.check_truncate_limits(path, 0)?;
2754 VirtualFileSystem::truncate(&mut self.filesystem, path, 0)?;
2755 }
2756 } else if flags & O_CREAT != 0 {
2757 self.check_write_file_limits(path, 0)?;
2758 VirtualFileSystem::write_file_with_mode(&mut self.filesystem, path, Vec::new(), mode)?;
2759 } else {
2760 let _ = VirtualFileSystem::stat(&mut self.filesystem, path)?;
2761 unreachable!("stat should return an error when opening a missing path");
2762 }
2763
2764 let stat = VirtualFileSystem::stat(&mut self.filesystem, path)?;
2765 Ok((
2766 filetype_for_path(path, &stat),
2767 Some(FileLockTarget::new(stat.ino)),
2768 ))
2769 }
2770
2771 fn reject_read_only_write_path(&mut self, path: &str) -> KernelResult<()> {
2772 if is_proc_path(path) {
2773 self.filesystem
2774 .check_virtual_path(FsOperation::Write, path)
2775 .map_err(KernelError::from)?;
2776 return Err(read_only_filesystem_error(path));
2777 }
2778
2779 if is_agentos_path(path) {
2780 return Err(read_only_filesystem_error(path));
2781 }
2782
2783 Ok(())
2784 }
2785
2786 fn reject_read_only_resolved_write_path(&mut self, path: &str) -> KernelResult<()> {
2787 self.reject_read_only_write_path(path)?;
2788
2789 if let Some(resolved) = self.resolve_write_guard_path(path, true)? {
2790 if is_agentos_path(&resolved) {
2791 return Err(read_only_filesystem_error(&resolved));
2792 }
2793 if self.has_agentos_hardlink_alias(&resolved)? {
2794 return Err(read_only_filesystem_error(&resolved));
2795 }
2796 }
2797 if self.has_agentos_hardlink_alias(path)? {
2798 return Err(read_only_filesystem_error(path));
2799 }
2800
2801 Ok(())
2802 }
2803
2804 fn reject_read_only_entry_write_path(&mut self, path: &str) -> KernelResult<()> {
2805 self.reject_read_only_write_path(path)?;
2806
2807 if let Some(resolved) = self.resolve_write_guard_path(path, false)? {
2808 if is_agentos_path(&resolved) {
2809 return Err(read_only_filesystem_error(&resolved));
2810 }
2811 if self.has_agentos_hardlink_alias(&resolved)? {
2812 return Err(read_only_filesystem_error(&resolved));
2813 }
2814 }
2815 if self.has_agentos_hardlink_alias(path)? {
2816 return Err(read_only_filesystem_error(path));
2817 }
2818
2819 Ok(())
2820 }
2821
2822 fn has_agentos_hardlink_alias(&mut self, path: &str) -> KernelResult<bool> {
2823 let Some(target) = self.storage_lstat(path)? else {
2824 return Ok(false);
2825 };
2826 if target.is_directory || target.is_symbolic_link {
2827 return Ok(false);
2828 }
2829
2830 self.agentos_subtree_contains_inode("/etc/agentos", target.dev, target.ino)
2831 }
2832
2833 fn agentos_subtree_contains_inode(
2834 &mut self,
2835 path: &str,
2836 target_dev: u64,
2837 target_ino: u64,
2838 ) -> KernelResult<bool> {
2839 let Some(stat) = self.storage_lstat(path)? else {
2840 return Ok(false);
2841 };
2842 if !stat.is_directory && !stat.is_symbolic_link {
2843 return Ok(stat.dev == target_dev && stat.ino == target_ino);
2844 }
2845 if !stat.is_directory {
2846 return Ok(false);
2847 }
2848
2849 let children = self.raw_filesystem_mut().read_dir_with_types(path)?;
2850 for child in children {
2851 if child.name == "." || child.name == ".." {
2852 continue;
2853 }
2854 let child_path = join_absolute_path(path, &child.name);
2855 if self.agentos_subtree_contains_inode(&child_path, target_dev, target_ino)? {
2856 return Ok(true);
2857 }
2858 }
2859
2860 Ok(false)
2861 }
2862
2863 fn resolve_write_guard_path(
2864 &mut self,
2865 path: &str,
2866 follow_final_symlink: bool,
2867 ) -> KernelResult<Option<String>> {
2868 let normalized = normalize_path(path);
2869 if normalized == "/" {
2870 return Ok(Some(normalized));
2871 }
2872
2873 if follow_final_symlink {
2874 if let Ok(resolved) = self.filesystem.realpath(&normalized) {
2875 return Ok(Some(resolved));
2876 }
2877 }
2878
2879 let components: Vec<&str> = normalized
2880 .split('/')
2881 .filter(|component| !component.is_empty())
2882 .collect();
2883 let mut resolved_prefix = String::from("/");
2884 let mut raw_prefix = String::from("/");
2885
2886 for (index, component) in components.iter().enumerate() {
2887 let is_final = index + 1 == components.len();
2888 if is_final && !follow_final_symlink {
2889 return Ok(Some(join_absolute_path(&resolved_prefix, component)));
2890 }
2891
2892 raw_prefix = join_absolute_path(&raw_prefix, component);
2893 match self.filesystem.realpath(&raw_prefix) {
2894 Ok(resolved) => {
2895 resolved_prefix = resolved;
2896 }
2897 Err(error) if error.code() == "ENOENT" => {
2898 let mut resolved = resolved_prefix;
2899 for remaining in &components[index..] {
2900 resolved = join_absolute_path(&resolved, remaining);
2901 }
2902 return Ok(Some(resolved));
2903 }
2904 Err(error) => return Err(error.into()),
2905 }
2906 }
2907
2908 Ok(Some(resolved_prefix))
2909 }
2910
2911 fn populate_poll_target_revents(
2912 &self,
2913 pid: u32,
2914 targets: &mut [PollTargetEntry],
2915 ) -> KernelResult<usize> {
2916 let mut ready_count = 0;
2917 for target in targets.iter_mut() {
2918 target.revents = self.poll_target_entry(pid, target.target, target.events)?;
2919 if !target.revents.is_empty() {
2920 ready_count += 1;
2921 }
2922 }
2923
2924 Ok(ready_count)
2925 }
2926
2927 fn poll_target_entry(
2928 &self,
2929 pid: u32,
2930 target: PollTarget,
2931 requested: PollEvents,
2932 ) -> KernelResult<PollEvents> {
2933 match target {
2934 PollTarget::Fd(fd) => {
2935 let entry = {
2936 let tables = lock_or_recover(&self.fd_tables);
2937 tables
2938 .get(pid)
2939 .ok_or_else(|| KernelError::no_such_process(pid))?
2940 .get(fd)
2941 .cloned()
2942 };
2943 if let Some(entry) = entry {
2944 self.poll_entry(&entry, requested)
2945 } else {
2946 Ok(POLLNVAL)
2947 }
2948 }
2949 PollTarget::Socket(socket_id) => {
2950 let socket = self.sockets.get(socket_id);
2951 if let Some(socket) = socket {
2952 if socket.owner_pid() != pid {
2953 return Err(KernelError::permission_denied(format!(
2954 "process {pid} does not own socket {socket_id}"
2955 )));
2956 }
2957 let mut events = self.sockets.poll(socket_id, requested)?;
2958 if events.intersects(POLLOUT)
2959 && !self.socket_pollout_has_resource_capacity(&socket)
2960 {
2961 events = PollEvents::from_bits(events.bits() & !POLLOUT.bits());
2962 }
2963 Ok(events)
2964 } else {
2965 Ok(POLLNVAL)
2966 }
2967 }
2968 }
2969 }
2970
2971 fn socket_pollout_has_resource_capacity(&self, socket: &SocketRecord) -> bool {
2972 let snapshot = self.resource_snapshot();
2973 if self
2974 .resources
2975 .limits()
2976 .max_socket_buffered_bytes
2977 .is_some_and(|limit| snapshot.socket_buffered_bytes >= limit)
2978 {
2979 return false;
2980 }
2981
2982 if socket.spec().socket_type == SocketType::Datagram
2983 && self
2984 .resources
2985 .limits()
2986 .max_socket_datagram_queue_len
2987 .is_some_and(|limit| snapshot.socket_datagram_queue_len >= limit)
2988 {
2989 return false;
2990 }
2991
2992 true
2993 }
2994
2995 fn poll_entry(
2996 &self,
2997 entry: &crate::fd_table::FdEntry,
2998 requested: PollEvents,
2999 ) -> KernelResult<PollEvents> {
3000 if self.pipes.is_pipe(entry.description.id()) {
3001 return Ok(self.pipes.poll(entry.description.id(), requested)?);
3002 }
3003
3004 if self.ptys.is_pty(entry.description.id()) {
3005 return Ok(self.ptys.poll(entry.description.id(), requested)?);
3006 }
3007
3008 let access_mode = entry.description.flags() & 0b11;
3009 let mut events = PollEvents::empty();
3010 if requested.intersects(POLLIN) && access_mode != crate::fd_table::O_WRONLY {
3011 events |= POLLIN;
3012 }
3013 if requested.intersects(POLLOUT) && access_mode != crate::fd_table::O_RDONLY {
3014 events |= POLLOUT;
3015 }
3016 if entry.filetype == FILETYPE_DIRECTORY && requested.intersects(POLLOUT) {
3017 events |= POLLERR;
3018 }
3019 if self.terminated {
3020 events |= POLLHUP;
3021 }
3022 Ok(events)
3023 }
3024
3025 fn description_for_fd(
3026 &self,
3027 requester_driver: &str,
3028 pid: u32,
3029 fd: u32,
3030 ) -> KernelResult<Arc<FileDescription>> {
3031 self.assert_driver_owns(requester_driver, pid)?;
3032 lock_or_recover(&self.fd_tables)
3033 .get(pid)
3034 .and_then(|table| table.get(fd))
3035 .map(|entry| Arc::clone(&entry.description))
3036 .ok_or_else(|| KernelError::bad_file_descriptor(fd))
3037 }
3038
3039 fn assert_not_terminated(&self) -> KernelResult<()> {
3040 if self.terminated {
3041 Err(KernelError::disposed())
3042 } else {
3043 Ok(())
3044 }
3045 }
3046
3047 fn assert_driver_owns(&self, requester_driver: &str, pid: u32) -> KernelResult<()> {
3048 let driver_pids = lock_or_recover(&self.driver_pids);
3049 if driver_pids
3050 .get(requester_driver)
3051 .map(|pids| pids.contains(&pid))
3052 .unwrap_or(false)
3053 {
3054 return Ok(());
3055 }
3056
3057 if driver_pids.values().any(|pids| pids.contains(&pid)) {
3058 return Err(KernelError::permission_denied(format!(
3059 "driver \"{requester_driver}\" does not own PID {pid}"
3060 )));
3061 }
3062
3063 Err(KernelError::no_such_process(pid))
3064 }
3065
3066 fn cleanup_process_resources(&self, pid: u32) {
3067 cleanup_process_resources(
3068 self.fd_tables.as_ref(),
3069 &self.file_locks,
3070 &self.pipes,
3071 &self.ptys,
3072 &self.sockets,
3073 self.driver_pids.as_ref(),
3074 pid,
3075 );
3076 }
3077
3078 fn resolve_spawn_command(
3079 &mut self,
3080 command: &str,
3081 args: &[String],
3082 cwd: &str,
3083 ) -> KernelResult<ResolvedSpawnCommand> {
3084 if let Some(driver) = self.commands.resolve(command).cloned() {
3085 return Ok(ResolvedSpawnCommand {
3086 command: command.to_owned(),
3087 args: args.to_vec(),
3088 driver,
3089 });
3090 }
3091
3092 let Some(path) = self.resolve_executable_path(command, cwd)? else {
3093 return Err(KernelError::command_not_found(command));
3094 };
3095
3096 if let Some(registered_command) = self.resolve_registered_command_path(&path) {
3097 let driver = self
3098 .commands
3099 .resolve(®istered_command)
3100 .cloned()
3101 .ok_or_else(|| KernelError::command_not_found(®istered_command))?;
3102 return Ok(ResolvedSpawnCommand {
3103 command: registered_command,
3104 args: args.to_vec(),
3105 driver,
3106 });
3107 }
3108
3109 let shebang = self
3110 .parse_shebang_command(&path)?
3111 .ok_or_else(|| KernelError::new("ENOEXEC", format!("exec format error: {path}")))?;
3112 self.resolve_shebang_command(&path, args, shebang)
3113 }
3114
3115 fn resolve_executable_path(
3116 &mut self,
3117 command: &str,
3118 cwd: &str,
3119 ) -> KernelResult<Option<String>> {
3120 if !command.contains('/') {
3121 return Ok(None);
3122 }
3123
3124 let path = if command.starts_with('/') {
3125 normalize_path(command)
3126 } else {
3127 normalize_path(&format!("{cwd}/{command}"))
3128 };
3129 let stat = self.filesystem.stat(&path)?;
3130 if stat.is_directory {
3131 return Err(KernelError::new(
3132 "EACCES",
3133 format!("permission denied, execute '{path}'"),
3134 ));
3135 }
3136 if stat.mode & EXECUTABLE_PERMISSION_BITS == 0 {
3137 return Err(KernelError::new(
3138 "EACCES",
3139 format!("permission denied, execute '{path}'"),
3140 ));
3141 }
3142 Ok(Some(path))
3143 }
3144
3145 fn resolve_registered_command_path(&self, path: &str) -> Option<String> {
3146 let normalized = normalize_path(path);
3147 for prefix in ["/bin/", "/usr/bin/", "/usr/local/bin/"] {
3148 let Some(name) = normalized.strip_prefix(prefix) else {
3149 continue;
3150 };
3151 if !name.is_empty() && !name.contains('/') && self.commands.resolve(name).is_some() {
3152 return Some(name.to_owned());
3153 }
3154 }
3155
3156 if let Some(name) = normalized
3157 .strip_prefix("/__secure_exec/commands/")
3158 .and_then(|suffix| suffix.rsplit('/').next())
3159 {
3160 if !name.is_empty() && !name.contains('/') && self.commands.resolve(name).is_some() {
3161 return Some(name.to_owned());
3162 }
3163 }
3164
3165 None
3166 }
3167
3168 fn parse_shebang_command(&mut self, path: &str) -> KernelResult<Option<ShebangCommand>> {
3169 let header = self.filesystem.pread(path, 0, SHEBANG_LINE_MAX_BYTES + 1)?;
3170 if !header.starts_with(b"#!") {
3171 return Ok(None);
3172 }
3173
3174 let line_end = match header.iter().position(|byte| *byte == b'\n') {
3175 Some(index) => index,
3176 None if header.len() <= SHEBANG_LINE_MAX_BYTES => header.len(),
3177 None => {
3178 return Err(KernelError::new(
3179 "ENOEXEC",
3180 format!("shebang line exceeds {SHEBANG_LINE_MAX_BYTES} bytes: {path}"),
3181 ))
3182 }
3183 };
3184 let line = header[2..line_end]
3185 .strip_suffix(b"\r")
3186 .unwrap_or(&header[2..line_end]);
3187 let text = std::str::from_utf8(line)
3188 .map_err(|_| KernelError::new("ENOEXEC", format!("invalid shebang line: {path}")))?;
3189 let mut parts = text.split_ascii_whitespace();
3190 let interpreter = parts
3191 .next()
3192 .ok_or_else(|| KernelError::new("ENOEXEC", format!("invalid shebang line: {path}")))?;
3193 Ok(Some(ShebangCommand {
3194 interpreter: interpreter.to_owned(),
3195 args: parts.map(ToOwned::to_owned).collect(),
3196 }))
3197 }
3198
3199 fn resolve_shebang_command(
3200 &self,
3201 path: &str,
3202 args: &[String],
3203 shebang: ShebangCommand,
3204 ) -> KernelResult<ResolvedSpawnCommand> {
3205 let mut interpreter_args = shebang.args;
3206 let interpreter = normalize_path(&shebang.interpreter);
3207 let command = if interpreter == "/usr/bin/env" || interpreter == "/bin/env" {
3208 if interpreter_args.is_empty() {
3209 return Err(KernelError::new(
3210 "ENOENT",
3211 format!("missing interpreter after /usr/bin/env in shebang: {path}"),
3212 ));
3213 }
3214 interpreter_args.remove(0)
3215 } else if let Some(command) = self.resolve_registered_command_path(&interpreter) {
3216 command
3217 } else if self.commands.resolve(&shebang.interpreter).is_some() {
3218 shebang.interpreter
3219 } else {
3220 return Err(KernelError::command_not_found(&shebang.interpreter));
3221 };
3222
3223 let driver = self
3224 .commands
3225 .resolve(&command)
3226 .cloned()
3227 .ok_or_else(|| KernelError::command_not_found(&command))?;
3228 let mut resolved_args = interpreter_args;
3229 resolved_args.push(path.to_owned());
3230 resolved_args.extend(args.iter().cloned());
3231 Ok(ResolvedSpawnCommand {
3232 command,
3233 args: resolved_args,
3234 driver,
3235 })
3236 }
3237
3238 fn finish_waitpid_event(&mut self, result: ProcessWaitResult) -> WaitPidEventResult {
3239 if result.event == WaitPidEvent::Exited {
3240 self.cleanup_process_resources(result.pid);
3241 }
3242 WaitPidEventResult {
3243 pid: result.pid,
3244 status: result.status,
3245 event: result.event,
3246 }
3247 }
3248
3249 fn raw_filesystem_mut(&mut self) -> &mut F {
3250 self.filesystem.inner_mut().inner_mut()
3251 }
3252
3253 fn read_file_internal(
3254 &mut self,
3255 current_pid: Option<u32>,
3256 path: &str,
3257 ) -> KernelResult<Vec<u8>> {
3258 if let Some(proc_node) = self.resolve_proc_node(path, current_pid)? {
3259 self.filesystem
3260 .check_virtual_path(FsOperation::Read, path)
3261 .map_err(KernelError::from)?;
3262 return self.proc_read_file(current_pid, &proc_node);
3263 }
3264
3265 Ok(self.filesystem.read_file(path)?)
3266 }
3267
3268 fn exists_internal(&self, current_pid: Option<u32>, path: &str) -> KernelResult<bool> {
3269 match self.resolve_proc_node(path, current_pid) {
3270 Ok(Some(_)) => {
3271 self.filesystem
3272 .check_virtual_path(FsOperation::Read, path)
3273 .map_err(KernelError::from)?;
3274 Ok(true)
3275 }
3276 Ok(None) => Ok(self.filesystem.exists(path)?),
3277 Err(error) if error.code() == "ENOENT" => Ok(false),
3278 Err(error) => Err(error),
3279 }
3280 }
3281
3282 fn stat_internal(&mut self, current_pid: Option<u32>, path: &str) -> KernelResult<VirtualStat> {
3283 if let Some(proc_node) = self.resolve_proc_node(path, current_pid)? {
3284 self.filesystem
3285 .check_virtual_path(FsOperation::Read, path)
3286 .map_err(KernelError::from)?;
3287 return self.proc_stat(current_pid, &proc_node);
3288 }
3289
3290 Ok(self.filesystem.stat(path)?)
3291 }
3292
3293 fn lstat_internal(&self, current_pid: Option<u32>, path: &str) -> KernelResult<VirtualStat> {
3294 if let Some(proc_node) = self.resolve_proc_node(path, current_pid)? {
3295 self.filesystem
3296 .check_virtual_path(FsOperation::Read, path)
3297 .map_err(KernelError::from)?;
3298 return self.proc_lstat(&proc_node);
3299 }
3300
3301 Ok(self.filesystem.lstat(path)?)
3302 }
3303
3304 fn read_link_internal(&self, current_pid: Option<u32>, path: &str) -> KernelResult<String> {
3305 if let Some(proc_node) = self.resolve_proc_node(path, current_pid)? {
3306 self.filesystem
3307 .check_virtual_path(FsOperation::Read, path)
3308 .map_err(KernelError::from)?;
3309 return self.proc_read_link(&proc_node);
3310 }
3311
3312 Ok(self.filesystem.read_link(path)?)
3313 }
3314
3315 fn read_dir_internal(
3316 &mut self,
3317 current_pid: Option<u32>,
3318 path: &str,
3319 ) -> KernelResult<Vec<String>> {
3320 if let Some(proc_node) = self.resolve_proc_node(path, current_pid)? {
3321 self.filesystem
3322 .check_virtual_path(FsOperation::Read, path)
3323 .map_err(KernelError::from)?;
3324 return self.proc_read_dir(current_pid, &proc_node);
3325 }
3326
3327 if let Some(limit) = self.resources.max_readdir_entries() {
3328 Ok(self.filesystem.read_dir_limited(path, limit)?)
3329 } else {
3330 Ok(self.filesystem.read_dir(path)?)
3331 }
3332 }
3333
3334 fn realpath_internal(&self, current_pid: Option<u32>, path: &str) -> KernelResult<String> {
3335 if let Some(proc_node) = self.resolve_proc_node(path, current_pid)? {
3336 self.filesystem
3337 .check_virtual_path(FsOperation::Read, path)
3338 .map_err(KernelError::from)?;
3339 return self.proc_realpath(current_pid, &proc_node);
3340 }
3341
3342 Ok(self.filesystem.realpath(path)?)
3343 }
3344
3345 fn resolve_proc_node(
3346 &self,
3347 path: &str,
3348 current_pid: Option<u32>,
3349 ) -> KernelResult<Option<ProcNode>> {
3350 let normalized = normalize_path(path);
3351 if !is_proc_path(&normalized) {
3352 return Ok(None);
3353 }
3354
3355 if normalized == "/proc" {
3356 return Ok(Some(ProcNode::RootDir));
3357 }
3358
3359 let suffix = normalized
3360 .strip_prefix("/proc/")
3361 .expect("proc path should have /proc prefix");
3362 let parts = suffix.split('/').collect::<Vec<_>>();
3363 if parts.is_empty() {
3364 return Ok(Some(ProcNode::RootDir));
3365 }
3366
3367 let root_node = match parts.as_slice() {
3368 ["mounts"] => Some(ProcNode::MountsFile),
3369 ["cpuinfo"] => Some(ProcNode::CpuInfoFile),
3370 ["meminfo"] => Some(ProcNode::MemInfoFile),
3371 ["loadavg"] => Some(ProcNode::LoadAvgFile),
3372 ["uptime"] => Some(ProcNode::UptimeFile),
3373 ["version"] => Some(ProcNode::VersionFile),
3374 _ => None,
3375 };
3376 if let Some(node) = root_node {
3377 return Ok(Some(node));
3378 }
3379
3380 let pid = match parts[0] {
3381 "self" => current_pid.ok_or_else(|| proc_not_found_error(&normalized))?,
3382 raw => raw
3383 .parse::<u32>()
3384 .map_err(|_| proc_not_found_error(&normalized))?,
3385 };
3386 self.proc_entry(pid)?;
3387
3388 let node = match parts.as_slice() {
3389 ["self"] => ProcNode::SelfLink { pid },
3390 [_pid] => ProcNode::PidDir { pid },
3391 [_pid, "fd"] => ProcNode::PidFdDir { pid },
3392 [_pid, "cmdline"] => ProcNode::PidCmdline { pid },
3393 [_pid, "environ"] => ProcNode::PidEnviron { pid },
3394 [_pid, "cwd"] => ProcNode::PidCwdLink { pid },
3395 [_pid, "stat"] => ProcNode::PidStatFile { pid },
3396 [_pid, "status"] => ProcNode::PidStatusFile { pid },
3397 [_pid, "fd", fd] => {
3398 let fd = fd
3399 .parse::<u32>()
3400 .map_err(|_| proc_not_found_error(&normalized))?;
3401 self.proc_fd_entry(pid, fd)?;
3402 ProcNode::PidFdLink { pid, fd }
3403 }
3404 _ => return Err(proc_not_found_error(&normalized)),
3405 };
3406
3407 Ok(Some(node))
3408 }
3409
3410 fn proc_entry(&self, pid: u32) -> KernelResult<crate::process_table::ProcessEntry> {
3411 self.processes
3412 .get(pid)
3413 .ok_or_else(|| proc_not_found_error(&format!("/proc/{pid}")))
3414 }
3415
3416 fn proc_fd_entry(&self, pid: u32, fd: u32) -> KernelResult<FdEntry> {
3417 lock_or_recover(&self.fd_tables)
3418 .get(pid)
3419 .and_then(|table| table.get(fd))
3420 .cloned()
3421 .ok_or_else(|| proc_not_found_error(&format!("/proc/{pid}/fd/{fd}")))
3422 }
3423
3424 fn proc_read_file(
3425 &mut self,
3426 current_pid: Option<u32>,
3427 node: &ProcNode,
3428 ) -> KernelResult<Vec<u8>> {
3429 match node {
3430 ProcNode::SelfLink { .. }
3431 | ProcNode::PidCwdLink { .. }
3432 | ProcNode::PidFdLink { .. } => {
3433 let target = self.proc_symlink_target(node)?;
3434 self.read_file_internal(current_pid, &target)
3435 }
3436 ProcNode::MountsFile => Ok(self.proc_mounts_bytes()),
3437 ProcNode::CpuInfoFile => Ok(self.proc_cpuinfo_bytes()),
3438 ProcNode::MemInfoFile => Ok(self.proc_meminfo_bytes()),
3439 ProcNode::LoadAvgFile => Ok(self.proc_loadavg_bytes()),
3440 ProcNode::UptimeFile => Ok(self.proc_uptime_bytes()),
3441 ProcNode::VersionFile => Ok(self.proc_version_bytes()),
3442 ProcNode::PidCmdline { pid } => Ok(self.proc_cmdline_bytes(*pid)),
3443 ProcNode::PidEnviron { pid } => Ok(self.proc_environ_bytes(*pid)),
3444 ProcNode::PidStatFile { pid } => Ok(self.proc_stat_bytes(*pid)),
3445 ProcNode::PidStatusFile { pid } => Ok(self.proc_status_bytes(*pid)),
3446 ProcNode::RootDir | ProcNode::PidDir { .. } | ProcNode::PidFdDir { .. } => {
3447 Err(KernelError::new(
3448 "EISDIR",
3449 format!(
3450 "illegal operation on a directory, read '{}'",
3451 self.proc_canonical_path(node)
3452 ),
3453 ))
3454 }
3455 }
3456 }
3457
3458 fn proc_stat(
3459 &mut self,
3460 current_pid: Option<u32>,
3461 node: &ProcNode,
3462 ) -> KernelResult<VirtualStat> {
3463 match node {
3464 ProcNode::SelfLink { .. }
3465 | ProcNode::PidCwdLink { .. }
3466 | ProcNode::PidFdLink { .. } => {
3467 let target = self.proc_symlink_target(node)?;
3468 self.stat_internal(current_pid, &target)
3469 }
3470 _ => self.proc_lstat(node),
3471 }
3472 }
3473
3474 fn proc_lstat(&self, node: &ProcNode) -> KernelResult<VirtualStat> {
3475 match node {
3476 ProcNode::RootDir | ProcNode::PidDir { .. } | ProcNode::PidFdDir { .. } => {
3477 Ok(proc_dir_stat(proc_inode(node)))
3478 }
3479 ProcNode::MountsFile => Ok(proc_file_stat(
3480 proc_inode(node),
3481 self.proc_mounts_bytes().len() as u64,
3482 )),
3483 ProcNode::CpuInfoFile => Ok(proc_file_stat(
3484 proc_inode(node),
3485 self.proc_cpuinfo_bytes().len() as u64,
3486 )),
3487 ProcNode::MemInfoFile => Ok(proc_file_stat(
3488 proc_inode(node),
3489 self.proc_meminfo_bytes().len() as u64,
3490 )),
3491 ProcNode::LoadAvgFile => Ok(proc_file_stat(
3492 proc_inode(node),
3493 self.proc_loadavg_bytes().len() as u64,
3494 )),
3495 ProcNode::UptimeFile => Ok(proc_file_stat(
3496 proc_inode(node),
3497 self.proc_uptime_bytes().len() as u64,
3498 )),
3499 ProcNode::VersionFile => Ok(proc_file_stat(
3500 proc_inode(node),
3501 self.proc_version_bytes().len() as u64,
3502 )),
3503 ProcNode::PidCmdline { pid } => Ok(proc_file_stat(
3504 proc_inode(node),
3505 self.proc_cmdline_bytes(*pid).len() as u64,
3506 )),
3507 ProcNode::PidEnviron { pid } => Ok(proc_file_stat(
3508 proc_inode(node),
3509 self.proc_environ_bytes(*pid).len() as u64,
3510 )),
3511 ProcNode::PidStatFile { pid } => Ok(proc_file_stat(
3512 proc_inode(node),
3513 self.proc_stat_bytes(*pid).len() as u64,
3514 )),
3515 ProcNode::PidStatusFile { pid } => Ok(proc_file_stat(
3516 proc_inode(node),
3517 self.proc_status_bytes(*pid).len() as u64,
3518 )),
3519 ProcNode::SelfLink { .. }
3520 | ProcNode::PidCwdLink { .. }
3521 | ProcNode::PidFdLink { .. } => Ok(proc_symlink_stat(
3522 proc_inode(node),
3523 self.proc_read_link(node)?.len() as u64,
3524 )),
3525 }
3526 }
3527
3528 fn proc_read_link(&self, node: &ProcNode) -> KernelResult<String> {
3529 match node {
3530 ProcNode::SelfLink { .. }
3531 | ProcNode::PidCwdLink { .. }
3532 | ProcNode::PidFdLink { .. } => self.proc_symlink_target(node),
3533 _ => Err(KernelError::new(
3534 "EINVAL",
3535 format!(
3536 "invalid argument, readlink '{}'",
3537 self.proc_canonical_path(node)
3538 ),
3539 )),
3540 }
3541 }
3542
3543 fn proc_read_dir(
3544 &mut self,
3545 current_pid: Option<u32>,
3546 node: &ProcNode,
3547 ) -> KernelResult<Vec<String>> {
3548 match node {
3549 ProcNode::SelfLink { .. }
3550 | ProcNode::PidCwdLink { .. }
3551 | ProcNode::PidFdLink { .. } => {
3552 let target = self.proc_symlink_target(node)?;
3553 self.read_dir_internal(current_pid, &target)
3554 }
3555 ProcNode::RootDir => {
3556 let mut entries = self
3557 .processes
3558 .list_processes()
3559 .keys()
3560 .map(|pid| pid.to_string())
3561 .collect::<Vec<_>>();
3562 entries.push(String::from("cpuinfo"));
3563 entries.push(String::from("loadavg"));
3564 entries.push(String::from("meminfo"));
3565 entries.push(String::from("mounts"));
3566 entries.push(String::from("self"));
3567 entries.push(String::from("uptime"));
3568 entries.push(String::from("version"));
3569 entries.sort();
3570 Ok(entries)
3571 }
3572 ProcNode::PidDir { .. } => Ok(vec![
3573 String::from("cmdline"),
3574 String::from("cwd"),
3575 String::from("environ"),
3576 String::from("fd"),
3577 String::from("stat"),
3578 String::from("status"),
3579 ]),
3580 ProcNode::PidFdDir { pid } => {
3581 let tables = lock_or_recover(&self.fd_tables);
3582 let table = tables
3583 .get(*pid)
3584 .ok_or_else(|| proc_not_found_error(&format!("/proc/{pid}/fd")))?;
3585 Ok(table.iter().map(|entry| entry.fd.to_string()).collect())
3586 }
3587 _ => Err(KernelError::new(
3588 "ENOTDIR",
3589 format!(
3590 "not a directory, scandir '{}'",
3591 self.proc_canonical_path(node)
3592 ),
3593 )),
3594 }
3595 }
3596
3597 fn proc_realpath(&self, current_pid: Option<u32>, node: &ProcNode) -> KernelResult<String> {
3598 match node {
3599 ProcNode::SelfLink { .. }
3600 | ProcNode::PidCwdLink { .. }
3601 | ProcNode::PidFdLink { .. } => {
3602 let target = self.proc_symlink_target(node)?;
3603 self.realpath_internal(current_pid, &target)
3604 }
3605 _ => Ok(self.proc_canonical_path(node)),
3606 }
3607 }
3608
3609 fn proc_symlink_target(&self, node: &ProcNode) -> KernelResult<String> {
3610 match node {
3611 ProcNode::SelfLink { pid } => Ok(format!("/proc/{pid}")),
3612 ProcNode::PidCwdLink { pid } => Ok(self.proc_entry(*pid)?.cwd),
3613 ProcNode::PidFdLink { pid, fd } => {
3614 Ok(self.proc_fd_entry(*pid, *fd)?.description.path().to_owned())
3615 }
3616 _ => Err(KernelError::new(
3617 "EINVAL",
3618 format!(
3619 "'{}' is not a symbolic link",
3620 self.proc_canonical_path(node)
3621 ),
3622 )),
3623 }
3624 }
3625
3626 fn proc_canonical_path(&self, node: &ProcNode) -> String {
3627 match node {
3628 ProcNode::RootDir => String::from("/proc"),
3629 ProcNode::MountsFile => String::from("/proc/mounts"),
3630 ProcNode::CpuInfoFile => String::from("/proc/cpuinfo"),
3631 ProcNode::MemInfoFile => String::from("/proc/meminfo"),
3632 ProcNode::LoadAvgFile => String::from("/proc/loadavg"),
3633 ProcNode::UptimeFile => String::from("/proc/uptime"),
3634 ProcNode::VersionFile => String::from("/proc/version"),
3635 ProcNode::SelfLink { pid } => format!("/proc/{pid}"),
3636 ProcNode::PidDir { pid } => format!("/proc/{pid}"),
3637 ProcNode::PidFdDir { pid } => format!("/proc/{pid}/fd"),
3638 ProcNode::PidCmdline { pid } => format!("/proc/{pid}/cmdline"),
3639 ProcNode::PidEnviron { pid } => format!("/proc/{pid}/environ"),
3640 ProcNode::PidCwdLink { pid } => format!("/proc/{pid}/cwd"),
3641 ProcNode::PidStatFile { pid } => format!("/proc/{pid}/stat"),
3642 ProcNode::PidStatusFile { pid } => format!("/proc/{pid}/status"),
3643 ProcNode::PidFdLink { pid, fd } => format!("/proc/{pid}/fd/{fd}"),
3644 }
3645 }
3646
3647 fn proc_cmdline_bytes(&self, pid: u32) -> Vec<u8> {
3648 let entry = self
3649 .processes
3650 .get(pid)
3651 .expect("process must exist while procfs path is resolved");
3652 let mut argv = vec![entry.command];
3653 argv.extend(entry.args);
3654 null_separated_bytes(argv)
3655 }
3656
3657 fn proc_environ_bytes(&self, pid: u32) -> Vec<u8> {
3658 let entry = self
3659 .processes
3660 .get(pid)
3661 .expect("process must exist while procfs path is resolved");
3662 null_separated_bytes(
3663 entry
3664 .env
3665 .into_iter()
3666 .map(|(key, value)| format!("{key}={value}"))
3667 .collect(),
3668 )
3669 }
3670
3671 fn proc_stat_bytes(&self, pid: u32) -> Vec<u8> {
3672 let entry = self
3673 .processes
3674 .get(pid)
3675 .expect("process must exist while procfs path is resolved");
3676 let command = entry.command.replace(')', "]");
3677 let state = match entry.status {
3678 ProcessStatus::Running => 'R',
3679 ProcessStatus::Stopped => 'T',
3680 ProcessStatus::Exited => 'Z',
3681 };
3682 format!(
3683 "{pid} ({command}) {state} {ppid} {pgid} {sid} 0 0 0 0 0 0 0 0 0 0 20 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0",
3684 ppid = entry.ppid,
3685 pgid = entry.pgid,
3686 sid = entry.sid,
3687 )
3688 .into_bytes()
3689 }
3690
3691 fn proc_mounts_bytes(&self) -> Vec<u8> {
3692 let mounts = if let Some(table) =
3693 (self.filesystem.inner().inner() as &dyn Any).downcast_ref::<MountTable>()
3694 {
3695 table.get_mounts()
3696 } else {
3697 vec![MountEntry {
3698 path: String::from("/"),
3699 plugin_id: String::from("root"),
3700 read_only: false,
3701 }]
3702 };
3703
3704 mounts
3705 .into_iter()
3706 .map(|mount| {
3707 let options = if mount.read_only { "ro" } else { "rw" };
3708 format!(
3709 "{source} {target} {fstype} {options} 0 0\n",
3710 source = mount.plugin_id,
3711 target = mount.path,
3712 fstype = mount.plugin_id,
3713 )
3714 })
3715 .collect::<String>()
3716 .into_bytes()
3717 }
3718
3719 fn proc_cpu_count(&self) -> usize {
3720 self.resource_limits().virtual_cpu_count.unwrap_or(1)
3721 }
3722
3723 fn proc_cpuinfo_bytes(&self) -> Vec<u8> {
3724 let mut body = String::new();
3725 for processor in 0..self.proc_cpu_count() {
3726 body.push_str(&format!(
3727 "processor\t: {processor}\nmodel name\t: secure-exec Virtual CPU\ncpu MHz\t\t: 1000.000\nsiblings\t: 1\ncpu cores\t: 1\n\n"
3728 ));
3729 }
3730 body.into_bytes()
3731 }
3732
3733 fn proc_mem_total_bytes(&self) -> u64 {
3734 self.resource_limits()
3735 .max_wasm_memory_bytes
3736 .or(self.resource_limits().max_filesystem_bytes)
3737 .unwrap_or(DEFAULT_MAX_OPEN_FDS as u64 * 1024 * 1024)
3738 }
3739
3740 fn proc_meminfo_bytes(&self) -> Vec<u8> {
3741 let total_kb = self.proc_mem_total_bytes().div_ceil(1024);
3742 let zero_kb = 0;
3743 format!(
3744 "MemTotal:{total_kb:>8} kB\nMemFree:{total_kb:>9} kB\nMemAvailable:{total_kb:>4} kB\nBuffers:{zero_kb:>9} kB\nCached:{zero_kb:>10} kB\n"
3745 )
3746 .into_bytes()
3747 }
3748
3749 fn proc_loadavg_bytes(&self) -> Vec<u8> {
3750 let processes = self.processes.list_processes();
3751 let running = processes
3752 .values()
3753 .filter(|process| process.status == ProcessStatus::Running)
3754 .count();
3755 let total = processes.len().max(1);
3756 let last_pid = processes.keys().next_back().copied().unwrap_or(0);
3757 format!("0.00 0.00 0.00 {running}/{total} {last_pid}\n").into_bytes()
3758 }
3759
3760 fn proc_uptime_bytes(&self) -> Vec<u8> {
3761 let uptime = self.boot_instant.elapsed().as_secs_f64();
3762 format!("{uptime:.2} {uptime:.2}\n").into_bytes()
3763 }
3764
3765 fn proc_version_bytes(&self) -> Vec<u8> {
3766 format!(
3767 "Linux version 6.8.0-agentos (agentos@localhost) #1 SMP boot={}\n",
3768 self.boot_time_ms
3769 )
3770 .into_bytes()
3771 }
3772
3773 fn proc_status_bytes(&self, pid: u32) -> Vec<u8> {
3774 let entry = self
3775 .processes
3776 .get(pid)
3777 .expect("process must exist while procfs path is resolved");
3778 let (state_code, state_name) = match entry.status {
3779 ProcessStatus::Running => ('R', "running"),
3780 ProcessStatus::Stopped => ('T', "stopped"),
3781 ProcessStatus::Exited => ('Z', "zombie"),
3782 };
3783 format!(
3784 "Name:\t{name}\nState:\t{state_code} ({state_name})\nPid:\t{pid}\nPPid:\t{ppid}\nUid:\t{uid}\t{euid}\t{euid}\t{euid}\nGid:\t{gid}\t{egid}\t{egid}\t{egid}\nVmSize:\t{:>8} kB\nVmRSS:\t{:>9} kB\nThreads:\t1\n",
3785 0,
3786 0,
3787 name = entry.command,
3788 ppid = entry.ppid,
3789 uid = entry.identity.uid,
3790 euid = entry.identity.euid,
3791 gid = entry.identity.gid,
3792 egid = entry.identity.egid,
3793 )
3794 .into_bytes()
3795 }
3796
3797 fn proc_read_file_from_open_path(
3798 &mut self,
3799 current_pid: Option<u32>,
3800 path: &str,
3801 ) -> KernelResult<Vec<u8>> {
3802 let node = self
3803 .resolve_proc_node(path, current_pid)?
3804 .ok_or_else(|| proc_not_found_error(path))?;
3805 self.proc_read_file(current_pid, &node)
3806 }
3807
3808 fn proc_stat_from_open_path(
3809 &mut self,
3810 current_pid: Option<u32>,
3811 path: &str,
3812 ) -> KernelResult<VirtualStat> {
3813 let node = self
3814 .resolve_proc_node(path, current_pid)?
3815 .ok_or_else(|| proc_not_found_error(path))?;
3816 self.proc_stat(current_pid, &node)
3817 }
3818
3819 fn filesystem_usage(&mut self) -> KernelResult<FileSystemUsage> {
3820 let filesystem = self.raw_filesystem_mut();
3821 let filesystem_any = filesystem as &mut dyn Any;
3822 if let Some(mount_table) = filesystem_any.downcast_mut::<MountTable>() {
3823 return Ok(mount_table.root_usage()?);
3824 }
3825 Ok(measure_filesystem_usage(filesystem)?)
3826 }
3827
3828 fn storage_stat(&mut self, path: &str) -> KernelResult<Option<VirtualStat>> {
3829 if is_virtual_device_storage_path(path) {
3830 return Ok(None);
3831 }
3832
3833 match self.raw_filesystem_mut().stat(path) {
3834 Ok(stat) => Ok(Some(stat)),
3835 Err(error) if error.code() == "ENOENT" => Ok(None),
3836 Err(error) => Err(error.into()),
3837 }
3838 }
3839
3840 fn storage_lstat(&mut self, path: &str) -> KernelResult<Option<VirtualStat>> {
3841 if is_virtual_device_storage_path(path) {
3842 return Ok(None);
3843 }
3844
3845 match self.raw_filesystem_mut().lstat(path) {
3846 Ok(stat) => Ok(Some(stat)),
3847 Err(error) if error.code() == "ENOENT" => Ok(None),
3848 Err(error) => Err(error.into()),
3849 }
3850 }
3851
3852 fn current_storage_file_size(&mut self, path: &str) -> KernelResult<u64> {
3853 Ok(self
3854 .storage_stat(path)?
3855 .filter(|stat| !stat.is_directory)
3856 .map(|stat| stat.size)
3857 .unwrap_or(0))
3858 }
3859
3860 fn apply_creation_mode(&mut self, path: &str, mode: u32, umask: u32) -> KernelResult<()> {
3861 let masked_mode = (mode & !0o777) | ((mode & 0o777) & !(umask & 0o777));
3862 Ok(self.filesystem.chmod(path, masked_mode)?)
3863 }
3864
3865 fn missing_directory_paths(
3866 &mut self,
3867 path: &str,
3868 recursive: bool,
3869 ) -> KernelResult<Vec<String>> {
3870 let normalized = normalize_path(path);
3871 if normalized == "/" {
3872 return Ok(Vec::new());
3873 }
3874
3875 if !recursive {
3876 return Ok(if self.storage_lstat(&normalized)?.is_none() {
3877 vec![normalized]
3878 } else {
3879 Vec::new()
3880 });
3881 }
3882
3883 let mut created = Vec::new();
3884 let mut current = String::from("/");
3885 for component in normalized
3886 .split('/')
3887 .filter(|component| !component.is_empty())
3888 {
3889 current = if current == "/" {
3890 format!("/{component}")
3891 } else {
3892 format!("{current}/{component}")
3893 };
3894 if self.storage_lstat(¤t)?.is_none() {
3895 created.push(current.clone());
3896 }
3897 }
3898 Ok(created)
3899 }
3900
3901 fn check_write_file_limits(&mut self, path: &str, new_size: u64) -> KernelResult<()> {
3902 if is_virtual_device_storage_path(path) {
3903 return Ok(());
3904 }
3905
3906 let usage = self.filesystem_usage()?;
3907 if let Some(existing) = self.storage_stat(path)? {
3908 if existing.is_directory {
3909 return Ok(());
3910 }
3911
3912 self.resources.check_filesystem_usage(
3913 &usage,
3914 usage
3915 .total_bytes
3916 .saturating_sub(existing.size)
3917 .saturating_add(new_size),
3918 usage.inode_count,
3919 )?;
3920 return Ok(());
3921 }
3922
3923 let new_inodes =
3924 count_missing_directory_components(self.raw_filesystem_mut(), path, false)?
3925 .saturating_add(1);
3926 self.resources.check_filesystem_usage(
3927 &usage,
3928 usage.total_bytes.saturating_add(new_size),
3929 usage.inode_count.saturating_add(new_inodes),
3930 )?;
3931 Ok(())
3932 }
3933
3934 fn check_create_dir_limits(&mut self, path: &str) -> KernelResult<()> {
3935 if is_virtual_device_storage_path(path) || self.storage_lstat(path)?.is_some() {
3936 return Ok(());
3937 }
3938
3939 let parent = parent_path(path);
3940 let Some(parent_stat) = self.storage_stat(&parent)? else {
3941 return Ok(());
3942 };
3943 if !parent_stat.is_directory {
3944 return Ok(());
3945 }
3946
3947 let usage = self.filesystem_usage()?;
3948 self.resources.check_filesystem_usage(
3949 &usage,
3950 usage.total_bytes,
3951 usage.inode_count.saturating_add(1),
3952 )?;
3953 Ok(())
3954 }
3955
3956 fn check_mkdir_limits(&mut self, path: &str, recursive: bool) -> KernelResult<()> {
3957 if is_virtual_device_storage_path(path) {
3958 return Ok(());
3959 }
3960
3961 if !recursive {
3962 return self.check_create_dir_limits(path);
3963 }
3964
3965 let usage = self.filesystem_usage()?;
3966 let new_inodes = count_missing_directory_components(self.raw_filesystem_mut(), path, true)?;
3967 self.resources.check_filesystem_usage(
3968 &usage,
3969 usage.total_bytes,
3970 usage.inode_count.saturating_add(new_inodes),
3971 )?;
3972 Ok(())
3973 }
3974
3975 fn check_symlink_limits(&mut self, target: &str, link_path: &str) -> KernelResult<()> {
3976 if is_virtual_device_storage_path(link_path) || self.storage_lstat(link_path)?.is_some() {
3977 return Ok(());
3978 }
3979
3980 let parent = parent_path(link_path);
3981 let Some(parent_stat) = self.storage_stat(&parent)? else {
3982 return Ok(());
3983 };
3984 if !parent_stat.is_directory {
3985 return Ok(());
3986 }
3987
3988 let usage = self.filesystem_usage()?;
3989 self.resources.check_filesystem_usage(
3990 &usage,
3991 usage.total_bytes.saturating_add(target.len() as u64),
3992 usage.inode_count.saturating_add(1),
3993 )?;
3994 Ok(())
3995 }
3996
3997 fn check_truncate_limits(&mut self, path: &str, length: u64) -> KernelResult<()> {
3998 self.check_path_resize_limits(path, length)
3999 }
4000
4001 fn check_rename_copy_up_limits(&mut self, old_path: &str, new_path: &str) -> KernelResult<()> {
4002 let max_bytes = self.resource_limits().max_filesystem_bytes;
4003 let max_inodes = self.resource_limits().max_inode_count;
4004 let filesystem_any = self.raw_filesystem_mut() as &mut dyn Any;
4005
4006 if let Some(root) = filesystem_any.downcast_mut::<RootFileSystem>() {
4007 root.check_rename_copy_up_limits(old_path, new_path, max_bytes, max_inodes)?;
4008 return Ok(());
4009 }
4010
4011 if let Some(mount_table) = filesystem_any.downcast_mut::<MountTable>() {
4012 mount_table.check_rename_copy_up_limits(old_path, new_path, max_bytes, max_inodes)?;
4013 }
4014
4015 Ok(())
4016 }
4017
4018 fn check_path_resize_limits(&mut self, path: &str, new_size: u64) -> KernelResult<()> {
4019 if is_virtual_device_storage_path(path) {
4020 return Ok(());
4021 }
4022
4023 let Some(existing) = self.storage_stat(path)? else {
4024 return Ok(());
4025 };
4026 if existing.is_directory {
4027 return Ok(());
4028 }
4029
4030 let usage = self.filesystem_usage()?;
4031 self.resources.check_filesystem_usage(
4032 &usage,
4033 usage
4034 .total_bytes
4035 .saturating_sub(existing.size)
4036 .saturating_add(new_size),
4037 usage.inode_count,
4038 )?;
4039 Ok(())
4040 }
4041
4042 fn blocking_read_timeout(&self) -> Option<Duration> {
4043 self.resources
4044 .limits()
4045 .max_blocking_read_ms
4046 .map(Duration::from_millis)
4047 }
4048
4049 fn close_special_resource_if_needed(&self, description: &Arc<FileDescription>, filetype: u8) {
4050 close_special_resource_if_needed(
4051 &self.file_locks,
4052 &self.pipes,
4053 &self.ptys,
4054 description,
4055 filetype,
4056 );
4057 }
4058}
4059
4060impl KernelVm<MountTable> {
4061 fn check_mount_permissions(&self, path: &str) -> KernelResult<()> {
4062 self.filesystem
4063 .check_path(FsOperation::Write, path)
4064 .map_err(KernelError::from)?;
4065 if is_sensitive_mount_path(path) {
4066 self.filesystem
4067 .check_path(FsOperation::MountSensitive, path)
4068 .map_err(KernelError::from)?;
4069 }
4070 Ok(())
4071 }
4072
4073 pub fn mount_filesystem(
4074 &mut self,
4075 path: &str,
4076 filesystem: impl VirtualFileSystem + 'static,
4077 options: MountOptions,
4078 ) -> KernelResult<()> {
4079 self.assert_not_terminated()?;
4080 self.check_mount_permissions(path)?;
4081 self.filesystem
4082 .inner_mut()
4083 .inner_mut()
4084 .mount(path, filesystem, options)
4085 .map_err(KernelError::from)
4086 }
4087
4088 pub fn mount_boxed_filesystem(
4089 &mut self,
4090 path: &str,
4091 filesystem: Box<dyn MountedFileSystem>,
4092 options: MountOptions,
4093 ) -> KernelResult<()> {
4094 self.assert_not_terminated()?;
4095 self.check_mount_permissions(path)?;
4096 self.filesystem
4097 .inner_mut()
4098 .inner_mut()
4099 .mount_boxed(path, filesystem, options)
4100 .map_err(KernelError::from)
4101 }
4102
4103 pub fn unmount_filesystem(&mut self, path: &str) -> KernelResult<()> {
4104 self.assert_not_terminated()?;
4105 self.check_mount_permissions(path)?;
4106 self.filesystem
4107 .inner_mut()
4108 .inner_mut()
4109 .unmount(path)
4110 .map_err(KernelError::from)
4111 }
4112
4113 pub fn mounted_filesystems(&self) -> Vec<MountEntry> {
4114 self.filesystem.inner().inner().get_mounts()
4115 }
4116
4117 pub fn root_filesystem_mut(&mut self) -> Option<&mut RootFileSystem> {
4118 self.filesystem
4119 .inner_mut()
4120 .inner_mut()
4121 .root_virtual_filesystem_mut::<RootFileSystem>()
4122 }
4123
4124 pub fn snapshot_root_filesystem(&mut self) -> KernelResult<RootFilesystemSnapshot> {
4125 let usage = self.filesystem_usage()?;
4126 self.resources
4127 .check_filesystem_usage(&usage, usage.total_bytes, usage.inode_count)?;
4128 let root = self
4129 .root_filesystem_mut()
4130 .ok_or_else(|| KernelError::new("EINVAL", "native root filesystem is not available"))?;
4131 root.snapshot().map_err(KernelError::from)
4132 }
4133}
4134
4135#[derive(Default)]
4136struct StubDriverState {
4137 exit_code: Option<i32>,
4138 on_exit: Option<ProcessExitCallback>,
4139 kill_signals: Vec<i32>,
4140}
4141
4142#[derive(Default)]
4143struct StubDriverProcess {
4144 state: Mutex<StubDriverState>,
4145 waiters: Condvar,
4146}
4147
4148impl StubDriverProcess {
4149 fn finish(&self, exit_code: i32) {
4150 let callback = {
4151 let mut state = lock_or_recover(&self.state);
4152 if state.exit_code.is_some() {
4153 return;
4154 }
4155 state.exit_code = Some(exit_code);
4156 self.waiters.notify_all();
4157 state.on_exit.clone()
4158 };
4159
4160 if let Some(callback) = callback {
4161 callback(exit_code);
4162 }
4163 }
4164
4165 fn kill_signals(&self) -> Vec<i32> {
4166 lock_or_recover(&self.state).kill_signals.clone()
4167 }
4168}
4169
4170impl DriverProcess for StubDriverProcess {
4171 fn kill(&self, signal: i32) {
4172 {
4173 let mut state = lock_or_recover(&self.state);
4174 state.kill_signals.push(signal);
4175 }
4176 if matches!(
4177 signal,
4178 crate::process_table::SIGCHLD | SIGCONT | SIGSTOP | SIGTSTP | SIGWINCH
4179 ) {
4180 return;
4181 }
4182 self.finish(128 + signal);
4183 }
4184
4185 fn wait(&self, timeout: Duration) -> Option<i32> {
4186 let state = lock_or_recover(&self.state);
4187 if let Some(code) = state.exit_code {
4188 return Some(code);
4189 }
4190
4191 let (state, _) = wait_timeout_or_recover(&self.waiters, state, timeout);
4192 state.exit_code
4193 }
4194
4195 fn set_on_exit(&self, callback: ProcessExitCallback) {
4196 let maybe_exit = {
4197 let mut state = lock_or_recover(&self.state);
4198 state.on_exit = Some(callback.clone());
4199 state.exit_code
4200 };
4201
4202 if let Some(code) = maybe_exit {
4203 callback(code);
4204 }
4205 }
4206}
4207
4208impl From<VfsError> for KernelError {
4209 fn from(error: VfsError) -> Self {
4210 map_error(error.code(), error.to_string())
4211 }
4212}
4213
/// Locks `mutex`, treating a poisoned lock as usable: if a previous
/// holder panicked, the guard is recovered instead of propagating the
/// poison error.
fn lock_or_recover<'a, T>(mutex: &'a Mutex<T>) -> MutexGuard<'a, T> {
    mutex
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner())
}
4220
/// Waits on `condvar` for at most `timeout`, treating a poisoned lock as
/// usable: the guard and timeout result are recovered from the poison
/// error instead of propagating it.
fn wait_timeout_or_recover<'a, T>(
    condvar: &Condvar,
    guard: MutexGuard<'a, T>,
    timeout: Duration,
) -> (MutexGuard<'a, T>, WaitTimeoutResult) {
    condvar
        .wait_timeout(guard, timeout)
        .unwrap_or_else(|poisoned| poisoned.into_inner())
}
4231
4232fn is_sensitive_mount_path(path: &str) -> bool {
4233 let normalized = crate::vfs::normalize_path(path);
4234 normalized == "/"
4235 || normalized == "/etc"
4236 || normalized.starts_with("/etc/")
4237 || normalized == "/proc"
4238 || normalized.starts_with("/proc/")
4239}
4240
4241impl From<FdTableError> for KernelError {
4242 fn from(error: FdTableError) -> Self {
4243 map_error(error.code(), error.to_string())
4244 }
4245}
4246
4247impl From<PipeError> for KernelError {
4248 fn from(error: PipeError) -> Self {
4249 map_error(error.code(), error.to_string())
4250 }
4251}
4252
4253impl From<PtyError> for KernelError {
4254 fn from(error: PtyError) -> Self {
4255 map_error(error.code(), error.to_string())
4256 }
4257}
4258
4259impl From<ProcessTableError> for KernelError {
4260 fn from(error: ProcessTableError) -> Self {
4261 map_error(error.code(), error.to_string())
4262 }
4263}
4264
4265impl From<PermissionError> for KernelError {
4266 fn from(error: PermissionError) -> Self {
4267 map_error(error.code(), error.to_string())
4268 }
4269}
4270
4271impl From<ResourceError> for KernelError {
4272 fn from(error: ResourceError) -> Self {
4273 map_error(error.code(), error.to_string())
4274 }
4275}
4276
4277impl From<SocketTableError> for KernelError {
4278 fn from(error: SocketTableError) -> Self {
4279 map_error(error.code(), error.to_string())
4280 }
4281}
4282
4283impl From<RootFilesystemError> for KernelError {
4284 fn from(error: RootFilesystemError) -> Self {
4285 map_error("EINVAL", error.to_string())
4286 }
4287}
4288
4289fn map_dns_resolver_error(error: crate::dns::DnsResolverError) -> KernelError {
4290 let code = match error.kind() {
4291 DnsResolverErrorKind::InvalidInput => "EINVAL",
4292 DnsResolverErrorKind::LookupFailed => "EHOSTUNREACH",
4293 };
4294 map_error(code, error.to_string())
4295}
4296
4297fn map_error(code: &'static str, message: String) -> KernelError {
4298 let trimmed = strip_error_prefix(code, &message)
4299 .map(ToOwned::to_owned)
4300 .unwrap_or(message);
4301 KernelError::new(code, trimmed)
4302}
4303
/// Returns `message` without a leading `"<code>: "`, or `None` when the
/// message does not start with exactly that prefix.
fn strip_error_prefix<'a>(code: &str, message: &'a str) -> Option<&'a str> {
    let after_code = message.strip_prefix(code)?;
    after_code.strip_prefix(": ")
}
4308
4309fn parse_dev_fd_path(path: &str) -> KernelResult<Option<u32>> {
4310 let Some(raw_fd) = path.strip_prefix("/dev/fd/") else {
4311 return Ok(None);
4312 };
4313 if raw_fd.is_empty() {
4314 return Err(KernelError::new(
4315 "EBADF",
4316 format!("bad file descriptor: {path}"),
4317 ));
4318 }
4319 let fd = raw_fd
4320 .parse::<u32>()
4321 .map_err(|_| KernelError::new("EBADF", format!("bad file descriptor: {path}")))?;
4322 Ok(Some(fd))
4323}
4324
4325fn count_missing_directory_components<F: VirtualFileSystem>(
4326 filesystem: &mut F,
4327 path: &str,
4328 include_final: bool,
4329) -> VfsResult<usize> {
4330 let normalized = normalize_path(path);
4331 let parts = normalized
4332 .split('/')
4333 .filter(|part| !part.is_empty())
4334 .collect::<Vec<_>>();
4335 let limit = if include_final {
4336 parts.len()
4337 } else {
4338 parts.len().saturating_sub(1)
4339 };
4340
4341 let mut current = String::from("/");
4342 for (index, part) in parts.iter().take(limit).enumerate() {
4343 let candidate = if current == "/" {
4344 format!("/{}", part)
4345 } else {
4346 format!("{current}/{}", part)
4347 };
4348
4349 match filesystem.stat(&candidate) {
4350 Ok(stat) => {
4351 if !stat.is_directory {
4352 return Err(VfsError::new(
4353 "ENOTDIR",
4354 format!("not a directory, mkdir '{candidate}'"),
4355 ));
4356 }
4357 current = candidate;
4358 }
4359 Err(error) if error.code() == "ENOENT" => {
4360 return Ok(limit.saturating_sub(index));
4361 }
4362 Err(error) => return Err(error),
4363 }
4364 }
4365
4366 Ok(0)
4367}
4368
4369fn parent_path(path: &str) -> String {
4370 let normalized = normalize_path(path);
4371 let Some((head, _)) = normalized.rsplit_once('/') else {
4372 return String::from("/");
4373 };
4374
4375 if head.is_empty() {
4376 String::from("/")
4377 } else {
4378 String::from(head)
4379 }
4380}
4381
/// Joins `child` onto `parent` with a single `/` separator.
///
/// A `parent` of exactly `/` is special-cased so the result does not start
/// with a doubled slash. No other normalization is performed.
fn join_absolute_path(parent: &str, child: &str) -> String {
    let base = if parent == "/" { "" } else { parent };
    format!("{base}/{child}")
}
4389
/// Reports whether `path` is one of the recognized `/dev` locations.
///
/// Matches the listed device nodes and directories exactly, plus anything
/// below `/dev/fd/` or `/dev/pts/`. The path is compared as given; it is not
/// normalized here.
fn is_virtual_device_storage_path(path: &str) -> bool {
    const EXACT: [&str; 9] = [
        "/dev",
        "/dev/fd",
        "/dev/pts",
        "/dev/null",
        "/dev/zero",
        "/dev/stdin",
        "/dev/stdout",
        "/dev/stderr",
        "/dev/urandom",
    ];
    const SUBTREES: [&str; 2] = ["/dev/fd/", "/dev/pts/"];

    EXACT.contains(&path) || SUBTREES.iter().any(|subtree| path.starts_with(subtree))
}
4400
4401fn is_proc_path(path: &str) -> bool {
4402 let normalized = normalize_path(path);
4403 normalized == "/proc" || normalized.starts_with("/proc/")
4404}
4405
4406fn is_agentos_path(path: &str) -> bool {
4407 let normalized = normalize_path(path);
4408 normalized == "/etc/agentos" || normalized.starts_with("/etc/agentos/")
4409}
4410
4411fn open_requires_write_access(flags: u32) -> bool {
4412 flags & (O_CREAT | O_EXCL | O_TRUNC) != 0 || (flags & 0b11) != crate::fd_table::O_RDONLY
4413}
4414
4415fn checked_write_end(offset: u64, len: usize) -> KernelResult<u64> {
4416 offset
4417 .checked_add(len as u64)
4418 .ok_or_else(|| KernelError::new("EINVAL", "write offset out of range"))
4419}
4420
4421fn filetype_for_path(path: &str, stat: &VirtualStat) -> u8 {
4422 if stat.is_directory {
4423 FILETYPE_DIRECTORY
4424 } else if path.starts_with("/dev/") {
4425 FILETYPE_CHARACTER_DEVICE
4426 } else if stat.is_symbolic_link {
4427 FILETYPE_SYMBOLIC_LINK
4428 } else {
4429 FILETYPE_REGULAR_FILE
4430 }
4431}
4432
4433fn synthetic_character_device_stat(ino: u64) -> VirtualStat {
4434 let now = now_ms();
4435 VirtualStat {
4436 mode: 0o666,
4437 size: 0,
4438 blocks: 0,
4439 dev: 2,
4440 rdev: 0,
4441 is_directory: false,
4442 is_symbolic_link: false,
4443 atime_ms: now,
4444 atime_nsec: 0,
4445 mtime_ms: now,
4446 mtime_nsec: 0,
4447 ctime_ms: now,
4448 ctime_nsec: 0,
4449 birthtime_ms: now,
4450 ino,
4451 nlink: 1,
4452 uid: 0,
4453 gid: 0,
4454 }
4455}
4456
4457fn proc_dir_stat(ino: u64) -> VirtualStat {
4458 let now = now_ms();
4459 VirtualStat {
4460 mode: 0o555,
4461 size: 0,
4462 blocks: 0,
4463 dev: 3,
4464 rdev: 0,
4465 is_directory: true,
4466 is_symbolic_link: false,
4467 atime_ms: now,
4468 atime_nsec: 0,
4469 mtime_ms: now,
4470 mtime_nsec: 0,
4471 ctime_ms: now,
4472 ctime_nsec: 0,
4473 birthtime_ms: now,
4474 ino,
4475 nlink: 2,
4476 uid: 0,
4477 gid: 0,
4478 }
4479}
4480
4481fn proc_file_stat(ino: u64, size: u64) -> VirtualStat {
4482 let now = now_ms();
4483 VirtualStat {
4484 mode: 0o444,
4485 size,
4486 blocks: if size == 0 { 0 } else { size.div_ceil(512) },
4487 dev: 3,
4488 rdev: 0,
4489 is_directory: false,
4490 is_symbolic_link: false,
4491 atime_ms: now,
4492 atime_nsec: 0,
4493 mtime_ms: now,
4494 mtime_nsec: 0,
4495 ctime_ms: now,
4496 ctime_nsec: 0,
4497 birthtime_ms: now,
4498 ino,
4499 nlink: 1,
4500 uid: 0,
4501 gid: 0,
4502 }
4503}
4504
4505fn proc_symlink_stat(ino: u64, size: u64) -> VirtualStat {
4506 let now = now_ms();
4507 VirtualStat {
4508 mode: 0o777,
4509 size,
4510 blocks: if size == 0 { 0 } else { size.div_ceil(512) },
4511 dev: 3,
4512 rdev: 0,
4513 is_directory: false,
4514 is_symbolic_link: true,
4515 atime_ms: now,
4516 atime_nsec: 0,
4517 mtime_ms: now,
4518 mtime_nsec: 0,
4519 ctime_ms: now,
4520 ctime_nsec: 0,
4521 birthtime_ms: now,
4522 ino,
4523 nlink: 1,
4524 uid: 0,
4525 gid: 0,
4526 }
4527}
4528
4529fn proc_filetype(node: &ProcNode) -> u8 {
4530 match node {
4531 ProcNode::RootDir | ProcNode::PidDir { .. } | ProcNode::PidFdDir { .. } => {
4532 FILETYPE_DIRECTORY
4533 }
4534 ProcNode::SelfLink { .. } | ProcNode::PidCwdLink { .. } | ProcNode::PidFdLink { .. } => {
4535 FILETYPE_SYMBOLIC_LINK
4536 }
4537 ProcNode::MountsFile
4538 | ProcNode::CpuInfoFile
4539 | ProcNode::MemInfoFile
4540 | ProcNode::LoadAvgFile
4541 | ProcNode::UptimeFile
4542 | ProcNode::VersionFile
4543 | ProcNode::PidCmdline { .. }
4544 | ProcNode::PidEnviron { .. }
4545 | ProcNode::PidStatFile { .. }
4546 | ProcNode::PidStatusFile { .. } => FILETYPE_REGULAR_FILE,
4547 }
4548}
4549
4550fn proc_inode(node: &ProcNode) -> u64 {
4551 match node {
4552 ProcNode::RootDir => 0xfffe_0001,
4553 ProcNode::MountsFile => 0xfffe_0002,
4554 ProcNode::CpuInfoFile => 0xfffe_0003,
4555 ProcNode::MemInfoFile => 0xfffe_0004,
4556 ProcNode::LoadAvgFile => 0xfffe_0005,
4557 ProcNode::UptimeFile => 0xfffe_0006,
4558 ProcNode::VersionFile => 0xfffe_0007,
4559 ProcNode::SelfLink { pid } => 0xfffe_1000 + u64::from(*pid),
4560 ProcNode::PidDir { pid } => 0xfffe_2000 + u64::from(*pid),
4561 ProcNode::PidFdDir { pid } => 0xfffe_3000 + u64::from(*pid),
4562 ProcNode::PidCmdline { pid } => 0xfffe_4000 + u64::from(*pid),
4563 ProcNode::PidEnviron { pid } => 0xfffe_5000 + u64::from(*pid),
4564 ProcNode::PidCwdLink { pid } => 0xfffe_6000 + u64::from(*pid),
4565 ProcNode::PidStatFile { pid } => 0xfffe_7000 + u64::from(*pid),
4566 ProcNode::PidStatusFile { pid } => 0xfffe_8000 + u64::from(*pid),
4567 ProcNode::PidFdLink { pid, fd } => 0xffff_0000 + ((u64::from(*pid)) << 8) + u64::from(*fd),
4568 }
4569}
4570
/// Encodes `parts` as a byte buffer in which every entry is followed by a
/// NUL byte.
///
/// An empty list produces an empty buffer.
fn null_separated_bytes(parts: Vec<String>) -> Vec<u8> {
    let capacity = parts.iter().map(|part| part.len() + 1).sum();
    let mut bytes = Vec::with_capacity(capacity);
    for part in &parts {
        bytes.extend_from_slice(part.as_bytes());
        bytes.push(0);
    }
    bytes
}
4580
4581fn proc_not_found_error(path: &str) -> KernelError {
4582 KernelError::new(
4583 "ENOENT",
4584 format!("no such file or directory, stat '{path}'"),
4585 )
4586}
4587
4588fn read_only_filesystem_error(path: &str) -> KernelError {
4589 KernelError::new("EROFS", format!("read-only filesystem: {path}"))
4590}
4591
/// Returns the current wall-clock time as milliseconds since the Unix epoch.
///
/// Yields 0 if the system clock reads earlier than the epoch.
fn now_ms() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_millis() as u64,
        Err(_) => 0,
    }
}
4598
4599impl<F> Drop for KernelVm<F> {
4600 fn drop(&mut self) {
4601 if !self.terminated {
4602 dispose_kernel_vm_resources(self);
4603 }
4604 }
4605}
4606
// Unit tests for process-group and signal-mask ownership checks, the
// per-process cleanup path, and resource disposal when a kernel is dropped.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::vfs::MemoryFileSystem;
    use std::panic::{catch_unwind, AssertUnwindSafe};
    use std::thread;

    /// Handles copied out of a kernel so a test can keep inspecting them
    /// after the kernel itself has been dropped.
    struct RetainedKernelResources {
        // Handle to the process spawned by `kernel_with_live_resources`.
        process: KernelProcessHandle,
        // Same `Arc` the kernel holds for its per-process FD tables.
        fd_tables: Arc<Mutex<FdTableManager>>,
        // Clones of the kernel's managers. Presumably the clones share state
        // with the originals (the post-drop assertions rely on it) — confirm.
        pipes: PipeManager,
        ptys: PtyManager,
        sockets: SocketTable,
        // Same `Arc` the kernel holds for driver-name -> owned-pid tracking.
        driver_pids: Arc<Mutex<BTreeMap<String, BTreeSet<u32>>>>,
    }

    /// Builds a kernel that owns one process, one pipe, one PTY and one
    /// listening TCP socket, and returns it together with retained handles.
    ///
    /// The baseline counts are asserted before returning, so a caller that
    /// later sees zero counts knows they dropped from one.
    fn kernel_with_live_resources() -> (KernelVm<MemoryFileSystem>, RetainedKernelResources) {
        let mut config = KernelVmConfig::new("vm-drop-resources");
        config.permissions = Permissions::allow_all();
        let mut kernel = KernelVm::new(MemoryFileSystem::new(), config);
        kernel
            .register_driver(CommandDriver::new("shell", ["sh"]))
            .expect("register shell");

        let process = kernel
            .spawn_process(
                "sh",
                Vec::new(),
                SpawnOptions {
                    requester_driver: Some(String::from("shell")),
                    ..SpawnOptions::default()
                },
            )
            .expect("spawn shell");
        // The returned descriptors are not needed; only the side effect of
        // having a live pipe and PTY matters.
        let _ = kernel.open_pipe("shell", process.pid()).expect("open pipe");
        let _ = kernel.open_pty("shell", process.pid()).expect("open pty");
        let socket = kernel
            .socket_create("shell", process.pid(), SocketSpec::tcp())
            .expect("create socket");
        kernel
            .socket_set_state("shell", process.pid(), socket, SocketState::Listening)
            .expect("mark listener");

        let retained = RetainedKernelResources {
            process: process.clone(),
            fd_tables: Arc::clone(&kernel.fd_tables),
            pipes: kernel.pipes.clone(),
            ptys: kernel.ptys.clone(),
            sockets: kernel.sockets.clone(),
            driver_pids: Arc::clone(&kernel.driver_pids),
        };

        // Baseline: exactly one of each resource is live before the drop.
        assert_eq!(lock_or_recover(retained.fd_tables.as_ref()).len(), 1);
        assert_eq!(retained.pipes.pipe_count(), 1);
        assert_eq!(retained.ptys.pty_count(), 1);
        assert_eq!(retained.sockets.snapshot().sockets, 1);

        (kernel, retained)
    }

    /// Asserts that every resource captured in `retained` was released and
    /// that the process was signalled and has exited.
    fn assert_kernel_drop_released_resources(retained: &RetainedKernelResources) {
        // NOTE(review): 15 is presumably SIGTERM and 143 the conventional
        // 128 + signal exit status — confirm against the process table.
        assert_eq!(retained.process.wait(Duration::from_millis(50)), Some(143));
        assert_eq!(retained.process.kill_signals(), vec![15]);
        assert!(
            lock_or_recover(retained.fd_tables.as_ref()).is_empty(),
            "kernel drop should remove fd tables"
        );
        assert_eq!(
            retained.pipes.pipe_count(),
            0,
            "kernel drop should close pipes"
        );
        assert_eq!(
            retained.ptys.pty_count(),
            0,
            "kernel drop should close PTYs"
        );
        assert_eq!(
            retained.sockets.snapshot().sockets,
            0,
            "kernel drop should reclaim sockets"
        );
        assert!(
            lock_or_recover(retained.driver_pids.as_ref()).is_empty(),
            "kernel drop should clear driver-owned pid tracking"
        );
    }

    /// A process owned by one driver must not be able to join a process
    /// group whose leader is owned by a different driver.
    #[test]
    fn setpgid_rejects_joining_a_process_group_owned_by_another_driver() {
        let kernel = KernelVm::new(MemoryFileSystem::new(), KernelVmConfig::new("vm-setpgid"));

        // Group leader, registered directly in the process table under
        // "driver-a".
        let leader_pid = kernel.processes.allocate_pid().expect("allocate pid");
        kernel.processes.register(
            leader_pid,
            String::from("driver-a"),
            String::from("sh"),
            Vec::new(),
            ProcessContext {
                pid: leader_pid,
                ppid: 0,
                env: BTreeMap::new(),
                cwd: String::from("/"),
                umask: DEFAULT_PROCESS_UMASK,
                fds: Default::default(),
                identity: ProcessIdentity::default(),
                blocked_signals: SignalSet::empty(),
                pending_signals: SignalSet::empty(),
            },
            Arc::new(StubDriverProcess::default()),
        );

        // Child of the leader, but registered under "driver-b".
        let peer_pid = kernel.processes.allocate_pid().expect("allocate pid");
        kernel.processes.register(
            peer_pid,
            String::from("driver-b"),
            String::from("sh"),
            Vec::new(),
            ProcessContext {
                pid: peer_pid,
                ppid: leader_pid,
                env: BTreeMap::new(),
                cwd: String::from("/"),
                umask: DEFAULT_PROCESS_UMASK,
                fds: Default::default(),
                identity: ProcessIdentity::default(),
                blocked_signals: SignalSet::empty(),
                pending_signals: SignalSet::empty(),
            },
            Arc::new(StubDriverProcess::default()),
        );

        // Registration above went straight to the process table, so the
        // driver ownership map is filled in by hand.
        lock_or_recover(&kernel.driver_pids)
            .entry(String::from("driver-a"))
            .or_default()
            .insert(leader_pid);
        lock_or_recover(&kernel.driver_pids)
            .entry(String::from("driver-b"))
            .or_default()
            .insert(peer_pid);

        let error = kernel
            .setpgid("driver-b", peer_pid, leader_pid)
            .expect_err("cross-driver process-group join should be denied");
        assert_eq!(error.code(), "EPERM");
    }

    /// Only the driver that owns a process may change its signal mask or
    /// read its pending signals; any other driver gets `EPERM`.
    #[test]
    fn sigprocmask_and_sigpending_require_process_ownership() {
        let mut kernel = KernelVm::new(MemoryFileSystem::new(), KernelVmConfig::new("vm-sigmask"));
        let process = kernel
            .register_process(
                String::from("driver-a"),
                String::from("sleep"),
                Vec::new(),
                ProcessContext {
                    pid: 0,
                    ppid: 0,
                    env: BTreeMap::new(),
                    cwd: String::from("/"),
                    umask: DEFAULT_PROCESS_UMASK,
                    fds: Default::default(),
                    identity: ProcessIdentity::default(),
                    blocked_signals: SignalSet::empty(),
                    pending_signals: SignalSet::empty(),
                },
                None,
            )
            .expect("create virtual process");
        let mask =
            SignalSet::from_signal(crate::process_table::SIGCHLD).expect("SIGCHLD should be valid");

        // Owner path: the call succeeds and reports the previously empty mask.
        let previous = kernel
            .sigprocmask("driver-a", process.pid(), SigmaskHow::Block, mask)
            .expect("owner should update signal mask");
        assert_eq!(previous, SignalSet::empty());
        assert_eq!(
            kernel
                .sigpending("driver-a", process.pid())
                .expect("owner should read pending signals"),
            SignalSet::empty()
        );

        // Foreign driver path: both calls are refused.
        let error = kernel
            .sigprocmask("driver-b", process.pid(), SigmaskHow::Block, mask)
            .expect_err("foreign driver should be rejected");
        assert_eq!(error.code(), "EPERM");
        let error = kernel
            .sigpending("driver-b", process.pid())
            .expect_err("foreign driver should be rejected");
        assert_eq!(error.code(), "EPERM");
    }

    /// Runs `cleanup_process_resources` for pid 41 on one thread while a
    /// second thread attempts `dup2` on the same process's FD table, and
    /// checks that `dup2` finds no table and nothing leaks.
    ///
    /// A test hook pauses cleanup part-way through so the `dup2` attempt can
    /// be started while cleanup is still in progress.
    #[test]
    fn cleanup_process_resources_blocks_concurrent_dup2_until_pipe_cleanup_finishes() {
        let fd_tables = Arc::new(Mutex::new(FdTableManager::new()));
        let file_locks = FileLockManager::new();
        let pipes = PipeManager::new();
        let ptys = PtyManager::new();
        let sockets = SocketTable::new();
        let driver_pids = Arc::new(Mutex::new(BTreeMap::from([(
            String::from("driver"),
            BTreeSet::from([41]),
        )])));
        let pipe = pipes.create_pipe();

        // Give pid 41 an FD table holding both ends of the pipe; the
        // `Some(10)` / `Some(11)` arguments pin the descriptor numbers used
        // by `dup2` below.
        {
            let mut tables = lock_or_recover(fd_tables.as_ref());
            let table = tables.create(41);
            table
                .open_with(
                    Arc::clone(&pipe.read.description),
                    pipe.read.filetype,
                    Some(10),
                )
                .expect("open pipe read end");
            table
                .open_with(
                    Arc::clone(&pipe.write.description),
                    pipe.write.filetype,
                    Some(11),
                )
                .expect("open pipe write end");
        }

        // Handshake flags: `.0` = the hook has been entered, `.1` = the test
        // allows the hook to return.
        let hook_state = Arc::new((Mutex::new((false, false)), Condvar::new()));
        let hook_state_for_cleanup = Arc::clone(&hook_state);
        // NOTE(review): presumably the hook runs inside
        // `cleanup_process_resources` while it still holds the FD-table lock —
        // confirm at the hook's call site.
        set_cleanup_process_resources_test_hook(Some(Arc::new(move || {
            let (state, wake) = &*hook_state_for_cleanup;
            let mut state = lock_or_recover(state);
            state.0 = true;
            wake.notify_all();
            while !state.1 {
                state = wake.wait(state).expect("wait for cleanup release");
            }
        })));

        let fd_tables_for_cleanup = Arc::clone(&fd_tables);
        let pipes_for_cleanup = pipes.clone();
        let driver_pids_for_cleanup = Arc::clone(&driver_pids);
        let cleanup_thread = thread::spawn(move || {
            cleanup_process_resources(
                fd_tables_for_cleanup.as_ref(),
                &file_locks,
                &pipes_for_cleanup,
                &ptys,
                &sockets,
                driver_pids_for_cleanup.as_ref(),
                41,
            );
        });

        // Wait until cleanup is parked inside the hook.
        {
            let (state, wake) = &*hook_state;
            let mut state = lock_or_recover(state);
            while !state.0 {
                state = wake.wait(state).expect("wait for cleanup hook");
            }
        }

        // Start the competing `dup2`. It reports "ESRCH" if the process's FD
        // table is already gone by the time it gets the lock.
        let fd_tables_for_dup = Arc::clone(&fd_tables);
        let dup_thread = thread::spawn(move || {
            let mut tables = lock_or_recover(fd_tables_for_dup.as_ref());
            let Some(table) = tables.get_mut(41) else {
                return Err(String::from("ESRCH"));
            };
            table.dup2(10, 12).map_err(|error| error.code().to_string())
        });

        // Let cleanup continue.
        {
            let (state, wake) = &*hook_state;
            let mut state = lock_or_recover(state);
            state.1 = true;
            wake.notify_all();
        }

        cleanup_thread.join().expect("cleanup thread should finish");
        let dup_result = dup_thread.join().expect("dup thread should finish");
        // Uninstall the hook before asserting so later cleanup calls are not
        // intercepted by it.
        set_cleanup_process_resources_test_hook(None);

        assert_eq!(dup_result, Err(String::from("ESRCH")));
        assert!(
            lock_or_recover(fd_tables.as_ref()).get(41).is_none(),
            "cleanup should remove the process FD table"
        );
        assert_eq!(pipes.pipe_count(), 0, "pipe cleanup should not leak");
        assert!(
            lock_or_recover(driver_pids.as_ref())
                .get("driver")
                .is_none_or(|pids| pids.is_empty()),
            "driver ownership should be cleared"
        );
    }

    /// Dropping a kernel that still owns live resources releases all of them.
    #[test]
    fn drop_disposes_live_kernel_vm_resources() {
        let (kernel, retained) = kernel_with_live_resources();
        drop(kernel);
        assert_kernel_drop_released_resources(&retained);
    }

    /// The same release must happen when the kernel is dropped by unwinding
    /// from a panic.
    #[test]
    fn drop_during_panic_still_disposes_live_kernel_vm_resources() {
        // The retained handles are handed out through a shared slot because
        // the closure below never returns normally.
        let retained = Arc::new(Mutex::new(None::<RetainedKernelResources>));
        let retained_for_panic = Arc::clone(&retained);

        let panic_result = catch_unwind(AssertUnwindSafe(move || {
            let (kernel, resources) = kernel_with_live_resources();
            *lock_or_recover(retained_for_panic.as_ref()) = Some(resources);
            // Bind the kernel to a local so it is dropped during unwinding.
            let _kernel = kernel;
            panic!("intentional panic to exercise KernelVm::drop");
        }));

        assert!(panic_result.is_err(), "panic should be observed");
        let retained = lock_or_recover(retained.as_ref())
            .take()
            .expect("panic path should retain resources for assertions");
        assert_kernel_drop_released_resources(&retained);
    }
}