1use std::collections::{btree_map::Values, BTreeMap, BTreeSet};
2use std::error::Error;
3use std::fmt;
4use std::sync::atomic::{AtomicU32, AtomicU64, AtomicUsize, Ordering};
5use std::sync::{Arc, Condvar, Mutex, MutexGuard};
6
/// Descriptor limit used by `FdTableManager::default`.
pub const MAX_FDS_PER_PROCESS: usize = 256;

// Open flags. The access mode lives in the two low bits; the remaining
// values are independent bit flags written in octal.
pub const O_RDONLY: u32 = 0;
pub const O_WRONLY: u32 = 1;
pub const O_RDWR: u32 = 2;
pub const O_CREAT: u32 = 0o100;
pub const O_EXCL: u32 = 0o200;
pub const O_TRUNC: u32 = 0o1000;
pub const O_APPEND: u32 = 0o2000;
pub const O_NONBLOCK: u32 = 0o4000;

// Commands understood by `ProcessFdTable::fcntl`.
pub const F_DUPFD: u32 = 0;
pub const F_GETFD: u32 = 1;
pub const F_SETFD: u32 = 2;
pub const F_GETFL: u32 = 3;
pub const F_SETFL: u32 = 4;

/// The only descriptor flag tracked by `F_GETFD` / `F_SETFD`.
pub const FD_CLOEXEC: u32 = 1;

// Operation bits decoded by `FlockOperation::from_bits`.
pub const LOCK_SH: u32 = 1;
pub const LOCK_EX: u32 = 2;
pub const LOCK_NB: u32 = 4;
pub const LOCK_UN: u32 = 8;

// File type tags stored in `FdEntry::filetype`.
pub const FILETYPE_UNKNOWN: u8 = 0;
pub const FILETYPE_CHARACTER_DEVICE: u8 = 2;
pub const FILETYPE_DIRECTORY: u8 = 3;
pub const FILETYPE_REGULAR_FILE: u8 = 4;
pub const FILETYPE_PIPE: u8 = 6;
pub const FILETYPE_SYMBOLIC_LINK: u8 = 7;
34
35pub type FdResult<T> = Result<T, FdTableError>;
36pub type SharedFileDescription = Arc<FileDescription>;
37
/// Error returned by descriptor-table and file-lock operations.
///
/// Pairs an errno-style symbolic code (such as `"EBADF"`) with a
/// human-readable message; `Display` renders them as `"<code>: <message>"`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FdTableError {
    code: &'static str,
    message: String,
}

impl FdTableError {
    /// Returns the symbolic error code.
    pub fn code(&self) -> &'static str {
        self.code
    }

    /// Single construction point shared by the named constructors below.
    fn with_code(code: &'static str, message: String) -> Self {
        FdTableError { code, message }
    }

    /// `EBADF`: `fd` does not name a usable descriptor.
    fn bad_file_descriptor(fd: u32) -> Self {
        Self::with_code("EBADF", format!("bad file descriptor {fd}"))
    }

    /// `EMFILE`: no descriptor slot is available.
    fn too_many_open_files() -> Self {
        Self::with_code("EMFILE", "too many open files".to_owned())
    }

    /// `EINVAL` with a caller-supplied message.
    fn invalid_argument(message: impl Into<String>) -> Self {
        Self::with_code("EINVAL", message.into())
    }

    /// `EWOULDBLOCK` with a caller-supplied message.
    fn would_block(message: impl Into<String>) -> Self {
        Self::with_code("EWOULDBLOCK", message.into())
    }
}

impl fmt::Display for FdTableError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { code, message } = self;
        write!(f, "{code}: {message}")
    }
}

impl Error for FdTableError {}
85
86#[derive(Debug)]
87pub struct FileDescription {
88 id: u64,
89 path: String,
90 lock_target: Option<FileLockTarget>,
91 cursor: AtomicU64,
92 flags: AtomicU32,
93 ref_count: AtomicUsize,
94}
95
96impl FileDescription {
97 pub fn new(id: u64, path: impl Into<String>, flags: u32) -> Self {
98 Self::with_ref_count_and_lock(id, path, flags, 1, None)
99 }
100
101 pub fn new_with_lock(
102 id: u64,
103 path: impl Into<String>,
104 flags: u32,
105 lock_target: Option<FileLockTarget>,
106 ) -> Self {
107 Self::with_ref_count_and_lock(id, path, flags, 1, lock_target)
108 }
109
110 pub fn with_ref_count(id: u64, path: impl Into<String>, flags: u32, ref_count: usize) -> Self {
111 Self::with_ref_count_and_lock(id, path, flags, ref_count, None)
112 }
113
114 pub fn with_ref_count_and_lock(
115 id: u64,
116 path: impl Into<String>,
117 flags: u32,
118 ref_count: usize,
119 lock_target: Option<FileLockTarget>,
120 ) -> Self {
121 Self {
122 id,
123 path: path.into(),
124 lock_target,
125 cursor: AtomicU64::new(0),
126 flags: AtomicU32::new(flags),
127 ref_count: AtomicUsize::new(ref_count),
128 }
129 }
130
131 pub fn id(&self) -> u64 {
132 self.id
133 }
134
135 pub fn path(&self) -> &str {
136 &self.path
137 }
138
139 pub fn lock_target(&self) -> Option<FileLockTarget> {
140 self.lock_target
141 }
142
143 pub fn cursor(&self) -> u64 {
144 self.cursor.load(Ordering::SeqCst)
145 }
146
147 pub fn set_cursor(&self, cursor: u64) {
148 self.cursor.store(cursor, Ordering::SeqCst);
149 }
150
151 pub fn flags(&self) -> u32 {
152 self.flags.load(Ordering::SeqCst)
153 }
154
155 pub fn update_flags(&self, mask: u32, flags: u32) -> u32 {
156 let mut current = self.flags();
157 loop {
158 let next = (current & !mask) | (flags & mask);
159 match self
160 .flags
161 .compare_exchange(current, next, Ordering::SeqCst, Ordering::SeqCst)
162 {
163 Ok(_) => return next,
164 Err(observed) => current = observed,
165 }
166 }
167 }
168
169 pub fn ref_count(&self) -> usize {
170 self.ref_count.load(Ordering::SeqCst)
171 }
172
173 pub fn increment_ref_count(&self) -> usize {
174 self.ref_count.fetch_add(1, Ordering::SeqCst) + 1
175 }
176
177 pub fn decrement_ref_count(&self) -> usize {
178 let mut current = self.ref_count.load(Ordering::SeqCst);
179 loop {
180 let next = current.saturating_sub(1);
181 match self
182 .ref_count
183 .compare_exchange(current, next, Ordering::SeqCst, Ordering::SeqCst)
184 {
185 Ok(_) => return next,
186 Err(observed) => current = observed,
187 }
188 }
189 }
190}
191
192#[derive(Debug, Clone)]
193pub struct FdEntry {
194 pub fd: u32,
195 pub description: SharedFileDescription,
196 pub status_flags: u32,
197 pub fd_flags: u32,
198 pub rights: u64,
199 pub filetype: u8,
200}
201
/// Snapshot returned by `ProcessFdTable::stat`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct FdStat {
    /// `FILETYPE_*` tag of the descriptor.
    pub filetype: u8,
    /// Visible flags: description flags merged with per-descriptor status flags.
    pub flags: u32,
    /// Rights value copied from the entry.
    pub rights: u64,
}
208
209#[derive(Debug, Clone)]
210pub struct StdioOverride {
211 pub description: SharedFileDescription,
212 pub filetype: u8,
213}
214
215#[derive(Debug, Clone)]
216struct DescriptionFactory {
217 next_description_id: Arc<AtomicU64>,
218}
219
220impl DescriptionFactory {
221 fn new(starting_id: u64) -> Self {
222 Self {
223 next_description_id: Arc::new(AtomicU64::new(starting_id)),
224 }
225 }
226
227 fn allocate(&self, path: &str, flags: u32) -> SharedFileDescription {
228 self.allocate_with_lock(path, flags, None)
229 }
230
231 fn allocate_with_lock(
232 &self,
233 path: &str,
234 flags: u32,
235 lock_target: Option<FileLockTarget>,
236 ) -> SharedFileDescription {
237 let next_id = self.next_description_id.fetch_add(1, Ordering::SeqCst);
238 Arc::new(FileDescription::new_with_lock(
239 next_id,
240 path,
241 flags,
242 lock_target,
243 ))
244 }
245}
246
/// Key identifying the object an advisory lock applies to, wrapping an inode number.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct FileLockTarget {
    ino: u64,
}

impl FileLockTarget {
    /// Wraps `ino` as a lock target.
    pub const fn new(ino: u64) -> Self {
        FileLockTarget { ino }
    }

    /// Returns the wrapped inode number.
    pub const fn ino(self) -> u64 {
        let Self { ino } = self;
        ino
    }
}
261
/// Strength of an advisory lock request.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FileLockMode {
    /// May be held by several owners at once.
    Shared,
    /// Held by at most one owner, with no shared holders besides that owner.
    Exclusive,
}
267
268#[derive(Debug, Clone, Copy, PartialEq, Eq)]
269pub enum FlockOperation {
270 Shared { nonblocking: bool },
271 Exclusive { nonblocking: bool },
272 Unlock,
273}
274
275impl FlockOperation {
276 pub fn from_bits(operation: u32) -> FdResult<Self> {
277 let nonblocking = operation & LOCK_NB != 0;
278 match operation & !LOCK_NB {
279 LOCK_SH => Ok(Self::Shared { nonblocking }),
280 LOCK_EX => Ok(Self::Exclusive { nonblocking }),
281 LOCK_UN => Ok(Self::Unlock),
282 _ => Err(FdTableError::invalid_argument(format!(
283 "invalid flock operation {operation:#x}"
284 ))),
285 }
286 }
287}
288
289#[derive(Debug, Clone)]
290pub struct ProcessFdTable {
291 entries: BTreeMap<u32, FdEntry>,
292 next_fd: u32,
293 alloc_desc: DescriptionFactory,
294 max_fds: usize,
295}
296
297impl ProcessFdTable {
298 fn new(alloc_desc: DescriptionFactory, max_fds: usize) -> Self {
299 Self {
300 entries: BTreeMap::new(),
301 next_fd: 3,
302 alloc_desc,
303 max_fds,
304 }
305 }
306
307 pub fn max_fds(&self) -> usize {
308 self.max_fds
309 }
310
311 pub fn init_stdio(
312 &mut self,
313 stdin_desc: SharedFileDescription,
314 stdout_desc: SharedFileDescription,
315 stderr_desc: SharedFileDescription,
316 ) {
317 self.entries.insert(
318 0,
319 FdEntry {
320 fd: 0,
321 description: stdin_desc,
322 status_flags: 0,
323 fd_flags: 0,
324 rights: 0,
325 filetype: FILETYPE_CHARACTER_DEVICE,
326 },
327 );
328 self.entries.insert(
329 1,
330 FdEntry {
331 fd: 1,
332 description: stdout_desc,
333 status_flags: 0,
334 fd_flags: 0,
335 rights: 0,
336 filetype: FILETYPE_CHARACTER_DEVICE,
337 },
338 );
339 self.entries.insert(
340 2,
341 FdEntry {
342 fd: 2,
343 description: stderr_desc,
344 status_flags: 0,
345 fd_flags: 0,
346 rights: 0,
347 filetype: FILETYPE_CHARACTER_DEVICE,
348 },
349 );
350 }
351
352 pub fn init_stdio_with_types(
353 &mut self,
354 stdin_desc: SharedFileDescription,
355 stdin_type: u8,
356 stdout_desc: SharedFileDescription,
357 stdout_type: u8,
358 stderr_desc: SharedFileDescription,
359 stderr_type: u8,
360 ) {
361 stdin_desc.increment_ref_count();
362 stdout_desc.increment_ref_count();
363 stderr_desc.increment_ref_count();
364 self.entries.insert(
365 0,
366 FdEntry {
367 fd: 0,
368 description: stdin_desc,
369 status_flags: 0,
370 fd_flags: 0,
371 rights: 0,
372 filetype: stdin_type,
373 },
374 );
375 self.entries.insert(
376 1,
377 FdEntry {
378 fd: 1,
379 description: stdout_desc,
380 status_flags: 0,
381 fd_flags: 0,
382 rights: 0,
383 filetype: stdout_type,
384 },
385 );
386 self.entries.insert(
387 2,
388 FdEntry {
389 fd: 2,
390 description: stderr_desc,
391 status_flags: 0,
392 fd_flags: 0,
393 rights: 0,
394 filetype: stderr_type,
395 },
396 );
397 }
398
399 pub fn open(&mut self, path: &str, flags: u32) -> FdResult<u32> {
400 self.open_with_details(path, flags, FILETYPE_REGULAR_FILE, None)
401 }
402
403 pub fn open_with_filetype(&mut self, path: &str, flags: u32, filetype: u8) -> FdResult<u32> {
404 self.open_with_details(path, flags, filetype, None)
405 }
406
407 pub fn open_with_details(
408 &mut self,
409 path: &str,
410 flags: u32,
411 filetype: u8,
412 lock_target: Option<FileLockTarget>,
413 ) -> FdResult<u32> {
414 let fd = self.allocate_fd()?;
415 let description =
416 self.alloc_desc
417 .allocate_with_lock(path, description_flags(flags), lock_target);
418 self.entries.insert(
419 fd,
420 FdEntry {
421 fd,
422 description,
423 status_flags: status_flags(flags),
424 fd_flags: 0,
425 rights: 0,
426 filetype,
427 },
428 );
429 Ok(fd)
430 }
431
432 pub fn open_with(
433 &mut self,
434 description: SharedFileDescription,
435 filetype: u8,
436 target_fd: Option<u32>,
437 ) -> FdResult<u32> {
438 let fd = match target_fd {
439 Some(fd) => {
440 self.validate_fd_bounds(fd)?;
441 if self.entries.contains_key(&fd) {
442 self.close(fd);
443 }
444 fd
445 }
446 None => self.allocate_fd()?,
447 };
448 description.increment_ref_count();
449 self.entries.insert(
450 fd,
451 FdEntry {
452 fd,
453 description,
454 status_flags: 0,
455 fd_flags: 0,
456 rights: 0,
457 filetype,
458 },
459 );
460 Ok(fd)
461 }
462
463 pub fn get(&self, fd: u32) -> Option<&FdEntry> {
464 self.entries.get(&fd)
465 }
466
467 pub fn close(&mut self, fd: u32) -> bool {
468 let Some(entry) = self.entries.remove(&fd) else {
469 return false;
470 };
471 entry.description.decrement_ref_count();
472 true
473 }
474
475 pub fn dup(&mut self, fd: u32) -> FdResult<u32> {
476 self.dup_with_status_flags(fd, None)
477 }
478
479 pub fn dup_with_status_flags(
480 &mut self,
481 fd: u32,
482 status_flags_override: Option<u32>,
483 ) -> FdResult<u32> {
484 let entry = self
485 .entries
486 .get(&fd)
487 .cloned()
488 .ok_or_else(|| FdTableError::bad_file_descriptor(fd))?;
489 let new_fd = self.allocate_fd()?;
490 self.duplicate_entry(
491 &entry,
492 new_fd,
493 status_flags_override.unwrap_or(entry.status_flags),
494 0,
495 )
496 }
497
498 pub fn dup2(&mut self, old_fd: u32, new_fd: u32) -> FdResult<()> {
499 let entry = self
500 .entries
501 .get(&old_fd)
502 .cloned()
503 .ok_or_else(|| FdTableError::bad_file_descriptor(old_fd))?;
504 self.validate_fd_bounds(new_fd)?;
505 if old_fd == new_fd {
506 return Ok(());
507 }
508
509 if self.entries.contains_key(&new_fd) {
510 self.close(new_fd);
511 }
512
513 self.duplicate_entry(&entry, new_fd, entry.status_flags, 0)?;
514 Ok(())
515 }
516
517 pub fn stat(&self, fd: u32) -> FdResult<FdStat> {
518 let entry = self
519 .entries
520 .get(&fd)
521 .ok_or_else(|| FdTableError::bad_file_descriptor(fd))?;
522 Ok(FdStat {
523 filetype: entry.filetype,
524 flags: visible_fd_flags(entry.description.flags(), entry.status_flags),
525 rights: entry.rights,
526 })
527 }
528
529 pub fn fcntl(&mut self, fd: u32, command: u32, arg: u32) -> FdResult<u32> {
530 match command {
531 F_DUPFD => {
532 let entry = self
533 .entries
534 .get(&fd)
535 .cloned()
536 .ok_or_else(|| FdTableError::bad_file_descriptor(fd))?;
537 let min_fd = self.validate_fcntl_dup_min(arg)?;
538 let new_fd = self.allocate_fd_from(min_fd)?;
539 self.duplicate_entry(&entry, new_fd, entry.status_flags, 0)
540 }
541 F_GETFD => {
542 let entry = self
543 .entries
544 .get(&fd)
545 .ok_or_else(|| FdTableError::bad_file_descriptor(fd))?;
546 Ok(entry.fd_flags & FD_CLOEXEC)
547 }
548 F_SETFD => {
549 let entry = self
550 .entries
551 .get_mut(&fd)
552 .ok_or_else(|| FdTableError::bad_file_descriptor(fd))?;
553 entry.fd_flags = arg & FD_CLOEXEC;
554 Ok(0)
555 }
556 F_GETFL => {
557 let entry = self
558 .entries
559 .get(&fd)
560 .ok_or_else(|| FdTableError::bad_file_descriptor(fd))?;
561 Ok(visible_fd_flags(
562 entry.description.flags(),
563 entry.status_flags,
564 ))
565 }
566 F_SETFL => {
567 let entry = self
568 .entries
569 .get_mut(&fd)
570 .ok_or_else(|| FdTableError::bad_file_descriptor(fd))?;
571 entry.status_flags = arg & ENTRY_STATUS_FLAG_MASK;
572 entry.description.update_flags(SHARED_STATUS_FLAG_MASK, arg);
573 Ok(0)
574 }
575 _ => Err(FdTableError::invalid_argument(format!(
576 "unsupported fcntl command {command}"
577 ))),
578 }
579 }
580
581 pub fn fork(&self) -> Self {
582 let mut child = Self::new(self.alloc_desc.clone(), self.max_fds);
583 child.next_fd = self.next_fd;
584
585 for (fd, entry) in &self.entries {
586 entry.description.increment_ref_count();
587 child.entries.insert(
588 *fd,
589 FdEntry {
590 fd: *fd,
591 description: Arc::clone(&entry.description),
592 status_flags: entry.status_flags,
593 fd_flags: entry.fd_flags,
594 rights: entry.rights,
595 filetype: entry.filetype,
596 },
597 );
598 }
599
600 child
601 }
602
603 pub fn close_all(&mut self) {
604 let fds: Vec<u32> = self.entries.keys().copied().collect();
605 for fd in fds {
606 self.close(fd);
607 }
608 }
609
610 pub fn len(&self) -> usize {
611 self.entries.len()
612 }
613
614 pub fn is_empty(&self) -> bool {
615 self.entries.is_empty()
616 }
617
618 pub fn iter(&self) -> Values<'_, u32, FdEntry> {
619 self.entries.values()
620 }
621
622 fn allocate_fd(&mut self) -> FdResult<u32> {
623 if self.entries.len() >= self.max_fds {
624 return Err(FdTableError::too_many_open_files());
625 }
626
627 let start = usize::try_from(self.next_fd).unwrap_or(0) % self.max_fds;
628 for offset in 0..self.max_fds {
629 let candidate = ((start + offset) % self.max_fds) as u32;
630 if !self.entries.contains_key(&candidate) {
631 self.next_fd = candidate.saturating_add(1);
632 return Ok(candidate);
633 }
634 }
635
636 Err(FdTableError::too_many_open_files())
637 }
638
639 fn allocate_fd_from(&mut self, min_fd: u32) -> FdResult<u32> {
640 if self.entries.len() >= self.max_fds {
641 return Err(FdTableError::too_many_open_files());
642 }
643
644 if min_fd as usize >= self.max_fds {
645 return Err(FdTableError::invalid_argument(format!(
646 "fd {min_fd} exceeds process fd limit"
647 )));
648 }
649
650 for candidate in min_fd..self.max_fds as u32 {
651 if !self.entries.contains_key(&candidate) {
652 self.next_fd = candidate.saturating_add(1);
653 return Ok(candidate);
654 }
655 }
656
657 Err(FdTableError::too_many_open_files())
658 }
659
660 fn duplicate_entry(
661 &mut self,
662 entry: &FdEntry,
663 new_fd: u32,
664 status_flags: u32,
665 fd_flags: u32,
666 ) -> FdResult<u32> {
667 entry.description.increment_ref_count();
668 self.entries.insert(
669 new_fd,
670 FdEntry {
671 fd: new_fd,
672 description: Arc::clone(&entry.description),
673 status_flags,
674 fd_flags,
675 rights: entry.rights,
676 filetype: entry.filetype,
677 },
678 );
679 Ok(new_fd)
680 }
681
682 fn validate_fd_bounds(&self, fd: u32) -> FdResult<()> {
683 if fd as usize >= self.max_fds {
684 return Err(FdTableError::bad_file_descriptor(fd));
685 }
686 Ok(())
687 }
688
689 fn validate_fcntl_dup_min(&self, min_fd: u32) -> FdResult<u32> {
690 if min_fd as usize >= self.max_fds {
691 return Err(FdTableError::invalid_argument(format!(
692 "fd {min_fd} exceeds process fd limit"
693 )));
694 }
695 Ok(min_fd)
696 }
697}
698
699fn description_flags(flags: u32) -> u32 {
700 flags & !status_flags(flags)
701}
702
703fn status_flags(flags: u32) -> u32 {
704 flags & ENTRY_STATUS_FLAG_MASK
705}
706
707fn visible_fd_flags(description_flags: u32, entry_status_flags: u32) -> u32 {
708 (description_flags & (0b11 | SHARED_STATUS_FLAG_MASK))
709 | (entry_status_flags & ENTRY_STATUS_FLAG_MASK)
710}
711
712const SHARED_STATUS_FLAG_MASK: u32 = O_APPEND;
713const ENTRY_STATUS_FLAG_MASK: u32 = O_NONBLOCK;
714
715impl<'a> IntoIterator for &'a ProcessFdTable {
716 type Item = &'a FdEntry;
717 type IntoIter = Values<'a, u32, FdEntry>;
718
719 fn into_iter(self) -> Self::IntoIter {
720 self.entries.values()
721 }
722}
723
724#[derive(Debug, Clone)]
725pub struct FdTableManager {
726 tables: BTreeMap<u32, ProcessFdTable>,
727 alloc_desc: DescriptionFactory,
728 max_fds: usize,
729}
730
731impl Default for FdTableManager {
732 fn default() -> Self {
733 Self {
734 tables: BTreeMap::new(),
735 alloc_desc: DescriptionFactory::new(1),
736 max_fds: MAX_FDS_PER_PROCESS,
737 }
738 }
739}
740
741impl FdTableManager {
742 pub fn new() -> Self {
743 Self::default()
744 }
745
746 pub fn with_max_fds(max_fds: usize) -> Self {
747 Self {
748 max_fds,
749 ..Self::default()
750 }
751 }
752
753 pub fn create(&mut self, pid: u32) -> &mut ProcessFdTable {
754 let mut table = ProcessFdTable::new(self.alloc_desc.clone(), self.max_fds);
755 table.init_stdio(
756 self.alloc_desc.allocate("/dev/stdin", O_RDONLY),
757 self.alloc_desc.allocate("/dev/stdout", O_WRONLY),
758 self.alloc_desc.allocate("/dev/stderr", O_WRONLY),
759 );
760 self.remove(pid);
761 self.tables.insert(pid, table);
762 self.tables
763 .get_mut(&pid)
764 .expect("newly created FD table should be stored")
765 }
766
767 pub fn create_with_stdio(
768 &mut self,
769 pid: u32,
770 stdin_override: Option<StdioOverride>,
771 stdout_override: Option<StdioOverride>,
772 stderr_override: Option<StdioOverride>,
773 ) -> &mut ProcessFdTable {
774 let mut table = ProcessFdTable::new(self.alloc_desc.clone(), self.max_fds);
775 let stdin_desc = stdin_override
776 .as_ref()
777 .map(|entry| Arc::clone(&entry.description))
778 .unwrap_or_else(|| self.alloc_desc.allocate("/dev/stdin", O_RDONLY));
779 let stdout_desc = stdout_override
780 .as_ref()
781 .map(|entry| Arc::clone(&entry.description))
782 .unwrap_or_else(|| self.alloc_desc.allocate("/dev/stdout", O_WRONLY));
783 let stderr_desc = stderr_override
784 .as_ref()
785 .map(|entry| Arc::clone(&entry.description))
786 .unwrap_or_else(|| self.alloc_desc.allocate("/dev/stderr", O_WRONLY));
787
788 table.init_stdio_with_types(
789 stdin_desc,
790 stdin_override
791 .as_ref()
792 .map(|entry| entry.filetype)
793 .unwrap_or(FILETYPE_CHARACTER_DEVICE),
794 stdout_desc,
795 stdout_override
796 .as_ref()
797 .map(|entry| entry.filetype)
798 .unwrap_or(FILETYPE_CHARACTER_DEVICE),
799 stderr_desc,
800 stderr_override
801 .as_ref()
802 .map(|entry| entry.filetype)
803 .unwrap_or(FILETYPE_CHARACTER_DEVICE),
804 );
805 self.remove(pid);
806 self.tables.insert(pid, table);
807 self.tables
808 .get_mut(&pid)
809 .expect("newly created FD table should be stored")
810 }
811
812 pub fn fork(&mut self, parent_pid: u32, child_pid: u32) -> &mut ProcessFdTable {
813 if !self.tables.contains_key(&parent_pid) {
814 return self.create(child_pid);
815 }
816
817 let child = self
818 .tables
819 .get(&parent_pid)
820 .expect("parent table presence was checked")
821 .fork();
822 self.remove(child_pid);
823 self.tables.insert(child_pid, child);
824 self.tables
825 .get_mut(&child_pid)
826 .expect("forked FD table should be stored")
827 }
828
829 pub fn get(&self, pid: u32) -> Option<&ProcessFdTable> {
830 self.tables.get(&pid)
831 }
832
833 pub fn get_mut(&mut self, pid: u32) -> Option<&mut ProcessFdTable> {
834 self.tables.get_mut(&pid)
835 }
836
837 pub fn has(&self, pid: u32) -> bool {
838 self.tables.contains_key(&pid)
839 }
840
841 pub fn len(&self) -> usize {
842 self.tables.len()
843 }
844
845 pub fn is_empty(&self) -> bool {
846 self.tables.is_empty()
847 }
848
849 pub fn total_open_fds(&self) -> usize {
850 self.tables.values().map(ProcessFdTable::len).sum()
851 }
852
853 pub fn pids(&self) -> Vec<u32> {
854 self.tables.keys().copied().collect()
855 }
856
857 pub fn remove(&mut self, pid: u32) {
858 if let Some(mut table) = self.tables.remove(&pid) {
859 table.close_all();
860 }
861 }
862}
863
864#[derive(Debug, Clone, Default)]
865pub struct FileLockManager {
866 inner: Arc<FileLockManagerInner>,
867}
868
869#[derive(Debug, Default)]
870struct FileLockManagerInner {
871 state: Mutex<FileLockState>,
872 wake: Condvar,
873}
874
875#[derive(Debug, Default)]
876struct FileLockState {
877 entries: BTreeMap<FileLockTarget, FileLockEntry>,
878}
879
/// Holders of the locks on a single target.
#[derive(Debug, Default)]
struct FileLockEntry {
    /// Owner ids currently holding a shared lock.
    shared: BTreeSet<u64>,
    /// Owner id currently holding the exclusive lock, if any.
    exclusive: Option<u64>,
}
885
886impl FileLockManager {
887 pub fn new() -> Self {
888 Self::default()
889 }
890
891 pub fn apply(
892 &self,
893 owner_id: u64,
894 target: FileLockTarget,
895 operation: FlockOperation,
896 ) -> FdResult<()> {
897 match operation {
898 FlockOperation::Shared { nonblocking } => {
899 self.acquire(owner_id, target, FileLockMode::Shared, nonblocking)
900 }
901 FlockOperation::Exclusive { nonblocking } => {
902 self.acquire(owner_id, target, FileLockMode::Exclusive, nonblocking)
903 }
904 FlockOperation::Unlock => {
905 self.release_owner(owner_id);
906 Ok(())
907 }
908 }
909 }
910
911 pub fn release_owner(&self, owner_id: u64) -> bool {
912 let mut state = lock_or_recover(&self.inner.state);
913 let mut released = false;
914 state.entries.retain(|_, entry| {
915 let entry_changed = entry.shared.remove(&owner_id) || entry.exclusive == Some(owner_id);
916 if entry.exclusive == Some(owner_id) {
917 entry.exclusive = None;
918 }
919 released |= entry_changed;
920 !entry.is_empty()
921 });
922 drop(state);
923 if released {
924 self.inner.wake.notify_all();
925 }
926 released
927 }
928
929 fn acquire(
930 &self,
931 owner_id: u64,
932 target: FileLockTarget,
933 mode: FileLockMode,
934 nonblocking: bool,
935 ) -> FdResult<()> {
936 let mut state = lock_or_recover(&self.inner.state);
937 loop {
938 let entry = state.entries.entry(target).or_default();
939 if entry.can_grant(owner_id, mode) {
940 entry.grant(owner_id, mode);
941 return Ok(());
942 }
943
944 if nonblocking {
945 return Err(FdTableError::would_block(
946 "advisory file lock is unavailable",
947 ));
948 }
949
950 state = wait_or_recover(&self.inner.wake, state);
951 }
952 }
953}
954
955impl FileLockEntry {
956 fn can_grant(&self, owner_id: u64, mode: FileLockMode) -> bool {
957 match mode {
958 FileLockMode::Shared => self.exclusive.is_none_or(|owner| owner == owner_id),
959 FileLockMode::Exclusive => {
960 self.exclusive.is_none_or(|owner| owner == owner_id)
961 && self.shared.iter().all(|owner| *owner == owner_id)
962 }
963 }
964 }
965
966 fn grant(&mut self, owner_id: u64, mode: FileLockMode) {
967 match mode {
968 FileLockMode::Shared => {
969 self.exclusive = None;
970 self.shared.insert(owner_id);
971 }
972 FileLockMode::Exclusive => {
973 self.shared.retain(|owner| *owner != owner_id);
974 self.exclusive = Some(owner_id);
975 }
976 }
977 }
978
979 fn is_empty(&self) -> bool {
980 self.exclusive.is_none() && self.shared.is_empty()
981 }
982}
983
/// Locks `mutex`, continuing with the inner guard when the mutex is poisoned
/// instead of propagating the poison error.
fn lock_or_recover<T>(mutex: &Mutex<T>) -> MutexGuard<'_, T> {
    match mutex.lock() {
        Ok(guard) => guard,
        Err(poisoned) => poisoned.into_inner(),
    }
}
989
/// Waits on `condvar`, continuing with the inner guard when the associated
/// mutex turns out to be poisoned on wake-up.
fn wait_or_recover<'a, T>(condvar: &Condvar, guard: MutexGuard<'a, T>) -> MutexGuard<'a, T> {
    match condvar.wait(guard) {
        Ok(reacquired) => reacquired,
        Err(poisoned) => poisoned.into_inner(),
    }
}