use alloc::{
collections::BTreeMap,
sync::{Arc, Weak},
vec::Vec,
};
use core::ffi::c_int;
use ax_errno::{AxError, AxResult};
use ax_task::current;
use linux_raw_sys::general::{
F_GETLK, F_OFD_GETLK, F_OFD_SETLK, F_OFD_SETLKW, F_RDLCK, F_SETLK, F_SETLKW, F_UNLCK, F_WRLCK,
LOCK_EX, LOCK_NB, LOCK_SH, LOCK_UN, O_ACCMODE, O_RDONLY, O_RDWR, O_WRONLY, SEEK_CUR, SEEK_END,
SEEK_SET, flock64,
};
use spin::RwLock;
use starry_process::Pid;
use crate::{
file::{File, FileLike, get_file_like},
mm::UserPtr,
task::{AsThread, futex::WaitQueue},
};
type InodeKey = (u64, u64); type OfdAddr = usize;
const OFD_PID_REPORTED: i32 = -1;
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum LockKind {
Read,
Write,
}
#[derive(Debug)]
enum FOwner {
Posix {
pid: Pid,
},
Ofd {
addr: OfdAddr,
weak: Weak<dyn FileLike>,
},
}
impl FOwner {
fn same_as(&self, other: &FOwner) -> bool {
match (self, other) {
(FOwner::Posix { pid: a }, FOwner::Posix { pid: b }) => a == b,
(FOwner::Ofd { addr: a, .. }, FOwner::Ofd { addr: b, .. }) => a == b,
_ => false,
}
}
fn report_pid(&self) -> i32 {
match self {
FOwner::Posix { pid } => *pid as i32,
FOwner::Ofd { .. } => OFD_PID_REPORTED,
}
}
fn is_dead(&self) -> bool {
match self {
FOwner::Posix { .. } => false,
FOwner::Ofd { weak, .. } => weak.strong_count() == 0,
}
}
}
#[derive(Debug)]
struct FLockEntry {
start: i64,
end: i64,
kind: LockKind,
owner: FOwner,
}
static FCNTL_LOCKS: RwLock<BTreeMap<InodeKey, Vec<FLockEntry>>> = RwLock::new(BTreeMap::new());
static LOCK_WAITERS: RwLock<BTreeMap<InodeKey, Arc<WaitQueue>>> = RwLock::new(BTreeMap::new());
#[derive(Debug)]
struct FlockEntry {
addr: OfdAddr,
weak: Weak<dyn FileLike>,
kind: LockKind,
}
static FLOCK_LOCKS: RwLock<BTreeMap<InodeKey, Vec<FlockEntry>>> = RwLock::new(BTreeMap::new());
static FLOCK_WAITERS: RwLock<BTreeMap<InodeKey, Arc<WaitQueue>>> = RwLock::new(BTreeMap::new());
fn ofd_addr(arc: &Arc<dyn FileLike>) -> OfdAddr {
Arc::as_ptr(arc) as *const () as usize
}
fn current_pid() -> Pid {
current().as_thread().proc_data.proc.pid()
}
fn lockable(fd: c_int) -> AxResult<(InodeKey, Arc<dyn FileLike>)> {
let f = get_file_like(fd)?;
let key = f.inode_key().ok_or(AxError::BadFileDescriptor)?;
Ok((key, f))
}
fn resolve_l_start(file: &Arc<dyn FileLike>, l_whence: i16, l_start: i64) -> AxResult<i64> {
let whence = l_whence as u32;
if whence == SEEK_SET {
return Ok(l_start);
}
if whence != SEEK_CUR && whence != SEEK_END {
return Err(AxError::InvalidInput);
}
let regular = file.downcast_ref::<File>().ok_or(AxError::InvalidInput)?;
let base = if whence == SEEK_CUR {
regular.inner().position().ok_or(AxError::InvalidInput)?
} else {
regular
.inner()
.location()
.len()
.map_err(|_| AxError::InvalidInput)?
};
let base_i64 = i64::try_from(base).map_err(|_| AxError::InvalidInput)?;
base_i64.checked_add(l_start).ok_or(AxError::InvalidInput)
}
fn flock_range(l_start: i64, l_len: i64) -> AxResult<(i64, i64)> {
if l_len == 0 {
if l_start < 0 {
return Err(AxError::InvalidInput);
}
return Ok((l_start, i64::MAX));
}
let (start, end) = if l_len > 0 {
let end = l_start.checked_add(l_len).ok_or(AxError::InvalidInput)?;
(l_start, end)
} else {
let start = l_start.checked_add(l_len).ok_or(AxError::InvalidInput)?;
(start, l_start)
};
if start < 0 {
return Err(AxError::InvalidInput);
}
Ok((start, end))
}
fn ranges_overlap(a_start: i64, a_end: i64, b_start: i64, b_end: i64) -> bool {
a_start < b_end && b_start < a_end
}
fn kinds_conflict(a: LockKind, b: LockKind) -> bool {
!(a == LockKind::Read && b == LockKind::Read)
}
fn fd_supports_kind(file: &Arc<dyn FileLike>, kind: LockKind) -> bool {
let acc = file.open_flags() & O_ACCMODE;
match kind {
LockKind::Read => acc == O_RDONLY || acc == O_RDWR,
LockKind::Write => acc == O_WRONLY || acc == O_RDWR,
}
}
fn clear_owner_overlap(
entries: &mut Vec<FLockEntry>,
owner: &FOwner,
start: i64,
end: i64,
) -> bool {
let mut changed = false;
let mut i = 0;
while i < entries.len() {
let e = &entries[i];
if !e.owner.same_as(owner) || !ranges_overlap(e.start, e.end, start, end) {
i += 1;
continue;
}
changed = true;
let (es, ee, ek) = (e.start, e.end, e.kind);
let snap_owner = match &e.owner {
FOwner::Posix { pid } => FOwner::Posix { pid: *pid },
FOwner::Ofd { addr, weak } => FOwner::Ofd {
addr: *addr,
weak: weak.clone(),
},
};
entries.swap_remove(i);
if es < start {
entries.push(FLockEntry {
start: es,
end: start,
kind: ek,
owner: match &snap_owner {
FOwner::Posix { pid } => FOwner::Posix { pid: *pid },
FOwner::Ofd { addr, weak } => FOwner::Ofd {
addr: *addr,
weak: weak.clone(),
},
},
});
}
if ee > end {
entries.push(FLockEntry {
start: end,
end: ee,
kind: ek,
owner: snap_owner,
});
}
}
changed
}
fn find_conflict<'a>(
entries: &'a mut Vec<FLockEntry>,
requester: &FOwner,
start: i64,
end: i64,
kind: LockKind,
) -> Option<&'a FLockEntry> {
entries.retain(|e| !e.owner.is_dead());
entries.iter().find(|e| {
!e.owner.same_as(requester)
&& ranges_overlap(e.start, e.end, start, end)
&& kinds_conflict(e.kind, kind)
})
}
fn lock_waiters(key: InodeKey) -> Arc<WaitQueue> {
if let Some(wq) = LOCK_WAITERS.read().get(&key) {
return wq.clone();
}
LOCK_WAITERS
.write()
.entry(key)
.or_insert_with(|| Arc::new(WaitQueue::new()))
.clone()
}
pub fn wake_lock_waiters(key: InodeKey) {
let wq = LOCK_WAITERS.read().get(&key).cloned();
if let Some(wq) = wq {
wq.wake(usize::MAX, !0);
}
}
fn flock_waiters(key: InodeKey) -> Arc<WaitQueue> {
if let Some(wq) = FLOCK_WAITERS.read().get(&key) {
return wq.clone();
}
FLOCK_WAITERS
.write()
.entry(key)
.or_insert_with(|| Arc::new(WaitQueue::new()))
.clone()
}
pub fn wake_flock_waiters(key: InodeKey) {
let wq = FLOCK_WAITERS.read().get(&key).cloned();
if let Some(wq) = wq {
wq.wake(usize::MAX, !0);
}
}
fn make_owner(ofd: bool, file: &Arc<dyn FileLike>) -> FOwner {
if ofd {
FOwner::Ofd {
addr: ofd_addr(file),
weak: Arc::downgrade(file),
}
} else {
FOwner::Posix { pid: current_pid() }
}
}
enum SetlkAttempt {
Done { woke_others: bool },
Conflict,
}
fn try_setlk_once(
key: InodeKey,
owner: FOwner,
start: i64,
end: i64,
kind: Option<LockKind>,
) -> SetlkAttempt {
let mut table = FCNTL_LOCKS.write();
let entries = table.entry(key).or_default();
entries.retain(|e| !e.owner.is_dead());
let attempt = match kind {
None => {
let woke_others = clear_owner_overlap(entries, &owner, start, end);
SetlkAttempt::Done { woke_others }
}
Some(k) => {
if find_conflict(entries, &owner, start, end, k).is_some() {
SetlkAttempt::Conflict
} else {
let woke_others = clear_owner_overlap(entries, &owner, start, end);
entries.push(FLockEntry {
start,
end,
kind: k,
owner,
});
SetlkAttempt::Done { woke_others }
}
}
};
if entries.is_empty() {
table.remove(&key);
}
attempt
}
pub fn fcntl_setlk(fd: c_int, arg: usize, ofd: bool, wait: bool) -> AxResult<isize> {
let fl = UserPtr::<flock64>::from(arg).get_as_mut()?;
if ofd && fl.l_pid != 0 {
return Err(AxError::InvalidInput);
}
let (key, file) = lockable(fd)?;
let abs_start = resolve_l_start(&file, fl.l_whence, fl.l_start)?;
let (start, end) = flock_range(abs_start, fl.l_len)?;
let kind = match fl.l_type as u32 {
F_UNLCK => None,
F_RDLCK => Some(LockKind::Read),
F_WRLCK => Some(LockKind::Write),
_ => return Err(AxError::InvalidInput),
};
if let Some(k) = kind
&& !fd_supports_kind(&file, k)
{
return Err(AxError::BadFileDescriptor);
}
loop {
let owner = make_owner(ofd, &file);
match try_setlk_once(key, owner, start, end, kind) {
SetlkAttempt::Done { woke_others } => {
if woke_others {
wake_lock_waiters(key);
}
return Ok(0);
}
SetlkAttempt::Conflict => {
if !wait {
return Err(AxError::WouldBlock);
}
let wq = lock_waiters(key);
wq.wait_if(!0u32, None, || {
let mut table = FCNTL_LOCKS.write();
let Some(entries) = table.get_mut(&key) else {
return false;
};
entries.retain(|e| !e.owner.is_dead());
let owner = make_owner(ofd, &file);
let still_blocked =
find_conflict(entries, &owner, start, end, kind.unwrap()).is_some();
if entries.is_empty() {
table.remove(&key);
}
still_blocked
})?;
}
}
}
}
pub fn fcntl_getlk(fd: c_int, arg: usize, ofd: bool) -> AxResult<isize> {
let fl = UserPtr::<flock64>::from(arg).get_as_mut()?;
if ofd && fl.l_pid != 0 {
return Err(AxError::InvalidInput);
}
let req_kind = match fl.l_type as u32 {
F_RDLCK => LockKind::Read,
F_WRLCK => LockKind::Write,
_ => return Err(AxError::InvalidInput),
};
let (key, file) = lockable(fd)?;
let abs_start = resolve_l_start(&file, fl.l_whence, fl.l_start)?;
let (start, end) = flock_range(abs_start, fl.l_len)?;
let requester = if ofd {
FOwner::Ofd {
addr: ofd_addr(&file),
weak: Arc::downgrade(&file),
}
} else {
FOwner::Posix { pid: current_pid() }
};
let mut table = FCNTL_LOCKS.write();
let (report, empty_after) = {
let entries = table.entry(key).or_default();
let report = find_conflict(entries, &requester, start, end, req_kind).map(|e| {
(
e.kind,
e.owner.report_pid(),
e.start,
if e.end == i64::MAX {
0
} else {
e.end - e.start
},
)
});
(report, entries.is_empty())
};
if empty_after {
table.remove(&key);
}
if let Some((kind, pid, l_start, l_len)) = report {
fl.l_type = (if kind == LockKind::Read {
F_RDLCK
} else {
F_WRLCK
}) as i16;
fl.l_whence = SEEK_SET as i16;
fl.l_start = l_start;
fl.l_len = l_len;
fl.l_pid = pid;
} else {
fl.l_type = F_UNLCK as i16;
}
Ok(0)
}
pub fn dispatch_fcntl(fd: c_int, cmd: c_int, arg: usize) -> Option<AxResult<isize>> {
let cmd = cmd as u32;
Some(match cmd {
F_SETLK => fcntl_setlk(fd, arg, false, false),
F_SETLKW => fcntl_setlk(fd, arg, false, true),
F_OFD_SETLK => fcntl_setlk(fd, arg, true, false),
F_OFD_SETLKW => fcntl_setlk(fd, arg, true, true),
F_GETLK => fcntl_getlk(fd, arg, false),
F_OFD_GETLK => fcntl_getlk(fd, arg, true),
_ => return None,
})
}
pub fn release_pid_locks(pid: Pid) {
let mut affected: Vec<InodeKey> = Vec::new();
{
let mut table = FCNTL_LOCKS.write();
table.retain(|inode, entries| {
let before = entries.len();
entries.retain(|e| match &e.owner {
FOwner::Posix { pid: p } => *p != pid,
FOwner::Ofd { .. } => true,
});
if entries.len() != before {
affected.push(*inode);
}
!entries.is_empty()
});
}
for key in affected {
wake_lock_waiters(key);
}
}
pub fn release_inode_posix_locks(pid: Pid, key: (u64, u64)) {
let woke_someone = {
let mut table = FCNTL_LOCKS.write();
let Some(entries) = table.get_mut(&key) else {
return;
};
let before = entries.len();
entries.retain(|e| match &e.owner {
FOwner::Posix { pid: p } => *p != pid,
FOwner::Ofd { .. } => true,
});
let changed = entries.len() != before;
if entries.is_empty() {
table.remove(&key);
}
changed
};
if woke_someone {
wake_lock_waiters(key);
}
}
enum FlockAttempt {
Done,
Conflict,
}
fn try_flock_once(
key: InodeKey,
addr: OfdAddr,
file: &Arc<dyn FileLike>,
kind: Option<LockKind>,
) -> (FlockAttempt, bool) {
let mut table = FLOCK_LOCKS.write();
let entries = table.entry(key).or_default();
let before = entries.len();
entries.retain(|e| e.weak.strong_count() != 0);
let outcome = match kind {
None => {
entries.retain(|e| e.addr != addr);
FlockAttempt::Done
}
Some(want) => {
entries.retain(|e| e.addr != addr);
let blocked = entries.iter().any(|e| kinds_conflict(e.kind, want));
if blocked {
FlockAttempt::Conflict
} else {
entries.push(FlockEntry {
addr,
weak: Arc::downgrade(file),
kind: want,
});
FlockAttempt::Done
}
}
};
let mutated = entries.len() != before;
if entries.is_empty() {
table.remove(&key);
}
(outcome, mutated)
}
pub fn flock_op(fd: c_int, operation: c_int) -> AxResult<isize> {
let op = operation as u32;
let nonblock = op & LOCK_NB != 0;
let kind = match op & !LOCK_NB {
LOCK_SH => Some(LockKind::Read),
LOCK_EX => Some(LockKind::Write),
LOCK_UN => None,
_ => return Err(AxError::InvalidInput),
};
let (key, file) = lockable(fd)?;
let addr = ofd_addr(&file);
loop {
let (outcome, mutated) = try_flock_once(key, addr, &file, kind);
if mutated {
wake_flock_waiters(key);
}
match outcome {
FlockAttempt::Done => return Ok(0),
FlockAttempt::Conflict => {
if nonblock {
return Err(AxError::WouldBlock);
}
let want = kind.unwrap();
let wq = flock_waiters(key);
wq.wait_if(!0u32, None, || {
let table = FLOCK_LOCKS.read();
let Some(entries) = table.get(&key) else {
return false;
};
entries.iter().any(|e| {
e.weak.strong_count() != 0 && e.addr != addr && kinds_conflict(e.kind, want)
})
})?;
}
}
}
}