use alloc::{format, string::ToString, sync::Arc};
use core::{
ffi::{c_char, c_int},
mem,
ops::{Deref, DerefMut},
};
use ax_errno::{AxError, AxResult};
use ax_fs::{FS_CONTEXT, FileBackend, OpenOptions, OpenResult};
use ax_task::current;
use axfs_ng_vfs::{DirEntry, FileNode, Location, NodeOps, NodeType, Reference};
use bitflags::bitflags;
use linux_raw_sys::general::*;
use crate::{
file::{
Directory, FD_TABLE, File, FileDescriptor, FileLike, NsFd, Pipe, add_file_like,
close_file_like, get_file_like, memfd::Memfd, with_fs,
},
mm::vm_load_string,
pseudofs::{Device, dev::tty},
task::AsThread,
};
fn flags_to_options(flags: c_int, mode: __kernel_mode_t, (uid, gid): (u32, u32)) -> OpenOptions {
let flags = flags as u32;
let mut options = OpenOptions::new();
options.mode(mode).user(uid, gid);
match flags & 0b11 {
O_RDONLY => options.read(true),
O_WRONLY => options.write(true),
_ => options.read(true).write(true),
};
if flags & O_APPEND != 0 {
options.append(true);
}
if flags & O_TRUNC != 0 {
options.truncate(true);
}
if flags & O_CREAT != 0 {
options.create(true);
}
if flags & O_EXCL != 0 && flags & O_CREAT != 0 {
options.create_new(true);
}
if flags & O_DIRECTORY != 0 {
options.directory(true);
}
if flags & O_NOFOLLOW != 0 {
options.no_follow(true);
}
if flags & O_DIRECT != 0 {
options.direct(true);
}
if flags & O_PATH != 0 {
options.path(true);
options.read(true).write(false);
options
.create(false)
.create_new(false)
.truncate(false)
.append(false);
}
options
}
fn add_to_fd(result: OpenResult, flags: u32) -> AxResult<i32> {
if flags & O_NONBLOCK != 0
&& flags & 0b11 == O_WRONLY
&& let OpenResult::File(ref f) = result
&& let Ok(meta) = f.location().metadata()
&& meta.node_type == NodeType::Fifo
{
return Err(AxError::NoSuchDeviceOrAddress);
}
let f: Arc<dyn FileLike> = match result {
OpenResult::File(mut file) => {
if let Ok(device) = file.location().entry().downcast::<Device>() {
if let Ok(meta) = device.metadata()
&& meta.node_type == NodeType::BlockDevice
&& flags & O_EXCL != 0
{
device.inner().open(true)?;
}
let inner = device.inner().as_any();
#[cfg(feature = "plat-dyn")]
if crate::pseudofs::usbfs::is_usbfs_device(inner) {
let wrapped = crate::pseudofs::usbfs::open_usbfs_file(inner, file, flags)?;
if flags & O_NONBLOCK != 0 {
wrapped.set_nonblocking(true)?;
}
return add_file_like(wrapped, flags & O_CLOEXEC != 0);
}
if let Some(ptmx) = inner.downcast_ref::<tty::Ptmx>() {
let (master, pty_number) = ptmx.create_pty()?;
let pts = FS_CONTEXT.lock().resolve("/dev/pts")?;
let entry = DirEntry::new_file(
FileNode::new(master),
NodeType::CharacterDevice,
Reference::new(Some(pts.entry().clone()), pty_number.to_string()),
);
let loc = Location::new(file.location().mountpoint().clone(), entry);
file = ax_fs::File::new(FileBackend::Direct(loc), file.flags());
} else if inner.is::<tty::CurrentTty>() {
let term = current()
.as_thread()
.proc_data
.proc
.group()
.session()
.terminal()
.ok_or(AxError::NotFound)?;
let path = if term.is::<tty::NTtyDriver>() {
"/dev/console".to_string()
} else if let Some(pts) = term.downcast_ref::<tty::PtyDriver>() {
format!("/dev/pts/{}", pts.pty_number())
} else {
panic!("unknown terminal type")
};
let loc = FS_CONTEXT.lock().resolve(&path)?;
file = ax_fs::File::new(FileBackend::Direct(loc), file.flags());
}
}
Arc::new(File::new(file, flags))
}
OpenResult::Dir(dir) => Arc::new(Directory::new(dir, flags)),
};
if flags & O_NONBLOCK != 0 {
f.set_nonblocking(true)?;
}
add_file_like(f, flags & O_CLOEXEC != 0)
}
/// Intercepts opens of `/proc/<pid>/ns/<type>` and `/proc/self/ns/<type>`.
///
/// Returns `None` when `path` does not have that shape (so the caller falls
/// back to a regular open). Otherwise returns `Some` with either the new
/// descriptor or [`AxError::NotFound`] when the process or the namespace type
/// is unknown. Only `O_CLOEXEC` is honoured from `flags`.
fn try_open_nsfd(path: &str, flags: u32) -> Option<AxResult<i32>> {
    let (pid_part, ns_name) = path.strip_prefix("/proc/")?.split_once("/ns/")?;
    if pid_part.is_empty() || ns_name.is_empty() || ns_name.contains('/') {
        return None;
    }

    let pid: u32 = match pid_part {
        "self" => current().as_thread().proc_data.proc.pid(),
        other => other.parse().ok()?,
    };

    let Ok(proc_data) = crate::task::get_process_data(pid) else {
        return Some(Err(AxError::NotFound));
    };

    // Keep the nsproxy lock only while the namespace handle is cloned.
    let nsfd: NsFd = {
        let ns = proc_data.nsproxy.lock();
        match ns_name {
            "uts" => NsFd::Uts(ns.uts_ns.clone()),
            "ipc" => NsFd::Ipc(ns.ipc_ns.clone()),
            "mnt" => NsFd::Mnt(ns.mnt_ns.clone()),
            "pid" => NsFd::Pid(ns.pid_ns.clone()),
            "net" => NsFd::Net(ns.net_ns.clone()),
            "user" => NsFd::User(ns.user_ns.clone()),
            _ => return Some(Err(AxError::NotFound)),
        }
    };

    Some(nsfd.add_to_fd_table(flags & O_CLOEXEC != 0))
}
// Declares the `sys_enter_openat` trace event in the `syscalls` system.
// `sys_openat` reports its raw arguments through `trace_sys_enter_openat`,
// which is presumably generated by this macro (confirm against the
// ktracepoint crate documentation).
ktracepoint::define_event_trace!(
sys_enter_openat,
TP_kops(crate::tracepoint::KernelTraceAux),
TP_system(syscalls),
TP_PROTO(dfd: i32, path: *const u8, o_flags: u32, mode: u32),
// Recorded fields. `path` stores the pointer value itself, not the string
// it points to.
TP_STRUCT__entry{
dfd: i32,
o_flags: u32,
path: u64,
mode: u32,
},
TP_fast_assign{
dfd: dfd,
path: path as u64,
o_flags: o_flags,
mode: mode,
},
TP_ident(__entry),
// Text rendering of one record: the path pointer in hex, the rest via
// `Debug` formatting.
TP_printk({
format!(
"dfd: {}, path: {:#x}, o_flags: {:?}, mode: {:?}",
__entry.dfd,
__entry.path,
__entry.o_flags,
__entry.mode
)
})
);
/// `openat(2)`: opens `path` relative to `dirfd` and returns a new descriptor.
///
/// An absolute `path` ignores `dirfd`. `mode` is filtered through the
/// process umask. Paths of the form `/proc/<pid>/ns/<type>` are served by
/// `try_open_nsfd` instead of the filesystem. When `O_CREAT` (without
/// `O_PATH`) targets a path that did not resolve beforehand, a create
/// notification is sent to inotify after the open succeeds.
///
/// # Errors
/// - [`AxError::NameTooLong`] if the path is 4096 bytes or longer.
/// - [`AxError::NotFound`] if the path is empty.
/// - [`AxError::InvalidInput`] for `O_CREAT | O_DIRECTORY`, or for a
///   read-only `O_TMPFILE`, unless `O_PATH` is also set.
/// - Any error from loading the path string, resolving, opening, or
///   installing the descriptor.
pub fn sys_openat(
    dirfd: c_int,
    path: *const c_char,
    flags: i32,
    mode: __kernel_mode_t,
) -> AxResult<isize> {
    trace_sys_enter_openat(dirfd, path as _, flags as _, mode);

    let curr = current();
    let thread = curr.as_thread();
    let path = vm_load_string(path)?;
    debug!("sys_openat <= {dirfd} {path:?} {flags:#o} {mode:#o}");

    let uflags = flags as u32;
    let has = |bit: u32| uflags & bit != 0;
    let is_path_only = has(O_PATH);

    match path.len() {
        0 => return Err(AxError::NotFound),
        4096.. => return Err(AxError::NameTooLong),
        _ => {}
    }

    // Flag combinations that are invalid unless O_PATH neutralises them.
    let creat_with_directory = has(O_CREAT) && has(O_DIRECTORY);
    let read_only_tmpfile = uflags & O_TMPFILE == O_TMPFILE && uflags & 0b11 == O_RDONLY;
    if !is_path_only && (creat_with_directory || read_only_tmpfile) {
        return Err(AxError::InvalidInput);
    }

    // Absolute paths are resolved independently of the supplied descriptor.
    let base_fd: c_int = if path.starts_with('/') {
        AT_FDCWD as c_int
    } else {
        dirfd
    };
    let effective_mode = mode & !thread.proc_data.umask();

    if let Some(ns_result) = try_open_nsfd(&path, uflags) {
        return ns_result.map(|fd| fd as isize);
    }

    let cred = thread.cred();
    let options = flags_to_options(flags, effective_mode, (cred.fsuid, cred.fsgid));

    // Probe before opening: a create notification is only due when the
    // target did not exist yet.
    let should_notify_create = if has(O_CREAT) && !is_path_only {
        with_fs(base_fd, |fs| match fs.resolve_no_follow(&path) {
            Ok(_) => Ok(false),
            Err(AxError::NotFound) => Ok(true),
            Err(err) => Err(err),
        })?
    } else {
        false
    };

    let opened = with_fs(base_fd, |fs| options.open(fs, path))?;
    let fd = add_to_fd(opened, uflags)?;

    if should_notify_create {
        let file = get_file_like(fd)?;
        crate::file::inotify::notify_create_path(file.path().as_ref(), false);
    }

    Ok(fd as isize)
}
#[cfg(target_arch = "x86_64")]
pub fn sys_open(path: *const c_char, flags: i32, mode: __kernel_mode_t) -> AxResult<isize> {
sys_openat(AT_FDCWD as _, path, flags, mode)
}
/// `close(2)`: removes `fd` via `close_file_like`.
///
/// Returns `0` on success and propagates the error from `close_file_like`
/// otherwise.
pub fn sys_close(fd: c_int) -> AxResult<isize> {
    debug!("sys_close <= {fd}");
    close_file_like(fd).map(|_| 0)
}
// Flag bits accepted by `sys_close_range`. That function parses its `flags`
// argument with `from_bits`, so any bit not declared here is rejected.
bitflags! {
#[derive(Debug, Clone, Copy)]
struct CloseRangeFlags: u32 {
// Unshare the caller's descriptor table before the range is processed
// (bit value 2).
const UNSHARE = 1 << 1;
// Mark the descriptors in the range close-on-exec instead of closing them
// (bit value 4).
const CLOEXEC = 1 << 2;
}
}
/// `close_range(2)`: closes, or marks close-on-exec, every open descriptor
/// in the inclusive range `[first, last]`.
///
/// Both bounds are `unsigned int` in the Linux ABI, so they are reinterpreted
/// as `u32` here. This keeps the common `close_range(n, ~0U, ...)` idiom
/// working, where `last` arrives as `-1`.
///
/// Flags:
/// - `UNSHARE`: the caller first gets a private copy of its descriptor table.
/// - `CLOEXEC`: descriptors are flagged close-on-exec instead of being closed.
///
/// # Errors
/// [`AxError::InvalidInput`] if `first > last` (as unsigned values) or if
/// `flags` contains unknown bits.
pub fn sys_close_range(first: i32, last: i32, flags: u32) -> AxResult<isize> {
    let (first, last) = (first as u32, last as u32);
    if first > last {
        return Err(AxError::InvalidInput);
    }
    let flags = CloseRangeFlags::from_bits(flags).ok_or(AxError::InvalidInput)?;
    debug!("sys_close_range <= fds: [{first}, {last}], flags: {flags:?}");

    if flags.contains(CloseRangeFlags::UNSHARE) {
        let curr = current();
        let mut scope = curr.as_thread().proc_data.scope.write();
        let mut guard = FD_TABLE.scope_mut(&mut scope);
        // Detach from the shared table: `take` leaves a fresh default table in
        // this scope, which is then filled with a copy of the old entries.
        // The copy must go into `guard` (the new table). Writing into
        // `old_files` would lock the same table for write and read at once.
        let old_files = mem::take(guard.deref_mut());
        guard.write().clone_from(old_files.read().deref());
    }

    let cloexec = flags.contains(CloseRangeFlags::CLOEXEC);
    let mut fd_table = FD_TABLE.write();
    if let Some(max_index) = fd_table.ids().next_back() {
        // Nothing lives beyond the highest allocated id, so clamp the range.
        let upper = last.min(max_index as u32);
        for fd in first..=upper {
            if cloexec {
                if let Some(f) = fd_table.get_mut(fd as _) {
                    f.cloexec = true;
                }
            } else if let Some(f) = fd_table.remove(fd as _) {
                crate::file::release_locks_on_close(f);
            }
        }
    }
    Ok(0)
}
/// Duplicates `old_fd` into a descriptor chosen by `add_file_like`, with the
/// close-on-exec flag set to `cloexec`.
fn dup_fd(old_fd: c_int, cloexec: bool) -> AxResult<isize> {
    let source = get_file_like(old_fd)?;
    Ok(add_file_like(source, cloexec)? as isize)
}
/// Duplicates `old_fd` into the lowest free descriptor that is `>= min_fd`
/// and below the process's current `RLIMIT_NOFILE`.
///
/// # Errors
/// - [`AxError::InvalidInput`] if `min_fd` is negative.
/// - [`AxError::TooManyOpenFiles`] if no slot in the permitted range is free.
/// - Any error from looking up `old_fd`.
fn dup_fd_min(old_fd: c_int, min_fd: c_int, cloexec: bool) -> AxResult<isize> {
    if min_fd < 0 {
        return Err(AxError::InvalidInput);
    }
    let source = get_file_like(old_fd)?;
    let limit = current().as_thread().proc_data.rlim.read()[RLIMIT_NOFILE].current as i32;

    let mut table = FD_TABLE.write();
    // Probe slots in ascending order; the first successful insert wins.
    let claimed = (min_fd..limit).find(|&slot| {
        let entry = FileDescriptor {
            inner: source.clone(),
            cloexec,
        };
        table.add_at(slot as _, entry).is_ok()
    });

    claimed
        .map(|slot| slot as isize)
        .ok_or(AxError::TooManyOpenFiles)
}
/// `dup(2)`: duplicates `old_fd`; the new descriptor is not close-on-exec.
pub fn sys_dup(old_fd: c_int) -> AxResult<isize> {
    debug!("sys_dup <= {old_fd}");
    let cloexec = false;
    dup_fd(old_fd, cloexec)
}
/// `dup2(2)` (x86_64 only).
///
/// When both descriptors are equal the call only checks that the descriptor
/// is open and returns it unchanged; otherwise it defers to [`sys_dup3`]
/// with no flags.
#[cfg(target_arch = "x86_64")]
pub fn sys_dup2(old_fd: c_int, new_fd: c_int) -> AxResult<isize> {
    if old_fd != new_fd {
        return sys_dup3(old_fd, new_fd, 0);
    }
    // Same descriptor: succeed only if it actually refers to an open file.
    let _open = get_file_like(new_fd)?;
    Ok(new_fd as isize)
}
// Flag bits accepted by `sys_dup3`, which parses its `flags` argument with
// `from_bits` and therefore rejects any other bit.
bitflags::bitflags! {
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct Dup3Flags: c_int {
// Same bit as the imported `O_CLOEXEC` constant: the duplicated descriptor
// is created close-on-exec.
const O_CLOEXEC = O_CLOEXEC as _; }
}
pub fn sys_dup3(old_fd: c_int, new_fd: c_int, flags: c_int) -> AxResult<isize> {
let flags = Dup3Flags::from_bits(flags).ok_or(AxError::InvalidInput)?;
debug!("sys_dup3 <= old_fd: {old_fd}, new_fd: {new_fd}, flags: {flags:?}");
if old_fd == new_fd {
return Err(AxError::InvalidInput);
}
let mut fd_table = FD_TABLE.write();
let mut f = fd_table
.get(old_fd as _)
.cloned()
.ok_or(AxError::BadFileDescriptor)?;
f.cloexec = flags.contains(Dup3Flags::O_CLOEXEC);
if let Some(prev) = fd_table.remove(new_fd as _) {
crate::file::release_locks_on_close(prev);
}
fd_table
.add_at(new_fd as _, f)
.map_err(|_| AxError::BadFileDescriptor)?;
Ok(new_fd as _)
}
pub fn sys_fcntl(fd: c_int, cmd: c_int, arg: usize) -> AxResult<isize> {
debug!("sys_fcntl <= fd: {fd} cmd: {cmd} arg: {arg}");
if let Some(r) = super::lock::dispatch_fcntl(fd, cmd, arg) {
return r;
}
match cmd as u32 {
F_DUPFD => dup_fd_min(fd, arg as _, false),
F_DUPFD_CLOEXEC => dup_fd_min(fd, arg as _, true),
F_SETFL => {
let f = get_file_like(fd)?;
let async_mode = arg & (FASYNC as usize) != 0;
let async_mode_changed = async_mode != f.async_mode();
if async_mode_changed && !f.supports_async_mode() {
return Err(AxError::NotATty);
}
f.set_nonblocking(arg & (O_NONBLOCK as usize) > 0)?;
f.set_append(arg & (O_APPEND as usize) > 0)?;
if async_mode_changed {
f.set_async_mode(async_mode)?;
}
Ok(0)
}
F_GETFL => {
let f = get_file_like(fd)?;
let mut ret = f.open_flags() & !O_APPEND;
if f.nonblocking() {
ret |= O_NONBLOCK;
}
if f.append() {
ret |= O_APPEND;
}
if f.async_mode() {
ret |= FASYNC;
}
Ok(ret as _)
}
F_GETFD => {
let cloexec = FD_TABLE
.read()
.get(fd as _)
.ok_or(AxError::BadFileDescriptor)?
.cloexec;
Ok(if cloexec { FD_CLOEXEC as _ } else { 0 })
}
F_SETFD => {
let cloexec = arg & FD_CLOEXEC as usize != 0;
FD_TABLE
.write()
.get_mut(fd as _)
.ok_or(AxError::BadFileDescriptor)?
.cloexec = cloexec;
Ok(0)
}
F_SETOWN => {
let f = get_file_like(fd)?;
f.set_owner(arg as i32)?;
Ok(0)
}
F_GETOWN => {
let f = get_file_like(fd)?;
Ok(f.owner()? as _)
}
F_GETPIPE_SZ => {
let pipe = Pipe::from_fd(fd)?;
Ok(pipe.capacity() as _)
}
F_SETPIPE_SZ => {
let pipe = Pipe::from_fd(fd)?;
pipe.resize(arg)?;
Ok(0)
}
F_GET_SEALS => {
let memfd = Memfd::from_fd(fd)?;
Ok(memfd.get_seals() as _)
}
F_ADD_SEALS => {
let memfd = Memfd::from_fd(fd)?;
memfd.add_seals(arg as u32)?;
Ok(0)
}
_ => {
warn!("unsupported fcntl parameters: cmd: {cmd}");
Err(AxError::InvalidInput)
}
}
}
/// `flock(2)`: forwards `operation` on `fd` to `super::lock::flock_op` and
/// returns its result unchanged.
pub fn sys_flock(fd: c_int, operation: c_int) -> AxResult<isize> {
    use super::lock::flock_op;

    debug!("flock <= fd: {fd}, operation: {operation}");
    flock_op(fd, operation)
}