// starry-kernel 0.5.11
//
// A Linux-compatible OS kernel built on ArceOS unikernel
// Documentation
use alloc::boxed::Box;

use ax_errno::{AxError, AxResult, LinuxError};
use ax_fs::FS_CONTEXT;
use ax_task::current;
use axfs_ng_vfs::{MetadataUpdate, NodeType};
#[cfg(feature = "vsock")]
use axnet::vsock::{VsockSocket, VsockStreamTransport};
use axnet::{
    Shutdown, Socket as SocketInner, SocketAddrEx, SocketOps,
    raw::{IpProtocol, IpVersion, RawSocket},
    tcp::TcpSocket,
    udp::UdpSocket,
    unix::{DgramTransport, StreamTransport, UnixSocket, UnixSocketAddr},
};
use linux_raw_sys::{
    general::{O_CLOEXEC, O_NONBLOCK},
    net::{
        AF_INET, AF_INET6, AF_NETLINK, AF_PACKET, AF_UNIX, AF_VSOCK, IPPROTO_ICMP, IPPROTO_TCP,
        IPPROTO_UDP, SHUT_RD, SHUT_RDWR, SHUT_WR, SOCK_DGRAM, SOCK_RAW, SOCK_SEQPACKET,
        SOCK_STREAM, sockaddr, socklen_t,
    },
    netlink::{NETLINK_GENERIC, NETLINK_KOBJECT_UEVENT, NETLINK_ROUTE},
};

use super::addr::{
    SocketAddrExt, normalize_socket_addr_ex_for_ip_stack, socket_addr_ex_for_user_name,
};
use crate::{
    file::{FileLike, PacketSocket, SockAddrLl, Socket, add_file_like, netlink::NetlinkSocket},
    mm::{UserConstPtr, UserPtr},
    task::AsThread,
};

/// Creates a socket for `domain` / `raw_ty` / `proto` and installs it in the
/// caller's fd table, returning the new descriptor.
///
/// The low byte of `raw_ty` selects the socket type. The `O_NONBLOCK` and
/// `O_CLOEXEC` bits of `raw_ty` request a non-blocking socket and a
/// close-on-exec descriptor respectively.
///
/// # Errors
///
/// * `ESOCKTNOSUPPORT` — the domain is recognised but the socket type is not
///   handled for it (also used for a `NETLINK_KOBJECT_UEVENT` socket that is
///   not `SOCK_RAW`).
/// * `EPROTONOSUPPORT` — the protocol is not accepted for the chosen
///   domain/type combination.
/// * `EPERM` — a packet or raw socket was requested and the caller's
///   credentials fail `has_cap_net_raw()`.
/// * `EAFNOSUPPORT` — the domain is not handled at all.
/// * Any error reported while switching to non-blocking mode or while adding
///   the socket to the fd table.
pub fn sys_socket(domain: u32, raw_ty: u32, proto: u32) -> AxResult<isize> {
    debug!("sys_socket <= domain: {domain}, ty: {raw_ty}, proto: {proto}");
    let ty = raw_ty & 0xFF;
    let nonblocking = raw_ty & O_NONBLOCK != 0;
    let cloexec = raw_ty & O_CLOEXEC != 0;

    // Packet sockets have their own file type and never go through `Socket`.
    if domain == AF_PACKET {
        if ty != SOCK_DGRAM {
            warn!("Unsupported packet socket type: {ty}");
            return Err(AxError::from(LinuxError::ESOCKTNOSUPPORT));
        }
        let may_use_raw = current().as_thread().cred().has_cap_net_raw();
        if !may_use_raw {
            return Err(AxError::from(LinuxError::EPERM));
        }
        let packet = PacketSocket::new(proto as u16);
        if nonblocking {
            packet.set_nonblocking(true)?;
        }
        let fd = packet.add_to_fd_table(cloexec)?;
        return Ok(fd as isize);
    }

    let pid = current().as_thread().proc_data.proc.pid();
    // Non-IP sockets are tagged with AF_INET as their "IP domain".
    let ip_domain = match domain {
        AF_INET | AF_INET6 => domain,
        _ => AF_INET,
    };

    let inner = match (domain, ty) {
        (AF_INET | AF_INET6, SOCK_STREAM) => {
            let proto_ok = proto == 0 || proto == IPPROTO_TCP as u32;
            if !proto_ok {
                return Err(AxError::from(LinuxError::EPROTONOSUPPORT));
            }
            TcpSocket::new().into()
        }
        (AF_INET | AF_INET6, SOCK_DGRAM) => {
            let proto_ok = proto == 0 || proto == IPPROTO_UDP as u32;
            if !proto_ok {
                return Err(AxError::from(LinuxError::EPROTONOSUPPORT));
            }
            UdpSocket::new().into()
        }
        (AF_UNIX, SOCK_STREAM) => UnixSocket::new(StreamTransport::new(pid)).into(),
        (AF_UNIX, SOCK_DGRAM) => UnixSocket::new(DgramTransport::new(pid)).into(),
        // Netlink sockets are a separate file type and return from here.
        (AF_NETLINK, SOCK_RAW | SOCK_DGRAM) => {
            if !matches!(
                proto,
                NETLINK_KOBJECT_UEVENT | NETLINK_ROUTE | NETLINK_GENERIC
            ) {
                return Err(AxError::from(LinuxError::EPROTONOSUPPORT));
            }
            // The uevent protocol is only accepted on SOCK_RAW.
            if proto == NETLINK_KOBJECT_UEVENT && ty != SOCK_RAW {
                return Err(AxError::from(LinuxError::ESOCKTNOSUPPORT));
            }
            let netlink = NetlinkSocket::new(proto);
            if nonblocking {
                netlink.set_nonblocking(true)?;
            }
            let fd = add_file_like(netlink as _, cloexec)?;
            return Ok(fd as isize);
        }
        #[cfg(feature = "vsock")]
        (AF_VSOCK, SOCK_STREAM) => VsockSocket::new(VsockStreamTransport::new()).into(),
        // Raw IPv4 sockets: ICMP only, and only for privileged callers.
        (AF_INET, SOCK_RAW) => {
            if proto != IPPROTO_ICMP as u32 {
                return Err(AxError::from(LinuxError::EPROTONOSUPPORT));
            }
            let may_use_raw = current().as_thread().cred().has_cap_net_raw();
            if !may_use_raw {
                return Err(AxError::from(LinuxError::EPERM));
            }
            SocketInner::Raw(Box::new(RawSocket::new(IpVersion::Ipv4, IpProtocol::Icmp)))
        }
        // Known domain, but a type none of the arms above handles.
        (AF_INET | AF_INET6 | AF_UNIX | AF_NETLINK | AF_VSOCK, _) => {
            warn!("Unsupported socket type: domain: {domain}, ty: {ty}");
            return Err(AxError::from(LinuxError::ESOCKTNOSUPPORT));
        }
        _ => return Err(AxError::from(LinuxError::EAFNOSUPPORT)),
    };

    let socket = Socket::new(inner, ip_domain);
    if nonblocking {
        socket.set_nonblocking(true)?;
    }

    let fd = socket.add_to_fd_table(cloexec)?;
    Ok(fd as isize)
}

/// Binds the socket behind `fd` to the address stored at `addr` (`addrlen`
/// bytes of user memory). Returns `0` on success.
///
/// The descriptor is probed in order as a netlink socket, a packet socket and
/// finally a regular [`Socket`]; each kind parses the user address in its own
/// format.
///
/// For a path-based `AF_UNIX` address, the filesystem node at that path is
/// re-owned to the caller's `fsuid`/`fsgid` after a successful bind, provided
/// the node is a socket. Failure of that ownership update is only logged.
///
/// # Errors
///
/// Propagates errors from looking up `fd`, from reading or normalising the
/// user address, and from the underlying `bind` call.
pub fn sys_bind(fd: i32, addr: UserConstPtr<sockaddr>, addrlen: u32) -> AxResult<isize> {
    // Netlink: a zero `nl_pid` is replaced with the calling process id.
    if let Ok(netlink) = NetlinkSocket::from_fd(fd) {
        let mut addr = super::addr::read_netlink_addr(addr, addrlen as _)?;
        if addr.nl_pid == 0 {
            addr.nl_pid = current().as_thread().proc_data.proc.pid();
        }
        debug!("sys_bind <= fd: {fd}, netlink_addr: {addr:?}");
        netlink.bind(addr)?;
        return Ok(0);
    }

    // Packet sockets take a link-layer address read from the raw user pointer.
    if let Ok(packet) = PacketSocket::from_fd(fd) {
        let raw_ptr = addr.address().as_usize() as *const sockaddr;
        let ll_addr = SockAddrLl::read_from_user(raw_ptr, addrlen)?;
        packet.bind_ll(ll_addr)?;
        return Ok(0);
    }

    let socket = Socket::from_fd(fd)?;
    let mut addr = SocketAddrEx::read_from_user(addr, addrlen)?;
    if socket.ip_domain() == AF_INET6 {
        addr = normalize_socket_addr_ex_for_ip_stack(addr, true)?;
    }
    debug!("sys_bind <= fd: {fd}, addr: {addr:?}");

    // `bind` consumes the address, so keep a copy of the path (if any) for the
    // ownership fix-up below.
    let unix_path = if let SocketAddrEx::Unix(UnixSocketAddr::Path(path)) = &addr {
        Some(path.clone())
    } else {
        None
    };
    let cred = current().as_thread().cred();

    socket.bind(addr)?;

    // Best effort: a failure here must not turn a successful bind into an error.
    if let Some(path) = unix_path
        && let Err(err) = FS_CONTEXT
            .lock()
            .resolve_no_follow(path.as_ref())
            .and_then(|location| {
                let is_socket = location.metadata()?.node_type == NodeType::Socket;
                if is_socket {
                    location.update_metadata(MetadataUpdate {
                        owner: Some((cred.fsuid, cred.fsgid)),
                        ..Default::default()
                    })?;
                }
                Ok(())
            })
    {
        warn!("failed to update AF_UNIX socket owner for {path}: {err:?}");
    }

    Ok(0)
}

/// Connects the socket behind `fd` to the address stored at `addr` (`addrlen`
/// bytes of user memory). Returns `0` on success.
///
/// For sockets whose IP domain is `AF_INET6` the address is first passed
/// through `normalize_socket_addr_ex_for_ip_stack`.
///
/// # Errors
///
/// Propagates errors from looking up `fd`, from reading or normalising the
/// address, and from the underlying `connect`. An [`AxError::WouldBlock`] from
/// `connect` is reported to the caller as [`AxError::InProgress`].
pub fn sys_connect(fd: i32, addr: UserConstPtr<sockaddr>, addrlen: u32) -> AxResult<isize> {
    let socket = Socket::from_fd(fd)?;
    let addr = {
        let parsed = SocketAddrEx::read_from_user(addr, addrlen)?;
        if socket.ip_domain() == AF_INET6 {
            normalize_socket_addr_ex_for_ip_stack(parsed, false)?
        } else {
            parsed
        }
    };
    debug!("sys_connect <= fd: {fd}, addr: {addr:?}");

    match socket.connect(addr) {
        Ok(_) => Ok(0),
        // Translate the one error kind; everything else passes through unchanged.
        Err(e) if e == AxError::WouldBlock => Err(AxError::InProgress),
        Err(e) => Err(e),
    }
}

/// Puts the socket behind `fd` into the listening state. Returns `0` on
/// success.
///
/// `backlog` is forwarded to the socket as a `usize`. The only negative value
/// accepted is `-1`, which the cast turns into `usize::MAX`.
///
/// # Errors
///
/// * [`AxError::InvalidInput`] — `backlog` is less than `-1`.
/// * Any error from looking up `fd` or from the underlying `listen`.
pub fn sys_listen(fd: i32, backlog: i32) -> AxResult<isize> {
    debug!("sys_listen <= fd: {fd}, backlog: {backlog}");

    // Every negative value other than -1 is rejected.
    if backlog < -1 {
        return Err(AxError::InvalidInput);
    }

    let socket = Socket::from_fd(fd)?;
    socket.listen(backlog as usize)?;

    Ok(0)
}

/// Accepts a connection on the listening socket `fd`.
///
/// Equivalent to [`sys_accept4`] with no flags set; see that function for the
/// meaning of `addr` / `addrlen`, the return value and the errors.
pub fn sys_accept(
    fd: i32,
    addr: UserPtr<sockaddr>,
    addrlen: UserPtr<socklen_t>,
) -> AxResult<isize> {
    const NO_FLAGS: u32 = 0;
    sys_accept4(fd, addr, addrlen, NO_FLAGS)
}

/// Accepts a connection on the listening socket `fd` and returns the
/// descriptor of the newly accepted socket.
///
/// The `O_NONBLOCK` bit of `flags` makes the new socket non-blocking; the
/// `O_CLOEXEC` bit marks the new descriptor close-on-exec. The new socket
/// inherits the listener's IP domain.
///
/// When `addr` is non-null the peer address is written to it, using
/// `addrlen` as the in/out length.
///
/// # Errors
///
/// Propagates errors from looking up `fd`, from `accept`, from switching to
/// non-blocking mode, from querying the peer address, from writing the
/// address to user memory, and from adding the socket to the fd table.
pub fn sys_accept4(
    fd: i32,
    addr: UserPtr<sockaddr>,
    addrlen: UserPtr<socklen_t>,
    flags: u32,
) -> AxResult<isize> {
    debug!("sys_accept <= fd: {fd}, flags: {flags}");

    let cloexec = flags & O_CLOEXEC != 0;

    let listener = Socket::from_fd(fd)?;
    let socket = Socket::new(listener.accept()?, listener.ip_domain());
    if flags & O_NONBLOCK != 0 {
        socket.set_nonblocking(true)?;
    }

    let remote_addr = socket_addr_ex_for_user_name(socket.ip_domain(), socket.peer_addr()?);

    // Copy the peer address out *before* the socket gets a descriptor. If the
    // user buffer is bad, the accepted socket is dropped here rather than left
    // installed under an fd number the caller never learns.
    if !addr.is_null() {
        remote_addr.write_to_user(addr, addrlen.get_as_mut()?)?;
    }

    let fd = socket.add_to_fd_table(cloexec).map(|fd| fd as isize)?;
    debug!("sys_accept => fd: {fd}, addr: {remote_addr:?}");

    Ok(fd)
}

pub fn sys_shutdown(fd: i32, how: u32) -> AxResult<isize> {
    debug!("sys_shutdown <= fd: {fd}, how: {how:?}");

    let socket = Socket::from_fd(fd)?;
    let how = match how {
        SHUT_RD => Shutdown::Read,
        SHUT_WR => Shutdown::Write,
        SHUT_RDWR => Shutdown::Both,
        _ => return Err(AxError::InvalidInput),
    };
    socket.shutdown(how).map(|_| 0)
}

/// Creates a pair of connected `AF_UNIX` sockets and stores their two
/// descriptors in the user array `fds`. Returns `0` on success.
///
/// The low byte of `raw_ty` selects the socket type: `SOCK_STREAM` uses a
/// stream transport pair, while `SOCK_DGRAM` and `SOCK_SEQPACKET` both use a
/// datagram transport pair. The `O_NONBLOCK` and `O_CLOEXEC` bits of `raw_ty`
/// apply to both sockets / descriptors. `proto` is only logged.
///
/// # Errors
///
/// * `EAFNOSUPPORT` — `domain` is not `AF_UNIX`.
/// * `ESOCKTNOSUPPORT` — the socket type is not one of the three above.
/// * Any error from switching to non-blocking mode, from validating `fds`, or
///   from adding either socket to the fd table.
pub fn sys_socketpair(
    domain: u32,
    raw_ty: u32,
    proto: u32,
    fds: UserPtr<[i32; 2]>,
) -> AxResult<isize> {
    debug!("sys_socketpair <= domain: {domain}, ty: {raw_ty}, proto: {proto}");
    let ty = raw_ty & 0xFF;

    if domain != AF_UNIX {
        return Err(AxError::from(LinuxError::EAFNOSUPPORT));
    }

    let pid = current().as_thread().proc_data.proc.pid();
    let (sock1, sock2) = match ty {
        SOCK_STREAM => {
            let (sock1, sock2) = StreamTransport::new_pair(pid);
            (UnixSocket::new(sock1), UnixSocket::new(sock2))
        }
        SOCK_DGRAM | SOCK_SEQPACKET => {
            let (sock1, sock2) = DgramTransport::new_pair(pid);
            (UnixSocket::new(sock1), UnixSocket::new(sock2))
        }
        _ => {
            warn!("Unsupported socketpair type: {ty}");
            return Err(AxError::from(LinuxError::ESOCKTNOSUPPORT));
        }
    };
    let sock1 = Socket::new(sock1.into(), AF_UNIX);
    let sock2 = Socket::new(sock2.into(), AF_UNIX);

    if raw_ty & O_NONBLOCK != 0 {
        sock1.set_nonblocking(true)?;
        sock2.set_nonblocking(true)?;
    }
    let cloexec = raw_ty & O_CLOEXEC != 0;

    // Validate the destination before installing anything. In an assignment
    // the right-hand side is evaluated first, so checking the pointer as part
    // of the store would happen only after both descriptors already exist,
    // leaking them when the pointer is bad.
    let user_fds = fds.get_as_mut()?;

    // NOTE(review): if installing the second socket fails, the first
    // descriptor stays in the fd table; nothing in scope here can remove it.
    let fd1 = sock1.add_to_fd_table(cloexec)?;
    let fd2 = sock2.add_to_fd_table(cloexec)?;
    *user_fds = [fd1, fd2];
    Ok(0)
}