#include <stdio.h>
#include <fcntl.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <sys/sysinfo.h>
#include <poll.h>
#include <assert.h>
#include <sys/uio.h>
#include <sys/mman.h>
#include <linux/mman.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <limits.h>
#include "helpers.h"
#include "liburing.h"
#include "../src/syscall.h"
/* System page size in bytes; assigned in main() from getpagesize(). */
static int pagesize;
/* Soft RLIMIT_MEMLOCK limit (rlim_cur); assigned in main() from getrlimit(). */
static rlim_t mlock_limit;
/* Descriptor for /dev/null, opened O_RDWR in main(). */
static int devnull;
/*
 * Call io_uring_register(fd, opcode, arg, nr_args) and verify that it fails
 * with an expected error.
 *
 * error  - expected negative errno value (e.g. -EINVAL).
 * error2 - alternative acceptable negative errno value, or 0 if none.
 *
 * Returns 0 if the call failed with one of the expected errors, 1 otherwise.
 * If a buffer or file registration unexpectedly succeeds it is unregistered
 * again so later tests start clean; if that unregister fails the process
 * exits with status 1.
 */
static int expect_fail(int fd, unsigned int opcode, void *arg,
		       unsigned int nr_args, int error, int error2)
{
	int ret;

	ret = io_uring_register(fd, opcode, arg, nr_args);
	if (ret >= 0) {
		int ret2 = 0;

		/* 'error' is negative; strerror() needs the positive errno. */
		fprintf(stderr, "expected %s, but call succeeded\n",
			strerror(-error));
		if (opcode == IORING_REGISTER_BUFFERS) {
			ret2 = io_uring_register(fd, IORING_UNREGISTER_BUFFERS,
						 NULL, 0);
		} else if (opcode == IORING_REGISTER_FILES) {
			ret2 = io_uring_register(fd, IORING_UNREGISTER_FILES,
						 NULL, 0);
		}
		if (ret2) {
			fprintf(stderr, "internal error: failed to unregister\n");
			exit(1);
		}
		return 1;
	}

	/*
	 * Accept the primary error, or the alternative one when the caller
	 * supplied it. Without an alternative, any other error is a failure.
	 */
	if (ret == error || (error2 && ret == error2))
		return 0;

	fprintf(stderr, "expected %d/%d, got %d\n", error, error2, ret);
	return 1;
}
/*
 * Create an io_uring instance with the requested number of entries.
 *
 * Returns the ring file descriptor. On failure an error message is printed
 * and the process exits with status 1.
 */
static int new_io_uring(int entries, struct io_uring_params *p)
{
	int fd;

	fd = io_uring_setup(entries, p);
	if (fd < 0) {
		/*
		 * As with the io_uring_register() calls in this file, the
		 * error is carried in the negative return value, so load it
		 * into errno before perror() reads it.
		 */
		errno = -fd;
		perror("io_uring_setup");
		exit(1);
	}
	return fd;
}
/* Size in bytes of an array holding UINT_MAX ints. */
#define MAXFDS (UINT_MAX * sizeof(int))
/*
 * Create a temporary file, unlink it, grow it to `size` bytes and map it
 * shared read/write.
 *
 * Returns the mapping address, or NULL after reporting the failing call with
 * perror(). The file descriptor is closed before returning in every case
 * where it was opened; the caller is responsible for unmapping the result.
 */
static void *map_filebacked(size_t size)
{
	char path[32] = "io_uring_register-test-XXXXXXXX";
	void *mapping = NULL;
	int tmpfd;

	tmpfd = mkstemp(path);
	if (tmpfd < 0) {
		perror("mkstemp");
		return NULL;
	}
	/* The name is no longer needed once the descriptor is open. */
	unlink(path);

	if (ftruncate(tmpfd, size) < 0) {
		perror("ftruncate");
		goto out_close;
	}

	mapping = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, tmpfd, 0);
	if (mapping == MAP_FAILED) {
		perror("mmap");
		mapping = NULL;
	}

out_close:
	close(tmpfd);
	return mapping;
}
/*
 * Find out whether a very large file table can be registered.
 *
 * An address range of UINT_MAX * sizeof(int) bytes is reserved, and a single
 * 128MB temporary file filled with one /dev/null descriptor is then mapped
 * over it repeatedly, so the whole range reads as an array of valid fds.
 * Registration is attempted with UINT_MAX entries and the count is halved
 * until a registration succeeds.
 *
 * Returns 0 if some count could be registered (and unregistered again), or if
 * the initial reservation failed with ENOMEM; returns 1 if no count could be
 * registered. Any other setup failure exits the process with status 1.
 */
static int test_max_fds(int uring_fd)
{
	const size_t as_size = UINT_MAX * sizeof(int);
	const size_t chunk_size = 128 * 1024 * 1024;
	int status = 1;
	int ret;
	void *fd_as;
	char *chunk;
	int fdtable_fd;
	int io_fd;
	int *fds;
	char template[32] = "io_uring_register-test-XXXXXXXX";
	unsigned long long i, nr_maps, nr_fds;

	/* Reserve the address range that will hold the fd array. */
	fd_as = mmap(NULL, as_size, PROT_READ|PROT_WRITE,
		     MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
	if (fd_as == MAP_FAILED) {
		if (errno == ENOMEM)
			return 0;
		perror("mmap fd_as");
		exit(1);
	}

	fdtable_fd = mkstemp(template);
	if (fdtable_fd < 0) {
		perror("mkstemp");
		exit(1);
	}
	unlink(template);
	ret = ftruncate(fdtable_fd, chunk_size);
	if (ret < 0) {
		perror("ftruncate");
		exit(1);
	}

	io_fd = open("/dev/null", O_RDWR);
	if (io_fd < 0) {
		perror("open /dev/null");
		exit(1);
	}

	/* Map the file at the start of the range and fill it with io_fd. */
	fds = mmap(fd_as, chunk_size, PROT_READ|PROT_WRITE,
		   MAP_SHARED|MAP_FIXED, fdtable_fd, 0);
	if (fds == MAP_FAILED) {
		perror("mmap fdtable");
		exit(1);
	}
	nr_fds = chunk_size / sizeof(int);
	for (i = 0; i < nr_fds; i++)
		fds[i] = io_fd;

	/* Map the same file over each following chunk of the range. */
	nr_maps = as_size / chunk_size;
	chunk = fd_as;
	for (i = 0; i < nr_maps; i++) {
		chunk += chunk_size;
		if (mmap(chunk, chunk_size, PROT_READ|PROT_WRITE,
			 MAP_SHARED|MAP_FIXED, fdtable_fd, 0) == MAP_FAILED) {
			/*
			 * Report the offset of the chunk that failed, measured
			 * from the requested address rather than from the
			 * MAP_FAILED return value.
			 */
			fprintf(stderr, "mmap failed at offset %lu\n",
				(unsigned long)(chunk - (char *)fd_as));
			exit(1);
		}
	}

	/* Halve the count until the kernel accepts the registration. */
	nr_fds = UINT_MAX;
	while (nr_fds) {
		ret = io_uring_register(uring_fd, IORING_REGISTER_FILES, fd_as,
					nr_fds);
		if (ret != 0) {
			nr_fds /= 2;
			continue;
		}
		status = 0;
		ret = io_uring_register(uring_fd, IORING_UNREGISTER_FILES,
					NULL, 0);
		if (ret < 0) {
			errno = -ret;
			perror("io_uring_register UNREGISTER_FILES");
			exit(1);
		}
		break;
	}

	close(io_fd);
	close(fdtable_fd);
	ret = munmap(fd_as, as_size);
	if (ret != 0) {
		fprintf(stderr, "munmap(%zu) failed\n", as_size);
		exit(1);
	}
	return status;
}
/*
 * Try to register a buffer of twice the memlock limit, halving its length
 * each time the kernel answers -ENOMEM, until a registration is accepted.
 *
 * Skipped (returns 0) when the limit is 2GiB or more. -EFAULT from the
 * register call is also treated as a pass. Returns 1 when registration fails
 * with any other error, or when unregistering an accepted buffer fails.
 * If the length reaches zero without success a hint is printed and 0 is
 * returned.
 */
static int test_memlock_exceeded(int fd)
{
	struct iovec iov;
	void *mem;
	int status = 0;
	int rc;

	if (mlock_limit >= 2 * 1024 * 1024 * 1024ULL)
		return 0;

	iov.iov_len = mlock_limit * 2;
	mem = t_malloc(iov.iov_len);
	iov.iov_base = mem;

	for (;;) {
		if (!iov.iov_len) {
			printf("Unable to register buffers. Check memlock rlimit.\n");
			break;
		}

		rc = io_uring_register(fd, IORING_REGISTER_BUFFERS, &iov, 1);
		if (rc == -ENOMEM) {
			/* Still over the limit: retry with half the length. */
			iov.iov_len /= 2;
			continue;
		}
		if (rc == -EFAULT)
			break;
		if (rc) {
			fprintf(stderr, "expected success or EFAULT, got %d\n", rc);
			status = 1;
			break;
		}

		rc = io_uring_register(fd, IORING_UNREGISTER_BUFFERS, NULL, 0);
		if (rc != 0) {
			fprintf(stderr, "error: unregister failed with %d\n", rc);
			status = 1;
		}
		break;
	}

	free(mem);
	return status;
}
/*
 * Check the limit on the number of iovecs passed to IORING_REGISTER_BUFFERS.
 *
 * Registering 1000000 iovecs is expected to fail with -EINVAL. Registering
 * UIO_MAXIOV iovecs is expected to succeed, except that -ENOMEM or -EPERM is
 * tolerated when the effective uid is non-zero. All iovecs point at the same
 * page-sized buffer.
 *
 * Returns 0 on pass (or when the iovec array cannot be allocated), 1 on
 * failure.
 */
static int test_iovec_nr(int fd)
{
	unsigned int count = 1000000;
	unsigned int idx;
	struct iovec *vec;
	void *page;
	int rc, status;

	vec = malloc(count * sizeof(*vec));
	if (!vec) {
		fprintf(stdout, "can't allocate iovecs, skip\n");
		return 0;
	}

	page = t_malloc(pagesize);
	for (idx = 0; idx < count; idx++) {
		vec[idx].iov_base = page;
		vec[idx].iov_len = pagesize;
	}

	/* Far too many entries: must be rejected. */
	status = expect_fail(fd, IORING_REGISTER_BUFFERS, vec, count, -EINVAL, 0);

	/* UIO_MAXIOV entries: expected to be accepted. */
	count = UIO_MAXIOV;
	rc = io_uring_register(fd, IORING_REGISTER_BUFFERS, vec, count);
	if (rc == 0) {
		io_uring_register(fd, IORING_UNREGISTER_BUFFERS, NULL, 0);
	} else if ((rc == -ENOMEM || rc == -EPERM) && geteuid()) {
		fprintf(stderr, "can't register large iovec for regular users, skip\n");
	} else {
		fprintf(stderr, "expected success, got %d\n", rc);
		status = 1;
	}

	free(page);
	free(vec);
	return status;
}
static int test_iovec_size(int fd)
{
unsigned int status = 0;
int ret;
struct iovec iov;
void *buf;
iov.iov_base = 0;
iov.iov_len = 4096;
status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT, 0);
iov.iov_base = &buf;
iov.iov_len = 0;
status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT, 0);
buf = mmap(NULL, 2 * pagesize, PROT_READ|PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
assert(buf != MAP_FAILED);
ret = munmap(buf + pagesize, pagesize);
assert(ret == 0);
iov.iov_base = buf;
iov.iov_len = 2 * pagesize;
status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT, 0);
munmap(buf, pagesize);
buf = mmap(NULL, 2*1024*1024, PROT_READ|PROT_WRITE,
MAP_PRIVATE | MAP_HUGETLB | MAP_HUGE_2MB | MAP_ANONYMOUS,
-1, 0);
if (buf == MAP_FAILED) {
printf("Unable to map a huge page. Try increasing "
"/proc/sys/vm/nr_hugepages by at least 1.\n");
printf("Skipping the hugepage test\n");
} else {
iov.iov_base = buf;
iov.iov_len = 2*1024*1024;
ret = io_uring_register(fd, IORING_REGISTER_BUFFERS, &iov, 1);
if (ret < 0) {
if (ret == -ENOMEM)
printf("Unable to test registering of a huge "
"page. Try increasing the "
"RLIMIT_MEMLOCK resource limit by at "
"least 2MB.");
else {
fprintf(stderr, "expected success, got %d\n", ret);
status = 1;
}
} else {
ret = io_uring_register(fd, IORING_UNREGISTER_BUFFERS,
0, 0);
if (ret < 0) {
fprintf(stderr, "io_uring_unregister: %s\n",
strerror(-ret));
status = 1;
}
}
}
ret = munmap(iov.iov_base, iov.iov_len);
assert(ret == 0);
buf = map_filebacked(2*1024*1024);
if (!buf)
status = 1;
iov.iov_base = buf;
iov.iov_len = 2*1024*1024;
status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT, -EOPNOTSUPP);
munmap(buf, 2*1024*1024);
if (getuid() != 0)
status |= test_memlock_exceeded(fd);
return status;
}
/*
 * Submit one IORING_OP_POLL_ADD request for POLLIN|POLLOUT on `fd` and wait
 * for its completion.
 *
 * ring  - ring to submit on.
 * fd    - descriptor to poll.
 * fixed - when non-zero the request is flagged IOSQE_FIXED_FILE.
 *
 * Returns 0 if the completion result has POLLOUT set, 1 on any failure
 * (no free sqe, submit or wait error, or POLLOUT missing from the result).
 */
static int ioring_poll(struct io_uring *ring, int fd, int fixed)
{
	int ret;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;

	sqe = io_uring_get_sqe(ring);
	if (!sqe) {
		/* No free submission entry; do not write through NULL. */
		fprintf(stderr, "failed to get sqe\n");
		return 1;
	}
	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = IORING_OP_POLL_ADD;
	if (fixed)
		sqe->flags = IOSQE_FIXED_FILE;
	sqe->fd = fd;
	sqe->poll_events = POLLIN|POLLOUT;

	ret = io_uring_submit(ring);
	if (ret != 1) {
		fprintf(stderr, "failed to submit poll sqe: %d.\n", ret);
		return 1;
	}

	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret < 0) {
		fprintf(stderr, "io_uring_wait_cqe failed with %d\n", ret);
		return 1;
	}

	ret = 0;
	if (!(cqe->res & POLLOUT)) {
		fprintf(stderr, "io_uring_wait_cqe: expected 0x%.8x, got 0x%.8x\n",
			POLLOUT, cqe->res);
		ret = 1;
	}
	/* Mark the completion consumed whether or not the check passed. */
	io_uring_cqe_seen(ring, cqe);
	return ret;
}
/*
 * Create a one-entry ring, poll the ring's own file descriptor through that
 * ring, and then check that registering the ring fd as a file with itself is
 * rejected with -EBADF.
 *
 * Returns 0 if both checks pass, non-zero otherwise (including when the ring
 * cannot be created).
 */
static int test_poll_ringfd(void)
{
	int status = 0;
	int ret;
	int fd;
	struct io_uring ring;

	ret = io_uring_queue_init(1, &ring, 0);
	if (ret) {
		/* The error comes back as a negative return value, not in errno. */
		fprintf(stderr, "io_uring_queue_init: %s\n", strerror(-ret));
		return 1;
	}
	fd = ring.ring_fd;

	/* Poll the ring fd using the ring itself. */
	status = ioring_poll(&ring, fd, 0);

	/* Registering the ring fd in its own file table must fail. */
	status |= expect_fail(fd, IORING_REGISTER_FILES, &fd, 1, -EBADF, 0);

	io_uring_queue_exit(&ring);
	return status;
}
/*
 * Test driver. Takes no arguments (returns T_EXIT_SKIP if any are given).
 *
 * Records the page size and the soft memlock limit, checks that
 * io_uring_register() rejects an invalid fd, a non-io_uring fd and an invalid
 * opcode, then runs the iovec size, iovec count, max-fds and ring-fd poll
 * tests. Returns 0 if everything passed, non-zero otherwise.
 */
int main(int argc, char **argv)
{
	int fd, ret;
	unsigned int status = 0;
	struct io_uring_params p;
	struct rlimit rlim;

	if (argc > 1)
		return T_EXIT_SKIP;

	pagesize = getpagesize();
	ret = getrlimit(RLIMIT_MEMLOCK, &rlim);
	if (ret < 0) {
		perror("getrlimit");
		return T_EXIT_PASS;
	}
	mlock_limit = rlim.rlim_cur;

	devnull = open("/dev/null", O_RDWR);
	if (devnull < 0) {
		perror("open /dev/null");
		exit(T_EXIT_FAIL);
	}

	/* Invalid fd. */
	status |= expect_fail(-1, 0, NULL, 0, -EBADF, 0);
	/* Valid fd that is not an io_uring fd. */
	status |= expect_fail(devnull, 0, NULL, 0, -EOPNOTSUPP, 0);

	memset(&p, 0, sizeof(p));
	fd = new_io_uring(1, &p);

	/* Invalid opcode. */
	ret = expect_fail(fd, ~0U, NULL, 0, -EINVAL, 0);
	if (ret) {
		/* Record the failure instead of silently dropping it. */
		status |= ret;
		/*
		 * Tear the ring down and start from a fresh one, with
		 * zeroed params rather than whatever the first setup
		 * call may have left in p.
		 */
		close(fd);
		memset(&p, 0, sizeof(p));
		fd = new_io_uring(1, &p);
	}

	status |= test_iovec_size(fd);
	status |= test_iovec_nr(fd);
	status |= test_max_fds(fd);
	close(fd);

	/* Uses its own ring. */
	status |= test_poll_ringfd();

	close(devnull);

	if (status)
		fprintf(stderr, "FAIL\n");
	return status;
}