compio_driver/poll/
mod.rs

1#[cfg_attr(all(doc, docsrs), doc(cfg(all())))]
2#[allow(unused_imports)]
3pub use std::os::fd::{AsFd, AsRawFd, BorrowedFd, OwnedFd, RawFd};
4#[cfg(aio)]
5use std::ptr::NonNull;
6use std::{
7    collections::{HashMap, VecDeque},
8    io,
9    num::NonZeroUsize,
10    pin::Pin,
11    sync::Arc,
12    task::Poll,
13    time::Duration,
14};
15
16use compio_log::{instrument, trace};
17use crossbeam_queue::SegQueue;
18use polling::{Event, Events, Poller};
19
20use crate::{
21    AsyncifyPool, BufferPool, DriverType, Entry, Key, ProactorBuilder, op::Interest, syscall,
22};
23
24pub(crate) mod op;
25
/// Abstraction of operations.
///
/// The poll driver in this module drives an operation through three hooks:
/// [`OpCode::pre_submit`] when the operation is pushed, [`OpCode::op_type`]
/// when an event arrives or the operation is cancelled, and
/// [`OpCode::operate`] to actually perform the work.
pub trait OpCode {
    /// Perform the operation before submit, and return [`Decision`] to
    /// indicate whether submitting the operation to polling is required.
    ///
    /// # Errors
    ///
    /// An error returned here is handed back by the driver's `push` as the
    /// final result of the operation; nothing is registered in that case.
    fn pre_submit(self: Pin<&mut Self>) -> io::Result<Decision>;

    /// Get the operation type when an event has occurred.
    ///
    /// The default implementation returns `None`. The driver ignores events
    /// for such operations in `poll` and does nothing for them in `cancel`.
    fn op_type(self: Pin<&mut Self>) -> Option<OpType> {
        None
    }

    /// Perform the operation after receiving the corresponding
    /// event. If this operation is blocking, the return value should be
    /// [`Poll::Ready`].
    ///
    /// For an fd operation, returning [`Poll::Pending`] makes the driver put
    /// the operation back at the front of its queue so it is retried on the
    /// next event. The blocking path treats [`Poll::Pending`] as unreachable.
    fn operate(self: Pin<&mut Self>) -> Poll<io::Result<usize>>;
}
42
43/// Result of [`OpCode::pre_submit`].
44#[non_exhaustive]
45pub enum Decision {
46    /// Instant operation, no need to submit
47    Completed(usize),
48    /// Async operation, needs to submit
49    Wait(WaitArg),
50    /// Blocking operation, needs to be spawned in another thread
51    Blocking,
52    /// AIO operation, needs to be spawned to the kernel.
53    #[cfg(aio)]
54    Aio(AioControl),
55}
56
57impl Decision {
58    /// Decide to wait for the given fd with the given interest.
59    pub fn wait_for(fd: RawFd, interest: Interest) -> Self {
60        Self::Wait(WaitArg { fd, interest })
61    }
62
63    /// Decide to wait for the given fd to be readable.
64    pub fn wait_readable(fd: RawFd) -> Self {
65        Self::wait_for(fd, Interest::Readable)
66    }
67
68    /// Decide to wait for the given fd to be writable.
69    pub fn wait_writable(fd: RawFd) -> Self {
70        Self::wait_for(fd, Interest::Writable)
71    }
72
73    /// Decide to spawn an AIO operation. `submit` is a method like `aio_read`.
74    #[cfg(aio)]
75    pub fn aio(
76        cb: &mut libc::aiocb,
77        submit: unsafe extern "C" fn(*mut libc::aiocb) -> i32,
78    ) -> Self {
79        Self::Aio(AioControl {
80            aiocbp: NonNull::from(cb),
81            submit,
82        })
83    }
84}
85
/// Meta of polling operations.
///
/// Carried by [`Decision::Wait`]. The driver registers `fd` with the poller
/// and uses `interest` to pick the per-fd queue the operation waits in.
#[derive(Debug, Clone, Copy)]
pub struct WaitArg {
    /// The raw fd of the operation.
    pub fd: RawFd,
    /// The interest to be registered.
    pub interest: Interest,
}
94
/// Meta of AIO operations.
///
/// Carried by [`Decision::Aio`]. The driver dereferences `aiocbp` after
/// `pre_submit` has returned (to fill in the notification fields and to call
/// `submit`), so the control block has to outlive that call.
#[cfg(aio)]
#[derive(Debug, Clone, Copy)]
pub struct AioControl {
    /// Pointer of the control block.
    pub aiocbp: NonNull<libc::aiocb>,
    /// The aio_* submit function.
    pub submit: unsafe extern "C" fn(*mut libc::aiocb) -> i32,
}
104
105#[derive(Debug, Default)]
106struct FdQueue {
107    read_queue: VecDeque<usize>,
108    write_queue: VecDeque<usize>,
109}
110
111impl FdQueue {
112    pub fn push_back_interest(&mut self, user_data: usize, interest: Interest) {
113        match interest {
114            Interest::Readable => self.read_queue.push_back(user_data),
115            Interest::Writable => self.write_queue.push_back(user_data),
116        }
117    }
118
119    pub fn push_front_interest(&mut self, user_data: usize, interest: Interest) {
120        match interest {
121            Interest::Readable => self.read_queue.push_front(user_data),
122            Interest::Writable => self.write_queue.push_front(user_data),
123        }
124    }
125
126    pub fn remove(&mut self, user_data: usize) {
127        self.read_queue.retain(|&k| k != user_data);
128        self.write_queue.retain(|&k| k != user_data);
129    }
130
131    pub fn event(&self) -> Event {
132        let mut event = Event::none(0);
133        if let Some(&key) = self.read_queue.front() {
134            event.readable = true;
135            event.key = key;
136        }
137        if let Some(&key) = self.write_queue.front() {
138            event.writable = true;
139            event.key = key;
140        }
141        event
142    }
143
144    pub fn pop_interest(&mut self, event: &Event) -> Option<(usize, Interest)> {
145        if event.readable {
146            if let Some(user_data) = self.read_queue.pop_front() {
147                return Some((user_data, Interest::Readable));
148            }
149        }
150        if event.writable {
151            if let Some(user_data) = self.write_queue.pop_front() {
152                return Some((user_data, Interest::Writable));
153            }
154        }
155        None
156    }
157}
158
/// Represents the filter type of kqueue. `polling` crate doesn't expose such
/// API, and we need to know about it when `cancel` is called.
///
/// The driver also matches on this in `poll` to decide how an incoming event
/// is dispatched: through the per-fd queue, or through the AIO result calls.
#[non_exhaustive]
pub enum OpType {
    /// The operation polls an fd.
    Fd(RawFd),
    /// The operation submits an AIO.
    #[cfg(aio)]
    Aio(NonNull<libc::aiocb>),
}
169
/// Low-level driver of polling.
pub(crate) struct Driver {
    /// Event buffer handed to the poller; cleared before every wait.
    events: Events,
    /// The poller, shared with blocking-pool closures and notify handles.
    poll: Arc<Poller>,
    /// Pending operations per fd. Entries are created on submit and removed
    /// when the fd is deleted from the poller.
    registry: HashMap<RawFd, FdQueue>,
    /// Thread pool that runs blocking operations.
    pool: AsyncifyPool,
    /// Completions produced outside of the poller (blocking operations and
    /// cancelled fd operations), drained before each wait.
    pool_completed: Arc<SegQueue<Entry>>,
}
178
179impl Driver {
180    pub fn new(builder: &ProactorBuilder) -> io::Result<Self> {
181        instrument!(compio_log::Level::TRACE, "new", ?builder);
182        trace!("new poll driver");
183        let entries = builder.capacity as usize; // for the sake of consistency, use u32 like iour
184        let events = if entries == 0 {
185            Events::new()
186        } else {
187            Events::with_capacity(NonZeroUsize::new(entries).unwrap())
188        };
189
190        let poll = Arc::new(Poller::new()?);
191
192        Ok(Self {
193            events,
194            poll,
195            registry: HashMap::new(),
196            pool: builder.create_or_get_thread_pool(),
197            pool_completed: Arc::new(SegQueue::new()),
198        })
199    }
200
    /// Report which kind of driver this is; always [`DriverType::Poll`].
    pub fn driver_type(&self) -> DriverType {
        DriverType::Poll
    }
204
    /// Wrap `op` in a [`Key`] created with this driver's raw fd.
    pub fn create_op<T: crate::sys::OpCode + 'static>(&self, op: T) -> Key<T> {
        Key::new(self.as_raw_fd(), op)
    }
208
209    /// # Safety
210    /// The input fd should be valid.
211    unsafe fn submit(&mut self, user_data: usize, arg: WaitArg) -> io::Result<()> {
212        let need_add = !self.registry.contains_key(&arg.fd);
213        let queue = self.registry.entry(arg.fd).or_default();
214        queue.push_back_interest(user_data, arg.interest);
215        let event = queue.event();
216        if need_add {
217            self.poll.add(arg.fd, event)?;
218        } else {
219            let fd = BorrowedFd::borrow_raw(arg.fd);
220            self.poll.modify(fd, event)?;
221        }
222        Ok(())
223    }
224
225    fn renew(
226        poll: &Poller,
227        registry: &mut HashMap<RawFd, FdQueue>,
228        fd: BorrowedFd,
229        renew_event: Event,
230    ) -> io::Result<()> {
231        if !renew_event.readable && !renew_event.writable {
232            poll.delete(fd)?;
233            registry.remove(&fd.as_raw_fd());
234        } else {
235            poll.modify(fd, renew_event)?;
236        }
237        Ok(())
238    }
239
    /// Attach an fd to the driver.
    ///
    /// This is a no-op for the poll driver: the fd is ignored and the call
    /// always succeeds. Fds are registered with the poller on submit instead.
    pub fn attach(&mut self, _fd: RawFd) -> io::Result<()> {
        Ok(())
    }
243
244    pub fn cancel(&mut self, op: &mut Key<dyn crate::sys::OpCode>) {
245        let op_pin = op.as_op_pin();
246        match op_pin.op_type() {
247            None => {}
248            Some(OpType::Fd(fd)) => {
249                let queue = self
250                    .registry
251                    .get_mut(&fd)
252                    .expect("the fd should be attached");
253                queue.remove(op.user_data());
254                let renew_event = queue.event();
255                if Self::renew(
256                    &self.poll,
257                    &mut self.registry,
258                    unsafe { BorrowedFd::borrow_raw(fd) },
259                    renew_event,
260                )
261                .is_ok()
262                {
263                    self.pool_completed.push(entry_cancelled(op.user_data()));
264                }
265            }
266            #[cfg(aio)]
267            Some(OpType::Aio(aiocbp)) => {
268                let aiocb = unsafe { aiocbp.as_ref() };
269                let fd = aiocb.aio_fildes;
270                syscall!(libc::aio_cancel(fd, aiocbp.as_ptr())).ok();
271            }
272        }
273    }
274
275    pub fn push(&mut self, op: &mut Key<dyn crate::sys::OpCode>) -> Poll<io::Result<usize>> {
276        instrument!(compio_log::Level::TRACE, "push", ?op);
277        let user_data = op.user_data();
278        let op_pin = op.as_op_pin();
279        match op_pin.pre_submit()? {
280            Decision::Wait(arg) => {
281                // SAFETY: fd is from the OpCode.
282                unsafe {
283                    self.submit(user_data, arg)?;
284                }
285                trace!("register {:?}", arg);
286                Poll::Pending
287            }
288            Decision::Completed(res) => Poll::Ready(Ok(res)),
289            Decision::Blocking => self.push_blocking(user_data),
290            #[cfg(aio)]
291            Decision::Aio(AioControl { mut aiocbp, submit }) => {
292                let aiocb = unsafe { aiocbp.as_mut() };
293                #[cfg(freebsd)]
294                {
295                    // sigev_notify_kqueue
296                    aiocb.aio_sigevent.sigev_signo = self.poll.as_raw_fd();
297                    aiocb.aio_sigevent.sigev_notify = libc::SIGEV_KEVENT;
298                    aiocb.aio_sigevent.sigev_value.sival_ptr = user_data as _;
299                }
300                #[cfg(solarish)]
301                let mut notify = libc::port_notify {
302                    portnfy_port: self.poll.as_raw_fd(),
303                    portnfy_user: user_data as _,
304                };
305                #[cfg(solarish)]
306                {
307                    aiocb.aio_sigevent.sigev_notify = libc::SIGEV_PORT;
308                    aiocb.aio_sigevent.sigev_value.sival_ptr = &mut notify as *mut _ as _;
309                }
310                match syscall!(submit(aiocbp.as_ptr())) {
311                    Ok(_) => Poll::Pending,
312                    // FreeBSD:
313                    //   * EOPNOTSUPP: It's on a filesystem without AIO support. Just fallback to
314                    //     blocking IO.
315                    //   * EAGAIN: The process-wide queue is full. No safe way to remove the (maybe)
316                    //     dead entries.
317                    // Solarish:
318                    //   * EAGAIN: Allocation failed.
319                    Err(e)
320                        if matches!(
321                            e.raw_os_error(),
322                            Some(libc::EOPNOTSUPP) | Some(libc::EAGAIN)
323                        ) =>
324                    {
325                        self.push_blocking(user_data)
326                    }
327                    Err(e) => Poll::Ready(Err(e)),
328                }
329            }
330        }
331    }
332
333    fn push_blocking(&mut self, user_data: usize) -> Poll<io::Result<usize>> {
334        let poll = self.poll.clone();
335        let completed = self.pool_completed.clone();
336        let mut closure = move || {
337            let mut op = unsafe { Key::<dyn crate::sys::OpCode>::new_unchecked(user_data) };
338            let op_pin = op.as_op_pin();
339            let res = match op_pin.operate() {
340                Poll::Pending => unreachable!("this operation is not non-blocking"),
341                Poll::Ready(res) => res,
342            };
343            completed.push(Entry::new(user_data, res));
344            poll.notify().ok();
345        };
346        loop {
347            match self.pool.dispatch(closure) {
348                Ok(()) => return Poll::Pending,
349                Err(e) => {
350                    closure = e.0;
351                    self.poll_blocking();
352                }
353            }
354        }
355    }
356
357    fn poll_blocking(&mut self) -> bool {
358        if self.pool_completed.is_empty() {
359            return false;
360        }
361        while let Some(entry) = self.pool_completed.pop() {
362            unsafe {
363                entry.notify();
364            }
365        }
366        true
367    }
368
    /// Wait for events and drive the operations they belong to.
    ///
    /// Completions already queued on `pool_completed` take priority: if any
    /// are pending they are delivered and the call returns without waiting on
    /// the poller. Otherwise the poller is waited on with `timeout` and every
    /// returned event is dispatched by the [`OpType`] of the operation its
    /// key refers to.
    ///
    /// # Errors
    ///
    /// * Any error from waiting on the poller or from renewing an fd's
    ///   registration; remaining events of the batch are not processed then.
    /// * `ETIMEDOUT` when a `timeout` was given and the wait returned no
    ///   events.
    ///
    /// # Panics
    ///
    /// Panics if an fd event names an fd that has no queue in the registry.
    ///
    /// # Safety
    ///
    /// NOTE(review): the contract is not spelled out in this file. The body
    /// rebuilds `Key`s from raw `user_data` values with `new_unchecked`, so
    /// presumably every `user_data` the driver knows about must still refer
    /// to a live operation — confirm against `Key`.
    pub unsafe fn poll(&mut self, timeout: Option<Duration>) -> io::Result<()> {
        instrument!(compio_log::Level::TRACE, "poll", ?timeout);
        // Results that are already available are handed out first.
        if self.poll_blocking() {
            return Ok(());
        }
        self.events.clear();
        self.poll.wait(&mut self.events, timeout)?;
        // An empty batch after a bounded wait is reported as a timeout.
        if self.events.is_empty() && timeout.is_some() {
            return Err(io::Error::from_raw_os_error(libc::ETIMEDOUT));
        }
        for event in self.events.iter() {
            let user_data = event.key;
            trace!("receive {} for {:?}", user_data, event);
            let mut op = Key::<dyn crate::sys::OpCode>::new_unchecked(user_data);
            let op = op.as_op_pin();
            match op.op_type() {
                None => {
                    // On epoll, multiple events may be received even if it is registered as
                    // one-shot. It is safe to ignore it.
                    trace!("op {} is completed", user_data);
                }
                Some(OpType::Fd(fd)) => {
                    // If it's an FD op, the returned user_data is only for calling `op_type`. We
                    // need to pop the real user_data from the queue.
                    let queue = self
                        .registry
                        .get_mut(&fd)
                        .expect("the fd should be attached");
                    // At most one queued operation is served per event.
                    if let Some((user_data, interest)) = queue.pop_interest(&event) {
                        let mut op = Key::<dyn crate::sys::OpCode>::new_unchecked(user_data);
                        let op = op.as_op_pin();
                        let res = match op.operate() {
                            Poll::Pending => {
                                // The operation should go back to the front.
                                queue.push_front_interest(user_data, interest);
                                None
                            }
                            Poll::Ready(res) => Some(res),
                        };
                        if let Some(res) = res {
                            Entry::new(user_data, res).notify();
                        }
                    }
                    // Re-register whatever is still queued, or drop the fd
                    // from the poller when nothing is left.
                    let renew_event = queue.event();
                    Self::renew(
                        &self.poll,
                        &mut self.registry,
                        BorrowedFd::borrow_raw(fd),
                        renew_event,
                    )?;
                }
                #[cfg(aio)]
                Some(OpType::Aio(aiocbp)) => {
                    let err = unsafe { libc::aio_error(aiocbp.as_ptr()) };
                    let res = match err {
                        // If the user_data is reused but the previously registered event still
                        // emits (for example, HUP in epoll; however it is impossible now
                        // because we only use AIO on FreeBSD), we'd better ignore the current
                        // one and wait for the real event.
                        libc::EINPROGRESS => {
                            trace!("op {} is not completed", user_data);
                            continue;
                        }
                        libc::ECANCELED => {
                            // Remove the aiocb from kqueue.
                            libc::aio_return(aiocbp.as_ptr());
                            // Cancellation is reported as ETIMEDOUT, the same
                            // code `entry_cancelled` uses for fd operations.
                            Err(io::Error::from_raw_os_error(libc::ETIMEDOUT))
                        }
                        _ => syscall!(libc::aio_return(aiocbp.as_ptr())).map(|res| res as usize),
                    };
                    Entry::new(user_data, res).notify();
                }
            }
        }
        Ok(())
    }
445
446    pub fn handle(&self) -> NotifyHandle {
447        NotifyHandle::new(self.poll.clone())
448    }
449
450    pub fn create_buffer_pool(
451        &mut self,
452        buffer_len: u16,
453        buffer_size: usize,
454    ) -> io::Result<BufferPool> {
455        #[cfg(fusion)]
456        {
457            Ok(BufferPool::new_poll(crate::FallbackBufferPool::new(
458                buffer_len,
459                buffer_size,
460            )))
461        }
462        #[cfg(not(fusion))]
463        {
464            Ok(BufferPool::new(buffer_len, buffer_size))
465        }
466    }
467
    /// Release a buffer pool.
    ///
    /// The poll driver keeps no state for buffer pools, so this takes
    /// ownership of the pool and always succeeds.
    ///
    /// # Safety
    ///
    /// The caller must make sure to release the buffer pool with the correct
    /// driver.
    pub unsafe fn release_buffer_pool(&mut self, _: BufferPool) -> io::Result<()> {
        Ok(())
    }
474}
475
impl AsRawFd for Driver {
    /// Expose the raw fd of the underlying poller.
    fn as_raw_fd(&self) -> RawFd {
        self.poll.as_raw_fd()
    }
}
481
482impl Drop for Driver {
483    fn drop(&mut self) {
484        for fd in self.registry.keys() {
485            unsafe {
486                let fd = BorrowedFd::borrow_raw(*fd);
487                self.poll.delete(fd).ok();
488            }
489        }
490    }
491}
492
493fn entry_cancelled(user_data: usize) -> Entry {
494    Entry::new(
495        user_data,
496        Err(io::Error::from_raw_os_error(libc::ETIMEDOUT)),
497    )
498}
499
500/// A notify handle to the inner driver.
501pub struct NotifyHandle {
502    poll: Arc<Poller>,
503}
504
505impl NotifyHandle {
506    fn new(poll: Arc<Poller>) -> Self {
507        Self { poll }
508    }
509
510    /// Notify the inner driver.
511    pub fn notify(&self) -> io::Result<()> {
512        self.poll.notify()
513    }
514}