//! Events can be used to track status and dependencies, as well as to measure
//! the duration of work submitted to a CUDA stream.
//!
//! In CUDA, most work is performed asynchronously. Events help to manage tasks
//! scheduled on an asynchronous stream. This includes waiting for a task (or
//! multiple tasks) to complete, and measuring the time it takes to
//! complete a task. Events can also be used to sequence tasks on multiple
//! streams within the same context by specifying dependent tasks (not supported
//! yet by RustaCUDA).
//!
//! Events may be reused multiple times.
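//!
//! # Example
//!
//! A minimal sketch of timing work on a stream (assuming a context created with
//! `quick_init`, and with real work submitted between the two `record` calls):
//!
//! ```
//! # use rustacuda::quick_init;
//! # use rustacuda::stream::{Stream, StreamFlags};
//! # use std::error::Error;
//! # fn main() -> Result<(), Box<dyn Error>> {
//! # let _context = quick_init()?;
//! use rustacuda::event::{Event, EventFlags};
//!
//! let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
//! let start = Event::new(EventFlags::DEFAULT)?;
//! let stop = Event::new(EventFlags::DEFAULT)?;
//!
//! // Mark the start, submit work to the stream, then mark the end.
//! start.record(&stream)?;
//! // ... launch kernels or copies on `stream` here ...
//! stop.record(&stream)?;
//!
//! // Wait for everything before `stop` to finish, then read the elapsed time.
//! stop.synchronize()?;
//! let millis = stop.elapsed_time_f32(&start)?;
//! # let _ = millis;
//! # Ok(())
//! # }
//! ```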

// TODO: I'm not sure that these events are/can be safe by Rust's model of safety; they inherently
// create state which can be mutated even while an immutable borrow is held.

use crate::error::{CudaError, CudaResult, DropResult, ToResult};
use crate::stream::Stream;
use cuda_driver_sys::{
    cuEventCreate, cuEventDestroy_v2, cuEventElapsedTime, cuEventQuery, cuEventRecord,
    cuEventSynchronize, CUevent,
};

use std::mem;
use std::ptr;

bitflags! {
    /// Bit flags for configuring a CUDA Event.
    ///
    /// The CUDA documentation notes that events created with `DISABLE_TIMING` set and
    /// `BLOCKING_SYNC` *not* set give the best performance when used with `query()` and
    /// `stream.wait_event()`.
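    ///
    /// A minimal usage sketch (assuming a context created with `quick_init`):
    ///
    /// ```
    /// # use rustacuda::quick_init;
    /// # use std::error::Error;
    /// # fn main() -> Result<(), Box<dyn Error>> {
    /// # let _context = quick_init()?;
    /// use rustacuda::event::{Event, EventFlags};
    ///
    /// // An event used purely for synchronization: no timing data is recorded,
    /// // and blocking waits sleep instead of busy-waiting.
    /// let event = Event::new(EventFlags::DISABLE_TIMING | EventFlags::BLOCKING_SYNC)?;
    /// # Ok(())
    /// # }
    /// ```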
    pub struct EventFlags: u32 {
        /// The default event creation flag.
        const DEFAULT = 0x0;

        /// Specify that the created event should use blocking synchronization:
        /// a thread waiting on the event (for example via `synchronize`) will
        /// sleep until the event completes instead of busy-waiting.
        const BLOCKING_SYNC = 0x1;

        /// Specify that the created event does not need to record timing data.
        const DISABLE_TIMING = 0x2;

        /// Specify that the created event may be used as an interprocess event.
        /// (not supported yet by RustaCUDA). This flag requires
        /// `DISABLE_TIMING` to be set as well.
        const INTERPROCESS = 0x4;
    }
}

/// Status enum that represents the current status of an event.
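///
/// A status can be polled in a loop (a sketch assuming a context created with
/// `quick_init`; `Event::synchronize` is usually preferable to spinning):
///
/// ```
/// # use rustacuda::quick_init;
/// # use rustacuda::stream::{Stream, StreamFlags};
/// # use std::error::Error;
/// # fn main() -> Result<(), Box<dyn Error>> {
/// # let _context = quick_init()?;
/// use rustacuda::event::{Event, EventFlags, EventStatus};
///
/// let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
/// let event = Event::new(EventFlags::DEFAULT)?;
/// event.record(&stream)?;
///
/// // Spin until all work submitted before the event has completed.
/// while event.query()? == EventStatus::NotReady {}
/// # Ok(())
/// # }
/// ```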
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum EventStatus {
    /// Ready indicates that all work captured by the event has been completed.
    ///
    /// The CUDA documentation states that for Unified Memory, `EventStatus::Ready` is
    /// equivalent to having called `Event::synchronize`.
    Ready,

    /// `EventStatus::NotReady` indicates that the work captured by the event is still
    /// incomplete.
    NotReady,
}

/// An event to track work submitted to a stream.
///
/// See the module-level documentation for more information.
#[derive(Debug)]
pub struct Event(CUevent);

impl Event {
    /// Create a new event with the specified flags.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::quick_init;
    /// # use std::error::Error;
    /// # fn main() -> Result<(), Box<dyn Error>> {
    /// # let _context = quick_init()?;
    /// use rustacuda::event::{Event, EventFlags};
    ///
    /// // With default settings
    /// let event = Event::new(EventFlags::DEFAULT)?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn new(flags: EventFlags) -> CudaResult<Self> {
        unsafe {
            let mut event: CUevent = mem::zeroed();
            cuEventCreate(&mut event, flags.bits()).to_result()?;
            Ok(Event(event))
        }
    }

    /// Add the event to the given stream of work. The event will be completed when the stream
    /// completes all previously-submitted work and reaches the event in the queue.
    ///
    /// This function is used together with `query`, `synchronize`, and
    /// `elapsed_time_f32`. See the respective functions for more information.
    ///
    /// If the event is created with `EventFlags::BLOCKING_SYNC`, then `record`
    /// blocks until the event has actually been recorded.
    ///
    /// # Errors
    ///
    /// If the event and stream are not from the same context, an error is
    /// returned.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::quick_init;
    /// # use rustacuda::stream::{Stream, StreamFlags};
    /// # use std::error::Error;
    /// # fn main() -> Result<(), Box<dyn Error>> {
    /// # let _context = quick_init()?;
    /// use rustacuda::event::{Event, EventFlags};
    ///
    /// let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
    /// let event = Event::new(EventFlags::DEFAULT)?;
    ///
    /// // submit some work ...
    ///
    /// event.record(&stream)?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn record(&self, stream: &Stream) -> CudaResult<()> {
        unsafe {
            cuEventRecord(self.0, stream.as_inner()).to_result()?;
            Ok(())
        }
    }

    /// Return whether the stream on which this event was recorded (see `record`) has reached the
    /// event yet. A return value of `EventStatus::Ready` indicates that all work submitted before
    /// the event has been completed.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::quick_init;
    /// # use rustacuda::stream::{Stream, StreamFlags};
    /// # use std::error::Error;
    /// # fn main() -> Result<(), Box<dyn Error>> {
    /// # let _context = quick_init()?;
    /// use rustacuda::event::{Event, EventFlags, EventStatus};
    ///
    /// let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
    /// let event = Event::new(EventFlags::DEFAULT)?;
    ///
    /// // do some work ...
    ///
    /// // record an event
    /// event.record(&stream)?;
    ///
    /// // ... wait some time ...
    /// # event.synchronize()?;
    ///
    /// // query if the work is finished
    /// let status = event.query()?;
    /// assert_eq!(status, EventStatus::Ready);
    /// # Ok(())
    /// # }
    /// ```
    pub fn query(&self) -> CudaResult<EventStatus> {
        let result = unsafe { cuEventQuery(self.0).to_result() };

        match result {
            Ok(()) => Ok(EventStatus::Ready),
            Err(CudaError::NotReady) => Ok(EventStatus::NotReady),
            Err(other) => Err(other),
        }
    }

    /// Wait for an event to complete.
    ///
    /// Blocks thread execution until all work submitted before the event was
    /// recorded has completed. `EventFlags::BLOCKING_SYNC` controls the mode of
    /// blocking. If the flag is set on event creation, the thread will sleep.
    /// Otherwise, the thread will busy-wait.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::quick_init;
    /// # use rustacuda::stream::{Stream, StreamFlags};
    /// # use std::error::Error;
    /// # fn main() -> Result<(), Box<dyn Error>> {
    /// # let _context = quick_init()?;
    /// use rustacuda::event::{Event, EventFlags};
    ///
    /// let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
    /// let event = Event::new(EventFlags::DEFAULT)?;
    ///
    /// // do some work ...
    ///
    /// // record an event
    /// event.record(&stream)?;
    ///
    /// // wait until the work is finished
    /// event.synchronize()?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn synchronize(&self) -> CudaResult<()> {
        unsafe {
            cuEventSynchronize(self.0).to_result()?;
            Ok(())
        }
    }

    /// Return the duration between two events.
    ///
    /// The duration is computed in milliseconds with a resolution of
    /// approximately 0.5 microseconds. This can be used to measure the duration of work
    /// queued in between the two events.
    ///
    /// # Errors
    ///
    /// `CudaError::NotReady` is returned if either event is not yet complete.
    ///
    /// `CudaError::InvalidHandle` is returned if
    /// - the two events are not from the same context, or if
    /// - `record` has not been called on either event, or if
    /// - the `DISABLE_TIMING` flag is set on either event.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::quick_init;
    /// # use rustacuda::stream::{Stream, StreamFlags};
    /// # use rustacuda::launch;
    /// # use rustacuda::module::Module;
    /// # use rustacuda::memory::DeviceBox;
    /// # use std::error::Error;
    /// # use std::ffi::CString;
    /// # fn main() -> Result<(), Box<dyn Error>> {
    /// # let _context = quick_init()?;
    /// # let module_data = CString::new(include_str!("../resources/add.ptx"))?;
    /// # let module = Module::load_from_string(&module_data)?;
    /// # let mut x = DeviceBox::new(&10.0f32)?;
    /// # let mut y = DeviceBox::new(&20.0f32)?;
    /// # let mut result = DeviceBox::new(&0.0f32)?;
    /// use rustacuda::event::{Event, EventFlags};
    ///
    /// let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
    /// let start_event = Event::new(EventFlags::DEFAULT)?;
    /// let stop_event = Event::new(EventFlags::DEFAULT)?;
    ///
    /// // start recording time
    /// start_event.record(&stream)?;
    ///
    /// // do some work ...
    /// # unsafe {
    /// #    launch!(module.sum<<<1, 1, 0, stream>>>(
    /// #            x.as_device_ptr(),
    /// #            y.as_device_ptr(),
    /// #            result.as_device_ptr(),
    /// #            1 // Length
    /// #            ))?;
    /// # }
    ///
    /// // stop recording time
    /// stop_event.record(&stream)?;
    ///
    /// // wait for the work to complete
    /// stop_event.synchronize()?;
    ///
    /// // compute the time elapsed between the start and stop events
    /// let time = stop_event.elapsed_time_f32(&start_event)?;
    ///
    /// # assert!(time > 0.0);
    /// # Ok(())
    /// # }
    /// ```
    pub fn elapsed_time_f32(&self, start: &Self) -> CudaResult<f32> {
        unsafe {
            let mut millis: f32 = 0.0;
            cuEventElapsedTime(&mut millis, start.0, self.0).to_result()?;
            Ok(millis)
        }
    }

    // Get the inner `CUevent` from the `Event`.
    //
    // Necessary for certain CUDA functions outside of this
    // module that expect a bare `CUevent`.
    pub(crate) fn as_inner(&self) -> CUevent {
        self.0
    }

    /// Destroy an `Event`, returning an error if one occurs.
    ///
    /// Destroying an event can return errors from previous asynchronous work.
    /// On failure, this function returns both the error and the un-destroyed
    /// event.
    ///
    /// # Example
    ///
    /// ```
    /// # use rustacuda::*;
    /// # use std::error::Error;
    /// # fn main() -> Result<(), Box<dyn Error>> {
    /// # let _context = quick_init()?;
    /// use rustacuda::event::{Event, EventFlags};
    ///
    /// let event = Event::new(EventFlags::DEFAULT)?;
    /// match Event::drop(event) {
    ///     Ok(()) => println!("Successfully destroyed"),
    ///     Err((cuda_error, event)) => {
    ///         println!("Failed to destroy event: {:?}", cuda_error);
    ///         // Do something with event
    ///     },
    /// }
    /// # Ok(())
    /// # }
    /// ```
    pub fn drop(mut event: Event) -> DropResult<Event> {
        if event.0.is_null() {
            return Ok(());
        }

        unsafe {
            let inner = mem::replace(&mut event.0, ptr::null_mut());
            match cuEventDestroy_v2(inner).to_result() {
                Ok(()) => {
                    mem::forget(event);
                    Ok(())
                }
                Err(e) => Err((e, Event(inner))),
            }
        }
    }
}

impl Drop for Event {
    fn drop(&mut self) {
        // The pointer is null if the event was already destroyed via `Event::drop`.
        if self.0.is_null() {
            return;
        }

        unsafe { cuEventDestroy_v2(self.0) }
            .to_result()
            .expect("Failed to destroy CUDA event");
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::quick_init;
    use crate::stream::StreamFlags;
    use std::error::Error;

    #[test]
    fn test_new_with_flags() -> Result<(), Box<dyn Error>> {
        let _context = quick_init()?;
        let _event = Event::new(EventFlags::BLOCKING_SYNC | EventFlags::DISABLE_TIMING)?;
        Ok(())
    }

    #[test]
    fn test_record_with_wrong_context() -> Result<(), Box<dyn Error>> {
        let _context = quick_init()?;
        let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
        let _new_context = quick_init()?;
        let event = Event::new(EventFlags::DEFAULT)?;
        let result = event.record(&stream);
        assert_eq!(result, Err(CudaError::InvalidHandle));
        Ok(())
    }

    #[test]
    fn test_elapsed_time_f32_with_wrong_context() -> Result<(), Box<dyn Error>> {
        let _context = quick_init()?;
        let fst_stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
        let fst_event = Event::new(EventFlags::DEFAULT)?;
        fst_event.record(&fst_stream)?;

        let _context = quick_init()?;
        let snd_stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
        let snd_event = Event::new(EventFlags::DEFAULT)?;
        snd_event.record(&snd_stream)?;

        fst_event.synchronize()?;
        snd_event.synchronize()?;
        let result = snd_event.elapsed_time_f32(&fst_event);
        assert_eq!(result, Err(CudaError::InvalidHandle));
        Ok(())
    }

    #[test]
    fn test_elapsed_time_f32_with_different_streams() -> Result<(), Box<dyn Error>> {
        let _context = quick_init()?;
        let fst_stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
        let fst_event = Event::new(EventFlags::DEFAULT)?;
        fst_event.record(&fst_stream)?;

        let snd_stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
        let snd_event = Event::new(EventFlags::DEFAULT)?;
        snd_event.record(&snd_stream)?;

        fst_event.synchronize()?;
        snd_event.synchronize()?;
        let _result = snd_event.elapsed_time_f32(&fst_event)?;
        Ok(())
    }

    #[test]
    fn test_elapsed_time_f32_with_disable_timing() -> Result<(), Box<dyn Error>> {
        let _context = quick_init()?;
        let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;

        let start_event = Event::new(EventFlags::DISABLE_TIMING)?;
        start_event.record(&stream)?;

        let stop_event = Event::new(EventFlags::DEFAULT)?;
        stop_event.record(&stream)?;

        stop_event.synchronize()?;
        let result = stop_event.elapsed_time_f32(&start_event);
        assert_eq!(result, Err(CudaError::InvalidHandle));
        Ok(())
    }
}