warp-types 0.3.2

//! Implicit vs Explicit Merge
//!
//! **STATUS: Research** — Exploratory prototype, not promoted to main API.
//!
//! Research question: "Should merge be implicit at block boundaries (like ISPC)?"
//!
//! # Background
//!
//! ISPC (Intel SPMD Program Compiler) has implicit reconvergence:
//! ```c
//! if (condition) {
//!     // Diverged code
//! }
//! // Implicit reconvergence here - all lanes active again
//! ```
//!
//! Our system has explicit merge:
//! ```rust,ignore
//! let (then_warp, else_warp) = warp.diverge(pred);
//! // ... use then_warp and else_warp ...
//! let merged = merge(then_warp, else_warp);  // Explicit!
//! ```
//!
//! Which is better?
//!
//! # Analysis
//!
//! ## Implicit Merge (ISPC-style)
//!
//! Pros:
//! - Familiar syntax (looks like normal if/else)
//! - Impossible to forget merge
//! - Less boilerplate
//!
//! Cons:
//! - Reconvergence point is implicit (harder to reason about)
//! - No control over WHEN merge happens
//! - Can't compose diverged branches across function boundaries
//! - Must track active mask in shadow state (not visible in types)
//!
//! ## Explicit Merge (Our approach)
//!
//! Pros:
//! - Reconvergence is visible in code and types
//! - Can delay merge (useful for some algorithms)
//! - Composable across function boundaries
//! - No hidden state
//!
//! Cons:
//! - More boilerplate
//! - Possible to forget merge (but type system catches it!)
//! - Less familiar syntax
//!
//! # Key Insight
//!
//! ISPC's implicit merge works because ISPC targets CPU SIMD where:
//! - Reconvergence is always at block end (structured control flow)
//! - No shuffle/reduce operations that require all lanes
//! - The "mask" is a runtime value, not type state
//!
//! GPU warps are different:
//! - shuffle() requires all lanes - must know reconvergence precisely
//! - Unstructured control flow exists (early return, break, continue)
//! - Reconvergence bugs are deadlocks, not just wrong values
//!
//! # Conclusion
//!
//! For GPU warp programming, EXPLICIT merge is better because:
//! 1. shuffle/reduce operations NEED type-level active set tracking
//! 2. Deadlock bugs are worse than forgetting a merge call
//! 3. Explicit is composable; implicit requires structured control flow
//!
//! However, we can provide SUGAR for common patterns!

use std::marker::PhantomData;

// ============================================================================
// BASIC TYPES
// ============================================================================

pub trait ActiveSet: Copy + 'static {
    const MASK: u32;
}

pub trait ComplementOf<T>: ActiveSet {}

#[derive(Copy, Clone)]
pub struct All;
impl ActiveSet for All {
    const MASK: u32 = 0xFFFFFFFF;
}

#[derive(Copy, Clone)]
pub struct Even;
impl ActiveSet for Even {
    const MASK: u32 = 0x55555555;
}

#[derive(Copy, Clone)]
pub struct Odd;
impl ActiveSet for Odd {
    const MASK: u32 = 0xAAAAAAAA;
}

impl ComplementOf<Odd> for Even {}
impl ComplementOf<Even> for Odd {}

#[derive(Copy, Clone)]
pub struct Warp<S: ActiveSet> {
    _marker: PhantomData<S>,
}

impl<S: ActiveSet> Warp<S> {
    pub fn new() -> Self {
        Warp {
            _marker: PhantomData,
        }
    }
}

// ============================================================================
// EXPLICIT MERGE (Our approach)
// ============================================================================

pub mod explicit {
    use super::*;

    /// Diverge into complementary sets
    pub fn diverge<S1, S2>(_warp: Warp<All>) -> (Warp<S1>, Warp<S2>)
    where
        S1: ActiveSet + ComplementOf<S2>,
        S2: ActiveSet + ComplementOf<S1>,
    {
        (Warp::new(), Warp::new())
    }

    /// Merge complementary sets back to All
    pub fn merge<S1, S2>(_w1: Warp<S1>, _w2: Warp<S2>) -> Warp<All>
    where
        S1: ActiveSet + ComplementOf<S2>,
        S2: ActiveSet + ComplementOf<S1>,
    {
        Warp::new()
    }

    /// Example: explicit diverge/merge
    ///
    /// Note: In this research prototype, `Warp<S>` is `Copy`, so the merge
    /// is not enforced by move semantics. The promoted version in `src/warp.rs`
    /// uses non-Copy warps where `diverge` consumes the original, making
    /// merge genuinely required.
    pub fn example_explicit() {
        let warp: Warp<All> = Warp::new();

        // Explicit diverge
        let (even, odd): (Warp<Even>, Warp<Odd>) = diverge(warp);

        // Do work on each half
        let _ = process_even(even);
        let _ = process_odd(odd);

        // Explicit merge - required by protocol (enforced via move semantics
        // in the promoted version; here Warp is Copy so this is illustrative)
        let _merged: Warp<All> = merge(even, odd);
    }

    fn process_even(_w: Warp<Even>) -> i32 {
        1
    }
    fn process_odd(_w: Warp<Odd>) -> i32 {
        2
    }

    #[cfg(test)]
    mod tests {
        use super::*;

        #[test]
        fn test_explicit_diverge_merge() {
            example_explicit();
        }
    }
}

// ============================================================================
// SYNTACTIC SUGAR: with_diverged combinator
// ============================================================================

/// Sugar for common "diverge, do something, merge" pattern
///
/// This is NOT implicit merge - the merge is explicit in the combinator's
/// signature. But it provides convenient syntax for simple cases.
pub mod sugar {
    use super::*;

    /// Diverge, process each branch, automatically merge
    ///
    /// Type signature makes merge explicit:
    /// `with_diverged : Warp<All> -> (Warp<S1> -> A) -> (Warp<S2> -> A) -> (A, A)`
    ///
    /// The merge happens at the END of with_diverged, guaranteed.
    pub fn with_diverged<S1, S2, A, F1, F2>(_warp: Warp<All>, then_fn: F1, else_fn: F2) -> (A, A)
    where
        S1: ActiveSet + ComplementOf<S2>,
        S2: ActiveSet + ComplementOf<S1>,
        F1: FnOnce(Warp<S1>) -> A,
        F2: FnOnce(Warp<S2>) -> A,
    {
        let then_result = then_fn(Warp::new());
        let else_result = else_fn(Warp::new());
        // Merge is implicit HERE, in the combinator, but explicit in the TYPE
        (then_result, else_result)
    }

    /// Even more sugary: if-like syntax with merge
    pub fn warp_if<A, F1, F2>(
        warp: Warp<All>,
        _pred: impl Fn(usize) -> bool,
        then_fn: F1,
        else_fn: F2,
    ) -> (A, A)
    where
        F1: FnOnce(Warp<Even>) -> A,
        F2: FnOnce(Warp<Odd>) -> A,
    {
        // In real implementation, predicate would determine active sets
        // For demo, we use Even/Odd
        with_diverged::<Even, Odd, A, F1, F2>(warp, then_fn, else_fn)
    }

    /// Example: using sugar
    pub fn example_sugar() {
        let warp: Warp<All> = Warp::new();

        let (even_result, odd_result) = warp_if(
            warp,
            |lane| lane % 2 == 0,
            |_even_warp| 100,
            |_odd_warp| 200,
        );

        assert_eq!(even_result, 100);
        assert_eq!(odd_result, 200);
    }

    #[cfg(test)]
    mod tests {
        use super::*;

        #[test]
        fn test_with_diverged() {
            let warp: Warp<All> = Warp::new();

            let (a, b) = with_diverged::<Even, Odd, i32, _, _>(warp, |_| 1, |_| 2);

            assert_eq!(a, 1);
            assert_eq!(b, 2);
        }

        #[test]
        fn test_warp_if() {
            example_sugar();
        }
    }
}

// ============================================================================
// SCOPED DIVERGENCE: RAII-style merge
// ============================================================================

/// RAII-style scoped divergence that merges on drop
///
/// This provides implicit merge at scope end, similar to ISPC,
/// but the scope is explicit (unlike invisible block boundaries).
pub mod scoped {
    use super::*;

    /// A diverged warp that will merge when both halves go out of scope
    pub struct ScopedDiverge<S1: ActiveSet, S2: ActiveSet> {
        pub left: Warp<S1>,
        pub right: Warp<S2>,
    }

    impl<S1: ActiveSet + ComplementOf<S2>, S2: ActiveSet + ComplementOf<S1>> ScopedDiverge<S1, S2> {
        pub fn new(_warp: Warp<All>) -> Self {
            ScopedDiverge {
                left: Warp::new(),
                right: Warp::new(),
            }
        }

        /// Explicitly consume and merge
        pub fn merge(self) -> Warp<All> {
            Warp::new()
        }
    }

    // Note: We intentionally DON'T implement Drop to auto-merge.
    // This would hide the merge point, which defeats the purpose.
    // Instead, we require explicit .merge() call.

    #[cfg(test)]
    mod tests {
        use super::*;

        #[test]
        fn test_scoped_diverge() {
            let warp: Warp<All> = Warp::new();
            let diverged = ScopedDiverge::<Even, Odd>::new(warp);

            // Note: accessing .left/.right copies the Warp<S> (which is Copy),
            // so these are independent copies, not moves from diverged.
            // The promoted version in src/warp.rs uses non-Copy warps.
            let _even: Warp<Even> = diverged.left;
            let _odd: Warp<Odd> = diverged.right;

            // Explicit merge consumes the ScopedDiverge (which is not Copy)
            let _merged: Warp<All> = diverged.merge();
        }
    }
}

// ============================================================================
// COMPARISON WITH ISPC
// ============================================================================

/// ISPC-style implicit reconvergence (for comparison)
///
/// This shows what ISPC does - mask tracking at runtime, implicit merge.
/// We DON'T recommend this for GPU warps, but show it for comparison.
pub mod ispc_style {
    /// Runtime mask tracking (like ISPC)
    pub struct MaskedExecution {
        active_mask: u32,
    }

    impl MaskedExecution {
        pub fn new() -> Self {
            MaskedExecution {
                active_mask: 0xFFFFFFFF,
            }
        }

        pub fn active_mask(&self) -> u32 {
            self.active_mask
        }

        /// ISPC-style if: sets mask, executes body, restores mask
        pub fn masked_if<F>(&mut self, pred: impl Fn(usize) -> bool, body: F)
        where
            F: FnOnce(&mut Self),
        {
            let old_mask = self.active_mask;

            // Compute new mask
            let mut new_mask = 0u32;
            for lane in 0..32 {
                if old_mask & (1 << lane) != 0 && pred(lane) {
                    new_mask |= 1 << lane;
                }
            }

            // Execute body with new mask
            self.active_mask = new_mask;
            body(self);

            // IMPLICIT RECONVERGENCE: restore mask
            self.active_mask = old_mask;
        }

        /// Problem: Can't know statically if shuffle is safe!
        pub fn unsafe_shuffle(&self) {
            // In ISPC style, we don't know at compile time if all lanes active
            // This would be a runtime check or silent bug
            if self.active_mask != 0xFFFFFFFF {
                panic!("Shuffle requires all lanes!");
            }
            // ... do shuffle ...
        }
    }

    #[cfg(test)]
    mod tests {
        use super::*;

        #[test]
        fn test_ispc_style() {
            let mut exec = MaskedExecution::new();

            assert_eq!(exec.active_mask(), 0xFFFFFFFF);

            exec.masked_if(
                |lane| lane % 2 == 0,
                |inner| {
                    // Only even lanes active
                    assert_eq!(inner.active_mask() & 0x55555555, inner.active_mask());
                },
            );

            // Implicitly back to all
            assert_eq!(exec.active_mask(), 0xFFFFFFFF);
        }

        #[test]
        #[should_panic(expected = "Shuffle requires all lanes")]
        fn test_ispc_shuffle_bug() {
            let mut exec = MaskedExecution::new();

            exec.masked_if(
                |lane| lane % 2 == 0,
                |inner| {
                    // Bug: trying to shuffle when not all lanes active
                    inner.unsafe_shuffle();
                },
            );
        }
    }
}

// ============================================================================
// CONCLUSION
// ============================================================================

/// Summary: Should merge be implicit at block boundaries?
///
/// ## Answer: NO for GPU warps, but provide SUGAR
///
/// ### Why Not Implicit
///
/// 1. **shuffle/reduce need all lanes**: Type must track active set
/// 2. **Deadlock risk**: GPU divergence bugs are worse than CPU
/// 3. **Composability**: Can't pass diverged warp across functions with implicit
/// 4. **Visibility**: Explicit merge makes reconvergence point clear
///
/// ### What We Provide Instead
///
/// 1. **Combinators**: `with_diverged(warp, then_fn, else_fn)` - merge is
///    explicit in TYPE but convenient in syntax
///
/// 2. **Sugar**: `warp_if` for common patterns
///
/// 3. **Scoped**: `ScopedDiverge` groups diverged warps, requires explicit merge
///
/// ### ISPC Comparison
///
/// ISPC's implicit merge works for CPU SIMD because:
/// - No shuffle operations that require all elements
/// - Silent wrong values, not deadlocks
/// - Structured control flow only
///
/// GPU warps are different enough that explicit merge is worth the boilerplate.
///
/// ### Type System Advantage
///
/// With explicit merge, the type system PREVENTS the bug:
/// ```rust,ignore
/// let (even, odd) = diverge(warp);
/// even.shuffle_xor(1);  // TYPE ERROR: shuffle requires Warp<All>
/// ```
///
/// With implicit merge (ISPC-style), this is a runtime error at best.
pub const _CONCLUSION: () = ();

#[cfg(test)]
mod integration_tests {
    use super::*;

    #[test]
    fn test_explicit_is_required_for_shuffle() {
        // This test demonstrates WHY we need explicit merge:
        // shuffle_xor is only available on Warp<All>

        let warp: Warp<All> = Warp::new();

        // Can shuffle before diverge
        let _ = shuffle_xor(&warp, 1);

        let (even, odd): (Warp<Even>, Warp<Odd>) = explicit::diverge(warp);

        // CANNOT shuffle diverged warp (compile error if uncommented):
        // let _ = shuffle_xor(&even, 1);  // ERROR: expected Warp<All>

        let merged = explicit::merge(even, odd);

        // Can shuffle after merge
        let _ = shuffle_xor(&merged, 1);
    }

    fn shuffle_xor(_warp: &Warp<All>, _delta: u32) -> [i32; 32] {
        [0; 32]
    }
}