leopard_vec/
leopard.rs

1//! # Leopard
2//!
3//! A high-performance parallelized vector container library with deferred execution.
4//!
5//! Leopard provides [`LVec`], a parallel vector container that records operations and
6//! executes them in a single bulk parallel pass. This design minimizes thread pool
7//! creation overhead by batching all operations together.
8//!
9//! ## Key Features
10//!
11//! - **Deferred Execution**: Operations are recorded between [`LQueue::start`] and
12//!   [`LQueue::end`], then executed in one parallel batch
13//! - **Type-Agnostic Queue**: A single [`LQueue`] can manage `LVec<T>` of different types
14//! - **SIMD-Style Masking**: Boolean masks with blend, select, and masked operations
15//! - **Operator Overloading**: Natural syntax with `+`, `-`, `*`, `/` operators
16//! - **Dependency Graph**: Operations are automatically ordered based on data dependencies
17//!
18//! ## Quick Start
19//!
20//! ```rust
21//! use leopard::{LQueue, LVec, LMask};
22//!
23//! let q = LQueue::new();
24//! let x: LVec<f64> = q.lvec_with_capacity(1000);
25//! let y: LVec<f64> = q.lvec_with_capacity(1000);
26//!
27//! q.start();
28//! let x = x.fill_with(|i| i as f64);
29//! let y = y.fill_with(|i| (i * 2) as f64);
30//! let z = &x * &y + &x;
31//! q.end();
32//!
33//! let result = z.materialize().unwrap();
34//! ```
35
36use std::any::Any;
37use std::cell::RefCell;
38use std::ops::{Add, Sub, Mul, Div, Index, BitAnd, BitOr, BitXor, Not};
39use std::rc::Rc;
40use std::sync::Arc;
41use rayon::prelude::*;
42
/// Default reserved capacity for LVec when using [`LQueue::lvec`].
///
/// [`LQueue::lvec`] forwards this value to [`LQueue::lvec_with_capacity`], so a
/// vector created that way starts with this many `T::default()` elements.
const DEFAULT_CAPACITY: usize = 128;
45
46// ============================================================================
47// Type-erased Operation Trait (Internal)
48// ============================================================================
49
/// Internal trait for type-erased operations that can be executed.
///
/// Recorded operations are stored as `Box<dyn ErasedOperation>`, which lets one
/// queue hold operations over different element types. Results are exchanged as
/// `Arc<dyn Any + Send + Sync>` and downcast back to `Vec<T>` by the consumer.
trait ErasedOperation: Send + Sync {
    /// Execute the operation and return the result as a type-erased Arc.
    ///
    /// `results` is the queue's result table, indexed by result ID. A slot is
    /// `None` until the operation that produces it has been executed.
    fn execute(&self, results: &[Option<Arc<dyn Any + Send + Sync>>]) -> Arc<dyn Any + Send + Sync>;
    
    /// Get the unique result ID for this operation.
    ///
    /// The executor stores the value returned by `execute` in the result
    /// table slot with this index.
    fn result_id(&self) -> usize;
    
    /// Get the list of result IDs this operation depends on.
    ///
    /// Only pending operands contribute; operands that already carry their data
    /// do not appear here.
    fn dependencies(&self) -> Vec<usize>;
}
61
62// ============================================================================
63// Operation Types (Internal)
64// ============================================================================
65
/// Selects which element-wise arithmetic a [`BinaryOp`] performs.
#[derive(Clone, Copy, Debug)]
enum BinaryOpType {
    /// `left + right`
    Add,
    /// `left - right`
    Sub,
    /// `left * right`
    Mul,
    /// `left / right`
    Div,
}
73
/// Source of an operand - either direct data or a pending result from another operation.
///
/// `Direct` operands are usable immediately. `Pending` operands are resolved at
/// execution time by looking the ID up in the queue's result table.
#[derive(Clone)]
enum OperandSource<T: Clone + Send + Sync> {
    /// Direct data that is already available.
    Direct(Arc<Vec<T>>),
    /// Pending result from another operation, identified by result ID.
    Pending(usize),
}
82
83/// Binary operation between two LVecs (add, sub, mul, div).
84struct BinaryOp<T: Clone + Send + Sync + 'static> {
85    op_type: BinaryOpType,
86    left: OperandSource<T>,
87    right: OperandSource<T>,
88    result_id: usize,
89}
90
91impl<T> ErasedOperation for BinaryOp<T>
92where
93    T: Clone + Send + Sync + Add<Output = T> + Sub<Output = T> + Mul<Output = T> + Div<Output = T> + 'static,
94{
95    fn execute(&self, results: &[Option<Arc<dyn Any + Send + Sync>>]) -> Arc<dyn Any + Send + Sync> {
96        let left_data = get_data::<T>(&self.left, results);
97        let right_data = get_data::<T>(&self.right, results);
98        let len = left_data.len().min(right_data.len());
99        let op_type = self.op_type;
100
101        let result: Vec<T> = (0..len)
102            .into_par_iter()
103            .map(|i| {
104                let l = left_data[i].clone();
105                let r = right_data[i].clone();
106                match op_type {
107                    BinaryOpType::Add => l + r,
108                    BinaryOpType::Sub => l - r,
109                    BinaryOpType::Mul => l * r,
110                    BinaryOpType::Div => l / r,
111                }
112            })
113            .collect();
114
115        Arc::new(result)
116    }
117
118    fn result_id(&self) -> usize {
119        self.result_id
120    }
121
122    fn dependencies(&self) -> Vec<usize> {
123        let mut deps = Vec::new();
124        if let OperandSource::Pending(id) = &self.left {
125            deps.push(*id);
126        }
127        if let OperandSource::Pending(id) = &self.right {
128            deps.push(*id);
129        }
130        deps
131    }
132}
133
134/// Map operation that transforms each element using a closure.
135struct MapOp<T: Clone + Send + Sync + 'static> {
136    source: OperandSource<T>,
137    func: Arc<dyn Fn(usize, &T) -> T + Send + Sync>,
138    len: usize,
139    result_id: usize,
140}
141
142impl<T: Clone + Send + Sync + 'static> ErasedOperation for MapOp<T> {
143    fn execute(&self, results: &[Option<Arc<dyn Any + Send + Sync>>]) -> Arc<dyn Any + Send + Sync> {
144        let data = get_data::<T>(&self.source, results);
145        let len = self.len.min(data.len());
146        let func = &self.func;
147
148        let result: Vec<T> = (0..len)
149            .into_par_iter()
150            .map(|i| func(i, &data[i]))
151            .collect();
152
153        Arc::new(result)
154    }
155
156    fn result_id(&self) -> usize {
157        self.result_id
158    }
159
160    fn dependencies(&self) -> Vec<usize> {
161        if let OperandSource::Pending(id) = &self.source {
162            vec![*id]
163        } else {
164            vec![]
165        }
166    }
167}
168
169/// Conditional map operation that applies different transformations based on a condition.
170struct MapWhereOp<T: Clone + Send + Sync + 'static> {
171    source: OperandSource<T>,
172    condition: Arc<dyn Fn(usize, &T) -> bool + Send + Sync>,
173    if_true: Arc<dyn Fn(usize, &T) -> T + Send + Sync>,
174    if_false: Arc<dyn Fn(usize, &T) -> T + Send + Sync>,
175    len: usize,
176    result_id: usize,
177}
178
179impl<T: Clone + Send + Sync + 'static> ErasedOperation for MapWhereOp<T> {
180    fn execute(&self, results: &[Option<Arc<dyn Any + Send + Sync>>]) -> Arc<dyn Any + Send + Sync> {
181        let data = get_data::<T>(&self.source, results);
182        let len = self.len.min(data.len());
183
184        let result: Vec<T> = (0..len)
185            .into_par_iter()
186            .map(|i| {
187                if (self.condition)(i, &data[i]) {
188                    (self.if_true)(i, &data[i])
189                } else {
190                    (self.if_false)(i, &data[i])
191                }
192            })
193            .collect();
194
195        Arc::new(result)
196    }
197
198    fn result_id(&self) -> usize {
199        self.result_id
200    }
201
202    fn dependencies(&self) -> Vec<usize> {
203        if let OperandSource::Pending(id) = &self.source {
204            vec![*id]
205        } else {
206            vec![]
207        }
208    }
209}
210
211/// Blend operation that selects elements from two vectors based on a mask.
212struct BlendOp<T: Clone + Send + Sync + 'static> {
213    if_false: OperandSource<T>,
214    if_true: OperandSource<T>,
215    mask: Arc<Vec<bool>>,
216    len: usize,
217    result_id: usize,
218}
219
220impl<T: Clone + Send + Sync + 'static> ErasedOperation for BlendOp<T> {
221    fn execute(&self, results: &[Option<Arc<dyn Any + Send + Sync>>]) -> Arc<dyn Any + Send + Sync> {
222        let false_data = get_data::<T>(&self.if_false, results);
223        let true_data = get_data::<T>(&self.if_true, results);
224        let mask = &self.mask;
225        let len = self.len;
226
227        let result: Vec<T> = (0..len)
228            .into_par_iter()
229            .map(|i| {
230                if mask[i] {
231                    true_data[i].clone()
232                } else {
233                    false_data[i].clone()
234                }
235            })
236            .collect();
237
238        Arc::new(result)
239    }
240
241    fn result_id(&self) -> usize {
242        self.result_id
243    }
244
245    fn dependencies(&self) -> Vec<usize> {
246        let mut deps = Vec::new();
247        if let OperandSource::Pending(id) = &self.if_false {
248            deps.push(*id);
249        }
250        if let OperandSource::Pending(id) = &self.if_true {
251            deps.push(*id);
252        }
253        deps
254    }
255}
256
257/// Masked apply operation that applies a function only where mask is true.
258struct MaskedApplyOp<T: Clone + Send + Sync + 'static> {
259    source: OperandSource<T>,
260    mask: Arc<Vec<bool>>,
261    func: Arc<dyn Fn(usize, &T) -> T + Send + Sync>,
262    len: usize,
263    result_id: usize,
264}
265
266impl<T: Clone + Send + Sync + 'static> ErasedOperation for MaskedApplyOp<T> {
267    fn execute(&self, results: &[Option<Arc<dyn Any + Send + Sync>>]) -> Arc<dyn Any + Send + Sync> {
268        let data = get_data::<T>(&self.source, results);
269        let mask = &self.mask;
270        let len = self.len;
271        let func = &self.func;
272
273        let result: Vec<T> = (0..len)
274            .into_par_iter()
275            .map(|i| {
276                if mask[i] {
277                    func(i, &data[i])
278                } else {
279                    data[i].clone()
280                }
281            })
282            .collect();
283
284        Arc::new(result)
285    }
286
287    fn result_id(&self) -> usize {
288        self.result_id
289    }
290
291    fn dependencies(&self) -> Vec<usize> {
292        if let OperandSource::Pending(id) = &self.source {
293            vec![*id]
294        } else {
295            vec![]
296        }
297    }
298}
299
300/// Fill operation that initializes a vector using a closure.
301struct FillOp<T: Clone + Send + Sync + 'static> {
302    func: Arc<dyn Fn(usize) -> T + Send + Sync>,
303    len: usize,
304    result_id: usize,
305}
306
307impl<T: Clone + Send + Sync + 'static> ErasedOperation for FillOp<T> {
308    fn execute(&self, _results: &[Option<Arc<dyn Any + Send + Sync>>]) -> Arc<dyn Any + Send + Sync> {
309        let len = self.len;
310        let func = &self.func;
311
312        let result: Vec<T> = (0..len)
313            .into_par_iter()
314            .map(|i| func(i))
315            .collect();
316
317        Arc::new(result)
318    }
319
320    fn result_id(&self) -> usize {
321        self.result_id
322    }
323
324    fn dependencies(&self) -> Vec<usize> {
325        vec![]
326    }
327}
328
329/// Helper to extract data from an operand source.
330fn get_data<T: Clone + Send + Sync + 'static>(
331    source: &OperandSource<T>,
332    results: &[Option<Arc<dyn Any + Send + Sync>>],
333) -> Arc<Vec<T>> {
334    match source {
335        OperandSource::Direct(data) => Arc::clone(data),
336        OperandSource::Pending(id) => {
337            let any_ref = results[*id].as_ref().unwrap();
338            any_ref.clone().downcast::<Vec<T>>().unwrap()
339        }
340    }
341}
342
343// ============================================================================
344// LMask - Boolean mask for SIMD-style branchless operations
345// ============================================================================
346
/// Boolean selector used for conditional, per-element vector operations.
///
/// An `LMask` holds one `bool` per element. Masks compose with the logical
/// operators `&`, `|`, `^` and `!`, and are consumed by [`LVec::blend`],
/// [`LVec::masked_apply`] and [`LVec::masked_fill`].
///
/// # Examples
///
/// ```rust
/// use leopard::LMask;
///
/// // Every element selected
/// let all_true = LMask::new(10, true);
///
/// // Selection computed from the index
/// let evens = LMask::from_fn(10, |i| i % 2 == 0);
///
/// // Logical composition
/// let combined = &all_true & &evens;
/// let inverted = !&evens;
/// ```
#[derive(Clone)]
pub struct LMask {
    data: Arc<Vec<bool>>,
    len: usize,
}

impl LMask {
    /// Builds a mask of `len` elements that all hold `value`.
    ///
    /// # Arguments
    ///
    /// * `len` - Number of elements in the mask
    /// * `value` - The boolean stored at every position
    ///
    /// # Examples
    ///
    /// ```rust
    /// use leopard::LMask;
    ///
    /// let all_true = LMask::new(100, true);
    /// let all_false = LMask::new(100, false);
    /// ```
    pub fn new(len: usize, value: bool) -> Self {
        let bits = vec![value; len];
        Self {
            data: Arc::new(bits),
            len,
        }
    }

    /// Builds a mask of `len` elements where element `i` is `f(i)`.
    ///
    /// # Arguments
    ///
    /// * `len` - Number of elements in the mask
    /// * `f` - Called once per index, in ascending order, to decide that element
    ///
    /// # Examples
    ///
    /// ```rust
    /// use leopard::LMask;
    ///
    /// // Even indices
    /// let evens = LMask::from_fn(10, |i| i % 2 == 0);
    ///
    /// // First half
    /// let first_half = LMask::from_fn(100, |i| i < 50);
    /// ```
    pub fn from_fn<F>(len: usize, f: F) -> Self
    where
        F: Fn(usize) -> bool,
    {
        let bits = (0..len).map(f).collect::<Vec<bool>>();
        Self {
            data: Arc::new(bits),
            len,
        }
    }

    /// Number of elements in the mask.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use leopard::LMask;
    ///
    /// let mask = LMask::new(100, true);
    /// assert_eq!(mask.len(), 100);
    /// ```
    #[inline]
    pub fn len(&self) -> usize {
        self.len
    }

    /// Whether the mask contains zero elements.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use leopard::LMask;
    ///
    /// let empty = LMask::new(0, true);
    /// assert!(empty.is_empty());
    /// ```
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Borrows the mask values as a `bool` slice of length [`len`](LMask::len).
    ///
    /// # Examples
    ///
    /// ```rust
    /// use leopard::LMask;
    ///
    /// let mask = LMask::from_fn(5, |i| i > 2);
    /// assert_eq!(mask.as_slice(), &[false, false, false, true, true]);
    /// ```
    #[inline]
    pub fn as_slice(&self) -> &[bool] {
        let bits: &[bool] = &self.data;
        &bits[..self.len]
    }
}
472
473impl Index<usize> for LMask {
474    type Output = bool;
475    
476    /// Accesses the mask value at the given index.
477    ///
478    /// # Panics
479    ///
480    /// Panics if `index >= self.len()`.
481    #[inline]
482    fn index(&self, index: usize) -> &Self::Output {
483        &self.data[index]
484    }
485}
486
487impl BitAnd for &LMask {
488    type Output = LMask;
489    
490    /// Performs element-wise logical AND between two masks.
491    fn bitand(self, other: Self) -> Self::Output {
492        let len = self.len.min(other.len);
493        let data: Vec<bool> = (0..len).map(|i| self.data[i] && other.data[i]).collect();
494        LMask { data: Arc::new(data), len }
495    }
496}
497
498impl BitAnd for LMask {
499    type Output = LMask;
500    fn bitand(self, other: Self) -> Self::Output { (&self).bitand(&other) }
501}
502
503impl BitOr for &LMask {
504    type Output = LMask;
505    
506    /// Performs element-wise logical OR between two masks.
507    fn bitor(self, other: Self) -> Self::Output {
508        let len = self.len.min(other.len);
509        let data: Vec<bool> = (0..len).map(|i| self.data[i] || other.data[i]).collect();
510        LMask { data: Arc::new(data), len }
511    }
512}
513
514impl BitOr for LMask {
515    type Output = LMask;
516    fn bitor(self, other: Self) -> Self::Output { (&self).bitor(&other) }
517}
518
519impl BitXor for &LMask {
520    type Output = LMask;
521    
522    /// Performs element-wise logical XOR between two masks.
523    fn bitxor(self, other: Self) -> Self::Output {
524        let len = self.len.min(other.len);
525        let data: Vec<bool> = (0..len).map(|i| self.data[i] ^ other.data[i]).collect();
526        LMask { data: Arc::new(data), len }
527    }
528}
529
530impl BitXor for LMask {
531    type Output = LMask;
532    fn bitxor(self, other: Self) -> Self::Output { (&self).bitxor(&other) }
533}
534
535impl Not for &LMask {
536    type Output = LMask;
537    
538    /// Performs element-wise logical NOT on the mask.
539    fn not(self) -> Self::Output {
540        let data: Vec<bool> = self.data.iter().map(|&b| !b).collect();
541        LMask { data: Arc::new(data), len: self.len }
542    }
543}
544
545impl Not for LMask {
546    type Output = LMask;
547    fn not(self) -> Self::Output { (&self).not() }
548}
549
550impl std::fmt::Debug for LMask {
551    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
552        write!(f, "LMask({:?})", self.as_slice())
553    }
554}
555
556// ============================================================================
557// LQueue - The operation queue
558// ============================================================================
559
/// Internal state of the operation queue.
///
/// Shared between an [`LQueue`] and every [`LVec`] created from it through
/// `Rc<RefCell<_>>`.
struct LQueueInner {
    /// Operations recorded since the last `start()`, in recording order.
    operations: Vec<Box<dyn ErasedOperation>>,
    /// Result table indexed by result ID; `None` until the producer has run.
    results: Vec<Option<Arc<dyn Any + Send + Sync>>>,
    /// `true` between `start()` and `end()`.
    recording: bool,
    /// ID handed to the next recorded operation; reset to 0 by `start()`.
    next_result_id: usize,
}
567
/// The operation queue that records and executes parallel operations.
///
/// `LQueue` is the central coordinator for deferred parallel execution. It manages
/// the recording of operations and their bulk execution, minimizing thread pool
/// overhead by batching all operations together.
///
/// # Workflow
///
/// 1. Create vectors with [`LQueue::lvec`] or [`LQueue::lvec_with_capacity`]
/// 2. Call [`LQueue::start`] to begin recording
/// 3. Perform operations on [`LVec`] instances (all operations are recorded, not executed)
/// 4. Call [`LQueue::end`] to execute all recorded operations in parallel
/// 5. Use [`LVec::materialize`] to retrieve results
///
/// # Type Agnostic
///
/// A single `LQueue` can manage vectors of different types simultaneously:
///
/// ```rust
/// use leopard::{LQueue, LVec};
///
/// let q = LQueue::new();
/// let integers: LVec<i32> = q.lvec_with_capacity(100);
/// let floats: LVec<f64> = q.lvec_with_capacity(100);
/// // Both can be used in the same recording session
/// ```
///
/// # Sharing
///
/// The queue state lives behind `Rc<RefCell<_>>`. Cloning an `LQueue` yields
/// another handle to the *same* queue rather than an independent copy, and the
/// handle cannot be sent to another thread.
///
/// # Examples
///
/// ```rust
/// use leopard::{LQueue, LVec};
///
/// let q = LQueue::new();
/// let x: LVec<f64> = q.lvec_with_capacity(1000);
/// let y: LVec<f64> = q.lvec_with_capacity(1000);
///
/// q.start();
/// let x = x.fill_with(|i| i as f64);
/// let y = y.fill_with(|i| (i * 2) as f64);
/// let z = &x + &y;
/// q.end();
///
/// let result = z.materialize().unwrap();
/// println!("Sum: {:?}", &result[0..5]);
/// ```
#[derive(Clone)]
pub struct LQueue {
    // Shared with every LVec created from this queue.
    inner: Rc<RefCell<LQueueInner>>,
}
617
618impl LQueue {
619    /// Creates a new operation queue.
620    ///
621    /// # Examples
622    ///
623    /// ```rust
624    /// use leopard::LQueue;
625    ///
626    /// let q = LQueue::new();
627    /// ```
628    pub fn new() -> Self {
629        LQueue {
630            inner: Rc::new(RefCell::new(LQueueInner {
631                operations: Vec::new(),
632                results: Vec::new(),
633                recording: false,
634                next_result_id: 0,
635            })),
636        }
637    }
638
639    /// Creates a new [`LVec`] with the default capacity (128 elements).
640    ///
641    /// # Type Parameters
642    ///
643    /// * `T` - The element type, must implement `Clone + Send + Sync + Default`
644    ///
645    /// # Examples
646    ///
647    /// ```rust
648    /// use leopard::{LQueue, LVec};
649    ///
650    /// let q = LQueue::new();
651    /// let vec: LVec<f64> = q.lvec();
652    /// assert_eq!(vec.capacity(), 128);
653    /// ```
654    pub fn lvec<T>(&self) -> LVec<T>
655    where
656        T: Clone + Send + Sync + Default + 'static,
657    {
658        self.lvec_with_capacity(DEFAULT_CAPACITY)
659    }
660
661    /// Creates a new [`LVec`] with the specified capacity.
662    ///
663    /// # Arguments
664    ///
665    /// * `capacity` - The number of elements the vector can hold
666    ///
667    /// # Type Parameters
668    ///
669    /// * `T` - The element type, must implement `Clone + Send + Sync + Default`
670    ///
671    /// # Examples
672    ///
673    /// ```rust
674    /// use leopard::{LQueue, LVec};
675    ///
676    /// let q = LQueue::new();
677    /// let vec: LVec<f64> = q.lvec_with_capacity(10000);
678    /// assert_eq!(vec.capacity(), 10000);
679    /// ```
680    pub fn lvec_with_capacity<T>(&self, capacity: usize) -> LVec<T>
681    where
682        T: Clone + Send + Sync + Default + 'static,
683    {
684        LVec {
685            data: Arc::new(vec![T::default(); capacity]),
686            len: capacity,
687            capacity,
688            queue: Rc::clone(&self.inner),
689            pending_result_id: None,
690        }
691    }
692
693    /// Starts recording operations.
694    ///
695    /// After calling `start()`, all operations on [`LVec`] instances created from
696    /// this queue will be recorded rather than executed. The operations will be
697    /// executed when [`LQueue::end`] is called.
698    ///
699    /// # Panics
700    ///
701    /// Operations on [`LVec`] will panic if called outside of a `start()`/`end()` block.
702    ///
703    /// # Examples
704    ///
705    /// ```rust
706    /// use leopard::{LQueue, LVec};
707    ///
708    /// let q = LQueue::new();
709    /// let x: LVec<f64> = q.lvec_with_capacity(100);
710    ///
711    /// q.start();  // Begin recording
712    /// let x = x.fill(42.0);
713    /// // ... more operations ...
714    /// q.end();    // Execute all recorded operations
715    /// ```
716    pub fn start(&self) {
717        let mut inner = self.inner.borrow_mut();
718        inner.recording = true;
719        inner.operations.clear();
720        inner.results.clear();
721        inner.next_result_id = 0;
722    }
723
724    /// Stops recording and executes all recorded operations in parallel.
725    ///
726    /// This method executes all operations that were recorded since the last
727    /// [`LQueue::start`] call. Operations are executed in dependency order,
728    /// with independent operations running in parallel.
729    ///
730    /// After `end()` is called, results can be retrieved using [`LVec::materialize`].
731    ///
732    /// # Performance
733    ///
734    /// All parallel execution happens in this single call, minimizing thread pool
735    /// creation overhead compared to executing operations individually.
736    ///
737    /// # Examples
738    ///
739    /// ```rust
740    /// use leopard::{LQueue, LVec};
741    ///
742    /// let q = LQueue::new();
743    /// let x: LVec<f64> = q.lvec_with_capacity(1000);
744    ///
745    /// q.start();
746    /// let x = x.fill_with(|i| i as f64);
747    /// let y = x.map(|_, v| v * 2.0);
748    /// q.end();  // Both fill and map execute here
749    ///
750    /// let result = y.materialize().unwrap();
751    /// ```
752    pub fn end(&self) {
753        let mut inner = self.inner.borrow_mut();
754        inner.recording = false;
755        Self::execute_all(&mut inner);
756    }
757
758    /// Executes all recorded operations respecting dependencies.
759    fn execute_all(inner: &mut LQueueInner) {
760        if inner.operations.is_empty() {
761            return;
762        }
763
764        // Resize results vector
765        inner.results.resize_with(inner.next_result_id, || None);
766
767        // Build dependency levels for proper execution order
768        let mut levels: Vec<Vec<usize>> = Vec::new();
769        let mut op_levels: Vec<usize> = vec![0; inner.operations.len()];
770
771        let mut result_to_op: std::collections::HashMap<usize, usize> = std::collections::HashMap::new();
772        for (i, op) in inner.operations.iter().enumerate() {
773            result_to_op.insert(op.result_id(), i);
774        }
775
776        // Compute the level for each operation based on dependencies
777        for (i, op) in inner.operations.iter().enumerate() {
778            let mut level = 0;
779            for dep_id in op.dependencies() {
780                if let Some(&dep_op_idx) = result_to_op.get(&dep_id) {
781                    level = level.max(op_levels[dep_op_idx] + 1);
782                }
783            }
784            op_levels[i] = level;
785            while levels.len() <= level {
786                levels.push(Vec::new());
787            }
788            levels[level].push(i);
789        }
790
791        // Execute each level (operations within a level have no dependencies on each other)
792        for level_ops in levels {
793            for &op_idx in &level_ops {
794                let result = inner.operations[op_idx].execute(&inner.results);
795                let result_id = inner.operations[op_idx].result_id();
796                inner.results[result_id] = Some(result);
797            }
798        }
799    }
800
801    /// Returns `true` if the queue is currently recording operations.
802    ///
803    /// # Examples
804    ///
805    /// ```rust
806    /// use leopard::LQueue;
807    ///
808    /// let q = LQueue::new();
809    /// assert!(!q.is_recording());
810    ///
811    /// q.start();
812    /// assert!(q.is_recording());
813    ///
814    /// q.end();
815    /// assert!(!q.is_recording());
816    /// ```
817    pub fn is_recording(&self) -> bool {
818        self.inner.borrow().recording
819    }
820}
821
822impl Default for LQueue {
823    fn default() -> Self {
824        Self::new()
825    }
826}
827
828// ============================================================================
829// LVec - The parallelized vector container
830// ============================================================================
831
/// A parallelized vector container with deferred execution.
///
/// `LVec` is the main data structure in Leopard. It represents a vector that
/// supports parallel operations through deferred execution. Operations on `LVec`
/// are recorded when called between [`LQueue::start`] and [`LQueue::end`], and
/// executed in bulk when `end()` is called.
///
/// # Creating LVec
///
/// `LVec` instances must be created through an [`LQueue`]:
///
/// ```rust
/// use leopard::{LQueue, LVec};
///
/// let q = LQueue::new();
/// let vec: LVec<f64> = q.lvec_with_capacity(1000);
/// ```
///
/// # Operations
///
/// All operations must be called between `q.start()` and `q.end()`:
///
/// - **Initialization**: [`fill`](LVec::fill), [`fill_with`](LVec::fill_with)
/// - **Transformation**: [`map`](LVec::map), [`map_where`](LVec::map_where)
/// - **Arithmetic**: `+`, `-`, `*`, `/` operators
/// - **Masking**: [`blend`](LVec::blend), [`masked_apply`](LVec::masked_apply), [`masked_fill`](LVec::masked_fill)
///
/// # Retrieving Results
///
/// After `q.end()`, use [`materialize`](LVec::materialize) to get the computed data:
///
/// ```rust
/// use leopard::{LQueue, LVec};
///
/// let q = LQueue::new();
/// let x: LVec<f64> = q.lvec_with_capacity(10);
///
/// q.start();
/// let x = x.fill_with(|i| i as f64);
/// q.end();
///
/// let data = x.materialize().unwrap();
/// println!("{:?}", &data[..]);
/// ```
///
/// # Pending State
///
/// After an operation is recorded, the resulting `LVec` is in a "pending" state.
/// The actual data is not available until after `q.end()` is called and
/// [`materialize`](LVec::materialize) is used to retrieve it.
///
/// # Sharing
///
/// Cloning an `LVec` is cheap: the clone shares the same data buffer and the
/// same queue handle.
#[derive(Clone)]
pub struct LVec<T: Clone + Send + Sync + 'static> {
    // Locally held elements; an empty placeholder for pending vectors.
    data: Arc<Vec<T>>,
    // Logical element count (for pending vectors: the expected result length).
    len: usize,
    // Capacity reported by `capacity()`.
    capacity: usize,
    // Queue this vector records into and reads results from.
    queue: Rc<RefCell<LQueueInner>>,
    // `Some(id)` when the contents are produced by the recorded operation `id`.
    pending_result_id: Option<usize>,
}
890
891impl<T: Clone + Send + Sync + 'static> LVec<T> {
    /// Returns the length of the vector.
    ///
    /// For a pending vector this is the length recorded when the operation was
    /// queued, not the length of the locally held (empty) placeholder buffer.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use leopard::{LQueue, LVec};
    ///
    /// let q = LQueue::new();
    /// let vec: LVec<f64> = q.lvec_with_capacity(100);
    /// assert_eq!(vec.len(), 100);
    /// ```
    #[inline]
    pub fn len(&self) -> usize {
        self.len
    }
907
908    /// Returns `true` if the vector has no elements.
909    #[inline]
910    pub fn is_empty(&self) -> bool {
911        self.len == 0
912    }
913
    /// Returns the capacity of the vector.
    ///
    /// This is the value stored when the vector was created; for vectors made by
    /// [`LQueue::lvec_with_capacity`] it equals the requested capacity.
    #[inline]
    pub fn capacity(&self) -> usize {
        self.capacity
    }
919
920    /// Returns `true` if this vector has a pending operation.
921    ///
922    /// A pending vector's data is not yet available. Call [`materialize`](LVec::materialize)
923    /// after [`LQueue::end`] to retrieve the computed data.
924    ///
925    /// # Examples
926    ///
927    /// ```rust
928    /// use leopard::{LQueue, LVec};
929    ///
930    /// let q = LQueue::new();
931    /// let x: LVec<f64> = q.lvec_with_capacity(10);
932    /// assert!(!x.is_pending());
933    ///
934    /// q.start();
935    /// let y = x.fill(1.0);
936    /// assert!(y.is_pending());
937    /// q.end();
938    /// ```
939    #[inline]
940    pub fn is_pending(&self) -> bool {
941        self.pending_result_id.is_some()
942    }
943
944    /// Retrieves the computed data after [`LQueue::end`] has been called.
945    ///
946    /// For pending vectors (those created by operations), this returns the
947    /// computed result. For non-pending vectors, this returns the original data.
948    ///
949    /// # Returns
950    ///
951    /// - `Some(Arc<Vec<T>>)` if the data is available
952    /// - `None` if the vector is pending and `q.end()` hasn't been called
953    ///
954    /// # Examples
955    ///
956    /// ```rust
957    /// use leopard::{LQueue, LVec};
958    ///
959    /// let q = LQueue::new();
960    /// let x: LVec<f64> = q.lvec_with_capacity(5);
961    ///
962    /// q.start();
963    /// let x = x.fill_with(|i| i as f64 * 2.0);
964    /// q.end();
965    ///
966    /// let data = x.materialize().unwrap();
967    /// assert_eq!(&data[..], &[0.0, 2.0, 4.0, 6.0, 8.0]);
968    /// ```
969    pub fn materialize(&self) -> Option<Arc<Vec<T>>> {
970        if let Some(result_id) = self.pending_result_id {
971            let inner = self.queue.borrow();
972            inner.results.get(result_id).and_then(|r| {
973                r.as_ref().and_then(|arc| arc.clone().downcast::<Vec<T>>().ok())
974            })
975        } else {
976            Some(Arc::clone(&self.data))
977        }
978    }
979
980    /// Returns the data as a slice.
981    ///
982    /// # Warning
983    ///
984    /// For pending vectors, this returns an empty or default-initialized slice,
985    /// not the computed result. Use [`materialize`](LVec::materialize) after
986    /// [`LQueue::end`] to get the actual computed data.
987    #[inline]
988    pub fn as_slice(&self) -> &[T] {
989        &self.data[..self.len]
990    }
991
992    // ========================================================================
993    // Internal helpers
994    // ========================================================================
995
996    fn get_source(&self) -> OperandSource<T> {
997        if let Some(id) = self.pending_result_id {
998            OperandSource::Pending(id)
999        } else {
1000            OperandSource::Direct(Arc::clone(&self.data))
1001        }
1002    }
1003
1004    fn create_pending(&self, result_id: usize, len: usize) -> Self {
1005        LVec {
1006            data: Arc::new(Vec::new()),
1007            len,
1008            capacity: len,
1009            queue: Rc::clone(&self.queue),
1010            pending_result_id: Some(result_id),
1011        }
1012    }
1013
1014    // ========================================================================
1015    // Recording Operations
1016    // ========================================================================
1017
1018    /// Fills the vector using a closure that takes an index.
1019    ///
1020    /// This is a recorded operation that must be called between [`LQueue::start`]
1021    /// and [`LQueue::end`].
1022    ///
1023    /// # Arguments
1024    ///
1025    /// * `f` - A closure that takes an index `usize` and returns the value `T`
1026    ///
1027    /// # Panics
1028    ///
1029    /// Panics if called outside of a `start()`/`end()` block.
1030    ///
1031    /// # Examples
1032    ///
1033    /// ```rust
1034    /// use leopard::{LQueue, LVec};
1035    ///
1036    /// let q = LQueue::new();
1037    /// let x: LVec<f64> = q.lvec_with_capacity(5);
1038    ///
1039    /// q.start();
1040    /// let x = x.fill_with(|i| i as f64 * 10.0);
1041    /// q.end();
1042    ///
1043    /// let data = x.materialize().unwrap();
1044    /// assert_eq!(&data[..], &[0.0, 10.0, 20.0, 30.0, 40.0]);
1045    /// ```
1046    pub fn fill_with<F>(&self, f: F) -> Self
1047    where
1048        F: Fn(usize) -> T + Send + Sync + 'static,
1049    {
1050        let mut inner = self.queue.borrow_mut();
1051        if !inner.recording {
1052            panic!("fill_with must be called between q.start() and q.end()");
1053        }
1054
1055        let result_id = inner.next_result_id;
1056        inner.next_result_id += 1;
1057
1058        inner.operations.push(Box::new(FillOp {
1059            func: Arc::new(f),
1060            len: self.len,
1061            result_id,
1062        }));
1063
1064        drop(inner);
1065        self.create_pending(result_id, self.len)
1066    }
1067
1068    /// Fills the vector with a constant value.
1069    ///
1070    /// This is a recorded operation that must be called between [`LQueue::start`]
1071    /// and [`LQueue::end`].
1072    ///
1073    /// # Arguments
1074    ///
1075    /// * `value` - The value to fill the vector with
1076    ///
1077    /// # Panics
1078    ///
1079    /// Panics if called outside of a `start()`/`end()` block.
1080    ///
1081    /// # Examples
1082    ///
1083    /// ```rust
1084    /// use leopard::{LQueue, LVec};
1085    ///
1086    /// let q = LQueue::new();
1087    /// let x: LVec<i32> = q.lvec_with_capacity(5);
1088    ///
1089    /// q.start();
1090    /// let x = x.fill(42);
1091    /// q.end();
1092    ///
1093    /// let data = x.materialize().unwrap();
1094    /// assert_eq!(&data[..], &[42, 42, 42, 42, 42]);
1095    /// ```
1096    pub fn fill(&self, value: T) -> Self {
1097        self.fill_with(move |_| value.clone())
1098    }
1099
1100    /// Transforms each element using a closure.
1101    ///
1102    /// This is a recorded operation that must be called between [`LQueue::start`]
1103    /// and [`LQueue::end`].
1104    ///
1105    /// # Arguments
1106    ///
1107    /// * `f` - A closure that takes an index and a reference to the element,
1108    ///         and returns the transformed value
1109    ///
1110    /// # Panics
1111    ///
1112    /// Panics if called outside of a `start()`/`end()` block.
1113    ///
1114    /// # Examples
1115    ///
1116    /// ```rust
1117    /// use leopard::{LQueue, LVec};
1118    ///
1119    /// let q = LQueue::new();
1120    /// let x: LVec<f64> = q.lvec_with_capacity(5);
1121    ///
1122    /// q.start();
1123    /// let x = x.fill_with(|i| i as f64);
1124    /// let y = x.map(|_, v| v * v);  // Square each element
1125    /// q.end();
1126    ///
1127    /// let data = y.materialize().unwrap();
1128    /// assert_eq!(&data[..], &[0.0, 1.0, 4.0, 9.0, 16.0]);
1129    /// ```
1130    pub fn map<F>(&self, f: F) -> Self
1131    where
1132        F: Fn(usize, &T) -> T + Send + Sync + 'static,
1133    {
1134        let mut inner = self.queue.borrow_mut();
1135        if !inner.recording {
1136            panic!("map must be called between q.start() and q.end()");
1137        }
1138
1139        let result_id = inner.next_result_id;
1140        inner.next_result_id += 1;
1141
1142        inner.operations.push(Box::new(MapOp {
1143            source: self.get_source(),
1144            func: Arc::new(f),
1145            len: self.len,
1146            result_id,
1147        }));
1148
1149        drop(inner);
1150        self.create_pending(result_id, self.len)
1151    }
1152
1153    /// Applies different transformations based on a condition.
1154    ///
1155    /// For each element, if the condition returns `true`, `if_true` is applied;
1156    /// otherwise, `if_false` is applied. This is SIMD-style branchless conditional
1157    /// execution.
1158    ///
1159    /// This is a recorded operation that must be called between [`LQueue::start`]
1160    /// and [`LQueue::end`].
1161    ///
1162    /// # Arguments
1163    ///
1164    /// * `condition` - A closure that returns `true` or `false` for each element
1165    /// * `if_true` - Transformation to apply when condition is `true`
1166    /// * `if_false` - Transformation to apply when condition is `false`
1167    ///
1168    /// # Panics
1169    ///
1170    /// Panics if called outside of a `start()`/`end()` block.
1171    ///
1172    /// # Examples
1173    ///
1174    /// ```rust
1175    /// use leopard::{LQueue, LVec};
1176    ///
1177    /// let q = LQueue::new();
1178    /// let x: LVec<i32> = q.lvec_with_capacity(6);
1179    ///
1180    /// q.start();
1181    /// let x = x.fill_with(|i| i as i32 + 1);
1182    /// let y = x.map_where(
1183    ///     |_, v| *v % 2 == 0,  // Is even?
1184    ///     |_, v| v * 10,       // If even: multiply by 10
1185    ///     |_, v| v + 1000,     // If odd: add 1000
1186    /// );
1187    /// q.end();
1188    ///
1189    /// let data = y.materialize().unwrap();
1190    /// // [1, 2, 3, 4, 5, 6] -> [1001, 20, 1003, 40, 1005, 60]
1191    /// ```
1192    pub fn map_where<C, TF, FF>(&self, condition: C, if_true: TF, if_false: FF) -> Self
1193    where
1194        C: Fn(usize, &T) -> bool + Send + Sync + 'static,
1195        TF: Fn(usize, &T) -> T + Send + Sync + 'static,
1196        FF: Fn(usize, &T) -> T + Send + Sync + 'static,
1197    {
1198        let mut inner = self.queue.borrow_mut();
1199        if !inner.recording {
1200            panic!("map_where must be called between q.start() and q.end()");
1201        }
1202
1203        let result_id = inner.next_result_id;
1204        inner.next_result_id += 1;
1205
1206        inner.operations.push(Box::new(MapWhereOp {
1207            source: self.get_source(),
1208            condition: Arc::new(condition),
1209            if_true: Arc::new(if_true),
1210            if_false: Arc::new(if_false),
1211            len: self.len,
1212            result_id,
1213        }));
1214
1215        drop(inner);
1216        self.create_pending(result_id, self.len)
1217    }
1218
1219    /// Creates a mask from a predicate applied to each element.
1220    ///
1221    /// Unlike other operations, this executes immediately (not recorded) because
1222    /// masks are needed to define subsequent masking operations.
1223    ///
1224    /// # Arguments
1225    ///
1226    /// * `predicate` - A closure that returns `true` or `false` for each element
1227    ///
1228    /// # Warning
1229    ///
1230    /// This only works on non-pending vectors. For pending vectors, use
1231    /// [`LMask::from_fn`] with the appropriate pattern.
1232    ///
1233    /// # Examples
1234    ///
1235    /// ```rust
1236    /// use leopard::{LQueue, LVec};
1237    ///
1238    /// let q = LQueue::new();
1239    /// let x: LVec<i32> = q.lvec_with_capacity(10);
1240    ///
1241    /// // Note: mask() works on the initial (non-pending) vector
1242    /// let mask = x.mask(|i, _| i >= 5);
1243    /// ```
1244    pub fn mask<F>(&self, predicate: F) -> LMask
1245    where
1246        F: Fn(usize, &T) -> bool,
1247    {
1248        let data: Vec<bool> = (0..self.len)
1249            .map(|i| predicate(i, &self.data[i]))
1250            .collect();
1251        
1252        LMask {
1253            data: Arc::new(data),
1254            len: self.len,
1255        }
1256    }
1257
1258    /// Blends two vectors using a mask (SIMD-style select).
1259    ///
1260    /// For each element, if the mask is `true`, the value from `other` is used;
1261    /// otherwise, the value from `self` is used.
1262    ///
1263    /// This is a recorded operation that must be called between [`LQueue::start`]
1264    /// and [`LQueue::end`].
1265    ///
1266    /// # Arguments
1267    ///
1268    /// * `other` - The vector to blend with
1269    /// * `mask` - The mask determining which vector to select from
1270    ///
1271    /// # Panics
1272    ///
1273    /// Panics if called outside of a `start()`/`end()` block.
1274    ///
1275    /// # Examples
1276    ///
1277    /// ```rust
1278    /// use leopard::{LQueue, LVec, LMask};
1279    ///
1280    /// let q = LQueue::new();
1281    /// let a: LVec<i32> = q.lvec_with_capacity(5);
1282    /// let b: LVec<i32> = q.lvec_with_capacity(5);
1283    /// let mask = LMask::from_fn(5, |i| i >= 3);
1284    ///
1285    /// q.start();
1286    /// let a = a.fill(1);
1287    /// let b = b.fill(100);
1288    /// let c = a.blend(&b, &mask);
1289    /// q.end();
1290    ///
1291    /// let data = c.materialize().unwrap();
1292    /// // mask: [false, false, false, true, true]
1293    /// // result: [1, 1, 1, 100, 100]
1294    /// ```
1295    pub fn blend(&self, other: &Self, mask: &LMask) -> Self {
1296        let mut inner = self.queue.borrow_mut();
1297        if !inner.recording {
1298            panic!("blend must be called between q.start() and q.end()");
1299        }
1300
1301        let result_id = inner.next_result_id;
1302        inner.next_result_id += 1;
1303        let len = self.len.min(other.len).min(mask.len());
1304
1305        inner.operations.push(Box::new(BlendOp {
1306            if_false: self.get_source(),
1307            if_true: other.get_source(),
1308            mask: Arc::clone(&mask.data),
1309            len,
1310            result_id,
1311        }));
1312
1313        drop(inner);
1314        self.create_pending(result_id, len)
1315    }
1316
1317    /// Selects between two vectors based on a mask.
1318    ///
1319    /// This is equivalent to `if_false.blend(if_true, mask)`.
1320    ///
1321    /// # Arguments
1322    ///
1323    /// * `mask` - The mask determining which vector to select from
1324    /// * `if_true` - Values to use where mask is `true`
1325    /// * `if_false` - Values to use where mask is `false`
1326    ///
1327    /// # Panics
1328    ///
1329    /// Panics if called outside of a `start()`/`end()` block.
1330    pub fn select(mask: &LMask, if_true: &Self, if_false: &Self) -> Self {
1331        if_false.blend(if_true, mask)
1332    }
1333
1334    /// Applies a function only where the mask is `true`.
1335    ///
1336    /// Elements where the mask is `false` retain their original values.
1337    ///
1338    /// This is a recorded operation that must be called between [`LQueue::start`]
1339    /// and [`LQueue::end`].
1340    ///
1341    /// # Arguments
1342    ///
1343    /// * `mask` - The mask determining where to apply the function
1344    /// * `f` - The function to apply
1345    ///
1346    /// # Panics
1347    ///
1348    /// Panics if called outside of a `start()`/`end()` block.
1349    ///
1350    /// # Examples
1351    ///
1352    /// ```rust
1353    /// use leopard::{LQueue, LVec, LMask};
1354    ///
1355    /// let q = LQueue::new();
1356    /// let x: LVec<i32> = q.lvec_with_capacity(5);
1357    /// let mask = LMask::from_fn(5, |i| i >= 3);
1358    ///
1359    /// q.start();
1360    /// let x = x.fill_with(|i| i as i32);
1361    /// let y = x.masked_apply(&mask, |_, v| v * 100);
1362    /// q.end();
1363    ///
1364    /// let data = y.materialize().unwrap();
1365    /// // [0, 1, 2, 3, 4] with mask [F, F, F, T, T]
1366    /// // result: [0, 1, 2, 300, 400]
1367    /// ```
1368    pub fn masked_apply<F>(&self, mask: &LMask, f: F) -> Self
1369    where
1370        F: Fn(usize, &T) -> T + Send + Sync + 'static,
1371    {
1372        let mut inner = self.queue.borrow_mut();
1373        if !inner.recording {
1374            panic!("masked_apply must be called between q.start() and q.end()");
1375        }
1376
1377        let result_id = inner.next_result_id;
1378        inner.next_result_id += 1;
1379        let len = self.len.min(mask.len());
1380
1381        inner.operations.push(Box::new(MaskedApplyOp {
1382            source: self.get_source(),
1383            mask: Arc::clone(&mask.data),
1384            func: Arc::new(f),
1385            len,
1386            result_id,
1387        }));
1388
1389        drop(inner);
1390        self.create_pending(result_id, len)
1391    }
1392
1393    /// Fills with a constant value only where the mask is `true`.
1394    ///
1395    /// Elements where the mask is `false` retain their original values.
1396    ///
1397    /// This is a recorded operation that must be called between [`LQueue::start`]
1398    /// and [`LQueue::end`].
1399    ///
1400    /// # Arguments
1401    ///
1402    /// * `mask` - The mask determining where to fill
1403    /// * `value` - The value to fill with
1404    ///
1405    /// # Panics
1406    ///
1407    /// Panics if called outside of a `start()`/`end()` block.
1408    ///
1409    /// # Examples
1410    ///
1411    /// ```rust
1412    /// use leopard::{LQueue, LVec, LMask};
1413    ///
1414    /// let q = LQueue::new();
1415    /// let x: LVec<i32> = q.lvec_with_capacity(5);
1416    /// let mask = LMask::from_fn(5, |i| i >= 3);
1417    ///
1418    /// q.start();
1419    /// let x = x.fill_with(|i| i as i32);
1420    /// let y = x.masked_fill(&mask, 999);
1421    /// q.end();
1422    ///
1423    /// let data = y.materialize().unwrap();
1424    /// // [0, 1, 2, 3, 4] with mask [F, F, F, T, T]
1425    /// // result: [0, 1, 2, 999, 999]
1426    /// ```
1427    pub fn masked_fill(&self, mask: &LMask, value: T) -> Self
1428    where
1429        T: 'static,
1430    {
1431        self.masked_apply(mask, move |_, _| value.clone())
1432    }
1433}
1434
1435impl<T: Clone + Send + Sync + Default + 'static> Default for LVec<T> {
1436    fn default() -> Self {
1437        panic!("LVec must be created via LQueue::lvec() or LQueue::lvec_with_capacity()")
1438    }
1439}
1440
1441impl<T: Clone + Send + Sync + 'static> Index<usize> for LVec<T> {
1442    type Output = T;
1443    
1444    /// Accesses the element at the given index.
1445    ///
1446    /// # Warning
1447    ///
1448    /// For pending vectors, this accesses the original (uncomputed) data,
1449    /// not the result of the pending operation. Use [`materialize`](LVec::materialize)
1450    /// after [`LQueue::end`] to get the computed data.
1451    ///
1452    /// # Panics
1453    ///
1454    /// Panics if `index >= self.len()`.
1455    #[inline]
1456    fn index(&self, index: usize) -> &Self::Output {
1457        &self.data[index]
1458    }
1459}
1460
1461// ============================================================================
1462// Operator Overloading (all recorded)
1463// ============================================================================
1464
1465fn record_binary_op<T>(left: &LVec<T>, right: &LVec<T>, op_type: BinaryOpType) -> LVec<T>
1466where
1467    T: Clone + Send + Sync + Add<Output = T> + Sub<Output = T> + Mul<Output = T> + Div<Output = T> + 'static,
1468{
1469    let mut inner = left.queue.borrow_mut();
1470    if !inner.recording {
1471        panic!("Binary operations must be called between q.start() and q.end()");
1472    }
1473
1474    let result_id = inner.next_result_id;
1475    inner.next_result_id += 1;
1476    let len = left.len.min(right.len);
1477
1478    inner.operations.push(Box::new(BinaryOp {
1479        op_type,
1480        left: left.get_source(),
1481        right: right.get_source(),
1482        result_id,
1483    }));
1484
1485    drop(inner);
1486    left.create_pending(result_id, len)
1487}
1488
1489impl<T> Add for LVec<T>
1490where
1491    T: Clone + Send + Sync + Add<Output = T> + Sub<Output = T> + Mul<Output = T> + Div<Output = T> + 'static,
1492{
1493    type Output = Self;
1494    
1495    /// Adds two vectors element-wise (recorded operation).
1496    fn add(self, other: Self) -> Self::Output {
1497        record_binary_op(&self, &other, BinaryOpType::Add)
1498    }
1499}
1500
1501impl<T> Add for &LVec<T>
1502where
1503    T: Clone + Send + Sync + Add<Output = T> + Sub<Output = T> + Mul<Output = T> + Div<Output = T> + 'static,
1504{
1505    type Output = LVec<T>;
1506    
1507    /// Adds two vectors element-wise (recorded operation).
1508    fn add(self, other: Self) -> Self::Output {
1509        record_binary_op(self, other, BinaryOpType::Add)
1510    }
1511}
1512
1513impl<T> Sub for LVec<T>
1514where
1515    T: Clone + Send + Sync + Add<Output = T> + Sub<Output = T> + Mul<Output = T> + Div<Output = T> + 'static,
1516{
1517    type Output = Self;
1518    
1519    /// Subtracts two vectors element-wise (recorded operation).
1520    fn sub(self, other: Self) -> Self::Output {
1521        record_binary_op(&self, &other, BinaryOpType::Sub)
1522    }
1523}
1524
1525impl<T> Sub for &LVec<T>
1526where
1527    T: Clone + Send + Sync + Add<Output = T> + Sub<Output = T> + Mul<Output = T> + Div<Output = T> + 'static,
1528{
1529    type Output = LVec<T>;
1530    
1531    /// Subtracts two vectors element-wise (recorded operation).
1532    fn sub(self, other: Self) -> Self::Output {
1533        record_binary_op(self, other, BinaryOpType::Sub)
1534    }
1535}
1536
1537impl<T> Mul for LVec<T>
1538where
1539    T: Clone + Send + Sync + Add<Output = T> + Sub<Output = T> + Mul<Output = T> + Div<Output = T> + 'static,
1540{
1541    type Output = Self;
1542    
1543    /// Multiplies two vectors element-wise (recorded operation).
1544    fn mul(self, other: Self) -> Self::Output {
1545        record_binary_op(&self, &other, BinaryOpType::Mul)
1546    }
1547}
1548
1549impl<T> Mul for &LVec<T>
1550where
1551    T: Clone + Send + Sync + Add<Output = T> + Sub<Output = T> + Mul<Output = T> + Div<Output = T> + 'static,
1552{
1553    type Output = LVec<T>;
1554    
1555    /// Multiplies two vectors element-wise (recorded operation).
1556    fn mul(self, other: Self) -> Self::Output {
1557        record_binary_op(self, other, BinaryOpType::Mul)
1558    }
1559}
1560
1561impl<T> Div for LVec<T>
1562where
1563    T: Clone + Send + Sync + Add<Output = T> + Sub<Output = T> + Mul<Output = T> + Div<Output = T> + 'static,
1564{
1565    type Output = Self;
1566    
1567    /// Divides two vectors element-wise (recorded operation).
1568    fn div(self, other: Self) -> Self::Output {
1569        record_binary_op(&self, &other, BinaryOpType::Div)
1570    }
1571}
1572
1573impl<T> Div for &LVec<T>
1574where
1575    T: Clone + Send + Sync + Add<Output = T> + Sub<Output = T> + Mul<Output = T> + Div<Output = T> + 'static,
1576{
1577    type Output = LVec<T>;
1578    
1579    /// Divides two vectors element-wise (recorded operation).
1580    fn div(self, other: Self) -> Self::Output {
1581        record_binary_op(self, other, BinaryOpType::Div)
1582    }
1583}
1584
1585impl<T: Clone + Send + Sync + std::fmt::Debug + 'static> std::fmt::Debug for LVec<T> {
1586    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1587        if self.is_pending() {
1588            write!(f, "LVec<pending>")
1589        } else {
1590            write!(f, "LVec({:?})", self.as_slice())
1591        }
1592    }
1593}