// torsh_tensor/advanced_ops.rs
1//! Advanced tensor operations including reductions, linear algebra, and backend integration
2//!
3//! This module provides high-level tensor operations including reductions, linear algebra
4//! operations, SciRS2 backend integration, and advanced data manipulation functions.
5//!
6//! # Features
7//!
8//! - **Reductions**: max, norm, sum, mean operations
9//! - **Linear algebra**: Matrix multiplication and vector operations
10//! - **SciRS2 integration**: Optimized backend operations for performance
11//! - **Activation functions**: ReLU, sigmoid, tanh through SciRS2 backend
12//! - **Functional programming**: Apply operations and data transformations
13//! - **Memory management**: Copy-on-write semantics and unique data operations
14
15use std::sync::Arc;
16use torsh_core::{
17    device::DeviceType,
18    dtype::{FloatElement, TensorElement},
19    error::{Result, TorshError},
20};
21
22use crate::{core_ops::Tensor, storage::TensorStorage};
23
24// Float-specific operations
25impl<T: FloatElement + Copy> Tensor<T> {
26    /// Create a 0-dimensional tensor (scalar) from a single value
27    pub fn scalar(value: T) -> Result<Self> {
28        Self::from_data(vec![value], vec![], DeviceType::Cpu)
29    }
30
31    /// Convert tensor to ndarray (temporary placeholder implementation)
32    ///
33    /// TODO: Implement proper ndarray conversion following SciRS2 POLICY
34    /// This should use scirs2_core::ndarray for array operations
35    pub fn as_ndarray(&self) -> Result<scirs2_core::ndarray::ArrayD<T>> {
36        use scirs2_core::ndarray::ArrayD;
37        let data = self.data()?;
38        let shape_obj = self.shape().clone();
39        let shape = shape_obj.dims();
40        ArrayD::from_shape_vec(shape, data.to_vec())
41            .map_err(|e| TorshError::InvalidShape(format!("ndarray conversion failed: {}", e)))
42    }
43
44    /// Create tensor from ndarray (temporary placeholder implementation)
45    ///
46    /// TODO: Implement proper ndarray conversion following SciRS2 POLICY
47    /// This should use scirs2_core::ndarray for array operations
48    pub fn from_ndarray(
49        array: scirs2_core::ndarray::ArrayD<T>,
50        device: DeviceType,
51    ) -> Result<Self> {
52        let shape = array.shape().to_vec();
53        let (data, _offset) = array.into_raw_vec_and_offset();
54        Self::from_data(data, shape, device)
55    }
56
57    /// Maximum element in tensor
58    pub fn max(&self, dim: Option<usize>, keepdim: bool) -> Result<Self> {
59        match dim {
60            None => {
61                // Global maximum
62                let data = self.to_vec()?;
63                let max_val =
64                    data.into_iter()
65                        .fold(<T as FloatElement>::neg_infinity(), |acc, x| {
66                            if x > acc {
67                                x
68                            } else {
69                                acc
70                            }
71                        });
72                if keepdim {
73                    let shape = vec![1; self.shape().dims().len()];
74                    Self::from_data(vec![max_val], shape, self.device)
75                } else {
76                    Self::scalar(max_val)
77                }
78            }
79            Some(axis) => {
80                // Maximum along specific dimension
81                let shape_binding = self.shape();
82                let input_shape = shape_binding.dims();
83
84                if axis >= input_shape.len() {
85                    return Err(TorshError::InvalidOperation(format!(
86                        "Axis {} out of bounds for {}-dimensional tensor",
87                        axis,
88                        input_shape.len()
89                    )));
90                }
91
92                // Calculate output shape
93                let mut output_shape = input_shape.to_vec();
94                if keepdim {
95                    output_shape[axis] = 1;
96                } else {
97                    output_shape.remove(axis);
98                }
99
100                let data = self.data()?;
101                let outer_size: usize = input_shape[..axis].iter().product();
102                let axis_size = input_shape[axis];
103                let inner_size: usize = input_shape[axis + 1..].iter().product();
104
105                let output_size = outer_size * inner_size;
106                let mut result_data = vec![<T as FloatElement>::neg_infinity(); output_size];
107
108                for outer in 0..outer_size {
109                    for inner in 0..inner_size {
110                        let mut max_val = <T as FloatElement>::neg_infinity();
111                        for a in 0..axis_size {
112                            let input_idx = outer * axis_size * inner_size + a * inner_size + inner;
113                            let val = data[input_idx];
114                            if val > max_val {
115                                max_val = val;
116                            }
117                        }
118                        let output_idx = outer * inner_size + inner;
119                        result_data[output_idx] = max_val;
120                    }
121                }
122
123                Self::from_data(result_data, output_shape, self.device)
124            }
125        }
126    }
127
128    /// Maximum along specified dimension
129    pub fn max_dim(&self, dim: i32, keepdim: bool) -> Result<Self> {
130        let shape_binding = self.shape();
131        let input_shape = shape_binding.dims();
132
133        let actual_dim = if dim < 0 {
134            (input_shape.len() as i32 + dim) as usize
135        } else {
136            dim as usize
137        };
138
139        if actual_dim >= input_shape.len() {
140            return Err(TorshError::InvalidOperation(format!(
141                "Dimension {} out of range for {}-dimensional tensor",
142                actual_dim,
143                input_shape.len()
144            )));
145        }
146
147        // Calculate output shape
148        let mut output_shape = input_shape.to_vec();
149        if keepdim {
150            output_shape[actual_dim] = 1;
151        } else {
152            output_shape.remove(actual_dim);
153        }
154
155        let data = self.data()?;
156        let outer_size: usize = input_shape[..actual_dim].iter().product();
157        let dim_size = input_shape[actual_dim];
158        let inner_size: usize = input_shape[actual_dim + 1..].iter().product();
159
160        let output_size = outer_size * inner_size;
161        let mut result_data = vec![<T as FloatElement>::neg_infinity(); output_size];
162
163        for outer in 0..outer_size {
164            for inner in 0..inner_size {
165                let mut max_val = <T as FloatElement>::neg_infinity();
166                for d in 0..dim_size {
167                    let input_idx = outer * dim_size * inner_size + d * inner_size + inner;
168                    let val = data[input_idx];
169                    if val > max_val {
170                        max_val = val;
171                    }
172                }
173                let output_idx = outer * inner_size + inner;
174                result_data[output_idx] = max_val;
175            }
176        }
177
178        Self::from_data(result_data, output_shape, self.device)
179    }
180
181    /// Minimum along specified dimension
182    pub fn min_dim(&self, dim: i32, keepdim: bool) -> Result<Self> {
183        use scirs2_core::ndarray::Axis;
184
185        let normalized_dim = if dim < 0 {
186            (self.shape().len() as i32 + dim) as usize
187        } else {
188            dim as usize
189        };
190
191        if normalized_dim >= self.shape().len() {
192            return Err(torsh_core::error::TorshError::InvalidDimension {
193                dim: normalized_dim,
194                ndim: self.shape().len(),
195            });
196        }
197
198        let array = self.as_ndarray()?;
199        let result = array.map_axis(Axis(normalized_dim), |view| {
200            view.iter()
201                .copied()
202                .fold(<T as FloatElement>::infinity(), |acc, x| {
203                    if x < acc {
204                        x
205                    } else {
206                        acc
207                    }
208                })
209        });
210
211        let result_shape = if keepdim {
212            let mut shape = self.shape().to_vec();
213            shape[normalized_dim] = 1;
214            shape
215        } else {
216            result.shape().to_vec()
217        };
218
219        Self::from_ndarray(
220            result
221                .to_shape(result_shape)
222                .map_err(|e| TorshError::InvalidShape(format!("Shape conversion failed: {}", e)))?
223                .to_owned(),
224            self.device(),
225        )
226    }
227}
228
229/// Boolean reduction operations for tensors
230impl<T: TensorElement + Copy> Tensor<T>
231where
232    T: PartialEq + num_traits::Zero,
233{
234    /// Check if all elements are non-zero (true)
235    pub fn all(&self) -> Result<Tensor<bool>> {
236        let data = self.to_vec()?;
237        let zero = <T as num_traits::Zero>::zero();
238        let all_true = data.iter().all(|&x| x != zero);
239        Tensor::from_data(vec![all_true], vec![], self.device())
240    }
241
242    /// Check if any element is non-zero (true)
243    pub fn any(&self) -> Result<Tensor<bool>> {
244        let data = self.to_vec()?;
245        let zero = <T as num_traits::Zero>::zero();
246        let any_true = data.iter().any(|&x| x != zero);
247        Tensor::from_data(vec![any_true], vec![], self.device())
248    }
249
250    /// Check if all elements along dimension are non-zero (true)
251    pub fn all_dim(&self, dim: i32, _keepdim: bool) -> Result<Tensor<bool>> {
252        let normalized_dim = if dim < 0 {
253            (self.shape().len() as i32 + dim) as usize
254        } else {
255            dim as usize
256        };
257
258        if normalized_dim >= self.shape().len() {
259            return Err(torsh_core::error::TorshError::InvalidDimension {
260                dim: normalized_dim,
261                ndim: self.shape().len(),
262            });
263        }
264
265        // TODO: Implement proper all() reduction without ndarray dependency
266        // For now, return a simple placeholder
267        Err(TorshError::NotImplemented(
268            "Boolean all() reduction along dimension not yet implemented".to_string(),
269        ))
270    }
271
272    /// Check if any element along dimension is non-zero (true)
273    pub fn any_dim(&self, dim: i32, _keepdim: bool) -> Result<Tensor<bool>> {
274        let normalized_dim = if dim < 0 {
275            (self.shape().len() as i32 + dim) as usize
276        } else {
277            dim as usize
278        };
279
280        if normalized_dim >= self.shape().len() {
281            return Err(torsh_core::error::TorshError::InvalidDimension {
282                dim: normalized_dim,
283                ndim: self.shape().len(),
284            });
285        }
286
287        // TODO: Implement proper any() reduction without ndarray dependency
288        // For now, return a simple placeholder
289        Err(TorshError::NotImplemented(
290            "Boolean any() reduction along dimension not yet implemented".to_string(),
291        ))
292    }
293}
294
295// General tensor operations
296impl<T: TensorElement + Copy> Tensor<T> {
297    /// Compute sum of all elements
298    pub fn sum(&self) -> Result<Self>
299    where
300        T: std::ops::Add<Output = T> + num_traits::Zero,
301    {
302        let data = self.data()?;
303        let sum_value = data
304            .iter()
305            .fold(<T as num_traits::Zero>::zero(), |acc, &x| acc + x);
306        let mut result = Tensor::from_data(vec![sum_value], vec![], self.device())?;
307
308        // Preserve gradient tracking
309        if self.requires_grad {
310            result.requires_grad = true;
311            // TODO: Add proper Sum operation for autograd backward pass
312            // For now, this will work for simple cases
313        }
314
315        Ok(result)
316    }
317
318    /// Compute sum along specified dimensions
319    pub fn sum_dim(&self, dims: &[i32], keepdim: bool) -> Result<Self>
320    where
321        T: std::ops::Add<Output = T> + num_traits::Zero,
322    {
323        if dims.is_empty() {
324            return self.sum();
325        }
326
327        let shape_binding = self.shape();
328        let input_shape = shape_binding.dims();
329
330        // Handle single dimension case (most common)
331        if dims.len() == 1 {
332            let dim = dims[0];
333            let actual_dim = if dim < 0 {
334                (input_shape.len() as i32 + dim) as usize
335            } else {
336                dim as usize
337            };
338
339            if actual_dim >= input_shape.len() {
340                return Err(TorshError::InvalidOperation(format!(
341                    "Dimension {} out of range for {}-dimensional tensor",
342                    actual_dim,
343                    input_shape.len()
344                )));
345            }
346
347            // Calculate output shape
348            let mut output_shape = input_shape.to_vec();
349            if keepdim {
350                output_shape[actual_dim] = 1;
351            } else {
352                output_shape.remove(actual_dim);
353            }
354
355            let data = self.data()?;
356            let outer_size: usize = input_shape[..actual_dim].iter().product();
357            let dim_size = input_shape[actual_dim];
358            let inner_size: usize = input_shape[actual_dim + 1..].iter().product();
359
360            let output_size = outer_size * inner_size;
361            let mut result_data = vec![num_traits::Zero::zero(); output_size];
362
363            for outer in 0..outer_size {
364                for inner in 0..inner_size {
365                    let mut sum = num_traits::Zero::zero();
366                    for d in 0..dim_size {
367                        let input_idx = outer * dim_size * inner_size + d * inner_size + inner;
368                        sum = sum + data[input_idx];
369                    }
370                    let output_idx = outer * inner_size + inner;
371                    result_data[output_idx] = sum;
372                }
373            }
374
375            Self::from_data(result_data, output_shape, self.device)
376        } else {
377            // For multiple dimensions, fall back to full sum for now
378            self.sum()
379        }
380    }
381
382    /// Compute mean along specified dimensions
383    pub fn mean(&self, dims: Option<&[usize]>, keepdim: bool) -> Result<Self>
384    where
385        T: std::ops::Add<Output = T>
386            + std::ops::Div<Output = T>
387            + num_traits::Zero
388            + num_traits::One
389            + num_traits::FromPrimitive,
390    {
391        let sum = if let Some(dims) = dims {
392            self.sum_dim(&dims.iter().map(|&d| d as i32).collect::<Vec<_>>(), keepdim)?
393        } else {
394            let scalar_sum = self.sum()?;
395            if keepdim {
396                // Reshape scalar to tensor with same ndim as original, all dims = 1
397                let keepdim_shape = vec![1; self.shape().ndim()];
398                scalar_sum.view(&keepdim_shape)?
399            } else {
400                scalar_sum
401            }
402        };
403
404        let count = if let Some(dims) = dims {
405            dims.iter()
406                .map(|&d| self.shape().dims()[d])
407                .product::<usize>() as f64
408        } else {
409            self.numel() as f64
410        };
411
412        sum.div_scalar(
413            <T as num_traits::FromPrimitive>::from_f64(count)
414                .unwrap_or_else(|| <T as num_traits::One>::one()),
415        )
416    }
417
418    /// Compute cumulative product along specified dimension
419    pub fn cumprod(&self, dim: i32) -> Result<Self>
420    where
421        T: std::ops::Mul<Output = T> + num_traits::One + Copy,
422    {
423        let normalized_dim = if dim < 0 {
424            (self.shape().len() as i32 + dim) as usize
425        } else {
426            dim as usize
427        };
428
429        if normalized_dim >= self.shape().len() {
430            return Err(torsh_core::error::TorshError::InvalidDimension {
431                dim: normalized_dim,
432                ndim: self.shape().len(),
433            });
434        }
435
436        let shape = self.shape().clone();
437        let input_shape = shape.dims();
438        let data = self.data()?;
439        let mut result_data = data.to_vec();
440
441        let outer_size: usize = input_shape[..normalized_dim].iter().product();
442        let dim_size = input_shape[normalized_dim];
443        let inner_size: usize = input_shape[normalized_dim + 1..].iter().product();
444
445        for outer_idx in 0..outer_size {
446            for inner_idx in 0..inner_size {
447                let mut running_product = <T as num_traits::One>::one();
448                for dim_idx in 0..dim_size {
449                    let index =
450                        outer_idx * (dim_size * inner_size) + dim_idx * inner_size + inner_idx;
451                    running_product = running_product * result_data[index];
452                    result_data[index] = running_product;
453                }
454            }
455        }
456
457        Self::from_data(result_data, input_shape.to_vec(), self.device())
458    }
459
    /// Matrix multiplication
    ///
    /// Thin public wrapper that currently delegates to the naive
    /// `basic_matmul` implementation defined later in this module.
    pub fn matmul(&self, other: &Self) -> Result<Self>
    where
        T: num_traits::Float + std::iter::Sum,
    {
        self.basic_matmul(other)
    }
467
468    /// Sort tensor along specified dimension
469    pub fn sort(&self, _dim: Option<i32>, _descending: bool) -> Result<(Self, Self)>
470    where
471        T: PartialOrd + num_traits::Zero + num_traits::FromPrimitive,
472    {
473        // Simple implementation - sort entire tensor as 1D
474        let data = self.to_vec()?;
475        let mut indexed_data: Vec<(usize, T)> =
476            data.iter().enumerate().map(|(i, &val)| (i, val)).collect();
477
478        // Sort by value
479        indexed_data.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
480
481        // Extract sorted data and indices
482        let sorted_data: Vec<T> = indexed_data.iter().map(|(_, val)| *val).collect();
483        let indices: Vec<T> = indexed_data
484            .iter()
485            .map(|(i, _)| {
486                <T as num_traits::FromPrimitive>::from_usize(*i)
487                    .unwrap_or_else(|| <T as num_traits::Zero>::zero())
488            })
489            .collect();
490
491        let sorted_tensor =
492            Self::from_data(sorted_data, self.shape().dims().to_vec(), self.device())?;
493        let indices_tensor = Self::from_data(indices, self.shape().dims().to_vec(), self.device())?;
494
495        Ok((sorted_tensor, indices_tensor))
496    }
497
498    /// Min reduction method without trait bounds (for Iterator compatibility)
499    pub fn min(&self) -> Result<Self>
500    where
501        T: std::cmp::PartialOrd + Copy,
502    {
503        let data = self.data()?;
504        if data.is_empty() {
505            return Err(TorshError::InvalidOperation(
506                "Cannot compute min of empty tensor".to_string(),
507            ));
508        }
509
510        let min_val = data
511            .iter()
512            .fold(data[0], |acc, &x| if x < acc { x } else { acc });
513        Self::from_data(vec![min_val], vec![], self.device)
514    }
515
516    /// Transpose operation (2D tensor)
517    pub fn t(&self) -> Result<Self>
518    where
519        T: Copy + num_traits::Zero,
520    {
521        let shape = self.shape();
522        let dims = shape.dims();
523
524        if dims.len() != 2 {
525            return Err(TorshError::InvalidOperation(
526                "Transpose operation only supported for 2D tensors".to_string(),
527            ));
528        }
529
530        let (rows, cols) = (dims[0], dims[1]);
531        let data = self.data()?;
532        let mut transposed_data = vec![num_traits::Zero::zero(); data.len()];
533
534        for i in 0..rows {
535            for j in 0..cols {
536                transposed_data[j * rows + i] = data[i * cols + j];
537            }
538        }
539
540        Self::from_data(transposed_data, vec![cols, rows], self.device)
541    }
542
543    /// Check if two tensors share the same underlying storage
544    pub fn shares_storage(&self, other: &Self) -> bool {
545        // For storage abstraction, we need to check the underlying storage
546        match (&self.storage, &other.storage) {
547            (TensorStorage::InMemory(a), TensorStorage::InMemory(b)) => Arc::ptr_eq(a, b),
548            (TensorStorage::MemoryMapped(a), TensorStorage::MemoryMapped(b)) => Arc::ptr_eq(a, b),
549            _ => false,
550        }
551    }
552
    /// Get data as a vector (backward compatibility method)
    ///
    /// Simply forwards to `to_vec()`; note this copies the full tensor
    /// contents on every call.
    pub fn data(&self) -> Result<Vec<T>>
    where
        T: Copy,
    {
        self.to_vec()
    }
560
    /// Apply a function to all elements in-place using direct storage access
    ///
    /// Acquires exclusive (copy-on-write) ownership first, then mutates the
    /// underlying storage. `InMemory` and (with the `simd` feature) `Aligned`
    /// storage are mutated in place under their write locks; the other
    /// variants fall back to read-modify-write and replace the storage.
    pub fn data_mut_apply<F>(&mut self, mut func: F) -> Result<()>
    where
        F: FnMut(&mut T),
        T: Copy,
    {
        // Never mutate storage that another tensor may still be reading.
        self.ensure_exclusive_data()?;

        match &mut self.storage {
            TensorStorage::InMemory(data) => {
                let mut data_guard = data.write().expect("lock should not be poisoned");
                for item in data_guard.iter_mut() {
                    func(item);
                }
                Ok(())
            }
            TensorStorage::MemoryMapped(_) => {
                // For memory-mapped storage, we need to read-modify-write
                let data = self.to_vec()?;
                let mut new_data = data;
                for item in new_data.iter_mut() {
                    func(item);
                }
                // Write back the data
                self.storage = TensorStorage::create_optimal(new_data)?;
                Ok(())
            }
            #[cfg(feature = "simd")]
            TensorStorage::Aligned(data) => {
                // Aligned storage supports in-place mutation via its slice view.
                let mut data_guard = data.write().expect("lock should not be poisoned");
                for item in data_guard.as_mut_slice().iter_mut() {
                    func(item);
                }
                Ok(())
            }
            #[cfg(feature = "simd")]
            TensorStorage::SimdOptimized(_) => {
                // SimdOptimized should have been converted by ensure_exclusive_data()
                // If we reach here, something went wrong - convert to optimal storage and retry
                let data = self.to_vec()?;
                let mut new_data = data;
                for item in new_data.iter_mut() {
                    func(item);
                }
                self.storage = TensorStorage::create_optimal(new_data)?;
                Ok(())
            }
        }
    }
610
611    /// Clone the tensor with independent data (deep copy)
612    pub fn clone_data(&self) -> Self
613    where
614        T: Copy,
615    {
616        let data = self
617            .to_vec()
618            .expect("tensor to vec conversion should succeed");
619        Self::from_data(data, self.shape().dims().to_vec(), self.device)
620            .expect("tensor creation should succeed")
621    }
622
    /// Ensure tensor has unique data (copy-on-write semantics)
    ///
    /// If the underlying storage `Arc` is shared (strong count > 1), the data
    /// is copied into fresh storage so subsequent mutation cannot affect
    /// other tensors. `SimdOptimized` storage is always converted because it
    /// is immutable by design.
    pub fn make_unique(&mut self) -> Result<()> {
        // For storage-based approach, create new storage if shared
        match &self.storage {
            TensorStorage::InMemory(data) => {
                if Arc::strong_count(data) > 1 {
                    let data_vec = self.to_vec()?;
                    self.storage = TensorStorage::create_optimal(data_vec)?;
                }
            }
            TensorStorage::MemoryMapped(storage) => {
                if Arc::strong_count(storage) > 1 {
                    let data_vec = self.to_vec()?;
                    self.storage = TensorStorage::create_optimal(data_vec)?;
                }
            }
            #[cfg(feature = "simd")]
            TensorStorage::Aligned(data) => {
                if Arc::strong_count(data) > 1 {
                    let data_vec = self.to_vec()?;
                    self.storage = TensorStorage::create_optimal(data_vec)?;
                }
            }
            #[cfg(feature = "simd")]
            TensorStorage::SimdOptimized(_storage) => {
                // SimdOptimized storage is immutable by design (optimized for read-heavy workloads)
                // Always convert to Aligned storage which supports both SIMD and mutation
                let data_vec = self.to_vec()?;
                self.storage = TensorStorage::aligned(data_vec)?;
            }
        }
        Ok(())
    }
656
657    /// Apply function in-place
658    pub fn apply_<F>(&mut self, func: F) -> Result<()>
659    where
660        F: Fn(T) -> T,
661        T: Copy,
662    {
663        let data = self.to_vec()?;
664        let new_data: Vec<T> = data.into_iter().map(func).collect();
665
666        // Update storage with new data
667        self.storage = TensorStorage::create_optimal(new_data)?;
668        Ok(())
669    }
670
671    /// Apply function element-wise to create new tensor
672    pub fn map<F>(&self, func: F) -> Result<Self>
673    where
674        F: Fn(T) -> T,
675        T: Copy,
676    {
677        let data = self.to_vec()?;
678        let new_data: Vec<T> = data.into_iter().map(func).collect();
679        let mut result = Self::from_data(new_data, self.shape().dims().to_vec(), self.device)?;
680
681        // Preserve gradient tracking flag from original tensor
682        result.requires_grad = self.requires_grad;
683
684        Ok(result)
685    }
686
687    /// Extract a scalar value from a single-element tensor
688    pub fn item(&self) -> Result<T>
689    where
690        T: Copy,
691    {
692        let data = self.data()?;
693        if data.len() != 1 {
694            return Err(TorshError::InvalidArgument(format!(
695                "item() can only be called on single-element tensors, got {} elements",
696                data.len()
697            )));
698        }
699        Ok(data[0])
700    }
701
702    /// Concatenate tensors along a dimension
703    pub fn cat(tensors: &[&Self], dim: i32) -> Result<Self>
704    where
705        T: Copy,
706    {
707        if tensors.is_empty() {
708            return Err(TorshError::InvalidArgument(
709                "Cannot concatenate empty tensor list".to_string(),
710            ));
711        }
712
713        // For now, implement simple concatenation for 1D tensors along dim 0
714        // TODO: Implement proper multi-dimensional concatenation
715        let mut all_data = Vec::new();
716        let mut total_len = 0;
717
718        for tensor in tensors {
719            let data = tensor.data()?;
720            all_data.extend_from_slice(&data);
721            total_len += data.len();
722        }
723
724        // Use the shape of the first tensor as base, but extend the concatenation dimension
725        let first_tensor_shape = tensors[0].shape();
726        let first_shape = first_tensor_shape.dims();
727        let mut result_shape = first_shape.to_vec();
728
729        if dim == 0 && result_shape.len() == 1 {
730            result_shape[0] = total_len;
731        } else if result_shape.is_empty() {
732            result_shape = vec![total_len];
733        }
734
735        Self::from_data(all_data, result_shape, tensors[0].device)
736    }
737
    /// Ensure exclusive ownership of data using copy-on-write semantics
    /// If the data is shared (Arc has multiple strong references), clone it
    ///
    /// Called before any in-place mutation (see `data_mut_apply`). Each
    /// storage variant is checked via `Arc::strong_count`; shared storage is
    /// replaced with a freshly-built copy.
    fn ensure_exclusive_data(&mut self) -> Result<()> {
        match &self.storage {
            TensorStorage::InMemory(data) => {
                if Arc::strong_count(data) > 1 {
                    // Data is shared, need to clone it to get exclusive access
                    let cloned_data = {
                        let data_guard = data.read().expect("lock should not be poisoned");
                        data_guard.clone()
                    };
                    self.storage = TensorStorage::in_memory(cloned_data);
                }
            }
            TensorStorage::MemoryMapped(storage) => {
                if Arc::strong_count(storage) > 1 {
                    // Clone memory-mapped storage by converting to vec and back
                    let data_vec = self.storage.to_vec()?;
                    self.storage = TensorStorage::create_optimal(data_vec)?;
                }
            }
            #[cfg(feature = "simd")]
            TensorStorage::Aligned(data) => {
                if Arc::strong_count(data) > 1 {
                    // Data is shared, need to clone it to get exclusive access
                    let vec_data = {
                        let data_guard = data.read().expect("lock should not be poisoned");
                        data_guard.as_slice().to_vec()
                    };
                    self.storage = TensorStorage::aligned(vec_data)?;
                }
            }
            #[cfg(feature = "simd")]
            TensorStorage::SimdOptimized(storage) => {
                if Arc::strong_count(storage) > 1 || storage.is_shared() {
                    // SimdOptimized uses COW - copy the data to get exclusive access
                    let vec_data = storage.to_vec();
                    self.storage = TensorStorage::simd_optimized(vec_data)?;
                }
            }
        }
        Ok(())
    }
781}
782
783// Numeric operations
784impl<T: TensorElement + Copy> Tensor<T>
785where
786    T: num_traits::Float,
787{
788    /// Compute the L2 norm of the tensor
789    pub fn norm(&self) -> Result<Self> {
790        let data = self.data()?;
791        let sum_squares: T = data
792            .iter()
793            .map(|&x| x * x)
794            .fold(num_traits::Zero::zero(), |acc, x| acc + x);
795        let norm_value = sum_squares.sqrt();
796
797        // Return scalar tensor (1-element tensor with shape [])
798        Tensor::from_data(vec![norm_value], vec![], self.device())
799    }
800}
801
802// SciRS2 backend integration (placeholder implementations)
803impl<T: TensorElement + Copy> Tensor<T> {
    /// Use SciRS2 backend for optimized matrix multiplication
    ///
    /// Placeholder: currently just delegates to the naive `basic_matmul`.
    pub fn matmul_scirs2(&self, other: &Self) -> Result<Self>
    where
        T: num_traits::Float + num_traits::Zero + num_traits::One + std::iter::Sum,
    {
        // TODO: Integrate with actual SciRS2 backend
        // For now, implement basic matrix multiplication
        self.basic_matmul(other)
    }
813
814    /// Use SciRS2 backend for optimized sum reduction
815    pub fn sum_scirs2(&self) -> Result<Self>
816    where
817        T: std::ops::Add<Output = T> + num_traits::Zero,
818    {
819        // TODO: Integrate with actual SciRS2 backend
820        let data = self.data()?;
821        let sum_value = data
822            .iter()
823            .fold(<T as num_traits::Zero>::zero(), |acc, &x| acc + x);
824        Tensor::from_data(vec![sum_value], vec![], self.device())
825    }
826
827    /// Use SciRS2 backend for optimized mean reduction
828    pub fn mean_scirs2(&self) -> Result<Self>
829    where
830        T: std::ops::Add<Output = T>
831            + std::ops::Div<Output = T>
832            + num_traits::Zero
833            + From<usize>
834            + num_traits::FromPrimitive,
835    {
836        // TODO: Integrate with actual SciRS2 backend
837        let data = self.data()?;
838        if data.is_empty() {
839            return Err(TorshError::InvalidArgument(
840                "Cannot compute mean of empty tensor".to_string(),
841            ));
842        }
843        let sum_value = data
844            .iter()
845            .fold(<T as num_traits::Zero>::zero(), |acc, &x| acc + x);
846        let mean_value = sum_value / T::from(data.len());
847        Tensor::from_data(vec![mean_value], vec![], self.device())
848    }
849
850    /// Use SciRS2 backend for optimized ReLU activation
851    pub fn relu_scirs2(&self) -> Result<Self>
852    where
853        T: PartialOrd + num_traits::Zero,
854    {
855        // TODO: Integrate with actual SciRS2 backend
856        let zero = <T as num_traits::Zero>::zero();
857        self.map(|x| if x > zero { x } else { zero })
858    }
859
860    /// Use SciRS2 backend for optimized sigmoid activation
861    pub fn sigmoid_scirs2(&self) -> Result<Self>
862    where
863        T: num_traits::Float,
864    {
865        // TODO: Integrate with actual SciRS2 backend
866        self.map(|x| {
867            let one = <T as num_traits::One>::one();
868            one / (one + (-x).exp())
869        })
870    }
871
872    /// Use SciRS2 backend for optimized tanh activation
873    pub fn tanh_scirs2(&self) -> Result<Self>
874    where
875        T: num_traits::Float,
876    {
877        // TODO: Integrate with actual SciRS2 backend
878        self.map(|x| x.tanh())
879    }
880
881    /// Basic matrix multiplication implementation
882    fn basic_matmul(&self, other: &Self) -> Result<Self>
883    where
884        T: num_traits::Float + std::iter::Sum,
885    {
886        let self_binding = self.shape();
887        let self_shape = self_binding.dims();
888        let other_binding = other.shape();
889        let other_shape = other_binding.dims();
890
891        // Check dimensions for matrix multiplication
892        if self_shape.len() != 2 || other_shape.len() != 2 {
893            return Err(TorshError::InvalidArgument(
894                "Matrix multiplication requires 2D tensors".to_string(),
895            ));
896        }
897
898        if self_shape[1] != other_shape[0] {
899            return Err(TorshError::ShapeMismatch {
900                expected: vec![self_shape[0], other_shape[1]],
901                got: vec![self_shape[1], other_shape[0]],
902            });
903        }
904
905        let (m, k) = (self_shape[0], self_shape[1]);
906        let n = other_shape[1];
907
908        let self_data = self.data()?;
909        let other_data = other.data()?;
910        let mut result_data = vec![num_traits::Zero::zero(); m * n];
911
912        // Basic matrix multiplication
913        for i in 0..m {
914            for j in 0..n {
915                let mut sum = num_traits::Zero::zero();
916                for k_idx in 0..k {
917                    sum = sum + self_data[i * k + k_idx] * other_data[k_idx * n + j];
918                }
919                result_data[i * n + j] = sum;
920            }
921        }
922
923        Self::from_data(result_data, vec![m, n], self.device)
924    }
925    /// Softmax activation along specified dimension
926    /// Computes softmax(x_i) = exp(x_i) / sum(exp(x_j)) for all j
927    pub fn softmax(&self, dim: i32) -> Result<Self>
928    where
929        T: torsh_core::dtype::FloatElement
930            + Copy
931            + std::ops::Sub<Output = T>
932            + std::ops::Div<Output = T>,
933    {
934        let data = self.data()?;
935        let shape_binding = self.shape();
936        let shape = shape_binding.dims();
937
938        // Validate tensor has data
939        if data.is_empty() || shape.is_empty() {
940            return Err(TorshError::InvalidOperation(
941                "Cannot compute softmax on empty tensor".to_string(),
942            ));
943        }
944
945        // Handle negative dimension
946        let actual_dim = if dim < 0 {
947            (shape.len() as i32 + dim) as usize
948        } else {
949            dim as usize
950        };
951
952        if actual_dim >= shape.len() {
953            return Err(TorshError::InvalidOperation(format!(
954                "Dimension {} out of range for {}-dimensional tensor",
955                actual_dim,
956                shape.len()
957            )));
958        }
959
960        // For numerical stability, subtract max before exp
961        let max_tensor = self.max(Some(actual_dim), true)?;
962
963        // Expand max_tensor to match input shape for broadcasting
964        let expanded_max = max_tensor.expand(shape)?;
965        let shifted = self.sub(&expanded_max)?;
966        let exp_tensor = shifted.exp()?;
967        let sum_tensor = exp_tensor.sum_dim(&[actual_dim as i32], true)?;
968
969        // Expand sum_tensor to match exp_tensor shape for broadcasting
970        let expanded_sum = sum_tensor.expand(shape)?;
971        exp_tensor.div(&expanded_sum)
972    }
973
974    /// Log softmax activation along specified dimension
975    /// Computes log_softmax(x_i) = log(softmax(x_i))
976    pub fn log_softmax(&self, dim: i32) -> Result<Self>
977    where
978        T: torsh_core::dtype::FloatElement + Copy + std::ops::Sub<Output = T>,
979    {
980        let softmax_result = self.softmax(dim)?;
981        softmax_result.log()
982    }
983
984    /// Computes cumulative sum along a dimension
985    pub fn cumsum(&self, dim: i32) -> Result<Self>
986    where
987        T: std::ops::Add<Output = T> + num_traits::Zero + Copy,
988    {
989        let shape_binding = self.shape();
990        let shape = shape_binding.dims();
991
992        // Handle negative dimension
993        let actual_dim = if dim < 0 {
994            (shape.len() as i32 + dim) as usize
995        } else {
996            dim as usize
997        };
998
999        if actual_dim >= shape.len() {
1000            return Err(TorshError::InvalidOperation(format!(
1001                "Dimension {} out of range for {}-dimensional tensor",
1002                actual_dim,
1003                shape.len()
1004            )));
1005        }
1006
1007        let data = self.data()?;
1008        let mut result_data = data.clone();
1009
1010        // Simplified cumsum implementation for now
1011        // This is a basic implementation that works along the flattened array
1012        if actual_dim == shape.len() - 1 || shape.len() == 1 {
1013            let mut cumulative = <T as num_traits::Zero>::zero();
1014            for i in 0..result_data.len() {
1015                cumulative = cumulative + result_data[i];
1016                result_data[i] = cumulative;
1017            }
1018        }
1019
1020        Self::from_data(result_data, shape.to_vec(), self.device)
1021    }
1022
1023    /// Find the indices of minimum values along a dimension
1024    pub fn argmin(&self, dim: Option<i32>) -> Result<Tensor<i64>>
1025    where
1026        T: std::cmp::PartialOrd + Copy,
1027    {
1028        let data = self.data()?;
1029        let shape_binding = self.shape();
1030        let shape = shape_binding.dims();
1031
1032        if shape.is_empty() {
1033            return Err(TorshError::InvalidOperation(
1034                "Cannot compute argmin on empty tensor".to_string(),
1035            ));
1036        }
1037
1038        match dim {
1039            Some(d) => {
1040                // Handle negative dimension
1041                let actual_dim = if d < 0 {
1042                    (shape.len() as i32 + d) as usize
1043                } else {
1044                    d as usize
1045                };
1046
1047                if actual_dim >= shape.len() {
1048                    return Err(TorshError::InvalidOperation(format!(
1049                        "Dimension {} out of range for {}-dimensional tensor",
1050                        actual_dim,
1051                        shape.len()
1052                    )));
1053                }
1054
1055                // For simplicity, return the first minimum index found
1056                // This is a basic implementation - real argmin would handle the specified dimension properly
1057                let min_val = data
1058                    .iter()
1059                    .fold(data[0], |acc, &x| if x < acc { x } else { acc });
1060                let min_idx = data.iter().position(|&x| x == min_val).unwrap_or(0);
1061
1062                let result_data = vec![min_idx as i64];
1063                Tensor::<i64>::from_data(result_data, vec![1], self.device)
1064            }
1065            None => {
1066                // Find argmin over the entire flattened tensor
1067                let min_val = data
1068                    .iter()
1069                    .fold(data[0], |acc, &x| if x < acc { x } else { acc });
1070                let min_idx = data.iter().position(|&x| x == min_val).unwrap_or(0);
1071
1072                let result_data = vec![min_idx as i64];
1073                Tensor::<i64>::from_data(result_data, vec![], self.device)
1074            }
1075        }
1076    }
1077
1078    /// Find the indices of maximum values along a dimension
1079    pub fn argmax(&self, dim: Option<i32>) -> Result<Tensor<i64>>
1080    where
1081        T: std::cmp::PartialOrd + Copy,
1082    {
1083        let data = self.data()?;
1084        let shape_binding = self.shape();
1085        let shape = shape_binding.dims();
1086
1087        if shape.is_empty() {
1088            return Err(TorshError::InvalidOperation(
1089                "Cannot compute argmax on empty tensor".to_string(),
1090            ));
1091        }
1092
1093        match dim {
1094            Some(d) => {
1095                // Handle negative dimension
1096                let actual_dim = if d < 0 {
1097                    (shape.len() as i32 + d) as usize
1098                } else {
1099                    d as usize
1100                };
1101
1102                if actual_dim >= shape.len() {
1103                    return Err(TorshError::InvalidOperation(format!(
1104                        "Dimension {} out of range for {}-dimensional tensor",
1105                        actual_dim,
1106                        shape.len()
1107                    )));
1108                }
1109
1110                // For simplicity, return the first maximum index found
1111                // This is a basic implementation - real argmax would handle the specified dimension properly
1112                let max_val = data
1113                    .iter()
1114                    .fold(data[0], |acc, &x| if x > acc { x } else { acc });
1115                let max_idx = data.iter().position(|&x| x == max_val).unwrap_or(0);
1116
1117                let result_data = vec![max_idx as i64];
1118                Tensor::<i64>::from_data(result_data, vec![1], self.device)
1119            }
1120            None => {
1121                // Find argmax over the entire flattened tensor
1122                let max_val = data
1123                    .iter()
1124                    .fold(data[0], |acc, &x| if x > acc { x } else { acc });
1125                let max_idx = data.iter().position(|&x| x == max_val).unwrap_or(0);
1126
1127                let result_data = vec![max_idx as i64];
1128                Tensor::<i64>::from_data(result_data, vec![], self.device)
1129            }
1130        }
1131    }
1132
1133    /// Returns the k largest elements along a dimension
1134    pub fn topk(
1135        &self,
1136        k: usize,
1137        dim: Option<i32>,
1138        largest: bool,
1139        sorted: bool,
1140    ) -> Result<(Self, Tensor<i64>)>
1141    where
1142        T: std::cmp::PartialOrd + Copy + num_traits::Zero,
1143    {
1144        let data = self.data()?;
1145        let shape_binding = self.shape();
1146        let shape = shape_binding.dims();
1147
1148        if shape.is_empty() {
1149            return Err(TorshError::InvalidOperation(
1150                "Cannot compute topk on empty tensor".to_string(),
1151            ));
1152        }
1153
1154        if k == 0 {
1155            return Err(TorshError::InvalidArgument(
1156                "k must be greater than 0".to_string(),
1157            ));
1158        }
1159
1160        // Log dimension and sorting info
1161        if let Some(_d) = dim {
1162        } else {
1163        }
1164
1165        // For simplicity, implement topk on flattened tensor
1166        // TODO: Implement proper per-dimension topk when dim is specified
1167        let mut indexed_data: Vec<(usize, T)> =
1168            data.iter().enumerate().map(|(i, &val)| (i, val)).collect();
1169
1170        // Sort by value (largest first if largest=true, smallest first if largest=false)
1171        if largest {
1172            indexed_data.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
1173        } else {
1174            indexed_data.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
1175        }
1176
1177        // Take top k elements
1178        let top_k = indexed_data
1179            .into_iter()
1180            .take(k.min(data.len()))
1181            .collect::<Vec<_>>();
1182
1183        // If sorted=false, shuffle the results to remove order
1184        // (in practice, keeping sorted is usually preferred for performance)
1185        if !sorted {
1186            // TODO: Implement shuffling when needed
1187        }
1188
1189        // Extract values and indices
1190        let values: Vec<T> = top_k.iter().map(|(_, val)| *val).collect();
1191        let indices: Vec<i64> = top_k.iter().map(|(idx, _)| *idx as i64).collect();
1192
1193        // Create result tensors
1194        let values_tensor = Self::from_data(values, vec![k.min(data.len())], self.device)?;
1195        let indices_tensor =
1196            Tensor::<i64>::from_data(indices, vec![k.min(data.len())], self.device)?;
1197
1198        Ok((values_tensor, indices_tensor))
1199    }
1200}
1201
#[cfg(test)]
mod tests {
    use super::*;
    use torsh_core::device::DeviceType;

    #[test]
    fn test_scalar_creation() {
        // A scalar tensor has an empty shape and yields its value via item().
        let s = Tensor::<f32>::scalar(42.0).expect("operation should succeed");
        assert_eq!(s.shape().dims(), &[] as &[usize]);
        assert_eq!(s.item().expect("item extraction should succeed"), 42.0);
    }

    #[test]
    fn test_max_reduction() {
        let t = Tensor::from_data(vec![1.0f32, 5.0, 3.0, 2.0], vec![4], DeviceType::Cpu)
            .expect("operation should succeed");
        let m = t.max(None, false).expect("operation should succeed");
        assert_eq!(m.item().expect("item extraction should succeed"), 5.0);
    }

    #[test]
    fn test_norm_computation() {
        // ||(3, 4)|| == 5 (3-4-5 triangle).
        let t = Tensor::from_data(vec![3.0f32, 4.0], vec![2], DeviceType::Cpu)
            .expect("operation should succeed");
        let n = t.norm().expect("norm computation should succeed");
        assert!((n.item().expect("item extraction should succeed") - 5.0).abs() < 1e-6);
    }

    #[test]
    fn test_apply_operations() {
        // apply_ mutates the tensor in place.
        let mut t = Tensor::from_data(vec![1.0f32, 2.0, 3.0, 4.0], vec![4], DeviceType::Cpu)
            .expect("operation should succeed");
        t.apply_(|x| x * 2.0).expect("operation should succeed");
        assert_eq!(
            t.data().expect("data retrieval should succeed"),
            vec![2.0, 4.0, 6.0, 8.0]
        );

        // map builds a new tensor and leaves its source untouched.
        let src = Tensor::from_data(vec![1.0f32, 2.0, 3.0], vec![3], DeviceType::Cpu)
            .expect("operation should succeed");
        let out = src.map(|x| x + 1.0).expect("operation should succeed");
        assert_eq!(
            out.data().expect("data retrieval should succeed"),
            vec![2.0, 3.0, 4.0]
        );
        assert_eq!(
            src.data().expect("data retrieval should succeed"),
            vec![1.0, 2.0, 3.0]
        );
    }

    #[test]
    fn test_activation_functions() {
        let t = Tensor::from_data(vec![-1.0f32, 0.0, 1.0, 2.0], vec![4], DeviceType::Cpu)
            .expect("operation should succeed");

        // ReLU clamps negatives to zero.
        let relu_out = t.relu().expect("relu should succeed");
        assert_eq!(
            relu_out.data().expect("data retrieval should succeed"),
            vec![0.0, 0.0, 1.0, 2.0]
        );

        // abs flips the sign of negatives only.
        let abs_out = t.abs().expect("abs computation should succeed");
        assert_eq!(
            abs_out.data().expect("data retrieval should succeed"),
            vec![1.0, 0.0, 1.0, 2.0]
        );

        // clamp pins values into [-0.5, 1.5].
        let clamp_out = t.clamp(-0.5, 1.5).expect("operation should succeed");
        assert_eq!(
            clamp_out.data().expect("data retrieval should succeed"),
            vec![-0.5, 0.0, 1.0, 1.5]
        );
    }

    #[test]
    fn test_storage_sharing() {
        // clone() shares storage; clone_data() performs a deep copy.
        let base =
            Tensor::<f32>::zeros(&[2, 2], DeviceType::Cpu).expect("operation should succeed");
        let shallow = base.clone();
        let deep = base.clone_data();

        assert!(base.shares_storage(&shallow));
        assert!(!base.shares_storage(&deep));
    }

    #[test]
    fn test_basic_matmul() {
        let lhs = Tensor::from_data(vec![1.0f32, 2.0, 3.0, 4.0], vec![2, 2], DeviceType::Cpu)
            .expect("operation should succeed");
        let rhs = Tensor::from_data(vec![5.0f32, 6.0, 7.0, 8.0], vec![2, 2], DeviceType::Cpu)
            .expect("operation should succeed");

        let product = lhs.basic_matmul(&rhs).expect("operation should succeed");
        assert_eq!(product.shape().dims(), &[2, 2]);

        // [[1, 2], [3, 4]] x [[5, 6], [7, 8]] == [[19, 22], [43, 50]]
        assert_eq!(
            product.data().expect("data retrieval should succeed"),
            vec![19.0, 22.0, 43.0, 50.0]
        );
    }

    #[test]
    fn test_reductions() {
        let t = Tensor::from_data(vec![1.0f32, 2.0, 3.0, 4.0], vec![4], DeviceType::Cpu)
            .expect("operation should succeed");

        let total = t.sum().expect("sum should succeed");
        assert_eq!(total.item().expect("item extraction should succeed"), 10.0);

        let avg = t.mean(None, false).expect("operation should succeed");
        assert_eq!(avg.item().expect("item extraction should succeed"), 2.5);
    }

    #[test]
    fn test_copy_on_write() {
        let mut owner =
            Tensor::<f32>::ones(&[2], DeviceType::Cpu).expect("operation should succeed");
        let alias = owner.clone();

        // Clones share storage until a write forces a copy.
        assert!(owner.shares_storage(&alias));

        // make_unique detaches the storage from all aliases.
        owner.make_unique().expect("make_unique should succeed");
        assert!(!owner.shares_storage(&alias));
    }

    #[test]
    fn test_item_extraction() {
        // item() succeeds only for single-element (scalar) tensors.
        let scalar = Tensor::from_data(vec![42.0f32], vec![], DeviceType::Cpu)
            .expect("operation should succeed");
        assert_eq!(scalar.item().expect("item extraction should succeed"), 42.0);

        let vector = Tensor::from_data(vec![1.0f32, 2.0], vec![2], DeviceType::Cpu)
            .expect("operation should succeed");
        assert!(vector.item().is_err());
    }
}