1use std::sync::Arc;
16use torsh_core::{
17 device::DeviceType,
18 dtype::{FloatElement, TensorElement},
19 error::{Result, TorshError},
20};
21
22use crate::{core_ops::Tensor, storage::TensorStorage};
23
impl<T: FloatElement + Copy> Tensor<T> {
    /// Creates a 0-dimensional (scalar) tensor holding `value`.
    ///
    /// NOTE(review): the scalar is always placed on `DeviceType::Cpu`,
    /// regardless of the caller's device — confirm this is intended, since
    /// `max(None, false)` routes through here and thus drops the device.
    pub fn scalar(value: T) -> Result<Self> {
        Self::from_data(vec![value], vec![], DeviceType::Cpu)
    }

    /// Copies this tensor's data into a dynamically-dimensioned `ndarray`
    /// array with the same shape.
    ///
    /// # Errors
    /// Returns `InvalidShape` when `ArrayD::from_shape_vec` rejects the
    /// shape/element-count combination.
    pub fn as_ndarray(&self) -> Result<scirs2_core::ndarray::ArrayD<T>> {
        use scirs2_core::ndarray::ArrayD;
        let data = self.data()?;
        // Clone the shape so we own it independently of `self`.
        let shape_obj = self.shape().clone();
        let shape = shape_obj.dims();
        ArrayD::from_shape_vec(shape, data.to_vec())
            .map_err(|e| TorshError::InvalidShape(format!("ndarray conversion failed: {}", e)))
    }

    /// Builds a tensor on `device` from an owned `ndarray` array, consuming
    /// it. The array's shape is preserved; the raw-vec offset is discarded.
    pub fn from_ndarray(
        array: scirs2_core::ndarray::ArrayD<T>,
        device: DeviceType,
    ) -> Result<Self> {
        let shape = array.shape().to_vec();
        let (data, _offset) = array.into_raw_vec_and_offset();
        Self::from_data(data, shape, device)
    }

    /// Maximum reduction.
    ///
    /// * `dim == None` — reduces over all elements; with `keepdim` the result
    ///   keeps a rank-matching shape of all 1s, otherwise it is a scalar.
    /// * `dim == Some(axis)` — reduces along `axis` only.
    ///
    /// NOTE(review): the global path folds from `neg_infinity()`, so an empty
    /// tensor yields `-inf` instead of an error — confirm that is desired.
    ///
    /// # Errors
    /// `InvalidOperation` when `axis` is out of bounds.
    pub fn max(&self, dim: Option<usize>, keepdim: bool) -> Result<Self> {
        match dim {
            None => {
                let data = self.to_vec()?;
                // Fold with -inf as identity; NaN comparisons are false, so
                // NaNs are skipped rather than propagated.
                let max_val =
                    data.into_iter()
                        .fold(<T as FloatElement>::neg_infinity(), |acc, x| {
                            if x > acc {
                                x
                            } else {
                                acc
                            }
                        });
                if keepdim {
                    // Keep the original rank with every dimension collapsed to 1.
                    let shape = vec![1; self.shape().dims().len()];
                    Self::from_data(vec![max_val], shape, self.device)
                } else {
                    Self::scalar(max_val)
                }
            }
            Some(axis) => {
                let shape_binding = self.shape();
                let input_shape = shape_binding.dims();

                if axis >= input_shape.len() {
                    return Err(TorshError::InvalidOperation(format!(
                        "Axis {} out of bounds for {}-dimensional tensor",
                        axis,
                        input_shape.len()
                    )));
                }

                // Reduced axis either collapses to 1 (keepdim) or disappears.
                let mut output_shape = input_shape.to_vec();
                if keepdim {
                    output_shape[axis] = 1;
                } else {
                    output_shape.remove(axis);
                }

                // Decompose the (row-major) index space around `axis`:
                // index = outer * axis_size * inner_size + a * inner_size + inner.
                let data = self.data()?;
                let outer_size: usize = input_shape[..axis].iter().product();
                let axis_size = input_shape[axis];
                let inner_size: usize = input_shape[axis + 1..].iter().product();

                let output_size = outer_size * inner_size;
                let mut result_data = vec![<T as FloatElement>::neg_infinity(); output_size];

                for outer in 0..outer_size {
                    for inner in 0..inner_size {
                        let mut max_val = <T as FloatElement>::neg_infinity();
                        for a in 0..axis_size {
                            let input_idx = outer * axis_size * inner_size + a * inner_size + inner;
                            let val = data[input_idx];
                            if val > max_val {
                                max_val = val;
                            }
                        }
                        let output_idx = outer * inner_size + inner;
                        result_data[output_idx] = max_val;
                    }
                }

                Self::from_data(result_data, output_shape, self.device)
            }
        }
    }

    /// Maximum along `dim`, which may be negative (counted from the end).
    ///
    /// Same strided-reduction scheme as `max(Some(..))`; a very negative
    /// `dim` wraps to a huge `usize` and is caught by the bounds check below.
    ///
    /// # Errors
    /// `InvalidOperation` when the normalized dimension is out of range.
    pub fn max_dim(&self, dim: i32, keepdim: bool) -> Result<Self> {
        let shape_binding = self.shape();
        let input_shape = shape_binding.dims();

        let actual_dim = if dim < 0 {
            (input_shape.len() as i32 + dim) as usize
        } else {
            dim as usize
        };

        if actual_dim >= input_shape.len() {
            return Err(TorshError::InvalidOperation(format!(
                "Dimension {} out of range for {}-dimensional tensor",
                actual_dim,
                input_shape.len()
            )));
        }

        let mut output_shape = input_shape.to_vec();
        if keepdim {
            output_shape[actual_dim] = 1;
        } else {
            output_shape.remove(actual_dim);
        }

        let data = self.data()?;
        let outer_size: usize = input_shape[..actual_dim].iter().product();
        let dim_size = input_shape[actual_dim];
        let inner_size: usize = input_shape[actual_dim + 1..].iter().product();

        let output_size = outer_size * inner_size;
        let mut result_data = vec![<T as FloatElement>::neg_infinity(); output_size];

        for outer in 0..outer_size {
            for inner in 0..inner_size {
                let mut max_val = <T as FloatElement>::neg_infinity();
                for d in 0..dim_size {
                    let input_idx = outer * dim_size * inner_size + d * inner_size + inner;
                    let val = data[input_idx];
                    if val > max_val {
                        max_val = val;
                    }
                }
                let output_idx = outer * inner_size + inner;
                result_data[output_idx] = max_val;
            }
        }

        Self::from_data(result_data, output_shape, self.device)
    }

    /// Minimum along `dim` (negative dims allowed), implemented through
    /// ndarray's `map_axis` rather than the manual strided loops above.
    ///
    /// With `keepdim` the reduced axis is restored as size 1 via `to_shape`.
    ///
    /// # Errors
    /// `InvalidDimension` for an out-of-range dim; `InvalidShape` if the
    /// keepdim reshape fails.
    pub fn min_dim(&self, dim: i32, keepdim: bool) -> Result<Self> {
        use scirs2_core::ndarray::Axis;

        let normalized_dim = if dim < 0 {
            (self.shape().len() as i32 + dim) as usize
        } else {
            dim as usize
        };

        if normalized_dim >= self.shape().len() {
            return Err(torsh_core::error::TorshError::InvalidDimension {
                dim: normalized_dim,
                ndim: self.shape().len(),
            });
        }

        let array = self.as_ndarray()?;
        // Fold with +inf as identity; empty lanes would yield +inf.
        let result = array.map_axis(Axis(normalized_dim), |view| {
            view.iter()
                .copied()
                .fold(<T as FloatElement>::infinity(), |acc, x| {
                    if x < acc {
                        x
                    } else {
                        acc
                    }
                })
        });

        let result_shape = if keepdim {
            let mut shape = self.shape().to_vec();
            shape[normalized_dim] = 1;
            shape
        } else {
            result.shape().to_vec()
        };

        Self::from_ndarray(
            result
                .to_shape(result_shape)
                .map_err(|e| TorshError::InvalidShape(format!("Shape conversion failed: {}", e)))?
                .to_owned(),
            self.device(),
        )
    }
}
228
229impl<T: TensorElement + Copy> Tensor<T>
231where
232 T: PartialEq + num_traits::Zero,
233{
234 pub fn all(&self) -> Result<Tensor<bool>> {
236 let data = self.to_vec()?;
237 let zero = <T as num_traits::Zero>::zero();
238 let all_true = data.iter().all(|&x| x != zero);
239 Tensor::from_data(vec![all_true], vec![], self.device())
240 }
241
242 pub fn any(&self) -> Result<Tensor<bool>> {
244 let data = self.to_vec()?;
245 let zero = <T as num_traits::Zero>::zero();
246 let any_true = data.iter().any(|&x| x != zero);
247 Tensor::from_data(vec![any_true], vec![], self.device())
248 }
249
250 pub fn all_dim(&self, dim: i32, _keepdim: bool) -> Result<Tensor<bool>> {
252 let normalized_dim = if dim < 0 {
253 (self.shape().len() as i32 + dim) as usize
254 } else {
255 dim as usize
256 };
257
258 if normalized_dim >= self.shape().len() {
259 return Err(torsh_core::error::TorshError::InvalidDimension {
260 dim: normalized_dim,
261 ndim: self.shape().len(),
262 });
263 }
264
265 Err(TorshError::NotImplemented(
268 "Boolean all() reduction along dimension not yet implemented".to_string(),
269 ))
270 }
271
272 pub fn any_dim(&self, dim: i32, _keepdim: bool) -> Result<Tensor<bool>> {
274 let normalized_dim = if dim < 0 {
275 (self.shape().len() as i32 + dim) as usize
276 } else {
277 dim as usize
278 };
279
280 if normalized_dim >= self.shape().len() {
281 return Err(torsh_core::error::TorshError::InvalidDimension {
282 dim: normalized_dim,
283 ndim: self.shape().len(),
284 });
285 }
286
287 Err(TorshError::NotImplemented(
290 "Boolean any() reduction along dimension not yet implemented".to_string(),
291 ))
292 }
293}
294
impl<T: TensorElement + Copy> Tensor<T> {
    /// Sums all elements into a scalar (0-dimensional) tensor on the same
    /// device. Propagates `requires_grad` from `self` to the result.
    pub fn sum(&self) -> Result<Self>
    where
        T: std::ops::Add<Output = T> + num_traits::Zero,
    {
        let data = self.data()?;
        let sum_value = data
            .iter()
            .fold(<T as num_traits::Zero>::zero(), |acc, &x| acc + x);
        let mut result = Tensor::from_data(vec![sum_value], vec![], self.device())?;

        if self.requires_grad {
            result.requires_grad = true;
        }

        Ok(result)
    }

    /// Sums along the given dimensions. Negative dims count from the end.
    ///
    /// Currently only a single dimension is reduced with shape handling:
    /// * empty `dims` — full `sum()`.
    /// * one dim — strided reduction; `keepdim` keeps the dim as size 1.
    ///
    /// NOTE(review): for more than one dim this falls back to a full `sum()`,
    /// ignoring `keepdim` and the specific dims — callers passing multiple
    /// dims get a scalar, not a partial reduction. Confirm/extend.
    ///
    /// # Errors
    /// `InvalidOperation` when the normalized dim is out of range.
    pub fn sum_dim(&self, dims: &[i32], keepdim: bool) -> Result<Self>
    where
        T: std::ops::Add<Output = T> + num_traits::Zero,
    {
        if dims.is_empty() {
            return self.sum();
        }

        let shape_binding = self.shape();
        let input_shape = shape_binding.dims();

        if dims.len() == 1 {
            let dim = dims[0];
            let actual_dim = if dim < 0 {
                (input_shape.len() as i32 + dim) as usize
            } else {
                dim as usize
            };

            if actual_dim >= input_shape.len() {
                return Err(TorshError::InvalidOperation(format!(
                    "Dimension {} out of range for {}-dimensional tensor",
                    actual_dim,
                    input_shape.len()
                )));
            }

            let mut output_shape = input_shape.to_vec();
            if keepdim {
                output_shape[actual_dim] = 1;
            } else {
                output_shape.remove(actual_dim);
            }

            // index = outer * dim_size * inner_size + d * inner_size + inner
            let data = self.data()?;
            let outer_size: usize = input_shape[..actual_dim].iter().product();
            let dim_size = input_shape[actual_dim];
            let inner_size: usize = input_shape[actual_dim + 1..].iter().product();

            let output_size = outer_size * inner_size;
            let mut result_data = vec![num_traits::Zero::zero(); output_size];

            for outer in 0..outer_size {
                for inner in 0..inner_size {
                    let mut sum = num_traits::Zero::zero();
                    for d in 0..dim_size {
                        let input_idx = outer * dim_size * inner_size + d * inner_size + inner;
                        sum = sum + data[input_idx];
                    }
                    let output_idx = outer * inner_size + inner;
                    result_data[output_idx] = sum;
                }
            }

            Self::from_data(result_data, output_shape, self.device)
        } else {
            self.sum()
        }
    }

    /// Mean over the given dims (or over everything when `dims` is `None`),
    /// computed as `sum / count`.
    ///
    /// `count` is converted through `f64` and `FromPrimitive`; if that
    /// conversion fails the divisor silently falls back to one, i.e. the
    /// result degenerates to the plain sum.
    pub fn mean(&self, dims: Option<&[usize]>, keepdim: bool) -> Result<Self>
    where
        T: std::ops::Add<Output = T>
            + std::ops::Div<Output = T>
            + num_traits::Zero
            + num_traits::One
            + num_traits::FromPrimitive,
    {
        let sum = if let Some(dims) = dims {
            self.sum_dim(&dims.iter().map(|&d| d as i32).collect::<Vec<_>>(), keepdim)?
        } else {
            let scalar_sum = self.sum()?;
            if keepdim {
                // Restore the original rank as all-ones so broadcasting works.
                let keepdim_shape = vec![1; self.shape().ndim()];
                scalar_sum.view(&keepdim_shape)?
            } else {
                scalar_sum
            }
        };

        // Number of elements that were folded into each output value.
        let count = if let Some(dims) = dims {
            dims.iter()
                .map(|&d| self.shape().dims()[d])
                .product::<usize>() as f64
        } else {
            self.numel() as f64
        };

        sum.div_scalar(
            <T as num_traits::FromPrimitive>::from_f64(count)
                .unwrap_or_else(|| <T as num_traits::One>::one()),
        )
    }

    /// Cumulative product along `dim` (negative dims allowed). The output has
    /// the same shape as the input; each element is the running product of
    /// the elements up to and including it along `dim`.
    ///
    /// # Errors
    /// `InvalidDimension` when the normalized dim is out of range.
    pub fn cumprod(&self, dim: i32) -> Result<Self>
    where
        T: std::ops::Mul<Output = T> + num_traits::One + Copy,
    {
        let normalized_dim = if dim < 0 {
            (self.shape().len() as i32 + dim) as usize
        } else {
            dim as usize
        };

        if normalized_dim >= self.shape().len() {
            return Err(torsh_core::error::TorshError::InvalidDimension {
                dim: normalized_dim,
                ndim: self.shape().len(),
            });
        }

        let shape = self.shape().clone();
        let input_shape = shape.dims();
        let data = self.data()?;
        // Scan in place over a copy of the data.
        let mut result_data = data.to_vec();

        let outer_size: usize = input_shape[..normalized_dim].iter().product();
        let dim_size = input_shape[normalized_dim];
        let inner_size: usize = input_shape[normalized_dim + 1..].iter().product();

        for outer_idx in 0..outer_size {
            for inner_idx in 0..inner_size {
                let mut running_product = <T as num_traits::One>::one();
                for dim_idx in 0..dim_size {
                    let index =
                        outer_idx * (dim_size * inner_size) + dim_idx * inner_size + inner_idx;
                    running_product = running_product * result_data[index];
                    result_data[index] = running_product;
                }
            }
        }

        Self::from_data(result_data, input_shape.to_vec(), self.device())
    }

    /// 2-D matrix multiplication; thin public wrapper over `basic_matmul`.
    pub fn matmul(&self, other: &Self) -> Result<Self>
    where
        T: num_traits::Float + std::iter::Sum,
    {
        self.basic_matmul(other)
    }

    /// Sorts the tensor and returns `(sorted_values, original_indices)`,
    /// with indices encoded in `T` via `FromPrimitive`.
    ///
    /// NOTE(review): `_dim` and `_descending` are ignored — this always does
    /// a flat, ascending sort over the whole tensor (incomparable values,
    /// e.g. NaN, compare as equal). Confirm callers expect that.
    pub fn sort(&self, _dim: Option<i32>, _descending: bool) -> Result<(Self, Self)>
    where
        T: PartialOrd + num_traits::Zero + num_traits::FromPrimitive,
    {
        let data = self.to_vec()?;
        let mut indexed_data: Vec<(usize, T)> =
            data.iter().enumerate().map(|(i, &val)| (i, val)).collect();

        indexed_data.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));

        let sorted_data: Vec<T> = indexed_data.iter().map(|(_, val)| *val).collect();
        // Indices that cannot be represented in T degrade to zero.
        let indices: Vec<T> = indexed_data
            .iter()
            .map(|(i, _)| {
                <T as num_traits::FromPrimitive>::from_usize(*i)
                    .unwrap_or_else(|| <T as num_traits::Zero>::zero())
            })
            .collect();

        let sorted_tensor =
            Self::from_data(sorted_data, self.shape().dims().to_vec(), self.device())?;
        let indices_tensor = Self::from_data(indices, self.shape().dims().to_vec(), self.device())?;

        Ok((sorted_tensor, indices_tensor))
    }

    /// Global minimum as a scalar tensor. Unlike `max(None, ..)` this errors
    /// on an empty tensor instead of returning an infinity.
    pub fn min(&self) -> Result<Self>
    where
        T: std::cmp::PartialOrd + Copy,
    {
        let data = self.data()?;
        if data.is_empty() {
            return Err(TorshError::InvalidOperation(
                "Cannot compute min of empty tensor".to_string(),
            ));
        }

        // Seed the fold with the first element, so no infinity bound needed.
        let min_val = data
            .iter()
            .fold(data[0], |acc, &x| if x < acc { x } else { acc });
        Self::from_data(vec![min_val], vec![], self.device)
    }

    /// Transposes a 2-D tensor: `[rows, cols]` -> `[cols, rows]`, copying
    /// element `(i, j)` to `(j, i)`.
    ///
    /// # Errors
    /// `InvalidOperation` for non-2-D tensors.
    pub fn t(&self) -> Result<Self>
    where
        T: Copy + num_traits::Zero,
    {
        let shape = self.shape();
        let dims = shape.dims();

        if dims.len() != 2 {
            return Err(TorshError::InvalidOperation(
                "Transpose operation only supported for 2D tensors".to_string(),
            ));
        }

        let (rows, cols) = (dims[0], dims[1]);
        let data = self.data()?;
        let mut transposed_data = vec![num_traits::Zero::zero(); data.len()];

        for i in 0..rows {
            for j in 0..cols {
                transposed_data[j * rows + i] = data[i * cols + j];
            }
        }

        Self::from_data(transposed_data, vec![cols, rows], self.device)
    }

    /// True when both tensors point at the exact same storage allocation
    /// (`Arc` pointer equality). Different storage kinds never share.
    pub fn shares_storage(&self, other: &Self) -> bool {
        match (&self.storage, &other.storage) {
            (TensorStorage::InMemory(a), TensorStorage::InMemory(b)) => Arc::ptr_eq(a, b),
            (TensorStorage::MemoryMapped(a), TensorStorage::MemoryMapped(b)) => Arc::ptr_eq(a, b),
            _ => false,
        }
    }

    /// Returns a copy of the tensor's elements as a `Vec` (alias of
    /// `to_vec`). Note this allocates on every call.
    pub fn data(&self) -> Result<Vec<T>>
    where
        T: Copy,
    {
        self.to_vec()
    }

    /// Applies `func` to every element in place, after ensuring this tensor
    /// owns its storage exclusively (copy-on-write).
    ///
    /// In-memory/aligned storage is mutated under its write lock; read-only
    /// storage kinds (memory-mapped, SIMD-optimized) are materialized into a
    /// fresh buffer, mutated, and swapped in via `create_optimal`.
    pub fn data_mut_apply<F>(&mut self, mut func: F) -> Result<()>
    where
        F: FnMut(&mut T),
        T: Copy,
    {
        self.ensure_exclusive_data()?;

        match &mut self.storage {
            TensorStorage::InMemory(data) => {
                let mut data_guard = data.write().expect("lock should not be poisoned");
                for item in data_guard.iter_mut() {
                    func(item);
                }
                Ok(())
            }
            TensorStorage::MemoryMapped(_) => {
                // Mapped storage can't be written through; copy-modify-replace.
                let data = self.to_vec()?;
                let mut new_data = data;
                for item in new_data.iter_mut() {
                    func(item);
                }
                self.storage = TensorStorage::create_optimal(new_data)?;
                Ok(())
            }
            #[cfg(feature = "simd")]
            TensorStorage::Aligned(data) => {
                let mut data_guard = data.write().expect("lock should not be poisoned");
                for item in data_guard.as_mut_slice().iter_mut() {
                    func(item);
                }
                Ok(())
            }
            #[cfg(feature = "simd")]
            TensorStorage::SimdOptimized(_) => {
                // Same copy-modify-replace strategy as memory-mapped storage.
                let data = self.to_vec()?;
                let mut new_data = data;
                for item in new_data.iter_mut() {
                    func(item);
                }
                self.storage = TensorStorage::create_optimal(new_data)?;
                Ok(())
            }
        }
    }

    /// Deep copy: a new tensor with its own storage, same shape and device.
    /// Panics (via `expect`) if data extraction or construction fails.
    pub fn clone_data(&self) -> Self
    where
        T: Copy,
    {
        let data = self
            .to_vec()
            .expect("tensor to vec conversion should succeed");
        Self::from_data(data, self.shape().dims().to_vec(), self.device)
            .expect("tensor creation should succeed")
    }

    /// Detaches this tensor from any shared storage, replacing it with a
    /// private copy when the `Arc` is shared.
    ///
    /// NOTE(review): the SIMD-optimized arm always copies (no strong-count
    /// check) and rebuilds as `aligned` rather than `create_optimal` —
    /// confirm that asymmetry is intentional.
    pub fn make_unique(&mut self) -> Result<()> {
        match &self.storage {
            TensorStorage::InMemory(data) => {
                if Arc::strong_count(data) > 1 {
                    let data_vec = self.to_vec()?;
                    self.storage = TensorStorage::create_optimal(data_vec)?;
                }
            }
            TensorStorage::MemoryMapped(storage) => {
                if Arc::strong_count(storage) > 1 {
                    let data_vec = self.to_vec()?;
                    self.storage = TensorStorage::create_optimal(data_vec)?;
                }
            }
            #[cfg(feature = "simd")]
            TensorStorage::Aligned(data) => {
                if Arc::strong_count(data) > 1 {
                    let data_vec = self.to_vec()?;
                    self.storage = TensorStorage::create_optimal(data_vec)?;
                }
            }
            #[cfg(feature = "simd")]
            TensorStorage::SimdOptimized(_storage) => {
                let data_vec = self.to_vec()?;
                self.storage = TensorStorage::aligned(data_vec)?;
            }
        }
        Ok(())
    }

    /// Applies `func` to every element, replacing the storage wholesale
    /// (always reallocates, unlike `data_mut_apply` which mutates in place
    /// where possible).
    pub fn apply_<F>(&mut self, func: F) -> Result<()>
    where
        F: Fn(T) -> T,
        T: Copy,
    {
        let data = self.to_vec()?;
        let new_data: Vec<T> = data.into_iter().map(func).collect();

        self.storage = TensorStorage::create_optimal(new_data)?;
        Ok(())
    }

    /// Returns a new tensor with `func` applied element-wise; shape, device
    /// and `requires_grad` are carried over from `self`.
    pub fn map<F>(&self, func: F) -> Result<Self>
    where
        F: Fn(T) -> T,
        T: Copy,
    {
        let data = self.to_vec()?;
        let new_data: Vec<T> = data.into_iter().map(func).collect();
        let mut result = Self::from_data(new_data, self.shape().dims().to_vec(), self.device)?;

        result.requires_grad = self.requires_grad;

        Ok(result)
    }

    /// Extracts the single element of a one-element tensor.
    ///
    /// # Errors
    /// `InvalidArgument` when the tensor does not hold exactly one element.
    pub fn item(&self) -> Result<T>
    where
        T: Copy,
    {
        let data = self.data()?;
        if data.len() != 1 {
            return Err(TorshError::InvalidArgument(format!(
                "item() can only be called on single-element tensors, got {} elements",
                data.len()
            )));
        }
        Ok(data[0])
    }

    /// Concatenates tensors by flattening all their data in order.
    ///
    /// NOTE(review): real shape handling exists only for `dim == 0` on 1-D
    /// inputs (and scalars); for any other rank/dim the output reuses the
    /// FIRST tensor's shape over the combined data, which is almost
    /// certainly wrong for multi-dimensional concatenation. Shapes of the
    /// inputs are never cross-checked. Confirm/extend before relying on it.
    ///
    /// # Errors
    /// `InvalidArgument` when `tensors` is empty.
    pub fn cat(tensors: &[&Self], dim: i32) -> Result<Self>
    where
        T: Copy,
    {
        if tensors.is_empty() {
            return Err(TorshError::InvalidArgument(
                "Cannot concatenate empty tensor list".to_string(),
            ));
        }

        let mut all_data = Vec::new();
        let mut total_len = 0;

        for tensor in tensors {
            let data = tensor.data()?;
            all_data.extend_from_slice(&data);
            total_len += data.len();
        }

        let first_tensor_shape = tensors[0].shape();
        let first_shape = first_tensor_shape.dims();
        let mut result_shape = first_shape.to_vec();

        if dim == 0 && result_shape.len() == 1 {
            result_shape[0] = total_len;
        } else if result_shape.is_empty() {
            result_shape = vec![total_len];
        }

        Self::from_data(all_data, result_shape, tensors[0].device)
    }

    /// Copy-on-write guard: if the underlying storage `Arc` is shared,
    /// replaces it with a private copy so the caller may mutate safely.
    /// Each storage kind clones through its own cheapest path.
    fn ensure_exclusive_data(&mut self) -> Result<()> {
        match &self.storage {
            TensorStorage::InMemory(data) => {
                if Arc::strong_count(data) > 1 {
                    // Scope the read guard so it drops before reassignment.
                    let cloned_data = {
                        let data_guard = data.read().expect("lock should not be poisoned");
                        data_guard.clone()
                    };
                    self.storage = TensorStorage::in_memory(cloned_data);
                }
            }
            TensorStorage::MemoryMapped(storage) => {
                if Arc::strong_count(storage) > 1 {
                    let data_vec = self.storage.to_vec()?;
                    self.storage = TensorStorage::create_optimal(data_vec)?;
                }
            }
            #[cfg(feature = "simd")]
            TensorStorage::Aligned(data) => {
                if Arc::strong_count(data) > 1 {
                    let vec_data = {
                        let data_guard = data.read().expect("lock should not be poisoned");
                        data_guard.as_slice().to_vec()
                    };
                    self.storage = TensorStorage::aligned(vec_data)?;
                }
            }
            #[cfg(feature = "simd")]
            TensorStorage::SimdOptimized(storage) => {
                if Arc::strong_count(storage) > 1 || storage.is_shared() {
                    let vec_data = storage.to_vec();
                    self.storage = TensorStorage::simd_optimized(vec_data)?;
                }
            }
        }
        Ok(())
    }
}
782
783impl<T: TensorElement + Copy> Tensor<T>
785where
786 T: num_traits::Float,
787{
788 pub fn norm(&self) -> Result<Self> {
790 let data = self.data()?;
791 let sum_squares: T = data
792 .iter()
793 .map(|&x| x * x)
794 .fold(num_traits::Zero::zero(), |acc, x| acc + x);
795 let norm_value = sum_squares.sqrt();
796
797 Tensor::from_data(vec![norm_value], vec![], self.device())
799 }
800}
801
impl<T: TensorElement + Copy> Tensor<T> {
    /// `scirs2`-flavored matmul entry point; currently just delegates to the
    /// naive `basic_matmul` below.
    pub fn matmul_scirs2(&self, other: &Self) -> Result<Self>
    where
        T: num_traits::Float + num_traits::Zero + num_traits::One + std::iter::Sum,
    {
        self.basic_matmul(other)
    }

    /// Full sum to a scalar tensor. Unlike `sum()`, this does NOT propagate
    /// `requires_grad`.
    pub fn sum_scirs2(&self) -> Result<Self>
    where
        T: std::ops::Add<Output = T> + num_traits::Zero,
    {
        let data = self.data()?;
        let sum_value = data
            .iter()
            .fold(<T as num_traits::Zero>::zero(), |acc, &x| acc + x);
        Tensor::from_data(vec![sum_value], vec![], self.device())
    }

    /// Mean over all elements as a scalar tensor; the element count is
    /// converted into `T` via the `From<usize>` bound.
    ///
    /// # Errors
    /// `InvalidArgument` for an empty tensor.
    pub fn mean_scirs2(&self) -> Result<Self>
    where
        T: std::ops::Add<Output = T>
            + std::ops::Div<Output = T>
            + num_traits::Zero
            + From<usize>
            + num_traits::FromPrimitive,
    {
        let data = self.data()?;
        if data.is_empty() {
            return Err(TorshError::InvalidArgument(
                "Cannot compute mean of empty tensor".to_string(),
            ));
        }
        let sum_value = data
            .iter()
            .fold(<T as num_traits::Zero>::zero(), |acc, &x| acc + x);
        let mean_value = sum_value / T::from(data.len());
        Tensor::from_data(vec![mean_value], vec![], self.device())
    }

    /// Element-wise ReLU: `max(x, 0)` via `map`.
    pub fn relu_scirs2(&self) -> Result<Self>
    where
        T: PartialOrd + num_traits::Zero,
    {
        let zero = <T as num_traits::Zero>::zero();
        self.map(|x| if x > zero { x } else { zero })
    }

    /// Element-wise logistic sigmoid: `1 / (1 + exp(-x))` via `map`.
    pub fn sigmoid_scirs2(&self) -> Result<Self>
    where
        T: num_traits::Float,
    {
        self.map(|x| {
            let one = <T as num_traits::One>::one();
            one / (one + (-x).exp())
        })
    }

    /// Element-wise hyperbolic tangent via `map`.
    pub fn tanh_scirs2(&self) -> Result<Self>
    where
        T: num_traits::Float,
    {
        self.map(|x| x.tanh())
    }

    /// Naive O(m*n*k) row-major matrix multiplication for 2-D tensors.
    ///
    /// # Errors
    /// `InvalidArgument` for non-2-D inputs; `ShapeMismatch` when the inner
    /// dimensions disagree.
    fn basic_matmul(&self, other: &Self) -> Result<Self>
    where
        T: num_traits::Float + std::iter::Sum,
    {
        let self_binding = self.shape();
        let self_shape = self_binding.dims();
        let other_binding = other.shape();
        let other_shape = other_binding.dims();

        if self_shape.len() != 2 || other_shape.len() != 2 {
            return Err(TorshError::InvalidArgument(
                "Matrix multiplication requires 2D tensors".to_string(),
            ));
        }

        if self_shape[1] != other_shape[0] {
            return Err(TorshError::ShapeMismatch {
                expected: vec![self_shape[0], other_shape[1]],
                got: vec![self_shape[1], other_shape[0]],
            });
        }

        // (m, k) x (k, n) -> (m, n)
        let (m, k) = (self_shape[0], self_shape[1]);
        let n = other_shape[1];

        let self_data = self.data()?;
        let other_data = other.data()?;
        let mut result_data = vec![num_traits::Zero::zero(); m * n];

        for i in 0..m {
            for j in 0..n {
                let mut sum = num_traits::Zero::zero();
                for k_idx in 0..k {
                    sum = sum + self_data[i * k + k_idx] * other_data[k_idx * n + j];
                }
                result_data[i * n + j] = sum;
            }
        }

        Self::from_data(result_data, vec![m, n], self.device)
    }

    /// Numerically-stabilized softmax along `dim` (negative dims allowed):
    /// subtracts the per-slice max, exponentiates, and normalizes by the
    /// per-slice sum. Built from `max`, `sub`, `exp`, `sum_dim`, `expand`,
    /// and `div` defined elsewhere.
    ///
    /// # Errors
    /// `InvalidOperation` for an empty/0-rank tensor or out-of-range dim.
    pub fn softmax(&self, dim: i32) -> Result<Self>
    where
        T: torsh_core::dtype::FloatElement
            + Copy
            + std::ops::Sub<Output = T>
            + std::ops::Div<Output = T>,
    {
        let data = self.data()?;
        let shape_binding = self.shape();
        let shape = shape_binding.dims();

        if data.is_empty() || shape.is_empty() {
            return Err(TorshError::InvalidOperation(
                "Cannot compute softmax on empty tensor".to_string(),
            ));
        }

        let actual_dim = if dim < 0 {
            (shape.len() as i32 + dim) as usize
        } else {
            dim as usize
        };

        if actual_dim >= shape.len() {
            return Err(TorshError::InvalidOperation(format!(
                "Dimension {} out of range for {}-dimensional tensor",
                actual_dim,
                shape.len()
            )));
        }

        // keepdim=true so the max broadcasts back to the input shape.
        let max_tensor = self.max(Some(actual_dim), true)?;

        let expanded_max = max_tensor.expand(shape)?;
        let shifted = self.sub(&expanded_max)?;
        let exp_tensor = shifted.exp()?;
        let sum_tensor = exp_tensor.sum_dim(&[actual_dim as i32], true)?;

        let expanded_sum = sum_tensor.expand(shape)?;
        exp_tensor.div(&expanded_sum)
    }

    /// `log(softmax(x, dim))`, computed naively by composing the two ops.
    /// NOTE(review): the direct `shifted - log(sum(exp))` formulation is more
    /// numerically robust than `log(softmax)` — consider switching.
    pub fn log_softmax(&self, dim: i32) -> Result<Self>
    where
        T: torsh_core::dtype::FloatElement + Copy + std::ops::Sub<Output = T>,
    {
        let softmax_result = self.softmax(dim)?;
        softmax_result.log()
    }

    /// Cumulative sum along `dim` (negative dims allowed).
    ///
    /// NOTE(review): only the LAST dimension (or a 1-D tensor) is actually
    /// scanned; for any other `dim` the data is returned UNCHANGED after the
    /// bounds check — a silent no-op. Confirm/implement before relying on it.
    ///
    /// # Errors
    /// `InvalidOperation` when the normalized dim is out of range.
    pub fn cumsum(&self, dim: i32) -> Result<Self>
    where
        T: std::ops::Add<Output = T> + num_traits::Zero + Copy,
    {
        let shape_binding = self.shape();
        let shape = shape_binding.dims();

        let actual_dim = if dim < 0 {
            (shape.len() as i32 + dim) as usize
        } else {
            dim as usize
        };

        if actual_dim >= shape.len() {
            return Err(TorshError::InvalidOperation(format!(
                "Dimension {} out of range for {}-dimensional tensor",
                actual_dim,
                shape.len()
            )));
        }

        let data = self.data()?;
        let mut result_data = data.clone();

        if actual_dim == shape.len() - 1 || shape.len() == 1 {
            // Flat prefix-sum; correct only because the scanned dim is
            // contiguous in row-major order for the last axis / 1-D case.
            let mut cumulative = <T as num_traits::Zero>::zero();
            for i in 0..result_data.len() {
                cumulative = cumulative + result_data[i];
                result_data[i] = cumulative;
            }
        }

        Self::from_data(result_data, shape.to_vec(), self.device)
    }

    /// Index of the minimum element, as an `i64` tensor.
    ///
    /// NOTE(review): when `dim` is `Some(d)`, `d` is only bounds-checked —
    /// the reduction is still GLOBAL and the result has shape `[1]` instead
    /// of a per-slice reduction. Ties resolve to the first occurrence.
    ///
    /// # Errors
    /// `InvalidOperation` for a 0-rank tensor or out-of-range dim.
    pub fn argmin(&self, dim: Option<i32>) -> Result<Tensor<i64>>
    where
        T: std::cmp::PartialOrd + Copy,
    {
        let data = self.data()?;
        let shape_binding = self.shape();
        let shape = shape_binding.dims();

        if shape.is_empty() {
            return Err(TorshError::InvalidOperation(
                "Cannot compute argmin on empty tensor".to_string(),
            ));
        }

        match dim {
            Some(d) => {
                let actual_dim = if d < 0 {
                    (shape.len() as i32 + d) as usize
                } else {
                    d as usize
                };

                if actual_dim >= shape.len() {
                    return Err(TorshError::InvalidOperation(format!(
                        "Dimension {} out of range for {}-dimensional tensor",
                        actual_dim,
                        shape.len()
                    )));
                }

                // Global min + first position of that value.
                let min_val = data
                    .iter()
                    .fold(data[0], |acc, &x| if x < acc { x } else { acc });
                let min_idx = data.iter().position(|&x| x == min_val).unwrap_or(0);

                let result_data = vec![min_idx as i64];
                Tensor::<i64>::from_data(result_data, vec![1], self.device)
            }
            None => {
                let min_val = data
                    .iter()
                    .fold(data[0], |acc, &x| if x < acc { x } else { acc });
                let min_idx = data.iter().position(|&x| x == min_val).unwrap_or(0);

                let result_data = vec![min_idx as i64];
                Tensor::<i64>::from_data(result_data, vec![], self.device)
            }
        }
    }

    /// Index of the maximum element, as an `i64` tensor.
    ///
    /// NOTE(review): same limitation as `argmin` — `Some(dim)` is only
    /// bounds-checked; the reduction is global with shape `[1]`.
    ///
    /// # Errors
    /// `InvalidOperation` for a 0-rank tensor or out-of-range dim.
    pub fn argmax(&self, dim: Option<i32>) -> Result<Tensor<i64>>
    where
        T: std::cmp::PartialOrd + Copy,
    {
        let data = self.data()?;
        let shape_binding = self.shape();
        let shape = shape_binding.dims();

        if shape.is_empty() {
            return Err(TorshError::InvalidOperation(
                "Cannot compute argmax on empty tensor".to_string(),
            ));
        }

        match dim {
            Some(d) => {
                let actual_dim = if d < 0 {
                    (shape.len() as i32 + d) as usize
                } else {
                    d as usize
                };

                if actual_dim >= shape.len() {
                    return Err(TorshError::InvalidOperation(format!(
                        "Dimension {} out of range for {}-dimensional tensor",
                        actual_dim,
                        shape.len()
                    )));
                }

                let max_val = data
                    .iter()
                    .fold(data[0], |acc, &x| if x > acc { x } else { acc });
                let max_idx = data.iter().position(|&x| x == max_val).unwrap_or(0);

                let result_data = vec![max_idx as i64];
                Tensor::<i64>::from_data(result_data, vec![1], self.device)
            }
            None => {
                let max_val = data
                    .iter()
                    .fold(data[0], |acc, &x| if x > acc { x } else { acc });
                let max_idx = data.iter().position(|&x| x == max_val).unwrap_or(0);

                let result_data = vec![max_idx as i64];
                Tensor::<i64>::from_data(result_data, vec![], self.device)
            }
        }
    }

    /// Top-k values and their flat indices, largest- or smallest-first.
    /// `k` is silently clamped to the element count.
    ///
    /// NOTE(review): `dim` and `sorted` are accepted but do nothing — the
    /// empty `if let Some(_d)` / `if !sorted` blocks below are placeholders.
    /// Selection is always over the FLATTENED tensor and always sorted.
    ///
    /// # Errors
    /// `InvalidOperation` for a 0-rank tensor; `InvalidArgument` for `k == 0`.
    pub fn topk(
        &self,
        k: usize,
        dim: Option<i32>,
        largest: bool,
        sorted: bool,
    ) -> Result<(Self, Tensor<i64>)>
    where
        T: std::cmp::PartialOrd + Copy + num_traits::Zero,
    {
        let data = self.data()?;
        let shape_binding = self.shape();
        let shape = shape_binding.dims();

        if shape.is_empty() {
            return Err(TorshError::InvalidOperation(
                "Cannot compute topk on empty tensor".to_string(),
            ));
        }

        if k == 0 {
            return Err(TorshError::InvalidArgument(
                "k must be greater than 0".to_string(),
            ));
        }

        // Placeholder: per-dim selection not implemented yet.
        if let Some(_d) = dim {
        } else {
        }

        let mut indexed_data: Vec<(usize, T)> =
            data.iter().enumerate().map(|(i, &val)| (i, val)).collect();

        // Incomparable values (e.g. NaN) compare as equal.
        if largest {
            indexed_data.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
        } else {
            indexed_data.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
        }

        let top_k = indexed_data
            .into_iter()
            .take(k.min(data.len()))
            .collect::<Vec<_>>();

        // Placeholder: restoring original order for `sorted == false` is
        // not implemented — output is always sorted.
        if !sorted {
        }

        let values: Vec<T> = top_k.iter().map(|(_, val)| *val).collect();
        let indices: Vec<i64> = top_k.iter().map(|(idx, _)| *idx as i64).collect();

        let values_tensor = Self::from_data(values, vec![k.min(data.len())], self.device)?;
        let indices_tensor =
            Tensor::<i64>::from_data(indices, vec![k.min(data.len())], self.device)?;

        Ok((values_tensor, indices_tensor))
    }
}
1201
#[cfg(test)]
mod tests {
    use super::*;
    use torsh_core::device::DeviceType;

    // Scalar tensors are 0-dimensional and expose their value via item().
    #[test]
    fn test_scalar_creation() {
        let scalar = Tensor::<f32>::scalar(42.0).expect("operation should succeed");
        assert_eq!(scalar.shape().dims(), &[] as &[usize]);
        assert_eq!(scalar.item().expect("item extraction should succeed"), 42.0);
    }

    // Global max (dim=None, keepdim=false) over a 1-D tensor.
    #[test]
    fn test_max_reduction() {
        let data = vec![1.0f32, 5.0, 3.0, 2.0];
        let tensor =
            Tensor::from_data(data, vec![4], DeviceType::Cpu).expect("operation should succeed");
        let max_val = tensor.max(None, false).expect("operation should succeed");
        assert_eq!(max_val.item().expect("item extraction should succeed"), 5.0);
    }

    // L2 norm of the classic 3-4-5 triple.
    #[test]
    fn test_norm_computation() {
        let data = vec![3.0f32, 4.0];
        let tensor =
            Tensor::from_data(data, vec![2], DeviceType::Cpu).expect("operation should succeed");
        let norm = tensor.norm().expect("norm computation should succeed");
        assert!((norm.item().expect("item extraction should succeed") - 5.0).abs() < 1e-6);
    }

    // apply_ mutates in place; map returns a new tensor and leaves the
    // original untouched.
    #[test]
    fn test_apply_operations() {
        let data = vec![1.0f32, 2.0, 3.0, 4.0];
        let mut tensor =
            Tensor::from_data(data, vec![4], DeviceType::Cpu).expect("operation should succeed");

        tensor
            .apply_(|x| x * 2.0)
            .expect("operation should succeed");
        assert_eq!(
            tensor.data().expect("data retrieval should succeed"),
            vec![2.0, 4.0, 6.0, 8.0]
        );

        let original = Tensor::from_data(vec![1.0f32, 2.0, 3.0], vec![3], DeviceType::Cpu)
            .expect("operation should succeed");
        let mapped = original.map(|x| x + 1.0).expect("operation should succeed");
        assert_eq!(
            mapped.data().expect("data retrieval should succeed"),
            vec![2.0, 3.0, 4.0]
        );
        assert_eq!(
            original.data().expect("data retrieval should succeed"),
            vec![1.0, 2.0, 3.0]
        );
    }

    // relu / abs / clamp element-wise behavior around zero and negatives.
    #[test]
    fn test_activation_functions() {
        let data = vec![-1.0f32, 0.0, 1.0, 2.0];
        let tensor =
            Tensor::from_data(data, vec![4], DeviceType::Cpu).expect("operation should succeed");

        let relu_result = tensor.relu().expect("relu should succeed");
        assert_eq!(
            relu_result.data().expect("data retrieval should succeed"),
            vec![0.0, 0.0, 1.0, 2.0]
        );

        let abs_result = tensor.abs().expect("abs computation should succeed");
        assert_eq!(
            abs_result.data().expect("data retrieval should succeed"),
            vec![1.0, 0.0, 1.0, 2.0]
        );

        let clamped = tensor.clamp(-0.5, 1.5).expect("operation should succeed");
        assert_eq!(
            clamped.data().expect("data retrieval should succeed"),
            vec![-0.5, 0.0, 1.0, 1.5]
        );
    }

    // clone() shares the storage Arc; clone_data() makes a deep copy.
    #[test]
    fn test_storage_sharing() {
        let tensor1 =
            Tensor::<f32>::zeros(&[2, 2], DeviceType::Cpu).expect("operation should succeed");
        let tensor2 = tensor1.clone();
        let tensor3 = tensor1.clone_data();

        assert!(tensor1.shares_storage(&tensor2));
        assert!(!tensor1.shares_storage(&tensor3));
    }

    // [[1,2],[3,4]] x [[5,6],[7,8]] = [[19,22],[43,50]]
    #[test]
    fn test_basic_matmul() {
        let a = Tensor::from_data(vec![1.0f32, 2.0, 3.0, 4.0], vec![2, 2], DeviceType::Cpu)
            .expect("operation should succeed");
        let b = Tensor::from_data(vec![5.0f32, 6.0, 7.0, 8.0], vec![2, 2], DeviceType::Cpu)
            .expect("operation should succeed");

        let result = a.basic_matmul(&b).expect("operation should succeed");
        assert_eq!(result.shape().dims(), &[2, 2]);

        let expected = vec![19.0, 22.0, 43.0, 50.0];
        assert_eq!(
            result.data().expect("data retrieval should succeed"),
            expected
        );
    }

    // Full sum and full mean of [1, 2, 3, 4].
    #[test]
    fn test_reductions() {
        let data = vec![1.0f32, 2.0, 3.0, 4.0];
        let tensor =
            Tensor::from_data(data, vec![4], DeviceType::Cpu).expect("operation should succeed");

        let sum = tensor.sum().expect("sum should succeed");
        assert_eq!(sum.item().expect("item extraction should succeed"), 10.0);

        let mean = tensor.mean(None, false).expect("operation should succeed");
        assert_eq!(mean.item().expect("item extraction should succeed"), 2.5);
    }

    // make_unique() detaches a tensor from shared storage.
    #[test]
    fn test_copy_on_write() {
        let mut tensor1 =
            Tensor::<f32>::ones(&[2], DeviceType::Cpu).expect("operation should succeed");
        let tensor2 = tensor1.clone();

        assert!(tensor1.shares_storage(&tensor2));

        tensor1.make_unique().expect("make_unique should succeed");
        assert!(!tensor1.shares_storage(&tensor2));
    }

    // item() succeeds only on exactly-one-element tensors.
    #[test]
    fn test_item_extraction() {
        let scalar = Tensor::from_data(vec![42.0f32], vec![], DeviceType::Cpu)
            .expect("operation should succeed");
        assert_eq!(scalar.item().expect("item extraction should succeed"), 42.0);

        let vector = Tensor::from_data(vec![1.0f32, 2.0], vec![2], DeviceType::Cpu)
            .expect("operation should succeed");
        assert!(vector.item().is_err());
    }
}