//! Basic Tensor Operations
//!
//! This example demonstrates core numr tensor operations on the CPU backend:
//! creating tensors, element-wise arithmetic, unary math, reductions, matmul,
//! shape manipulation, broadcasting, comparisons, and scalar extraction.
//!
//! Run with:
//! ```sh
//! cargo run --example basic_tensor_ops
//! ```
use numr::prelude::*;
fn main() -> Result<()> {
// -----------------------------------------------------------------------
// 1. Obtain a backend client
// -----------------------------------------------------------------------
// numr's operations live on a *client* tied to a device. For the CPU
// backend the device is simply `CpuDevice::new()`.
let device = CpuDevice::new();
let client = CpuRuntime::default_client(&device);
// -----------------------------------------------------------------------
// 2. Create tensors
// -----------------------------------------------------------------------
// From a slice – you provide data and the desired shape.
let a = Tensor::<CpuRuntime>::from_slice(&[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0], &[2, 3], &device);
println!("a (2×3):\n{:?}", a.to_vec::<f32>());
// Convenience constructors.
let zeros = Tensor::<CpuRuntime>::zeros(&[2, 3], DType::F32, &device);
let ones = Tensor::<CpuRuntime>::ones(&[2, 3], DType::F32, &device);
let filled = Tensor::<CpuRuntime>::full_scalar(&[2, 3], DType::F32, 7.0, &device);
println!("zeros: {:?}", zeros.to_vec::<f32>());
println!("ones: {:?}", ones.to_vec::<f32>());
println!("filled: {:?}", filled.to_vec::<f32>());
// Random tensors (uniform [0,1) and standard normal).
let uniform = client.rand(&[3, 3], DType::F32)?;
let normal = client.randn(&[3, 3], DType::F32)?;
println!("uniform: {:?}", uniform.to_vec::<f32>());
println!("normal: {:?}", normal.to_vec::<f32>());
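// Sanity-check sketch: per the comment above, uniform samples should lie in [0, 1).
assert!(uniform.to_vec::<f32>().iter().all(|&v| (0.0..1.0).contains(&v)));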
// -----------------------------------------------------------------------
// 3. Tensor properties
// -----------------------------------------------------------------------
println!(
"\na: shape={:?}, ndim={}, numel={}, dtype={:?}, contiguous={}",
a.shape(),
a.ndim(),
a.numel(),
a.dtype(),
a.is_contiguous(),
);
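// numel is the product of the dimensions, so the [2, 3] tensor holds 6 values.
assert_eq!(a.numel(), 6);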
// -----------------------------------------------------------------------
// 4. Element-wise arithmetic
// -----------------------------------------------------------------------
// All operations go through the client, not operator overloading.
let b = Tensor::<CpuRuntime>::from_slice(
&[10.0f32, 20.0, 30.0, 40.0, 50.0, 60.0],
&[2, 3],
&device,
);
let sum = client.add(&a, &b)?;
let diff = client.sub(&a, &b)?;
let prod = client.mul(&a, &b)?;
let quot = client.div(&a, &b)?;
println!("\na + b = {:?}", sum.to_vec::<f32>());
println!("a - b = {:?}", diff.to_vec::<f32>());
println!("a * b = {:?}", prod.to_vec::<f32>());
println!("a / b = {:?}", quot.to_vec::<f32>());
// Scalar operations.
let scaled = client.mul_scalar(&a, 100.0)?;
println!("a * 100 = {:?}", scaled.to_vec::<f32>());
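// The same scaling can be assembled from ops already shown: fill a tensor with
// the scalar and multiply element-wise (a sketch; mul_scalar is the direct route).
let hundreds = Tensor::<CpuRuntime>::full_scalar(&[2, 3], DType::F32, 100.0, &device);
let scaled_via_fill = client.mul(&a, &hundreds)?;
assert_eq!(scaled.to_vec::<f32>(), scaled_via_fill.to_vec::<f32>());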
// -----------------------------------------------------------------------
// 5. Unary math functions
// -----------------------------------------------------------------------
let x = Tensor::<CpuRuntime>::from_slice(&[0.0f32, 1.0, 2.0, 3.0], &[4], &device);
println!("\nexp(x) = {:?}", client.exp(&x)?.to_vec::<f32>());
println!("sqrt(x) = {:?}", client.sqrt(&x)?.to_vec::<f32>());
println!("sin(x) = {:?}", client.sin(&x)?.to_vec::<f32>());
// Activations.
let logits = Tensor::<CpuRuntime>::from_slice(&[-2.0f32, -1.0, 0.0, 1.0, 2.0], &[5], &device);
println!(
"relu(logits) = {:?}",
client.relu(&logits)?.to_vec::<f32>()
);
println!(
"sigmoid(logits) = {:?}",
client.sigmoid(&logits)?.to_vec::<f32>()
);
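// A naive softmax composed from these primitives (a sketch: assumes [5] / [1]
// broadcasting and borrows sum with keepdim = true from section 6 below).
let exp_logits = client.exp(&logits)?;
let exp_total = client.sum(&exp_logits, &[0], true)?; // shape [1]
let softmax = client.div(&exp_logits, &exp_total)?;
println!("softmax(logits) = {:?}", softmax.to_vec::<f32>());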
// -----------------------------------------------------------------------
// 6. Reductions
// -----------------------------------------------------------------------
// `dims` selects which axes to reduce; `keepdim` controls whether
// reduced dimensions are retained as size-1.
let m = Tensor::<CpuRuntime>::from_slice(&[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0], &[2, 3], &device);
let row_sum = client.sum(&m, &[1], false)?; // sum across columns
let col_mean = client.mean(&m, &[0], false)?; // mean down rows
let global_max = client.max(&m, &[0, 1], false)?;
println!("\nrow sums = {:?}", row_sum.to_vec::<f32>());
println!("col means = {:?}", col_mean.to_vec::<f32>());
println!("global max= {:?}", global_max.to_vec::<f32>());
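// With keepdim = true the reduced axis survives as size 1, so the result can
// broadcast against the original tensor, e.g. to normalize each row to sum
// to 1 (a sketch, assuming [2,3] / [2,1] broadcasts like the add in section 8).
let row_sum_keep = client.sum(&m, &[1], true)?; // shape [2, 1]
let row_normalized = client.div(&m, &row_sum_keep)?;
println!("row-normalized = {:?}", row_normalized.to_vec::<f32>());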
// -----------------------------------------------------------------------
// 7. Matrix multiplication
// -----------------------------------------------------------------------
// matmul follows standard linear-algebra rules: (M,K) @ (K,N) → (M,N).
let lhs =
Tensor::<CpuRuntime>::from_slice(&[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0], &[2, 3], &device);
let rhs =
Tensor::<CpuRuntime>::from_slice(&[7.0f32, 8.0, 9.0, 10.0, 11.0, 12.0], &[3, 2], &device);
let matmul_result = client.matmul(&lhs, &rhs)?;
println!(
"\n(2×3) @ (3×2) = {:?} (shape {:?})",
matmul_result.to_vec::<f32>(),
matmul_result.shape(),
);
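// A transposed operand works too once the view is materialized (a conservative
// sketch; whether matmul accepts strided views directly is backend-specific).
let gram = client.matmul(&lhs, &lhs.transpose(0, 1)?.contiguous())?;
println!("lhs @ lhs^T (2×2) = {:?}", gram.to_vec::<f32>());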
// -----------------------------------------------------------------------
// 8. Shape manipulation (zero-copy views)
// -----------------------------------------------------------------------
// These operations create a *view* sharing the same underlying storage.
let t = Tensor::<CpuRuntime>::from_slice(&[1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0], &[2, 3], &device);
let reshaped = t.reshape(&[3, 2])?;
println!("\nreshaped (3×2): {:?}", reshaped.to_vec::<f32>());
let transposed = t.transpose(0, 1)?;
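// The transposed view is strided, so contiguous() is used below to materialize
// it into packed memory before reading the values out.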
println!(
"transposed (3×2): {:?}",
transposed.contiguous().to_vec::<f32>()
);
let unsqueezed = t.unsqueeze(0)?; // [1, 2, 3]
println!("unsqueeze(0) shape: {:?}", unsqueezed.shape());
// Broadcasting: [2, 1] + [1, 3] → [2, 3]
let col = Tensor::<CpuRuntime>::from_slice(&[10.0f32, 20.0], &[2, 1], &device);
let row = Tensor::<CpuRuntime>::from_slice(&[1.0f32, 2.0, 3.0], &[1, 3], &device);
let broadcast_sum = client.add(&col, &row)?;
println!(
"\nbroadcast [2,1]+[1,3] = {:?} (shape {:?})",
broadcast_sum.to_vec::<f32>(),
broadcast_sum.shape(),
);
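// The same mechanism yields an outer product: [2,1] * [1,3] → [2,3].
let outer = client.mul(&col, &row)?;
println!("outer product = {:?}", outer.to_vec::<f32>());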
// -----------------------------------------------------------------------
// 9. Extracting scalar values
// -----------------------------------------------------------------------
let scalar = Tensor::<CpuRuntime>::from_slice(&[42.0f32], &[], &device);
let value: f32 = scalar.item()?;
println!("\nscalar item = {value}");
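// item() also reads out a full reduction, assuming max over every axis
// without keepdim produced a 0-d tensor above.
let max_value: f32 = global_max.item()?;
println!("global max item = {max_value}");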
// -----------------------------------------------------------------------
// 10. Comparison operations
// -----------------------------------------------------------------------
let p = Tensor::<CpuRuntime>::from_slice(&[1.0f32, 5.0, 3.0], &[3], &device);
let q = Tensor::<CpuRuntime>::from_slice(&[2.0f32, 5.0, 1.0], &[3], &device);
let eq_mask = client.eq(&p, &q)?;
let gt_mask = client.gt(&p, &q)?;
// Comparison results keep the input dtype, encoding true as 1.0 and false as 0.0.
println!("\np == q: {:?}", eq_mask.to_vec::<f32>());
println!("p > q: {:?}", gt_mask.to_vec::<f32>());
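// Because masks are numeric, multiplication doubles as selection: keep p
// where p > q, zero elsewhere.
let p_masked = client.mul(&p, &gt_mask)?;
println!("p where p > q: {:?}", p_masked.to_vec::<f32>());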
println!("\nAll basic tensor operations completed successfully!");
Ok(())
}