1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
use ferrotorch_core::{Device, FerrotorchResult, Float, Tensor};
/// A tensor registered for gradient descent.
///
/// Every constructor on this type (`new`, `zeros`, `ones`, `from_slice`)
/// produces a parameter with `requires_grad = true`. The flag can later be
/// changed with `set_requires_grad`, e.g. to freeze the parameter, so it is
/// a default rather than a permanent invariant.
///
/// Stored inside `Module` implementations as the unit of registration for
/// optimizer consumption.
///
/// `Parameter<T>` is a thin wrapper — it derefs to `Tensor<T>` for all
/// tensor operations, and cloning shares the same underlying identity
/// (Arc-based, like Tensor); `test_parameter_clone_shares_identity` in this
/// file checks that a clone `is_same` as its source.
#[derive(Debug, Clone)]
pub struct Parameter<T: Float> {
// The wrapped tensor. Private: read it through `tensor()` / `Deref`,
// replace it through `set_data`.
data: Tensor<T>,
}
impl<T: Float> Parameter<T> {
    /// Wrap `tensor` as a trainable parameter.
    ///
    /// The tensor is passed through `requires_grad_(true)`, so the new
    /// parameter tracks gradients regardless of how the input was flagged.
    pub fn new(tensor: Tensor<T>) -> Self {
        let data = tensor.requires_grad_(true);
        Self { data }
    }

    /// Create a trainable parameter of the given `shape` filled with zeros.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `ferrotorch_core::zeros`.
    pub fn zeros(shape: &[usize]) -> FerrotorchResult<Self> {
        Ok(Self::new(ferrotorch_core::zeros::<T>(shape)?))
    }

    /// Create a trainable parameter of the given `shape` filled with ones.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `ferrotorch_core::ones`.
    pub fn ones(shape: &[usize]) -> FerrotorchResult<Self> {
        Ok(Self::new(ferrotorch_core::ones::<T>(shape)?))
    }

    /// Create a trainable parameter from the values in `data`, laid out
    /// according to `shape`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `ferrotorch_core::from_slice`.
    pub fn from_slice(data: &[T], shape: &[usize]) -> FerrotorchResult<Self> {
        Ok(Self::new(ferrotorch_core::from_slice(data, shape)?))
    }

    /// Borrow the underlying tensor.
    #[inline]
    pub fn tensor(&self) -> &Tensor<T> {
        &self.data
    }

    /// Consume the parameter and return the underlying tensor.
    pub fn into_tensor(self) -> Tensor<T> {
        self.data
    }

    /// Replace the underlying tensor while preserving this parameter's
    /// `requires_grad` flag.
    ///
    /// Used by optimizers to update parameter values without breaking the
    /// parameter identity semantics. The incoming tensor's own flag is
    /// ignored: the replacement is re-flagged with whatever this parameter
    /// currently reports. That is `true` for any parameter that has not been
    /// frozen, and `false` for one frozen via [`Parameter::set_requires_grad`],
    /// so loading new values into a frozen parameter no longer silently
    /// un-freezes it.
    pub fn set_data(&mut self, tensor: Tensor<T>) {
        // Read the flag before overwriting `self.data`.
        let keep_grad = self.data.requires_grad();
        self.data = tensor.requires_grad_(keep_grad);
    }

    /// Toggle whether this parameter participates in autograd (#583).
    ///
    /// Setting `false` "freezes" the parameter — backward passes will not
    /// produce a gradient for it; optimizer steps that consult
    /// `requires_grad` will skip it. Mirrors `torch.nn.Parameter.requires_grad_`.
    pub fn set_requires_grad(&mut self, requires_grad: bool) {
        // `Tensor::requires_grad_` consumes its receiver and we only hold
        // `&mut self`, so re-flag a clone of the handle and store that.
        self.data = self.data.clone().requires_grad_(requires_grad);
    }

    /// Return a copy of this parameter on `device`.
    ///
    /// The current `requires_grad` flag is carried over to the result, so a
    /// frozen parameter stays frozen after the move (previously the result
    /// was always re-flagged as trainable).
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `Tensor::to`, e.g. when the target
    /// device is not available.
    pub fn to(&self, device: Device) -> FerrotorchResult<Self> {
        let keep_grad = self.data.requires_grad();
        let moved = self.data.to(device)?;
        Ok(Self {
            data: moved.requires_grad_(keep_grad),
        })
    }
}
impl<T: Float> std::ops::Deref for Parameter<T> {
    type Target = Tensor<T>;

    /// Hand out the wrapped tensor so that `Tensor<T>` methods can be called
    /// directly on a `Parameter<T>`.
    #[inline]
    fn deref(&self) -> &Tensor<T> {
        let Self { data } = self;
        data
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed parameter reports `requires_grad`.
    #[test]
    fn test_parameter_requires_grad() {
        let param = Parameter::<f32>::zeros(&[3, 4]).unwrap();
        assert!(param.requires_grad());
    }

    /// Tensor accessors are reachable through `Deref`.
    #[test]
    fn test_parameter_deref_to_tensor() {
        let param = Parameter::<f32>::zeros(&[2, 3]).unwrap();
        assert_eq!(param.shape(), &[2, 3]);
        assert_eq!(param.numel(), 6);
    }

    /// Cloning a parameter yields a handle to the same tensor.
    #[test]
    fn test_parameter_clone_shares_identity() {
        let original = Parameter::<f32>::zeros(&[4]).unwrap();
        let alias = original.clone();
        assert!(original.tensor().is_same(alias.tensor()));
    }

    /// Moving to the CPU keeps shape, values and the gradient flag.
    #[test]
    fn test_parameter_to_cpu_preserves_data() {
        let source = Parameter::<f32>::from_slice(&[1.0, 2.0, 3.0], &[3]).unwrap();
        let moved = source.to(Device::Cpu).unwrap();
        assert_eq!(moved.shape(), &[3]);
        assert_eq!(moved.data().unwrap(), &[1.0, 2.0, 3.0]);
        assert!(moved.requires_grad());
    }

    /// Requesting a CUDA device with no backend registered is an error.
    #[test]
    fn test_parameter_to_cuda_without_backend() {
        let param = Parameter::<f32>::zeros(&[2]).unwrap();
        assert!(param.to(Device::Cuda(0)).is_err());
    }
}