// sklears_impute/dimensionality/sparse.rs
use scirs2_core::ndarray::{Array1, Array2, ArrayView2};
6use sklears_core::{
7 error::{Result as SklResult, SklearsError},
8 traits::{Estimator, Fit, Transform, Untrained},
9 types::Float,
10};
11
12#[derive(Debug, Clone)]
17pub struct SparseImputer<S = Untrained> {
18 state: S,
19 sparsity_level: f64,
20 regularization: f64,
21 max_iter: usize,
22 tol: f64,
23 missing_values: f64,
24 random_state: Option<u64>,
25}
26
27#[derive(Debug, Clone)]
29pub struct SparseImputerTrained {
30 dictionary_: Array2<f64>,
31 sparse_codes_: Array2<f64>,
32 mean_: Array1<f64>,
33 n_features_in_: usize,
34 n_components_: usize,
35}
36
37impl SparseImputer<Untrained> {
38 pub fn new() -> Self {
40 Self {
41 state: Untrained,
42 sparsity_level: 0.5,
43 regularization: 0.1,
44 max_iter: 100,
45 tol: 1e-6,
46 missing_values: f64::NAN,
47 random_state: None,
48 }
49 }
50
51 pub fn sparsity_level(mut self, sparsity_level: f64) -> Self {
53 self.sparsity_level = sparsity_level.clamp(0.0, 1.0);
54 self
55 }
56
57 pub fn regularization(mut self, regularization: f64) -> Self {
59 self.regularization = regularization;
60 self
61 }
62
63 pub fn max_iter(mut self, max_iter: usize) -> Self {
65 self.max_iter = max_iter;
66 self
67 }
68
69 pub fn tol(mut self, tol: f64) -> Self {
71 self.tol = tol;
72 self
73 }
74
75 pub fn missing_values(mut self, missing_values: f64) -> Self {
77 self.missing_values = missing_values;
78 self
79 }
80
81 pub fn random_state(mut self, random_state: Option<u64>) -> Self {
83 self.random_state = random_state;
84 self
85 }
86
87 fn is_missing(&self, value: f64) -> bool {
88 if self.missing_values.is_nan() {
89 value.is_nan()
90 } else {
91 (value - self.missing_values).abs() < f64::EPSILON
92 }
93 }
94}
95
96impl Default for SparseImputer<Untrained> {
97 fn default() -> Self {
98 Self::new()
99 }
100}
101
102impl Estimator for SparseImputer<Untrained> {
103 type Config = ();
104 type Error = SklearsError;
105 type Float = Float;
106
107 fn config(&self) -> &Self::Config {
108 &()
109 }
110}
111
112impl Fit<ArrayView2<'_, Float>, ()> for SparseImputer<Untrained> {
113 type Fitted = SparseImputer<SparseImputerTrained>;
114
115 #[allow(non_snake_case)]
116 fn fit(self, X: &ArrayView2<'_, Float>, _y: &()) -> SklResult<Self::Fitted> {
117 let X = X.mapv(|x| x);
118 let (n_samples, n_features) = X.dim();
119
120 let mean = Array1::zeros(n_features);
122 let dictionary = Array2::zeros((n_features, n_features.min(100)));
123 let sparse_codes = Array2::zeros((n_samples, n_features.min(100)));
124
125 Ok(SparseImputer {
126 state: SparseImputerTrained {
127 dictionary_: dictionary,
128 sparse_codes_: sparse_codes,
129 mean_: mean,
130 n_features_in_: n_features,
131 n_components_: n_features.min(100),
132 },
133 sparsity_level: self.sparsity_level,
134 regularization: self.regularization,
135 max_iter: self.max_iter,
136 tol: self.tol,
137 missing_values: self.missing_values,
138 random_state: self.random_state,
139 })
140 }
141}
142
143impl Transform<ArrayView2<'_, Float>, Array2<Float>> for SparseImputer<SparseImputerTrained> {
144 #[allow(non_snake_case)]
145 fn transform(&self, X: &ArrayView2<'_, Float>) -> SklResult<Array2<Float>> {
146 let X = X.mapv(|x| x);
147 let (n_samples, n_features) = X.dim();
148
149 if n_features != self.state.n_features_in_ {
150 return Err(SklearsError::InvalidInput(format!(
151 "Number of features {} does not match training features {}",
152 n_features, self.state.n_features_in_
153 )));
154 }
155
156 let mut X_imputed = X.clone();
158 for i in 0..n_samples {
159 for j in 0..n_features {
160 if self.is_missing(X[[i, j]]) {
161 X_imputed[[i, j]] = 0.0; }
163 }
164 }
165
166 Ok(X_imputed.mapv(|x| x as Float))
167 }
168}
169
170impl SparseImputer<SparseImputerTrained> {
171 fn is_missing(&self, value: f64) -> bool {
172 if self.missing_values.is_nan() {
173 value.is_nan()
174 } else {
175 (value - self.missing_values).abs() < f64::EPSILON
176 }
177 }
178}
179
180#[derive(Debug, Clone)]
185pub struct CompressedSensingImputer<S = Untrained> {
186 state: S,
187 measurement_ratio: f64,
188 regularization: f64,
189 max_iter: usize,
190 tol: f64,
191 missing_values: f64,
192}
193
194#[derive(Debug, Clone)]
196pub struct CompressedSensingImputerTrained {
197 measurement_matrix_: Array2<f64>,
198 n_features_in_: usize,
199}
200
201impl CompressedSensingImputer<Untrained> {
202 pub fn new() -> Self {
204 Self {
205 state: Untrained,
206 measurement_ratio: 0.3,
207 regularization: 0.1,
208 max_iter: 1000,
209 tol: 1e-4,
210 missing_values: f64::NAN,
211 }
212 }
213}
214
215impl Default for CompressedSensingImputer<Untrained> {
216 fn default() -> Self {
217 Self::new()
218 }
219}
220
221impl Estimator for CompressedSensingImputer<Untrained> {
222 type Config = ();
223 type Error = SklearsError;
224 type Float = Float;
225
226 fn config(&self) -> &Self::Config {
227 &()
228 }
229}
230
231impl Fit<ArrayView2<'_, Float>, ()> for CompressedSensingImputer<Untrained> {
232 type Fitted = CompressedSensingImputer<CompressedSensingImputerTrained>;
233
234 #[allow(non_snake_case)]
235 fn fit(self, X: &ArrayView2<'_, Float>, _y: &()) -> SklResult<Self::Fitted> {
236 let X = X.mapv(|x| x);
237 let (_, n_features) = X.dim();
238
239 let measurement_matrix = Array2::eye(n_features);
241
242 Ok(CompressedSensingImputer {
243 state: CompressedSensingImputerTrained {
244 measurement_matrix_: measurement_matrix,
245 n_features_in_: n_features,
246 },
247 measurement_ratio: self.measurement_ratio,
248 regularization: self.regularization,
249 max_iter: self.max_iter,
250 tol: self.tol,
251 missing_values: self.missing_values,
252 })
253 }
254}
255
256impl Transform<ArrayView2<'_, Float>, Array2<Float>>
257 for CompressedSensingImputer<CompressedSensingImputerTrained>
258{
259 #[allow(non_snake_case)]
260 fn transform(&self, X: &ArrayView2<'_, Float>) -> SklResult<Array2<Float>> {
261 let X = X.mapv(|x| x);
262 let (_n_samples, n_features) = X.dim();
263
264 if n_features != self.state.n_features_in_ {
265 return Err(SklearsError::InvalidInput(format!(
266 "Number of features {} does not match training features {}",
267 n_features, self.state.n_features_in_
268 )));
269 }
270
271 let X_imputed = X.mapv(|x| if self.is_missing(x) { 0.0 } else { x });
273 Ok(X_imputed.mapv(|x| x as Float))
274 }
275}
276
277impl CompressedSensingImputer<CompressedSensingImputerTrained> {
278 fn is_missing(&self, value: f64) -> bool {
279 if self.missing_values.is_nan() {
280 value.is_nan()
281 } else {
282 (value - self.missing_values).abs() < f64::EPSILON
283 }
284 }
285}