1use scirs2_core::ndarray::Array2;
8use scirs2_datasets::{
9 add_time_series_noise, inject_missing_data, inject_outliers, load_iris, make_corrupted_dataset,
10 make_time_series, MissingPattern, OutlierType,
11};
12
13#[allow(dead_code)]
14fn main() {
15 println!("=== Realistic Noise Models Demonstration ===\n");
16
17 println!("=== Missing Data Patterns ========================");
19 demonstrate_missing_data_patterns();
20
21 println!("\n=== Outlier Injection ============================");
23 demonstrate_outlier_injection();
24
25 println!("\n=== Time Series Noise ============================");
27 demonstrate_time_series_noise();
28
29 println!("\n=== Comprehensive Dataset Corruption =============");
31 demonstrate_comprehensive_corruption();
32
33 println!("\n=== Real-World Applications ======================");
35 demonstrate_real_world_applications();
36
37 println!("\n=== Noise Models Demo Complete ===================");
38}
39
40#[allow(dead_code)]
41fn demonstrate_missing_data_patterns() {
42 println!("Testing different missing data patterns on a sample dataset:");
43
44 let originaldata = Array2::from_shape_vec(
45 (8, 4),
46 vec![
47 1.0, 2.0, 3.0, 4.0, 2.0, 4.0, 6.0, 8.0, 3.0, 6.0, 9.0, 12.0, 4.0, 8.0, 12.0, 16.0, 5.0,
48 10.0, 15.0, 20.0, 6.0, 12.0, 18.0, 24.0, 7.0, 14.0, 21.0, 28.0, 8.0, 16.0, 24.0, 32.0,
49 ],
50 )
51 .unwrap();
52
53 let patterns = [
54 (MissingPattern::MCAR, "Missing Completely at Random"),
55 (MissingPattern::MAR, "Missing at Random"),
56 (MissingPattern::MNAR, "Missing Not at Random"),
57 (MissingPattern::Block, "Block-wise Missing"),
58 ];
59
60 for (pattern, description) in patterns {
61 let mut testdata = originaldata.clone();
62 let missing_mask = inject_missing_data(&mut testdata, 0.3, pattern, Some(42)).unwrap();
63
64 let missing_count = missing_mask.iter().filter(|&&x| x).count();
65 let total_elements = testdata.len();
66 let missing_percentage = (missing_count as f64 / total_elements as f64) * 100.0;
67
68 println!("{description}:");
69 println!(
70 " Missing elements: {} / {} ({:.1}%)",
71 missing_count, total_elements, missing_percentage
72 );
73
74 print!(" Pattern (X = missing): ");
76 for i in 0..testdata.nrows() {
77 for j in 0..testdata.ncols() {
78 if missing_mask[[i, j]] {
79 print!("X ");
80 } else {
81 print!(". ");
82 }
83 }
84 if i < testdata.nrows() - 1 {
85 print!("| ");
86 }
87 }
88 println!();
89 }
90}
91
92#[allow(dead_code)]
93fn demonstrate_outlier_injection() {
94 println!("Testing different outlier types on a sample dataset:");
95
96 let mut cleandata = Array2::ones((20, 3));
98 for i in 0..20 {
100 for j in 0..3 {
101 cleandata[[i, j]] = (i as f64 + j as f64) / 2.0;
102 }
103 }
104
105 let outlier_types = [
106 (OutlierType::Point, "Point Outliers"),
107 (OutlierType::Contextual, "Contextual Outliers"),
108 (OutlierType::Collective, "Collective Outliers"),
109 ];
110
111 for (outlier_type, description) in outlier_types {
112 let mut testdata = cleandata.clone();
113 let original_stats = calculate_basic_stats(&testdata);
114
115 let outlier_mask =
116 inject_outliers(&mut testdata, 0.2, outlier_type, 3.0, Some(42)).unwrap();
117 let corrupted_stats = calculate_basic_stats(&testdata);
118
119 let outlier_count = outlier_mask.iter().filter(|&&x| x).count();
120
121 println!("{description}:");
122 println!(
123 " Outliers injected: {} / {} samples",
124 outlier_count,
125 testdata.nrows()
126 );
127 println!(
128 " Mean change: {:.3} -> {:.3} (Δ={:.3})",
129 original_stats.0,
130 corrupted_stats.0,
131 corrupted_stats.0 - original_stats.0
132 );
133 println!(
134 " Std change: {:.3} -> {:.3} (Δ={:.3})",
135 original_stats.1,
136 corrupted_stats.1,
137 corrupted_stats.1 - original_stats.1
138 );
139
140 print!(" Outlier samples: ");
142 for (i, &is_outlier) in outlier_mask.iter().enumerate() {
143 if is_outlier {
144 print!("{} ", i);
145 }
146 }
147 println!();
148 }
149}
150
151#[allow(dead_code)]
152fn demonstrate_time_series_noise() {
153 println!("Testing different time series noise types:");
154
155 let clean_ts = make_time_series(100, 2, true, true, 0.0, Some(42)).unwrap();
157
158 let noise_configs = [
159 vec![("gaussian", 0.2)],
160 vec![("spikes", 0.1)],
161 vec![("drift", 0.5)],
162 vec![("seasonal", 0.3)],
163 vec![("autocorrelated", 0.1)],
164 vec![("heteroscedastic", 0.2)],
165 vec![("gaussian", 0.1), ("spikes", 0.05), ("drift", 0.2)], ];
167
168 let noisenames = [
169 "Gaussian White Noise",
170 "Impulse Spikes",
171 "Linear Drift",
172 "Seasonal Pattern",
173 "Autocorrelated Noise",
174 "Heteroscedastic Noise",
175 "Combined Noise",
176 ];
177
178 for (config, name) in noise_configs.iter().zip(noisenames.iter()) {
179 let mut noisydata = clean_ts.data.clone();
180 let original_stats = calculate_basic_stats(&noisydata);
181
182 add_time_series_noise(&mut noisydata, config, Some(42)).unwrap();
183 let noisy_stats = calculate_basic_stats(&noisydata);
184
185 println!("{name}:");
186 println!(" Mean: {:.3} -> {:.3}", original_stats.0, noisy_stats.0);
187 println!(" Std: {:.3} -> {:.3}", original_stats.1, noisy_stats.1);
188 println!(
189 " Range: [{:.3}, {:.3}] -> [{:.3}, {:.3}]",
190 original_stats.2, original_stats.3, noisy_stats.2, noisy_stats.3
191 );
192 }
193}
194
195#[allow(dead_code)]
196fn demonstrate_comprehensive_corruption() {
197 println!("Testing comprehensive dataset corruption:");
198
199 let iris = load_iris().unwrap();
201 println!(
202 "Original Iris dataset: {} samples, {} features",
203 iris.n_samples(),
204 iris.n_features()
205 );
206
207 let original_stats = calculate_basic_stats(&iris.data);
208 println!(
209 "Original stats - Mean: {:.3}, Std: {:.3}",
210 original_stats.0, original_stats.1
211 );
212
213 let corruption_levels = [
215 (0.05, 0.02, "Light corruption"),
216 (0.1, 0.05, "Moderate corruption"),
217 (0.2, 0.1, "Heavy corruption"),
218 (0.3, 0.15, "Severe corruption"),
219 ];
220
221 for (missing_rate, outlier_rate, description) in corruption_levels {
222 let corrupted = make_corrupted_dataset(
223 &iris,
224 missing_rate,
225 MissingPattern::MAR, outlier_rate,
227 OutlierType::Point,
228 2.5,
229 Some(42),
230 )
231 .unwrap();
232
233 let total_elements = corrupted.data.len();
235 let missing_elements = corrupted.data.iter().filter(|&&x| x.is_nan()).count();
236 let usable_percentage =
237 ((total_elements - missing_elements) as f64 / total_elements as f64) * 100.0;
238
239 println!("{description}:");
240 println!(" Missing data: {:.1}%", missing_rate * 100.0);
241 println!(" Outliers: {:.1}%", outlier_rate * 100.0);
242 println!(" Usable data: {:.1}%", usable_percentage);
243
244 if let Some(missing_count) = corrupted.metadata.get("missing_count") {
246 println!(" Actual missing: {missing_count} elements");
247 }
248 if let Some(outlier_count) = corrupted.metadata.get("outlier_count") {
249 println!(" Actual outliers: {outlier_count} samples");
250 }
251 }
252}
253
254#[allow(dead_code)]
255fn demonstrate_real_world_applications() {
256 println!("Real-world application scenarios:");
257
258 println!("\n1. **Medical Data Simulation**:");
259 let medicaldata = load_iris().unwrap(); let _corrupted_medical = make_corrupted_dataset(
261 &medicaldata,
262 0.15, MissingPattern::MNAR, 0.05, OutlierType::Point,
266 2.0,
267 Some(42),
268 )
269 .unwrap();
270
271 println!(" Medical dataset simulation:");
272 println!(" Missing data pattern: MNAR (high values more likely missing)");
273 println!(" Outliers: Point outliers (measurement errors)");
274 println!(" Use case: Testing imputation algorithms for clinical data");
275
276 println!("\n2. **Sensor Network Simulation**:");
277 let sensordata = make_time_series(200, 4, true, true, 0.1, Some(42)).unwrap();
278 let mut sensor_ts = sensordata.data.clone();
279
280 add_time_series_noise(
282 &mut sensor_ts,
283 &[
284 ("gaussian", 0.05), ("spikes", 0.02), ("drift", 0.1), ("heteroscedastic", 0.03), ],
289 Some(42),
290 )
291 .unwrap();
292
293 inject_missing_data(&mut sensor_ts, 0.08, MissingPattern::Block, Some(42)).unwrap();
295
296 println!(" Sensor network simulation:");
297 println!(" Multiple noise types: gaussian + spikes + drift + heteroscedastic");
298 println!(" Missing data: Block pattern (sensor failures)");
299 println!(" Use case: Testing robust time series algorithms");
300
301 println!("\n3. **Survey Data Simulation**:");
302 let surveydata = load_iris().unwrap(); let _corrupted_survey = make_corrupted_dataset(
304 &surveydata,
305 0.25, MissingPattern::MAR, 0.08, OutlierType::Contextual,
309 1.5,
310 Some(42),
311 )
312 .unwrap();
313
314 println!(" Survey data simulation:");
315 println!(" Missing data pattern: MAR (depends on other responses)");
316 println!(" Outliers: Contextual (unusual response patterns)");
317 println!(" Use case: Testing survey analysis robustness");
318
319 println!("\n4. **Financial Data Simulation**:");
320 let mut financial_ts = make_time_series(500, 3, false, false, 0.02, Some(42))
321 .unwrap()
322 .data;
323
324 add_time_series_noise(
326 &mut financial_ts,
327 &[
328 ("gaussian", 0.1), ("spikes", 0.05), ("autocorrelated", 0.15), ("heteroscedastic", 0.2), ],
333 Some(42),
334 )
335 .unwrap();
336
337 println!(" Financial data simulation:");
338 println!(" Noise types: volatility + shocks + momentum + clustering");
339 println!(" Use case: Testing financial models under realistic conditions");
340}
341
342#[allow(dead_code)]
344fn calculate_basic_stats(data: &Array2<f64>) -> (f64, f64, f64, f64) {
345 let valid_values: Vec<f64> = data.iter().filter(|&&x| !x.is_nan()).cloned().collect();
346
347 if valid_values.is_empty() {
348 return (0.0, 0.0, 0.0, 0.0);
349 }
350
351 let mean = valid_values.iter().sum::<f64>() / valid_values.len() as f64;
352 let variance = valid_values
353 .iter()
354 .map(|&x| (x - mean).powi(2))
355 .sum::<f64>()
356 / valid_values.len() as f64;
357 let std = variance.sqrt();
358 let min = valid_values.iter().cloned().fold(f64::INFINITY, f64::min);
359 let max = valid_values
360 .iter()
361 .cloned()
362 .fold(f64::NEG_INFINITY, f64::max);
363
364 (mean, std, min, max)
365}