make_anomaly_dataset

Function make_anomaly_dataset 

Source
pub fn make_anomaly_dataset(
    n_samples: usize,
    n_features: usize,
    config: AnomalyConfig,
) -> Result<Dataset>
Expand description

Generates an anomaly detection dataset with `n_samples` samples and `n_features` features, configured by `config`.

Examples found in repository?
examples/advanced_generators_demo.rs (line 148)
126fn demonstrate_anomaly_detection() -> Result<(), Box<dyn std::error::Error>> {
127    println!("🔍 ANOMALY DETECTION DATASETS");
128    println!("{}", "-".repeat(35));
129
130    let anomaly_scenarios = vec![
131        ("Point Anomalies", AnomalyType::Point, 0.05, 3.0),
132        ("Contextual Anomalies", AnomalyType::Contextual, 0.08, 2.0),
133        ("Mixed Anomalies", AnomalyType::Mixed, 0.10, 2.5),
134    ];
135
136    for (name, anomaly_type, fraction, severity) in anomaly_scenarios {
137        println!("\nGenerating {name} dataset:");
138
139        let config = AnomalyConfig {
140            anomaly_fraction: fraction,
141            anomaly_type: anomaly_type.clone(),
142            severity,
143            mixed_anomalies: false,
144            clustering_factor: 1.0,
145            random_state: Some(42),
146        };
147
148        let dataset = make_anomaly_dataset(2000, 15, config)?;
149
150        // Analyze the generated dataset
151        if let Some(target) = &dataset.target {
152            let anomaly_count = target.iter().filter(|&&x| x == 1.0).count();
153            let normal_count = target.len() - anomaly_count;
154
155            println!("  📊 Dataset composition:");
156            println!(
157                "    Normal samples: {} ({:.1}%)",
158                normal_count,
159                (normal_count as f64 / target.len() as f64) * 100.0
160            );
161            println!(
162                "    Anomalous samples: {} ({:.1}%)",
163                anomaly_count,
164                (anomaly_count as f64 / target.len() as f64) * 100.0
165            );
166
167            // Calculate separation metrics
168            let separation = calculate_anomaly_separation(&dataset);
169            println!("  🎯 Anomaly characteristics:");
170            println!(
171                "    Expected detection difficulty: {}",
172                if separation > 2.0 {
173                    "Easy"
174                } else if separation > 1.0 {
175                    "Medium"
176                } else {
177                    "Hard"
178                }
179            );
180            println!("    Separation score: {separation:.2}");
181            println!(
182                "    Recommended algorithms: {}",
183                get_recommended_anomaly_algorithms(&anomaly_type)
184            );
185        }
186    }
187
188    // Real-world scenario simulation
189    println!("\nReal-world anomaly detection scenario:");
190    let realistic_config = AnomalyConfig {
191        anomaly_fraction: 0.02, // 2% anomalies (realistic)
192        anomaly_type: AnomalyType::Mixed,
193        severity: 1.5, // Subtle anomalies
194        mixed_anomalies: true,
195        clustering_factor: 0.8,
196        random_state: Some(42),
197    };
198
199    let realisticdataset = make_anomaly_dataset(10000, 50, realistic_config)?;
200
201    if let Some(target) = &realisticdataset.target {
202        let anomaly_count = target.iter().filter(|&&x| x == 1.0).count();
203        println!(
204            "  🌍 Realistic scenario: {}/{} anomalies in {} samples",
205            anomaly_count,
206            realisticdataset.n_samples(),
207            realisticdataset.n_samples()
208        );
209        println!("  💡 Challenge: Low anomaly rate mimics production environments");
210    }
211
212    println!();
213    Ok(())
214}