pub fn make_anomaly_dataset(
n_samples: usize,
n_features: usize,
config: AnomalyConfig,
) -> Result<Dataset>
Expand description
Generate an anomaly detection dataset.
Examples found in repository?
examples/advanced_generators_demo.rs (line 148)
126fn demonstrate_anomaly_detection() -> Result<(), Box<dyn std::error::Error>> {
127 println!("🔍 ANOMALY DETECTION DATASETS");
128 println!("{}", "-".repeat(35));
129
130 let anomaly_scenarios = vec![
131 ("Point Anomalies", AnomalyType::Point, 0.05, 3.0),
132 ("Contextual Anomalies", AnomalyType::Contextual, 0.08, 2.0),
133 ("Mixed Anomalies", AnomalyType::Mixed, 0.10, 2.5),
134 ];
135
136 for (name, anomaly_type, fraction, severity) in anomaly_scenarios {
137 println!("\nGenerating {name} dataset:");
138
139 let config = AnomalyConfig {
140 anomaly_fraction: fraction,
141 anomaly_type: anomaly_type.clone(),
142 severity,
143 mixed_anomalies: false,
144 clustering_factor: 1.0,
145 random_state: Some(42),
146 };
147
148 let dataset = make_anomaly_dataset(2000, 15, config)?;
149
150 // Analyze the generated dataset
151 if let Some(target) = &dataset.target {
152 let anomaly_count = target.iter().filter(|&&x| x == 1.0).count();
153 let normal_count = target.len() - anomaly_count;
154
155 println!(" 📊 Dataset composition:");
156 println!(
157 " Normal samples: {} ({:.1}%)",
158 normal_count,
159 (normal_count as f64 / target.len() as f64) * 100.0
160 );
161 println!(
162 " Anomalous samples: {} ({:.1}%)",
163 anomaly_count,
164 (anomaly_count as f64 / target.len() as f64) * 100.0
165 );
166
167 // Calculate separation metrics
168 let separation = calculate_anomaly_separation(&dataset);
169 println!(" 🎯 Anomaly characteristics:");
170 println!(
171 " Expected detection difficulty: {}",
172 if separation > 2.0 {
173 "Easy"
174 } else if separation > 1.0 {
175 "Medium"
176 } else {
177 "Hard"
178 }
179 );
180 println!(" Separation score: {separation:.2}");
181 println!(
182 " Recommended algorithms: {}",
183 get_recommended_anomaly_algorithms(&anomaly_type)
184 );
185 }
186 }
187
188 // Real-world scenario simulation
189 println!("\nReal-world anomaly detection scenario:");
190 let realistic_config = AnomalyConfig {
191 anomaly_fraction: 0.02, // 2% anomalies (realistic)
192 anomaly_type: AnomalyType::Mixed,
193 severity: 1.5, // Subtle anomalies
194 mixed_anomalies: true,
195 clustering_factor: 0.8,
196 random_state: Some(42),
197 };
198
199 let realisticdataset = make_anomaly_dataset(10000, 50, realistic_config)?;
200
201 if let Some(target) = &realisticdataset.target {
202 let anomaly_count = target.iter().filter(|&&x| x == 1.0).count();
203 println!(
204 " 🌍 Realistic scenario: {}/{} anomalies in {} samples",
205 anomaly_count,
206 realisticdataset.n_samples(),
207 realisticdataset.n_samples()
208 );
209 println!(" 💡 Challenge: Low anomaly rate mimics production environments");
210 }
211
212 println!();
213 Ok(())
214}