use crate::patterns::predictive::{
dbscan::{AdaptiveDBSCAN, DBSCANConfig},
kdtree::{KDTree, Point},
};
use crate::patterns::statistical::{
SimpleBOCPD, analysis::types::BOCPDConfig, bocpd_tests::create_changepoint_data,
};
use std::time::Instant;
/// Times `AdaptiveDBSCAN::detect_anomalies_dbscan` over increasingly large inputs.
///
/// Each input is a ramp from 10.0 rising by 10.0 across the series, with
/// `rand::random::<f64>() - 0.5` added to every sample, and timestamps
/// `0, 1, 2, ...`. One label per input point is required, and runs with at
/// least 5000 points must finish in under 30 seconds.
#[test]
fn benchmark_dbscan_scalability() {
    let sizes = vec![100, 500, 1000, 2000, 5000];
    for size in sizes {
        let values: Vec<f64> = (0..size)
            .map(|i| 10.0 + (i as f64 / size as f64) * 10.0 + (rand::random::<f64>() - 0.5))
            .collect();
        let timestamps: Vec<f64> = (0..values.len()).map(|i| i as f64).collect();
        let mut dbscan = AdaptiveDBSCAN::new(DBSCANConfig::default()).unwrap();

        // Only the detection call is timed; input generation and construction are not.
        let start = Instant::now();
        let labels = dbscan.detect_anomalies_dbscan(&values, &timestamps);
        let duration = start.elapsed();

        // `as_millis()` truncates to whole milliseconds, which made the `{:.2}`
        // field always print `.00`; report fractional milliseconds instead.
        println!(
            "DBSCAN with {} points: {:?} ({:.2} ms)",
            size,
            duration,
            duration.as_secs_f64() * 1000.0
        );

        assert_eq!(labels.len(), size);
        if size >= 5000 {
            assert!(
                duration.as_secs() < 30,
                "DBSCAN with 5000 points should complete in < 30s"
            );
        }
    }
}
/// Times `SimpleBOCPD::detect_changepoints` over increasingly large inputs.
///
/// Input comes from `create_changepoint_data(10.0, 20.0, size / 2, size / 2)`
/// (presumably a series whose level shifts from 10.0 to 20.0 at the midpoint;
/// confirm against the helper). The BOCPD buffer is capped at 1000 entries.
/// Runs with at least 5000 points must finish in under 30 seconds.
#[test]
fn benchmark_bocpd_scalability() {
    let sizes = vec![100, 500, 1000, 2000, 5000];
    for size in sizes {
        let data = create_changepoint_data(10.0, 20.0, size / 2, size / 2);
        let config = BOCPDConfig {
            buffer_size: size.min(1000),
            ..Default::default()
        };
        let mut bocpd = SimpleBOCPD::new(config);

        // Only the detection call is timed.
        let start = Instant::now();
        let results = bocpd.detect_changepoints(&data).unwrap();
        let duration = start.elapsed();

        // `as_millis()` truncates to whole milliseconds, which made the `{:.2}`
        // field always print `.00`; report fractional milliseconds instead.
        println!(
            "BOCPD with {} points: {:?} ({:.2} ms, {} detections)",
            size,
            duration,
            duration.as_secs_f64() * 1000.0,
            results.len()
        );

        if size >= 5000 {
            assert!(
                duration.as_secs() < 30,
                "BOCPD with 5000 points should complete in < 30s"
            );
        }
    }
}
/// Measures `KDTree::build` and a single `find_neighbors` call for several point counts.
///
/// Points are created from their index with coordinates `[i, 2 * i]`, shuffled, and
/// then handed to the tree builder. Construction must take less than one second and
/// the one query (second argument `10.0`, presumably the search radius) less than 10 ms.
#[test]
fn benchmark_kdtree_performance() {
    use rand::seq::SliceRandom;

    for size in [100, 500, 1000, 1500] {
        let mut points: Vec<Point> = (0..size)
            .map(|i| Point::new(i, &[i as f64, (i * 2) as f64], None, i as f64))
            .collect();
        // Randomise insertion order so the tree is not built from sorted input.
        points.shuffle(&mut rand::rng());

        let build_timer = Instant::now();
        let kd_tree = KDTree::build(&points);
        let build_elapsed = build_timer.elapsed();

        let half = size as f64 / 2.0;
        let query_point = vec![half, size as f64];
        let query_timer = Instant::now();
        let _neighbors = kd_tree.find_neighbors(&query_point, 10.0);
        let query_elapsed = query_timer.elapsed();

        println!(
            "KD-tree with {} points: construction {:?}, query {:?}",
            size,
            build_elapsed.as_micros(),
            query_elapsed.as_micros()
        );

        // Same thresholds as whole-millisecond / whole-microsecond comparisons:
        // strictly under 1 s to build, strictly under 10 ms to query.
        assert!(
            build_elapsed < std::time::Duration::from_secs(1),
            "KD-tree construction should be fast"
        );
        assert!(
            query_elapsed < std::time::Duration::from_millis(10),
            "KD-tree query should be fast"
        );
    }
}
/// Times `PatternExtractor::extract_patterns` on one synthetic cluster.
///
/// For each of 10, 50, 100 and 500 points, a single `Cluster` (id 0, density 0.8,
/// centroid at `size / 2`) is built from points created from their index. Each
/// extraction must take less than 100 ms.
#[test]
fn benchmark_pattern_extraction() {
    use crate::patterns::predictive::extraction::{ExtractionConfig, PatternExtractor};
    let sizes = vec![10, 50, 100, 500];
    for size in sizes {
        let points: Vec<Point> = (0..size)
            .map(|i| Point::new(i, &[i as f64], None, i as f64))
            .collect();
        // `points` is not needed afterwards, so it is moved into the cluster
        // rather than cloned.
        let clusters = vec![crate::patterns::predictive::dbscan::Cluster {
            id: 0,
            points,
            centroid: vec![size as f64 / 2.0],
            density: 0.8,
        }];
        let extractor = PatternExtractor::new(ExtractionConfig::default());

        // Only the extraction call is timed.
        let start = Instant::now();
        let _patterns = extractor.extract_patterns(&clusters, &[], &["test".to_string()]);
        let duration = start.elapsed();

        println!(
            "Pattern extraction with {} points: {:?}",
            size,
            duration.as_micros()
        );
        assert!(
            duration.as_millis() < 100,
            "Pattern extraction should be fast"
        );
    }
}
/// Checks that the known tools are accepted by `CompatibilityAssessor` and that
/// `batch_assess` stays within a time budget for batches of 1, 5, 10 and 20 tools.
///
/// The budget is 3000 ms when the `CI` environment variable is set and 200 ms
/// otherwise. Batches are filled by cycling through the three known tool names.
#[test]
fn benchmark_compatibility_assessment() {
    use crate::patterns::compatibility::{AssessmentConfig, CompatibilityAssessor, PatternContext};

    // Every assessment in this test uses the same context values.
    let make_context = || PatternContext {
        domain: "test".to_string(),
        data_quality: 0.8,
        occurrences: 10,
        temporal_stability: 0.9,
        available_memory_mb: 200,
        complexity: 0.5,
    };

    let budget_ms = if std::env::var("CI").is_ok() {
        3000
    } else {
        200
    };
    let known_tools = [
        "query_memory",
        "analyze_patterns",
        "advanced_pattern_analysis",
    ];
    let assessor = CompatibilityAssessor::new(AssessmentConfig::default());

    // Each known tool must be assessable on its own before batches are timed.
    for tool_name in known_tools.iter() {
        let outcome = assessor.assess_compatibility("test_pattern", tool_name, &make_context());
        assert!(
            outcome.is_ok(),
            "Tool {} should be properly registered",
            tool_name
        );
    }

    for count in [1, 5, 10, 20] {
        // Repeat the known tool names round-robin until the batch has `count` entries.
        let tools: Vec<String> = (0..count)
            .map(|slot| known_tools[slot % known_tools.len()].to_string())
            .collect();
        let context = make_context();

        // One untimed call before the first measured batch.
        if count == 1 {
            let _warmup = assessor.batch_assess("warmup", &tools, &context);
        }

        let timer = Instant::now();
        let assessments = assessor
            .batch_assess("test_pattern", &tools, &context)
            .unwrap_or_else(|e| panic!("Batch assessment failed for {} tools: {:?}", count, e));
        let elapsed = timer.elapsed();

        assert_eq!(
            assessments.len(),
            tools.len(),
            "Should get one assessment per tool"
        );
        println!(
            "Compatibility assessment for {} tools: {:?} ({} assessments)",
            count,
            elapsed.as_micros(),
            assessments.len()
        );
        assert!(
            elapsed.as_millis() < budget_ms,
            "Compatibility assessment should be fast: got {}ms, max allowed {}ms",
            elapsed.as_millis(),
            budget_ms
        );
    }
}
/// Runs DBSCAN anomaly detection on a large shuffled series and checks time and an
/// input-size-based memory estimate.
///
/// The series has 2000 points when the `CI` environment variable is set and 10000
/// otherwise. The reported figure is NOT a measured footprint: it is the allocated
/// capacity of the two input vectors plus a fixed 10.0 MB allowance. The time
/// budget is 60 s under CI and 120 s otherwise, and covers constructing the
/// detector as well as running it.
#[test]
fn benchmark_memory_usage() {
    use rand::seq::SliceRandom;

    let is_ci = std::env::var("CI").is_ok();
    let size = if is_ci { 2000 } else { 10000 };
    let values: Vec<f64> = (0..size)
        .map(|i| 10.0 + (i as f64 / size as f64) * 10.0)
        .collect();
    let timestamps: Vec<f64> = (0..size).map(|i| i as f64).collect();

    // Shuffle value/timestamp pairs together so each value keeps its own timestamp.
    let mut indexed_values: Vec<(f64, f64)> = values.into_iter().zip(timestamps).collect();
    indexed_values.shuffle(&mut rand::rng());
    let values: Vec<f64> = indexed_values.iter().map(|(v, _)| *v).collect();
    let timestamps: Vec<f64> = indexed_values.iter().map(|(_, t)| *t).collect();

    let values_capacity_mb =
        (values.capacity() * std::mem::size_of::<f64>()) as f64 / (1024.0 * 1024.0);
    let timestamps_capacity_mb =
        (timestamps.capacity() * std::mem::size_of::<f64>()) as f64 / (1024.0 * 1024.0);

    let start = Instant::now();
    let mut dbscan = AdaptiveDBSCAN::new(DBSCANConfig::default()).unwrap();
    let _labels = dbscan.detect_anomalies_dbscan(&values, &timestamps);
    let duration = start.elapsed();

    let estimated_mb = values_capacity_mb + timestamps_capacity_mb + 10.0;
    println!(
        "DBSCAN with {} points: input data {:.2} MB, completed in {:?}",
        size, estimated_mb, duration
    );

    let max_secs = if is_ci { 60 } else { 120 };
    assert!(
        duration.as_secs() < max_secs,
        "DBSCAN should complete within the time budget ({}s for {} points)",
        max_secs,
        size
    );
    assert!(estimated_mb < 500.0, "Memory usage should be reasonable");
}
/// Measures throughput of `AdaptiveDBSCAN::update_streaming_clusters` for several
/// window sizes.
///
/// The test is ignored unless the `streaming-impl` feature is enabled. When the
/// `CI` environment variable is set, only windows of 100 and 500 are exercised,
/// with fewer points and a lower throughput floor (3 points/sec instead of 10).
#[cfg_attr(not(feature = "streaming-impl"), ignore)]
#[test]
fn benchmark_streaming_performance() {
    let is_ci = std::env::var("CI").is_ok();

    // The larger windows are only exercised outside CI.
    let mut window_sizes = vec![100, 500];
    if !is_ci {
        window_sizes.extend([1000, 2000]);
    }
    let min_throughput = if is_ci { 3.0 } else { 10.0 };

    for window_size in window_sizes {
        let num_points = if !is_ci {
            10000
        } else if window_size >= 1000 {
            500
        } else {
            1000
        };

        let config = DBSCANConfig {
            window_size,
            ..Default::default()
        };
        let mut dbscan = AdaptiveDBSCAN::new(config).unwrap();

        // Feed points one at a time; the whole feed loop is what gets timed.
        let timer = Instant::now();
        for i in 0..num_points {
            let point = Point::new(i, &[i as f64], None, i as f64);
            dbscan.update_streaming_clusters(point);
        }
        let elapsed = timer.elapsed();

        let throughput = (num_points as f64) / elapsed.as_secs_f64();
        println!(
            "Streaming DBSCAN (window={}): {} points in {:?} ({:.0} points/sec)",
            window_size, num_points, elapsed, throughput
        );
        assert!(
            throughput > min_throughput,
            "Streaming performance degraded: got {:.0} pts/sec, min {} pts/sec. \
             See ADR-026 for handling strategy.",
            throughput,
            min_throughput
        );
    }
}
/// Runs independent DBSCAN analyses on 1, 2 and 4 threads and prints the wall time.
///
/// Every thread builds its own `AdaptiveDBSCAN` and its own 1000-point input
/// (`0.0, 1.0, ...` for both values and timestamps), so nothing is shared between
/// threads. There is no timing assertion; the test fails only if a worker panics.
#[test]
fn benchmark_concurrent_analysis() {
    use std::thread;
    let num_threads = vec![1, 2, 4];
    for threads in num_threads {
        let start = Instant::now();
        let handles: Vec<_> = (0..threads)
            .map(|_| {
                thread::spawn(|| {
                    let mut dbscan = AdaptiveDBSCAN::new(DBSCANConfig::default()).unwrap();
                    let values: Vec<f64> = (0..1000).map(|i| i as f64).collect();
                    let timestamps: Vec<f64> = (0..1000).map(|i| i as f64).collect();
                    dbscan.detect_anomalies_dbscan(&values, &timestamps)
                })
            })
            .collect();
        // Joining propagates any panic raised inside a worker thread.
        for handle in handles {
            handle.join().unwrap();
        }
        let duration = start.elapsed();
        println!(
            "Concurrent analysis ({} threads): {:?}",
            threads,
            duration.as_millis()
        );
    }
}
/// Simulates a batched workload: 100 batches of 50 points fed to one detector.
///
/// Each value is `10.0` plus `(rand::random::<f64>() - 0.5) * 2.0`, except that a
/// draw below 0.05 yields `60.0` instead. Timestamps continue across batches.
/// Overall throughput must exceed 100 points per second; the timed region
/// includes generating the batches.
#[test]
fn benchmark_real_world_workload() {
    let mut dbscan = AdaptiveDBSCAN::new(DBSCANConfig::default()).unwrap();
    let num_batches = 100;
    let points_per_batch = 50;

    let start = Instant::now();
    for batch in 0..num_batches {
        let values: Vec<f64> = (0..points_per_batch)
            .map(|_i| {
                let base = 10.0;
                if rand::random::<f64>() < 0.05 {
                    base + 50.0
                } else {
                    base + (rand::random::<f64>() - 0.5) * 2.0
                }
            })
            .collect();
        // Timestamps keep increasing from one batch to the next.
        let timestamps: Vec<f64> = (0..points_per_batch)
            .map(|i| (batch * points_per_batch + i) as f64)
            .collect();
        let _labels = dbscan.detect_anomalies_dbscan(&values, &timestamps);
    }
    let duration = start.elapsed();

    let total_points = num_batches * points_per_batch;
    let throughput = total_points as f64 / duration.as_secs_f64();
    println!(
        "Real-world workload: {} points in {:?} ({:.0} points/sec)",
        total_points, duration, throughput
    );
    assert!(
        throughput > 100.0,
        "Real-time processing should handle at least 100 points/sec"
    );
}
/// Runs DBSCAN under three parameter sets and prints run time and noise count.
///
/// The input has 1000 points: `50.0` at indices 100, 500 and 900, and
/// `10.0 + (rand::random::<f64>() - 0.5) * 2.0` everywhere else. Points labelled
/// `ClusterLabel::Noise` are counted as detected outliers. Nothing is asserted;
/// the output is for manual comparison of the configurations.
#[test]
fn benchmark_accuracy_performance_tradeoff() {
    // Each tuple is (density, min_cluster_size, max_distance).
    let configs = vec![(0.1, 2, 1000), (0.5, 5, 500), (1.0, 10, 200)];
    for (density, min_samples, max_distance) in configs {
        let config = DBSCANConfig {
            density,
            min_cluster_size: min_samples,
            max_distance: max_distance as f64,
            window_size: 1000,
        };
        let mut dbscan = AdaptiveDBSCAN::new(config).unwrap();
        let values: Vec<f64> = (0..1000)
            .map(|i| {
                if i == 100 || i == 500 || i == 900 {
                    50.0
                } else {
                    10.0 + (rand::random::<f64>() - 0.5) * 2.0
                }
            })
            .collect();
        let timestamps: Vec<f64> = (0..values.len()).map(|i| i as f64).collect();

        // Only the detection call is timed.
        let start = Instant::now();
        let labels = dbscan.detect_anomalies_dbscan(&values, &timestamps);
        let duration = start.elapsed();

        let detected_outliers = labels
            .iter()
            .filter(|&l| matches!(l, crate::patterns::predictive::dbscan::ClusterLabel::Noise))
            .count();
        println!(
            "Config (density={}, min_samples={}, max_distance={}): {:?}, detected {} outliers",
            density, min_samples, max_distance, duration, detected_outliers
        );
    }
}