use ruvector_data_framework::*;
use ruvector_data_framework::ruvector_native::{Domain, SemanticVector};
use ruvector_data_framework::optimized::{OptimizedDiscoveryEngine, OptimizedConfig, simd_cosine_similarity};
use ruvector_data_framework::discovery::{DiscoveryEngine, DiscoveryConfig, PatternStrength, PatternCategory};
use std::collections::HashMap;
use chrono::Utc;
/// Min-cut on a small 4-node graph of mutually orthogonal embeddings:
/// the cut value must be non-negative and the node count must be 4.
///
/// Fix: the original declared an `adj` adjacency matrix that was never
/// used (dead code, `unused variable` warning) — removed.
#[test]
fn test_stoer_wagner_simple_graph() {
    let mut engine = OptimizedDiscoveryEngine::new(OptimizedConfig {
        // Threshold 0.0 lets every pair connect; SIMD off keeps math scalar.
        similarity_threshold: 0.0,
        use_simd: false,
        ..Default::default()
    });
    // Four one-hot embeddings => four distinct nodes.
    for i in 0..4 {
        let mut embedding = vec![0.0; 128];
        embedding[i] = 1.0;
        engine.add_vector(SemanticVector {
            id: format!("node_{}", i),
            embedding,
            domain: Domain::Research,
            timestamp: Utc::now(),
            metadata: HashMap::new(),
        });
    }
    let coherence = engine.compute_coherence();
    assert!(coherence.mincut_value >= 0.0, "Min-cut should be non-negative");
    assert_eq!(coherence.node_count, 4, "Should have 4 nodes");
}
/// Well-separated one-hot embeddings with a near-1.0 similarity
/// threshold should yield a graph with no edges and a zero min-cut.
#[test]
fn test_stoer_wagner_disconnected_graph() {
    let config = OptimizedConfig {
        similarity_threshold: 0.99,
        use_simd: false,
        ..Default::default()
    };
    let mut engine = OptimizedDiscoveryEngine::new(config);
    for idx in 0..3 {
        // One-hot vectors at positions 0, 40, 80 are mutually orthogonal,
        // so no pair can clear the 0.99 similarity cutoff.
        let embedding: Vec<f32> = (0..128)
            .map(|j| if j == idx * 40 { 1.0 } else { 0.0 })
            .collect();
        engine.add_vector(SemanticVector {
            id: format!("isolated_{}", idx),
            embedding,
            domain: Domain::Climate,
            timestamp: Utc::now(),
            metadata: HashMap::new(),
        });
    }
    let coherence = engine.compute_coherence();
    assert_eq!(coherence.edge_count, 0, "Disconnected graph should have 0 edges");
    assert_eq!(coherence.mincut_value, 0.0, "Min-cut of disconnected graph is 0");
}
/// A graph containing exactly one vertex has no edges and a zero min-cut.
#[test]
fn test_stoer_wagner_single_node() {
    let mut engine = OptimizedDiscoveryEngine::new(OptimizedConfig::default());
    let lone = SemanticVector {
        id: "single".to_string(),
        embedding: vec![1.0; 128],
        domain: Domain::Finance,
        timestamp: Utc::now(),
        metadata: HashMap::new(),
    };
    engine.add_vector(lone);
    let summary = engine.compute_coherence();
    assert_eq!(summary.node_count, 1, "Should have 1 node");
    assert_eq!(summary.edge_count, 0, "Single node has no edges");
    assert_eq!(summary.mincut_value, 0.0, "Single node min-cut is 0");
}
/// Coherence on an engine with no vectors must degrade gracefully:
/// zero nodes, zero edges, zero min-cut.
#[test]
fn test_stoer_wagner_empty_graph() {
    let mut engine = OptimizedDiscoveryEngine::new(OptimizedConfig::default());
    let summary = engine.compute_coherence();
    assert_eq!(summary.node_count, 0, "Empty graph has 0 nodes");
    assert_eq!(summary.edge_count, 0, "Empty graph has 0 edges");
    assert_eq!(summary.mincut_value, 0.0, "Empty graph min-cut is 0");
}
/// Four mutually similar embeddings should form a (near-)complete graph:
/// 4 nodes, at least the 6 edges of K4, and a strictly positive min-cut.
#[test]
fn test_stoer_wagner_complete_graph() {
    let mut engine = OptimizedDiscoveryEngine::new(OptimizedConfig {
        similarity_threshold: 0.5,
        use_simd: false,
        ..Default::default()
    });
    for idx in 0..4 {
        // Mostly-constant embeddings with one bumped coordinate stay
        // pairwise similar well above the 0.5 threshold.
        let mut embedding = vec![0.6; 128];
        embedding[idx] = 0.8;
        engine.add_vector(SemanticVector {
            id: format!("clique_{}", idx),
            embedding,
            domain: Domain::Research,
            timestamp: Utc::now(),
            metadata: HashMap::new(),
        });
    }
    let summary = engine.compute_coherence();
    assert_eq!(summary.node_count, 4, "Complete graph K4 has 4 nodes");
    assert!(summary.edge_count >= 6, "K4 should have at least 6 edges");
    assert!(summary.mincut_value > 0.0, "Complete graph has positive min-cut");
}
/// Cosine similarity of a vector with itself is exactly 1.
#[test]
fn test_cosine_similarity_identical() {
    let values: Vec<f32> = (1..=8).map(|n| n as f32).collect();
    let similarity = simd_cosine_similarity(&values, &values);
    assert!(
        (similarity - 1.0).abs() < 1e-6,
        "Identical vectors should have similarity 1.0, got {}",
        similarity
    );
}
/// Vectors with disjoint support are orthogonal: similarity 0.
#[test]
fn test_cosine_similarity_orthogonal() {
    let axis_x: Vec<f32> = (0..8).map(|j| if j == 0 { 1.0 } else { 0.0 }).collect();
    let axis_y: Vec<f32> = (0..8).map(|j| if j == 1 { 1.0 } else { 0.0 }).collect();
    let similarity = simd_cosine_similarity(&axis_x, &axis_y);
    assert!(
        similarity.abs() < 1e-6,
        "Orthogonal vectors should have similarity 0.0, got {}",
        similarity
    );
}
/// Negating a vector flips its cosine similarity to exactly -1.
#[test]
fn test_cosine_similarity_opposite() {
    let forward: Vec<f32> = vec![1.0, 2.0, 3.0, 4.0];
    let backward: Vec<f32> = forward.iter().map(|v| -v).collect();
    let similarity = simd_cosine_similarity(&forward, &backward);
    assert!(
        (similarity + 1.0).abs() < 1e-6,
        "Opposite vectors should have similarity -1.0, got {}",
        similarity
    );
}
/// The all-zero vector has no direction; similarity against it must be
/// reported as 0.0 rather than NaN (divide-by-zero guard).
#[test]
fn test_cosine_similarity_zero_vector() {
    let nonzero = vec![1.0_f32, 2.0, 3.0, 4.0];
    let zeros = vec![0.0_f32; 4];
    let similarity = simd_cosine_similarity(&nonzero, &zeros);
    assert_eq!(
        similarity, 0.0,
        "Similarity with zero vector should be 0.0, got {}",
        similarity
    );
    assert!(!similarity.is_nan(), "Should not return NaN");
}
/// Length-mismatched inputs are rejected by returning a 0.0 similarity.
#[test]
fn test_cosine_similarity_mismatched_length() {
    let shorter = vec![1.0_f32, 2.0, 3.0];
    let longer = vec![1.0_f32, 2.0, 3.0, 4.0];
    let similarity = simd_cosine_similarity(&shorter, &longer);
    assert_eq!(
        similarity, 0.0,
        "Mismatched lengths should return 0.0, got {}",
        similarity
    );
}
/// Scaling a vector by a positive constant preserves its direction, so
/// a 128-dim sine series and its 0.9-scaled copy stay near similarity 1.
#[test]
fn test_cosine_similarity_large_vectors() {
    let wave: Vec<f32> = (0..128).map(|j| (j as f32).sin()).collect();
    let scaled: Vec<f32> = wave.iter().map(|v| v * 0.9).collect();
    let similarity = simd_cosine_similarity(&wave, &scaled);
    assert!(
        similarity > 0.99,
        "Proportional vectors should have high similarity, got {}",
        similarity
    );
}
/// A 13-element vector (a length that is not a multiple of typical SIMD
/// lane counts) with identical inputs must still score exactly 1.0.
#[test]
fn test_cosine_similarity_non_aligned() {
    // Alternating 1/0 pattern of odd length 13, starting at 1.0.
    let pattern: Vec<f32> = (0..13).map(|j| if j % 2 == 0 { 1.0 } else { 0.0 }).collect();
    let similarity = simd_cosine_similarity(&pattern, &pattern);
    assert!(
        (similarity - 1.0).abs() < 1e-6,
        "Non-aligned identical vectors should still have similarity 1.0, got {}",
        similarity
    );
}
/// Every pattern reported by significance testing must carry a p-value
/// inside the closed interval [0, 1].
///
/// Fix: removed the redundant `if !patterns.is_empty()` guard — a `for`
/// loop over an empty Vec is already a no-op.
#[test]
fn test_statistical_significance_p_value() {
    let mut engine = OptimizedDiscoveryEngine::new(OptimizedConfig::default());
    // Build up a stable baseline history of identical embeddings.
    for _ in 0..10 {
        engine.add_vector(SemanticVector {
            id: format!("stable_{}", rand::random::<u32>()),
            embedding: vec![0.5; 128],
            domain: Domain::Research,
            timestamp: Utc::now(),
            metadata: HashMap::new(),
        });
        let _ = engine.detect_patterns_with_significance();
    }
    // Then introduce a shift in the embedding distribution.
    for _ in 0..5 {
        engine.add_vector(SemanticVector {
            id: format!("change_{}", rand::random::<u32>()),
            embedding: vec![0.9; 128],
            domain: Domain::Research,
            timestamp: Utc::now(),
            metadata: HashMap::new(),
        });
    }
    let patterns = engine.detect_patterns_with_significance();
    for pattern in &patterns {
        assert!(
            pattern.p_value >= 0.0 && pattern.p_value <= 1.0,
            "P-value must be in [0, 1], got {}",
            pattern.p_value
        );
    }
}
/// Effect sizes attached to detected patterns must be finite numbers
/// (no NaN or infinity leaking out of the statistics).
#[test]
fn test_statistical_effect_size() {
    let config = OptimizedConfig {
        significance_threshold: 0.1,
        ..Default::default()
    };
    let mut engine = OptimizedDiscoveryEngine::new(config);
    for idx in 0..5 {
        let mut embedding = vec![0.3; 64];
        embedding[idx] = 0.4;
        engine.add_vector(SemanticVector {
            id: format!("base_{}", idx),
            embedding,
            domain: Domain::Finance,
            timestamp: Utc::now(),
            metadata: HashMap::new(),
        });
        let _ = engine.detect_patterns_with_significance();
    }
    for pattern in &engine.detect_patterns_with_significance() {
        assert!(
            pattern.effect_size.is_finite(),
            "Effect size should be finite, got {}",
            pattern.effect_size
        );
    }
}
/// Confidence intervals must be well-formed: finite bounds with the
/// lower endpoint not exceeding the upper one.
#[test]
fn test_statistical_confidence_interval() {
    let mut engine = OptimizedDiscoveryEngine::new(OptimizedConfig::default());
    for step in 0..8 {
        // A slow upward drift in one coordinate builds a history.
        let mut embedding = vec![0.5; 32];
        embedding[0] = 0.5 + (step as f32 * 0.01);
        engine.add_vector(SemanticVector {
            id: format!("trend_{}", step),
            embedding,
            domain: Domain::Climate,
            timestamp: Utc::now(),
            metadata: HashMap::new(),
        });
        let _ = engine.detect_patterns_with_significance();
    }
    for pattern in &engine.detect_patterns_with_significance() {
        let (lower, upper) = pattern.confidence_interval;
        assert!(
            lower <= upper,
            "Confidence interval lower ({}) should be ≤ upper ({})",
            lower,
            upper
        );
        assert!(
            lower.is_finite() && upper.is_finite(),
            "Confidence interval bounds should be finite"
        );
    }
}
/// A pattern's `is_significant` flag must agree with comparing its
/// p-value against the configured significance threshold.
#[test]
fn test_significance_threshold() {
    let config = OptimizedConfig {
        significance_threshold: 0.05,
        ..Default::default()
    };
    let mut engine = OptimizedDiscoveryEngine::new(config.clone());
    for step in 0..6 {
        engine.add_vector(SemanticVector {
            id: format!("node_{}", step),
            embedding: vec![0.6 + (step as f32 * 0.05); 96],
            domain: Domain::Research,
            timestamp: Utc::now(),
            metadata: HashMap::new(),
        });
        let _ = engine.detect_patterns_with_significance();
    }
    for pattern in &engine.detect_patterns_with_significance() {
        let expected_significant = pattern.p_value < config.significance_threshold;
        assert_eq!(
            pattern.is_significant, expected_significant,
            "Pattern with p={} should be marked significant={}",
            pattern.p_value, expected_significant
        );
    }
}
/// Smoke test for Granger-style causality detection across two domains
/// where the Finance series lags the Climate series by one step.
///
/// Fix: `assert!(causality_patterns.len() >= 0)` was vacuous (`len()`
/// is `usize`, always >= 0, and triggers an `unused_comparison`
/// warning). Replaced with a meaningful check on any cascade patterns.
#[test]
fn test_granger_causality_basic() {
    use ruvector_data_framework::ruvector_native::PatternType;
    let mut engine = OptimizedDiscoveryEngine::new(OptimizedConfig {
        cross_domain: true,
        causality_lookback: 5,
        causality_min_correlation: 0.5,
        ..Default::default()
    });
    for i in 0..12 {
        // Climate carries a sine signal at time i.
        let mut climate_emb = vec![0.5; 64];
        climate_emb[0] = (i as f32 * 0.1).sin();
        engine.add_vector(SemanticVector {
            id: format!("climate_t{}", i),
            embedding: climate_emb,
            domain: Domain::Climate,
            timestamp: Utc::now(),
            metadata: HashMap::new(),
        });
        if i > 0 {
            // Finance echoes the climate signal one step later.
            let mut finance_emb = vec![0.5; 64];
            finance_emb[0] = ((i - 1) as f32 * 0.1).sin();
            engine.add_vector(SemanticVector {
                id: format!("finance_t{}", i),
                embedding: finance_emb,
                domain: Domain::Finance,
                timestamp: Utc::now(),
                metadata: HashMap::new(),
            });
        }
        let _ = engine.detect_patterns_with_significance();
    }
    let patterns = engine.detect_patterns_with_significance();
    // Reaching this point already proves detection ran without panicking;
    // additionally validate that any cascade (causality) patterns carry a
    // well-formed p-value.
    for causal in patterns
        .iter()
        .filter(|p| p.pattern.pattern_type == PatternType::Cascade)
    {
        assert!(
            causal.p_value >= 0.0 && causal.p_value <= 1.0,
            "Causality pattern p-value must be in [0, 1], got {}",
            causal.p_value
        );
    }
}
/// Smoke test for cross-correlation lag scanning: series B trails
/// series A by two steps; detection must complete cleanly.
///
/// Fix: `assert!(patterns.len() >= 0)` was vacuous for `usize`
/// (`unused_comparison` warning). Replaced with a NaN check on the
/// produced patterns; no-panic completion is the primary signal.
#[test]
fn test_cross_correlation_lags() {
    let mut engine = OptimizedDiscoveryEngine::new(OptimizedConfig {
        cross_domain: true,
        causality_lookback: 8,
        ..Default::default()
    });
    for i in 0..15 {
        let mut emb_a = vec![0.5; 32];
        emb_a[0] = (i as f32 * 0.3).sin();
        engine.add_vector(SemanticVector {
            id: format!("series_a_{}", i),
            embedding: emb_a,
            domain: Domain::Climate,
            timestamp: Utc::now(),
            metadata: HashMap::new(),
        });
        if i >= 2 {
            // Lagged copy: series B reproduces series A from two steps ago.
            let mut emb_b = vec![0.5; 32];
            emb_b[0] = ((i - 2) as f32 * 0.3).sin();
            engine.add_vector(SemanticVector {
                id: format!("series_b_{}", i),
                embedding: emb_b,
                domain: Domain::Research,
                timestamp: Utc::now(),
                metadata: HashMap::new(),
            });
        }
        let _ = engine.detect_patterns_with_significance();
    }
    let patterns = engine.detect_patterns_with_significance();
    for pattern in &patterns {
        assert!(!pattern.p_value.is_nan(), "P-value should not be NaN");
    }
}
/// Any "f_statistic" evidence attached to a detected pattern must be
/// non-negative (an F-statistic is a ratio of non-negative quantities).
#[test]
fn test_granger_f_statistic() {
    let mut engine = OptimizedDiscoveryEngine::new(OptimizedConfig {
        cross_domain: true,
        causality_lookback: 6,
        causality_min_correlation: 0.3,
        ..Default::default()
    });
    for i in 0..10 {
        let value = (i as f32).sqrt();
        // Domain 1 carries the raw sqrt series.
        let mut emb1 = vec![0.5; 48];
        emb1[0] = value;
        engine.add_vector(SemanticVector {
            id: format!("dom1_{}", i),
            embedding: emb1,
            domain: Domain::Climate,
            timestamp: Utc::now(),
            metadata: HashMap::new(),
        });
        // Domain 2 carries an affine transform of the same series.
        let mut emb2 = vec![0.5; 48];
        emb2[0] = value * 0.8 + 0.1;
        engine.add_vector(SemanticVector {
            id: format!("dom2_{}", i),
            embedding: emb2,
            domain: Domain::Finance,
            timestamp: Utc::now(),
            metadata: HashMap::new(),
        });
        let _ = engine.detect_patterns_with_significance();
    }
    let patterns = engine.detect_patterns_with_significance();
    for pattern in &patterns {
        for evidence in pattern
            .pattern
            .evidence
            .iter()
            .filter(|e| e.evidence_type == "f_statistic")
        {
            assert!(
                evidence.value >= 0.0,
                "F-statistic should be non-negative, got {}",
                evidence.value
            );
        }
    }
}
/// Near-identical embeddings placed in two different domains should
/// produce cross-domain edges and register both domains in the stats.
#[test]
fn test_cross_domain_bridge_detection() {
    let mut engine = OptimizedDiscoveryEngine::new(OptimizedConfig {
        cross_domain: true,
        similarity_threshold: 0.7,
        ..Default::default()
    });
    // Shared base embedding bridges the two domains.
    let bridge_emb = vec![0.8; 96];
    for i in 0..3 {
        let mut emb = bridge_emb.clone();
        emb[i] += 0.1;
        engine.add_vector(SemanticVector {
            id: format!("climate_{}", i),
            embedding: emb,
            domain: Domain::Climate,
            timestamp: Utc::now(),
            metadata: HashMap::new(),
        });
    }
    for i in 0..3 {
        let mut emb = bridge_emb.clone();
        // Perturb a different coordinate range than the climate side.
        emb[i + 3] += 0.1;
        engine.add_vector(SemanticVector {
            id: format!("finance_{}", i),
            embedding: emb,
            domain: Domain::Finance,
            timestamp: Utc::now(),
            metadata: HashMap::new(),
        });
    }
    let stats = engine.stats();
    assert!(
        stats.cross_domain_edges > 0,
        "Should detect cross-domain connections, found {}",
        stats.cross_domain_edges
    );
    assert!(stats.domain_counts.contains_key(&Domain::Climate));
    assert!(stats.domain_counts.contains_key(&Domain::Finance));
}
/// A tightly clustered domain should score at least as coherent as a
/// loosely spread one.
#[test]
fn test_domain_coherence() {
    let mut engine = OptimizedDiscoveryEngine::new(OptimizedConfig {
        similarity_threshold: 0.6,
        ..Default::default()
    });
    // Climate: four nearly identical high-valued embeddings (tight).
    for i in 0..4 {
        let emb: Vec<f32> = (0..80).map(|j| if j == i { 0.95 } else { 0.9 }).collect();
        engine.add_vector(SemanticVector {
            id: format!("climate_tight_{}", i),
            embedding: emb,
            domain: Domain::Climate,
            timestamp: Utc::now(),
            metadata: HashMap::new(),
        });
    }
    // Research: embeddings perturbed at widely spaced coordinates (loose).
    for i in 0..4 {
        let emb: Vec<f32> = (0..80).map(|j| if j == i * 20 { 0.6 } else { 0.5 }).collect();
        engine.add_vector(SemanticVector {
            id: format!("research_loose_{}", i),
            embedding: emb,
            domain: Domain::Research,
            timestamp: Utc::now(),
            metadata: HashMap::new(),
        });
    }
    let climate_coh = engine.domain_coherence(Domain::Climate);
    let research_coh = engine.domain_coherence(Domain::Research);
    assert!(climate_coh.is_some(), "Climate domain should have coherence");
    assert!(research_coh.is_some(), "Research domain should have coherence");
    if let (Some(c_coh), Some(r_coh)) = (climate_coh, research_coh) {
        assert!(
            c_coh >= r_coh,
            "Tighter cluster should have higher coherence: {} vs {}",
            c_coh,
            r_coh
        );
    }
}
/// Querying coherence for a domain that holds no vectors returns None.
#[test]
fn test_domain_coherence_empty_domain() {
    let mut engine = OptimizedDiscoveryEngine::new(OptimizedConfig::default());
    let only_climate = SemanticVector {
        id: "climate_only".to_string(),
        embedding: vec![0.5; 64],
        domain: Domain::Climate,
        timestamp: Utc::now(),
        metadata: HashMap::new(),
    };
    engine.add_vector(only_climate);
    let finance_coh = engine.domain_coherence(Domain::Finance);
    assert!(
        finance_coh.is_none(),
        "Empty domain should return None, got {:?}",
        finance_coh
    );
}
/// Degenerate (zero-variance) history must not break p-value
/// computation: results stay in [0, 1] and are never NaN.
#[test]
fn test_normal_cdf_edge_cases() {
    let mut engine = OptimizedDiscoveryEngine::new(OptimizedConfig::default());
    for _ in 0..5 {
        // Identical embeddings every round => constant history.
        engine.add_vector(SemanticVector {
            id: format!("constant_{}", rand::random::<u32>()),
            embedding: vec![0.5; 32],
            domain: Domain::Research,
            timestamp: Utc::now(),
            metadata: HashMap::new(),
        });
        let _ = engine.detect_patterns_with_significance();
    }
    for pattern in &engine.detect_patterns_with_significance() {
        assert!(!pattern.p_value.is_nan(), "P-value should not be NaN");
        assert!(
            pattern.p_value >= 0.0 && pattern.p_value <= 1.0,
            "P-value should be in [0,1]"
        );
    }
}
/// A single signal against a 10-window lookback must be handled
/// gracefully: `detect` returns Ok rather than erroring or panicking.
///
/// Fix: `assert!(patterns.len() >= 0)` was vacuous for `usize`
/// (`unused_comparison` warning); the real assertion is the successful
/// `Result`, now expressed via `expect`.
#[test]
fn test_pattern_detection_insufficient_history() {
    let config = DiscoveryConfig {
        lookback_windows: 10,
        ..Default::default()
    };
    let mut engine = DiscoveryEngine::new(config);
    let signals = vec![CoherenceSignal {
        id: "s1".to_string(),
        window: TemporalWindow::new(Utc::now(), Utc::now(), 0),
        min_cut_value: 1.0,
        node_count: 5,
        edge_count: 10,
        partition_sizes: Some((2, 3)),
        is_exact: true,
        cut_nodes: vec![],
        delta: None,
    }];
    let _patterns = engine
        .detect(&signals)
        .expect("Should handle insufficient history gracefully");
}
/// Feeds a perfectly linear min-cut series through the detector; trend
/// detection (linear regression internally) must complete without error.
///
/// Fix: `assert!(trends.len() >= 0)` was vacuous for `usize`
/// (`unused_comparison` warning); the meaningful check is that `detect`
/// succeeds, with the filter kept to exercise the trend category.
#[test]
fn test_linear_regression_through_trends() {
    use chrono::Duration;
    let config = DiscoveryConfig {
        lookback_windows: 5,
        ..Default::default()
    };
    let mut engine = DiscoveryEngine::new(config);
    let mut signals = vec![];
    for i in 0..10 {
        signals.push(CoherenceSignal {
            id: format!("linear_{}", i),
            window: TemporalWindow::new(
                Utc::now() + Duration::hours(i),
                Utc::now() + Duration::hours(i + 1),
                i as u64,
            ),
            // Min-cut grows linearly: 1.0, 1.5, 2.0, ...
            min_cut_value: 1.0 + (i as f64 * 0.5),
            node_count: 10,
            edge_count: 20,
            partition_sizes: Some((5, 5)),
            is_exact: true,
            cut_nodes: vec![],
            delta: None,
        });
    }
    let patterns = engine
        .detect(&signals)
        .expect("Trend detection should succeed on a linear series");
    // Count consolidation trends to exercise the category filter.
    let _trend_count = patterns
        .iter()
        .filter(|p| p.category == PatternCategory::Consolidation)
        .count();
}
/// Ten baseline windows with min-cut near 5 followed by a 15.0 spike
/// must trip anomaly detection at a 2-sigma threshold.
///
/// Fix: replaced `anomalies.len() > 0` with the idiomatic
/// `!anomalies.is_empty()` (clippy `len_zero`).
#[test]
fn test_anomaly_detection_sigma_threshold() {
    use chrono::Duration;
    let config = DiscoveryConfig {
        detect_anomalies: true,
        anomaly_sigma: 2.0,
        ..Default::default()
    };
    let mut engine = DiscoveryEngine::new(config);
    let mut signals = vec![];
    for i in 0..10 {
        signals.push(CoherenceSignal {
            id: format!("normal_{}", i),
            window: TemporalWindow::new(
                Utc::now() + Duration::hours(i),
                Utc::now() + Duration::hours(i + 1),
                i as u64,
            ),
            // Baseline oscillates mildly between 5.0 and 6.0.
            min_cut_value: 5.0 + (i % 3) as f64 * 0.5,
            node_count: 10,
            edge_count: 20,
            partition_sizes: Some((5, 5)),
            is_exact: true,
            cut_nodes: vec![],
            delta: None,
        });
    }
    // One extreme outlier, far beyond 2 standard deviations of baseline.
    signals.push(CoherenceSignal {
        id: "anomaly".to_string(),
        window: TemporalWindow::new(
            Utc::now() + Duration::hours(10),
            Utc::now() + Duration::hours(11),
            10,
        ),
        min_cut_value: 15.0,
        node_count: 10,
        edge_count: 20,
        partition_sizes: Some((5, 5)),
        is_exact: true,
        cut_nodes: vec!["outlier".to_string()],
        delta: None,
    });
    let patterns = engine.detect(&signals).unwrap();
    let anomalies: Vec<_> = patterns
        .iter()
        .filter(|p| p.category == PatternCategory::Anomaly)
        .collect();
    assert!(
        !anomalies.is_empty(),
        "Should detect anomaly pattern with z > 2.0"
    );
}
/// Batch insertion of 20 vectors must return one node ID per input and
/// leave the engine with exactly 20 nodes (parallel feature only).
#[cfg(feature = "parallel")]
#[test]
fn test_batch_vector_addition() {
    let mut engine = OptimizedDiscoveryEngine::new(OptimizedConfig::default());
    let mut batch = Vec::with_capacity(20);
    for i in 0..20 {
        let mut emb = vec![0.5; 64];
        emb[i % 64] = 0.8;
        batch.push(SemanticVector {
            id: format!("batch_{}", i),
            embedding: emb,
            domain: Domain::Research,
            timestamp: Utc::now(),
            metadata: HashMap::new(),
        });
    }
    let ids = engine.add_vectors_batch(batch);
    assert_eq!(ids.len(), 20, "Should return 20 node IDs");
    let stats = engine.stats();
    assert_eq!(stats.total_nodes, 20, "Should have 20 nodes");
}
/// Adding vectors should bump the engine's vector-comparison counter.
#[test]
fn test_performance_metrics() {
    use std::sync::atomic::Ordering;
    let mut engine = OptimizedDiscoveryEngine::new(OptimizedConfig::default());
    for i in 0..5 {
        engine.add_vector(SemanticVector {
            id: format!("perf_{}", i),
            embedding: vec![0.6; 48],
            domain: Domain::Finance,
            timestamp: Utc::now(),
            metadata: HashMap::new(),
        });
    }
    let metrics = engine.metrics();
    let comparisons = metrics.vector_comparisons.load(Ordering::Relaxed);
    assert!(comparisons > 0, "Should track vector comparisons");
}
/// Score-to-strength boundaries implied by the asserted cases:
/// [0, 0.25) weak, [0.25, 0.5) moderate, [0.5, 0.75) strong,
/// [0.75, 1.0] very strong.
#[test]
fn test_pattern_strength_classification() {
    let cases = [
        (0.1, PatternStrength::Weak),
        (0.24, PatternStrength::Weak),
        (0.25, PatternStrength::Moderate),
        (0.49, PatternStrength::Moderate),
        (0.50, PatternStrength::Strong),
        (0.74, PatternStrength::Strong),
        (0.75, PatternStrength::VeryStrong),
        (1.0, PatternStrength::VeryStrong),
    ];
    for (score, expected) in cases {
        assert_eq!(PatternStrength::from_score(score), expected);
    }
}
/// Degenerate lengths: empty inputs yield 0.0; single-element identical
/// vectors yield 1.0.
#[test]
fn test_empty_embeddings() {
    let nothing: Vec<f32> = Vec::new();
    let similarity = simd_cosine_similarity(&nothing, &nothing);
    assert_eq!(similarity, 0.0, "Empty vectors should have similarity 0.0");
    let unit = vec![1.0_f32];
    let sim_single = simd_cosine_similarity(&unit, &unit);
    assert!((sim_single - 1.0).abs() < 1e-6, "Single element identical vectors");
}