pub fn make_anisotropic_blobs(
n_samples: usize,
n_features: usize,
centers: usize,
cluster_std: f64,
anisotropy_factor: f64,
random_seed: Option<u64>,
) -> Result<Dataset>
Expand description
Generates a dataset of anisotropic (elongated) clusters; `anisotropy_factor` sets how strongly the clusters are elongated.
Examples found in repository?
examples/complex_patterns_demo.rs (line 49)
11fn main() {
12 println!("=== Complex Pattern Generators Demonstration ===\n");
13
14 // Demonstrate non-linear pattern generators
15 println!("=== Non-Linear Pattern Generators ================");
16
17 // Spiral dataset
18 println!("1. Spiral Patterns:");
19 let spirals = make_spirals(200, 3, 0.1, Some(42)).unwrap();
20 println!(
21 " Generated {} spirals with {} samples",
22 3,
23 spirals.n_samples()
24 );
25 print_dataset_summary(&spirals, "Spirals");
26
27 // Two moons dataset
28 println!("\n2. Two Moons Pattern:");
29 let moons = make_moons(300, 0.05, Some(42)).unwrap();
30 print_dataset_summary(&moons, "Moons");
31
32 // Concentric circles
33 println!("\n3. Concentric Circles:");
34 let circles = make_circles(250, 0.4, 0.03, Some(42)).unwrap();
35 print_dataset_summary(&circles, "Circles");
36
37 // Swiss roll manifold
38 println!("\n4. Swiss Roll Manifold:");
39 let swiss_roll = make_swiss_roll(400, 0.1, Some(42)).unwrap();
40 print_dataset_summary(&swiss_roll, "Swiss Roll");
41 println!(" 3D manifold with intrinsic 2D structure");
42 println!();
43
44 // Demonstrate complex clustering patterns
45 println!("=== Complex Clustering Patterns ==================");
46
47 // Anisotropic (elongated) clusters
48 println!("1. Anisotropic Clusters:");
49 let aniso_clusters = make_anisotropic_blobs(300, 2, 4, 1.0, 5.0, Some(42)).unwrap();
50 print_dataset_summary(&aniso_clusters, "Anisotropic Clusters");
51 println!(" Elongated clusters with anisotropy factor 5.0");
52
53 // Different anisotropy factors demonstration
54 println!("\n Anisotropy Factor Comparison:");
55 for factor in [1.0, 2.0, 5.0, 10.0] {
56 let _dataset = make_anisotropic_blobs(100, 2, 3, 1.0, factor, Some(42)).unwrap();
57 println!(" Factor {:.1}: {} clusters", factor, 3);
58 }
59
60 // Hierarchical clusters
61 println!("\n2. Hierarchical Clusters:");
62 let hierarchical = make_hierarchical_clusters(240, 3, 3, 4, 3.0, 0.8, Some(42)).unwrap();
63 print_dataset_summary(&hierarchical, "Hierarchical Clusters");
64 println!(" 3 main clusters, each with 4 sub-clusters");
65
66 if let Some(_metadata) = hierarchical.metadata.get("sub_cluster_labels") {
67 println!(" Sub-cluster structure preserved in metadata");
68 }
69 println!();
70
71 // Demonstrate parameter effects
72 println!("=== Parameter Effects Demonstration ==============");
73
74 // Noise effect on spirals
75 println!("1. Noise Effect on Spirals:");
76 for noise in [0.0, 0.05, 0.1, 0.2] {
77 let _spiral_data = make_spirals(100, 2, noise, Some(42)).unwrap();
78 println!(
79 " Noise {:.2}: Clean separation = {}",
80 noise,
81 if noise < 0.1 { "High" } else { "Low" }
82 );
83 }
84
85 // Factor effect on circles
86 println!("\n2. Factor Effect on Concentric Circles:");
87 for factor in [0.2, 0.4, 0.6, 0.8] {
88 let _circle_data = make_circles(100, factor, 0.05, Some(42)).unwrap();
89 println!(" Factor {:.1}: Inner/Outer ratio = {:.1}", factor, factor);
90 }
91
92 // Cluster complexity in hierarchical patterns
93 println!("\n3. Hierarchical Cluster Complexity:");
94 for (main, sub) in [(2, 2), (2, 4), (3, 3), (4, 2)] {
95 let _hier_data = make_hierarchical_clusters(120, 2, main, sub, 2.0, 0.5, Some(42)).unwrap();
96 println!(
97 " {} main × {} sub = {} total clusters",
98 main,
99 sub,
100 main * sub
101 );
102 }
103 println!();
104
105 // Demonstrate use cases
106 println!("=== Use Cases and Applications ====================");
107
108 println!("1. **Non-linear Classification Testing**:");
109 println!(" - Spirals: Test algorithms that can handle multiple interleaved classes");
110 println!(" - Moons: Classic benchmark for non-linear separability");
111 println!(" - Circles: Test radial basis function methods");
112
113 println!("\n2. **Dimensionality Reduction Evaluation**:");
114 println!(" - Swiss Roll: Test manifold learning algorithms (t-SNE, UMAP, Isomap)");
115 println!(" - Preserves intrinsic 2D structure in 3D space");
116
117 println!("\n3. **Clustering Algorithm Testing**:");
118 println!(" - Anisotropic: Test algorithms robust to cluster shape variations");
119 println!(" - Hierarchical: Test multi-level clustering methods");
120
121 println!("\n4. **Robustness Testing**:");
122 println!(" - Variable noise levels test algorithm stability");
123 println!(" - Different cluster properties test generalization");
124 println!();
125
126 // Demonstrate advanced configurations
127 println!("=== Advanced Configuration Examples ===============");
128
129 println!("1. Multi-scale Spiral (Large dataset):");
130 let large_spirals = make_spirals(2000, 4, 0.08, Some(42)).unwrap();
131 print_dataset_summary(&large_spirals, "Large Spirals");
132
133 println!("\n2. High-dimensional Anisotropic Clusters:");
134 let hd_aniso = make_anisotropic_blobs(500, 10, 5, 1.5, 8.0, Some(42)).unwrap();
135 print_dataset_summary(&hd_aniso, "High-D Anisotropic");
136
137 println!("\n3. Deep Hierarchical Structure:");
138 let deep_hier = make_hierarchical_clusters(300, 4, 2, 6, 4.0, 1.0, Some(42)).unwrap();
139 print_dataset_summary(&deep_hier, "Deep Hierarchical");
140 println!(" Deep structure: 2 main → 12 sub-clusters");
141 println!();
142
143 // Performance and memory considerations
144 println!("=== Performance Guidelines =======================");
145 println!("**Recommended dataset sizes:**");
146 println!("- Development/Testing: 100-500 samples");
147 println!("- Algorithm benchmarking: 1,000-5,000 samples");
148 println!("- Performance testing: 10,000+ samples");
149
150 println!("\n**Memory usage (approximate):**");
151 println!("- Spirals (1000, 2D): ~16 KB");
152 println!("- Swiss Roll (1000, 3D): ~24 KB");
153 println!("- Hierarchical (1000, 5D): ~40 KB");
154
155 println!("\n**Parameter tuning tips:**");
156 println!("- Start with moderate noise (0.05-0.1)");
157 println!("- Use anisotropy factors 2.0-10.0 for clear elongation");
158 println!("- Keep sub-clusters ≤ 8 per main cluster for interpretability");
159 println!();
160
161 // Real-world applications
162 println!("=== Real-World Applications =======================");
163 println!("**Computer Vision:**");
164 println!("- Spirals: Object boundary detection");
165 println!("- Circles: Radial pattern recognition");
166
167 println!("\n**Machine Learning Research:**");
168 println!("- Benchmarking new clustering algorithms");
169 println!("- Testing manifold learning methods");
170 println!("- Evaluating non-linear classifiers");
171
172 println!("\n**Data Science Education:**");
173 println!("- Demonstrating algorithm limitations");
174 println!("- Visualizing high-dimensional data challenges");
175 println!("- Teaching feature engineering concepts");
176 println!();
177
178 println!("=== Complex Patterns Demo Complete ===============");
179}