make_hierarchical_clusters

Function make_hierarchical_clusters 

Source
pub fn make_hierarchical_clusters(
    n_samples: usize,
    n_features: usize,
    n_main_clusters: usize,
    n_sub_clusters: usize,
    main_cluster_std: f64,
    sub_cluster_std: f64,
    randomseed: Option<u64>,
) -> Result<Dataset>
Expand description

Generates a dataset of hierarchical clusters (clusters within clusters): `n_main_clusters` main clusters, each containing `n_sub_clusters` sub-clusters.

Examples found in repository?
examples/complex_patterns_demo.rs (line 63)
12fn main() {
13    println!("=== Complex Pattern Generators Demonstration ===\n");
14
15    // Demonstrate non-linear pattern generators
16    println!("=== Non-Linear Pattern Generators ================");
17
18    // Spiral dataset
19    println!("1. Spiral Patterns:");
20    let spirals = make_spirals(200, 3, 0.1, Some(42)).unwrap();
21    println!(
22        "   Generated {} spirals with {} samples",
23        3,
24        spirals.n_samples()
25    );
26    print_dataset_summary(&spirals, "Spirals");
27
28    // Two moons dataset
29    println!("\n2. Two Moons Pattern:");
30    let moons = make_moons(300, 0.05, Some(42)).unwrap();
31    print_dataset_summary(&moons, "Moons");
32
33    // Concentric circles
34    println!("\n3. Concentric Circles:");
35    let circles = make_circles(250, 0.4, 0.03, Some(42)).unwrap();
36    print_dataset_summary(&circles, "Circles");
37
38    // Swiss roll manifold
39    println!("\n4. Swiss Roll Manifold:");
40    let swiss_roll = make_swiss_roll(400, 0.1, Some(42)).unwrap();
41    print_dataset_summary(&swiss_roll, "Swiss Roll");
42    println!("   3D manifold with intrinsic 2D structure");
43    println!();
44
45    // Demonstrate complex clustering patterns
46    println!("=== Complex Clustering Patterns ==================");
47
48    // Anisotropic (elongated) clusters
49    println!("1. Anisotropic Clusters:");
50    let aniso_clusters = make_anisotropic_blobs(300, 2, 4, 1.0, 5.0, Some(42)).unwrap();
51    print_dataset_summary(&aniso_clusters, "Anisotropic Clusters");
52    println!("   Elongated clusters with anisotropy factor 5.0");
53
54    // Different anisotropy factors demonstration
55    println!("\n   Anisotropy Factor Comparison:");
56    for factor in [1.0, 2.0, 5.0, 10.0] {
57        let dataset = make_anisotropic_blobs(100, 2, 3, 1.0, factor, Some(42)).unwrap();
58        println!("     Factor {:.1}: {} clusters", factor, 3);
59    }
60
61    // Hierarchical clusters
62    println!("\n2. Hierarchical Clusters:");
63    let hierarchical = make_hierarchical_clusters(240, 3, 3, 4, 3.0, 0.8, Some(42)).unwrap();
64    print_dataset_summary(&hierarchical, "Hierarchical Clusters");
65    println!("   3 main clusters, each with 4 sub-clusters");
66
67    if let Some(_metadata) = hierarchical.metadata.get("sub_cluster_labels") {
68        println!("   Sub-cluster structure preserved in metadata");
69    }
70    println!();
71
72    // Demonstrate parameter effects
73    println!("=== Parameter Effects Demonstration ==============");
74
75    // Noise effect on spirals
76    println!("1. Noise Effect on Spirals:");
77    for noise in [0.0, 0.05, 0.1, 0.2] {
78        let _spiraldata = make_spirals(100, 2, noise, Some(42)).unwrap();
79        println!(
80            "   Noise {:.2}: Clean separation = {}",
81            noise,
82            if noise < 0.1 { "High" } else { "Low" }
83        );
84    }
85
86    // Factor effect on circles
87    println!("\n2. Factor Effect on Concentric Circles:");
88    for factor in [0.2, 0.4, 0.6, 0.8] {
89        let _circledata = make_circles(100, factor, 0.05, Some(42)).unwrap();
90        println!("   Factor {factor:.1}: Inner/Outer ratio = {factor:.1}");
91    }
92
93    // Cluster complexity in hierarchical patterns
94    println!("\n3. Hierarchical Cluster Complexity:");
95    for (main, sub) in [(2, 2), (2, 4), (3, 3), (4, 2)] {
96        let _hierdata = make_hierarchical_clusters(120, 2, main, sub, 2.0, 0.5, Some(42)).unwrap();
97        println!(
98            "   {} main × {} sub = {} total clusters",
99            main,
100            sub,
101            main * sub
102        );
103    }
104    println!();
105
106    // Demonstrate use cases
107    println!("=== Use Cases and Applications ====================");
108
109    println!("1. **Non-linear Classification Testing**:");
110    println!("   - Spirals: Test algorithms that can handle multiple interleaved classes");
111    println!("   - Moons: Classic benchmark for non-linear separability");
112    println!("   - Circles: Test radial basis function methods");
113
114    println!("\n2. **Dimensionality Reduction Evaluation**:");
115    println!("   - Swiss Roll: Test manifold learning algorithms (t-SNE, UMAP, Isomap)");
116    println!("   - Preserves intrinsic 2D structure in 3D space");
117
118    println!("\n3. **Clustering Algorithm Testing**:");
119    println!("   - Anisotropic: Test algorithms robust to cluster shape variations");
120    println!("   - Hierarchical: Test multi-level clustering methods");
121
122    println!("\n4. **Robustness Testing**:");
123    println!("   - Variable noise levels test algorithm stability");
124    println!("   - Different cluster properties test generalization");
125    println!();
126
127    // Demonstrate advanced configurations
128    println!("=== Advanced Configuration Examples ===============");
129
130    println!("1. Multi-scale Spiral (Large dataset):");
131    let large_spirals = make_spirals(2000, 4, 0.08, Some(42)).unwrap();
132    print_dataset_summary(&large_spirals, "Large Spirals");
133
134    println!("\n2. High-dimensional Anisotropic Clusters:");
135    let hd_aniso = make_anisotropic_blobs(500, 10, 5, 1.5, 8.0, Some(42)).unwrap();
136    print_dataset_summary(&hd_aniso, "High-D Anisotropic");
137
138    println!("\n3. Deep Hierarchical Structure:");
139    let deep_hier = make_hierarchical_clusters(300, 4, 2, 6, 4.0, 1.0, Some(42)).unwrap();
140    print_dataset_summary(&deep_hier, "Deep Hierarchical");
141    println!("   Deep structure: 2 main → 12 sub-clusters");
142    println!();
143
144    // Performance and memory considerations
145    println!("=== Performance Guidelines =======================");
146    println!("**Recommended dataset sizes:**");
147    println!("- Development/Testing: 100-500 samples");
148    println!("- Algorithm benchmarking: 1,000-5,000 samples");
149    println!("- Performance testing: 10,000+ samples");
150
151    println!("\n**Memory usage (approximate):**");
152    println!("- Spirals (1000, 2D): ~16 KB");
153    println!("- Swiss Roll (1000, 3D): ~24 KB");
154    println!("- Hierarchical (1000, 5D): ~40 KB");
155
156    println!("\n**Parameter tuning tips:**");
157    println!("- Start with moderate noise (0.05-0.1)");
158    println!("- Use anisotropy factors 2.0-10.0 for clear elongation");
159    println!("- Keep sub-clusters ≤ 8 per main cluster for interpretability");
160    println!();
161
162    // Real-world applications
163    println!("=== Real-World Applications =======================");
164    println!("**Computer Vision:**");
165    println!("- Spirals: Object boundary detection");
166    println!("- Circles: Radial pattern recognition");
167
168    println!("\n**Machine Learning Research:**");
169    println!("- Benchmarking new clustering algorithms");
170    println!("- Testing manifold learning methods");
171    println!("- Evaluating non-linear classifiers");
172
173    println!("\n**Data Science Education:**");
174    println!("- Demonstrating algorithm limitations");
175    println!("- Visualizing high-dimensional data challenges");
176    println!("- Teaching feature engineering concepts");
177    println!();
178
179    println!("=== Complex Patterns Demo Complete ===============");
180}