k_means_generating_cluster/
k-means_generating_cluster.rs

1extern crate rusty_machine;
2extern crate rand;
3
4use rusty_machine::linalg::{Matrix, BaseMatrix};
5use rusty_machine::learning::k_means::KMeansClassifier;
6use rusty_machine::learning::UnSupModel;
7
8use rand::thread_rng;
9use rand::distributions::IndependentSample;
10use rand::distributions::normal::Normal;
11
12fn generate_data(centroids: &Matrix<f64>,
13                 points_per_centroid: usize,
14                 noise: f64)
15                 -> Matrix<f64> {
16    assert!(centroids.cols() > 0, "Centroids cannot be empty.");
17    assert!(centroids.rows() > 0, "Centroids cannot be empty.");
18    assert!(noise >= 0f64, "Noise must be non-negative.");
19    let mut raw_cluster_data = Vec::with_capacity(centroids.rows() * points_per_centroid *
20                                                  centroids.cols());
21
22    let mut rng = thread_rng();
23    let normal_rv = Normal::new(0f64, noise);
24
25    for _ in 0..points_per_centroid {
26        // Generate points from each centroid
27        for centroid in centroids.iter_rows() {
28            // Generate a point randomly around the centroid
29            let mut point = Vec::with_capacity(centroids.cols());
30            for feature in centroid {
31                point.push(feature + normal_rv.ind_sample(&mut rng));
32            }
33
34            // Push point to raw_cluster_data
35            raw_cluster_data.extend(point);
36        }
37    }
38
39    Matrix::new(centroids.rows() * points_per_centroid,
40                centroids.cols(),
41                raw_cluster_data)
42}
43
44fn main() {
45    println!("K-Means clustering example:");
46
47    const SAMPLES_PER_CENTROID: usize = 2000;
48
49    println!("Generating {0} samples from each centroids:",
50             SAMPLES_PER_CENTROID);
51    // Choose two cluster centers, at (-0.5, -0.5) and (0, 0.5).
52    let centroids = Matrix::new(2, 2, vec![-0.5, -0.5, 0.0, 0.5]);
53    println!("{}", centroids);
54
55    // Generate some data randomly around the centroids
56    let samples = generate_data(&centroids, SAMPLES_PER_CENTROID, 0.4);
57
58    // Create a new model with 2 clusters
59    let mut model = KMeansClassifier::new(2);
60
61    // Train the model
62    println!("Training the model...");
63    // Our train function returns a Result<(), E>
64    model.train(&samples).unwrap();
65
66    let centroids = model.centroids().as_ref().unwrap();
67    println!("Model Centroids:\n{:.3}", centroids);
68
69    // Predict the classes and partition into
70    println!("Classifying the samples...");
71    let classes = model.predict(&samples).unwrap();
72    let (first, second): (Vec<usize>, Vec<usize>) = classes.data().iter().partition(|&x| *x == 0);
73
74    println!("Samples closest to first centroid: {}", first.len());
75    println!("Samples closest to second centroid: {}", second.len());
76}