#[cfg(test)]
#[path = "../../../tests/unit/algorithms/gsom/network_test.rs"]
mod network_test;

use super::*;
use crate::utils::parallel_collect;
use hashbrown::HashMap;
use rand::prelude::SliceRandom;
use std::cmp::Ordering;
use std::ops::Deref;
use std::sync::{Arc, RwLock};

/// A customized Growing Self-Organizing Map (GSOM) designed to store and retrieve trained input.
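///
/// Training flow, as implemented below: an input is matched to its best matching unit (BMU),
/// the BMU accumulates the quantization error, and once that error exceeds the growing
/// threshold it is either redistributed to the neighbours (interior nodes) or new nodes are
/// inserted next to the BMU (boundary nodes). Finally, the weights of the BMU and its
/// neighbours are adjusted towards the input.
///
/// A minimal usage sketch, assuming hypothetical `DataPoint: Input` and
/// `DataStorage: Storage<Item = DataPoint>` implementations (not part of this module) and
/// purely illustrative configuration values:
///
/// ```text
/// let mut network = Network::new(
///     [root_a, root_b, root_c, root_d], // four initial inputs forming the 2x2 root grid
///     NetworkConfig {
///         spread_factor: 0.25,
///         distribution_factor: 0.25,
///         learning_rate: 0.1,
///         rebalance_memory: 500,
///     },
///     Box::new(DataStorage::default),
/// );
///
/// network.store(DataPoint::new(vec![1., 2., 3.]), 0);
/// network.optimize(10, &|node| should_remove(node)); // `should_remove` is a hypothetical rule
/// ```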
pub struct Network<I: Input, S: Storage<Item = I>> {
    /// Data dimension.
    dimension: usize,
    /// Growth threshold.
    growing_threshold: f64,
    /// The factor of distribution (FD), used in the error distribution stage, 0 < FD < 1.
    distribution_factor: f64,
    /// Initial learning rate.
    learning_rate: f64,
    /// All nodes in the network.
    nodes: HashMap<Coordinate, NodeLink<I, S>>,
    /// Creates input storage for new nodes.
    storage_factory: Box<dyn Fn() -> S + Send + Sync>,
    /// The current time, used to track node update statistics.
    time: usize,
    /// A rebalance memory.
    rebalance_memory: usize,
}

/// GSOM network configuration.
pub struct NetworkConfig {
    /// A spread factor, typically chosen in (0, 1): values closer to 1 lower the growing
    /// threshold and allow the map to grow more aggressively.
    pub spread_factor: f64,
    /// The factor of distribution (FD), used in the error distribution stage, 0 < FD < 1.
    pub distribution_factor: f64,
    /// Initial learning rate.
    pub learning_rate: f64,
    /// A rebalance memory.
    pub rebalance_memory: usize,
}

impl<I: Input, S: Storage<Item = I>> Network<I, S> {
    /// Creates a new instance of `Network`.
    pub fn new(roots: [I; 4], config: NetworkConfig, storage_factory: Box<dyn Fn() -> S + Send + Sync>) -> Self {
        let dimension = roots[0].weights().len();

        assert!(roots.iter().all(|r| r.weights().len() == dimension));
        assert!(config.distribution_factor > 0. && config.distribution_factor < 1.);

        Self {
            dimension,
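            // NOTE growing threshold: a spread factor closer to 1 gives a lower threshold and hence more node growth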
            growing_threshold: -1. * dimension as f64 * config.spread_factor.log2(),
            distribution_factor: config.distribution_factor,
            learning_rate: config.learning_rate,
            nodes: Self::create_initial_nodes(roots, 0, config.rebalance_memory, &storage_factory),
            storage_factory,
            time: 0,
            rebalance_memory: config.rebalance_memory,
        }
    }

    /// Stores input into the network.
    pub fn store(&mut self, input: I, time: usize) {
        debug_assert!(input.weights().len() == self.dimension);
        self.time = time;
        self.train(input, true)
    }

    /// Stores multiple inputs into the network.
    pub fn store_batch<T: Send + Sync>(&mut self, item_data: &[T], time: usize, map_func: fn(&T) -> I) {
        self.time = time;
        self.train_batch(item_data, true, map_func);
    }

    /// Optimizes the network by rebalancing and compacting its nodes.
    pub fn optimize(&mut self, rebalance_count: usize, compact_rule: &(dyn Fn(&NodeLink<I, S>) -> bool)) {
        // NOTE compact before rebalancing to reduce the network size that needs to be rebalanced
        self.compact(compact_rule);
        self.rebalance(rebalance_count);
        self.compact(compact_rule);
    }

    /// Returns node coordinates in arbitrary order.
    pub fn get_coordinates(&'_ self) -> impl Iterator<Item = Coordinate> + '_ {
        self.nodes.keys().cloned()
    }

    /// Returns nodes in arbitrary order.
    pub fn get_nodes<'a>(&'a self) -> impl Iterator<Item = &NodeLink<I, S>> + 'a {
        self.nodes.values()
    }

    /// Returns the total number of nodes.
    pub fn size(&self) -> usize {
        self.nodes.len()
    }

    /// Trains the network on a single input.
    fn train(&mut self, input: I, is_new_input: bool) {
        debug_assert!(input.weights().len() == self.dimension);

        let bmu = self.find_bmu(&input);
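        // quantization error: the distance between the input and the BMU weight vector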
        let error = bmu.read().unwrap().distance(input.weights());

        self.update(&bmu, &input, error, is_new_input);

        bmu.write().unwrap().storage.add(input);
    }

    /// Trains the network on a batch of inputs.
    fn train_batch<T: Send + Sync>(&mut self, item_data: &[T], is_new_input: bool, map_func: fn(&T) -> I) {
        let nodes_data = parallel_collect(item_data, |item| {
            let input = map_func(item);
            let bmu = self.find_bmu(&input);
            let error = bmu.read().unwrap().distance(input.weights());
            (bmu, error, input)
        });

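        // NOTE BMU lookups are collected via `parallel_collect`, while updates are applied
        // sequentially since they may mutate the network (node insertion)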
        nodes_data.into_iter().for_each(|(bmu, error, input)| {
            self.update(&bmu, &input, error, is_new_input);
            bmu.write().unwrap().storage.add(input);
        });
    }

    /// Finds the best matching unit within the map for the given input.
    fn find_bmu(&self, input: &I) -> NodeLink<I, S> {
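        // NOTE linear scan over all nodes; incomparable (NaN) distances fall back to Ordering::Less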
        self.nodes
            .iter()
            .map(|(_, node)| (node.clone(), node.read().unwrap().distance(input.weights())))
            .min_by(|(_, x), (_, y)| x.partial_cmp(y).unwrap_or(Ordering::Less))
            .map(|(node, _)| node)
            .expect("no nodes")
    }

    /// Updates the network according to the accumulated error.
    fn update(&mut self, node: &NodeLink<I, S>, input: &I, error: f64, is_new_input: bool) {
        let (exceeds_ae, is_boundary) = {
            let mut node = node.write().unwrap();
            node.error += error;

            // NOTE update usage statistics only for a new input
            if is_new_input {
                node.new_hit(self.time);
            }

            (node.error > self.growing_threshold, node.topology.is_boundary())
        };

        match (exceeds_ae, is_boundary) {
            // error distribution
            (true, false) => {
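                // NOTE a non-boundary node is expected to have all four neighbours, hence the unwrap below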
                let distribute_error = |node: Option<&NodeLink<I, S>>| {
                    let mut node = node.unwrap().write().unwrap();
                    node.error += self.distribution_factor * node.error;
                };

                let mut node = node.write().unwrap();

                node.error = 0.5 * self.growing_threshold;

                distribute_error(node.topology.left.as_ref());
                distribute_error(node.topology.right.as_ref());
                distribute_error(node.topology.up.as_ref());
                distribute_error(node.topology.down.as_ref());
            }
            // weight distribution
            (true, true) => {
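                // NOTE grow the map: spawn a new node in each missing direction, initialized with this node's weights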
                // NOTE clone to fight with borrow checker
                let (coordinate, weights, topology) = {
                    let node = node.read().unwrap();
                    (node.coordinate.clone(), node.weights.clone(), node.topology.clone())
                };

                let mut distribute_weight = |offset: (i32, i32), link: Option<&NodeLink<I, S>>| {
                    if link.is_none() {
                        let coordinate = Coordinate(coordinate.0 + offset.0, coordinate.1 + offset.1);
                        self.insert(coordinate, weights.as_slice());
                    }
                };

                distribute_weight((-1, 0), topology.left.as_ref());
                distribute_weight((1, 0), topology.right.as_ref());
                distribute_weight((0, 1), topology.up.as_ref());
                distribute_weight((0, -1), topology.down.as_ref());
            }
            _ => {}
        }

        // weight adjustments
        let mut node = node.write().unwrap();
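        // NOTE the effective learning rate is damped while the network is small and
        // approaches the configured rate as the number of nodes grows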
        let learning_rate = self.learning_rate * (1. - 3.8 / (self.nodes.len() as f64));

        node.adjust(input.weights(), learning_rate);
        (node.topology.neighbours().map(|n| n.write().unwrap())).for_each(|mut neighbor| {
            neighbor.adjust(input.weights(), learning_rate);
        });
    }

    /// Creates a new node at the given coordinate and links it with its existing neighbours.
    fn insert(&mut self, coordinate: Coordinate, weights: &[f64]) {
        let new_node = Arc::new(RwLock::new(Node::new(
            coordinate.clone(),
            weights,
            self.time,
            self.rebalance_memory,
            self.storage_factory.deref()(),
        )));
        {
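            // link the new node with any existing neighbours in all four directions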
            let mut new_node_mut = new_node.write().unwrap();
            let (new_x, new_y) = (coordinate.0, coordinate.1);

            if let Some(node) = self.nodes.get(&Coordinate(new_x - 1, new_y)) {
                new_node_mut.topology.left = Some(node.clone());
                node.write().unwrap().topology.right = Some(new_node.clone());
            }

            if let Some(node) = self.nodes.get(&Coordinate(new_x + 1, new_y)) {
                new_node_mut.topology.right = Some(node.clone());
                node.write().unwrap().topology.left = Some(new_node.clone());
            }

            if let Some(node) = self.nodes.get(&Coordinate(new_x, new_y - 1)) {
                new_node_mut.topology.down = Some(node.clone());
                node.write().unwrap().topology.up = Some(new_node.clone());
            }

            if let Some(node) = self.nodes.get(&Coordinate(new_x, new_y + 1)) {
                new_node_mut.topology.up = Some(node.clone());
                node.write().unwrap().topology.down = Some(new_node.clone());
            }
        }

        self.nodes.insert(coordinate, new_node);
    }

    /// Rebalances the network by redistributing stored inputs.
    fn rebalance(&mut self, rebalance_count: usize) {
        let mut data = Vec::with_capacity(self.nodes.len());
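        // NOTE each pass drains all stored inputs, shuffles them and re-trains the network
        // on them without updating hit statistics (is_new_input is false)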
        (0..rebalance_count).for_each(|_| {
            self.reset_error();

            data.clear();
            data.extend(self.nodes.iter_mut().flat_map(|(_, node)| node.write().unwrap().storage.drain()));

            data.shuffle(&mut rand::thread_rng());

            data.drain(0..).for_each(|input| {
                self.train(input, false);
            });
        });

        self.reset_error();
    }

    /// Compacts the network by removing nodes which match the given rule.
    fn compact(&mut self, rule: &(dyn Fn(&NodeLink<I, S>) -> bool)) {
        let mut remove = vec![];
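        // NOTE detach matching nodes from their neighbours first, then drop them from the map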
        self.nodes.iter_mut().filter(|(_, node)| rule.deref()(node)).for_each(|(coordinate, node)| {
            let topology = &mut node.write().unwrap().topology;
            topology.left.iter_mut().for_each(|link| link.write().unwrap().topology.right = None);
            topology.right.iter_mut().for_each(|link| link.write().unwrap().topology.left = None);
            topology.up.iter_mut().for_each(|link| link.write().unwrap().topology.down = None);
            topology.down.iter_mut().for_each(|link| link.write().unwrap().topology.up = None);

            remove.push(coordinate.clone());
        });

        remove.iter().for_each(|coordinate| {
            self.nodes.remove(coordinate);
        });
    }

    /// Resets accumulated error in all nodes.
    fn reset_error(&mut self) {
        self.nodes.iter_mut().for_each(|(_, node)| {
            let mut node = node.write().unwrap();
            node.error = 0.;
        })
    }

    /// Creates nodes for the initial topology.
    fn create_initial_nodes(
        roots: [I; 4],
        time: usize,
        rebalance_memory: usize,
        storage_factory: &(dyn Fn() -> S + Send + Sync),
    ) -> HashMap<Coordinate, NodeLink<I, S>> {
        let create_node_link = |coordinate: Coordinate, input: I| {
            let mut node =
                Node::<I, S>::new(coordinate, input.weights(), time, rebalance_memory, storage_factory.deref()());
            node.storage.add(input);
            Arc::new(RwLock::new(node))
        };

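        // NOTE roots are expected in the order (0,0), (0,1), (1,1), (1,0), forming the initial 2x2 grid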
        let [n00, n01, n11, n10] = roots;

        let n00 = create_node_link(Coordinate(0, 0), n00);
        let n01 = create_node_link(Coordinate(0, 1), n01);
        let n11 = create_node_link(Coordinate(1, 1), n11);
        let n10 = create_node_link(Coordinate(1, 0), n10);

        n00.write().unwrap().topology.right = Some(n10.clone());
        n00.write().unwrap().topology.up = Some(n01.clone());

        n01.write().unwrap().topology.right = Some(n11.clone());
        n01.write().unwrap().topology.down = Some(n00.clone());

        n10.write().unwrap().topology.up = Some(n11.clone());
        n10.write().unwrap().topology.left = Some(n00.clone());

        n11.write().unwrap().topology.left = Some(n01.clone());
        n11.write().unwrap().topology.down = Some(n10.clone());

        [(Coordinate(0, 0), n00), (Coordinate(0, 1), n01), (Coordinate(1, 1), n11), (Coordinate(1, 0), n10)]
            .iter()
            .cloned()
            .collect()
    }
}