// saorsa_core/adaptive/som.rs

1// Copyright (c) 2025 Saorsa Labs Limited
2
3// This file is part of the Saorsa P2P network.
4
5// Licensed under the AGPL-3.0 license:
6// <https://www.gnu.org/licenses/agpl-3.0.html>
7
8// This program is distributed in the hope that it will be useful,
9// but WITHOUT ANY WARRANTY; without even the implied warranty of
10// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11// GNU Affero General Public License for more details.
12
13// You should have received a copy of the GNU Affero General Public License
14// along with this program. If not, see <https://www.gnu.org/licenses/>.
15
16// Copyright 2024 P2P Foundation
17// SPDX-License-Identifier: AGPL-3.0-or-later
18
19//! Self-Organizing Map (SOM) Implementation
20//!
21//! This module provides a Self-Organizing Map for intelligent clustering and organization
22//! of nodes in the P2P network based on multi-dimensional features such as content
23//! specialization, compute capability, network latency, and storage availability.
24
25use rand::Rng;
26use serde::{Deserialize, Serialize};
27use std::collections::{HashMap, HashSet};
28use std::sync::{Arc, RwLock};
29
30/// Configuration for Self-Organizing Map
#[derive(Debug, Clone)]
pub struct SomConfig {
    /// Initial learning rate (typically 0.1 - 0.5); decays exponentially
    /// over training (see `SelfOrganizingMap::get_learning_rate`)
    pub initial_learning_rate: f64,
    /// Initial neighborhood radius, in grid cells; decays exponentially
    /// over training (see `SelfOrganizingMap::get_neighborhood_radius`)
    pub initial_radius: f64,
    /// Number of training iterations (also the decay time constant for
    /// learning rate and radius)
    pub iterations: usize,
    /// Grid size configuration
    pub grid_size: GridSize,
}
42
43/// Grid size configuration
#[derive(Debug, Clone)]
pub enum GridSize {
    /// Fixed grid dimensions as (width, height); never resized
    Fixed(usize, usize),
    /// Dynamic grid that starts at `min` x `min` and grows (each side capped
    /// at `max`) as node assignments approach capacity
    Dynamic { min: usize, max: usize },
}
51
52/// Multi-dimensional features representing a node's characteristics
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NodeFeatures {
    /// Content vector (128-dimensional semantic hash)
    // NOTE(review): the SOM's weight dimension is hard-coded to 128 + 3;
    // vectors of a different length are silently truncated/ignored when
    // zipped against neuron weights — confirm producers always emit 128 dims.
    pub content_vector: Vec<f64>,
    /// Compute capability (0-1000 benchmark score)
    pub compute_capability: f64,
    /// Average network latency in milliseconds
    pub network_latency: f64,
    /// Available storage in GB
    pub storage_available: f64,
}
64
65impl NodeFeatures {
66    /// Normalize features to ensure consistent scale
67    pub fn normalize(&self) -> Self {
68        // Normalize content vector to unit length
69        let content_magnitude = self
70            .content_vector
71            .iter()
72            .map(|x| x * x)
73            .sum::<f64>()
74            .sqrt();
75
76        let normalized_content = if content_magnitude > 0.0 {
77            self.content_vector
78                .iter()
79                .map(|x| x / content_magnitude)
80                .collect()
81        } else {
82            vec![0.0; self.content_vector.len()]
83        };
84
85        // Normalize other features to [0, 1] range
86        Self {
87            content_vector: normalized_content,
88            compute_capability: self.compute_capability / 1000.0, // Max 1000
89            network_latency: (self.network_latency / 200.0).min(1.0), // Max 200ms
90            storage_available: (self.storage_available / 5000.0).min(1.0), // Max 5TB
91        }
92    }
93
94    /// Calculate Euclidean distance to another feature vector
95    pub fn euclidean_distance(&self, other: &Self) -> f64 {
96        let normalized_self = self.normalize();
97        let normalized_other = other.normalize();
98
99        // Combine all features into a single vector for distance calculation
100        let self_vec = normalized_self.to_weight_vector();
101        let other_vec = normalized_other.to_weight_vector();
102
103        self_vec
104            .iter()
105            .zip(other_vec.iter())
106            .map(|(a, b)| (a - b).powi(2))
107            .sum::<f64>()
108            .sqrt()
109    }
110
111    /// Convert to weight vector for SOM operations
112    pub fn to_weight_vector(&self) -> Vec<f64> {
113        let normalized = self.normalize();
114        let mut weights = normalized.content_vector.clone();
115        weights.push(normalized.compute_capability);
116        weights.push(normalized.network_latency);
117        weights.push(normalized.storage_available);
118        weights
119    }
120}
121
122/// A single neuron in the SOM grid
#[derive(Debug, Clone)]
pub struct Neuron {
    /// Weight vector (same dimension as `NodeFeatures::to_weight_vector`)
    weights: Vec<f64>,
    /// Node IDs currently assigned to this neuron (i.e. nodes whose BMU
    /// was this grid cell at assignment time)
    assigned_nodes: HashSet<NodeId>,
}
130
131impl Neuron {
132    /// Create a new neuron with random weights
133    fn new(weight_dim: usize) -> Self {
134        let mut rng = rand::thread_rng();
135        let weights = (0..weight_dim).map(|_| rng.gen_range(0.0..1.0)).collect();
136
137        Self {
138            weights,
139            assigned_nodes: HashSet::new(),
140        }
141    }
142
143    /// Calculate distance to input vector
144    fn distance(&self, input: &[f64]) -> f64 {
145        self.weights
146            .iter()
147            .zip(input.iter())
148            .map(|(w, i)| (w - i).powi(2))
149            .sum::<f64>()
150            .sqrt()
151    }
152
153    /// Update weights based on input and influence
154    fn update_weights(&mut self, input: &[f64], learning_rate: f64, influence: f64) {
155        for (weight, &input_val) in self.weights.iter_mut().zip(input.iter()) {
156            *weight += learning_rate * influence * (input_val - *weight);
157        }
158    }
159}
160
161/// Self-Organizing Map for node clustering
pub struct SelfOrganizingMap {
    /// Grid of neurons, indexed as `grid[y][x]`
    grid: Arc<RwLock<Vec<Vec<Neuron>>>>,
    /// Current grid width (kept in sync with `grid` on resize)
    width: usize,
    /// Current grid height (kept in sync with `grid` on resize)
    height: usize,
    /// Configuration
    config: SomConfig,
    /// Weight dimension (128 content features + 3 scalar features)
    weight_dim: usize,
    /// Node to grid position mapping for fast lookups; positions stored
    /// as (x, y)
    node_positions: Arc<RwLock<HashMap<NodeId, (usize, usize)>>>,
}
175
176impl SelfOrganizingMap {
177    /// Create a new Self-Organizing Map
178    pub fn new(config: SomConfig) -> Self {
179        let (width, height) = match &config.grid_size {
180            GridSize::Fixed(w, h) => (*w, *h),
181            GridSize::Dynamic { min, .. } => (*min, *min),
182        };
183
184        // Weight dimension = 128 (content) + 3 (other features)
185        let weight_dim = 131;
186
187        // Initialize grid with random neurons
188        let grid = (0..height)
189            .map(|_| (0..width).map(|_| Neuron::new(weight_dim)).collect())
190            .collect();
191
192        Self {
193            grid: Arc::new(RwLock::new(grid)),
194            width,
195            height,
196            config,
197            weight_dim,
198            node_positions: Arc::new(RwLock::new(HashMap::new())),
199        }
200    }
201
202    /// Find the Best Matching Unit (BMU) for given features
203    pub fn find_best_matching_unit(&self, features: &NodeFeatures) -> (usize, usize) {
204        let input = features.to_weight_vector();
205        let Ok(grid) = self.grid.read() else {
206            return (0, 0);
207        };
208
209        let mut best_x = 0;
210        let mut best_y = 0;
211        let mut best_distance = f64::MAX;
212
213        for (y, row) in grid.iter().enumerate() {
214            for (x, neuron) in row.iter().enumerate() {
215                let distance = neuron.distance(&input);
216                if distance < best_distance {
217                    best_distance = distance;
218                    best_x = x;
219                    best_y = y;
220                }
221            }
222        }
223
224        (best_x, best_y)
225    }
226
227    /// Calculate Gaussian neighborhood function
228    pub fn gaussian_neighborhood(distance: f64, radius: f64) -> f64 {
229        (-distance.powi(2) / (2.0 * radius.powi(2))).exp()
230    }
231
232    /// Get current neighborhood radius based on iteration
233    pub fn get_neighborhood_radius(&self, iteration: usize) -> f64 {
234        self.config.initial_radius * (-(iteration as f64) / self.config.iterations as f64).exp()
235    }
236
237    /// Get current learning rate based on iteration
238    pub fn get_learning_rate(&self, iteration: usize) -> f64 {
239        self.config.initial_learning_rate
240            * (-(iteration as f64) / self.config.iterations as f64).exp()
241    }
242
243    /// Train the SOM with a single sample
244    pub fn train_single(&mut self, features: &NodeFeatures, iteration: usize) {
245        let input = features.to_weight_vector();
246        let (bmu_x, bmu_y) = self.find_best_matching_unit(features);
247
248        let learning_rate = self.get_learning_rate(iteration);
249        let radius = self.get_neighborhood_radius(iteration);
250
251        let Ok(mut grid) = self.grid.write() else {
252            return;
253        };
254
255        // Update all neurons based on their distance to BMU
256        for (y, row) in grid.iter_mut().enumerate() {
257            for (x, neuron) in row.iter_mut().enumerate() {
258                let distance =
259                    ((x as f64 - bmu_x as f64).powi(2) + (y as f64 - bmu_y as f64).powi(2)).sqrt();
260
261                if distance <= radius * 3.0 {
262                    // Only update within 3 * radius
263                    let influence = Self::gaussian_neighborhood(distance, radius);
264                    neuron.update_weights(&input, learning_rate, influence);
265                }
266            }
267        }
268    }
269
270    /// Train the SOM with a batch of samples
271    pub fn train_batch(&mut self, features_batch: &[NodeFeatures]) {
272        for iteration in 0..self.config.iterations {
273            // Random sampling for each iteration
274            let sample_idx = rand::thread_rng().gen_range(0..features_batch.len());
275            self.train_single(&features_batch[sample_idx], iteration);
276        }
277    }
278
279    /// Assign a node to its BMU
280    pub fn assign_node(&mut self, node_id: NodeId, features: NodeFeatures) {
281        let (x, y) = self.find_best_matching_unit(&features);
282
283        // Remove from old position if exists
284        let Ok(mut positions) = self.node_positions.write() else {
285            return;
286        };
287        if let Some((old_x, old_y)) = positions.get(&node_id) {
288            if let Ok(mut grid) = self.grid.write() {
289                grid[*old_y][*old_x].assigned_nodes.remove(&node_id);
290            } else {
291                return;
292            }
293        }
294
295        // Assign to new position
296        positions.insert(node_id.clone(), (x, y));
297        let Ok(mut grid) = self.grid.write() else {
298            return;
299        };
300        grid[y][x].assigned_nodes.insert(node_id);
301
302        // Check if we need to resize (for dynamic grids)
303        drop(grid);
304        drop(positions);
305        self.check_and_resize();
306    }
307
308    /// Get nodes assigned to a specific neuron
309    pub fn get_assigned_nodes(&self, x: usize, y: usize) -> HashSet<NodeId> {
310        let Ok(grid) = self.grid.read() else {
311            return HashSet::new();
312        };
313        grid.get(y)
314            .and_then(|row| row.get(x))
315            .map(|neuron| neuron.assigned_nodes.clone())
316            .unwrap_or_default()
317    }
318
319    /// Get all assigned nodes
320    pub fn get_all_assigned_nodes(&self) -> HashSet<NodeId> {
321        let Ok(grid) = self.grid.read() else {
322            return HashSet::new();
323        };
324        grid.iter()
325            .flat_map(|row| row.iter())
326            .flat_map(|neuron| neuron.assigned_nodes.iter())
327            .cloned()
328            .collect()
329    }
330
331    /// Find nodes similar to given features
332    pub fn find_similar_nodes(&self, features: &NodeFeatures, radius: usize) -> Vec<NodeId> {
333        let (bmu_x, bmu_y) = self.find_best_matching_unit(features);
334        let Ok(grid) = self.grid.read() else {
335            return Vec::new();
336        };
337
338        let mut similar_nodes = Vec::new();
339
340        // Search in neighborhood around BMU
341        let x_start = bmu_x.saturating_sub(radius);
342        let x_end = (bmu_x + radius + 1).min(self.width);
343        let y_start = bmu_y.saturating_sub(radius);
344        let y_end = (bmu_y + radius + 1).min(self.height);
345
346        for y in y_start..y_end {
347            for x in x_start..x_end {
348                if let Some(neuron) = grid.get(y).and_then(|row| row.get(x)) {
349                    similar_nodes.extend(neuron.assigned_nodes.iter().cloned());
350                }
351            }
352        }
353
354        similar_nodes
355    }
356
357    /// Get current grid dimensions
358    pub fn get_grid_dimensions(&self) -> (usize, usize) {
359        (self.width, self.height)
360    }
361
362    /// Set neuron weights (for testing)
363    pub fn set_neuron_weights(&mut self, x: usize, y: usize, weights: Vec<f64>) {
364        let Ok(mut grid) = self.grid.write() else {
365            return;
366        };
367        if let Some(neuron) = grid.get_mut(y).and_then(|row| row.get_mut(x)) {
368            neuron.weights = weights;
369        }
370    }
371
372    /// Get neuron weights (for testing)
373    pub fn get_neuron_weights(&self, x: usize, y: usize) -> Option<Vec<f64>> {
374        let Ok(grid) = self.grid.read() else {
375            return None;
376        };
377        grid.get(y)
378            .and_then(|row| row.get(x))
379            .map(|neuron| neuron.weights.clone())
380    }
381
382    /// Check if grid needs resizing and resize if necessary
383    fn check_and_resize(&mut self) {
384        if let GridSize::Dynamic { max, .. } = self.config.grid_size {
385            let node_count = match self.node_positions.read() {
386                Ok(p) => p.len(),
387                Err(_) => 0,
388            };
389            let current_capacity = self.width * self.height;
390
391            // Resize if we're at 80% capacity and haven't hit max
392            if node_count as f64 > current_capacity as f64 * 0.8 {
393                let new_size = ((current_capacity as f64 * 1.5).sqrt() as usize).min(max);
394                if new_size > self.width || new_size > self.height {
395                    self.resize_grid(new_size, new_size);
396                }
397            }
398        }
399    }
400
401    /// Resize the grid while preserving node assignments
402    fn resize_grid(&mut self, new_width: usize, new_height: usize) {
403        let Ok(mut old_grid) = self.grid.write() else {
404            return;
405        };
406        let mut new_grid = vec![vec![Neuron::new(self.weight_dim); new_width]; new_height];
407
408        // Copy neurons that fit in new grid
409        for (y, row) in old_grid.iter().enumerate() {
410            if y >= new_height {
411                break;
412            }
413            for (x, neuron) in row.iter().enumerate() {
414                if x >= new_width {
415                    break;
416                }
417                new_grid[y][x] = neuron.clone();
418            }
419        }
420
421        *old_grid = new_grid;
422        self.width = new_width;
423        self.height = new_height;
424
425        // Re-assign nodes that were outside the new grid
426        let positions = match self.node_positions.read() {
427            Ok(p) => p.clone(),
428            Err(_) => return,
429        };
430        for (node_id, (x, y)) in positions {
431            if x >= new_width || y >= new_height {
432                // Find new position for this node
433                // For simplicity, we'll just find a nearby valid position
434                let new_x = x.min(new_width - 1);
435                let new_y = y.min(new_height - 1);
436                old_grid[new_y][new_x]
437                    .assigned_nodes
438                    .insert(node_id.clone());
439                if let Ok(mut pos) = self.node_positions.write() {
440                    pos.insert(node_id, (new_x, new_y));
441                }
442            }
443        }
444    }
445
446    /// Get visualization data for the SOM
447    pub fn get_visualization_data(&self) -> VisualizationData {
448        let Ok(grid) = self.grid.read() else {
449            return VisualizationData {
450                grid_width: self.width,
451                grid_height: self.height,
452                neurons: vec![],
453            };
454        };
455        let neurons = grid
456            .iter()
457            .enumerate()
458            .flat_map(|(y, row)| {
459                row.iter()
460                    .enumerate()
461                    .map(move |(x, neuron)| NeuronVisualization {
462                        x,
463                        y,
464                        weights: neuron.weights.clone(),
465                        assigned_nodes: neuron.assigned_nodes.iter().cloned().collect(),
466                    })
467            })
468            .collect();
469
470        VisualizationData {
471            grid_width: self.width,
472            grid_height: self.height,
473            neurons,
474        }
475    }
476
477    /// Generate U-Matrix (unified distance matrix) for visualization
478    pub fn generate_u_matrix(&self) -> Vec<Vec<f64>> {
479        let Ok(grid) = self.grid.read() else {
480            return vec![vec![0.0; self.width]; self.height];
481        };
482        let mut u_matrix = vec![vec![0.0; self.width]; self.height];
483
484        for y in 0..self.height {
485            for x in 0..self.width {
486                let mut distances = Vec::new();
487                let current = &grid[y][x];
488
489                // Calculate distances to neighbors
490                for dy in -1i32..=1 {
491                    for dx in -1i32..=1 {
492                        if dx == 0 && dy == 0 {
493                            continue;
494                        }
495
496                        let nx = (x as i32 + dx) as usize;
497                        let ny = (y as i32 + dy) as usize;
498
499                        if nx < self.width && ny < self.height {
500                            let neighbor = &grid[ny][nx];
501                            let distance = current
502                                .weights
503                                .iter()
504                                .zip(neighbor.weights.iter())
505                                .map(|(a, b)| (a - b).powi(2))
506                                .sum::<f64>()
507                                .sqrt();
508                            distances.push(distance);
509                        }
510                    }
511                }
512
513                // Average distance to neighbors
514                if !distances.is_empty() {
515                    u_matrix[y][x] = distances.iter().sum::<f64>() / distances.len() as f64;
516                }
517            }
518        }
519
520        u_matrix
521    }
522}
523
524/// Visualization data for a single neuron
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NeuronVisualization {
    /// Grid column of this neuron
    pub x: usize,
    /// Grid row of this neuron
    pub y: usize,
    /// Snapshot of the neuron's weight vector
    pub weights: Vec<f64>,
    /// Nodes assigned to this neuron at snapshot time
    pub assigned_nodes: Vec<NodeId>,
}
532
533/// Complete visualization data for the SOM
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VisualizationData {
    /// Grid width at snapshot time
    pub grid_width: usize,
    /// Grid height at snapshot time
    pub grid_height: usize,
    /// One entry per neuron, in row-major order
    pub neurons: Vec<NeuronVisualization>,
}
540
541// Legacy support types
542use super::*;
543
/// SOM-based routing strategy (legacy placeholder; no methods defined in
/// this module)
// NOTE(review): Rust naming convention would be `SomRoutingStrategy`, but
// renaming would break external callers — left as-is.
pub struct SOMRoutingStrategy;
546
/// Feature extractor for SOM (legacy placeholder; no methods defined in
/// this module)
pub struct FeatureExtractor;