ruvector_memopt/algorithms/
mincut.rs

1//! MinCut algorithm for process clustering
2//!
3//! Groups processes that share memory pages or have parent-child relationships.
4//! Optimizing related processes together improves cache coherence.
5
6use std::collections::{HashMap, HashSet, VecDeque};
7use sysinfo::System;
8
9/// Edge in the process graph
10#[derive(Debug, Clone)]
11struct Edge {
12    from: u32,
13    to: u32,
14    weight: f64,
15}
16
17/// Process cluster identified by MinCut
18#[derive(Debug, Clone)]
19pub struct ProcessCluster {
20    pub id: usize,
21    pub processes: Vec<u32>,
22    pub total_memory_mb: f64,
23    pub connectivity: f64,
24}
25
26/// MinCut-based process clustering
27pub struct MinCutClusterer {
28    adjacency: HashMap<u32, Vec<(u32, f64)>>,
29    process_memory: HashMap<u32, u64>,
30    min_cluster_size: usize,
31}
32
33impl MinCutClusterer {
34    pub fn new() -> Self {
35        Self {
36            adjacency: HashMap::new(),
37            process_memory: HashMap::new(),
38            min_cluster_size: 2,
39        }
40    }
41
42    /// Build process graph from system state
43    pub fn build_graph(&mut self, system: &System) {
44        self.adjacency.clear();
45        self.process_memory.clear();
46
47        // Build edges based on parent-child relationships and shared resources
48        for (pid, process) in system.processes() {
49            let pid_u32 = pid.as_u32();
50            self.process_memory.insert(pid_u32, process.memory());
51
52            // Parent-child edge (strong connection)
53            if let Some(parent_pid) = process.parent() {
54                self.add_edge(pid_u32, parent_pid.as_u32(), 1.0);
55            }
56
57            // Same-name processes (likely related instances)
58            let name = process.name().to_lowercase();
59            for (other_pid, other_proc) in system.processes() {
60                if other_pid != pid && other_proc.name().to_lowercase() == name {
61                    self.add_edge(pid_u32, other_pid.as_u32(), 0.5);
62                }
63            }
64        }
65    }
66
67    fn add_edge(&mut self, from: u32, to: u32, weight: f64) {
68        self.adjacency
69            .entry(from)
70            .or_insert_with(Vec::new)
71            .push((to, weight));
72        self.adjacency
73            .entry(to)
74            .or_insert_with(Vec::new)
75            .push((from, weight));
76    }
77
78    /// Find clusters using Karger's MinCut approximation
79    /// Returns groups of related processes
80    pub fn find_clusters(&self, target_clusters: usize) -> Vec<ProcessCluster> {
81        if self.adjacency.is_empty() {
82            return vec![];
83        }
84
85        // Use BFS-based clustering (faster than Karger's for our use case)
86        let mut visited: HashSet<u32> = HashSet::new();
87        let mut clusters = Vec::new();
88        let mut cluster_id = 0;
89
90        for &start_pid in self.adjacency.keys() {
91            if visited.contains(&start_pid) {
92                continue;
93            }
94
95            let cluster = self.bfs_cluster(start_pid, &mut visited);
96            if cluster.len() >= self.min_cluster_size {
97                let total_memory: u64 = cluster
98                    .iter()
99                    .filter_map(|pid| self.process_memory.get(pid))
100                    .sum();
101
102                let connectivity = self.calculate_connectivity(&cluster);
103
104                clusters.push(ProcessCluster {
105                    id: cluster_id,
106                    processes: cluster,
107                    total_memory_mb: total_memory as f64 / (1024.0 * 1024.0),
108                    connectivity,
109                });
110                cluster_id += 1;
111            }
112
113            if clusters.len() >= target_clusters {
114                break;
115            }
116        }
117
118        // Sort by total memory (optimize largest clusters first)
119        clusters.sort_by(|a, b| {
120            b.total_memory_mb
121                .partial_cmp(&a.total_memory_mb)
122                .unwrap_or(std::cmp::Ordering::Equal)
123        });
124
125        clusters
126    }
127
128    /// BFS to find connected component
129    fn bfs_cluster(&self, start: u32, visited: &mut HashSet<u32>) -> Vec<u32> {
130        let mut cluster = Vec::new();
131        let mut queue = VecDeque::new();
132        queue.push_back(start);
133
134        while let Some(pid) = queue.pop_front() {
135            if visited.contains(&pid) {
136                continue;
137            }
138            visited.insert(pid);
139            cluster.push(pid);
140
141            if let Some(neighbors) = self.adjacency.get(&pid) {
142                for &(neighbor, weight) in neighbors {
143                    if !visited.contains(&neighbor) && weight > 0.3 {
144                        queue.push_back(neighbor);
145                    }
146                }
147            }
148        }
149
150        cluster
151    }
152
153    /// Calculate internal connectivity of a cluster
154    fn calculate_connectivity(&self, cluster: &[u32]) -> f64 {
155        if cluster.len() < 2 {
156            return 0.0;
157        }
158
159        let cluster_set: HashSet<u32> = cluster.iter().copied().collect();
160        let mut internal_edges = 0.0;
161        let mut total_edges = 0.0;
162
163        for &pid in cluster {
164            if let Some(neighbors) = self.adjacency.get(&pid) {
165                for &(neighbor, weight) in neighbors {
166                    total_edges += weight;
167                    if cluster_set.contains(&neighbor) {
168                        internal_edges += weight;
169                    }
170                }
171            }
172        }
173
174        if total_edges > 0.0 {
175            internal_edges / total_edges
176        } else {
177            0.0
178        }
179    }
180
181    /// Get optimal trim order within a cluster
182    pub fn get_trim_order(&self, cluster: &ProcessCluster) -> Vec<u32> {
183        // Sort by memory usage (trim largest first within cluster)
184        let mut ordered = cluster.processes.clone();
185        ordered.sort_by(|a, b| {
186            let mem_a = self.process_memory.get(a).unwrap_or(&0);
187            let mem_b = self.process_memory.get(b).unwrap_or(&0);
188            mem_b.cmp(mem_a)
189        });
190        ordered
191    }
192
193    /// Statistics for benchmarking
194    pub fn stats(&self) -> MinCutStats {
195        MinCutStats {
196            total_processes: self.process_memory.len(),
197            total_edges: self.adjacency.values().map(|v| v.len()).sum::<usize>() / 2,
198            total_memory_mb: self.process_memory.values().sum::<u64>() as f64 / (1024.0 * 1024.0),
199        }
200    }
201}
202
203impl Default for MinCutClusterer {
204    fn default() -> Self {
205        Self::new()
206    }
207}
208
209#[derive(Debug, Clone)]
210pub struct MinCutStats {
211    pub total_processes: usize,
212    pub total_edges: usize,
213    pub total_memory_mb: f64,
214}
215
216#[cfg(test)]
217mod tests {
218    use super::*;
219
220    #[test]
221    fn test_mincut_clusterer() {
222        let mut clusterer = MinCutClusterer::new();
223
224        // Add some test edges manually
225        clusterer.add_edge(1, 2, 1.0);
226        clusterer.add_edge(2, 3, 1.0);
227        clusterer.add_edge(4, 5, 1.0);
228
229        clusterer.process_memory.insert(1, 100 * 1024 * 1024);
230        clusterer.process_memory.insert(2, 200 * 1024 * 1024);
231        clusterer.process_memory.insert(3, 150 * 1024 * 1024);
232        clusterer.process_memory.insert(4, 50 * 1024 * 1024);
233        clusterer.process_memory.insert(5, 75 * 1024 * 1024);
234
235        let clusters = clusterer.find_clusters(10);
236        assert!(!clusters.is_empty());
237
238        let stats = clusterer.stats();
239        assert_eq!(stats.total_processes, 5);
240    }
241}