Skip to main content

ruvector_memopt/algorithms/
pagerank.rs

1//! PageRank algorithm for process importance scoring
2//!
3//! Ranks processes by their importance in the process dependency graph.
4//! High-rank processes are preserved; low-rank processes are trimmed first.
5
6use std::collections::HashMap;
7use sysinfo::System;
8
9/// PageRank-based process prioritization
10pub struct ProcessPageRank {
11    /// PageRank scores for each process
12    scores: HashMap<u32, f64>,
13    /// Adjacency list (outgoing edges)
14    outlinks: HashMap<u32, Vec<u32>>,
15    /// Reverse adjacency list (incoming edges) - for O(1) inlink lookup
16    inlinks: HashMap<u32, Vec<u32>>,
17    /// Damping factor (typically 0.85)
18    damping: f64,
19    /// Convergence threshold
20    epsilon: f64,
21    /// Maximum iterations
22    max_iterations: usize,
23}
24
25impl ProcessPageRank {
26    pub fn new() -> Self {
27        Self {
28            scores: HashMap::new(),
29            outlinks: HashMap::new(),
30            inlinks: HashMap::new(),
31            damping: 0.85,
32            epsilon: 1e-6,
33            max_iterations: 100,
34        }
35    }
36
37    /// Build the process graph and compute PageRank scores
38    pub fn compute(&mut self, system: &System) {
39        self.outlinks.clear();
40        self.inlinks.clear();
41        self.scores.clear();
42
43        let processes: Vec<u32> = system.processes().keys().map(|p| p.as_u32()).collect();
44        let n = processes.len();
45
46        if n == 0 {
47            return;
48        }
49
50        // Build outlink + inlink graphs (parent -> children)
51        for (pid, process) in system.processes() {
52            let pid_u32 = pid.as_u32();
53
54            // Parent points to child (influence flows from parent)
55            if let Some(parent_pid) = process.parent() {
56                let parent_u32 = parent_pid.as_u32();
57                self.outlinks
58                    .entry(parent_u32)
59                    .or_default()
60                    .push(pid_u32);
61                // Build reverse index: child -> list of parents pointing to it
62                self.inlinks
63                    .entry(pid_u32)
64                    .or_default()
65                    .push(parent_u32);
66            }
67        }
68
69        // Pre-compute out-degrees for O(1) lookup
70        let out_degrees: HashMap<u32, f64> = self.outlinks
71            .iter()
72            .map(|(&k, v)| (k, v.len() as f64))
73            .collect();
74
75        // Collect dangling nodes (no outlinks)
76        let dangling_nodes: Vec<u32> = processes.iter()
77            .filter(|p| !self.outlinks.contains_key(p))
78            .copied()
79            .collect();
80
81        // Initialize scores uniformly
82        let initial_score = 1.0 / n as f64;
83        for &pid in &processes {
84            self.scores.insert(pid, initial_score);
85        }
86
87        let n_f64 = n as f64;
88
89        // Power iteration
90        for iteration in 0..self.max_iterations {
91            let mut new_scores: HashMap<u32, f64> = HashMap::with_capacity(n);
92            let mut max_delta = 0.0f64;
93
94            // Pre-compute dangling node contribution (sum of their scores / n)
95            let dangling_sum: f64 = dangling_nodes.iter()
96                .map(|p| self.scores.get(p).unwrap_or(&0.0))
97                .sum();
98            let dangling_contrib = self.damping * dangling_sum / n_f64;
99
100            // Teleport component (random jump)
101            let teleport = (1.0 - self.damping) / n_f64;
102            let base_score = teleport + dangling_contrib;
103
104            for &pid in &processes {
105                let mut score = base_score;
106
107                // O(in-degree) instead of O(m): only iterate inlinks to this node
108                if let Some(sources) = self.inlinks.get(&pid) {
109                    for &source in sources {
110                        let source_score = self.scores.get(&source).unwrap_or(&0.0);
111                        let out_deg = out_degrees.get(&source).unwrap_or(&1.0);
112                        score += self.damping * source_score / out_deg;
113                    }
114                }
115
116                let old_score = self.scores.get(&pid).unwrap_or(&0.0);
117                max_delta = max_delta.max((score - old_score).abs());
118                new_scores.insert(pid, score);
119            }
120
121            self.scores = new_scores;
122
123            // Check convergence
124            if max_delta < self.epsilon {
125                tracing::debug!("PageRank converged after {} iterations", iteration + 1);
126                break;
127            }
128        }
129
130        // Normalize scores
131        let total: f64 = self.scores.values().sum();
132        if total > 0.0 {
133            for score in self.scores.values_mut() {
134                *score /= total;
135            }
136        }
137    }
138
139    /// Get PageRank score for a process (higher = more important)
140    pub fn get_score(&self, pid: u32) -> f64 {
141        *self.scores.get(&pid).unwrap_or(&0.0)
142    }
143
144    /// Get processes ranked by importance (least important first - trim these)
145    pub fn get_trim_candidates(&self, limit: usize) -> Vec<(u32, f64)> {
146        let mut ranked: Vec<(u32, f64)> = self.scores.iter().map(|(&k, &v)| (k, v)).collect();
147        // Sort ascending (lowest rank first)
148        ranked.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
149        ranked.into_iter().take(limit).collect()
150    }
151
152    /// Get most important processes (preserve these)
153    pub fn get_critical_processes(&self, limit: usize) -> Vec<(u32, f64)> {
154        let mut ranked: Vec<(u32, f64)> = self.scores.iter().map(|(&k, &v)| (k, v)).collect();
155        // Sort descending (highest rank first)
156        ranked.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
157        ranked.into_iter().take(limit).collect()
158    }
159
160    /// Statistics for benchmarking
161    pub fn stats(&self) -> PageRankStats {
162        let scores: Vec<f64> = self.scores.values().copied().collect();
163        let n = scores.len();
164
165        if n == 0 {
166            return PageRankStats::default();
167        }
168
169        let sum: f64 = scores.iter().sum();
170        let mean = sum / n as f64;
171        let variance: f64 = scores.iter().map(|s| (s - mean).powi(2)).sum::<f64>() / n as f64;
172        let max = scores.iter().cloned().fold(0.0f64, f64::max);
173        let min = scores.iter().cloned().fold(1.0f64, f64::min);
174
175        PageRankStats {
176            process_count: n,
177            mean_score: mean,
178            std_dev: variance.sqrt(),
179            max_score: max,
180            min_score: min,
181            iterations_to_converge: self.max_iterations, // Would need to track this
182        }
183    }
184
185    /// Combine PageRank with memory usage for final priority
186    pub fn get_weighted_candidates(
187        &self,
188        system: &System,
189        memory_weight: f64,
190        rank_weight: f64,
191        limit: usize,
192    ) -> Vec<(u32, f64)> {
193        let mut candidates: Vec<(u32, f64)> = Vec::new();
194
195        // Find max memory for normalization
196        let max_memory = system
197            .processes()
198            .values()
199            .map(|p| p.memory())
200            .max()
201            .unwrap_or(1) as f64;
202
203        for (pid, process) in system.processes() {
204            let pid_u32 = pid.as_u32();
205            let rank_score = self.get_score(pid_u32);
206            let memory_score = process.memory() as f64 / max_memory;
207
208            // Combined score: high memory + low rank = good trim candidate
209            let combined = memory_weight * memory_score + rank_weight * (1.0 - rank_score);
210            candidates.push((pid_u32, combined));
211        }
212
213        candidates.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
214        candidates.into_iter().take(limit).collect()
215    }
216}
217
218impl Default for ProcessPageRank {
219    fn default() -> Self {
220        Self::new()
221    }
222}
223
224#[derive(Debug, Clone, Default)]
225pub struct PageRankStats {
226    pub process_count: usize,
227    pub mean_score: f64,
228    pub std_dev: f64,
229    pub max_score: f64,
230    pub min_score: f64,
231    pub iterations_to_converge: usize,
232}
233
234#[cfg(test)]
235mod tests {
236    use super::*;
237
238    #[test]
239    fn test_pagerank_basic() {
240        let pagerank = ProcessPageRank::new();
241        let stats = pagerank.stats();
242        assert_eq!(stats.process_count, 0);
243    }
244}