Skip to main content

graphify_analyze/
temporal.rs

//! Temporal graph analysis via git history integration.
//!
//! Correlates graph nodes with git commit history to identify high-risk
//! nodes: frequently modified code with high connectivity.
5
6use std::collections::HashMap;
7use std::path::Path;
8use std::process::Command;
9
10use graphify_core::graph::KnowledgeGraph;
11use graphify_core::model::TemporalNode;
12
13/// Analyze temporal risk by correlating graph nodes with git history.
14///
15/// For each node's source file, queries `git log` to determine change frequency
16/// and recency. Risk score = churn_rate × normalized_degree.
17///
18/// Returns up to `top_n` nodes sorted by risk score descending.
19pub fn temporal_analysis(
20    graph: &KnowledgeGraph,
21    repo_root: &Path,
22    top_n: usize,
23) -> Vec<TemporalNode> {
24    // Collect unique source files from nodes
25    let mut file_stats: HashMap<String, (usize, String)> = HashMap::new(); // file → (commit_count, last_date)
26
27    let source_files: Vec<String> = graph
28        .nodes()
29        .iter()
30        .map(|n| n.source_file.clone())
31        .collect::<std::collections::HashSet<_>>()
32        .into_iter()
33        .collect();
34
35    for file in &source_files {
36        if let Some((count, date)) = git_file_stats(repo_root, file) {
37            file_stats.insert(file.clone(), (count, date));
38        }
39    }
40
41    if file_stats.is_empty() {
42        return Vec::new();
43    }
44
45    // Get current date for age calculation
46    let now = chrono_days_since_epoch();
47
48    // Max degree for normalization
49    let max_degree = graph
50        .node_ids()
51        .iter()
52        .map(|id| graph.degree(id))
53        .max()
54        .unwrap_or(1)
55        .max(1) as f64;
56
57    let mut results: Vec<TemporalNode> = graph
58        .nodes()
59        .iter()
60        .filter_map(|node| {
61            let (change_count, last_modified) = file_stats.get(&node.source_file)?;
62            let age_days = date_to_age(last_modified, now).max(1);
63            let churn_rate = *change_count as f64 / age_days as f64;
64            let normalized_degree = graph.degree(&node.id) as f64 / max_degree;
65            let risk_score = churn_rate * normalized_degree;
66
67            Some(TemporalNode {
68                id: node.id.clone(),
69                label: node.label.clone(),
70                last_modified: last_modified.clone(),
71                change_count: *change_count,
72                age_days,
73                churn_rate,
74                risk_score,
75            })
76        })
77        .filter(|t| t.risk_score > 0.0)
78        .collect();
79
80    results.sort_by(|a, b| {
81        b.risk_score
82            .partial_cmp(&a.risk_score)
83            .unwrap_or(std::cmp::Ordering::Equal)
84    });
85    results.truncate(top_n);
86    results
87}
88
/// Ask git how often `file` was committed and when it last changed.
///
/// Runs `git log --format=%aI --follow -- <file>` with `repo_root` as the
/// working directory. Each non-empty output line counts as one commit; the
/// date is the portion of the first line before its `T` separator.
///
/// Returns `None` when git cannot be started, exits unsuccessfully, or prints
/// no non-empty lines (e.g. the file has no history).
fn git_file_stats(repo_root: &Path, file: &str) -> Option<(usize, String)> {
    let mut git = Command::new("git");
    git.current_dir(repo_root)
        .arg("log")
        .arg("--format=%aI")
        .arg("--follow")
        .arg("--")
        .arg(file);

    let finished = match git.output() {
        Ok(out) if out.status.success() => out,
        _ => return None,
    };

    let text = String::from_utf8_lossy(&finished.stdout);
    let mut stamps = text.lines().filter(|line| !line.is_empty());

    // The first line printed supplies the "last modified" date.
    let newest = stamps.next()?;
    let last_date = match newest.find('T') {
        Some(cut) => &newest[..cut],
        None => newest,
    };

    // `newest` was already consumed from the iterator, hence the `1 +`.
    let commit_count = 1 + stamps.count();
    Some((commit_count, last_date.to_string()))
}
111
/// Approximate age, in days, of an ISO `YYYY-MM-DD` date relative to `now_days`.
///
/// `now_days` must be a day number on the same coarse scale this function
/// applies to `date_str`: `(year - 2020) * 365 + month * 30 + day`. The scale
/// ignores leap years and real month lengths, so the result is an estimate.
///
/// Returns at least 1. A string that does not yield three numeric
/// `-`-separated components is treated as age 1, as is a date that is not
/// earlier than `now_days`. Dates before 2020 are handled (they map to a
/// negative day number) instead of underflowing.
fn date_to_age(date_str: &str, now_days: u64) -> u64 {
    // Components are parsed as u32 so the i64 arithmetic below cannot overflow.
    let parts: Vec<i64> = date_str
        .split('-')
        .filter_map(|p| p.parse::<u32>().ok())
        .map(i64::from)
        .collect();
    let (year, month, day) = match parts.as_slice() {
        [y, m, d, ..] => (*y, *m, *d),
        _ => return 1,
    };

    // Signed on purpose: years before 2020 produce a negative day number.
    let file_days = (year - 2020) * 365 + month * 30 + day;

    // Clamp before converting so an absurdly large `now_days` cannot wrap.
    let now = now_days.min(i64::MAX as u64) as i64;

    // `max(1)` guarantees a positive value, so the cast back to u64 is lossless.
    now.saturating_sub(file_days).max(1) as u64
}
121
/// Approximate day number for "now" on the module's coarse date scale.
///
/// The scale is `(year - 2020) * 365 + month * 30 + day`, the same one
/// `date_to_age` applies to file dates, so the two values can be subtracted.
///
/// The current date is taken from the system clock in UTC; no external
/// program is spawned. If the clock reports a time before the Unix epoch the
/// function falls back to 2300 (roughly 2026).
fn chrono_days_since_epoch() -> u64 {
    match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
        Ok(elapsed) => approx_days_from_unix_secs(elapsed.as_secs()),
        Err(_) => 2300, // ~2026
    }
}

/// Map seconds since the Unix epoch (UTC) onto the coarse day scale
/// `(year - 2020) * 365 + month * 30 + day`.
///
/// Years before 2020 contribute zero to the year term rather than underflowing.
fn approx_days_from_unix_secs(secs: u64) -> u64 {
    // Days-to-civil-date conversion for the proleptic Gregorian calendar.
    // Internally the year starts on 1 March so the leap day comes last.
    let shifted = secs / 86_400 + 719_468; // days since 0000-03-01
    let era = shifted / 146_097; // 400-year cycles
    let day_of_era = shifted % 146_097; // [0, 146096]
    let year_of_era =
        (day_of_era - day_of_era / 1_460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);
    let shifted_month = (5 * day_of_year + 2) / 153; // 0 = March … 11 = February
    let day = day_of_year - (153 * shifted_month + 2) / 5 + 1; // [1, 31]
    let month = if shifted_month < 10 {
        shifted_month + 3
    } else {
        shifted_month - 9
    }; // [1, 12]
    // January and February belong to the following civil year.
    let year = era * 400 + year_of_era + if month <= 2 { 1 } else { 0 };

    year.saturating_sub(2020) * 365 + month * 30 + day
}
138
#[cfg(test)]
mod tests {
    use super::*;

    /// "Now" on the module's coarse day scale, roughly 2026-04-13.
    const NOW_DAYS: u64 = (2026 - 2020) * 365 + 4 * 30 + 13;

    /// A date a few months before `NOW_DAYS` yields a small positive age.
    #[test]
    fn date_to_age_computes_correctly() {
        let age = date_to_age("2026-01-01", NOW_DAYS);
        assert!(age > 0);
        assert!(age < 200);
    }

    /// Input without three numeric components falls back to an age of 1.
    #[test]
    fn date_to_age_invalid_returns_1() {
        let age = date_to_age("invalid", 2300);
        assert_eq!(age, 1);
    }
}