Skip to main content

graphify_analyze/
temporal.rs

1//! Temporal graph analysis via git history integration.
2//!
3//! Correlates graph nodes with git commit history to identify high-risk
4//! nodes: frequently modified code with high connectivity.
5
6use std::collections::HashMap;
7use std::path::Path;
8use std::process::Command;
9
10use graphify_core::graph::KnowledgeGraph;
11use graphify_core::model::TemporalNode;
12
13/// Analyze temporal risk by correlating graph nodes with git history.
14///
15/// For each node's source file, queries `git log` to determine change frequency
16/// and recency. Risk score = churn_rate × normalized_degree.
17///
18/// Returns up to `top_n` nodes sorted by risk score descending.
19pub fn temporal_analysis(
20    graph: &KnowledgeGraph,
21    repo_root: &Path,
22    top_n: usize,
23) -> Vec<TemporalNode> {
24    let mut file_stats: HashMap<String, (usize, String)> = HashMap::new(); // file → (commit_count, last_date)
25
26    let source_files: Vec<String> = graph
27        .nodes()
28        .iter()
29        .map(|n| n.source_file.clone())
30        .collect::<std::collections::HashSet<_>>()
31        .into_iter()
32        .collect();
33
34    for file in &source_files {
35        if let Some((count, date)) = git_file_stats(repo_root, file) {
36            file_stats.insert(file.clone(), (count, date));
37        }
38    }
39
40    if file_stats.is_empty() {
41        return Vec::new();
42    }
43
44    let now = chrono_days_since_epoch();
45
46    let max_degree = graph
47        .node_ids()
48        .iter()
49        .map(|id| graph.degree(id))
50        .max()
51        .unwrap_or(1)
52        .max(1) as f64;
53
54    let mut results: Vec<TemporalNode> = graph
55        .nodes()
56        .iter()
57        .filter_map(|node| {
58            let (change_count, last_modified) = file_stats.get(&node.source_file)?;
59            let age_days = date_to_age(last_modified, now).max(1);
60            let churn_rate = *change_count as f64 / age_days as f64;
61            let normalized_degree = graph.degree(&node.id) as f64 / max_degree;
62            let risk_score = churn_rate * normalized_degree;
63
64            Some(TemporalNode {
65                id: node.id.clone(),
66                label: node.label.clone(),
67                last_modified: last_modified.clone(),
68                change_count: *change_count,
69                age_days,
70                churn_rate,
71                risk_score,
72            })
73        })
74        .filter(|t| t.risk_score > 0.0)
75        .collect();
76
77    results.sort_by(|a, b| {
78        b.risk_score
79            .partial_cmp(&a.risk_score)
80            .unwrap_or(std::cmp::Ordering::Equal)
81    });
82    results.truncate(top_n);
83    results
84}
85
/// Ask git how often a file was committed and when it was last changed.
///
/// Runs `git log --format=%aI --follow -- <file>` with `repo_root` as the
/// working directory and reads one author date per output line. The first
/// line is taken as the latest commit; only the part before its `T`
/// separator (the calendar date) is kept.
///
/// Returns `(commit_count, last_date)`, or `None` when git cannot be started,
/// exits unsuccessfully, or prints no non-empty line.
fn git_file_stats(repo_root: &Path, file: &str) -> Option<(usize, String)> {
    let log = Command::new("git")
        .current_dir(repo_root)
        .arg("log")
        .arg("--format=%aI")
        .arg("--follow")
        .arg("--")
        .arg(file)
        .output()
        .ok()
        .filter(|finished| finished.status.success())?;

    let text = String::from_utf8_lossy(&log.stdout);
    let mut dates = text.lines().filter(|line| !line.is_empty());

    // The first entry doubles as the "last modified" stamp; the rest only count.
    let newest = dates.next()?;
    let commit_count = 1 + dates.count();
    let last_date = newest.split('T').next().unwrap_or("").to_string();

    Some((commit_count, last_date))
}
108
/// Age of a file in whole days, given the date of its latest commit.
///
/// `date_str` is an ISO `YYYY-MM-DD` date and `now_days` is today's day number
/// counted from 2020-01-01 (see [`chrono_days_since_epoch`]). The date is
/// converted with [`days_since_epoch_2020`], which counts real calendar days,
/// so both sides of the subtraction share the same scale.
///
/// The result is never below 1 so callers can safely divide by it: a date that
/// lies in the future (clock skew) and a date that cannot be parsed both
/// yield 1. Dates before 2020-01-01 are clamped to the epoch, so their age is
/// reported as `now_days`.
fn date_to_age(date_str: &str, now_days: u64) -> u64 {
    days_since_epoch_2020(date_str)
        .map_or(1, |file_days| now_days.saturating_sub(file_days).max(1))
}

/// Number of calendar days from 2020-01-01 to an ISO `YYYY-MM-DD` date.
///
/// The count includes the leap days of every year between the epoch and the
/// date, so it agrees with [`chrono_days_since_epoch`], which divides real
/// elapsed seconds by 86400. Ignoring those leap days would make every date
/// after 2020 look one day older per elapsed leap year.
///
/// Only the first three `-`-separated fields are read; anything after the day
/// is ignored. Dates before 2020-01-01 cannot be expressed as a `u64` offset
/// and are clamped to `Some(0)`.
///
/// Returns `None` when a field is missing or not a number, when the month is
/// outside `1..=12`, when the day does not exist in that month, or when the
/// year is so large that the day count overflows.
fn days_since_epoch_2020(date_str: &str) -> Option<u64> {
    const EPOCH_YEAR: u64 = 2020;
    // Days preceding the first of each month in a non-leap year.
    const DAYS_BEFORE_MONTH: [u64; 12] = [0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334];
    // Month lengths in a non-leap year.
    const DAYS_IN_MONTH: [u64; 12] = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];

    fn is_leap(year: u64) -> bool {
        year % 4 == 0 && (year % 100 != 0 || year % 400 == 0)
    }

    // Leap years among the years 1..year; callers guarantee year >= 1.
    fn leap_years_before(year: u64) -> u64 {
        let prev = year - 1;
        prev / 4 - prev / 100 + prev / 400
    }

    // Parse positionally: a malformed field must fail the whole date instead of
    // being skipped, otherwise later fields would slide into the wrong slot.
    let mut fields = date_str.split('-').map(|field| field.parse::<u64>().ok());
    let year = fields.next()??;
    let month = fields.next()??;
    let day = fields.next()??;

    if month == 0 || month > 12 {
        return None;
    }
    let month_index = (month - 1) as usize; // lossless: always 0..=11
    let leap = is_leap(year);

    let month_len = DAYS_IN_MONTH[month_index] + u64::from(leap && month == 2);
    if day == 0 || day > month_len {
        return None;
    }

    // Pre-epoch dates have no non-negative offset; pin them to the epoch
    // rather than pretending they fall inside 2020.
    if year < EPOCH_YEAR {
        return Some(0);
    }

    let leap_days_in_prior_years = leap_years_before(year) - leap_years_before(EPOCH_YEAR);
    let day_of_year = DAYS_BEFORE_MONTH[month_index] + u64::from(leap && month > 2) + day - 1;

    (year - EPOCH_YEAR)
        .checked_mul(365)?
        .checked_add(leap_days_in_prior_years)?
        .checked_add(day_of_year)
}

/// Whole days elapsed between 2020-01-01T00:00:00Z and the current system time.
///
/// Yields 0 when the system clock reports a time before that instant.
fn chrono_days_since_epoch() -> u64 {
    use std::time::{SystemTime, UNIX_EPOCH};

    // Unix timestamp of 2020-01-01T00:00:00Z.
    const EPOCH_2020_UNIX_SECS: u64 = 1_577_836_800;
    const SECS_PER_DAY: u64 = 86_400;

    let unix_secs = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs();
    unix_secs.saturating_sub(EPOCH_2020_UNIX_SECS) / SECS_PER_DAY
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn date_to_age_computes_correctly() {
        // 2026-04-13 as a day number:
        // 6 * 365 + 2 leap days (2020, 2024) + 90 (Jan–Mar) + 12 = 2294
        let now = 6 * 365 + 2 + 90 + 12;
        // 2026-01-01 is day 2192, so the file is 31 + 28 + 31 + 12 = 102 days old.
        assert_eq!(date_to_age("2026-01-01", now), 102);
        // Yesterday is exactly one day old; today and the future are floored at 1.
        assert_eq!(date_to_age("2026-04-12", now), 1);
        assert_eq!(date_to_age("2026-04-13", now), 1);
        assert_eq!(date_to_age("2026-12-31", now), 1);
    }

    #[test]
    fn date_to_age_invalid_returns_1() {
        assert_eq!(date_to_age("invalid", 2300), 1);
    }

    #[test]
    fn days_since_epoch_consistent() {
        // 2020-01-01 is the epoch, so day 0
        assert_eq!(days_since_epoch_2020("2020-01-01"), Some(0));
        // 2020-02-01 = 31 days after epoch
        assert_eq!(days_since_epoch_2020("2020-02-01"), Some(31));
        // 2020-03-01 = 31 + 29 (leap February) = 60
        assert_eq!(days_since_epoch_2020("2020-03-01"), Some(60));
        // 2020 is a leap year, so 2021-01-01 is 366 days after epoch
        assert_eq!(days_since_epoch_2020("2021-01-01"), Some(366));
        // 2024-03-01 = 366 + 3 * 365 + 31 + 29 = 1521
        assert_eq!(days_since_epoch_2020("2024-03-01"), Some(1521));
    }

    #[test]
    fn days_since_epoch_rejects_malformed_dates() {
        assert_eq!(days_since_epoch_2020(""), None);
        assert_eq!(days_since_epoch_2020("2020-01"), None);
        assert_eq!(days_since_epoch_2020("2020-00-10"), None);
        assert_eq!(days_since_epoch_2020("2020-13-01"), None);
        assert_eq!(days_since_epoch_2020("2020-01-00"), None);
        assert_eq!(days_since_epoch_2020("2021-02-29"), None);
        assert_eq!(days_since_epoch_2020("2020-xx-05-07"), None);
    }

    #[test]
    fn days_since_epoch_clamps_pre_epoch_dates() {
        assert_eq!(days_since_epoch_2020("2019-12-31"), Some(0));
        assert_eq!(days_since_epoch_2020("2015-06-01"), Some(0));
    }
}