Skip to main content

agentic_codebase/temporal/
archaeology.rs

//! Version Archaeology — Invention 11.
//!
//! Understand the history and evolution of code units. Why does this code
//! look the way it does? When did it change? What decisions led here?
5
6use serde::{Deserialize, Serialize};
7
8use crate::graph::CodeGraph;
9use crate::temporal::history::ChangeHistory;
10
11// ── Types ────────────────────────────────────────────────────────────────────
12
13/// Historical change type categories.
14#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
15pub enum HistoricalChangeType {
16    /// Initial creation of the code.
17    Creation,
18    /// Bug fix.
19    BugFix,
20    /// Feature addition.
21    Feature,
22    /// Refactoring / cleanup.
23    Refactor,
24    /// Performance optimization.
25    Performance,
26    /// Unknown / general modification.
27    Unknown,
28}
29
30impl HistoricalChangeType {
31    /// Classify a commit message.
32    pub fn classify(message: &str) -> Self {
33        let msg = message.to_lowercase();
34        if msg.contains("fix")
35            || msg.contains("bug")
36            || msg.contains("patch")
37            || msg.contains("hotfix")
38        {
39            return Self::BugFix;
40        }
41        if msg.contains("refactor")
42            || msg.contains("cleanup")
43            || msg.contains("clean up")
44            || msg.contains("rename")
45        {
46            return Self::Refactor;
47        }
48        if msg.contains("perf")
49            || msg.contains("optim")
50            || msg.contains("speed")
51            || msg.contains("fast")
52        {
53            return Self::Performance;
54        }
55        if msg.contains("feat")
56            || msg.contains("add")
57            || msg.contains("implement")
58            || msg.contains("new")
59        {
60            return Self::Feature;
61        }
62        Self::Unknown
63    }
64
65    pub fn label(&self) -> &str {
66        match self {
67            Self::Creation => "creation",
68            Self::BugFix => "bugfix",
69            Self::Feature => "feature",
70            Self::Refactor => "refactor",
71            Self::Performance => "performance",
72            Self::Unknown => "unknown",
73        }
74    }
75}
76
77/// A historical decision inferred from code changes.
78#[derive(Debug, Clone, Serialize, Deserialize)]
79pub struct HistoricalDecision {
80    /// Description of the decision.
81    pub description: String,
82    /// When the decision was made (approximate commit timestamp).
83    pub timestamp: u64,
84    /// Who made the decision.
85    pub author: String,
86    /// Change type associated with this decision.
87    pub change_type: HistoricalChangeType,
88    /// Inferred reasoning.
89    pub reasoning: String,
90}
91
92/// Evolution summary of a code unit.
93#[derive(Debug, Clone, Serialize, Deserialize)]
94pub struct CodeEvolution {
95    /// Node ID.
96    pub node_id: u64,
97    /// Node name.
98    pub name: String,
99    /// File path.
100    pub file_path: String,
101    /// Total number of changes.
102    pub total_changes: usize,
103    /// Number of bug fixes.
104    pub bugfix_count: usize,
105    /// Number of authors who touched this code.
106    pub author_count: usize,
107    /// Authors.
108    pub authors: Vec<String>,
109    /// Age in seconds (from first to latest change).
110    pub age_seconds: u64,
111    /// Churn (total lines added + deleted).
112    pub churn: u64,
113    /// Stability score (from CodeUnit).
114    pub stability_score: f32,
115    /// Key decisions.
116    pub decisions: Vec<HistoricalDecision>,
117    /// Evolution phase.
118    pub phase: EvolutionPhase,
119}
120
121/// The current phase in a code unit's lifecycle.
122#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
123pub enum EvolutionPhase {
124    /// Newly created, still evolving rapidly.
125    Active,
126    /// Maturing, changes slowing down.
127    Maturing,
128    /// Stable, rarely changes.
129    Stable,
130    /// Decaying, mostly bugfixes and patches.
131    Decaying,
132    /// No history available.
133    Unknown,
134}
135
136impl EvolutionPhase {
137    pub fn label(&self) -> &str {
138        match self {
139            Self::Active => "active",
140            Self::Maturing => "maturing",
141            Self::Stable => "stable",
142            Self::Decaying => "decaying",
143            Self::Unknown => "unknown",
144        }
145    }
146}
147
148/// Archaeological investigation result.
149#[derive(Debug, Clone, Serialize, Deserialize)]
150pub struct ArchaeologyResult {
151    /// The code unit investigated.
152    pub evolution: CodeEvolution,
153    /// "Why" explanation.
154    pub why_explanation: String,
155    /// "When" timeline.
156    pub timeline: Vec<TimelineEvent>,
157}
158
159/// A single event in a code unit's timeline.
160#[derive(Debug, Clone, Serialize, Deserialize)]
161pub struct TimelineEvent {
162    /// Timestamp.
163    pub timestamp: u64,
164    /// Description of what happened.
165    pub description: String,
166    /// Who did it.
167    pub author: String,
168    /// Change category.
169    pub change_type: HistoricalChangeType,
170}
171
172// ── CodeArchaeologist ────────────────────────────────────────────────────────
173
174/// Investigates the history and evolution of code.
175pub struct CodeArchaeologist<'g> {
176    graph: &'g CodeGraph,
177    history: ChangeHistory,
178}
179
180impl<'g> CodeArchaeologist<'g> {
181    pub fn new(graph: &'g CodeGraph, history: ChangeHistory) -> Self {
182        Self { graph, history }
183    }
184
185    /// Investigate the full history of a code unit.
186    pub fn investigate(&self, unit_id: u64) -> Option<ArchaeologyResult> {
187        let unit = self.graph.get_unit(unit_id)?;
188        let file_path = unit.file_path.display().to_string();
189
190        let changes = self.history.changes_for_path(&unit.file_path);
191        let total_changes = changes.len();
192        let bugfix_count = changes.iter().filter(|c| c.is_bugfix).count();
193        let authors = self.history.authors_for_path(&unit.file_path);
194        let churn = self.history.total_churn(&unit.file_path);
195
196        let oldest = self.history.oldest_timestamp(&unit.file_path);
197        let latest = self.history.latest_timestamp(&unit.file_path);
198        let age_seconds = latest.saturating_sub(oldest);
199
200        // Infer evolution phase
201        let phase = self.infer_phase(
202            unit.stability_score,
203            total_changes,
204            bugfix_count,
205            age_seconds,
206        );
207
208        // Build decisions from changes
209        let decisions: Vec<HistoricalDecision> = changes
210            .iter()
211            .map(|c| {
212                let change_type = if c.is_bugfix {
213                    HistoricalChangeType::BugFix
214                } else {
215                    HistoricalChangeType::Unknown
216                };
217                HistoricalDecision {
218                    description: format!(
219                        "{} {} (+{} -{})",
220                        c.change_type, file_path, c.lines_added, c.lines_deleted
221                    ),
222                    timestamp: c.timestamp,
223                    author: c.author.clone(),
224                    change_type,
225                    reasoning: format!("Change to {} via commit {}", file_path, c.commit_id),
226                }
227            })
228            .collect();
229
230        // Build timeline
231        let timeline: Vec<TimelineEvent> = changes
232            .iter()
233            .map(|c| TimelineEvent {
234                timestamp: c.timestamp,
235                description: format!(
236                    "{} (+{} -{})",
237                    c.change_type, c.lines_added, c.lines_deleted
238                ),
239                author: c.author.clone(),
240                change_type: if c.is_bugfix {
241                    HistoricalChangeType::BugFix
242                } else {
243                    HistoricalChangeType::Unknown
244                },
245            })
246            .collect();
247
248        let evolution = CodeEvolution {
249            node_id: unit_id,
250            name: unit.name.clone(),
251            file_path: file_path.clone(),
252            total_changes,
253            bugfix_count,
254            author_count: authors.len(),
255            authors: authors.clone(),
256            age_seconds,
257            churn,
258            stability_score: unit.stability_score,
259            decisions,
260            phase,
261        };
262
263        let why_explanation = self.explain_why(&evolution);
264
265        Some(ArchaeologyResult {
266            evolution,
267            why_explanation,
268            timeline,
269        })
270    }
271
272    /// Answer "why does this code look the way it does?"
273    pub fn explain_why(&self, evolution: &CodeEvolution) -> String {
274        let mut explanations = Vec::new();
275
276        if evolution.total_changes == 0 {
277            return format!(
278                "'{}' has no recorded change history. It may be new or history is unavailable.",
279                evolution.name
280            );
281        }
282
283        // Age analysis
284        let age_days = evolution.age_seconds / 86400;
285        if age_days > 365 {
286            explanations.push(format!(
287                "This code is {} days old, suggesting it's a mature part of the codebase.",
288                age_days
289            ));
290        } else if age_days < 30 {
291            explanations.push("This code is relatively new (< 30 days old).".to_string());
292        }
293
294        // Bugfix ratio
295        if evolution.total_changes > 0 {
296            let bugfix_ratio = evolution.bugfix_count as f64 / evolution.total_changes as f64;
297            if bugfix_ratio > 0.5 {
298                explanations.push(format!(
299                    "High bugfix ratio ({:.0}%) suggests this code has been problematic.",
300                    bugfix_ratio * 100.0
301                ));
302            }
303        }
304
305        // Author count
306        if evolution.author_count > 3 {
307            explanations.push(format!(
308                "Modified by {} different authors, indicating shared ownership.",
309                evolution.author_count
310            ));
311        } else if evolution.author_count == 1 {
312            explanations.push("Single author — likely has clear ownership.".to_string());
313        }
314
315        // Churn
316        if evolution.churn > 500 {
317            explanations.push(format!(
318                "High churn ({} lines changed) suggests significant rework.",
319                evolution.churn
320            ));
321        }
322
323        // Stability
324        if evolution.stability_score < 0.3 {
325            explanations.push("Low stability score suggests ongoing volatility.".to_string());
326        } else if evolution.stability_score > 0.8 {
327            explanations.push("High stability score indicates the code has settled.".to_string());
328        }
329
330        if explanations.is_empty() {
331            format!(
332                "'{}' has a typical change history with {} changes.",
333                evolution.name, evolution.total_changes
334            )
335        } else {
336            explanations.join(" ")
337        }
338    }
339
340    /// Answer "when did important changes happen?"
341    pub fn when_changed(&self, unit_id: u64) -> Vec<TimelineEvent> {
342        let unit = match self.graph.get_unit(unit_id) {
343            Some(u) => u,
344            None => return Vec::new(),
345        };
346
347        self.history
348            .changes_for_path(&unit.file_path)
349            .iter()
350            .map(|c| TimelineEvent {
351                timestamp: c.timestamp,
352                description: format!(
353                    "{} by {} (+{} -{})",
354                    c.change_type, c.author, c.lines_added, c.lines_deleted
355                ),
356                author: c.author.clone(),
357                change_type: if c.is_bugfix {
358                    HistoricalChangeType::BugFix
359                } else {
360                    HistoricalChangeType::Unknown
361                },
362            })
363            .collect()
364    }
365
366    // ── Internal ─────────────────────────────────────────────────────────
367
368    fn infer_phase(
369        &self,
370        stability_score: f32,
371        total_changes: usize,
372        bugfix_count: usize,
373        age_seconds: u64,
374    ) -> EvolutionPhase {
375        if total_changes == 0 {
376            return EvolutionPhase::Unknown;
377        }
378
379        let age_days = age_seconds / 86400;
380        let bugfix_ratio = bugfix_count as f64 / total_changes as f64;
381
382        if stability_score > 0.8 && age_days > 180 {
383            EvolutionPhase::Stable
384        } else if bugfix_ratio > 0.6 && age_days > 90 {
385            EvolutionPhase::Decaying
386        } else if age_days < 30 || total_changes > 10 {
387            EvolutionPhase::Active
388        } else {
389            EvolutionPhase::Maturing
390        }
391    }
392}
393
394// ── Tests ────────────────────────────────────────────────────────────────────
395
#[cfg(test)]
mod tests {
    use super::*;
    use crate::temporal::history::{ChangeType, FileChange};
    use crate::types::{CodeUnit, CodeUnitType, Language, Span};
    use std::path::PathBuf;

    /// Fixture: a graph holding one `process_payment` function in
    /// `src/billing.rs`, plus a history for that file with an initial add by
    /// alice followed by a bugfix by bob.
    fn test_graph_and_history() -> (CodeGraph, ChangeHistory) {
        let unit = CodeUnit::new(
            CodeUnitType::Function,
            Language::Rust,
            "process_payment".to_string(),
            "billing::process_payment".to_string(),
            PathBuf::from("src/billing.rs"),
            Span::new(1, 0, 20, 0),
        );
        let mut graph = CodeGraph::with_default_dimension();
        graph.add_unit(unit);

        let initial_add = FileChange {
            path: PathBuf::from("src/billing.rs"),
            change_type: ChangeType::Add,
            commit_id: "abc123".to_string(),
            timestamp: 1000000,
            author: "alice".to_string(),
            is_bugfix: false,
            lines_added: 50,
            lines_deleted: 0,
            old_path: None,
        };
        let later_bugfix = FileChange {
            path: PathBuf::from("src/billing.rs"),
            change_type: ChangeType::Modify,
            commit_id: "def456".to_string(),
            timestamp: 2000000,
            author: "bob".to_string(),
            is_bugfix: true,
            lines_added: 5,
            lines_deleted: 3,
            old_path: None,
        };

        let mut history = ChangeHistory::new();
        history.add_change(initial_add);
        history.add_change(later_bugfix);

        (graph, history)
    }

    /// `investigate` reports the unit name plus change, bugfix and author counts.
    #[test]
    fn investigate_returns_evolution() {
        let (graph, history) = test_graph_and_history();
        let digger = CodeArchaeologist::new(&graph, history);

        let evolution = digger.investigate(0).unwrap().evolution;

        assert_eq!(evolution.name, "process_payment");
        assert_eq!(evolution.total_changes, 2);
        assert_eq!(evolution.bugfix_count, 1);
        assert_eq!(evolution.author_count, 2);
    }

    /// `when_changed` yields one event per recorded change, oldest first here.
    #[test]
    fn when_changed_returns_timeline() {
        let (graph, history) = test_graph_and_history();
        let digger = CodeArchaeologist::new(&graph, history);

        let events = digger.when_changed(0);

        assert_eq!(events.len(), 2);
        assert_eq!(events[0].timestamp, 1000000);
    }

    /// `classify` maps representative commit messages to their categories.
    #[test]
    fn classify_change_type() {
        let expectations = [
            ("fix: null pointer bug", HistoricalChangeType::BugFix),
            ("refactor: extract method", HistoricalChangeType::Refactor),
            ("feat: add payment", HistoricalChangeType::Feature),
            ("optimize query performance", HistoricalChangeType::Performance),
        ];

        for (message, expected) in expectations.iter() {
            assert_eq!(HistoricalChangeType::classify(message), *expected);
        }
    }
}
482}