1use serde::{Deserialize, Serialize};
7
8use crate::graph::CodeGraph;
9use crate::temporal::history::ChangeHistory;
10
11#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
15pub enum HistoricalChangeType {
16 Creation,
18 BugFix,
20 Feature,
22 Refactor,
24 Performance,
26 Unknown,
28}
29
30impl HistoricalChangeType {
31 pub fn classify(message: &str) -> Self {
33 let msg = message.to_lowercase();
34 if msg.contains("fix")
35 || msg.contains("bug")
36 || msg.contains("patch")
37 || msg.contains("hotfix")
38 {
39 return Self::BugFix;
40 }
41 if msg.contains("refactor")
42 || msg.contains("cleanup")
43 || msg.contains("clean up")
44 || msg.contains("rename")
45 {
46 return Self::Refactor;
47 }
48 if msg.contains("perf")
49 || msg.contains("optim")
50 || msg.contains("speed")
51 || msg.contains("fast")
52 {
53 return Self::Performance;
54 }
55 if msg.contains("feat")
56 || msg.contains("add")
57 || msg.contains("implement")
58 || msg.contains("new")
59 {
60 return Self::Feature;
61 }
62 Self::Unknown
63 }
64
65 pub fn label(&self) -> &str {
66 match self {
67 Self::Creation => "creation",
68 Self::BugFix => "bugfix",
69 Self::Feature => "feature",
70 Self::Refactor => "refactor",
71 Self::Performance => "performance",
72 Self::Unknown => "unknown",
73 }
74 }
75}
76
77#[derive(Debug, Clone, Serialize, Deserialize)]
79pub struct HistoricalDecision {
80 pub description: String,
82 pub timestamp: u64,
84 pub author: String,
86 pub change_type: HistoricalChangeType,
88 pub reasoning: String,
90}
91
92#[derive(Debug, Clone, Serialize, Deserialize)]
94pub struct CodeEvolution {
95 pub node_id: u64,
97 pub name: String,
99 pub file_path: String,
101 pub total_changes: usize,
103 pub bugfix_count: usize,
105 pub author_count: usize,
107 pub authors: Vec<String>,
109 pub age_seconds: u64,
111 pub churn: u64,
113 pub stability_score: f32,
115 pub decisions: Vec<HistoricalDecision>,
117 pub phase: EvolutionPhase,
119}
120
121#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
123pub enum EvolutionPhase {
124 Active,
126 Maturing,
128 Stable,
130 Decaying,
132 Unknown,
134}
135
136impl EvolutionPhase {
137 pub fn label(&self) -> &str {
138 match self {
139 Self::Active => "active",
140 Self::Maturing => "maturing",
141 Self::Stable => "stable",
142 Self::Decaying => "decaying",
143 Self::Unknown => "unknown",
144 }
145 }
146}
147
148#[derive(Debug, Clone, Serialize, Deserialize)]
150pub struct ArchaeologyResult {
151 pub evolution: CodeEvolution,
153 pub why_explanation: String,
155 pub timeline: Vec<TimelineEvent>,
157}
158
159#[derive(Debug, Clone, Serialize, Deserialize)]
161pub struct TimelineEvent {
162 pub timestamp: u64,
164 pub description: String,
166 pub author: String,
168 pub change_type: HistoricalChangeType,
170}
171
172pub struct CodeArchaeologist<'g> {
176 graph: &'g CodeGraph,
177 history: ChangeHistory,
178}
179
180impl<'g> CodeArchaeologist<'g> {
181 pub fn new(graph: &'g CodeGraph, history: ChangeHistory) -> Self {
182 Self { graph, history }
183 }
184
185 pub fn investigate(&self, unit_id: u64) -> Option<ArchaeologyResult> {
187 let unit = self.graph.get_unit(unit_id)?;
188 let file_path = unit.file_path.display().to_string();
189
190 let changes = self.history.changes_for_path(&unit.file_path);
191 let total_changes = changes.len();
192 let bugfix_count = changes.iter().filter(|c| c.is_bugfix).count();
193 let authors = self.history.authors_for_path(&unit.file_path);
194 let churn = self.history.total_churn(&unit.file_path);
195
196 let oldest = self.history.oldest_timestamp(&unit.file_path);
197 let latest = self.history.latest_timestamp(&unit.file_path);
198 let age_seconds = latest.saturating_sub(oldest);
199
200 let phase = self.infer_phase(
202 unit.stability_score,
203 total_changes,
204 bugfix_count,
205 age_seconds,
206 );
207
208 let decisions: Vec<HistoricalDecision> = changes
210 .iter()
211 .map(|c| {
212 let change_type = if c.is_bugfix {
213 HistoricalChangeType::BugFix
214 } else {
215 HistoricalChangeType::Unknown
216 };
217 HistoricalDecision {
218 description: format!(
219 "{} {} (+{} -{})",
220 c.change_type, file_path, c.lines_added, c.lines_deleted
221 ),
222 timestamp: c.timestamp,
223 author: c.author.clone(),
224 change_type,
225 reasoning: format!("Change to {} via commit {}", file_path, c.commit_id),
226 }
227 })
228 .collect();
229
230 let timeline: Vec<TimelineEvent> = changes
232 .iter()
233 .map(|c| TimelineEvent {
234 timestamp: c.timestamp,
235 description: format!(
236 "{} (+{} -{})",
237 c.change_type, c.lines_added, c.lines_deleted
238 ),
239 author: c.author.clone(),
240 change_type: if c.is_bugfix {
241 HistoricalChangeType::BugFix
242 } else {
243 HistoricalChangeType::Unknown
244 },
245 })
246 .collect();
247
248 let evolution = CodeEvolution {
249 node_id: unit_id,
250 name: unit.name.clone(),
251 file_path: file_path.clone(),
252 total_changes,
253 bugfix_count,
254 author_count: authors.len(),
255 authors: authors.clone(),
256 age_seconds,
257 churn,
258 stability_score: unit.stability_score,
259 decisions,
260 phase,
261 };
262
263 let why_explanation = self.explain_why(&evolution);
264
265 Some(ArchaeologyResult {
266 evolution,
267 why_explanation,
268 timeline,
269 })
270 }
271
272 pub fn explain_why(&self, evolution: &CodeEvolution) -> String {
274 let mut explanations = Vec::new();
275
276 if evolution.total_changes == 0 {
277 return format!(
278 "'{}' has no recorded change history. It may be new or history is unavailable.",
279 evolution.name
280 );
281 }
282
283 let age_days = evolution.age_seconds / 86400;
285 if age_days > 365 {
286 explanations.push(format!(
287 "This code is {} days old, suggesting it's a mature part of the codebase.",
288 age_days
289 ));
290 } else if age_days < 30 {
291 explanations.push("This code is relatively new (< 30 days old).".to_string());
292 }
293
294 if evolution.total_changes > 0 {
296 let bugfix_ratio = evolution.bugfix_count as f64 / evolution.total_changes as f64;
297 if bugfix_ratio > 0.5 {
298 explanations.push(format!(
299 "High bugfix ratio ({:.0}%) suggests this code has been problematic.",
300 bugfix_ratio * 100.0
301 ));
302 }
303 }
304
305 if evolution.author_count > 3 {
307 explanations.push(format!(
308 "Modified by {} different authors, indicating shared ownership.",
309 evolution.author_count
310 ));
311 } else if evolution.author_count == 1 {
312 explanations.push("Single author — likely has clear ownership.".to_string());
313 }
314
315 if evolution.churn > 500 {
317 explanations.push(format!(
318 "High churn ({} lines changed) suggests significant rework.",
319 evolution.churn
320 ));
321 }
322
323 if evolution.stability_score < 0.3 {
325 explanations.push("Low stability score suggests ongoing volatility.".to_string());
326 } else if evolution.stability_score > 0.8 {
327 explanations.push("High stability score indicates the code has settled.".to_string());
328 }
329
330 if explanations.is_empty() {
331 format!(
332 "'{}' has a typical change history with {} changes.",
333 evolution.name, evolution.total_changes
334 )
335 } else {
336 explanations.join(" ")
337 }
338 }
339
340 pub fn when_changed(&self, unit_id: u64) -> Vec<TimelineEvent> {
342 let unit = match self.graph.get_unit(unit_id) {
343 Some(u) => u,
344 None => return Vec::new(),
345 };
346
347 self.history
348 .changes_for_path(&unit.file_path)
349 .iter()
350 .map(|c| TimelineEvent {
351 timestamp: c.timestamp,
352 description: format!(
353 "{} by {} (+{} -{})",
354 c.change_type, c.author, c.lines_added, c.lines_deleted
355 ),
356 author: c.author.clone(),
357 change_type: if c.is_bugfix {
358 HistoricalChangeType::BugFix
359 } else {
360 HistoricalChangeType::Unknown
361 },
362 })
363 .collect()
364 }
365
366 fn infer_phase(
369 &self,
370 stability_score: f32,
371 total_changes: usize,
372 bugfix_count: usize,
373 age_seconds: u64,
374 ) -> EvolutionPhase {
375 if total_changes == 0 {
376 return EvolutionPhase::Unknown;
377 }
378
379 let age_days = age_seconds / 86400;
380 let bugfix_ratio = bugfix_count as f64 / total_changes as f64;
381
382 if stability_score > 0.8 && age_days > 180 {
383 EvolutionPhase::Stable
384 } else if bugfix_ratio > 0.6 && age_days > 90 {
385 EvolutionPhase::Decaying
386 } else if age_days < 30 || total_changes > 10 {
387 EvolutionPhase::Active
388 } else {
389 EvolutionPhase::Maturing
390 }
391 }
392}
393
#[cfg(test)]
mod tests {
    use super::*;
    use crate::temporal::history::{ChangeType, FileChange};
    use crate::types::{CodeUnit, CodeUnitType, Language, Span};
    use std::path::PathBuf;

    /// File shared by the fixture unit and both change records.
    const BILLING_RS: &str = "src/billing.rs";

    /// Builds a graph holding one function unit plus a two-entry history for
    /// its file: an initial add by alice and a later bug fix by bob.
    fn test_graph_and_history() -> (CodeGraph, ChangeHistory) {
        let unit = CodeUnit::new(
            CodeUnitType::Function,
            Language::Rust,
            String::from("process_payment"),
            String::from("billing::process_payment"),
            PathBuf::from(BILLING_RS),
            Span::new(1, 0, 20, 0),
        );
        let mut graph = CodeGraph::with_default_dimension();
        graph.add_unit(unit);

        let initial_add = FileChange {
            path: PathBuf::from(BILLING_RS),
            change_type: ChangeType::Add,
            commit_id: String::from("abc123"),
            timestamp: 1_000_000,
            author: String::from("alice"),
            is_bugfix: false,
            lines_added: 50,
            lines_deleted: 0,
            old_path: None,
        };
        let later_bugfix = FileChange {
            path: PathBuf::from(BILLING_RS),
            change_type: ChangeType::Modify,
            commit_id: String::from("def456"),
            timestamp: 2_000_000,
            author: String::from("bob"),
            is_bugfix: true,
            lines_added: 5,
            lines_deleted: 3,
            old_path: None,
        };

        let mut history = ChangeHistory::new();
        history.add_change(initial_add);
        history.add_change(later_bugfix);

        (graph, history)
    }

    #[test]
    fn investigate_returns_evolution() {
        let (graph, history) = test_graph_and_history();
        let archaeologist = CodeArchaeologist::new(&graph, history);

        // The fixture's only unit is expected to have id 0.
        let report = archaeologist.investigate(0).unwrap();
        let evolution = &report.evolution;

        assert_eq!(evolution.name, "process_payment");
        assert_eq!(evolution.total_changes, 2);
        assert_eq!(evolution.bugfix_count, 1);
        assert_eq!(evolution.author_count, 2);
    }

    #[test]
    fn when_changed_returns_timeline() {
        let (graph, history) = test_graph_and_history();
        let archaeologist = CodeArchaeologist::new(&graph, history);

        let events = archaeologist.when_changed(0);

        assert_eq!(events.len(), 2);
        assert_eq!(events[0].timestamp, 1_000_000);
    }

    #[test]
    fn classify_change_type() {
        let expectations = [
            ("fix: null pointer bug", HistoricalChangeType::BugFix),
            ("refactor: extract method", HistoricalChangeType::Refactor),
            ("feat: add payment", HistoricalChangeType::Feature),
            ("optimize query performance", HistoricalChangeType::Performance),
        ];

        for (message, expected) in expectations.iter() {
            assert_eq!(HistoricalChangeType::classify(message), *expected);
        }
    }
}