gitstack/stats/
health.rs

1//! Bus factor, technical debt, and project health analysis
2
3use chrono::DateTime;
4use std::collections::HashMap;
5
6use crate::event::GitEvent;
7
8use super::{
9    BusFactorAnalysis, BusFactorEntry, BusFactorRisk, CommitQualityAnalysis, ContributorInfo,
10    FileHeatmap, TechDebtAnalysis, TechDebtEntry, TechDebtLevel,
11};
12
// Number of days that maps to an age score of 1.0 in the tech debt calculation
const DAYS_PER_YEAR: f64 = 365.0;

// Maximum number of contributors kept per bus factor entry
const TOP_CONTRIBUTORS: usize = 5;

// Bus factor threshold (cumulative contribution %): the bus factor is the number
// of top contributors needed to reach this share of a path's changes
const BUS_FACTOR_CUMULATIVE_THRESHOLD: f64 = 50.0;

// Tech debt scoring weights (churn + complexity + churn-weighted age)
const TECH_DEBT_CHURN_WEIGHT: f64 = 0.5;
const TECH_DEBT_COMPLEXITY_WEIGHT: f64 = 0.4;
const TECH_DEBT_AGE_WEIGHT: f64 = 0.1;
// Tech debt level cut-offs: score >= HIGH is high debt, score >= MEDIUM is medium debt
const TECH_DEBT_HIGH: f64 = 0.6;
const TECH_DEBT_MEDIUM: f64 = 0.3;

// A commit whose added + deleted count exceeds this value is considered large
const LARGE_COMMIT_CHANGES: usize = 50;

// Health component weights (proportion of each component in the overall score)
const WEIGHT_QUALITY: f64 = 0.20;
const WEIGHT_TEST: f64 = 0.15;
const WEIGHT_BUS_FACTOR: f64 = 0.20;
const WEIGHT_TECH_DEBT: f64 = 0.20;
const WEIGHT_CHURN: f64 = 0.15;
const WEIGHT_CADENCE: f64 = 0.10;

// Test score weights: `test:` commit messages vs. commits touching test files
const TEST_MSG_WEIGHT: f64 = 0.3;
const TEST_FILE_WEIGHT: f64 = 0.7;
// Test score below which a low test coverage alert is raised
const LOW_TEST_THRESHOLD: f64 = 0.1;
// Number of most recent commits inspected for the low test coverage alert details
const RECENT_COMMITS_WINDOW: usize = 30;

// Bus factor alert thresholds (score below CRITICAL -> critical alert,
// score below WARNING -> warning alert).
// NOTE: BUS_FACTOR_CRITICAL is also the cut-off used for the low commit quality alert.
const BUS_FACTOR_CRITICAL: f64 = 0.3;
const BUS_FACTOR_WARNING: f64 = 0.5;
// Share of commits (%) by one author above which that author is named in the alert
const SINGLE_AUTHOR_CONCENTRATION: u32 = 70;

// Churn thresholds
// A file is high-churn when its change count exceeds the average times this multiplier
const HIGH_CHURN_MULTIPLIER: f64 = 2.0;
// Churn score below which a high code churn alert is raised
const CHURN_WARNING_THRESHOLD: f64 = 0.5;

// Cadence analysis
// Minimum analysis period (days) required before cadence is scored
const MIN_PERIOD_FOR_CADENCE: u64 = 14;
// Coefficient of variation bounds: below STABLE scores 1.0, above UNSTABLE scores
// CADENCE_SCORE_UNSTABLE, values in between are interpolated linearly
const CV_STABLE: f64 = 0.5;
const CV_UNSTABLE: f64 = 2.0;
// Width of the interpolation interval; derived so it cannot drift from the bounds
const CV_RANGE: f64 = CV_UNSTABLE - CV_STABLE;
// Score drop applied across the whole interpolation interval
const CV_WEIGHT: f64 = 0.8;
const CADENCE_SCORE_UNSTABLE: f64 = 0.2;

// Confidence thresholds
const HIGH_CONF_COMMITS: usize = 100;
const HIGH_CONF_AUTHORS: usize = 3;
const HIGH_CONF_DAYS: u64 = 30;
const MEDIUM_CONF_COMMITS: usize = 30;
const MEDIUM_CONF_DAYS: u64 = 7;
68
/// Severity of a health alert.
///
/// Variants are declared from least to most severe, so the derived ordering
/// satisfies `Info < Warning < Critical`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum AlertSeverity {
    /// Informational notice (blue)
    Info,
    /// Warning (yellow)
    Warning,
    /// Critical problem (red)
    Critical,
}

impl AlertSeverity {
    /// Name of the color associated with this severity.
    pub fn color(&self) -> &'static str {
        match *self {
            Self::Critical => "red",
            Self::Warning => "yellow",
            Self::Info => "blue",
        }
    }

    /// Icon glyph associated with this severity.
    pub fn icon(&self) -> &'static str {
        match *self {
            Self::Critical => "🔴",
            Self::Warning => "⚠",
            Self::Info => "ℹ",
        }
    }
}
99
/// Health alert kind
///
/// Machine-readable category of a [`HealthAlert`], independent of its
/// human-readable message text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HealthAlertKind {
    /// Low commit quality (commit quality score fell below the alert threshold)
    LowCommitQuality,
    /// Low test coverage (test score fell below the alert threshold)
    LowTestCoverage,
    /// High bus factor risk (knowledge distribution score below the critical threshold)
    HighBusFactorRisk,
    /// Moderate bus factor risk (knowledge distribution score below the warning threshold)
    ModerateBusFactorRisk,
    /// High technical debt
    HighTechDebt,
    /// High code churn
    HighCodeChurn,
    /// Other (any alert that does not fit the kinds above)
    Other,
}
118
119/// Health alert
120#[derive(Debug, Clone)]
121pub struct HealthAlert {
122    /// Kind
123    pub kind: HealthAlertKind,
124    /// Severity
125    pub severity: AlertSeverity,
126    /// Alert message (for English fallback)
127    pub message: String,
128    /// Detailed description
129    pub details: Option<String>,
130}
131
132impl HealthAlert {
133    /// Create a new alert
134    pub fn new(kind: HealthAlertKind, severity: AlertSeverity, message: impl Into<String>) -> Self {
135        Self {
136            kind,
137            severity,
138            message: message.into(),
139            details: None,
140        }
141    }
142
143    /// Create an alert with details
144    pub fn with_details(
145        kind: HealthAlertKind,
146        severity: AlertSeverity,
147        message: impl Into<String>,
148        details: impl Into<String>,
149    ) -> Self {
150        Self {
151            kind,
152            severity,
153            message: message.into(),
154            details: Some(details.into()),
155        }
156    }
157}
158
/// Decide whether `path` looks like a test file.
///
/// The check is case-insensitive and treats a path as a test file when any
/// of the following holds:
/// - it ends with `_test.rs` or `_test.go` (Rust, Go)
/// - it ends with `.test.{ts,tsx,js,jsx}` or `.spec.{ts,tsx,js,jsx}`
///   (JavaScript/TypeScript)
/// - its file name matches `test_*.py` (Python)
/// - it lies inside a `tests/`, `__tests__/` or `spec/` directory
///   (`/` is the only separator recognised)
pub fn is_test_file(path: &str) -> bool {
    // Suffixes that mark a test file by name.
    const NAME_SUFFIXES: [&str; 10] = [
        "_test.rs",
        "_test.go",
        ".test.ts",
        ".test.tsx",
        ".test.js",
        ".test.jsx",
        ".spec.ts",
        ".spec.tsx",
        ".spec.js",
        ".spec.jsx",
    ];
    // Directory markers; without the leading `/` they match at the path start.
    const DIR_MARKERS: [&str; 3] = ["/tests/", "/__tests__/", "/spec/"];

    let normalized = path.to_lowercase();

    if NAME_SUFFIXES
        .iter()
        .any(|suffix| normalized.ends_with(suffix))
    {
        return true;
    }

    // Python: test_*.py, judged on the final path component only.
    let file_name = match normalized.rfind('/') {
        Some(idx) => &normalized[idx + 1..],
        None => normalized.as_str(),
    };
    if file_name.starts_with("test_") && file_name.ends_with(".py") {
        return true;
    }

    DIR_MARKERS
        .iter()
        .any(|marker| normalized.contains(marker) || normalized.starts_with(&marker[1..]))
}
194
/// How much trust to place in a health analysis, based on the amount of data.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConfidenceLevel {
    /// High confidence: 100+ commits, 3+ authors, and 30+ days
    High,
    /// Medium confidence: 30+ commits and 7+ days
    Medium,
    /// Low confidence: everything else
    Low,
}

impl ConfidenceLevel {
    /// Human-readable label for this level.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Low => "Low",
            Self::Medium => "Medium",
            Self::High => "High",
        }
    }
}
216
/// Health analysis confidence
///
/// Pairs the confidence level with a short text explaining how it was reached.
#[derive(Debug, Clone)]
pub struct HealthConfidence {
    /// Confidence level
    pub level: ConfidenceLevel,
    /// Reason for the level (human-readable; e.g. "No data" for the default result)
    pub reason: String,
}
225
/// Individual score component
///
/// One weighted input of the overall project health score.
#[derive(Debug, Clone, Default)]
pub struct HealthScoreComponent {
    /// Score (0.0-1.0)
    pub score: f64,
    /// Weight (proportion in score calculation); 0.0 when the component is skipped
    pub weight: f64,
    /// Description (human-readable summary of the component)
    pub description: String,
}
236
/// Project health analysis result
///
/// Holds the overall score, the six weighted components it is built from,
/// the alerts raised during analysis, and metadata about the analyzed data.
#[derive(Debug, Clone)]
pub struct ProjectHealth {
    /// Overall score (0-100)
    pub overall_score: u8,
    /// Quality score component (20%)
    pub quality: HealthScoreComponent,
    /// Test health component (15%)
    pub test_health: HealthScoreComponent,
    /// Bus factor risk component (20%)
    pub bus_factor_risk: HealthScoreComponent,
    /// Technical debt component (20%)
    pub tech_debt: HealthScoreComponent,
    /// Code churn component (15%)
    pub code_churn: HealthScoreComponent,
    /// Commit cadence component (10%)
    pub commit_cadence: HealthScoreComponent,
    /// Alert list (sorted by severity descending)
    pub alerts: Vec<HealthAlert>,
    /// Number of commits analyzed
    pub total_commits: usize,
    /// Number of authors analyzed (distinct author names)
    pub total_authors: usize,
    /// Analysis period (days)
    pub analysis_period_days: u64,
    /// Confidence in the analysis result
    pub confidence: HealthConfidence,
}
265
266impl Default for ProjectHealth {
267    fn default() -> Self {
268        Self {
269            overall_score: 50,
270            quality: HealthScoreComponent {
271                score: 0.5,
272                weight: WEIGHT_QUALITY,
273                description: "Commit quality average".to_string(),
274            },
275            test_health: HealthScoreComponent {
276                score: 0.5,
277                weight: WEIGHT_TEST,
278                description: "Test commit ratio".to_string(),
279            },
280            bus_factor_risk: HealthScoreComponent {
281                score: 0.5,
282                weight: WEIGHT_BUS_FACTOR,
283                description: "Knowledge concentration (lower is better)".to_string(),
284            },
285            tech_debt: HealthScoreComponent {
286                score: 0.5,
287                weight: WEIGHT_TECH_DEBT,
288                description: "Technical debt (lower is better)".to_string(),
289            },
290            code_churn: HealthScoreComponent {
291                score: 0.5,
292                weight: WEIGHT_CHURN,
293                description: "Code churn rate".to_string(),
294            },
295            commit_cadence: HealthScoreComponent {
296                score: 0.5,
297                weight: WEIGHT_CADENCE,
298                description: "Commit cadence stability".to_string(),
299            },
300            alerts: Vec::new(),
301            total_commits: 0,
302            total_authors: 0,
303            analysis_period_days: 0,
304            confidence: HealthConfidence {
305                level: ConfidenceLevel::Low,
306                reason: "No data".to_string(),
307            },
308        }
309    }
310}
311
312impl ProjectHealth {
313    /// Get overall score level string (6 levels)
314    pub fn level(&self) -> &'static str {
315        match self.overall_score {
316            90..=100 => "Excellent",
317            75..=89 => "Good",
318            60..=74 => "Fair",
319            45..=59 => "Needs Work",
320            30..=44 => "Poor",
321            _ => "Critical",
322        }
323    }
324
325    /// Get score color (6 levels)
326    pub fn score_color(&self) -> &'static str {
327        match self.overall_score {
328            90..=100 => "green",
329            75..=89 => "teal",
330            60..=74 => "sapphire",
331            45..=59 => "yellow",
332            30..=44 => "peach",
333            _ => "red",
334        }
335    }
336
337    /// Generate score bar (10 levels)
338    pub fn score_bar(&self) -> String {
339        let filled = (self.overall_score.min(100) / 10) as usize;
340        let empty = 10usize.saturating_sub(filled);
341        format!("{}{}", "█".repeat(filled), "░".repeat(empty))
342    }
343}
344
345/// Calculate bus factor
346///
347/// Bus factor = number of people with knowledge of a code area
348/// Measures the risk of "if this person leaves, the project stalls"
349///
350/// Calculation method:
351/// - For each path, count the number of people contributing 50% or more
352/// - If only 1 person has 50%+, bus_factor = 1 (high risk)
353pub fn calculate_bus_factor(
354    events: &[&GitEvent],
355    get_files: impl Fn(&str) -> Option<Vec<String>>,
356    min_commits: usize,
357) -> BusFactorAnalysis {
358    // Aggregate author commit counts per directory
359    let mut dir_author_counts: HashMap<String, HashMap<String, usize>> = HashMap::new();
360
361    for event in events {
362        if let Some(files) = get_files(&event.short_hash) {
363            for file in &files {
364                // Extract top-level directory
365                let parts: Vec<&str> = file.split('/').collect();
366                if parts.len() > 1 {
367                    let top_dir = parts[0].to_string();
368                    let counts = dir_author_counts.entry(top_dir).or_default();
369                    *counts.entry(event.author.clone()).or_insert(0) += 1;
370                }
371                // Also aggregate second-level directories
372                if parts.len() > 2 {
373                    let two_level_dir = format!("{}/{}", parts[0], parts[1]);
374                    let counts = dir_author_counts.entry(two_level_dir).or_default();
375                    *counts.entry(event.author.clone()).or_insert(0) += 1;
376                }
377            }
378        }
379    }
380
381    let mut entries = Vec::new();
382    let mut high_risk_count = 0;
383    let mut medium_risk_count = 0;
384
385    for (path, author_counts) in &dir_author_counts {
386        let total_commits: usize = author_counts.values().sum();
387
388        // Skip if below minimum commit count
389        if total_commits < min_commits {
390            continue;
391        }
392
393        // Create contributor info (sorted by commit count)
394        let mut contributors: Vec<ContributorInfo> = author_counts
395            .iter()
396            .map(|(name, &count)| ContributorInfo {
397                name: name.clone(),
398                commit_count: count,
399                contribution_percent: (count as f64 / total_commits as f64) * 100.0,
400            })
401            .collect();
402        contributors.sort_by(|a, b| b.commit_count.cmp(&a.commit_count));
403
404        // Bus factor calculation: number of people needed to exceed 50% cumulative contribution
405        let mut cumulative = 0.0;
406        let mut bus_factor = 0;
407        for contributor in &contributors {
408            cumulative += contributor.contribution_percent;
409            bus_factor += 1;
410            if cumulative >= BUS_FACTOR_CUMULATIVE_THRESHOLD {
411                break;
412            }
413        }
414
415        let risk_level = match bus_factor {
416            1 => {
417                high_risk_count += 1;
418                BusFactorRisk::High
419            }
420            2 => {
421                medium_risk_count += 1;
422                BusFactorRisk::Medium
423            }
424            _ => BusFactorRisk::Low,
425        };
426
427        entries.push(BusFactorEntry {
428            path: path.clone(),
429            bus_factor,
430            contributors: contributors.into_iter().take(TOP_CONTRIBUTORS).collect(),
431            total_commits,
432            risk_level,
433            is_directory: true,
434        });
435    }
436
437    // Sort by risk level (high->low), then by commit count within same risk level
438    entries.sort_by(|a, b| match (&a.risk_level, &b.risk_level) {
439        (BusFactorRisk::High, BusFactorRisk::High)
440        | (BusFactorRisk::Medium, BusFactorRisk::Medium)
441        | (BusFactorRisk::Low, BusFactorRisk::Low) => b.total_commits.cmp(&a.total_commits),
442        (BusFactorRisk::High, _) => std::cmp::Ordering::Less,
443        (_, BusFactorRisk::High) => std::cmp::Ordering::Greater,
444        (BusFactorRisk::Medium, BusFactorRisk::Low) => std::cmp::Ordering::Less,
445        (BusFactorRisk::Low, BusFactorRisk::Medium) => std::cmp::Ordering::Greater,
446    });
447
448    BusFactorAnalysis {
449        total_paths_analyzed: entries.len(),
450        entries,
451        high_risk_count,
452        medium_risk_count,
453    }
454}
455
456/// Calculate technical debt score
457///
458/// Technical debt = change frequency x complexity x age
459///
460/// Formula:
461/// - Churn score: (change_count / max_change_count)
462/// - Complexity score: (lines_changed / max_lines_changed)
463/// - Age score: (days_elapsed / 365) capped at 1.0
464pub fn calculate_tech_debt(
465    events: &[&GitEvent],
466    get_files: impl Fn(&str) -> Option<Vec<String>>,
467    min_commits: usize,
468) -> TechDebtAnalysis {
469    use chrono::Local;
470
471    // Aggregate statistics per file
472    let mut file_stats: HashMap<String, (usize, usize, DateTime<Local>)> = HashMap::new();
473
474    for event in events {
475        if let Some(files) = get_files(&event.short_hash) {
476            let changes_per_file = (event.files_added + event.files_deleted) / files.len().max(1);
477            for file in files {
478                let entry = file_stats.entry(file).or_insert((0, 0, event.timestamp));
479                entry.0 += 1; // change_count
480                entry.1 += changes_per_file; // total_changes
481                                             // Keep the most recent change timestamp
482                if event.timestamp > entry.2 {
483                    entry.2 = event.timestamp;
484                }
485            }
486        }
487    }
488
489    // Calculate maximum values (for normalization)
490    let max_changes = file_stats.values().map(|(c, _, _)| *c).max().unwrap_or(1);
491    let max_lines = file_stats.values().map(|(_, l, _)| *l).max().unwrap_or(1);
492    let now = Local::now();
493
494    let mut entries = Vec::new();
495    let mut total_score = 0.0;
496    let mut high_debt_count = 0;
497
498    for (path, (change_count, total_changes, last_modified)) in &file_stats {
499        if *change_count < min_commits {
500            continue;
501        }
502
503        // Calculate each score
504        let churn_score = *change_count as f64 / max_changes as f64;
505        let complexity_score = *total_changes as f64 / max_lines as f64;
506
507        // Age score: days since last change
508        let days_since_change = (now - *last_modified).num_days().max(0) as f64;
509        let age_score = (days_since_change / DAYS_PER_YEAR).min(1.0);
510
511        // Overall score = churn(50%) + complexity(40%) + age(10%)
512        // However, age becomes a risk when "old and frequently changed",
513        // so the age score is evaluated in combination with churn
514        let score = churn_score * TECH_DEBT_CHURN_WEIGHT
515            + complexity_score * TECH_DEBT_COMPLEXITY_WEIGHT
516            + (churn_score * age_score) * TECH_DEBT_AGE_WEIGHT;
517
518        let debt_level = if score >= TECH_DEBT_HIGH {
519            high_debt_count += 1;
520            TechDebtLevel::High
521        } else if score >= TECH_DEBT_MEDIUM {
522            TechDebtLevel::Medium
523        } else {
524            TechDebtLevel::Low
525        };
526
527        entries.push(TechDebtEntry {
528            path: path.clone(),
529            score,
530            churn_score,
531            complexity_score,
532            age_score,
533            change_count: *change_count,
534            total_changes: *total_changes,
535            debt_level,
536        });
537
538        total_score += score;
539    }
540
541    // Sort by score descending
542    entries.sort_by(|a, b| {
543        b.score
544            .partial_cmp(&a.score)
545            .unwrap_or(std::cmp::Ordering::Equal)
546    });
547
548    let total_files_analyzed = entries.len();
549    let avg_score = if total_files_analyzed > 0 {
550        total_score / total_files_analyzed as f64
551    } else {
552        0.0
553    };
554
555    TechDebtAnalysis {
556        entries,
557        avg_score,
558        high_debt_count,
559        total_files_analyzed,
560    }
561}
562
563/// Calculate project health
564///
565/// # Arguments
566/// * `events` - Git events to analyze
567/// * `files_fn` - Function to get file list from hash
568/// * `quality_analysis` - Pre-computed Quality analysis result (optional)
569/// * `bus_factor` - Pre-computed bus factor analysis result (optional)
570/// * `tech_debt` - Pre-computed technical debt analysis result (optional)
571/// * `heatmap` - File heatmap (for Code Churn calculation)
572pub fn calculate_project_health<F>(
573    events: &[&GitEvent],
574    files_fn: F,
575    quality_analysis: Option<&CommitQualityAnalysis>,
576    bus_factor: Option<&BusFactorAnalysis>,
577    tech_debt_analysis: Option<&TechDebtAnalysis>,
578    heatmap: &FileHeatmap,
579) -> ProjectHealth
580where
581    F: Fn(&str) -> Option<Vec<String>>,
582{
583    if events.is_empty() {
584        return ProjectHealth::default();
585    }
586
587    let total_commits = events.len();
588    let mut alerts = Vec::new();
589
590    // Collect authors
591    let authors: std::collections::HashSet<&str> =
592        events.iter().map(|e| e.author.as_str()).collect();
593    let total_authors = authors.len();
594
595    // Calculate analysis period
596    let analysis_period_days = if events.len() >= 2 {
597        let newest = events.first().map(|e| e.timestamp);
598        let oldest = events.last().map(|e| e.timestamp);
599        if let (Some(n), Some(o)) = (newest, oldest) {
600            let duration = n.signed_duration_since(o);
601            duration.num_days().unsigned_abs()
602        } else {
603            0
604        }
605    } else {
606        0
607    };
608
609    // 1. Quality score (20%)
610    let quality_score = if let Some(qa) = quality_analysis {
611        qa.avg_score
612    } else {
613        // Simple calculation: ratio of Conventional Commits prefixes
614        let conventional_count = events
615            .iter()
616            .filter(|e| {
617                let msg = e.message.to_lowercase();
618                msg.starts_with("feat:")
619                    || msg.starts_with("fix:")
620                    || msg.starts_with("docs:")
621                    || msg.starts_with("style:")
622                    || msg.starts_with("refactor:")
623                    || msg.starts_with("test:")
624                    || msg.starts_with("chore:")
625                    || msg.starts_with("perf:")
626            })
627            .count();
628        conventional_count as f64 / total_commits as f64
629    };
630
631    let conventional_pct = (quality_score * 100.0).round() as u32;
632
633    let quality = HealthScoreComponent {
634        score: quality_score,
635        weight: WEIGHT_QUALITY,
636        description: format!(
637            "Commit quality: {:.0}% conventional commits",
638            quality_score * 100.0
639        ),
640    };
641
642    if quality_score < BUS_FACTOR_CRITICAL {
643        alerts.push(HealthAlert::with_details(
644            HealthAlertKind::LowCommitQuality,
645            AlertSeverity::Warning,
646            "Low commit quality",
647            format!(
648                "Conventional Commit format is {}% of all commits",
649                conventional_pct
650            ),
651        ));
652    }
653
654    // 2. Test health (15%) -- file pattern based + test: prefix only
655    let test_msg_count = events
656        .iter()
657        .filter(|e| e.message.to_lowercase().starts_with("test:"))
658        .count();
659    let test_file_commit_count = events
660        .iter()
661        .filter(|e| {
662            if let Some(files) = files_fn(&e.short_hash) {
663                files.iter().any(|f| is_test_file(f))
664            } else {
665                false
666            }
667        })
668        .count();
669    // Combine (prioritize file-based detection)
670    let test_score = ((test_msg_count as f64 * TEST_MSG_WEIGHT
671        + test_file_commit_count as f64 * TEST_FILE_WEIGHT)
672        / total_commits as f64)
673        .min(1.0);
674
675    let test_health = HealthScoreComponent {
676        score: test_score,
677        weight: WEIGHT_TEST,
678        description: format!(
679            "Test coverage: {:.0}% test-related commits",
680            test_score * 100.0
681        ),
682    };
683
684    if test_score < LOW_TEST_THRESHOLD {
685        let recent_count = events.len().min(RECENT_COMMITS_WINDOW);
686        let recent_test_files = events
687            .iter()
688            .take(recent_count)
689            .filter(|e| {
690                if let Some(files) = files_fn(&e.short_hash) {
691                    files.iter().any(|f| is_test_file(f))
692                } else {
693                    false
694                }
695            })
696            .count();
697        alerts.push(HealthAlert::with_details(
698            HealthAlertKind::LowTestCoverage,
699            AlertSeverity::Info,
700            "Low test coverage in commits",
701            format!(
702                "In the last {} commits, only {} include test file changes",
703                recent_count, recent_test_files
704            ),
705        ));
706    }
707
708    // 3. Bus factor risk (20%) -- Shannon Entropy
709    let bus_factor_score;
710    let mut top_author_info = String::new();
711    if let Some(bf) = bus_factor {
712        // Lower the score the more paths have high bus factor risk
713        let low_risk_count = bf
714            .total_paths_analyzed
715            .saturating_sub(bf.high_risk_count + bf.medium_risk_count);
716        let risk_ratio = bf.high_risk_count as f64
717            / (bf.high_risk_count + bf.medium_risk_count + low_risk_count).max(1) as f64;
718        bus_factor_score = 1.0 - risk_ratio;
719    } else {
720        // Knowledge distribution via Shannon Entropy
721        let mut author_commits: HashMap<&str, usize> = HashMap::new();
722        for e in events.iter() {
723            *author_commits.entry(e.author.as_str()).or_insert(0) += 1;
724        }
725        let total = total_commits as f64;
726        let entropy: f64 = author_commits
727            .values()
728            .map(|&count| {
729                let p = count as f64 / total;
730                if p > 0.0 {
731                    -p * p.ln()
732                } else {
733                    0.0
734                }
735            })
736            .sum();
737        let max_entropy = (total_authors as f64).ln();
738        bus_factor_score = if max_entropy > 0.0 {
739            entropy / max_entropy
740        } else {
741            0.0
742        };
743
744        // Record top author info (for alerts)
745        if let Some((&top_author, &top_count)) = author_commits.iter().max_by_key(|(_, &c)| c) {
746            let pct = (top_count as f64 / total * 100.0).round() as u32;
747            if pct > SINGLE_AUTHOR_CONCENTRATION {
748                top_author_info = format!(
749                    "{}% of commits are by a single author ({})",
750                    pct, top_author
751                );
752            }
753        }
754    }
755
756    let bus_factor_risk = if total_authors <= 1 {
757        HealthScoreComponent {
758            score: 0.0,
759            weight: 0.0,
760            description: "Skipped: solo development".to_string(),
761        }
762    } else {
763        HealthScoreComponent {
764            score: bus_factor_score,
765            weight: WEIGHT_BUS_FACTOR,
766            description: format!(
767                "Knowledge distribution: {:.0}% (higher is better)",
768                bus_factor_score * 100.0
769            ),
770        }
771    };
772
773    if total_authors > 1 {
774        if bus_factor_score < BUS_FACTOR_CRITICAL {
775            let details = if top_author_info.is_empty() {
776                "Knowledge is concentrated in few contributors. Consider knowledge sharing."
777                    .to_string()
778            } else {
779                top_author_info.clone()
780            };
781            alerts.push(HealthAlert::with_details(
782                HealthAlertKind::HighBusFactorRisk,
783                AlertSeverity::Critical,
784                "High bus factor risk",
785                details,
786            ));
787        } else if bus_factor_score < BUS_FACTOR_WARNING {
788            let details = if top_author_info.is_empty() {
789                "Consider improving knowledge distribution across team members.".to_string()
790            } else {
791                top_author_info
792            };
793            alerts.push(HealthAlert::with_details(
794                HealthAlertKind::ModerateBusFactorRisk,
795                AlertSeverity::Warning,
796                "Moderate bus factor risk",
797                details,
798            ));
799        }
800    }
801
802    // 4. Technical debt (20%) - lower is better
803    let tech_debt_score = if let Some(td) = tech_debt_analysis {
804        1.0 - td.avg_score.min(1.0)
805    } else {
806        // Simple calculation: look at the ratio of large changes
807        let large_commits = events
808            .iter()
809            .filter(|e| e.files_added + e.files_deleted > LARGE_COMMIT_CHANGES)
810            .count();
811        let large_ratio = large_commits as f64 / total_commits as f64;
812        1.0 - large_ratio.min(1.0)
813    };
814
815    let tech_debt = HealthScoreComponent {
816        score: tech_debt_score,
817        weight: WEIGHT_TECH_DEBT,
818        description: format!(
819            "Technical debt: {:.0}% clean (higher is better)",
820            tech_debt_score * 100.0
821        ),
822    };
823
824    if tech_debt_score < TECH_DEBT_MEDIUM {
825        alerts.push(HealthAlert::with_details(
826            HealthAlertKind::HighTechDebt,
827            AlertSeverity::Warning,
828            "High technical debt indicated",
829            "Many large commits suggest accumulated technical debt.",
830        ));
831    }
832
833    // 5. Code Churn (15%) -- calculate churn rate from FileHeatmap
834    let churn_score = if heatmap.total_files > 0 {
835        // High frequency change files: 2x or more the average change count
836        let avg_changes = heatmap.files.iter().map(|f| f.change_count).sum::<usize>() as f64
837            / heatmap.total_files as f64;
838        let high_churn_count = heatmap
839            .files
840            .iter()
841            .filter(|f| f.change_count as f64 > avg_changes * HIGH_CHURN_MULTIPLIER)
842            .count();
843        let churn_ratio = high_churn_count as f64 / heatmap.total_files as f64;
844        (1.0 - churn_ratio).max(0.0)
845    } else {
846        0.5 // Default when no data available
847    };
848
849    let code_churn = HealthScoreComponent {
850        score: churn_score,
851        weight: WEIGHT_CHURN,
852        description: format!(
853            "Code churn: {:.0}% stable (higher is better)",
854            churn_score * 100.0
855        ),
856    };
857
858    if churn_score < CHURN_WARNING_THRESHOLD {
859        // Include the file with the most churn in the alert
860        if let Some(top_file) = heatmap.files.first() {
861            alerts.push(HealthAlert::with_details(
862                HealthAlertKind::HighCodeChurn,
863                AlertSeverity::Warning,
864                "High code churn detected",
865                format!(
866                    "{} has been changed {} times in the analysis period",
867                    top_file.path, top_file.change_count
868                ),
869            ));
870        }
871    }
872
873    // 6. Commit Cadence (10%) -- stability of weekly commit frequency
874    let cadence_score = if analysis_period_days >= MIN_PERIOD_FOR_CADENCE {
875        // Aggregate commit counts per week
876        let mut weekly_counts: HashMap<i64, usize> = HashMap::new();
877        if let Some(oldest_ts) = events.last().map(|e| e.timestamp) {
878            for e in events.iter() {
879                let days_since = e
880                    .timestamp
881                    .signed_duration_since(oldest_ts)
882                    .num_days()
883                    .unsigned_abs();
884                let week = (days_since / 7) as i64; // days per week
885                *weekly_counts.entry(week).or_insert(0) += 1;
886            }
887        }
888
889        if weekly_counts.len() >= 2 {
890            let values: Vec<f64> = weekly_counts.values().map(|&v| v as f64).collect();
891            let mean = values.iter().sum::<f64>() / values.len() as f64;
892            if mean > 0.0 {
893                let variance =
894                    values.iter().map(|v| (v - mean).powi(2)).sum::<f64>() / values.len() as f64;
895                let std_dev = variance.sqrt();
896                let cv = std_dev / mean; // Coefficient of variation
897
898                // CV < 0.5: stable (high score), CV > 2.0: unstable (low score)
899                if cv < CV_STABLE {
900                    1.0
901                } else if cv > CV_UNSTABLE {
902                    CADENCE_SCORE_UNSTABLE
903                } else {
904                    // Linear interpolation: CV_STABLE->1.0, CV_UNSTABLE->0.2
905                    1.0 - (cv - CV_STABLE) / CV_RANGE * CV_WEIGHT
906                }
907            } else {
908                0.5
909            }
910        } else {
911            0.5
912        }
913    } else {
914        0.5 // Analysis period less than 2 weeks
915    };
916
917    let commit_cadence = HealthScoreComponent {
918        score: cadence_score,
919        weight: WEIGHT_CADENCE,
920        description: format!(
921            "Commit cadence: {:.0}% stable (higher is better)",
922            cadence_score * 100.0
923        ),
924    };
925
926    // Calculate overall score (normalized by total weight; bus_factor weight=0 for solo development)
927    let total_weight = quality.weight
928        + test_health.weight
929        + bus_factor_risk.weight
930        + tech_debt.weight
931        + code_churn.weight
932        + commit_cadence.weight;
933    let raw = quality.score * quality.weight
934        + test_health.score * test_health.weight
935        + bus_factor_risk.score * bus_factor_risk.weight
936        + tech_debt.score * tech_debt.weight
937        + code_churn.score * code_churn.weight
938        + commit_cadence.score * commit_cadence.weight;
939    let overall = if total_weight > 0.0 {
940        raw / total_weight
941    } else {
942        0.0
943    };
944    let overall_score = (overall * 100.0).round().min(100.0) as u8;
945
946    // Calculate confidence
947    let confidence = if total_commits >= HIGH_CONF_COMMITS
948        && total_authors >= HIGH_CONF_AUTHORS
949        && analysis_period_days >= HIGH_CONF_DAYS
950    {
951        HealthConfidence {
952            level: ConfidenceLevel::High,
953            reason: format!(
954                "{} commits, {} authors, {} days",
955                total_commits, total_authors, analysis_period_days
956            ),
957        }
958    } else if total_commits >= MEDIUM_CONF_COMMITS && analysis_period_days >= MEDIUM_CONF_DAYS {
959        HealthConfidence {
960            level: ConfidenceLevel::Medium,
961            reason: format!(
962                "{} commits over {} days",
963                total_commits, analysis_period_days
964            ),
965        }
966    } else {
967        HealthConfidence {
968            level: ConfidenceLevel::Low,
969            reason: format!(
970                "Only {} commits, {} authors, {} days — results may not be representative",
971                total_commits, total_authors, analysis_period_days
972            ),
973        }
974    };
975
976    // Sort alerts by severity
977    alerts.sort_by(|a, b| b.severity.cmp(&a.severity));
978
979    ProjectHealth {
980        overall_score,
981        quality,
982        test_health,
983        bus_factor_risk,
984        tech_debt,
985        code_churn,
986        commit_cadence,
987        alerts,
988        total_commits,
989        total_authors,
990        analysis_period_days,
991        confidence,
992    }
993}
994
#[cfg(test)]
// Some fixtures below are written with `vec![…]` even though they are only ever borrowed as a
// slice (see `test_calculate_project_health_with_events`); clippy would flag those.
#[allow(clippy::useless_vec)]
mod tests {
    use super::*;
    use crate::stats::AggregationLevel;
    use chrono::Local;

    // ===== Fixtures =====

    /// Builds a commit by `author`, timestamped "now", with a fixed hash and message.
    fn create_test_event(author: &str, insertions: usize, deletions: usize) -> GitEvent {
        GitEvent::commit(
            "abc1234".to_string(),
            "test commit".to_string(),
            author.to_string(),
            Local::now(),
            insertions,
            deletions,
        )
    }

    /// Builds a commit with a caller-chosen hash and message, authored by `"author"`.
    fn create_test_event_for_quality(
        hash: &str,
        message: &str,
        insertions: usize,
        deletions: usize,
    ) -> GitEvent {
        GitEvent::commit(
            hash.to_string(),
            message.to_string(),
            "author".to_string(),
            Local::now(),
            insertions,
            deletions,
        )
    }

    /// Builds `count` commits by `author`, each with the short hash `{hash_prefix}{index}`.
    fn commits_by(
        author: &str,
        count: usize,
        hash_prefix: &str,
        insertions: usize,
        deletions: usize,
    ) -> Vec<GitEvent> {
        (0..count)
            .map(|i| {
                let mut event = create_test_event(author, insertions, deletions);
                event.short_hash = format!("{}{}", hash_prefix, i);
                event
            })
            .collect()
    }

    /// Heatmap that contains no files.
    fn empty_heatmap() -> FileHeatmap {
        FileHeatmap {
            files: vec![],
            total_files: 0,
            aggregation_level: AggregationLevel::Files,
        }
    }

    /// Runs `calculate_project_health` over `events` with an empty heatmap, a lookup closure
    /// that always yields `None`, and `None` for the three remaining arguments.
    fn health_of(events: &[GitEvent]) -> ProjectHealth {
        let refs: Vec<&GitEvent> = events.iter().collect();
        calculate_project_health(&refs, |_| None, None, None, None, &empty_heatmap())
    }

    /// Default `ProjectHealth` with only `overall_score` overridden.
    fn health_with_score(overall_score: u8) -> ProjectHealth {
        ProjectHealth {
            overall_score,
            ..Default::default()
        }
    }

    /// Whether any alert message mentions the bus factor.
    fn has_bus_factor_alert(health: &ProjectHealth) -> bool {
        health
            .alerts
            .iter()
            .any(|alert| alert.message.contains("bus factor"))
    }

    // ===== Project Health Tests =====

    #[test]
    fn test_project_health_default() {
        let health = ProjectHealth::default();
        assert_eq!(health.overall_score, 50);
        assert_eq!(health.level(), "Needs Work");
    }

    #[test]
    fn test_project_health_level() {
        let cases = [
            (95, "Excellent"),
            (80, "Good"),
            (65, "Fair"),
            (50, "Needs Work"),
            (35, "Poor"),
            (20, "Critical"),
        ];
        for &(score, expected) in &cases {
            assert_eq!(
                health_with_score(score).level(),
                expected,
                "level for score {}",
                score
            );
        }
    }

    #[test]
    fn test_project_health_score_bar() {
        let cases = [
            (100, "██████████"),
            (50, "█████░░░░░"),
            (0, "░░░░░░░░░░"),
        ];
        for &(score, expected) in &cases {
            assert_eq!(
                health_with_score(score).score_bar(),
                expected,
                "score bar for score {}",
                score
            );
        }
    }

    #[test]
    fn test_alert_severity_ordering() {
        assert!(AlertSeverity::Critical > AlertSeverity::Warning);
        assert!(AlertSeverity::Warning > AlertSeverity::Info);
    }

    #[test]
    fn test_calculate_project_health_empty() {
        let health = health_of(&[]);
        assert_eq!(health.overall_score, 50);
        assert_eq!(health.total_commits, 0);
    }

    #[test]
    fn test_calculate_project_health_with_events() {
        let events = vec![
            create_test_event_for_quality("hash1", "feat: add new feature", 50, 10),
            create_test_event_for_quality("hash2", "fix: bug fix", 20, 5),
            create_test_event_for_quality("hash3", "test: add tests", 30, 0),
        ];

        let health = health_of(&events);

        assert_eq!(health.total_commits, 3);
        assert!(health.overall_score > 0);
        // Quality score should be high since they are Conventional commits
        assert!(health.quality.score > 0.5);
    }

    #[test]
    fn test_calculate_project_health_alerts() {
        // Single author only (solo development) -> bus factor alerts are skipped
        let solo_events = commits_by("single_author", 10, "hash", 10, 5);
        let solo_health = health_of(&solo_events);

        // Bus factor alerts are not generated for solo development
        assert!(!has_bus_factor_alert(&solo_health));
        // bus_factor_risk weight should be 0.0
        assert_eq!(solo_health.bus_factor_risk.weight, 0.0);

        // 2 or more authors -> bus factor alert should be triggered.
        // Ten commits from one author plus a single commit from another concentrates
        // knowledge heavily enough to raise the alert.
        let mut team_events = commits_by("author_a", 10, "hash_a", 10, 5);
        team_events.extend(commits_by("author_b", 1, "hash_b", 1, 0));
        let team_health = health_of(&team_events);

        assert!(has_bus_factor_alert(&team_health));
        // Compare against the constant rather than a literal so the test tracks re-weighting.
        assert_eq!(team_health.bus_factor_risk.weight, WEIGHT_BUS_FACTOR);
    }

    #[test]
    fn test_is_test_file() {
        let test_paths = [
            "src/foo_test.rs",
            "src/foo_test.go",
            "src/foo.test.ts",
            "src/foo.test.js",
            "src/foo.spec.ts",
            "src/foo.spec.js",
            "tests/test_main.py",
            "__tests__/foo.js",
            "spec/helper.rb",
        ];
        for &path in &test_paths {
            assert!(is_test_file(path), "expected a test file: {}", path);
        }

        // Verify no false positives
        let non_test_paths = [
            "src/contest.rs",
            "src/latest.ts",
            "src/main.rs",
            "protest.js",
        ];
        for &path in &non_test_paths {
            assert!(!is_test_file(path), "expected a non-test file: {}", path);
        }
    }

    #[test]
    fn test_confidence_levels() {
        // 100 commits, one per day going back in time (newest first), rotating over 3 authors.
        let events: Vec<GitEvent> = (0..100_i64)
            .map(|i| {
                let author = match i % 3 {
                    0 => "Alice",
                    1 => "Bob",
                    _ => "Charlie",
                };
                let mut event = create_test_event(author, 10, 5);
                event.short_hash = format!("hash{}", i);
                event.timestamp = Local::now() - chrono::Duration::days(i);
                event
            })
            .collect();

        let health = health_of(&events);
        assert_eq!(health.confidence.level, ConfidenceLevel::High);
    }

    #[test]
    fn test_confidence_low_for_small_history() {
        let events = commits_by("Alice", 5, "hash", 10, 5);
        // Both the High and the Medium branch require at least MEDIUM_CONF_COMMITS commits.
        assert!(events.len() < MEDIUM_CONF_COMMITS);

        let health = health_of(&events);
        assert_eq!(health.confidence.level, ConfidenceLevel::Low);
    }
}
gitstack/stats/health.rs

gitstack/stats/
health.rs