// kimun 0.20.0
//
// Code metrics tool — health score, complexity, duplication, hotspots, ownership
//! Knowledge map analyzer — code ownership via git blame.
//!
//! For each file, determines the primary owner, ownership concentration,
//! contributor count, and bus-factor risk level. Optionally detects
//! knowledge loss when the primary owner is no longer active.
use std::collections::{HashMap, HashSet};
use std::path::PathBuf;

use serde::Serialize;

use crate::git::BlameInfo;

/// Bus-factor risk level based on ownership concentration.
///
/// Thresholds are assigned by `classify_risk`; see that function for the
/// exact rules.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
pub enum RiskLevel {
    /// A single owner holds ≥80% of the file's lines.
    Critical,
    /// The top owner holds ≥60% of the file's lines.
    High,
    /// The top 2-3 owners together hold ≥80% of the file's lines.
    Medium,
    /// Ownership is spread more evenly than the above.
    Low,
}

impl RiskLevel {
    /// Human-readable label for display in reports.
    pub fn label(&self) -> &'static str {
        self.rank_and_label().1
    }

    /// Numeric key for sorting risk levels (0 = most critical).
    pub fn sort_key(&self) -> u8 {
        self.rank_and_label().0
    }

    /// Single source of truth pairing each level with its sort rank
    /// and report label, so the two public accessors cannot drift apart.
    fn rank_and_label(&self) -> (u8, &'static str) {
        match self {
            RiskLevel::Critical => (0, "CRITICAL"),
            RiskLevel::High => (1, "HIGH"),
            RiskLevel::Medium => (2, "MEDIUM"),
            RiskLevel::Low => (3, "LOW"),
        }
    }
}

/// Internal representation of a single author's contribution to a file.
struct AuthorContribution {
    /// Author display name, copied from the blame record.
    author: String,
    /// Author email; used as the identity key for the activity check.
    email: String,
    /// Share of the file's blame lines attributed to this author (0.0–100.0).
    percentage: f64,
    /// True when this author's email appears in `recent_authors`.
    active: bool,
}

/// Ownership analysis result for a single file.
pub struct FileOwnership {
    /// Path of the analyzed file.
    pub path: PathBuf,
    /// Detected language of the file.
    pub language: String,
    /// Total blame lines across all authors (0 when no blame data).
    pub total_lines: usize,
    /// Display name of the author owning the most lines ("unknown" if none).
    pub primary_owner: String,
    /// Email of the primary owner (empty if none).
    pub primary_email: String,
    /// Percentage of lines owned by the primary owner (0.0–100.0).
    pub ownership_pct: f64,
    /// Number of distinct contributors with blame lines.
    pub contributors: usize,
    /// Bus-factor risk classification for this file.
    pub risk: RiskLevel,
    /// True when recent-author data exists and the primary owner is inactive.
    pub knowledge_loss: bool,
}

/// Compute ownership metrics from git blame data for a single file.
///
/// `blames` need not be pre-sorted: contributions are ranked by line share
/// here before picking the primary owner and classifying risk, so the
/// result no longer depends on the caller's ordering. (The sort is stable,
/// so already-sorted input produces identical results to before.)
///
/// Uses `recent_authors` (if non-empty) to detect knowledge loss risk.
pub fn compute_ownership(
    path: PathBuf,
    language: &str,
    blames: &[BlameInfo],
    recent_authors: &HashSet<String>,
) -> FileOwnership {
    let total_lines: usize = blames.iter().map(|b| b.lines).sum();

    // Degenerate case: no blame data or an empty file — nothing to attribute.
    if total_lines == 0 || blames.is_empty() {
        return FileOwnership {
            path,
            language: language.to_string(),
            total_lines: 0,
            primary_owner: "unknown".to_string(),
            primary_email: String::new(),
            ownership_pct: 0.0,
            contributors: 0,
            risk: RiskLevel::Low,
            knowledge_loss: false,
        };
    }

    let mut contributions: Vec<AuthorContribution> = blames
        .iter()
        .map(|b| {
            let pct = (b.lines as f64 / total_lines as f64) * 100.0;
            AuthorContribution {
                author: b.author.clone(),
                email: b.email.clone(),
                percentage: pct,
                active: recent_authors.contains(&b.email),
            }
        })
        .collect();

    // Rank by share descending so that `contributions[0]` and
    // `classify_risk`'s top-3 assumption hold regardless of input order.
    // Percentages are finite (line count / positive total), so partial_cmp
    // only yields None for NaN, which cannot occur here.
    contributions.sort_by(|a, b| {
        b.percentage
            .partial_cmp(&a.percentage)
            .unwrap_or(std::cmp::Ordering::Equal)
    });

    let primary = &contributions[0];
    let risk = classify_risk(&contributions);
    // Knowledge loss only makes sense when we actually know who is active.
    let knowledge_loss = !recent_authors.is_empty() && !primary.active;

    FileOwnership {
        path,
        language: language.to_string(),
        total_lines,
        primary_owner: primary.author.clone(),
        primary_email: primary.email.clone(),
        ownership_pct: primary.percentage,
        contributors: contributions.len(),
        risk,
        knowledge_loss,
    }
}

/// Classify bus-factor risk based on ownership concentration.
/// Critical: single owner ≥80%. High: top owner ≥60%.
/// Medium: top 2-3 owners combine ≥80%. Low: otherwise.
fn classify_risk(contributors: &[AuthorContribution]) -> RiskLevel {
    // No contributors at all → nothing concentrated, nothing at risk.
    let top = match contributors.first() {
        Some(c) => c,
        None => return RiskLevel::Low,
    };

    if top.percentage >= 80.0 {
        RiskLevel::Critical
    } else if top.percentage >= 60.0 {
        RiskLevel::High
    } else {
        // Combined share of the (up to) three largest contributors.
        let leading: f64 = contributors.iter().take(3).map(|c| c.percentage).sum();
        if leading >= 80.0 {
            RiskLevel::Medium
        } else {
            RiskLevel::Low
        }
    }
}

/// Aggregated ownership summary for a single author across all files they own.
pub struct AuthorSummary {
    /// Author display name (matches `FileOwnership::primary_owner`).
    pub author: String,
    /// Number of files where this author is the primary owner.
    pub files_owned: usize,
    /// Total lines across all owned files.
    pub total_lines: usize,
    /// Unique languages across owned files (sorted).
    pub languages: Vec<String>,
    /// Worst risk level among owned files.
    pub worst_risk: RiskLevel,
    /// Number of owned files flagged for knowledge loss (primary owner inactive).
    pub knowledge_loss_files: usize,
}

/// Aggregate per-file ownership data by primary owner.
/// Each entry in the result represents one author's total footprint.
pub fn aggregate_by_author(files: &[FileOwnership]) -> Vec<AuthorSummary> {
    use std::collections::{BTreeMap, BTreeSet};

    /// Running totals accumulated per author while scanning the files.
    struct Acc {
        files_owned: usize,
        total_lines: usize,
        languages: BTreeSet<String>,
        worst_risk: RiskLevel,
        knowledge_loss_files: usize,
    }

    // BTreeMap keeps the final output deterministically sorted by author.
    let mut by_author: BTreeMap<String, Acc> = BTreeMap::new();

    for file in files {
        let acc = by_author
            .entry(file.primary_owner.clone())
            .or_insert_with(|| Acc {
                files_owned: 0,
                total_lines: 0,
                languages: BTreeSet::new(),
                worst_risk: RiskLevel::Low,
                knowledge_loss_files: 0,
            });
        acc.files_owned += 1;
        acc.total_lines += file.total_lines;
        acc.languages.insert(file.language.clone());
        // Lower sort key = more severe; keep the most severe risk seen.
        if file.risk.sort_key() < acc.worst_risk.sort_key() {
            acc.worst_risk = file.risk;
        }
        if file.knowledge_loss {
            acc.knowledge_loss_files += 1;
        }
    }

    by_author
        .into_iter()
        .map(|(author, acc)| AuthorSummary {
            author,
            files_owned: acc.files_owned,
            total_lines: acc.total_lines,
            languages: acc.languages.into_iter().collect(),
            worst_risk: acc.worst_risk,
            knowledge_loss_files: acc.knowledge_loss_files,
        })
        .collect()
}

/// A single contributor's share in the project-wide bus factor.
pub struct BusFactorEntry {
    /// Contributor's author name (map key from `compute_bus_factor` input).
    pub author: String,
    /// Total lines attributed to this author across all analyzed files.
    pub lines: usize,
    /// This author's share of total project lines.
    pub pct: f64,
    /// Cumulative coverage including this and all higher-ranked authors.
    pub cumulative_pct: f64,
    /// True while this author is still needed to reach the threshold.
    /// The last `is_critical = true` entry is the one that pushes cumulative
    /// coverage over the threshold.
    pub is_critical: bool,
}

/// Project-wide bus factor result.
pub struct BusFactor {
    /// Minimum number of contributors whose combined ownership covers `threshold`%.
    /// Zero when there were no blame lines to analyze.
    pub factor: usize,
    /// Threshold used (e.g. 80.0 for 80%).
    pub threshold: f64,
    /// Total blame lines across all analyzed files.
    pub total_lines: usize,
    /// All contributors sorted by lines owned descending, annotated with
    /// cumulative coverage and criticality.
    pub contributors: Vec<BusFactorEntry>,
}

/// Compute the project bus factor from a map of author → total blame lines.
///
/// The bus factor is the smallest N such that the top N contributors
/// together own ≥ `threshold`% of all code. A bus factor of 1 means
/// a single person owns most of the project — extremely high risk.
pub fn compute_bus_factor(author_lines: &HashMap<String, usize>, threshold: f64) -> BusFactor {
    let total_lines: usize = author_lines.values().sum();

    // Nothing analyzed → empty result with a bus factor of zero.
    if total_lines == 0 {
        return BusFactor {
            factor: 0,
            threshold,
            total_lines: 0,
            contributors: Vec::new(),
        };
    }

    // Rank authors by lines descending; tie-break on name for determinism
    // (HashMap iteration order is otherwise unstable).
    let mut ranked: Vec<(String, usize)> = author_lines
        .iter()
        .map(|(name, &lines)| (name.clone(), lines))
        .collect();
    ranked.sort_by(|lhs, rhs| rhs.1.cmp(&lhs.1).then_with(|| lhs.0.cmp(&rhs.0)));

    // Walk the ranking, accumulating coverage. An author is "critical"
    // when coverage before counting them is still below the threshold.
    let mut covered = 0.0;
    let contributors: Vec<BusFactorEntry> = ranked
        .into_iter()
        .map(|(author, lines)| {
            let pct = lines as f64 / total_lines as f64 * 100.0;
            let is_critical = covered < threshold;
            covered += pct;
            BusFactorEntry {
                author,
                lines,
                pct,
                cumulative_pct: covered,
                is_critical,
            }
        })
        .collect();

    let factor = contributors.iter().filter(|e| e.is_critical).count();
    BusFactor {
        factor,
        threshold,
        total_lines,
        contributors,
    }
}

// Unit tests live in a sibling file to keep this module focused.
#[cfg(test)]
#[path = "analyzer_test.rs"]
mod tests;