travelagent-core 1.10.3

//! Commit-level risk scoring for tour-guide batching.
//!
//! A deterministic, rules-based system that scores each hunk by change type
//! (formatting / documentation / config / code) and aggregates hunk → file →
//! commit via max (worst-hunk-wins). Project-specific glob overrides, set in
//! the global `[risk]` section of `$XDG_CONFIG_HOME/travelagent/config.toml`
//! (falling back to `~/.config/travelagent/config.toml` on Unix, or the
//! platform-appropriate config dir on Windows), take precedence over the
//! change-type score in both directions: a matching rule can raise or lower
//! the risk.
//!
//! Per-repo overrides: a `<repo_root>/.travelagent/config.toml` file can
//! override the global `[risk]` section wholesale (the repo's section
//! fully replaces the global one when present). See
//! [`crate::config::merge_overrides`] for the merge semantics.
//!
//! The core types and defaults live here so both the core model
//! (TourStop/TourState) and the downstream TUI/MCP layers can consume them
//! without pulling in extra dependencies.

use std::path::{Path, PathBuf};

use serde::{Deserialize, Serialize};

use crate::model::{DiffHunk, LineOrigin};

/// Integer risk score, clamped to the inclusive range 0..=5.
///
/// Lower numbers indicate safer changes (formatting, docs) and higher numbers
/// indicate riskier changes (crypto, auth, generated code). Stored as a
/// transparent `u8` so session JSON stays tiny.
#[derive(
    Debug, Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Hash, Serialize, Deserialize, Default,
)]
#[serde(transparent)]
pub struct RiskScore(u8);

impl RiskScore {
    pub const MIN: Self = Self(0);
    pub const MAX: Self = Self(5);

    /// Construct a score, clamping values above 5 to [`RiskScore::MAX`].
    pub fn new(v: u8) -> Self {
        Self(v.min(5))
    }

    pub fn as_u8(self) -> u8 {
        self.0
    }

    /// Used as the `#[serde(default = ...)]` default for backward-compatible
    /// deserialization of session JSON written before risk scoring landed.
    pub fn min() -> Self {
        Self::MIN
    }
}

impl From<u8> for RiskScore {
    fn from(v: u8) -> Self {
        Self::new(v)
    }
}

/// Three-way bucketing of a [`RiskScore`] for UI affordances (colored diff
/// borders etc.) that want "low / medium / high" instead of the raw 0–5
/// number. The mapping lives in [`RiskBand::for_score`], right next to the
/// score type, so one edit updates everything that depends on the bucketing.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RiskBand {
    /// Scores 0–1 (formatting, docs). Visually subtle / green-ish.
    Low,
    /// Scores 2–3 (config, routine code). Yellow-ish "pay attention" band.
    Medium,
    /// Scores 4–5 (high-risk paths, crypto/auth overrides). Red-ish "slow down".
    High,
}

impl RiskBand {
    /// Map a [`RiskScore`] onto its band:
    ///
    /// | Score | Band   |
    /// |-------|--------|
    /// | 0–1   | Low    |
    /// | 2–3   | Medium |
    /// | 4–5   | High   |
    ///
    /// The cut-offs are deliberately hard-coded rather than configurable so
    /// "green / yellow / red" means the same thing on every install; what
    /// stays tunable is the per-change-type risk in the `[risk]` section of
    /// the TOML config.
    #[must_use]
    pub fn for_score(score: RiskScore) -> Self {
        let raw = score.as_u8();
        if raw <= 1 {
            Self::Low
        } else if raw <= 3 {
            Self::Medium
        } else {
            // Everything from 4 upwards lands in the top band.
            Self::High
        }
    }
}

/// Kind of change a hunk represents. A single hunk may match multiple kinds
/// (e.g. a reformat of a TOML file matches both `Formatting` and `Config`);
/// the scorer picks the max risk of the matched kinds.
///
/// Serialized in `snake_case`, i.e. with the same spellings that
/// `ChangeType::id` returns.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ChangeType {
    /// Removed and added lines are identical once whitespace is stripped.
    Formatting,
    /// The path matches a documentation glob, or every changed line is a
    /// comment in a language with known comment syntax.
    Documentation,
    /// The path matches a config glob.
    Config,
    /// Fallback bucket used when none of the other kinds apply.
    Code,
}

impl ChangeType {
    pub fn id(self) -> &'static str {
        match self {
            Self::Formatting => "formatting",
            Self::Documentation => "documentation",
            Self::Config => "config",
            Self::Code => "code",
        }
    }
}

/// Extension-based buckets. Globs use a tiny subset of glob syntax — see
/// [`glob_match`] for the supported patterns.
///
/// `#[serde(default)]` means a field omitted from the config falls back to
/// the value produced by this type's `Default` impl.
#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
#[serde(default)]
pub struct RiskExtensions {
    /// Globs whose matching paths are classified as documentation.
    pub documentation: Vec<String>,
    /// Globs whose matching paths are classified as config.
    pub config: Vec<String>,
}

impl Default for RiskExtensions {
    /// Built-in documentation and config globs used when the config file
    /// does not provide its own lists.
    fn default() -> Self {
        const DOCUMENTATION: [&str; 3] = ["*.md", "*.rst", "docs/**"];
        const CONFIG: [&str; 10] = [
            "*.toml", "*.yaml", "*.yml", "*.json", "*.lock", "*.env*", "*.sql", "*.txt", "*.csv",
            "*.tsv",
        ];

        // Turn a static pattern list into the owned strings the struct stores.
        let owned = |globs: &[&str]| -> Vec<String> {
            globs.iter().map(|glob| (*glob).to_owned()).collect()
        };

        Self {
            documentation: owned(&DOCUMENTATION),
            config: owned(&CONFIG),
        }
    }
}

/// A project-specific override. Matching `glob` against the file path clamps
/// the hunk's risk to exactly `level` — higher or lower than the
/// change-type default. This is the escape hatch for crypto paths (raise to
/// 5) and generated code (drop to 0).
///
/// When several rules match the same path, the first one in the list wins.
#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
pub struct RiskRule {
    /// Pattern matched against the file path via `glob_match`.
    pub glob: String,
    /// Raw risk level to apply; clamped to 0..=5 when converted to a
    /// `RiskScore` at scoring time.
    pub level: u8,
}

/// Config that drives all risk decisions. Loaded from the `[risk]` section of
/// the global `$XDG_CONFIG_HOME/travelagent/config.toml` (falling back to
/// `~/.config/travelagent/config.toml` on Unix, or the platform-appropriate
/// config dir on Windows); missing fields fall back to [`Default`].
///
/// A `<repo_root>/.travelagent/config.toml` file, if present, can override
/// the global `[risk]` section wholesale — the repo's `[risk]` table fully
/// replaces the global one (fields not set in the repo file fall back to
/// [`Default`], not to the global's values). See
/// [`crate::config::merge_overrides`].
///
/// The `default_*` fields hold raw `u8` values; `RiskConfig::risk_for`
/// clamps them to 0..=5 when they are read.
#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
#[serde(default)]
pub struct RiskConfig {
    /// Risk for the fallback `Code` change type (built-in default: 3).
    pub default_code: u8,
    /// Risk for `Config` changes (built-in default: 2).
    pub default_config: u8,
    /// Risk for `Documentation` changes (built-in default: 1).
    pub default_documentation: u8,
    /// Risk for `Formatting` changes (built-in default: 0).
    pub default_formatting: u8,
    /// Globs that sort paths into the documentation / config buckets.
    pub extensions: RiskExtensions,
    /// Path overrides that bypass change-type detection; first match wins.
    pub rules: Vec<RiskRule>,
}

impl Default for RiskConfig {
    fn default() -> Self {
        Self {
            default_code: 3,
            default_config: 2,
            default_documentation: 1,
            default_formatting: 0,
            extensions: RiskExtensions::default(),
            rules: Vec::new(),
        }
    }
}

impl RiskConfig {
    /// Look up the configured risk for `kind` and clamp it to 0..=5.
    pub fn risk_for(&self, kind: ChangeType) -> RiskScore {
        RiskScore::new(match kind {
            ChangeType::Code => self.default_code,
            ChangeType::Config => self.default_config,
            ChangeType::Documentation => self.default_documentation,
            ChangeType::Formatting => self.default_formatting,
        })
    }
}

/// A scored commit ready to be fed to [`build_tour_stops`].
#[derive(Debug, Clone)]
pub struct ScoredCommit {
    /// Commit identifier — presumably the hex object id; confirm against the
    /// code that builds these.
    pub sha: String,
    /// Risk assigned to the commit.
    pub risk: RiskScore,
    /// Short human-readable description — presumably the commit subject
    /// line; confirm against the producer.
    pub summary: String,
}

// ---- change-type detection -------------------------------------------------

/// Detects a pure reformat: the hunk's `-` lines and `+` lines carry the same
/// non-whitespace content, compared pairwise in order.
///
/// Returns `false` when the hunk has no changed lines, when the two sides
/// have a different number of lines, or when any pair differs after all
/// whitespace has been removed.
fn is_pure_formatting(hunk: &DiffHunk) -> bool {
    // Squeeze every whitespace character out of a line so only tokens remain.
    let squeeze =
        |text: &str| -> String { text.chars().filter(|c| !c.is_whitespace()).collect() };

    let mut old_side: Vec<String> = Vec::new();
    let mut new_side: Vec<String> = Vec::new();
    for line in &hunk.lines {
        if line.origin == LineOrigin::Deletion {
            old_side.push(squeeze(&line.content));
        } else if line.origin == LineOrigin::Addition {
            new_side.push(squeeze(&line.content));
        }
    }

    // Equal sides with a non-empty old side implies the new side is
    // non-empty too, which rules out hunks without any changed lines.
    !old_side.is_empty() && old_side == new_side
}

/// Pick the comment syntax for `path` from its file extension
/// (case-insensitive).
///
/// Returns `None` when the path has no extension, the extension is not valid
/// UTF-8, or the language is not recognized, so the caller skips the
/// comment-only heuristic.
fn comment_markers_for(path: &Path) -> Option<CommentSyntax> {
    const SLASH_LINE: &[&str] = &["//"];
    const HASH_LINE: &[&str] = &["#"];
    const C_BLOCK: &[(&str, &str)] = &[("/*", "*/")];
    const NO_BLOCK: &[(&str, &str)] = &[];

    let ext = path.extension()?.to_str()?.to_ascii_lowercase();
    let (line, block) = match ext.as_str() {
        // Rust and the JavaScript/TypeScript family share C-style comments.
        "rs" | "js" | "mjs" | "cjs" | "ts" | "tsx" | "jsx" => (SLASH_LINE, C_BLOCK),
        "py" => (HASH_LINE, NO_BLOCK),
        _ => return None,
    };
    Some(CommentSyntax { line, block })
}

/// Comment markers for one language, as consumed by the comment-only check.
struct CommentSyntax {
    /// Prefixes that start a line comment (e.g. `//` or `#`).
    line: &'static [&'static str],
    /// `(open, close)` marker pairs for block comments; empty when the
    /// language has none.
    block: &'static [(&'static str, &'static str)],
}

/// True when every changed (+/-) line in the hunk is a comment. An empty
/// change set returns `false` so we don't label context-only hunks as docs.
///
/// Block-comment state is tracked separately for the old side (deletions)
/// and the new side (additions). Every other line origin is treated as
/// context shared by both sides and only updates that state, so a `*/` on an
/// unchanged line correctly ends a comment opened by a changed line, and an
/// unterminated `/*` on a deleted line cannot hide code on an added line.
///
/// Limitations (all err towards "not a comment", i.e. higher risk):
/// - The hunk is assumed to start outside any block comment, so lines in the
///   middle of a block whose opener precedes the hunk are not recognized.
/// - Blank changed lines outside a block comment are not comments.
/// - Nested block comments and comment markers inside string literals are
///   not understood.
fn is_comment_only(hunk: &DiffHunk, syntax: &CommentSyntax) -> bool {
    let mut any_changed = false;
    // Close marker of the block comment currently open on each side, if any.
    let mut old_block: Option<&'static str> = None;
    let mut new_block: Option<&'static str> = None;

    for line in &hunk.lines {
        match line.origin {
            LineOrigin::Deletion => {
                any_changed = true;
                let (is_comment, still_open) =
                    scan_comment_line(&line.content, old_block, syntax);
                if !is_comment {
                    return false;
                }
                old_block = still_open;
            }
            LineOrigin::Addition => {
                any_changed = true;
                let (is_comment, still_open) =
                    scan_comment_line(&line.content, new_block, syntax);
                if !is_comment {
                    return false;
                }
                new_block = still_open;
            }
            // Unchanged lines exist in both versions of the file: they never
            // disqualify the hunk, but they can open or close block comments.
            _ => {
                old_block = scan_comment_line(&line.content, old_block, syntax).1;
                new_block = scan_comment_line(&line.content, new_block, syntax).1;
            }
        }
    }
    any_changed
}

/// Scan one line of text for comment syntax.
///
/// `open_block` is the close marker of a block comment still open from an
/// earlier line (or `None`). Returns `(comment_only, open_block_after)`:
/// `comment_only` is `true` when the line holds at least some comment and
/// nothing but comments and whitespace; `open_block_after` is the close
/// marker of a block comment left open at the end of the line.
///
/// Scanning stops at the first piece of code, so a block comment opened
/// *after* code on the same line is not tracked.
fn scan_comment_line(
    content: &str,
    mut open_block: Option<&'static str>,
    syntax: &CommentSyntax,
) -> (bool, Option<&'static str>) {
    // A line that starts inside a block comment already contains comment text.
    let mut saw_comment = open_block.is_some();
    let mut rest = content;
    loop {
        if let Some(close) = open_block {
            match rest.find(close) {
                Some(at) => {
                    rest = &rest[at + close.len()..];
                    open_block = None;
                }
                // The block runs past the end of this line.
                None => return (true, open_block),
            }
        }
        rest = rest.trim_start();
        if rest.is_empty() {
            return (saw_comment, None);
        }
        if syntax.line.iter().any(|marker| rest.starts_with(*marker)) {
            // A line comment swallows the remainder of the line.
            return (true, None);
        }
        match syntax
            .block
            .iter()
            .find(|(open, _)| rest.starts_with(*open))
        {
            Some(&(open, close)) => {
                saw_comment = true;
                rest = &rest[open.len()..];
                open_block = Some(close);
            }
            // Anything else is code.
            None => return (false, None),
        }
    }
}

// Glob matching helpers live in `crate::glob` and are shared with the
// auto-collapse module. Re-imported here to keep the scoring functions terse.
use crate::glob::{any_glob_matches, glob_match, path_str};

/// Return every change type that applies to this (path, hunk).
///
/// The result is never empty: when none of the specialized buckets
/// (formatting / documentation / config) match, it contains exactly
/// [`ChangeType::Code`].
pub fn detect_change_types(path: &Path, hunk: &DiffHunk, cfg: &RiskConfig) -> Vec<ChangeType> {
    // Formatting — identical non-whitespace tokens across -/+ lines.
    let reformat_only = is_pure_formatting(hunk);

    // Documentation — the path matches a doc glob, or failing that the file
    // is in a recognized language and every changed line is a comment. The
    // comment check is only reached when the glob check fails.
    let is_documentation = any_glob_matches(&cfg.extensions.documentation, path)
        || comment_markers_for(path).is_some_and(|syntax| is_comment_only(hunk, &syntax));

    // Config — the path matches a config glob.
    let is_config = any_glob_matches(&cfg.extensions.config, path);

    let mut kinds = Vec::with_capacity(3);
    if reformat_only {
        kinds.push(ChangeType::Formatting);
    }
    if is_documentation {
        kinds.push(ChangeType::Documentation);
    }
    if is_config {
        kinds.push(ChangeType::Config);
    }

    // Code is strictly a fallback. The caller takes the max risk over the
    // returned kinds, so adding Code next to a specialized bucket would
    // always win and make the buckets pointless.
    if kinds.is_empty() {
        kinds.push(ChangeType::Code);
    }
    kinds
}

// ---- scoring ---------------------------------------------------------------

/// Score one hunk under `cfg`.
///
/// A matching glob rule decides the score outright and change-type detection
/// is skipped. Otherwise the score is the highest risk among the change
/// types detected for the hunk.
pub fn score_hunk(path: &Path, hunk: &DiffHunk, cfg: &RiskConfig) -> RiskScore {
    match matching_rule(path, cfg) {
        Some(rule) => RiskScore::new(rule.level),
        None => detect_change_types(path, hunk, cfg)
            .iter()
            // Starting from MIN gives the same result as max-with-fallback:
            // MIN is the lowest score and is also the empty-list fallback.
            .fold(RiskScore::MIN, |worst, &kind| {
                std::cmp::max(worst, cfg.risk_for(kind))
            }),
    }
}

/// Score a file as the max over its hunks.
///
/// A matching glob rule decides the score outright. A file without hunks
/// (e.g. rename-only) is scored by running an empty placeholder hunk through
/// [`score_hunk`], so it still lands in the doc / config / code bucket
/// implied by its path.
pub fn score_file(path: &Path, hunks: &[DiffHunk], cfg: &RiskConfig) -> RiskScore {
    match matching_rule(path, cfg) {
        Some(rule) => RiskScore::new(rule.level),
        None if hunks.is_empty() => {
            // No content to inspect: let the path alone pick the bucket.
            let placeholder = DiffHunk {
                header: String::new(),
                lines: Vec::new(),
                old_start: 0,
                old_count: 0,
                new_start: 0,
                new_count: 0,
            };
            score_hunk(path, &placeholder, cfg)
        }
        None => hunks.iter().fold(RiskScore::MIN, |worst, hunk| {
            std::cmp::max(worst, score_hunk(path, hunk, cfg))
        }),
    }
}

/// Score a whole commit as the max over its file scores. A commit with no
/// files scores [`RiskScore::MIN`].
pub fn score_commit(files: &[(PathBuf, Vec<DiffHunk>)], cfg: &RiskConfig) -> RiskScore {
    let mut worst = RiskScore::MIN;
    for (path, hunks) in files {
        worst = std::cmp::max(worst, score_file(path, hunks, cfg));
    }
    worst
}

/// Find the first rule in `cfg.rules` whose glob matches `path`, if any.
/// Rules are tried in config order, so earlier rules take precedence.
fn matching_rule<'a>(path: &Path, cfg: &'a RiskConfig) -> Option<&'a RiskRule> {
    let candidate = path_str(path);
    for rule in &cfg.rules {
        if glob_match(&rule.glob, &candidate) {
            return Some(rule);
        }
    }
    None
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::{DiffHunk, DiffLine, LineOrigin};

    /// Build a diff line with only the fields the scorer looks at.
    fn mk_line(origin: LineOrigin, content: &str) -> DiffLine {
        DiffLine {
            origin,
            content: content.to_string(),
            old_lineno: None,
            new_lineno: None,
            highlighted_spans: None,
        }
    }

    /// Wrap lines in a hunk with placeholder header and ranges.
    fn mk_hunk(lines: Vec<DiffLine>) -> DiffHunk {
        DiffHunk {
            header: "@@".into(),
            lines,
            old_start: 0,
            old_count: 0,
            new_start: 0,
            new_count: 0,
        }
    }

    #[test]
    fn risk_config_defaults_match_spec() {
        let cfg = RiskConfig::default();
        assert_eq!(cfg.default_code, 3);
        assert_eq!(cfg.default_config, 2);
        assert_eq!(cfg.default_documentation, 1);
        assert_eq!(cfg.default_formatting, 0);
        assert!(cfg.extensions.documentation.iter().any(|g| g == "*.md"));
        assert!(cfg.extensions.config.iter().any(|g| g == "*.toml"));
        assert!(cfg.rules.is_empty());
    }

    #[test]
    fn risk_config_roundtrips_through_toml() {
        let toml_str = r#"
default_code = 4
default_config = 2
default_documentation = 1
default_formatting = 0

[extensions]
documentation = ["*.md", "docs/**"]
config = ["*.toml"]

[[rules]]
glob = "src/crypto/**"
level = 5

[[rules]]
glob = "**/*.generated.rs"
level = 0
"#;
        let cfg: RiskConfig = toml::from_str(toml_str).expect("parse toml");
        assert_eq!(cfg.default_code, 4);
        assert_eq!(cfg.extensions.documentation.len(), 2);
        assert_eq!(cfg.rules.len(), 2);
        assert_eq!(cfg.rules[0].glob, "src/crypto/**");
        assert_eq!(cfg.rules[0].level, 5);
    }

    #[test]
    fn risk_score_clamps_to_max() {
        assert_eq!(RiskScore::new(99).as_u8(), 5);
        assert_eq!(RiskScore::new(0).as_u8(), 0);
        assert_eq!(RiskScore::MAX.as_u8(), 5);
    }

    #[test]
    fn risk_band_low_covers_zero_and_one() {
        assert_eq!(RiskBand::for_score(RiskScore::new(0)), RiskBand::Low);
        assert_eq!(RiskBand::for_score(RiskScore::new(1)), RiskBand::Low);
    }

    #[test]
    fn risk_band_medium_at_two_and_three() {
        // Boundary: 1 is still Low, 2 crosses into Medium; 3 stays Medium, 4
        // crosses into High. Lock the transitions so a future tweak to the
        // boundaries updates this test deliberately.
        assert_eq!(RiskBand::for_score(RiskScore::new(1)), RiskBand::Low);
        assert_eq!(RiskBand::for_score(RiskScore::new(2)), RiskBand::Medium);
        assert_eq!(RiskBand::for_score(RiskScore::new(3)), RiskBand::Medium);
        assert_eq!(RiskBand::for_score(RiskScore::new(4)), RiskBand::High);
    }

    #[test]
    fn risk_band_high_at_max() {
        assert_eq!(RiskBand::for_score(RiskScore::MAX), RiskBand::High);
        assert_eq!(RiskBand::for_score(RiskScore::new(5)), RiskBand::High);
    }

    #[test]
    fn formatting_hunk_in_markdown_file_scores_as_documentation() {
        // Per the gray-area rule, a formatting-only change in a doc path has
        // types {Formatting, Documentation} → max(0, 1) = 1.
        let hunk = mk_hunk(vec![
            mk_line(LineOrigin::Deletion, "  hello  world"),
            mk_line(LineOrigin::Addition, "hello world"),
        ]);
        let cfg = RiskConfig::default();
        let score = score_hunk(&PathBuf::from("README.md"), &hunk, &cfg);
        assert_eq!(score.as_u8(), 1);
    }

    #[test]
    fn pure_formatting_in_doc_file_scores_as_doc_max() {
        // docs/guide.md is a doc-path file. A pure-formatting hunk there
        // matches both Formatting (0) and Documentation (1); max is 1.
        let hunk = mk_hunk(vec![
            mk_line(LineOrigin::Deletion, "foo  bar"),
            mk_line(LineOrigin::Addition, "foo bar"),
        ]);
        let cfg = RiskConfig::default();
        assert_eq!(
            score_hunk(&PathBuf::from("docs/guide.md"), &hunk, &cfg).as_u8(),
            1
        );
    }

    #[test]
    fn pure_formatting_in_code_file_scores_as_formatting() {
        // Gray-area rule: formatting-only in a code path is Formatting (0)
        // only — there's no config/doc match. Max of {0} = 0.
        let hunk = mk_hunk(vec![
            mk_line(LineOrigin::Deletion, "fn foo(){ return 1 ; }"),
            mk_line(LineOrigin::Addition, "fn foo() { return 1; }"),
        ]);
        let cfg = RiskConfig::default();
        assert_eq!(
            score_hunk(&PathBuf::from("src/main.rs"), &hunk, &cfg).as_u8(),
            0
        );
    }

    #[test]
    fn code_change_scores_3_by_default() {
        let hunk = mk_hunk(vec![
            mk_line(LineOrigin::Deletion, "    return None;"),
            mk_line(LineOrigin::Addition, "    return Some(x);"),
        ]);
        let cfg = RiskConfig::default();
        assert_eq!(
            score_hunk(&PathBuf::from("src/main.rs"), &hunk, &cfg).as_u8(),
            3
        );
    }

    #[test]
    fn config_change_scores_2_by_default() {
        let hunk = mk_hunk(vec![
            mk_line(LineOrigin::Deletion, "version = \"1.0\""),
            mk_line(LineOrigin::Addition, "version = \"2.0\""),
        ]);
        let cfg = RiskConfig::default();
        assert_eq!(
            score_hunk(&PathBuf::from("Cargo.toml"), &hunk, &cfg).as_u8(),
            2
        );
    }

    #[test]
    fn documentation_change_scores_1_by_default() {
        let hunk = mk_hunk(vec![
            mk_line(LineOrigin::Deletion, "Old description"),
            mk_line(LineOrigin::Addition, "New description entirely"),
        ]);
        let cfg = RiskConfig::default();
        assert_eq!(
            score_hunk(&PathBuf::from("README.md"), &hunk, &cfg).as_u8(),
            1
        );
    }

    #[test]
    fn glob_override_can_raise_risk() {
        // A doc change (default 1) under src/crypto/** bumps to 5.
        let mut cfg = RiskConfig::default();
        cfg.rules.push(RiskRule {
            glob: "src/crypto/**".into(),
            level: 5,
        });
        let hunk = mk_hunk(vec![
            mk_line(LineOrigin::Deletion, "// old"),
            mk_line(LineOrigin::Addition, "// new"),
        ]);
        assert_eq!(
            score_hunk(&PathBuf::from("src/crypto/keys.md"), &hunk, &cfg).as_u8(),
            5
        );
    }

    #[test]
    fn glob_override_can_lower_risk() {
        // A code change (default 3) on a generated file clamps to 0.
        let mut cfg = RiskConfig::default();
        cfg.rules.push(RiskRule {
            glob: "**/*.generated.rs".into(),
            level: 0,
        });
        let hunk = mk_hunk(vec![
            mk_line(LineOrigin::Deletion, "let x = 1;"),
            mk_line(LineOrigin::Addition, "let x = 42;"),
        ]);
        assert_eq!(
            score_hunk(&PathBuf::from("src/proto/foo.generated.rs"), &hunk, &cfg).as_u8(),
            0
        );
    }

    #[test]
    fn first_matching_rule_wins() {
        // Rules are tried in config order; a later, more specific rule does
        // not override an earlier match.
        let mut cfg = RiskConfig::default();
        cfg.rules.push(RiskRule {
            glob: "src/**".into(),
            level: 5,
        });
        cfg.rules.push(RiskRule {
            glob: "src/gen/**".into(),
            level: 0,
        });
        let hunk = mk_hunk(vec![
            mk_line(LineOrigin::Deletion, "let x = 1;"),
            mk_line(LineOrigin::Addition, "let x = 2;"),
        ]);
        assert_eq!(
            score_hunk(&PathBuf::from("src/gen/out.rs"), &hunk, &cfg).as_u8(),
            5
        );
    }

    #[test]
    fn comment_only_rust_hunk_in_code_file_scores_as_documentation() {
        let hunk = mk_hunk(vec![
            mk_line(LineOrigin::Deletion, "// old comment"),
            mk_line(LineOrigin::Addition, "// new comment"),
        ]);
        let cfg = RiskConfig::default();
        assert_eq!(
            score_hunk(&PathBuf::from("src/main.rs"), &hunk, &cfg).as_u8(),
            1
        );
    }

    #[test]
    fn comment_only_python_hunk_scores_as_documentation() {
        let hunk = mk_hunk(vec![
            mk_line(LineOrigin::Deletion, "# old comment"),
            mk_line(LineOrigin::Addition, "# new comment"),
        ]);
        let cfg = RiskConfig::default();
        assert_eq!(
            score_hunk(&PathBuf::from("tools/gen.py"), &hunk, &cfg).as_u8(),
            1
        );
    }

    #[test]
    fn gray_area_picks_max_of_applicable_types() {
        // Pure-formatting change in a .toml → types {Formatting, Config};
        // max(0, 2) = 2 (Config wins).
        let hunk = mk_hunk(vec![
            mk_line(LineOrigin::Deletion, "key=value"),
            mk_line(LineOrigin::Addition, "key = value"),
        ]);
        let cfg = RiskConfig::default();
        assert_eq!(
            score_hunk(&PathBuf::from("config.toml"), &hunk, &cfg).as_u8(),
            2
        );
    }

    #[test]
    fn score_file_is_max_of_hunks() {
        let low = mk_hunk(vec![
            mk_line(LineOrigin::Deletion, "// a"),
            mk_line(LineOrigin::Addition, "// b"),
        ]);
        let high = mk_hunk(vec![
            mk_line(LineOrigin::Deletion, "let x = 0;"),
            mk_line(LineOrigin::Addition, "let x = panic!();"),
        ]);
        let cfg = RiskConfig::default();
        let s = score_file(&PathBuf::from("src/main.rs"), &[low, high], &cfg);
        assert_eq!(s.as_u8(), 3);
    }

    #[test]
    fn score_file_without_hunks_uses_path_bucket() {
        // Rename-only files have no hunks; the path alone picks the bucket.
        let cfg = RiskConfig::default();
        assert_eq!(score_file(&PathBuf::from("README.md"), &[], &cfg).as_u8(), 1);
        assert_eq!(score_file(&PathBuf::from("Cargo.toml"), &[], &cfg).as_u8(), 2);
        assert_eq!(score_file(&PathBuf::from("src/main.rs"), &[], &cfg).as_u8(), 3);
    }

    #[test]
    fn score_commit_is_max_of_files() {
        let low = mk_hunk(vec![
            mk_line(LineOrigin::Deletion, "old"),
            mk_line(LineOrigin::Addition, "new"),
        ]);
        let high = mk_hunk(vec![
            mk_line(LineOrigin::Deletion, "let x = 0;"),
            mk_line(LineOrigin::Addition, "let x = 1;"),
        ]);
        let cfg = RiskConfig::default();
        let files = vec![
            (PathBuf::from("README.md"), vec![low]),
            (PathBuf::from("src/lib.rs"), vec![high]),
        ];
        assert_eq!(score_commit(&files, &cfg).as_u8(), 3);
    }

    #[test]
    fn score_commit_with_no_files_is_min() {
        assert_eq!(score_commit(&[], &RiskConfig::default()), RiskScore::MIN);
    }

    #[test]
    fn file_matching_no_extension_glob_falls_back_to_code() {
        let hunk = mk_hunk(vec![
            mk_line(LineOrigin::Deletion, "line"),
            mk_line(LineOrigin::Addition, "changed"),
        ]);
        let cfg = RiskConfig::default();
        // `Makefile` (no extension) isn't in any default bucket → Code.
        assert_eq!(
            score_hunk(&PathBuf::from("Makefile"), &hunk, &cfg).as_u8(),
            3
        );
    }

    #[test]
    fn glob_match_handles_double_star_prefix_and_segment() {
        assert!(glob_match("**/*.rs", "src/foo.rs"));
        assert!(glob_match("**/*.rs", "deep/nested/bar.rs"));
        assert!(glob_match("docs/**", "docs/a.md"));
        assert!(glob_match("docs/**", "docs/sub/a.md"));
        assert!(!glob_match("*.rs", "src/foo.rs"));
        assert!(glob_match("*.rs", "foo.rs"));
        assert!(glob_match("*.env*", ".env.local"));
    }
}