helen 0.1.0

Repository review gate.
Documentation
//! Elenchus risk scanning.

use std::{ffi::OsStr, fmt::Write as _, path::Path};

/// Risk state detected from the current diff.
///
/// Bundles the detected risk reasons with the bookkeeping for a risk override
/// and for review reuse. Prefer [`RiskState::new`] over the derived `Default`:
/// `new` sets `override_for_summary` to `"disabled"`, whereas `Default` leaves
/// it as the empty string.
#[derive(Clone, Debug, Default)]
pub(super) struct RiskState {
    /// Human-readable risk reasons.
    ///
    /// An empty list means no risky change was detected.
    pub(super) reasons: Vec<String>,
    /// Whether a reviewed risk token was accepted.
    pub(super) override_accepted: bool,
    /// Summary field for risk override status.
    ///
    /// Starts as `"disabled"` when the state is built with `new`.
    pub(super) override_for_summary: &'static str,
    /// Accepted risk token, if any.
    ///
    /// NOTE(review): presumably left empty when no token was accepted —
    /// confirm against the code that fills this field.
    pub(super) acceptance_token: String,
    /// Whether the elenchus reused a previous passing review.
    pub(super) review_reused: bool,
    /// Cached review hash recorded during token validation.
    pub(super) review_cache_hash: String,
}

impl RiskState {
    /// Builds an empty risk state whose override summary reads `"disabled"`.
    ///
    /// Every other field takes its `Default` value: no reasons, no accepted
    /// override, empty token and hash, and no reused review.
    pub(super) fn new() -> Self {
        Self {
            override_for_summary: "disabled",
            ..Self::default()
        }
    }

    /// Reports whether at least one risky-change reason has been recorded.
    pub(super) const fn has_reasons(&self) -> bool {
        !self.reasons.is_empty()
    }

    /// Formats the recorded reasons as a bullet list for the review prompt.
    ///
    /// Each reason becomes a `- reason` line terminated by a newline. With no
    /// reasons recorded the result is the placeholder `- none`, which carries
    /// no trailing newline.
    pub(super) fn reasons_for_review(&self) -> String {
        if self.reasons.is_empty() {
            return String::from("- none");
        }

        self.reasons
            .iter()
            .fold(String::new(), |mut bullets, reason| {
                // `String`'s `fmt::Write` implementation never reports an
                // error, so the result is bound only to discard it explicitly.
                let _result = writeln!(bullets, "- {reason}");
                bullets
            })
    }
}

/// Collects the risky-change reasons triggered by a change set.
///
/// `changed_files` is read as one path per line; `diff_u0` is diff text whose
/// added lines are inspected. Five independent checks run, and the reasons of
/// those that fire are returned in this fixed order:
///
/// 1. a changed file named `Cargo.toml` or `Cargo.lock`;
/// 2. a changed path with the `sql` extension, or with a path component equal
///    to `migration`, `migrations`, `schema` or `db`;
/// 3. an added line containing the word `unsafe`;
/// 4. an added line that introduces a public API item;
/// 5. an added line containing a security-related word, in any ASCII case.
///
/// Returns an empty vector when nothing fires.
pub(super) fn detect_risk_reasons(changed_files: &str, diff_u0: &str) -> Vec<String> {
    /// Words that mark an added line as security-sensitive.
    const SENSITIVE_WORDS: [&str; 9] = [
        "auth",
        "token",
        "secret",
        "password",
        "crypto",
        "encrypt",
        "decrypt",
        "signature",
        "credential",
    ];

    // Only the final path component decides whether a file is a Cargo manifest
    // or lockfile, so nested workspace members are caught as well.
    let is_manifest = |file: &str| {
        matches!(
            Path::new(file).file_name().and_then(OsStr::to_str),
            Some("Cargo.toml" | "Cargo.lock")
        )
    };

    // A path is database-related through its extension or through any single
    // directory or file component carrying one of the well-known names.
    let touches_database = |file: &str| {
        let path = Path::new(file);
        let has_sql_extension = path.extension().and_then(OsStr::to_str) == Some("sql");
        has_sql_extension
            || path.components().any(|component| {
                matches!(
                    component.as_os_str().to_str(),
                    Some("migration" | "migrations" | "schema" | "db")
                )
            })
    };

    let mentions_sensitive_word = |line: &str| {
        SENSITIVE_WORDS
            .iter()
            .any(|word| contains_word_case_insensitive(line, word))
    };

    // Table order is the order in which reasons are reported.
    let findings = [
        (
            changed_files.lines().any(is_manifest),
            "dependency or lockfile change",
        ),
        (
            changed_files.lines().any(touches_database),
            "database/schema/migration change",
        ),
        (
            added_lines(diff_u0).any(|line| contains_word(line, "unsafe")),
            "new unsafe usage",
        ),
        (
            added_lines(diff_u0).any(contains_public_api_surface),
            "public API surface change",
        ),
        (
            added_lines(diff_u0).any(mentions_sensitive_word),
            "security-sensitive code path",
        ),
    ];

    findings
        .iter()
        .filter(|(fired, _)| *fired)
        .map(|(_, reason)| String::from(*reason))
        .collect()
}

/// Iterates added diff lines, excluding file headers.
///
/// Yields every line that starts with `+`, with that marker removed.
///
/// A `+++ path` line counts as a file header only while no hunk is open, that
/// is, before the first `@@` line of a file section. A line starting with
/// `diff ` closes the current hunk. Inside a hunk, a line that starts with
/// `+++` is an added line whose own content begins with `++`; it is yielded
/// instead of being mistaken for a header, so such lines cannot slip past the
/// scans that consume this iterator.
///
/// Input that contains no `@@` markers keeps the simple rule: `+` lines are
/// yielded unless they start with `+++`.
fn added_lines(diff: &str) -> impl Iterator<Item = &str> {
    let mut in_hunk = false;
    diff.lines().filter_map(move |line| {
        // Hunk content is always prefixed with ' ', '+', '-' or '\', so a line
        // starting with "@@" or "diff " can only be diff structure.
        if line.starts_with("@@") {
            in_hunk = true;
            return None;
        }
        if line.starts_with("diff ") {
            in_hunk = false;
            return None;
        }

        let content = line.strip_prefix('+')?;
        if !in_hunk && content.starts_with("++") {
            // "+++ path" ahead of the first hunk: the new-file header.
            return None;
        }
        Some(content)
    })
}

/// Returns true when a line adds a public Rust API item.
///
/// The line is split into identifier words. It matches when the word `pub` is
/// followed by one of `trait`, `struct`, `enum`, `fn`, `type` or `mod`,
/// optionally with the qualifier keywords `const`, `async`, `unsafe` or
/// `extern` in between. That covers `pub const fn`, `pub async fn`,
/// `pub unsafe fn` and `pub unsafe trait` as well as the bare forms.
///
/// Restricted visibility such as `pub(crate) fn` does not match, because the
/// word after `pub` is then `crate` rather than an item keyword or qualifier.
///
/// NOTE(review): `pub extern "C" fn` is not recognised — the ABI string
/// leaves an extra word between `extern` and `fn`.
fn contains_public_api_surface(line: &str) -> bool {
    let tokens = words(line).collect::<Vec<_>>();
    tokens.iter().enumerate().any(|(index, token)| {
        *token == "pub"
            && tokens
                .iter()
                .skip(index + 1)
                // Qualifiers may sit between `pub` and the item keyword.
                .find(|next| !matches!(**next, "const" | "async" | "unsafe" | "extern"))
                .is_some_and(|item| {
                    matches!(
                        *item,
                        "trait" | "struct" | "enum" | "fn" | "type" | "mod"
                    )
                })
    })
}

/// Reports whether `needle` occurs in `line` as a whole identifier word.
///
/// The comparison is exact, including letter case; a needle that is only part
/// of a longer identifier does not count.
fn contains_word(line: &str, needle: &str) -> bool {
    for candidate in words(line) {
        if candidate == needle {
            return true;
        }
    }
    false
}

/// Reports whether `needle` occurs in `line` as a whole identifier word,
/// ignoring ASCII letter case.
///
/// A needle that is only part of a longer identifier does not count.
fn contains_word_case_insensitive(line: &str, needle: &str) -> bool {
    for candidate in words(line) {
        if candidate.eq_ignore_ascii_case(needle) {
            return true;
        }
    }
    false
}

/// Breaks a line into identifier-like words.
///
/// A word is a maximal run of ASCII letters, ASCII digits and underscores.
/// Every other character, including non-ASCII letters, acts as a separator,
/// and empty pieces between adjacent separators are dropped.
fn words(line: &str) -> impl Iterator<Item = &str> {
    let is_separator =
        |character: char| !(character == '_' || character.is_ascii_alphanumeric());
    line.split(is_separator).filter(|piece| !piece.is_empty())
}

#[cfg(test)]
mod tests {
    //! Tests for elenchus risk scanning.

    use super::detect_risk_reasons;

    /// A single fixture trips every risk category; the reasons must come back
    /// in detection order.
    #[test]
    fn risk_scanner_matches_shell_gate_shapes() {
        let expected = [
            "dependency or lockfile change",
            "database/schema/migration change",
            "new unsafe usage",
            "public API surface change",
            "security-sensitive code path",
        ];

        // Manifest, security-sounding source path and SQL schema file.
        let paths = "Cargo.toml\nsrc/auth/token.rs\ndb/schema.sql\n";
        // Added lines: a public item, an `unsafe` item and a sensitive word.
        let patch = "\
diff --git a/src/auth/token.rs b/src/auth/token.rs\n\
@@ -0,0 +1,3 @@\n\
+pub struct Token;\n\
+unsafe fn raw() {}\n\
+let secret = 1;\n";

        assert_eq!(detect_risk_reasons(paths, patch), expected);
    }
}