opencodecommit 1.5.0

AI-powered git commit message generator that delegates to terminal AI agents
Documentation
use std::path::Path;

use serde::Deserialize;
use serde_json::{Value, json};

use crate::context::extract_changed_file_paths;
use crate::git;
use crate::sensitive::{
    SensitiveAllowlistEntry, SensitiveEnforcement, SensitiveFinding, SensitiveReport,
    is_blocking_finding, scan_diff_for_sensitive_content_with_options,
};
use crate::{Error, Result};

/// Output formats a scan report can be rendered in.
///
/// Nothing in this file dispatches on the variant; the names presumably line up
/// with the `format_text`, `format_json`, `format_sarif` and
/// `format_github_annotations` functions defined here (confirm at the call site).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ScanFormat {
    /// Human-readable text output.
    Text,
    /// JSON output.
    Json,
    /// SARIF output.
    Sarif,
    /// GitHub Actions annotation output.
    GithubAnnotations,
}

/// Outcome of a scan: the sensitive-content report plus a file count.
#[derive(Debug, Clone)]
pub struct ScanResult {
    /// Report produced by the sensitive-content scanner.
    pub report: SensitiveReport,
    /// Number of changed-file paths that were handed to the scan.
    pub scanned_files: usize,
}

/// Deserialization target for an allowlist file.
///
/// Both fields fall back to their defaults when absent, so a file may carry a
/// root-level `allowlist` key, a `sensitive` table with its own `allowlist`,
/// both, or neither.
#[derive(Debug, Deserialize)]
struct AllowlistWrapper {
    /// Entries declared at the root of the file.
    #[serde(default)]
    allowlist: Vec<SensitiveAllowlistEntry>,
    /// Contents of the optional `sensitive` table.
    #[serde(default)]
    sensitive: AllowlistSensitiveWrapper,
}

/// Deserialization target for the `sensitive` table of an allowlist file.
///
/// Derives `Default` so the enclosing wrapper can omit the table entirely.
#[derive(Debug, Default, Deserialize)]
struct AllowlistSensitiveWrapper {
    /// Entries declared under the `sensitive` table; empty when the key is absent.
    #[serde(default)]
    allowlist: Vec<SensitiveAllowlistEntry>,
}

/// Scans `diff` for sensitive content and packages the outcome.
///
/// All four arguments are forwarded unchanged to
/// `scan_diff_for_sensitive_content_with_options`. The `scanned_files` field of the
/// result is simply `changed_files.len()`.
pub fn run_scan(
    diff: &str,
    changed_files: &[String],
    enforcement: SensitiveEnforcement,
    allowlist: &[SensitiveAllowlistEntry],
) -> ScanResult {
    let scanned_files = changed_files.len();
    ScanResult {
        report: scan_diff_for_sensitive_content_with_options(
            diff,
            changed_files,
            enforcement,
            allowlist,
        ),
        scanned_files,
    }
}

/// Loads sensitive-content allowlist entries from the TOML file at `path`.
///
/// Two layouts are accepted: a root-level `allowlist` array, or an `allowlist`
/// array inside a `[sensitive]` table. A non-empty root-level array takes
/// precedence and the nested one is then ignored; otherwise the nested array
/// (possibly empty) is returned.
///
/// # Errors
///
/// Returns [`Error::Config`] when the file cannot be read or when its contents
/// do not parse as TOML of the expected shape. Both messages name the offending
/// path so the user can tell which file is at fault.
pub fn load_allowlist_file(path: &Path) -> Result<Vec<SensitiveAllowlistEntry>> {
    let content = std::fs::read_to_string(path).map_err(|err| {
        Error::Config(format!(
            "failed to read allowlist file {}: {err}",
            path.display()
        ))
    })?;
    let wrapper: AllowlistWrapper = toml::from_str(&content).map_err(|err| {
        Error::Config(format!(
            "failed to parse allowlist file {}: {err}",
            path.display()
        ))
    })?;
    // Root-level entries win; the nested table is only a fallback.
    if !wrapper.allowlist.is_empty() {
        Ok(wrapper.allowlist)
    } else {
        Ok(wrapper.sensitive.allowlist)
    }
}

/// Renders `report` as a flat JSON object.
///
/// The object exposes the report's `findings`, `enforcement`, `warning_count`,
/// `blocking_count`, `has_findings` and `has_blocking_findings` fields under keys
/// of the same names.
pub fn format_json(report: &SensitiveReport) -> Value {
    let findings = &report.findings;
    let enforcement = &report.enforcement;
    let warning_count = &report.warning_count;
    let blocking_count = &report.blocking_count;
    let has_findings = &report.has_findings;
    let has_blocking_findings = &report.has_blocking_findings;

    json!({
        "findings": findings,
        "enforcement": enforcement,
        "warning_count": warning_count,
        "blocking_count": blocking_count,
        "has_findings": has_findings,
        "has_blocking_findings": has_blocking_findings,
    })
}

/// Renders `report` as a SARIF 2.1.0 log containing a single run.
///
/// Every finding becomes one entry in the run's `results` array (built by
/// `sarif_result`); the tool driver is identified as `OpenCodeCommit`.
pub fn format_sarif(report: &SensitiveReport) -> Value {
    let enforcement = report.enforcement;
    let results: Vec<Value> = report
        .findings
        .iter()
        .map(|finding| sarif_result(finding, enforcement))
        .collect();

    let driver = json!({
        "name": "OpenCodeCommit",
        "informationUri": "https://github.com/Nevaberry/opencodecommit",
    });
    let run = json!({
        "tool": { "driver": driver },
        "results": results,
    });

    json!({
        "version": "2.1.0",
        "$schema": "https://json.schemastore.org/sarif-2.1.0.json",
        "runs": [run],
    })
}

/// Renders each finding as a GitHub Actions workflow command, one per line.
///
/// Findings that `is_blocking_finding` reports as blocking under the report's
/// enforcement level are emitted as `::error`, all others as `::warning`. The
/// `line=` property is included only when the finding carries a line number.
///
/// The file path and the message both originate from diff content, so they are
/// escaped before being embedded: otherwise a stray `%0A`, `:` or `,` could
/// truncate the annotation or be decoded by the runner into a separate command.
/// Returns an empty string when the report has no findings.
pub fn format_github_annotations(report: &SensitiveReport) -> String {
    report
        .findings
        .iter()
        .map(|finding| {
            let level = if is_blocking_finding(finding, report.enforcement) {
                "error"
            } else {
                "warning"
            };
            let file = escape_annotation_property(&finding.file_path);
            let message = escape_annotation_message(&format!(
                "{} [{:?} / {}] {}",
                finding.category, finding.tier, finding.rule, finding.preview
            ));
            match finding.line_number {
                Some(line) => format!("::{level} file={file},line={line}::{message}"),
                None => format!("::{level} file={file}::{message}"),
            }
        })
        .collect::<Vec<_>>()
        .join("\n")
}

/// Percent-encodes the characters that are significant inside a workflow-command
/// property value (`%`, CR, LF, `:` and `,`).
fn escape_annotation_property(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        match ch {
            '%' => escaped.push_str("%25"),
            '\r' => escaped.push_str("%0D"),
            '\n' => escaped.push_str("%0A"),
            ':' => escaped.push_str("%3A"),
            ',' => escaped.push_str("%2C"),
            other => escaped.push(other),
        }
    }
    escaped
}

/// Makes `value` safe to use as the message part of a workflow command.
///
/// `%` is percent-encoded so the runner cannot decode attacker-supplied sequences
/// such as `%0A`; line breaks are flattened to a space so each annotation stays on
/// a single output line.
fn escape_annotation_message(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len());
    for ch in value.chars() {
        match ch {
            '%' => escaped.push_str("%25"),
            '\r' | '\n' => escaped.push(' '),
            other => escaped.push(other),
        }
    }
    escaped
}

/// Renders `report` as plain text.
///
/// Delegates directly to `SensitiveReport::format_occ_commit_message`.
pub fn format_text(report: &SensitiveReport) -> String {
    report.format_occ_commit_message()
}

/// Returns the changed file paths found in `diff`.
///
/// Thin wrapper over `extract_changed_file_paths` from the `context` module.
pub fn changed_files_from_diff(diff: &str) -> Vec<String> {
    extract_changed_file_paths(diff)
}

/// Fetches a diff from git and pairs it with the paths it touches.
///
/// The diff text comes from `git::get_diff(source, repo_root)`; the path list is
/// derived from that text with `changed_files_from_diff`.
///
/// # Errors
///
/// Propagates any error returned by `git::get_diff`.
pub fn read_git_diff(
    repo_root: &Path,
    source: crate::config::DiffSource,
) -> Result<(String, Vec<String>)> {
    let diff_text = git::get_diff(source, repo_root)?;
    let touched_paths = changed_files_from_diff(&diff_text);
    Ok((diff_text, touched_paths))
}

/// Builds the SARIF `result` object for a single finding.
///
/// The level is `"error"` when `is_blocking_finding` says the finding blocks under
/// `enforcement`, otherwise `"warning"`. A `region` with `startLine` is attached to
/// the physical location only when the finding has a line number.
fn sarif_result(finding: &SensitiveFinding, enforcement: SensitiveEnforcement) -> Value {
    let blocking = is_blocking_finding(finding, enforcement);
    let level = if blocking { "error" } else { "warning" };
    let text = format!("{} [{}] {}", finding.category, finding.file_path, finding.preview);

    // Assemble the location first so the optional region can be added in place.
    let mut physical_location = json!({
        "artifactLocation": {
            "uri": finding.file_path,
        }
    });
    if let Some(start_line) = finding.line_number {
        physical_location["region"] = json!({ "startLine": start_line });
    }

    json!({
        "ruleId": finding.rule,
        "level": level,
        "message": {
            "text": text
        },
        "locations": [{
            "physicalLocation": physical_location
        }],
    })
}

// Unit tests for scanning, allowlist loading and the report formatters.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::sensitive::{SensitiveSeverity, SensitiveTier};

    /// A diff that adds a `.env` file containing an OpenAI-style API key line.
    fn env_diff() -> String {
        r#"diff --git a/.env b/.env
new file mode 100644
index 0000000..1111111
--- /dev/null
+++ b/.env
@@ -0,0 +1 @@
+OPENAI_API_KEY=sk-proj-live-secret-1234567890abcdefghijklmnopqrstuvwxyz
"#
        .to_owned()
    }

    /// A report holding one hand-built finding for `.env`, line 1, evaluated under
    /// the given enforcement level.
    fn sample_report(enforcement: SensitiveEnforcement) -> SensitiveReport {
        SensitiveReport::from_findings_with_enforcement(
            vec![SensitiveFinding {
                category: "token",
                rule: "OPENAI_API_KEY",
                file_path: ".env".to_owned(),
                line_number: Some(1),
                preview: "OPENAI_API_KEY=sk-proj-live-secret".to_owned(),
                tier: SensitiveTier::ConfirmedSecret,
                severity: SensitiveSeverity::Block,
            }],
            enforcement,
        )
    }

    /// The same diff is reported but non-blocking under `Warn`, and blocking under
    /// `BlockHigh`.
    #[test]
    fn run_scan_respects_enforcement() {
        let diff = env_diff();
        let changed_files = changed_files_from_diff(&diff);

        let warn = run_scan(&diff, &changed_files, SensitiveEnforcement::Warn, &[]);
        assert_eq!(warn.scanned_files, 1);
        assert!(warn.report.has_findings());
        assert!(!warn.report.has_blocking_findings());

        let blocking = run_scan(&diff, &changed_files, SensitiveEnforcement::BlockHigh, &[]);
        assert!(blocking.report.has_blocking_findings());
        assert!(blocking.report.blocking_count > 0);
    }

    /// Both allowlist layouts load: a root-level `allowlist` array and one nested
    /// under a `[sensitive]` table.
    #[test]
    fn load_allowlist_file_supports_root_and_nested_tables() {
        // Process id plus a nanosecond timestamp keeps the directory name distinct
        // between runs.
        let temp_root = std::env::temp_dir().join(format!(
            "occ-scan-allowlist-{}-{}",
            std::process::id(),
            std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .unwrap()
                .as_nanos()
        ));
        std::fs::create_dir_all(&temp_root).unwrap();

        // Layout 1: entries at the root of the file.
        let root_path = temp_root.join("root.toml");
        std::fs::write(
            &root_path,
            r#"
allowlist = [
  { rule = "OPENAI_API_KEY", path-regex = "\\.env$" }
]
"#,
        )
        .unwrap();
        let root_entries = load_allowlist_file(&root_path).unwrap();
        assert_eq!(root_entries.len(), 1);
        assert_eq!(root_entries[0].rule.as_deref(), Some("OPENAI_API_KEY"));

        // Layout 2: entries inside the `[sensitive]` table.
        let nested_path = temp_root.join("nested.toml");
        std::fs::write(
            &nested_path,
            r#"
[sensitive]
allowlist = [
  { path-regex = "\\.map$", value-regex = "localhost" }
]
"#,
        )
        .unwrap();
        let nested_entries = load_allowlist_file(&nested_path).unwrap();
        assert_eq!(nested_entries.len(), 1);
        assert_eq!(nested_entries[0].path_regex.as_deref(), Some("\\.map$"));

        // Cleanup result is discarded; a failure to remove the directory does not
        // fail the test.
        let _ = std::fs::remove_dir_all(temp_root);
    }

    /// Spot-checks the JSON, SARIF and GitHub-annotation renderings of one
    /// blocking finding.
    #[test]
    fn formatters_emit_machine_readable_outputs() {
        let report = sample_report(SensitiveEnforcement::BlockHigh);

        let json = format_json(&report);
        assert_eq!(json["blocking_count"], 1);
        assert_eq!(json["findings"][0]["file_path"], ".env");

        let sarif = format_sarif(&report);
        assert_eq!(sarif["version"], "2.1.0");
        assert_eq!(sarif["runs"][0]["results"][0]["level"], "error");
        assert_eq!(
            sarif["runs"][0]["results"][0]["locations"][0]["physicalLocation"]["artifactLocation"]
                ["uri"],
            ".env"
        );

        let annotations = format_github_annotations(&report);
        assert!(annotations.contains("::error file=.env,line=1::"));
        assert!(annotations.contains("OPENAI_API_KEY=sk-proj-live-secret"));
    }
}