keyhog-core 0.2.0

Core types, traits, and detector specs for the secret scanner
Documentation
//! Report formatters: text, JSON, JSONL, and SARIF output for scanner findings.

/// Animated ASCII-art banner with true-color gradient rendering.
pub mod banner;
mod json;
mod sarif;
mod text;

use std::collections::HashMap;
use std::sync::{OnceLock, RwLock};

use thiserror::Error;

pub use json::{JsonReporter, JsonlReporter};
pub use sarif::SarifReporter;
pub use text::TextReporter;

/// Errors emitted while writing scanner reports.
///
/// # Examples
///
/// ```rust
/// use keyhog_core::ReportError;
///
/// let error = ReportError::Io(std::io::Error::other("disk full"));
/// assert!(error.to_string().contains("Fix"));
/// ```
#[derive(Debug, Error)]
pub enum ReportError {
    #[error("failed to write report: {0}. Fix: choose a writable output path or write to stdout")]
    Io(#[from] std::io::Error),
    #[error(
        "failed to serialize report: {0}. Fix: switch to a simpler format or report this as a serialization bug"
    )]
    Serialize(#[from] serde_json::Error),
}

/// Trait implemented by all finding reporters.
///
/// # Examples
///
/// ```rust
/// use keyhog_core::{JsonlReporter, Reporter};
///
/// let mut out = Vec::new();
/// let mut reporter = JsonlReporter::new(&mut out);
/// reporter.finish().unwrap();
/// ```
pub trait Reporter {
    /// Emit one finding into the report stream.
    fn report(&mut self, finding: &crate::VerifiedFinding) -> Result<(), ReportError>;
    /// Flush and finalize the report output.
    fn finish(&mut self) -> Result<(), ReportError>;
}

/// Factory used to build dynamically registered reporters.
///
/// # Examples
///
/// ```rust
/// use keyhog_core::{JsonReporter, ReporterFactory};
///
/// let _factory: ReporterFactory = Box::new(|writer| Box::new(JsonReporter::new(writer)));
/// ```
pub type ReporterFactory =
    Box<dyn Fn(Box<dyn std::io::Write + Send + 'static>) -> Box<dyn Reporter> + Send + Sync>;

static REPORTER_REGISTRY: OnceLock<RwLock<HashMap<String, ReporterFactory>>> = OnceLock::new();

/// Register a named reporter factory for custom output formats.
///
/// # Examples
///
/// ```rust
/// use keyhog_core::{JsonReporter, register_reporter};
///
/// register_reporter("json-copy", Box::new(|writer| Box::new(JsonReporter::new(writer))));
/// ```
pub fn register_reporter(name: &str, factory: ReporterFactory) {
    let Ok(mut registry) = REPORTER_REGISTRY
        .get_or_init(|| RwLock::new(HashMap::new()))
        .write()
    else {
        tracing::error!("failed to access reporter registry: cannot register '{name}'");
        return;
    };
    registry.insert(name.to_string(), factory);
}

/// Build a previously registered custom reporter by name.
///
/// # Examples
///
/// ```rust
/// use keyhog_core::{JsonReporter, make_custom_reporter, register_reporter};
///
/// register_reporter("json-copy-lookup", Box::new(|writer| Box::new(JsonReporter::new(writer))));
/// let reporter = make_custom_reporter("json-copy-lookup", Box::new(Vec::new()));
/// assert!(reporter.is_some());
/// ```
pub fn make_custom_reporter(
    name: &str,
    w: Box<dyn std::io::Write + Send + 'static>,
) -> Option<Box<dyn Reporter>> {
    let Ok(registry) = REPORTER_REGISTRY
        .get_or_init(|| RwLock::new(HashMap::new()))
        .read()
    else {
        tracing::error!("failed to access reporter registry: cannot look up '{name}'");
        return None;
    };
    registry.get(name).map(|factory| factory(w))
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::{MatchLocation, Severity, VerificationResult, VerifiedFinding};
    use std::collections::HashMap;

    fn sample_finding() -> VerifiedFinding {
        VerifiedFinding {
            detector_id: "slack-bot-token".into(),
            detector_name: "Slack Bot Token".into(),
            service: "slack".into(),
            severity: Severity::Critical,
            credential_redacted: "xoxb***************".into(),
            location: MatchLocation {
                source: "filesystem".into(),
                file_path: Some("config.py".into()),
                line: Some(42),
                offset: 0,
                commit: None,
                author: None,
                date: None,
            },
            verification: VerificationResult::Live,
            metadata: HashMap::from([("team".into(), "acme".into())]),
            additional_locations: vec![],
            confidence: Some(0.85),
        }
    }

    #[test]
    fn text_reporter_output() {
        let mut buf = Vec::new();
        let mut reporter = TextReporter::new(&mut buf);
        reporter.report(&sample_finding()).unwrap();
        reporter.finish().unwrap();
        let output = String::from_utf8(buf).unwrap();
        assert!(output.contains("LIVE"));
        assert!(output.contains("Slack Bot Token"));
        assert!(output.contains("config.py:42"));
    }

    #[test]
    fn jsonl_reporter_output() {
        let mut buf = Vec::new();
        let mut reporter = JsonlReporter::new(&mut buf);
        reporter.report(&sample_finding()).unwrap();
        reporter.finish().unwrap();
        let output = String::from_utf8(buf).unwrap();
        let parsed: serde_json::Value = serde_json::from_str(output.trim()).unwrap();
        assert_eq!(parsed["service"], "slack");
    }

    #[test]
    fn sarif_reporter_basic_structure() {
        let mut buf = Vec::new();
        let mut reporter = SarifReporter::new(&mut buf);
        reporter.report(&sample_finding()).unwrap();
        reporter.finish().unwrap();
        let output = String::from_utf8(buf).unwrap();
        let parsed: serde_json::Value = serde_json::from_str(&output).unwrap();

        assert_eq!(parsed["version"], "2.1.0");
        assert!(
            parsed["$schema"]
                .as_str()
                .unwrap()
                .contains("sarif-schema-2.1.0.json")
        );

        let runs = parsed["runs"].as_array().unwrap();
        assert_eq!(runs.len(), 1);

        let tool = &runs[0]["tool"]["driver"];
        assert_eq!(tool["name"], "keyhog");
        assert!(tool["version"].is_string());

        let rules = tool["rules"].as_array().unwrap();
        assert_eq!(rules.len(), 1);
        assert_eq!(rules[0]["id"], "slack-bot-token");
        assert_eq!(rules[0]["name"], "Slack Bot Token");
        assert!(rules[0]["properties"]["service"].is_string());

        let results = runs[0]["results"].as_array().unwrap();
        assert_eq!(results.len(), 1);
        assert_eq!(results[0]["ruleId"], "slack-bot-token");
        assert_eq!(results[0]["level"], "error");
        assert!(
            results[0]["message"]["text"]
                .as_str()
                .unwrap()
                .contains("slack")
        );

        let location = &results[0]["locations"][0];
        assert_eq!(
            location["physicalLocation"]["artifactLocation"]["uri"],
            "config.py"
        );
        assert_eq!(location["physicalLocation"]["region"]["startLine"], 42);

        let props = &results[0]["properties"];
        assert_eq!(props["verification"], "live");
        assert_eq!(props["confidence"], 0.85);
        assert_eq!(props["metadata.team"], "acme");
    }

    #[test]
    fn sarif_reporter_severity_mapping() {
        let severities = vec![
            (Severity::Critical, "error"),
            (Severity::High, "error"),
            (Severity::Medium, "warning"),
            (Severity::Low, "note"),
            (Severity::Info, "note"),
        ];

        for (sev, expected_level) in severities {
            let mut finding = sample_finding();
            finding.severity = sev;
            finding.detector_id = format!("test-{}", expected_level);

            let mut buf = Vec::new();
            let mut reporter = SarifReporter::new(&mut buf);
            reporter.report(&finding).unwrap();
            reporter.finish().unwrap();

            let output = String::from_utf8(buf).unwrap();
            let parsed: serde_json::Value = serde_json::from_str(&output).unwrap();
            let results = parsed["runs"][0]["results"].as_array().unwrap();
            assert_eq!(
                results[0]["level"], expected_level,
                "severity {:?} should map to level {}",
                sev, expected_level
            );
        }
    }

    #[test]
    fn sarif_reporter_multiple_findings() {
        let mut buf = Vec::new();
        let mut reporter = SarifReporter::new(&mut buf);

        let finding1 = sample_finding();
        let mut finding2 = sample_finding();
        finding2.detector_id = "github-token".into();
        finding2.detector_name = "GitHub Token".into();
        finding2.service = "github".into();
        finding2.location.file_path = Some(".env".into());
        finding2.location.line = Some(10);

        reporter.report(&finding1).unwrap();
        reporter.report(&finding2).unwrap();
        reporter.finish().unwrap();

        let output = String::from_utf8(buf).unwrap();
        let parsed: serde_json::Value = serde_json::from_str(&output).unwrap();

        let rules = parsed["runs"][0]["tool"]["driver"]["rules"]
            .as_array()
            .unwrap();
        assert_eq!(rules.len(), 2);

        let results = parsed["runs"][0]["results"].as_array().unwrap();
        assert_eq!(results.len(), 2);
    }

    #[test]
    fn sarif_reporter_git_location() {
        let mut finding = sample_finding();
        finding.location.commit = Some("abc123".into());
        finding.location.author = Some("developer".into());
        finding.location.date = Some("2026-03-20T12:00:00Z".into());

        let mut buf = Vec::new();
        let mut reporter = SarifReporter::new(&mut buf);
        reporter.report(&finding).unwrap();
        reporter.finish().unwrap();

        let output = String::from_utf8(buf).unwrap();
        let parsed: serde_json::Value = serde_json::from_str(&output).unwrap();

        let location = &parsed["runs"][0]["results"][0]["locations"][0];
        let logical_locs = location["logicalLocations"].as_array().unwrap();

        assert_eq!(logical_locs.len(), 3);
        assert_eq!(logical_locs[0]["kind"], "commit");
        assert_eq!(logical_locs[0]["name"], "abc123");
        assert_eq!(logical_locs[1]["kind"], "author");
        assert_eq!(logical_locs[1]["name"], "developer");
        assert_eq!(logical_locs[2]["kind"], "date");
        assert_eq!(logical_locs[2]["name"], "2026-03-20T12:00:00Z");
    }

    #[test]
    fn sarif_reporter_related_locations() {
        let mut finding = sample_finding();
        finding.additional_locations = vec![MatchLocation {
            source: "filesystem".into(),
            file_path: Some("backup.py".into()),
            line: Some(100),
            offset: 0,
            commit: None,
            author: None,
            date: None,
        }];

        let mut buf = Vec::new();
        let mut reporter = SarifReporter::new(&mut buf);
        reporter.report(&finding).unwrap();
        reporter.finish().unwrap();

        let output = String::from_utf8(buf).unwrap();
        let parsed: serde_json::Value = serde_json::from_str(&output).unwrap();

        let related = parsed["runs"][0]["results"][0]["relatedLocations"]
            .as_array()
            .unwrap();
        assert_eq!(related.len(), 1);
        assert_eq!(
            related[0]["physicalLocation"]["artifactLocation"]["uri"],
            "backup.py"
        );
        assert_eq!(related[0]["physicalLocation"]["region"]["startLine"], 100);
    }
}