use crate::models::{Finding, HealthReport, Severity, Tier};
use anyhow::Result;
use chrono::Utc;
use serde::Serialize;
use std::collections::HashMap;
/// URL of the SARIF 2.1.0 JSON schema; written to the report's `$schema` key.
const SARIF_SCHEMA: &str =
"https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json";
/// SARIF version string written to the report's `version` key.
const SARIF_VERSION: &str = "2.1.0";
/// Returns the SARIF `level` string for a finding.
///
/// The level is derived from the finding's tier only; its severity is not consulted.
fn sarif_level(f: &Finding) -> &'static str {
    let tier = f.tier;
    tier_sarif_level(tier)
}
/// Maps a finding tier to the SARIF `level` string used in results and rule defaults.
///
/// `Deep` becomes `"note"`, `Advisory` becomes `"warning"` and `Blocking` becomes `"error"`.
fn tier_sarif_level(tier: Tier) -> &'static str {
    match tier {
        Tier::Deep => "note",
        Tier::Advisory => "warning",
        Tier::Blocking => "error",
    }
}
/// Maps a severity to the numeric score that `build_rule` formats into the rule's
/// `security-severity` property.
///
/// Scores range from `1.0` for `Info` up to `9.5` for `Critical`.
fn severity_to_security_score(severity: &Severity) -> f64 {
    match severity {
        Severity::Info => 1.0,
        Severity::Low => 2.5,
        Severity::Medium => 5.0,
        Severity::High => 7.5,
        Severity::Critical => 9.5,
    }
}
/// Converts a filesystem path into a percent-encoded URI reference for SARIF output.
///
/// The path is decoded lossily to UTF-8 and then processed byte by byte:
/// ASCII letters, digits and `-`, `_`, `.`, `~`, `/`, `:` are copied unchanged, and every
/// other byte is written as `%XX` with two uppercase hex digits.
///
/// On Windows the native `\` separator is written as `/`, because URI path segments are
/// separated by `/`; percent-encoding it (`%5C`) would collapse the whole path into a single
/// segment. On other platforms `\` is an ordinary file-name byte and is percent-encoded.
fn path_to_sarif_uri(path: &std::path::Path) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";

    let raw = path.to_string_lossy();
    let mut encoded = String::with_capacity(raw.len());
    for b in raw.bytes() {
        match b {
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' | b'/' | b':' => {
                encoded.push(char::from(b))
            }
            // Native Windows separator: emit the URI separator instead of escaping it.
            b'\\' if cfg!(windows) => encoded.push('/'),
            _ => {
                // Manual hex formatting avoids allocating a temporary String per escaped byte.
                encoded.push('%');
                encoded.push(char::from(HEX_DIGITS[usize::from(b >> 4)]));
                encoded.push(char::from(HEX_DIGITS[usize::from(b & 0x0F)]));
            }
        }
    }
    encoded
}
/// Root object of the serialized SARIF document.
///
/// Keys are serialized in camelCase; `schema` is written under the key `$schema`.
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct SarifReport {
/// Schema URL, emitted as `$schema`.
#[serde(rename = "$schema")]
schema: String,
/// SARIF version string.
version: String,
/// Runs contained in this document.
runs: Vec<SarifRun>,
}
/// One analysis run: the tool description, its results and its invocation records.
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct SarifRun {
/// Description of the tool that produced the results.
tool: SarifTool,
/// Results reported by the run.
results: Vec<SarifResult>,
/// Invocation records for the run.
invocations: Vec<SarifInvocation>,
/// Optional map from URI base id to its location; omitted from the JSON when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
original_uri_base_ids: Option<HashMap<String, SarifArtifactLocation>>,
}
/// Wrapper object holding the tool's `driver` component.
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct SarifTool {
/// The driver component describing the analysis tool and its rules.
driver: SarifDriver,
}
/// Identity of the analysis tool plus the rules it reports against.
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct SarifDriver {
/// Tool name.
name: String,
/// Tool version string.
version: String,
/// URL with information about the tool; serialized as `informationUri`.
information_uri: String,
/// Rule descriptors referenced by the run's results.
rules: Vec<SarifRule>,
}
/// Descriptor for a single rule (one per detector that produced findings).
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct SarifRule {
/// Rule identifier that results refer to through `ruleId`.
id: String,
/// Human-readable rule name.
name: String,
/// One-line description; serialized as `shortDescription`.
short_description: SarifMessage,
/// Longer description; serialized as `fullDescription`.
full_description: SarifMessage,
/// Default level for the rule; serialized as `defaultConfiguration`.
default_configuration: SarifConfiguration,
/// Extra rule properties (tags and `security-severity`).
properties: SarifRuleProperties,
/// Documentation URL for the rule; serialized as `helpUri`.
help_uri: String,
}
/// Default configuration of a rule; carries only the default `level`.
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct SarifConfiguration {
/// SARIF level string (e.g. `"error"`, `"warning"`, `"note"`).
level: String,
}
/// Property bag attached to a rule descriptor.
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct SarifRuleProperties {
/// Free-form tags categorising the rule.
tags: Vec<String>,
/// Numeric score formatted as a string; explicitly renamed so it is emitted as
/// `security-severity` rather than the camelCase form.
#[serde(rename = "security-severity")]
security_severity: String,
}
/// A single reported result (one per finding).
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct SarifResult {
/// Identifier of the rule this result belongs to; serialized as `ruleId`.
rule_id: String,
/// SARIF level string for this result.
level: String,
/// Message shown for the result.
message: SarifMessage,
/// Locations the result applies to; the key is omitted when the list is empty.
#[serde(skip_serializing_if = "Vec::is_empty")]
locations: Vec<SarifLocation>,
/// Fingerprint values keyed by fingerprint scheme name.
fingerprints: HashMap<String, String>,
/// Tool-specific details about the finding.
properties: SarifResultProperties,
/// Optional rank value; omitted from the JSON when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
rank: Option<f64>,
}
/// A result location; wraps a single physical location.
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct SarifLocation {
/// File and optional region; serialized as `physicalLocation`.
physical_location: SarifPhysicalLocation,
}
/// A file reference with an optional line region inside it.
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct SarifPhysicalLocation {
/// The referenced file; serialized as `artifactLocation`.
artifact_location: SarifArtifactLocation,
/// Optional line range within the file; omitted from the JSON when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
region: Option<SarifRegion>,
}
/// Reference to an artifact (file) by URI.
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct SarifArtifactLocation {
/// URI of the artifact.
uri: String,
/// Optional base id the URI is relative to; serialized as `uriBaseId` and omitted when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
uri_base_id: Option<String>,
}
/// Line range inside an artifact.
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct SarifRegion {
/// First line of the region; serialized as `startLine`.
start_line: u32,
/// Optional last line of the region; serialized as `endLine` and omitted when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
end_line: Option<u32>,
}
/// Plain-text message object used for result messages, rule descriptions and notifications.
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct SarifMessage {
/// The message text.
text: String,
}
/// Record describing one invocation of the tool.
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct SarifInvocation {
/// Whether the invocation succeeded; serialized as `executionSuccessful`.
execution_successful: bool,
/// End timestamp as a string; serialized as `endTimeUtc`.
end_time_utc: String,
/// Notifications emitted by the tool; the key is omitted when the list is empty.
#[serde(skip_serializing_if = "Vec::is_empty")]
tool_execution_notifications: Vec<SarifNotification>,
}
/// A notification attached to an invocation.
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct SarifNotification {
/// SARIF level string of the notification.
level: String,
/// Notification message.
message: SarifMessage,
/// Descriptor identifying the kind of notification.
descriptor: SarifDescriptor,
}
/// Minimal descriptor reference carrying only an id.
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct SarifDescriptor {
/// Identifier of the descriptor.
id: String,
}
/// Property bag attached to each result, carrying finding details that have no dedicated
/// field on `SarifResult`.
///
/// Field names are serialized in camelCase (e.g. `suggestedFix`, `cweId`). The nested
/// `Evidence`, `AlternativeBranch` and `PredictionReason` values use their own serde
/// configuration, which is defined outside this file.
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct SarifResultProperties {
/// Finding severity rendered as a string.
severity: String,
/// Finding tier rendered as a string.
tier: String,
/// Optional supporting evidence; omitted when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
evidence: Option<crate::models::Evidence>,
/// Optional confidence value as reported on the finding; omitted when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
confidence: Option<f64>,
/// Optional free-text fix suggestion; omitted when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
suggested_fix: Option<String>,
/// Optional effort estimate; omitted when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
estimated_effort: Option<String>,
/// Optional finding category; omitted when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
category: Option<String>,
/// Optional CWE identifier; omitted when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
cwe_id: Option<String>,
/// Optional threshold metadata as ordered key/value pairs; omitted when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
threshold_metadata: Option<std::collections::BTreeMap<String, String>>,
/// Optional alternative interpretation of the finding; omitted when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
alternative_branch: Option<crate::dual_branch::AlternativeBranch>,
/// Reasons behind the prediction; the key is omitted when the list is empty.
#[serde(skip_serializing_if = "Vec::is_empty")]
prediction_reasons: Vec<crate::dual_branch::PredictionReason>,
}
/// Renders a health report as a pretty-printed SARIF JSON string.
///
/// # Errors
///
/// Returns an error if JSON serialization of the SARIF document fails.
pub fn render(report: &HealthReport) -> Result<String> {
    let document = build_sarif(report);
    let json = serde_json::to_string_pretty(&document)?;
    Ok(json)
}
fn build_sarif(report: &HealthReport) -> SarifReport {
let mut findings_by_detector: HashMap<String, Vec<&Finding>> = HashMap::new();
for finding in &report.findings {
findings_by_detector
.entry(finding.detector.clone())
.or_default()
.push(finding);
}
let rules: Vec<SarifRule> = findings_by_detector
.iter()
.map(|(detector, findings)| build_rule(detector, findings))
.collect();
let results: Vec<SarifResult> = report
.findings
.iter()
.enumerate()
.map(|(i, f)| build_result(f, i))
.collect();
let driver = SarifDriver {
name: "Repotoire".to_string(),
version: env!("CARGO_PKG_VERSION").to_string(),
information_uri: "https://repotoire.com".to_string(),
rules,
};
let run = SarifRun {
tool: SarifTool { driver },
results,
invocations: vec![build_invocation(report)],
original_uri_base_ids: None,
};
SarifReport {
schema: SARIF_SCHEMA.to_string(),
version: SARIF_VERSION.to_string(),
runs: vec![run],
}
}
/// Builds the invocation record for a report.
///
/// The invocation is always marked successful, is stamped with the current UTC time in
/// RFC 3339 form, and carries one `note`-level notification summarising the report's grade
/// and overall score.
fn build_invocation(report: &HealthReport) -> SarifInvocation {
    let summary = SarifNotification {
        level: String::from("note"),
        message: SarifMessage {
            text: format!(
                "Analysis complete. Grade: {}, Score: {:.1}/100",
                report.grade, report.overall_score
            ),
        },
        descriptor: SarifDescriptor {
            id: String::from("summary"),
        },
    };

    SarifInvocation {
        execution_successful: true,
        end_time_utc: Utc::now().to_rfc3339(),
        tool_execution_notifications: vec![summary],
    }
}
/// Builds the SARIF rule descriptor for one detector.
///
/// `findings` are the findings produced by `detector`. The rule's default level comes from
/// the maximum tier among them and its `security-severity` from the maximum severity, where
/// "maximum" is whatever the `Ord` implementations of `Tier` and `Severity` define. With no
/// findings the fallbacks are `Tier::Advisory` and `Severity::Info`.
fn build_rule(detector: &str, findings: &[&Finding]) -> SarifRule {
    let top_severity = findings
        .iter()
        .map(|finding| &finding.severity)
        .max()
        .unwrap_or(&Severity::Info);
    let top_tier = findings
        .iter()
        .map(|finding| finding.tier)
        .max()
        .unwrap_or(Tier::Advisory);

    let id = normalize_rule_id(detector);
    let display_name = detector.replace("Detector", "");

    // Derive everything that borrows `id` / `display_name` before they are moved into the rule.
    let help_uri = format!(
        "https://repotoire.com/docs/detectors/{}",
        id.to_lowercase()
    );
    let short_description = SarifMessage {
        text: format!("Issue detected by {}", display_name),
    };
    let full_description = SarifMessage {
        text: get_detector_description(detector),
    };
    let properties = SarifRuleProperties {
        tags: get_detector_tags(detector),
        security_severity: format!("{:.1}", severity_to_security_score(top_severity)),
    };

    SarifRule {
        id,
        name: display_name,
        short_description,
        full_description,
        default_configuration: SarifConfiguration {
            level: tier_sarif_level(top_tier).to_string(),
        },
        properties,
        help_uri,
    }
}
/// Converts one finding into a SARIF result.
///
/// `index` is only used to synthesise a fingerprint (`finding-{index}`) when the finding's
/// `id` is empty. Every affected file becomes a location relative to `%SRCROOT%`; all
/// locations share the same line region, which is present only when the finding has a start
/// line. The message is the finding's description, or its title when the description is empty.
fn build_result(finding: &Finding, index: usize) -> SarifResult {
    // Resolve the line bounds once: the start line is raised to at least 1 and the end line
    // is never allowed to fall below the start line.
    let line_bounds = finding.line_start.map(|start| {
        let start_line = start.max(1);
        let end_line = finding.line_end.map(|end| end.max(start_line));
        (start_line, end_line)
    });

    let locations: Vec<SarifLocation> = finding
        .affected_files
        .iter()
        .map(|file| {
            let artifact_location = SarifArtifactLocation {
                uri: path_to_sarif_uri(file),
                uri_base_id: Some("%SRCROOT%".to_string()),
            };
            let region = line_bounds.map(|(start_line, end_line)| SarifRegion {
                start_line,
                end_line,
            });
            SarifLocation {
                physical_location: SarifPhysicalLocation {
                    artifact_location,
                    region,
                },
            }
        })
        .collect();

    let fingerprint = if finding.id.is_empty() {
        format!("finding-{}", index)
    } else {
        finding.id.clone()
    };
    let fingerprints = HashMap::from([("repotoire/finding/v1".to_string(), fingerprint)]);

    // Confidence is scaled by 100 and clamped into the 0..=100 range.
    let rank = finding.confidence.map(|c| (c * 100.0).clamp(0.0, 100.0));

    let message_source = if finding.description.is_empty() {
        &finding.title
    } else {
        &finding.description
    };

    // Empty threshold metadata is dropped so the key does not appear in the output.
    let threshold_metadata =
        (!finding.threshold_metadata.is_empty()).then(|| finding.threshold_metadata.clone());

    let properties = SarifResultProperties {
        severity: finding.severity.to_string(),
        tier: finding.tier.to_string(),
        evidence: finding.evidence.clone(),
        confidence: finding.confidence,
        suggested_fix: finding.suggested_fix.clone(),
        estimated_effort: finding.estimated_effort.clone(),
        category: finding.category.clone(),
        cwe_id: finding.cwe_id.clone(),
        threshold_metadata,
        alternative_branch: finding.alternative_branch.clone(),
        prediction_reasons: finding.prediction_reasons.clone(),
    };

    SarifResult {
        rule_id: normalize_rule_id(&finding.detector),
        level: sarif_level(finding).to_string(),
        message: SarifMessage {
            text: message_source.clone(),
        },
        locations,
        fingerprints,
        properties,
        rank,
    }
}
/// Derives the SARIF rule id for a detector name.
///
/// Every occurrence of `"Detector"` is removed, a `-` is inserted before each uppercase
/// character except the first character, ASCII letters are lowercased, and the result is
/// prefixed with `repotoire/`. For example `GodClassDetector` becomes `repotoire/god-class`.
fn normalize_rule_id(detector: &str) -> String {
    let stripped = detector.replace("Detector", "");
    let mut id = String::from("repotoire/");
    let mut at_start = true;
    for ch in stripped.chars() {
        if !at_start && ch.is_uppercase() {
            id.push('-');
        }
        id.push(ch.to_ascii_lowercase());
        at_start = false;
    }
    id
}
/// Returns the long description used for a detector's SARIF rule.
///
/// A fixed set of detectors has hand-written descriptions; any other name gets a generic
/// sentence built from the detector name with `"Detector"` removed.
fn get_detector_description(detector: &str) -> String {
    const DESCRIPTIONS: [(&str, &str); 7] = [
        (
            "CircularDependencyDetector",
            "Detects circular import dependencies that can cause import errors and make the codebase harder to maintain.",
        ),
        (
            "GodClassDetector",
            "Identifies classes that have grown too large and complex, violating the Single Responsibility Principle.",
        ),
        (
            "LongParameterListDetector",
            "Detects functions with too many parameters, which reduces readability and maintainability.",
        ),
        (
            "DeadCodeDetector",
            "Graph-based detection of unreachable code that can be safely removed.",
        ),
        (
            "FeatureEnvyDetector",
            "Detects methods that use more features from other classes than their own.",
        ),
        (
            "DataClumpsDetector",
            "Identifies groups of data that frequently appear together and should be encapsulated.",
        ),
        (
            "ShotgunSurgeryDetector",
            "Identifies changes that require modifications in many different places.",
        ),
    ];

    DESCRIPTIONS
        .iter()
        .find(|(name, _)| *name == detector)
        .map(|(_, text)| (*text).to_string())
        .unwrap_or_else(|| {
            format!(
                "Code analysis performed by {} detector.",
                detector.replace("Detector", "")
            )
        })
}
/// Returns the SARIF rule tags for a detector.
///
/// Each known detector name belongs to one of five groups (security, quality, complexity,
/// architecture, maintenance), and each group contributes a fixed pair of tags. A name that
/// is in no group gets the single tag `code-smell`.
fn get_detector_tags(detector: &str) -> Vec<String> {
    // (detectors in the group, tags the group contributes), checked in this order.
    const GROUPS: [(&[&str], [&str; 2]); 5] = [
        (
            &["BanditDetector", "SemgrepDetector"],
            ["security", "vulnerability"],
        ),
        (
            &["RuffLintDetector", "MypyDetector", "PylintDetector"],
            ["quality", "style"],
        ),
        (
            &[
                "RadonDetector",
                "GodClassDetector",
                "LongParameterListDetector",
            ],
            ["complexity", "maintainability"],
        ),
        (
            &[
                "CircularDependencyDetector",
                "FeatureEnvyDetector",
                "ShotgunSurgeryDetector",
                "DataClumpsDetector",
            ],
            ["architecture", "design"],
        ),
        (
            &["DeadCodeDetector", "VultureDetector", "JscpdDetector"],
            ["maintenance", "technical-debt"],
        ),
    ];

    let mut tags = Vec::new();
    for (members, labels) in GROUPS {
        if members.contains(&detector) {
            tags.extend(labels.map(String::from));
        }
    }
    if tags.is_empty() {
        tags.push("code-smell".to_string());
    }
    tags
}
// Unit tests for the SARIF reporter: rule-id normalisation, tier-to-level mapping,
// URI encoding, confidence-to-rank conversion and the shape of the rendered JSON.
#[cfg(test)]
mod tests {
use super::*;
use crate::models::Grade;
use std::path::PathBuf;
// Detector names are converted to kebab-case ids under the `repotoire/` prefix.
#[test]
fn test_normalize_rule_id() {
assert_eq!(
normalize_rule_id("CircularDependencyDetector"),
"repotoire/circular-dependency"
);
assert_eq!(normalize_rule_id("GodClassDetector"), "repotoire/god-class");
}
// The SARIF level follows the finding's tier regardless of severity, and the serialized
// result carries tier, confidence and evidence under `properties`.
#[test]
fn sarif_level_maps_from_tier() {
let f_block = Finding {
tier: Tier::Blocking,
severity: Severity::Critical,
..Default::default()
};
let f_adv = Finding {
tier: Tier::Advisory,
severity: Severity::High,
..Default::default()
};
let f_deep = Finding {
tier: Tier::Deep,
severity: Severity::Low,
..Default::default()
};
assert_eq!(sarif_level(&f_block), "error");
assert_eq!(sarif_level(&f_adv), "warning");
assert_eq!(sarif_level(&f_deep), "note");
// A blocking finding with evidence: check the serialized result end to end.
let blocking = Finding {
id: "blk".to_string(),
detector: "CommandInjectionDetector".to_string(),
tier: Tier::Blocking,
severity: Severity::Critical,
confidence: Some(0.97),
affected_files: vec![PathBuf::from("src/app.js")],
line_start: Some(10),
line_end: Some(10),
evidence: Some(crate::models::Evidence::ConfigFact {
span: crate::models::SourceSpan {
file: PathBuf::from("src/app.js"),
line_start: 10,
line_end: 10,
snippet: None,
},
rule: "tls_verify_disabled".to_string(),
}),
..Default::default()
};
let result = build_result(&blocking, 0);
let value = serde_json::to_value(&result).expect("serialize SarifResult");
assert_eq!(value["level"], "error");
assert_eq!(value["properties"]["tier"], "blocking");
assert_eq!(value["properties"]["confidence"].as_f64(), Some(0.97));
// The evidence payload is addressed by the snake_case key `config_fact`.
assert_eq!(
value["properties"]["evidence"]["config_fact"]["rule"],
"tls_verify_disabled"
);
// An advisory finding without evidence must not emit the `evidence` key at all.
let advisory = Finding {
id: "adv".to_string(),
detector: "DeepNestingDetector".to_string(),
tier: Tier::Advisory,
severity: Severity::Medium,
affected_files: vec![PathBuf::from("src/app.js")],
..Default::default()
};
let result = build_result(&advisory, 1);
let value = serde_json::to_value(&result).expect("serialize SarifResult");
assert_eq!(value["level"], "warning");
assert_eq!(value["properties"]["tier"], "advisory");
assert!(value["properties"].get("evidence").is_none());
}
// Parentheses and square brackets in path segments are percent-encoded; `/` is kept.
#[test]
fn test_path_to_sarif_uri_encodes_reserved_characters() {
let path = PathBuf::from("repotoire/web/src/app/(marketing)/blog/[slug]/page.tsx");
assert_eq!(
path_to_sarif_uri(&path),
"repotoire/web/src/app/%28marketing%29/blog/%5Bslug%5D/page.tsx"
);
}
// Confidence is scaled to a 0-100 rank, clamped at both ends, and absent when unset.
#[test]
fn test_confidence_to_rank() {
let high_conf_finding = Finding {
id: "test-1".to_string(),
detector: "TestDetector".to_string(),
severity: Severity::High,
title: "High confidence finding".to_string(),
description: "Test".to_string(),
affected_files: vec![PathBuf::from("test.py")],
line_start: Some(10),
line_end: Some(20),
confidence: Some(0.95),
..Default::default()
};
let result = build_result(&high_conf_finding, 0);
assert_eq!(result.rank, Some(95.0));
let med_conf_finding = Finding {
confidence: Some(0.7),
..high_conf_finding.clone()
};
let result = build_result(&med_conf_finding, 1);
assert_eq!(result.rank, Some(70.0));
let no_conf_finding = Finding {
confidence: None,
..high_conf_finding.clone()
};
let result = build_result(&no_conf_finding, 2);
assert_eq!(result.rank, None);
// Out-of-range confidences are clamped rather than rejected.
let over_conf_finding = Finding {
confidence: Some(1.5),
..high_conf_finding.clone()
};
let result = build_result(&over_conf_finding, 3);
assert_eq!(result.rank, Some(100.0));
let neg_conf_finding = Finding {
confidence: Some(-0.1),
..high_conf_finding
};
let result = build_result(&neg_conf_finding, 4);
assert_eq!(result.rank, Some(0.0));
}
// The rendered document has the expected version, a `$schema` string and at least one run.
// `test_report()` is a shared fixture defined in `crate::reporters::tests`.
#[test]
fn test_sarif_valid_structure() {
let report = crate::reporters::tests::test_report();
let sarif_str = render(&report).expect("render SARIF");
let parsed: serde_json::Value = serde_json::from_str(&sarif_str).expect("parse SARIF JSON");
assert_eq!(parsed["version"], "2.1.0");
assert!(parsed["$schema"].as_str().is_some());
assert!(!parsed["runs"].as_array().expect("runs array").is_empty());
}
// The shared fixture report yields a non-empty `results` array in the first run.
#[test]
fn test_sarif_has_results() {
let report = crate::reporters::tests::test_report();
let sarif_str = render(&report).expect("render SARIF");
let parsed: serde_json::Value = serde_json::from_str(&sarif_str).expect("parse SARIF JSON");
let results = parsed["runs"][0]["results"]
.as_array()
.expect("results array");
assert!(!results.is_empty());
}
// The rank derived from confidence appears in the rendered JSON text.
#[test]
fn test_rank_in_sarif_output() {
let report = HealthReport {
overall_score: 85.0,
grade: Grade::B,
structure_score: 90.0,
quality_score: 80.0,
architecture_score: Some(85.0),
findings: vec![Finding {
id: "test-sarif".to_string(),
detector: "SecurityDetector".to_string(),
severity: Severity::High,
title: "Security issue".to_string(),
description: "Potential vulnerability".to_string(),
affected_files: vec![PathBuf::from("src/main.py")],
line_start: Some(42),
line_end: Some(42),
confidence: Some(0.85),
..Default::default()
}],
findings_summary: crate::models::FindingsSummary::default(),
total_files: 10,
total_functions: 50,
total_classes: 5,
total_loc: 5000,
suppression_events: Vec::new(),
suppressed_unaccounted_blocking_count: 0,
};
let sarif_json = render(&report).expect("SARIF render should succeed");
// Matches the pretty-printed text form, so this depends on the JSON formatting.
assert!(
sarif_json.contains("\"rank\": 85.0"),
"SARIF output should contain rank: 85.0"
);
}
// Free-text fix suggestions are emitted under `properties.suggestedFix`, never as a
// SARIF `fixes` array.
#[test]
fn test_suggested_fix_stays_in_properties_not_sarif_fixes() {
let report = HealthReport {
overall_score: 80.0,
grade: Grade::B,
structure_score: 80.0,
quality_score: 80.0,
architecture_score: Some(80.0),
findings: vec![Finding {
id: "test-fix".to_string(),
detector: "SecurityDetector".to_string(),
severity: Severity::High,
title: "Security issue".to_string(),
description: "Potential vulnerability".to_string(),
affected_files: vec![PathBuf::from("src/main.py")],
line_start: Some(7),
line_end: Some(7),
suggested_fix: Some("Replace string formatting with parameters".to_string()),
..Default::default()
}],
findings_summary: crate::models::FindingsSummary::default(),
total_files: 1,
total_functions: 1,
total_classes: 0,
total_loc: 10,
suppression_events: Vec::new(),
suppressed_unaccounted_blocking_count: 0,
};
let sarif_str = render(&report).expect("render SARIF");
let parsed: serde_json::Value = serde_json::from_str(&sarif_str).expect("parse SARIF JSON");
let result = &parsed["runs"][0]["results"][0];
assert_eq!(
result["properties"]["suggestedFix"],
"Replace string formatting with parameters"
);
assert!(
result.get("fixes").is_none(),
"SARIF should not emit invalid machine-applicable fixes for free-text suggestions"
);
}
// Test helper: wraps a single finding in a report with fixed placeholder scores and counts.
fn report_with(finding: Finding) -> HealthReport {
HealthReport {
overall_score: 80.0,
grade: Grade::B,
structure_score: 80.0,
quality_score: 80.0,
architecture_score: Some(80.0),
findings: vec![finding],
findings_summary: crate::models::FindingsSummary::default(),
total_files: 1,
total_functions: 1,
total_classes: 0,
total_loc: 10,
suppression_events: Vec::new(),
suppressed_unaccounted_blocking_count: 0,
}
}
// A finding without dual-branch data emits neither `alternativeBranch` nor
// `predictionReasons` under `properties`.
#[test]
fn test_sarif_omits_dual_branch_keys_when_empty() {
let report = report_with(Finding {
id: "single-branch".to_string(),
detector: "TestDetector".to_string(),
severity: Severity::Medium,
title: "ordinary finding".to_string(),
description: "no dual-branch payload".to_string(),
affected_files: vec![PathBuf::from("src/main.py")],
..Default::default()
});
let sarif_str = render(&report).expect("render SARIF");
let parsed: serde_json::Value = serde_json::from_str(&sarif_str).expect("parse SARIF JSON");
let props = &parsed["runs"][0]["results"][0]["properties"];
assert!(
props.get("alternativeBranch").is_none(),
"single-branch finding should not emit alternativeBranch"
);
assert!(
props.get("predictionReasons").is_none(),
"single-branch finding should not emit predictionReasons"
);
}
// A finding with dual-branch data surfaces it under `properties`: the outer keys are
// camelCase, while the nested payload keeps its own snake_case field names.
#[test]
fn test_sarif_surfaces_dual_branch_payload_under_properties() {
use crate::dual_branch::{
AlternativeBranch, BranchLabel, PredictionReason, PredictionReasonKind,
};
let report = report_with(Finding {
id: "dual-branch".to_string(),
detector: "InsecureTlsDetector".to_string(),
severity: Severity::High,
title: "TLS verification disabled".to_string(),
description: "verify=False on a TLS call".to_string(),
affected_files: vec![PathBuf::from("src/client.py")],
line_start: Some(42),
line_end: Some(42),
alternative_branch: Some(AlternativeBranch {
label: BranchLabel::Benign,
severity: Severity::Info,
title: "Intentional test bypass".to_string(),
description: "Likely a test helper".to_string(),
suggested_fix: Some("Annotate with repotoire:ignore".to_string()),
}),
prediction_reasons: vec![
PredictionReason {
kind: PredictionReasonKind::KeywordArgument {
name: "verify".to_string(),
value: "False".to_string(),
},
weight: -0.5,
note: "verify=False leans RealBug".to_string(),
},
PredictionReason {
kind: PredictionReasonKind::TestFixtureFile,
weight: 0.3,
note: "file path looks like a test fixture".to_string(),
},
],
..Default::default()
});
let sarif_str = render(&report).expect("render SARIF");
let parsed: serde_json::Value = serde_json::from_str(&sarif_str).expect("parse SARIF JSON");
let props = &parsed["runs"][0]["results"][0]["properties"];
let alt = &props["alternativeBranch"];
assert!(
alt.is_object(),
"alternativeBranch should be present as an object"
);
assert_eq!(alt["label"], "benign");
assert_eq!(alt["severity"], "info");
assert_eq!(alt["title"], "Intentional test bypass");
assert_eq!(
alt["suggested_fix"], "Annotate with repotoire:ignore",
"AlternativeBranch fields keep snake_case (its own struct does not opt into camelCase)"
);
let reasons = props["predictionReasons"]
.as_array()
.expect("predictionReasons is an array");
assert_eq!(reasons.len(), 2);
// The reason kind is read from a `kind` key alongside the variant's own fields.
assert_eq!(reasons[0]["kind"], "keyword_argument");
assert_eq!(reasons[0]["name"], "verify");
assert_eq!(reasons[0]["value"], "False");
assert_eq!(reasons[0]["weight"].as_f64().unwrap(), -0.5);
assert_eq!(reasons[1]["kind"], "test_fixture_file");
}
}