use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use serde::Serialize;
use droidsaw_common::finding::{
Confidence, Finding, FindingProvenance, GaugeClass, Layer, Severity,
};
/// Schema URI written under the `$schema` key of every emitted log.
const SARIF_SCHEMA: &str = "https://json.schemastore.org/sarif-2.1.0.json";
/// Value of the top-level `version` property.
const SARIF_VERSION: &str = "2.1.0";
/// Name reported for the tool driver.
const TOOL_NAME: &str = "droidsaw";
/// Name shared by the CWE taxonomy component and the taxon references that point at it.
const CWE_COMPONENT_NAME: &str = "CWE";
/// Top-level SARIF log document. Build one with [`to_sarif`] and serialize it with serde.
#[derive(Debug, Serialize)]
pub struct Sarif {
/// Schema URI; serialized under the JSON key `$schema`.
#[serde(rename = "$schema")]
schema: &'static str,
/// SARIF format version string.
version: &'static str,
/// Runs contained in the log; [`to_sarif`] always emits exactly one.
runs: Vec<Run>,
}
/// One analysis run: the tool description, its results, and the taxonomies those results reference.
#[derive(Debug, Serialize)]
struct Run {
/// Description of the producing tool.
tool: Tool,
/// One entry per input finding.
results: Vec<SarifResult>,
/// Taxonomy components; the key is left out of the JSON when the list is empty.
#[serde(skip_serializing_if = "Vec::is_empty")]
taxonomies: Vec<ToolComponent>,
}
/// Wrapper object holding the tool's `driver` component.
#[derive(Debug, Serialize)]
struct Tool {
/// The component carrying the tool name, version and rule list.
driver: Driver,
}
/// Tool driver: identifies the tool and lists the rules that results refer to.
#[derive(Debug, Serialize)]
struct Driver {
/// Tool name.
name: &'static str,
/// Tool version string.
version: &'static str,
/// Rule descriptors; a result's `ruleIndex` is a position in this list.
rules: Vec<ReportingDescriptor>,
}
/// Metadata for one rule, listed under `tool.driver.rules`.
#[derive(Debug, Serialize)]
struct ReportingDescriptor {
/// Rule identifier; results reference it through `ruleId`.
id: String,
/// Rule name.
name: String,
/// Short description; serialized as `shortDescription`.
#[serde(rename = "shortDescription")]
short_description: Message,
/// Long description; serialized as `fullDescription`.
#[serde(rename = "fullDescription")]
full_description: Message,
/// Default level and enabled flag; serialized as `defaultConfiguration`.
#[serde(rename = "defaultConfiguration")]
default_configuration: RuleConfiguration,
/// Extra rule properties (precision, tags, security severity).
properties: RuleProperties,
}
/// Default configuration attached to a rule.
#[derive(Debug, Serialize)]
struct RuleConfiguration {
/// Default level string for the rule.
level: &'static str,
/// Whether the rule is reported as enabled by default.
enabled: bool,
}
/// Property bag attached to a rule descriptor.
#[derive(Debug, Serialize)]
struct RuleProperties {
/// Precision label for the rule.
precision: &'static str,
/// Free-form tags for the rule.
tags: Vec<String>,
/// Numeric severity score kept as a string; serialized as `security-severity`.
#[serde(rename = "security-severity")]
security_severity: String,
}
/// A taxonomy component listed under `run.taxonomies`.
#[derive(Debug, Serialize)]
struct ToolComponent {
/// Component name that taxon references use to point back at this taxonomy.
name: &'static str,
/// Taxa defined by this component.
taxa: Vec<TaxonItem>,
}
/// A single taxon inside a taxonomy component.
#[derive(Debug, Serialize)]
struct TaxonItem {
/// Taxon identifier, e.g. `CWE-200`.
id: String,
}
/// One reported result, produced from a single finding.
///
/// The four list-valued fields are left out of the JSON when they are empty.
#[derive(Debug, Serialize)]
struct SarifResult {
/// Identifier of the rule this result belongs to; serialized as `ruleId`.
#[serde(rename = "ruleId")]
rule_id: String,
/// Position of that rule in `tool.driver.rules`; serialized as `ruleIndex`.
#[serde(rename = "ruleIndex")]
rule_index: usize,
/// Result level string.
level: &'static str,
/// Result kind string.
kind: &'static str,
/// Numeric rank for the result.
rank: u32,
/// Human-readable message.
message: Message,
/// Locations the result applies to.
#[serde(skip_serializing_if = "Vec::is_empty")]
locations: Vec<Location>,
/// Code flows; serialized as `codeFlows`.
#[serde(rename = "codeFlows", skip_serializing_if = "Vec::is_empty")]
code_flows: Vec<CodeFlow>,
/// References to taxa that classify this result.
#[serde(skip_serializing_if = "Vec::is_empty")]
taxa: Vec<TaxonReference>,
/// Suppressions attached to the result.
#[serde(skip_serializing_if = "Vec::is_empty")]
suppressions: Vec<Suppression>,
/// Fingerprints for the result; serialized as `partialFingerprints`.
#[serde(rename = "partialFingerprints")]
partial_fingerprints: PartialFingerprints,
/// Extra result properties.
properties: ResultProperties,
}
/// Fingerprint map attached to a result; holds a single entry.
#[derive(Debug, Serialize)]
struct PartialFingerprints {
/// Fingerprint string; serialized under the key `droidsawFindingId/v1`.
#[serde(rename = "droidsawFindingId/v1")]
droidsaw_finding_id: String,
}
/// Property bag attached to a result; every field is the string form of a finding attribute.
#[derive(Debug, Serialize)]
struct ResultProperties {
/// The finding's gauge class.
gauge_class: String,
/// The finding's confidence.
confidence: String,
/// The finding's source.
source: String,
/// The finding's layer.
layer: String,
}
/// A suppression record attached to a result.
#[derive(Debug, Serialize)]
struct Suppression {
/// Suppression kind string.
kind: &'static str,
/// Human-readable reason for the suppression.
justification: String,
}
/// Reference from a result to a taxon in one of the run's taxonomies.
#[derive(Debug, Serialize)]
struct TaxonReference {
/// Identifier of the referenced taxon.
id: String,
/// Component that defines the taxon; serialized as `toolComponent`.
#[serde(rename = "toolComponent")]
tool_component: ToolComponentReference,
}
/// Names the tool component that a taxon reference points into.
#[derive(Debug, Serialize)]
struct ToolComponentReference {
/// Name of the referenced component.
name: &'static str,
}
/// A result location: a physical location plus optional logical locations.
#[derive(Debug, Serialize)]
struct Location {
/// Artifact (and optional region) the result refers to; serialized as `physicalLocation`.
#[serde(rename = "physicalLocation")]
physical_location: PhysicalLocation,
/// Logical locations; serialized as `logicalLocations` and omitted when empty.
#[serde(rename = "logicalLocations", skip_serializing_if = "Vec::is_empty")]
logical_locations: Vec<LogicalLocation>,
}
/// An artifact together with an optional region inside it.
#[derive(Debug, Serialize)]
struct PhysicalLocation {
/// The artifact; serialized as `artifactLocation`.
#[serde(rename = "artifactLocation")]
artifact_location: ArtifactLocation,
/// Region within the artifact; the key is omitted when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
region: Option<Region>,
}
/// Identifies an artifact by URI.
#[derive(Debug, Serialize)]
struct ArtifactLocation {
/// URI string for the artifact.
uri: String,
}
/// A region of an artifact, described only by a byte offset.
#[derive(Debug, Serialize)]
struct Region {
/// Byte offset taken from the finding; serialized as `byteOffset`.
#[serde(rename = "byteOffset")]
byte_offset: u64,
}
/// A logical (non-file) location, identified by name.
#[derive(Debug, Serialize)]
struct LogicalLocation {
/// Name of the logical location; serialized as `fullyQualifiedName`.
#[serde(rename = "fullyQualifiedName")]
fully_qualified_name: String,
}
/// A code flow attached to a result, made of one or more thread flows.
#[derive(Debug, Serialize)]
struct CodeFlow {
/// Thread flows of this code flow; serialized as `threadFlows`.
#[serde(rename = "threadFlows")]
thread_flows: Vec<ThreadFlow>,
}
/// An ordered sequence of steps within a code flow.
#[derive(Debug, Serialize)]
struct ThreadFlow {
/// Steps of the flow, in order.
locations: Vec<ThreadFlowLocation>,
}
/// One step of a thread flow; wraps the step's location.
#[derive(Debug, Serialize)]
struct ThreadFlowLocation {
/// Location (message plus physical location) of this step.
location: ThreadFlowInnerLocation,
}
/// Location payload of a thread-flow step: a message and where it applies.
#[derive(Debug, Serialize)]
struct ThreadFlowInnerLocation {
/// Text describing the step.
message: Message,
/// Artifact the step refers to; serialized as `physicalLocation`.
#[serde(rename = "physicalLocation")]
physical_location: PhysicalLocation,
}
/// Message object carrying a single plain-text string.
#[derive(Debug, Serialize)]
struct Message {
/// The message text.
text: String,
}
impl Message {
    /// Builds a message from anything that converts into a `String`.
    fn new(text: impl Into<String>) -> Self {
        let text: String = text.into();
        Self { text }
    }
}
fn severity_level(severity: Severity) -> &'static str {
match severity {
Severity::Critical | Severity::High => "error",
Severity::Medium => "warning",
Severity::Low => "note",
Severity::Info => "none",
}
}
/// Maps a finding severity to the numeric `rank` of a result (0 for `Info` up to 100 for
/// `Critical`).
fn severity_rank(severity: Severity) -> u32 {
    let rank: u32 = match severity {
        Severity::Info => 0,
        Severity::Low => 20,
        Severity::Medium => 50,
        Severity::High => 80,
        Severity::Critical => 100,
    };
    rank
}
/// Returns the `security-severity` score for a severity, as a decimal string.
fn security_severity(severity: Severity) -> String {
    String::from(match severity {
        Severity::Info => "0.0",
        Severity::Low => "3.0",
        Severity::Medium => "5.0",
        Severity::High => "8.0",
        Severity::Critical => "9.5",
    })
}
/// Derives a rule's `precision` label from a finding's confidence and gauge class.
///
/// Only verified findings rise above `"medium"`: cryptographic ones are `"very-high"`,
/// representational and semantic ones are `"high"`. Every other combination is `"medium"`.
fn precision(confidence: Confidence, gauge_class: GaugeClass) -> &'static str {
    let verified = matches!(confidence, Confidence::Verified);
    match gauge_class {
        GaugeClass::Cryptographic if verified => "very-high",
        GaugeClass::Representational | GaugeClass::Semantic if verified => "high",
        // NOTE(review): unverified semantic findings also land here on "medium"; confirm that a
        // lower precision was not intended for them.
        _ => "medium",
    }
}
/// Returns `true` when the precision label is `"high"` or `"very-high"`; such rules are
/// reported as enabled by default.
fn rule_enabled(precision: &str) -> bool {
    ["high", "very-high"].contains(&precision)
}
/// Maps a finding's confidence to a result `kind` string: `Verified` yields `"pass"`, every
/// other confidence yields `"fail"`.
fn result_kind(confidence: Confidence) -> &'static str {
    // Kept exhaustive on purpose so a new `Confidence` variant forces a decision here.
    match confidence {
        Confidence::Confirmed | Confidence::Unverified | Confidence::Dismissed => "fail",
        Confidence::Verified => "pass",
    }
}
/// Returns the pseudo-URI (`<scheme>://`) used for a layer when no concrete file path is known.
fn layer_uri(layer: Layer) -> String {
    let scheme = match layer {
        Layer::Apk => "apk",
        Layer::Dex => "dex",
        Layer::Hbc => "hbc",
        Layer::Native => "native",
    };
    format!("{scheme}://")
}
/// Returns the URI emitted as `artifactLocation.uri` for a finding.
///
/// A non-empty `file_path` is emitted as a relative URI reference. Bytes that may not appear
/// literally in a URI path (spaces, non-ASCII, `#`, `?`, backslashes, ...) are percent-encoded
/// so the value stays a valid RFC 3986 URI reference, as SARIF requires. Paths made only of
/// URI-legal characters are returned unchanged. Findings with no path, or an empty one, fall
/// back to the pseudo-URI of their layer.
fn artifact_uri(finding: &Finding) -> String {
    match finding.file_path.as_deref() {
        Some(path) if !path.is_empty() => percent_encode_path(path),
        _ => layer_uri(finding.layer),
    }
}

/// Percent-encodes every byte of `path` that is not legal inside a URI path.
fn percent_encode_path(path: &str) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";
    // RFC 3986 `pchar` minus `%` (unreserved, sub-delims, ':' and '@'), plus the '/' separator.
    const LITERAL_PUNCTUATION: &[u8] = b"-._~!$&'()*+,;=:@/";

    let bytes = path.as_bytes();
    let mut encoded = String::with_capacity(bytes.len());
    for (pos, &byte) in bytes.iter().enumerate() {
        let keep = match byte {
            // Leave an existing `%XX` escape alone so an already-encoded path is not
            // double-encoded; a bare `%` is escaped as `%25`.
            b'%' => matches!(
                bytes.get(pos + 1..pos + 3),
                Some([hi, lo]) if hi.is_ascii_hexdigit() && lo.is_ascii_hexdigit()
            ),
            _ => byte.is_ascii_alphanumeric() || LITERAL_PUNCTUATION.contains(&byte),
        };
        if keep {
            encoded.push(char::from(byte));
        } else {
            encoded.push('%');
            encoded.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
            encoded.push(char::from(HEX_DIGITS[usize::from(byte & 0x0F)]));
        }
    }
    encoded
}
/// Computes the fingerprint emitted under `partialFingerprints` for a finding.
///
/// The finding's `id`, `file_path`, `byte_offset` and `func_id` are hashed in that order and the
/// 64-bit digest is rendered as 16 lowercase hex digits.
///
/// NOTE(review): `DefaultHasher`'s algorithm is documented as unspecified and not to be relied
/// on across Rust releases, so these values are only guaranteed stable for a given toolchain.
/// Consider an explicitly specified hash, with a bumped fingerprint key, if results must be
/// matched across builds.
fn finding_fingerprint(finding: &Finding) -> String {
    let mut state = DefaultHasher::new();
    // Field order is part of the fingerprint; do not reorder.
    finding.id.hash(&mut state);
    finding.file_path.hash(&mut state);
    finding.byte_offset.hash(&mut state);
    finding.func_id.hash(&mut state);
    let digest = state.finish();
    format!("{digest:016x}")
}
/// Builds the physical location of a finding: its artifact URI plus, when the finding carries
/// a byte offset, a region holding that offset.
fn physical_location(finding: &Finding) -> PhysicalLocation {
    let uri = artifact_uri(finding);
    PhysicalLocation {
        artifact_location: ArtifactLocation { uri },
        region: finding
            .byte_offset
            .map(|offset| Region { byte_offset: offset }),
    }
}
fn code_flow(provenance: &FindingProvenance) -> CodeFlow {
let source_location = ThreadFlowLocation {
location: ThreadFlowInnerLocation {
message: Message::new(format!("source: {}", provenance.source_layer.as_str())),
physical_location: PhysicalLocation {
artifact_location: ArtifactLocation {
uri: layer_uri(provenance.source_layer),
},
region: None,
},
},
};
let sink_location = ThreadFlowLocation {
location: ThreadFlowInnerLocation {
message: Message::new(format!(
"sink: {} via {}",
provenance.sink_layer.as_str(),
provenance.bridge
)),
physical_location: PhysicalLocation {
artifact_location: ArtifactLocation {
uri: layer_uri(provenance.sink_layer),
},
region: None,
},
},
};
CodeFlow {
thread_flows: vec![ThreadFlow {
locations: vec![source_location, sink_location],
}],
}
}
/// Builds the result message: the finding's detail, followed by its non-empty `extra` text in
/// parentheses when there is one.
fn result_message(finding: &Finding) -> Message {
    let extra = finding.extra.as_deref().filter(|text| !text.is_empty());
    if let Some(extra) = extra {
        Message::new(format!("{} ({})", finding.detail, extra))
    } else {
        Message::new(finding.detail.clone())
    }
}
/// Builds the rule descriptor for a finding's id.
///
/// The finding id serves as rule id, name and short description; the finding's detail becomes
/// the full description. Level, precision, tags and security severity are all derived from
/// this one finding.
fn build_rule(finding: &Finding) -> ReportingDescriptor {
    let precision_label = precision(finding.confidence, finding.gauge_class);

    let default_configuration = RuleConfiguration {
        level: severity_level(finding.severity),
        enabled: rule_enabled(precision_label),
    };

    let tags = vec![
        finding.gauge_class.as_str().to_string(),
        finding.source.as_str().to_string(),
    ];
    let properties = RuleProperties {
        precision: precision_label,
        tags,
        security_severity: security_severity(finding.severity),
    };

    ReportingDescriptor {
        id: finding.id.clone(),
        name: finding.id.clone(),
        short_description: Message::new(finding.id.clone()),
        full_description: Message::new(finding.detail.clone()),
        default_configuration,
        properties,
    }
}
#[must_use]
pub fn to_sarif(findings: &[Finding]) -> Sarif {
let mut rule_index: std::collections::HashMap<String, usize> = std::collections::HashMap::new();
let mut rules: Vec<ReportingDescriptor> = Vec::new();
for finding in findings {
if !rule_index.contains_key(&finding.id) {
let idx = rules.len();
rule_index.insert(finding.id.clone(), idx);
rules.push(build_rule(finding));
}
}
let mut seen_cwe: std::collections::HashSet<u16> = std::collections::HashSet::new();
let mut cwe_taxa: Vec<TaxonItem> = Vec::new();
for finding in findings {
if let Some(cwe) = finding.cwe {
if seen_cwe.insert(cwe) {
cwe_taxa.push(TaxonItem { id: format!("CWE-{cwe}") });
}
}
}
let results: Vec<SarifResult> = findings
.iter()
.map(|finding| build_result(finding, &rule_index))
.collect();
let taxonomies = if cwe_taxa.is_empty() {
Vec::new()
} else {
vec![ToolComponent {
name: CWE_COMPONENT_NAME,
taxa: cwe_taxa,
}]
};
Sarif {
schema: SARIF_SCHEMA,
version: SARIF_VERSION,
runs: vec![Run {
tool: Tool {
driver: Driver {
name: TOOL_NAME,
version: env!("CARGO_PKG_VERSION"),
rules,
},
},
results,
taxonomies,
}],
}
}
fn build_result(
finding: &Finding,
rule_index: &std::collections::HashMap<String, usize>,
) -> SarifResult {
let idx = rule_index.get(&finding.id).copied().unwrap_or(0);
let locations = {
let logical_locations = finding
.func_id
.map(|func_id| {
vec![LogicalLocation {
fully_qualified_name: format!("func#{func_id}"),
}]
})
.unwrap_or_default();
vec![Location {
physical_location: physical_location(finding),
logical_locations,
}]
};
let code_flows = finding
.provenance
.as_ref()
.map(|prov| vec![code_flow(prov)])
.unwrap_or_default();
let taxa = finding
.cwe
.map(|cwe| {
vec![TaxonReference {
id: format!("CWE-{cwe}"),
tool_component: ToolComponentReference {
name: CWE_COMPONENT_NAME,
},
}]
})
.unwrap_or_default();
let suppressions = if finding.confidence == Confidence::Dismissed {
let justification = match finding.dismiss_reason.as_deref() {
Some(reason) if !reason.is_empty() => format!("dismissed: {reason}"),
_ => "dismissed".to_string(),
};
vec![Suppression {
kind: "external",
justification,
}]
} else {
Vec::new()
};
let kind = result_kind(finding.confidence);
let level = if kind == "fail" {
severity_level(finding.severity)
} else {
"none"
};
SarifResult {
rule_id: finding.id.clone(),
rule_index: idx,
level,
kind,
rank: severity_rank(finding.severity),
message: result_message(finding),
locations,
code_flows,
taxa,
suppressions,
partial_fingerprints: PartialFingerprints {
droidsaw_finding_id: finding_fingerprint(finding),
},
properties: ResultProperties {
gauge_class: finding.gauge_class.as_str().to_string(),
confidence: finding.confidence.as_str().to_string(),
source: finding.source.as_str().to_string(),
layer: finding.layer.as_str().to_string(),
},
}
}
// Unit tests for the SARIF exporter: schema conformance plus targeted checks on the JSON shape.
#[cfg(test)]
mod tests {
use super::*;
use droidsaw_common::finding::{Finding, FindingProvenance, Layer, Severity, Source};
use serde_json::Value;
// Shared fixture of four findings: a taint finding with provenance and CWE 200, a dismissed
// CLEARTEXT_TRAFFIC finding with a file path and CWE 319, a plain DEBUG_CERT finding without
// CWE, and a second CLEARTEXT_TRAFFIC finding that reuses the same rule id and CWE.
fn sample_findings() -> Vec<Finding> {
let taint = Finding::new(
"CROSS_LAYER_TAINT",
Layer::Dex,
Severity::High,
"Hermes secret flows into Dex sink",
)
.with_cwe(200)
.with_source(Source::Taint)
.with_provenance(FindingProvenance {
source_layer: Layer::Hbc,
bridge: "NativeModules.SecretStore.get".into(),
sink_layer: Layer::Dex,
});
let mut dismissed = Finding::new(
"CLEARTEXT_TRAFFIC",
Layer::Apk,
Severity::Medium,
"HTTP endpoint in manifest",
)
.with_cwe(319)
.with_file_path("AndroidManifest.xml");
dismissed.confidence = Confidence::Dismissed;
dismissed.dismiss_reason = Some("test-only endpoint".into());
let plain = Finding::new(
"DEBUG_CERT",
Layer::Apk,
Severity::Low,
"debug signing certificate",
);
let dup = Finding::new(
"CLEARTEXT_TRAFFIC",
Layer::Apk,
Severity::Medium,
"second cleartext endpoint",
)
.with_cwe(319);
vec![taint, dismissed, plain, dup]
}
// Validates the output for both the sample findings and an empty input against the vendored
// SARIF 2.1.0 JSON schema.
#[test]
fn output_conforms_to_sarif_2_1_0_schema() {
let schema: Value = serde_json::from_str(include_str!(
"../tests/fixtures/sarif/sarif-schema-2.1.0.json"
))
.expect("vendored SARIF 2.1.0 schema parses");
let validator = jsonschema::validator_for(&schema).expect("build SARIF validator");
for findings in [sample_findings(), Vec::new()] {
let instance = serde_json::to_value(to_sarif(&findings)).expect("serialize SARIF");
let errors: Vec<String> =
validator.iter_errors(&instance).map(|e| e.to_string()).collect();
assert!(
errors.is_empty(),
"SARIF output failed 2.1.0 schema validation:\n{}",
errors.join("\n")
);
}
}
// Checks the top-level version, schema URI, and the driver's name and version.
#[test]
fn top_level_shape() {
let sarif = to_sarif(&sample_findings());
let value = serde_json::to_value(&sarif).expect("serialize");
assert_eq!(value["version"], Value::String("2.1.0".into()));
assert_eq!(
value["$schema"],
Value::String(SARIF_SCHEMA.to_string())
);
assert_eq!(value["runs"][0]["tool"]["driver"]["name"], "droidsaw");
assert_eq!(
value["runs"][0]["tool"]["driver"]["version"],
env!("CARGO_PKG_VERSION")
);
}
// Four findings with three distinct ids must give three rules and four results.
#[test]
fn rule_and_result_counts() {
let findings = sample_findings();
let sarif = to_sarif(&findings);
let value = serde_json::to_value(&sarif).expect("serialize");
let rules = value["runs"][0]["tool"]["driver"]["rules"]
.as_array()
.expect("rules array");
assert_eq!(rules.len(), 3);
let results = value["runs"][0]["results"]
.as_array()
.expect("results array");
assert_eq!(results.len(), findings.len());
}
// Every result's ruleIndex must point at the rule whose id equals the result's ruleId.
#[test]
fn rule_index_resolves() {
let sarif = to_sarif(&sample_findings());
let value = serde_json::to_value(&sarif).expect("serialize");
let rules = value["runs"][0]["tool"]["driver"]["rules"]
.as_array()
.expect("rules array");
let results = value["runs"][0]["results"]
.as_array()
.expect("results array");
for result in results {
let rule_id = result["ruleId"].as_str().expect("ruleId");
let rule_index = result["ruleIndex"].as_u64().expect("ruleIndex");
let referenced = rules
.get(usize::try_from(rule_index).expect("index fits"))
.expect("rule at index");
assert_eq!(referenced["id"].as_str(), Some(rule_id));
}
}
// A finding with provenance yields one thread flow with a source step followed by a sink step.
#[test]
fn taint_finding_has_code_flow() {
let sarif = to_sarif(&sample_findings());
let value = serde_json::to_value(&sarif).expect("serialize");
let results = value["runs"][0]["results"]
.as_array()
.expect("results array");
let taint = results
.iter()
.find(|r| r["ruleId"] == "CROSS_LAYER_TAINT")
.expect("taint result present");
let thread_flow_locations =
&taint["codeFlows"][0]["threadFlows"][0]["locations"];
let locs = thread_flow_locations.as_array().expect("locations array");
assert_eq!(locs.len(), 2);
let source_msg = locs[0]["location"]["message"]["text"]
.as_str()
.expect("source message");
assert!(source_msg.starts_with("source: hbc"), "{source_msg}");
let sink_msg = locs[1]["location"]["message"]["text"]
.as_str()
.expect("sink message");
assert!(sink_msg.starts_with("sink: dex via"), "{sink_msg}");
}
// Without provenance the codeFlows key must be absent from the result.
#[test]
fn non_taint_finding_has_no_code_flow() {
let sarif = to_sarif(&sample_findings());
let value = serde_json::to_value(&sarif).expect("serialize");
let results = value["runs"][0]["results"]
.as_array()
.expect("results array");
let plain = results
.iter()
.find(|r| r["ruleId"] == "DEBUG_CERT")
.expect("plain result present");
assert!(plain.get("codeFlows").is_none());
}
// A dismissed finding carries one external suppression whose justification includes the reason.
#[test]
fn dismissed_finding_has_suppression() {
let sarif = to_sarif(&sample_findings());
let value = serde_json::to_value(&sarif).expect("serialize");
let results = value["runs"][0]["results"]
.as_array()
.expect("results array");
// Two results share this rule id; pick the one that actually has suppressions.
let dismissed = results
.iter()
.find(|r| {
r["ruleId"] == "CLEARTEXT_TRAFFIC"
&& r.get("suppressions").is_some()
})
.expect("dismissed result present");
let suppressions = dismissed["suppressions"]
.as_array()
.expect("suppressions array");
assert_eq!(suppressions.len(), 1);
assert_eq!(suppressions[0]["kind"], "external");
let justification = suppressions[0]["justification"]
.as_str()
.expect("justification");
assert!(justification.contains("dismissed"), "{justification}");
assert!(justification.contains("test-only endpoint"), "{justification}");
}
// CWE 200 and CWE 319 (the latter used twice) must produce one CWE taxonomy with two taxa.
#[test]
fn cwe_taxonomy_present_when_cwe_set() {
let sarif = to_sarif(&sample_findings());
let value = serde_json::to_value(&sarif).expect("serialize");
let taxonomies = value["runs"][0]["taxonomies"]
.as_array()
.expect("taxonomies array");
assert_eq!(taxonomies.len(), 1);
assert_eq!(taxonomies[0]["name"], "CWE");
let taxa = taxonomies[0]["taxa"].as_array().expect("taxa array");
assert_eq!(taxa.len(), 2);
}
// With no CWE on any finding the taxonomies key must be absent from the run.
#[test]
fn no_taxonomy_when_no_cwe() {
let findings = vec![Finding::new(
"DEBUG_CERT",
Layer::Apk,
Severity::Low,
"debug signing certificate",
)];
let sarif = to_sarif(&findings);
let value = serde_json::to_value(&sarif).expect("serialize");
assert!(value["runs"][0].get("taxonomies").is_none());
}
// An empty input still yields a log with one run, zero results and zero rules.
#[test]
fn empty_findings_produce_valid_log() {
let sarif = to_sarif(&[]);
let value = serde_json::to_value(&sarif).expect("serialize");
assert_eq!(value["version"], "2.1.0");
assert_eq!(
value["runs"][0]["results"].as_array().map(Vec::len),
Some(0)
);
assert_eq!(
value["runs"][0]["tool"]["driver"]["rules"]
.as_array()
.map(Vec::len),
Some(0)
);
}
// Fingerprinting the same finding twice in one process gives the same 16-character string.
#[test]
fn fingerprint_is_stable() {
let findings = sample_findings();
let a = finding_fingerprint(&findings[0]);
let b = finding_fingerprint(&findings[0]);
assert_eq!(a, b);
assert_eq!(a.len(), 16);
}
// Pins the severity-to-level mapping.
#[test]
fn level_mapping() {
assert_eq!(severity_level(Severity::Critical), "error");
assert_eq!(severity_level(Severity::High), "error");
assert_eq!(severity_level(Severity::Medium), "warning");
assert_eq!(severity_level(Severity::Low), "note");
assert_eq!(severity_level(Severity::Info), "none");
}
// A verified finding is emitted with kind "pass" and level "none"; an unverified finding of the
// same severity keeps kind "fail" and the severity-derived level.
#[test]
fn verified_finding_level_is_none_when_kind_not_fail() {
let mut verified = Finding::new(
"HARDCODED_KEY",
Layer::Dex,
Severity::High,
"hardcoded AES key",
);
verified.confidence = Confidence::Verified;
let mut unverified = Finding::new(
"HARDCODED_KEY",
Layer::Dex,
Severity::High,
"hardcoded AES key (heuristic)",
);
unverified.confidence = Confidence::Unverified;
let sarif = to_sarif(&[verified, unverified]);
let value = serde_json::to_value(&sarif).expect("serialize");
let results = value["runs"][0]["results"]
.as_array()
.expect("results array");
assert_eq!(results.len(), 2);
assert_eq!(results[0]["kind"], "pass");
assert_eq!(results[0]["level"], "none");
assert_eq!(results[1]["kind"], "fail");
assert_eq!(results[1]["level"], "error");
}
}