use std::collections::BTreeMap;
use droidsaw_common::{Finding, GaugeClass};
use serde::{Deserialize, Serialize};
use serde_json::Value;
/// Schema version of the serialized audit envelope.
///
/// The tests in this file store it in `AuditEnvelope::schema_version` and check
/// that it appears in the emitted JSON.
/// NOTE(review): presumably bumped whenever the wire format changes — confirm.
pub const AUDIT_ENVELOPE_VERSION: u32 = 2;
/// Cap on the number of ranked findings; the tests in this file pass it as the
/// `cap` argument of `AuditEnvelope::rank_top_findings`.
pub const TOP_FINDINGS_CAP: usize = 5;
/// Size and count statistics for an APK, carried in `AuditEnvelope::apk_summary`.
///
/// The derived `Default` yields `false` / zero for every field.
/// NOTE(review): the per-field meanings below are inferred from the field names
/// only — confirm against the code that fills this struct.
#[derive(Clone, Debug, Default, Deserialize, Serialize)]
pub struct ApkSummary {
    /// Presumably: whether an HBC payload is present.
    pub has_hbc: bool,
    /// Presumably: size of the HBC payload in bytes.
    pub hbc_bytes: u64,
    /// Presumably: number of functions in the HBC payload.
    pub hbc_function_count: u32,
    /// Presumably: number of DEX files.
    pub dex_count: u32,
    /// Presumably: combined size of all DEX files in bytes.
    pub dex_total_bytes: u64,
    /// Presumably: method count summed over all DEX files.
    pub dex_methods_total: u64,
    /// Presumably: class count summed over all DEX files.
    pub dex_classes_total: u64,
}
/// Three severity histograms, one per gauge class.
///
/// `AuditEnvelope::stratify_by_gauge` fills these maps: each key is the `Debug`
/// rendering of a finding's severity and each value is the number of findings
/// with that severity in the bucket.
#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)]
pub struct SeverityStrata {
    /// Counts for findings classified as `GaugeClass::Semantic`.
    pub actionable: BTreeMap<String, u64>,
    /// Counts for findings classified as `GaugeClass::Cryptographic`.
    pub signing_facts: BTreeMap<String, u64>,
    /// Counts for findings classified as `GaugeClass::Representational`.
    pub encoding_noise: BTreeMap<String, u64>,
}
/// Serialized result of an audit run.
///
/// The tests in this file round-trip it through JSON in both a "CLI" and an
/// "MCP" shape. Every `Option` field is left out of the serialized output when
/// it is `None`.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct AuditEnvelope {
    /// Envelope schema version; the tests set it to `AUDIT_ENVELOPE_VERSION`.
    pub schema_version: u32,
    /// Findings carried inline in the envelope.
    pub findings: Vec<Finding>,
    /// NOTE(review): presumably the number of findings reported — confirm how
    /// it relates to `findings_emitted`.
    pub finding_count: u64,
    /// NOTE(review): presumably the number of findings emitted by the
    /// detectors — confirm how it relates to `finding_count`.
    pub findings_emitted: u64,
    /// Number of taint-flow findings.
    /// NOTE(review): presumably the result of `count_taint_flow_findings` — confirm.
    pub taint_flow_count: u64,
    /// Finding counts keyed by severity name.
    pub severity_summary: BTreeMap<String, u64>,
    /// Severity histograms split by gauge class; empty when the key is missing
    /// from the input JSON.
    #[serde(default)]
    pub severity_by_gauge: SeverityStrata,
    /// JSON projections of the highest-ranked findings.
    /// NOTE(review): presumably produced by `rank_top_findings` — confirm.
    pub top_findings: Vec<Value>,
    /// NOTE(review): presumably set when the inline lists were shortened — confirm.
    pub truncated: bool,
    /// NOTE(review): presumably the path of a findings database — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub db_path: Option<String>,
    /// NOTE(review): presumably ready-made queries against `db_path` — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub db_queries: Option<Value>,
    /// NOTE(review): presumably the number of finding cross-references written — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub finding_xrefs_written: Option<u64>,
    /// Free-form detector status JSON.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub detectors: Option<Value>,
    /// Free-form JSON; presumably the trufflehog result — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub trufflehog: Option<Value>,
    /// Free-form JSON; presumably the semgrep result — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub semgrep: Option<Value>,
    /// Free-form timing JSON; presumably milliseconds per phase — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub timings_ms: Option<Value>,
    /// Optional APK statistics; `None` when the key is missing from the input JSON.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub apk_summary: Option<ApkSummary>,
    /// Response metadata.
    pub meta: AuditMeta,
}
impl AuditEnvelope {
    /// Counts the findings whose id is exactly `HBC_TAINT_FLOW`,
    /// `DEX_TAINT_FLOW` or `BRIDGE_TAINT_FLOW`; every other id is ignored.
    #[allow(
        clippy::as_conversions,
        reason = "PROOF: filter().count() returns usize bounded by findings.len() ≤ usize::MAX; usize→u64 widening is lossless on every supported 64-bit target."
    )]
    pub fn count_taint_flow_findings(findings: &[Finding]) -> u64 {
        // The three ids that are counted as taint flows.
        fn is_taint_flow_id(id: &str) -> bool {
            matches!(
                id,
                "HBC_TAINT_FLOW" | "DEX_TAINT_FLOW" | "BRIDGE_TAINT_FLOW"
            )
        }

        let taint_flows = findings
            .iter()
            .filter(|finding| is_taint_flow_id(finding.id.as_str()))
            .count();
        taint_flows as u64
    }

    /// Builds one severity histogram per gauge class.
    ///
    /// Each finding is routed by `GaugeClass::of(id)`: `Semantic` goes to
    /// `actionable`, `Cryptographic` to `signing_facts`, `Representational` to
    /// `encoding_noise`. Inside a bucket the key is the `Debug` rendering of
    /// the finding's severity and the value is a saturating count.
    pub fn stratify_by_gauge(findings: &[Finding]) -> SeverityStrata {
        findings
            .iter()
            .fold(SeverityStrata::default(), |mut strata, finding| {
                let histogram = match GaugeClass::of(&finding.id) {
                    GaugeClass::Semantic => &mut strata.actionable,
                    GaugeClass::Cryptographic => &mut strata.signing_facts,
                    GaugeClass::Representational => &mut strata.encoding_noise,
                };
                histogram
                    .entry(format!("{:?}", finding.severity))
                    .and_modify(|count| *count = count.saturating_add(1))
                    .or_insert(1);
                strata
            })
    }

    /// Projects at most `cap` Critical/High findings to JSON, best-ranked first.
    ///
    /// Ranking: findings whose gauge class is `Semantic` come first, then
    /// ascending `Severity` order, then ascending id. The sort is stable, so
    /// full ties keep their input order. Each projected object carries the
    /// keys `severity`, `id`, `detail` and `cwe`.
    ///
    /// NOTE(review): the severity tiebreak relies on `Severity`'s `Ord`
    /// placing `Critical` before `High` — confirm against its definition.
    pub fn rank_top_findings(findings: &[Finding], cap: usize) -> Vec<Value> {
        use droidsaw_common::Severity;

        // True when the finding's id belongs to the Semantic gauge class.
        fn is_semantic(finding: &Finding) -> bool {
            GaugeClass::of(&finding.id) == GaugeClass::Semantic
        }

        let mut shortlist: Vec<&Finding> = Vec::new();
        for finding in findings {
            if matches!(finding.severity, Severity::Critical | Severity::High) {
                shortlist.push(finding);
            }
        }

        shortlist.sort_by(|lhs, rhs| {
            // Operands are swapped so that `true` (semantic) sorts ahead of `false`.
            let gauge_order = is_semantic(rhs).cmp(&is_semantic(lhs));
            if gauge_order.is_ne() {
                return gauge_order;
            }
            let severity_order = lhs.severity.cmp(&rhs.severity);
            if severity_order.is_ne() {
                return severity_order;
            }
            lhs.id.cmp(&rhs.id)
        });

        shortlist.truncate(cap);
        shortlist
            .iter()
            .map(|finding| {
                serde_json::json!({
                    "severity": format!("{:?}", finding.severity),
                    "id": finding.id,
                    "detail": finding.detail,
                    "cwe": finding.cwe,
                })
            })
            .collect()
    }
}
/// Metadata attached to every `AuditEnvelope`.
///
/// NOTE(review): the per-field meanings are inferred from names and test
/// values — confirm against the producers.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct AuditMeta {
    /// Presumably: an item count for the response.
    pub count: u64,
    /// Presumably: whether the response was shortened.
    pub truncated: bool,
    /// Free-text hint; the tests use values such as "pair with query".
    pub hint: String,
    /// Names of related items; the tests use values such as "query" and "export".
    pub related: Vec<String>,
    /// Presumably: the size of the worker thread pool. Falls back to `0` when
    /// the key is missing from the input JSON.
    #[serde(default)]
    pub thread_pool_size: usize,
}
#[cfg(test)]
mod tests {
use super::*;
/// Builds the smallest envelope the tests start from: current schema version,
/// no findings, zero counters, every optional field `None`.
fn minimal_envelope() -> AuditEnvelope {
    let meta = AuditMeta {
        count: 0,
        truncated: false,
        hint: String::from("test"),
        related: Vec::new(),
        thread_pool_size: 1,
    };
    AuditEnvelope {
        schema_version: AUDIT_ENVELOPE_VERSION,
        findings: Vec::new(),
        finding_count: 0,
        findings_emitted: 0,
        taint_flow_count: 0,
        severity_summary: BTreeMap::new(),
        severity_by_gauge: SeverityStrata::default(),
        top_findings: Vec::new(),
        truncated: false,
        db_path: None,
        db_queries: None,
        finding_xrefs_written: None,
        detectors: None,
        trufflehog: None,
        semgrep: None,
        timings_ms: None,
        apk_summary: None,
        meta,
    }
}
/// The minimal envelope survives a JSON round-trip with its optional fields
/// still absent.
#[test]
fn cli_shape_roundtrip() {
    let encoded = serde_json::to_string(&minimal_envelope()).expect("serialize");
    let decoded: AuditEnvelope = serde_json::from_str(&encoded).expect("deserialize");

    assert_eq!(decoded.schema_version, AUDIT_ENVELOPE_VERSION);
    assert!(decoded.findings.is_empty());
    assert!(decoded.db_path.is_none());
    assert!(decoded.db_queries.is_none());
    assert!(!decoded.truncated);
}
/// An envelope with the database-related optional fields populated survives a
/// JSON round-trip.
#[test]
fn mcp_shape_roundtrip() {
    let original = AuditEnvelope {
        finding_count: 12,
        findings_emitted: 15,
        taint_flow_count: 3,
        severity_summary: BTreeMap::from([
            (String::from("High"), 8),
            (String::from("Critical"), 4),
        ]),
        top_findings: vec![
            serde_json::json!({"severity": "Critical", "id": "HARDCODED_KEY", "detail": "...", "cwe": 321}),
        ],
        truncated: true,
        db_path: Some(String::from("/tmp/droidsaw-audit-abc.db")),
        db_queries: Some(serde_json::json!({"all_high": "SELECT ..."})),
        finding_xrefs_written: Some(42),
        timings_ms: Some(serde_json::json!({"core_total": 1200})),
        meta: AuditMeta {
            count: 1,
            truncated: true,
            hint: String::from("pair with query for full list"),
            related: vec![String::from("query"), String::from("investigate")],
            thread_pool_size: 1,
        },
        ..minimal_envelope()
    };

    let encoded = serde_json::to_string(&original).expect("serialize");
    let decoded: AuditEnvelope = serde_json::from_str(&encoded).expect("deserialize");

    assert_eq!(decoded.schema_version, AUDIT_ENVELOPE_VERSION);
    assert_eq!(decoded.finding_count, 12);
    assert_eq!(decoded.findings_emitted, 15);
    assert_eq!(decoded.taint_flow_count, 3);
    assert_eq!(decoded.severity_summary.get("Critical"), Some(&4));
    assert_eq!(decoded.top_findings.len(), 1);
    assert!(decoded.truncated);
    assert_eq!(decoded.db_path.as_deref(), Some("/tmp/droidsaw-audit-abc.db"));
    assert!(decoded.db_queries.is_some());
    assert_eq!(decoded.finding_xrefs_written, Some(42));
    assert!(decoded.timings_ms.is_some());
    assert_eq!(decoded.meta.count, 1);
    assert!(decoded.meta.truncated);
}
/// Only the HBC/DEX/BRIDGE `*_TAINT_FLOW` ids are counted; four of the seven
/// fixture findings carry one of them.
#[test]
fn count_taint_flow_findings_matches_three_ids_only() {
    use droidsaw_common::{Finding, Layer, Severity};

    let hbc_flow = Finding::new("HBC_TAINT_FLOW", Layer::Hbc, Severity::Critical, "eval");
    let dex_flow_ipc = Finding::new("DEX_TAINT_FLOW", Layer::Dex, Severity::High, "ipc");
    let dex_flow_fs = Finding::new("DEX_TAINT_FLOW", Layer::Dex, Severity::High, "fs");
    let bridge_flow = Finding::new("BRIDGE_TAINT_FLOW", Layer::Dex, Severity::High, "bridge");
    let jni_call = Finding::new("JNI_TAINTED_NATIVE_CALL", Layer::Dex, Severity::Medium, "jni");
    let ambiguous = Finding::new("BRIDGE_RESOLUTION_AMBIGUOUS", Layer::Dex, Severity::Info, "amb");
    let hardcoded = Finding::new("HARDCODED_KEY", Layer::Apk, Severity::Critical, "key");

    let findings = vec![
        hbc_flow,
        dex_flow_ipc,
        dex_flow_fs,
        bridge_flow,
        jni_call,
        ambiguous,
        hardcoded,
    ];

    let n = AuditEnvelope::count_taint_flow_findings(&findings);
    assert_eq!(
        n, 4,
        "must count exactly the three TAINT_FLOW ids (HBC, DEX, BRIDGE); \
         JNI_TAINTED_NATIVE_CALL and BRIDGE_RESOLUTION_AMBIGUOUS excluded by design"
    );
}
/// An empty slice yields a count of zero.
#[test]
fn count_taint_flow_findings_zero_on_empty_input() {
    let no_findings: &[Finding] = &[];
    let counted = AuditEnvelope::count_taint_flow_findings(no_findings);
    assert_eq!(counted, 0);
}
/// Findings with other ids do not contribute to the taint-flow count.
#[test]
fn count_taint_flow_findings_zero_on_non_taint_findings_only() {
    use droidsaw_common::{Finding, Layer, Severity};

    let mut findings = Vec::new();
    findings.push(Finding::new("HARDCODED_KEY", Layer::Apk, Severity::Critical, "x"));
    findings.push(Finding::new("V1_MANIFEST_MISMATCH", Layer::Apk, Severity::High, "y"));

    let counted = AuditEnvelope::count_taint_flow_findings(&findings);
    assert_eq!(counted, 0);
}
/// The serialized envelope contains the `schema_version` key with the current
/// version number.
#[test]
fn schema_version_present_in_json() {
    let json = serde_json::to_string(&minimal_envelope()).expect("serialize");
    let expected_fragment = format!("\"schema_version\":{AUDIT_ENVELOPE_VERSION}");
    let found = json.contains(&expected_fragment);
    assert!(found, "schema_version must appear in JSON; got: {json}");
}
/// Optional fields that are `None` must not appear as keys in the JSON output.
#[test]
fn none_optional_fields_omitted_from_json() {
    let json = serde_json::to_string(&minimal_envelope()).expect("serialize");

    // (quoted key that must be missing, failure message)
    let must_be_absent = [
        ("\"db_path\"", "db_path must be absent when None"),
        ("\"db_queries\"", "db_queries must be absent when None"),
        ("\"detectors\"", "detectors must be absent when None"),
        ("\"timings_ms\"", "timings_ms must be absent when None"),
        ("\"trufflehog\"", "trufflehog must be absent when None"),
        ("\"semgrep\"", "semgrep must be absent when None"),
        ("\"apk_summary\"", "apk_summary must be absent when None"),
    ];
    for (quoted_key, why) in must_be_absent {
        assert!(!json.contains(quoted_key), "{why}");
    }
}
/// An envelope without `db_path` but with `detectors` set parses back with
/// exactly those fields absent and present.
#[test]
fn cli_emits_mcp_parses() {
    let emitted = AuditEnvelope {
        finding_count: 1,
        findings_emitted: 1,
        severity_summary: BTreeMap::from([(String::from("High"), 1)]),
        detectors: Some(serde_json::json!({"semgrep": {"status": "skipped_by_mode"}})),
        meta: AuditMeta {
            count: 1,
            truncated: false,
            hint: String::from("filter by severity via jq"),
            related: vec![String::from("export"), String::from("audit")],
            thread_pool_size: 1,
        },
        ..minimal_envelope()
    };

    let cli_json = serde_json::to_string(&emitted).expect("serialize");
    let parsed: AuditEnvelope = serde_json::from_str(&cli_json).expect("deserialize");

    assert_eq!(parsed.schema_version, AUDIT_ENVELOPE_VERSION);
    assert_eq!(parsed.finding_count, 1);
    assert!(parsed.db_path.is_none());
    assert!(parsed.detectors.is_some());
}
/// An envelope carrying `Some(ApkSummary)` must emit the `apk_summary` key and
/// return every summary field unchanged after a JSON round-trip.
#[test]
fn apk_summary_roundtrip() {
    let mut env = minimal_envelope();
    env.apk_summary = Some(ApkSummary {
        has_hbc: true,
        hbc_bytes: 1234,
        hbc_function_count: 42,
        dex_count: 2,
        dex_total_bytes: 5678,
        dex_methods_total: 80_000,
        dex_classes_total: 5_500,
    });

    let json = serde_json::to_string(&env).expect("serialize");
    assert!(json.contains("\"apk_summary\""), "apk_summary must appear in JSON when Some");

    let back: AuditEnvelope = serde_json::from_str(&json).expect("deserialize");
    let summary = back.apk_summary.expect("apk_summary must survive round-trip");

    // A bool is asserted directly; `assert_eq!(x, true)` is flagged by
    // clippy::bool_assert_comparison.
    assert!(summary.has_hbc, "has_hbc must survive round-trip");
    assert_eq!(summary.hbc_bytes, 1234, "hbc_bytes must survive round-trip");
    assert_eq!(summary.hbc_function_count, 42, "hbc_function_count must survive round-trip");
    assert_eq!(summary.dex_count, 2, "dex_count must survive round-trip");
    assert_eq!(summary.dex_total_bytes, 5678, "dex_total_bytes must survive round-trip");
    assert_eq!(summary.dex_methods_total, 80_000, "dex_methods_total must survive round-trip");
    assert_eq!(summary.dex_classes_total, 5_500, "dex_classes_total must survive round-trip");
}
/// An envelope with `db_path`, `top_findings` and `timings_ms` set parses back
/// with those values intact and an empty inline findings list.
#[test]
fn mcp_emits_cli_parses() {
    let emitted = AuditEnvelope {
        finding_count: 5,
        findings_emitted: 7,
        db_path: Some(String::from("/tmp/test.db")),
        top_findings: vec![serde_json::json!({"severity": "High", "id": "FOO"})],
        truncated: false,
        timings_ms: Some(serde_json::json!({"core_total": 800})),
        meta: AuditMeta {
            count: 1,
            truncated: false,
            hint: String::from("pair with query"),
            related: vec![String::from("query")],
            thread_pool_size: 1,
        },
        ..minimal_envelope()
    };

    let mcp_json = serde_json::to_string(&emitted).expect("serialize");
    let parsed: AuditEnvelope = serde_json::from_str(&mcp_json).expect("deserialize");

    assert_eq!(parsed.schema_version, AUDIT_ENVELOPE_VERSION);
    assert_eq!(parsed.finding_count, 5);
    assert_eq!(parsed.db_path.as_deref(), Some("/tmp/test.db"));
    assert!(parsed.findings.is_empty());
    assert_eq!(parsed.top_findings.len(), 1);
}
/// Fixture with three findings, one per gauge class (the classification itself
/// is pinned by `fixture_ids_classify_as_intended`).
fn mixed_gauge_findings() -> Vec<Finding> {
    use droidsaw_common::{Finding, Layer, Severity};

    let signing_fact =
        Finding::new("SPLIT_KEY_MISMATCH", Layer::Apk, Severity::Critical, "cert mismatch");
    let encoding_noise =
        Finding::new("ENCRYPTED_ASSET", Layer::Apk, Severity::High, "encrypted blob");
    let actionable = Finding::new("DEX_TAINT_FLOW", Layer::Dex, Severity::High, "source to sink");

    vec![signing_fact, encoding_noise, actionable]
}
/// Pins the gauge class of each id used by the mixed-gauge fixture.
#[test]
fn fixture_ids_classify_as_intended() {
    use droidsaw_common::GaugeClass;

    let crypto = GaugeClass::of("SPLIT_KEY_MISMATCH");
    let representational = GaugeClass::of("ENCRYPTED_ASSET");
    let semantic = GaugeClass::of("DEX_TAINT_FLOW");

    assert_eq!(crypto, GaugeClass::Cryptographic);
    assert_eq!(representational, GaugeClass::Representational);
    assert_eq!(semantic, GaugeClass::Semantic);
}
/// Each fixture finding lands in the bucket of its gauge class, keyed by its
/// severity name.
#[test]
fn stratify_buckets_by_gauge_class() {
    let strata = AuditEnvelope::stratify_by_gauge(&mixed_gauge_findings());
    // Count stored in `bucket` for `severity`, or `None` when there is no entry.
    let tally = |bucket: &BTreeMap<String, u64>, severity: &str| bucket.get(severity).copied();

    assert_eq!(
        tally(&strata.signing_facts, "Critical"),
        Some(1),
        "Critical SPLIT_KEY_MISMATCH must bucket into signing_facts",
    );
    assert_eq!(
        tally(&strata.actionable, "Critical"),
        None,
        "actionable Critical count must be 0 (signing fact does not inflate the actionable histogram)",
    );
    assert_eq!(
        tally(&strata.encoding_noise, "High"),
        Some(1),
        "High ENCRYPTED_ASSET must bucket into encoding_noise",
    );
    assert_eq!(
        tally(&strata.actionable, "High"),
        Some(1),
        "High DEX_TAINT_FLOW must bucket into actionable",
    );
    assert_eq!(strata.actionable.len(), 1, "actionable holds only the Semantic High");
}
/// All three fixture findings are projected, and the Semantic one leads the
/// ranking.
#[test]
fn rank_top_findings_puts_semantic_first() {
    let fixture = mixed_gauge_findings();
    let ranked = AuditEnvelope::rank_top_findings(&fixture, TOP_FINDINGS_CAP);
    assert_eq!(ranked.len(), 3, "all three Critical/High findings project");

    let leading_id = ranked[0].get("id").and_then(|value| value.as_str());
    assert_eq!(
        leading_id,
        Some("DEX_TAINT_FLOW"),
        "the Semantic finding must rank first, ahead of the Critical signing fact",
    );
}
/// For every severity, the three per-gauge counts add up to the count in a
/// flat severity histogram built from the same findings.
#[test]
fn strata_per_severity_sum_equals_flat_summary() {
    let findings = mixed_gauge_findings();
    let strata = AuditEnvelope::stratify_by_gauge(&findings);

    // Flat histogram: severity name -> number of findings.
    let mut flat: BTreeMap<String, u64> = BTreeMap::new();
    for finding in &findings {
        *flat.entry(format!("{:?}", finding.severity)).or_insert(0) += 1;
    }

    // Union of every severity key seen in the flat map or in any bucket.
    let keys: std::collections::BTreeSet<String> = flat
        .keys()
        .chain(strata.actionable.keys())
        .chain(strata.signing_facts.keys())
        .chain(strata.encoding_noise.keys())
        .cloned()
        .collect();

    for sev in &keys {
        let in_actionable = strata.actionable.get(sev).copied().unwrap_or(0);
        let in_signing = strata.signing_facts.get(sev).copied().unwrap_or(0);
        let in_encoding = strata.encoding_noise.get(sev).copied().unwrap_or(0);
        let strata_sum = in_actionable + in_signing + in_encoding;
        let flat_count = flat.get(sev).copied().unwrap_or(0);
        assert_eq!(
            strata_sum, flat_count,
            "per-severity strata total must equal flat severity_summary for {sev}",
        );
    }
}
/// JSON with no `severity_by_gauge` key (and no `meta.thread_pool_size`) still
/// deserializes; the strata fall back to their empty default.
#[test]
fn old_schema_without_severity_by_gauge_deserializes() {
    let legacy_doc = serde_json::json!({
        "schema_version": 1,
        "findings": [],
        "finding_count": 0,
        "findings_emitted": 0,
        "taint_flow_count": 0,
        "severity_summary": {"High": 2},
        "top_findings": [],
        "truncated": false,
        "meta": {
            "count": 0,
            "truncated": false,
            "hint": "x",
            "related": []
        }
    });
    let legacy_text = legacy_doc.to_string();

    let parsed: AuditEnvelope =
        serde_json::from_str(&legacy_text).expect("old-schema JSON must still deserialize");

    let strata = &parsed.severity_by_gauge;
    assert_eq!(*strata, SeverityStrata::default());
    assert!(strata.actionable.is_empty());
    assert!(strata.signing_facts.is_empty());
    assert!(strata.encoding_noise.is_empty());
    assert_eq!(parsed.severity_summary.get("High").copied(), Some(2));
}
}