use std::collections::BTreeMap;
use droidsaw_common::telemetry::SilentSkipSnapshot;
use droidsaw_common::threat_model::{AcquisitionMetadata, InconclusiveReason};
use rusqlite::Connection;
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use super::Result;
/// Envelope schema version stamped into every envelope built by
/// `produce_unsigned_envelope`.
pub const SCHEMA_VERSION: u32 = 1;
/// Lowest envelope schema version treated as supported.
///
/// An envelope that deserializes without a `schema_version` field gets 0,
/// which is below this bound.
/// NOTE(review): no enforcement of this bound is visible in this part of the
/// file outside the tests — presumably the consumer checks it; confirm.
pub const MIN_SUPPORTED_VERSION: u32 = 1;
/// Serde fallback used for `EvidenceEnvelope::schema_version` when the field
/// is missing from the input.
fn default_schema_version() -> u32 {
    // Version 0 marks an envelope that carried no explicit schema version.
    const LEGACY_SCHEMA_VERSION: u32 = 0;
    LEGACY_SCHEMA_VERSION
}
/// Summary record describing one set of findings: counts, a hash over the
/// canonical finding bytes, and the metadata of the run that produced them.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct EvidenceEnvelope {
/// Schema version of this envelope. Falls back to
/// `default_schema_version()` (0) when the field is absent on deserialize.
#[serde(default = "default_schema_version")]
pub schema_version: u32,
/// Version string of the tool, as passed to `produce_unsigned_envelope`.
pub tool_version: String,
/// Hash of the input APK. `produce_unsigned_envelope` leaves this empty.
pub input_apk_hash: String,
/// Acquisition metadata, cloned from the value the caller supplied.
pub acquisition: AcquisitionMetadata,
/// Number of finding rows included in the envelope.
pub finding_count: u64,
/// Number of findings per recognized `inconclusive:<reason>` completeness tag.
pub completeness_summary: BTreeMap<InconclusiveReason, u64>,
/// Lowercase hex SHA-256 of the bytes in `findings_ndjson`.
pub finding_set_hash: String,
/// Canonical findings, one JSON object per line, newline-separated with no
/// trailing newline. Skipped by serde: never serialized, and empty after
/// deserialization.
#[serde(skip)]
pub findings_ndjson: Vec<u8>,
/// Optional telemetry snapshot; `None` when absent from the input and when
/// produced by `produce_unsigned_envelope`.
#[serde(default)]
pub silent_skip_snapshot: Option<SilentSkipSnapshot>,
}
impl EvidenceEnvelope {
    /// Returns the schema version recorded in this envelope.
    ///
    /// The value is 0 for an envelope that was deserialized from input
    /// lacking a `schema_version` field.
    pub fn schema_version(&self) -> u32 {
        let Self { schema_version, .. } = self;
        *schema_version
    }
}
/// Builds an [`EvidenceEnvelope`] from every row of the `findings` table.
///
/// Rows are read in `severity, layer, id_tag, rowid` order, each one is
/// rendered as a canonical JSON line, and the lines are joined with `\n`
/// (no trailing newline). The envelope carries those bytes, their SHA-256
/// hex digest, the row count, and a per-reason tally of recognized
/// `inconclusive:<reason>` completeness values.
///
/// `input_apk_hash` is left empty and `silent_skip_snapshot` is `None` in the
/// returned value.
///
/// # Errors
///
/// Returns an error when the statement cannot be prepared or executed, when a
/// column cannot be read as the expected type, or when a JSON-bearing column
/// fails to parse or the canonical line fails to serialize.
pub fn produce_unsigned_envelope(
    db: &Connection,
    acquisition: &AcquisitionMetadata,
    tool_version: &str,
) -> Result<EvidenceEnvelope> {
    // The explicit ORDER BY is what makes the byte stream reproducible.
    const FINDINGS_QUERY: &str = "SELECT \
        rowid, severity, layer, id_tag, gauge_class, source, confidence, \
        dismiss_reason, detail, cwe, extra, \
        adversary_profile_relevance, completeness, \
        source_api, source_api_args, resolution \
        FROM findings \
        ORDER BY severity, layer, id_tag, rowid";

    let mut query = db.prepare(FINDINGS_QUERY)?;
    // Column indices follow the SELECT list above.
    let fetched = query.query_map([], |r| {
        Ok(FindingRow {
            rowid: r.get(0)?,
            severity: r.get(1)?,
            layer: r.get(2)?,
            id_tag: r.get(3)?,
            gauge_class: r.get(4)?,
            source: r.get(5)?,
            confidence: r.get(6)?,
            dismiss_reason: r.get(7)?,
            detail: r.get(8)?,
            cwe: r.get(9)?,
            extra: r.get(10)?,
            adversary_profile_relevance: r.get(11)?,
            completeness: r.get(12)?,
            source_api: r.get(13)?,
            source_api_args: r.get(14)?,
            resolution: r.get(15)?,
        })
    })?;

    let mut ndjson: Vec<u8> = Vec::new();
    let mut total: u64 = 0;
    let mut per_reason = BTreeMap::<InconclusiveReason, u64>::new();

    for fetched_row in fetched {
        let finding = fetched_row?;

        // Only recognized `inconclusive:<reason>` tags are tallied.
        if let Some(reason) = parse_inconclusive_reason(&finding.completeness) {
            per_reason
                .entry(reason)
                .and_modify(|n| *n = n.saturating_add(1))
                .or_insert(1);
        }

        let line = canonicalize_finding_row(&finding)?;
        // Newlines separate lines; none is written before the first or after the last.
        if !ndjson.is_empty() {
            ndjson.push(b'\n');
        }
        ndjson.extend_from_slice(&line);
        total = total.saturating_add(1);
    }

    let digest_hex = sha256_hex(&ndjson);

    Ok(EvidenceEnvelope {
        schema_version: SCHEMA_VERSION,
        tool_version: tool_version.to_owned(),
        input_apk_hash: String::new(),
        acquisition: acquisition.clone(),
        finding_count: total,
        completeness_summary: per_reason,
        finding_set_hash: digest_hex,
        findings_ndjson: ndjson,
        silent_skip_snapshot: None,
    })
}
/// One row of the `findings` table, with fields in the same order as the
/// SELECT list in `produce_unsigned_envelope`.
#[derive(Debug)]
struct FindingRow {
// SQLite row id; it is emitted into the canonical line as well.
rowid: i64,
severity: String,
layer: String,
id_tag: String,
gauge_class: String,
source: String,
confidence: String,
dismiss_reason: Option<String>,
detail: String,
cwe: Option<i64>,
// JSON text, or NULL/empty for "no value" (see `parse_optional_json`).
extra: Option<String>,
// JSON text; parsed unconditionally, so it must always be valid JSON.
adversary_profile_relevance: String,
// Plain text tag; values of the form `inconclusive:<reason>` are tallied
// into the envelope's completeness summary.
completeness: String,
source_api: Option<String>,
// JSON text, or NULL/empty for "no value".
source_api_args: Option<String>,
// JSON text, or NULL/empty for "no value".
resolution: Option<String>,
}
fn canonicalize_finding_row(row: &FindingRow) -> Result<Vec<u8>> {
let mut map: BTreeMap<&'static str, serde_json::Value> = BTreeMap::new();
map.insert("rowid", serde_json::json!(row.rowid));
map.insert("severity", serde_json::Value::String(row.severity.clone()));
map.insert("layer", serde_json::Value::String(row.layer.clone()));
map.insert("id_tag", serde_json::Value::String(row.id_tag.clone()));
map.insert("gauge_class", serde_json::Value::String(row.gauge_class.clone()));
map.insert("source", serde_json::Value::String(row.source.clone()));
map.insert("confidence", serde_json::Value::String(row.confidence.clone()));
map.insert("dismiss_reason", to_str_or_null(row.dismiss_reason.as_ref()));
map.insert("detail", serde_json::Value::String(row.detail.clone()));
map.insert(
"cwe",
row.cwe
.map(|v| serde_json::Value::Number(v.into()))
.unwrap_or(serde_json::Value::Null),
);
map.insert("extra", parse_optional_json(row.extra.as_deref())?);
map.insert("adversary_profile_relevance", parse_required_json(&row.adversary_profile_relevance)?);
map.insert("completeness", serde_json::Value::String(row.completeness.clone()));
map.insert("source_api", to_str_or_null(row.source_api.as_ref()));
map.insert("source_api_args", parse_optional_json(row.source_api_args.as_deref())?);
map.insert("resolution", parse_optional_json(row.resolution.as_deref())?);
let canonical = sort_value_keys(serde_json::Value::Object(
map.into_iter().map(|(k, v)| (k.to_string(), v)).collect(),
));
Ok(serde_json::to_vec(&canonical)?)
}
fn sort_value_keys(value: serde_json::Value) -> serde_json::Value {
match value {
serde_json::Value::Object(m) => {
let mut sorted: BTreeMap<String, serde_json::Value> = BTreeMap::new();
for (k, v) in m {
sorted.insert(k, sort_value_keys(v));
}
serde_json::Value::Object(sorted.into_iter().collect())
}
serde_json::Value::Array(a) => {
serde_json::Value::Array(a.into_iter().map(sort_value_keys).collect())
}
other => other,
}
}
/// Converts an optional string into a JSON string, or JSON `null` when absent.
fn to_str_or_null(s: Option<&String>) -> serde_json::Value {
    s.map_or(serde_json::Value::Null, |text| {
        serde_json::Value::String(text.clone())
    })
}
/// Parses optional JSON text; a missing or empty string yields JSON `null`.
///
/// # Errors
///
/// Returns an error when a non-empty string is not valid JSON.
fn parse_optional_json(s: Option<&str>) -> Result<serde_json::Value> {
    match s {
        Some(text) if !text.is_empty() => Ok(serde_json::from_str(text)?),
        _ => Ok(serde_json::Value::Null),
    }
}
/// Parses JSON text that must be present.
///
/// # Errors
///
/// Returns an error when `s` is not valid JSON (an empty string included).
fn parse_required_json(s: &str) -> Result<serde_json::Value> {
    let parsed: serde_json::Value = serde_json::from_str(s)?;
    Ok(parsed)
}
/// Maps a completeness tag of the form `inconclusive:<reason>` to its
/// [`InconclusiveReason`].
///
/// Returns `None` when the prefix is missing or the reason is not one of the
/// names listed below.
fn parse_inconclusive_reason(s: &str) -> Option<InconclusiveReason> {
    const PREFIX: &str = "inconclusive:";

    s.strip_prefix(PREFIX).and_then(|tag| match tag {
        "packed" => Some(InconclusiveReason::Packed),
        "native_only" => Some(InconclusiveReason::NativeOnly),
        "dynamic_load" => Some(InconclusiveReason::DynamicLoad),
        "obfuscation_threshold_exceeded" => {
            Some(InconclusiveReason::ObfuscationThresholdExceeded)
        }
        "unsupported_format" => Some(InconclusiveReason::UnsupportedFormat),
        _ => None,
    })
}
/// Returns the SHA-256 digest of `bytes` as a 64-character lowercase hex string.
fn sha256_hex(bytes: &[u8]) -> String {
    const HEX: &[u8; 16] = b"0123456789abcdef";
    // Checked lookup instead of indexing; a nibble is always below 16, so the
    // `?` fallback is never produced.
    let nibble_char =
        |nibble: u8| char::from(HEX.get(usize::from(nibble)).copied().unwrap_or(b'?'));

    let mut hasher = Sha256::new();
    hasher.update(bytes);
    let digest = hasher.finalize();

    let mut hex = String::with_capacity(64);
    for byte in digest {
        hex.push(nibble_char(byte >> 4));
        hex.push(nibble_char(byte & 0x0F));
    }
    hex
}
// Unit tests for envelope production, canonical NDJSON output, hashing, and
// schema-version (de)serialization.
#[cfg(test)]
mod tests {
use super::*;
// Opens an in-memory SQLite database containing an empty `findings` table
// with the columns that `produce_unsigned_envelope` selects.
fn fresh_db() -> Connection {
let conn = Connection::open_in_memory().expect("open in-memory");
conn.execute_batch(
"CREATE TABLE findings (
rowid INTEGER PRIMARY KEY,
severity TEXT NOT NULL,
layer TEXT NOT NULL,
id_tag TEXT NOT NULL,
gauge_class TEXT NOT NULL DEFAULT 'Semantic',
source TEXT NOT NULL DEFAULT 'manifest',
confidence TEXT NOT NULL DEFAULT 'unverified',
dismiss_reason TEXT,
detail TEXT NOT NULL,
cwe INTEGER,
extra TEXT,
adversary_profile_relevance TEXT NOT NULL DEFAULT '[]',
completeness TEXT NOT NULL DEFAULT 'present',
source_api TEXT,
source_api_args TEXT,
resolution TEXT
);",
)
.expect("schema");
conn
}
// Inserts one finding with a fixed `detail` of "test detail"; columns not
// listed in the INSERT take the table defaults, and `rowid` is assigned by
// SQLite.
fn insert(
db: &Connection,
severity: &str,
layer: &str,
id_tag: &str,
completeness: &str,
extra: Option<&str>,
) {
db.execute(
"INSERT INTO findings (severity, layer, id_tag, detail, extra, completeness) \
VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
rusqlite::params![severity, layer, id_tag, "test detail", extra, completeness],
)
.expect("insert");
}
// An empty table yields zero findings, empty NDJSON, an empty summary, and
// the SHA-256 digest of empty input.
#[test]
fn empty_db_produces_empty_ndjson_with_zero_count() {
let db = fresh_db();
let env = produce_unsigned_envelope(&db, &AcquisitionMetadata::default(), "0.0.0")
.expect("produce");
assert_eq!(env.finding_count, 0);
assert!(env.findings_ndjson.is_empty());
assert!(env.completeness_summary.is_empty());
assert_eq!(
env.finding_set_hash,
"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
);
}
// Producing twice from the same database gives identical bytes and hash.
#[test]
fn two_runs_produce_identical_hash() {
let db = fresh_db();
insert(&db, "High", "Apk", "TM_A", "present", None);
insert(&db, "Critical", "Hbc", "TM_B", "absent", Some(r#"{"k":"v"}"#));
let acquisition = AcquisitionMetadata::default();
let a = produce_unsigned_envelope(&db, &acquisition, "1.0").expect("a");
let b = produce_unsigned_envelope(&db, &acquisition, "1.0").expect("b");
assert_eq!(a.finding_set_hash, b.finding_set_hash);
assert_eq!(a.findings_ndjson, b.findings_ndjson);
}
// Lines are separated by '\n' with no trailing newline, so N rows contain
// N - 1 newline bytes.
#[test]
fn ndjson_has_exactly_n_minus_one_newlines_for_n_findings() {
let db = fresh_db();
insert(&db, "High", "Apk", "TM_A", "present", None);
insert(&db, "Critical", "Hbc", "TM_B", "absent", None);
insert(&db, "Medium", "Apk", "TM_C", "present", None);
let env = produce_unsigned_envelope(&db, &AcquisitionMetadata::default(), "1.0")
.expect("produce");
assert_eq!(env.finding_count, 3);
let lines = env.findings_ndjson.iter().filter(|&&b| b == b'\n').count();
assert_eq!(lines, 2, "3 rows ⇒ 2 separator newlines (no trailing newline)");
}
// Only `inconclusive:<reason>` rows are tallied; a reason that never occurs
// has no entry in the summary.
#[test]
fn completeness_summary_counts_inconclusive_reasons() {
let db = fresh_db();
insert(&db, "High", "Apk", "TM_A", "inconclusive:packed", None);
insert(&db, "High", "Apk", "TM_B", "inconclusive:packed", None);
insert(&db, "High", "Apk", "TM_C", "inconclusive:native_only", None);
insert(&db, "High", "Apk", "TM_D", "present", None);
let env = produce_unsigned_envelope(&db, &AcquisitionMetadata::default(), "1.0")
.expect("produce");
assert_eq!(
env.completeness_summary.get(&InconclusiveReason::Packed).copied(),
Some(2),
);
assert_eq!(
env.completeness_summary.get(&InconclusiveReason::NativeOnly).copied(),
Some(1),
);
assert!(!env
.completeness_summary
.contains_key(&InconclusiveReason::UnsupportedFormat));
}
// Keys are emitted in ascending order, both for the top-level finding
// object and for the JSON nested in `extra`.
#[test]
fn ndjson_keys_are_sorted_alphabetically() {
let db = fresh_db();
insert(&db, "High", "Apk", "TM_A", "present", Some(r#"{"z":"last","a":"first"}"#));
let env = produce_unsigned_envelope(&db, &AcquisitionMetadata::default(), "1.0")
.expect("produce");
let s = std::str::from_utf8(&env.findings_ndjson).expect("utf8");
let pos_confidence = s.find("\"confidence\"").expect("confidence present");
let pos_detail = s.find("\"detail\"").expect("detail present");
let pos_id_tag = s.find("\"id_tag\"").expect("id_tag present");
assert!(pos_confidence < pos_detail);
assert!(pos_detail < pos_id_tag);
let pos_a = s.find("\"a\":").expect("a present");
let pos_z = s.find("\"z\":").expect("z present");
assert!(pos_a < pos_z, "nested extra keys must sort: {s}");
}
// NOTE(review): despite the name, this test only asserts that both hashes
// are non-empty and that the finding counts match; it never compares the
// canonical bytes or the hashes. `rowid` is part of each canonical line and
// is presumably assigned in insertion order here, so the two byte streams
// would differ — confirm the intended guarantee before tightening the test.
#[test]
fn order_of_inserts_does_not_change_canonical_bytes() {
let db1 = fresh_db();
insert(&db1, "Critical", "Hbc", "TM_B", "present", None);
insert(&db1, "High", "Apk", "TM_A", "present", None);
let db2 = fresh_db();
insert(&db2, "High", "Apk", "TM_A", "present", None);
insert(&db2, "Critical", "Hbc", "TM_B", "present", None);
let acquisition = AcquisitionMetadata::default();
let a = produce_unsigned_envelope(&db1, &acquisition, "1.0").expect("a");
let b = produce_unsigned_envelope(&db2, &acquisition, "1.0").expect("b");
assert!(!a.finding_set_hash.is_empty());
assert!(!b.finding_set_hash.is_empty());
assert_eq!(a.finding_count, b.finding_count);
}
// Checks the hex encoder against the SHA-256 digest of "abc" and verifies
// that the output is 64 lowercase hex characters.
#[test]
fn sha256_hex_format_matches_known_vector() {
let h = sha256_hex(b"abc");
assert_eq!(
h,
"ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"
);
assert_eq!(h.len(), 64);
assert!(h.chars().all(|c| c.is_ascii_hexdigit() && (c.is_ascii_digit() || c.is_ascii_lowercase())));
}
// A freshly produced envelope carries SCHEMA_VERSION, serializes it under
// the `schema_version` key, and round-trips through JSON.
#[test]
fn schema_version_field_present_in_json_and_equals_schema_version_const() {
let db = fresh_db();
let env = produce_unsigned_envelope(&db, &AcquisitionMetadata::default(), "1.0")
.expect("produce");
assert_eq!(env.schema_version, SCHEMA_VERSION);
assert_eq!(env.schema_version(), SCHEMA_VERSION);
let json = serde_json::to_string(&env).expect("serialize");
assert!(
json.contains("\"schema_version\":1"),
"schema_version must appear in JSON output; got: {json}",
);
let back: EvidenceEnvelope = serde_json::from_str(&json).expect("deserialize");
assert_eq!(back.schema_version, SCHEMA_VERSION);
}
// Input without a `schema_version` field deserializes to version 0, which
// is below MIN_SUPPORTED_VERSION.
#[test]
fn legacy_envelope_without_schema_version_deserializes_to_zero() {
let legacy_json = r#"{
"tool_version": "0.9.0",
"input_apk_hash": "",
"acquisition": {"source_kind": "unknown"},
"finding_count": 0,
"completeness_summary": {},
"finding_set_hash": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
}"#;
let env: EvidenceEnvelope = serde_json::from_str(legacy_json).expect("deserialize");
assert_eq!(
env.schema_version, 0,
"legacy envelope (no schema_version field) must deserialize as 0 (legacy sentinel)",
);
assert!(
env.schema_version() < MIN_SUPPORTED_VERSION,
"legacy version {v} should be below MIN_SUPPORTED_VERSION {m}",
v = env.schema_version(),
m = MIN_SUPPORTED_VERSION,
);
}
}