// droidsaw 2.0.0 - Docs.rs

//! Unsigned evidence envelope producer.
//!
//! Reads findings out of the audit-findings sqlite DB, produces a canonical
//! NDJSON serialization (deterministic ordering + field-sorted per row),
//! computes a SHA-256 of the canonical bytes, and packages the result with
//! tool / acquisition / completeness metadata into an `EvidenceEnvelope`.
//!
//! ## Reproducibility contract
//!
//! Two runs of [`produce_unsigned_envelope`] on the same input must produce
//! the same `finding_set_hash`. Tested by `tests/threat_model_envelope.rs`.
//! Regressions usually come from:
//! - Iteration order over a non-stable container (HashMap, etc.).
//! - JSON serialization that doesn't sort keys.
//! - Floating-point timestamps or `serde_json::Value::Number` reformatting.
//!
//! The producer guards against the first two by:
//! - Sorting findings explicitly (SQL `ORDER BY`) on
//!   `(severity, layer, id_tag, rowid)`.
//! - Building every per-row JSON object through a `BTreeMap`, which sorts
//!   keys alphabetically.
//! - Recursively sorting nested JSON-object keys before re-emit.
//!
//! Row-level numeric fields (`rowid`, `cwe`) are read as integers, so no
//! floating-point formatting is involved at the top level of a row.
//!
//! ## Envelope schema versioning
//!
//! [`EvidenceEnvelope`] carries a `schema_version` field (public `u32`).
//! Consumers gate on this field when the envelope's field shape changes:
//!
//! - **Additive changes** (new optional field, widened enum): do **not** bump
//!   `SCHEMA_VERSION`. Old consumers ignore the field; new consumers read it.
//! - **Breaking changes** (removed field, renamed field, changed type): bump
//!   `SCHEMA_VERSION` by 1. Consumers that hard-require a minimum version
//!   compare against [`MIN_SUPPORTED_VERSION`].
//!
//! Note: `SCHEMA_VERSION` (public envelope contract, starts at 1) is separate
//! from the internal `FINDINGS_SCHEMA_REV` (sqlite migration rev, currently at
//! 5). They track independent concerns — sqlite schema migrations vs.
//! JSON-over-the-wire field shape — and are not kept in sync.

use std::collections::BTreeMap;

use droidsaw_common::telemetry::SilentSkipSnapshot;
use droidsaw_common::threat_model::{AcquisitionMetadata, InconclusiveReason};
use rusqlite::Connection;
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};

use super::Result;

/// Current public envelope schema version.
///
/// Bump by 1 when a **breaking** field-shape change lands (removed field,
/// renamed field, changed type). Additive changes (new optional field) do not
/// bump. See the module-level doc for the full bump policy.
///
/// [`produce_unsigned_envelope`] stamps this value into the `schema_version`
/// field of every envelope it builds.
///
/// Relationship to the internal sqlite `FINDINGS_SCHEMA_REV`: they are
/// independent version counters. `SCHEMA_VERSION` tracks the JSON wire
/// shape; `FINDINGS_SCHEMA_REV` tracks the sqlite migration ladder.
pub const SCHEMA_VERSION: u32 = 1;

/// Minimum envelope schema version that this build of droidsaw can consume.
///
/// Consumers with their own forward-compat gates compare an incoming
/// envelope's `schema_version` against this constant to decide whether they
/// need an upgrade path. Currently equal to `SCHEMA_VERSION` because this
/// is the first public version.
///
/// Envelopes that predate the `schema_version` field deserialize with the
/// legacy sentinel `0`, which is below this minimum.
pub const MIN_SUPPORTED_VERSION: u32 = 1;

/// Serde `default =` provider for `EvidenceEnvelope::schema_version`.
///
/// Returns the legacy sentinel `0` — deliberately NOT `SCHEMA_VERSION`. Serde
/// calls this only when the `schema_version` key is missing from the input,
/// i.e. when deserializing an envelope written before the field existed.
///
/// Convention: pre-existing envelopes deserialize with `schema_version = 0`,
/// signalling "no version was stamped — treat as legacy". Consumers that need
/// a strict minimum can compare `schema_version` against
/// [`MIN_SUPPORTED_VERSION`].
fn default_schema_version() -> u32 {
    0
}

/// Top-level envelope shape. Produced by [`produce_unsigned_envelope`];
/// the audit subcommand serializes it to `envelope.json` alongside the
/// canonical `findings.ndjson` payload.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct EvidenceEnvelope {
    /// Envelope schema version. Consumers gate on this field when deciding
    /// whether they can read the envelope. See [`SCHEMA_VERSION`] and
    /// [`MIN_SUPPORTED_VERSION`] for the bump policy.
    ///
    /// The serde default yields `0` when deserializing an envelope that
    /// predates this field — the `0` value means "legacy, no version was
    /// stamped". Current production envelopes carry
    /// `schema_version: SCHEMA_VERSION` (= 1).
    #[serde(default = "default_schema_version")]
    pub schema_version: u32,
    /// Tool version that produced this envelope (caller-provided —
    /// usually `env!("CARGO_PKG_VERSION")`).
    pub tool_version: String,
    /// SHA-256 of the input file analysed by droidsaw, hex-encoded.
    /// Empty string when the input came from a non-file pipeline (e.g.,
    /// stdin); operators must populate via the `--pre-analysis-hash`
    /// channel for forensic reproducibility.
    ///
    /// Note: [`produce_unsigned_envelope`] always leaves this empty; filling
    /// it in is up to the caller.
    pub input_apk_hash: String,
    /// Acquisition provenance (CLI-flag-supplied — see
    /// `--acquired-from / --operator / --case-ref / --acquired-at`).
    pub acquisition: AcquisitionMetadata,
    /// How many findings landed in the canonical NDJSON payload.
    pub finding_count: u64,
    /// Histogram: how often did each `Inconclusive` reason appear in
    /// the finding set's `completeness` column. Empty map = every
    /// finding scored `present` / `absent`.
    pub completeness_summary: BTreeMap<InconclusiveReason, u64>,
    /// SHA-256 of `findings_ndjson`, hex-encoded. The reproducibility
    /// gauge: byte-equality on this hash across two runs proves the
    /// canonicalization is stable.
    pub finding_set_hash: String,
    /// Canonical NDJSON: one finding per line, field-sorted, no
    /// trailing newline. UTF-8 by construction (sqlite TEXT columns are
    /// returned as `String`).
    ///
    /// `#[serde(skip)]`: the audit subcommand writes this as a sidecar
    /// `findings.ndjson` next to `envelope.json`. Inlining the bytes would
    /// double-encode them through serde's `Vec<u8>` (an array of integers)
    /// and bloat the metadata file.
    ///
    /// Because the field is skipped, an envelope read back from JSON has an
    /// empty `findings_ndjson`; the payload has to be loaded from the sidecar.
    #[serde(skip)]
    pub findings_ndjson: Vec<u8>,
    /// Per-site silent-skip counter readings captured at the end of the
    /// audit run. `None` when the audit pipeline did not opt into
    /// counter wiring — distinguishable from "all zeros" so operators
    /// can tell "no skips fired" from "counters not populated."
    ///
    /// Phase-2 of `silent-skip-counter-coverage`: the field lands here
    /// ahead of the per-crate wire-ups so envelope-shape change does not
    /// race against bundle-crate FFs. Phase-2 follow-up wires
    /// dex/hermes/apk to populate the snapshot.
    ///
    /// `#[serde(default)]` keeps envelopes from before this field
    /// deserialisable (a missing key becomes `None`).
    #[serde(default)]
    pub silent_skip_snapshot: Option<SilentSkipSnapshot>,
}

impl EvidenceEnvelope {
    /// Accessor for the public `schema_version` field.
    ///
    /// Consumers can compare the result against [`MIN_SUPPORTED_VERSION`] to
    /// decide whether they need an upgrade path. Returns `0` for envelopes
    /// deserialized from JSON that predates the `schema_version` field (i.e.,
    /// legacy format), and [`SCHEMA_VERSION`] for envelopes built by
    /// [`produce_unsigned_envelope`].
    pub fn schema_version(&self) -> u32 {
        self.schema_version
    }
}

/// Assemble an unsigned [`EvidenceEnvelope`] from a v2 findings DB.
///
/// * `db` — a `rusqlite::Connection` against a DB at `FINDINGS_SCHEMA_REV`
///   or higher (see `commands::migrate_findings_schema_to_current`).
/// * `acquisition` — CLI-supplied provenance; cloned into the envelope.
/// * `tool_version` — stamped into the envelope verbatim.
///
/// The returned envelope has `input_apk_hash` empty and
/// `silent_skip_snapshot` set to `None`; both are left for the caller /
/// later wiring to populate.
///
/// Determinism: every call on the same `(db, acquisition, tool_version)`
/// triple must produce the same `finding_set_hash`. The reproducibility
/// fixture under `tests/threat_model_envelope.rs` enforces this.
///
/// # Errors
///
/// Propagates failures from preparing or stepping the query, from reading a
/// column, and from canonicalizing a row (e.g. a JSON column that does not
/// parse).
pub fn produce_unsigned_envelope(
    db: &Connection,
    acquisition: &AcquisitionMetadata,
    tool_version: &str,
) -> Result<EvidenceEnvelope> {
    // The ORDER BY clause is what makes the row sequence canonical: it does
    // not depend on insertion order, vacuum status, or page layout.
    let mut stmt = db.prepare(
        "SELECT \
             rowid, severity, layer, id_tag, gauge_class, source, confidence, \
             dismiss_reason, detail, cwe, extra, \
             adversary_profile_relevance, completeness, \
             source_api, source_api_args, resolution \
         FROM findings \
         ORDER BY severity, layer, id_tag, rowid",
    )?;

    // Column indices below follow the SELECT list above, position for position.
    let rows = stmt.query_map([], |r| {
        Ok(FindingRow {
            rowid: r.get(0)?,
            severity: r.get(1)?,
            layer: r.get(2)?,
            id_tag: r.get(3)?,
            gauge_class: r.get(4)?,
            source: r.get(5)?,
            confidence: r.get(6)?,
            dismiss_reason: r.get(7)?,
            detail: r.get(8)?,
            cwe: r.get(9)?,
            extra: r.get(10)?,
            adversary_profile_relevance: r.get(11)?,
            completeness: r.get(12)?,
            source_api: r.get(13)?,
            source_api_args: r.get(14)?,
            resolution: r.get(15)?,
        })
    })?;

    let mut ndjson: Vec<u8> = Vec::new();
    let mut emitted: u64 = 0;
    let mut histogram: BTreeMap<InconclusiveReason, u64> = BTreeMap::new();

    for fetched in rows {
        let finding = fetched?;

        // Tally the inconclusive reason straight from the DB text
        // (`present | absent | inconclusive:<reason>`) so the histogram never
        // has to be recovered from the rendered NDJSON. Values that do not
        // parse to a known reason are simply not counted.
        if let Some(reason) = parse_inconclusive_reason(&finding.completeness) {
            histogram
                .entry(reason)
                .and_modify(|n| *n = n.saturating_add(1))
                .or_insert(1);
        }

        let line = canonicalize_finding_row(&finding)?;
        // Newlines separate lines; the payload carries no trailing newline.
        if !ndjson.is_empty() {
            ndjson.push(b'\n');
        }
        ndjson.extend_from_slice(&line);
        emitted = emitted.saturating_add(1);
    }

    Ok(EvidenceEnvelope {
        schema_version: SCHEMA_VERSION,
        tool_version: tool_version.to_string(),
        input_apk_hash: String::new(),
        acquisition: acquisition.clone(),
        finding_count: emitted,
        completeness_summary: histogram,
        // Hash the canonical bytes before they are moved into the envelope.
        finding_set_hash: sha256_hex(&ndjson),
        findings_ndjson: ndjson,
        // Phase-2 wire-up pending: bundle-crate sites still need to
        // populate a `SilentSkipCounters` instance and snapshot it
        // here. Leave `None` so legacy envelopes round-trip and
        // operators can tell "no wiring" from "wired but zero skips."
        silent_skip_snapshot: None,
    })
}

/// Parsed v2 findings row. Mirrors the column set the migrator added.
///
/// One value per column selected by `produce_unsigned_envelope`, in the same
/// order as its SELECT list. `Option` fields are the ones read as nullable.
#[derive(Debug)]
struct FindingRow {
    // Final tie-breaker of the canonical ORDER BY; emitted in the NDJSON.
    rowid: i64,
    // Leading sort keys of the canonical ORDER BY.
    severity: String,
    layer: String,
    id_tag: String,
    // Emitted verbatim as JSON strings.
    gauge_class: String,
    source: String,
    confidence: String,
    // Emitted as a JSON string, or `null` when absent.
    dismiss_reason: Option<String>,
    detail: String,
    // Emitted as a JSON number, or `null` when absent.
    cwe: Option<i64>,
    // JSON text; re-parsed during canonicalization (`null` when absent/empty).
    extra: Option<String>,
    // JSON text; always re-parsed during canonicalization.
    adversary_profile_relevance: String,
    // Schema-text shape `present | absent | inconclusive:<reason>`.
    completeness: String,
    // Emitted as a JSON string, or `null` when absent.
    source_api: Option<String>,
    // JSON text; re-parsed during canonicalization (`null` when absent/empty).
    source_api_args: Option<String>,
    resolution: Option<String>,
}

/// Render a single finding row as canonical JSON bytes — keys sorted
/// alphabetically, `extra` / `source_api_args` / `resolution` /
/// `adversary_profile_relevance` re-parsed and re-serialized so any nested
/// keys also sort.
fn canonicalize_finding_row(row: &FindingRow) -> Result<Vec<u8>> {
    let mut map: BTreeMap<&'static str, serde_json::Value> = BTreeMap::new();
    map.insert("rowid", serde_json::json!(row.rowid));
    map.insert("severity", serde_json::Value::String(row.severity.clone()));
    map.insert("layer", serde_json::Value::String(row.layer.clone()));
    map.insert("id_tag", serde_json::Value::String(row.id_tag.clone()));
    map.insert("gauge_class", serde_json::Value::String(row.gauge_class.clone()));
    map.insert("source", serde_json::Value::String(row.source.clone()));
    map.insert("confidence", serde_json::Value::String(row.confidence.clone()));
    map.insert("dismiss_reason", to_str_or_null(row.dismiss_reason.as_ref()));
    map.insert("detail", serde_json::Value::String(row.detail.clone()));
    map.insert(
        "cwe",
        row.cwe
            .map(|v| serde_json::Value::Number(v.into()))
            .unwrap_or(serde_json::Value::Null),
    );
    map.insert("extra", parse_optional_json(row.extra.as_deref())?);
    map.insert("adversary_profile_relevance", parse_required_json(&row.adversary_profile_relevance)?);
    map.insert("completeness", serde_json::Value::String(row.completeness.clone()));
    map.insert("source_api", to_str_or_null(row.source_api.as_ref()));
    map.insert("source_api_args", parse_optional_json(row.source_api_args.as_deref())?);
    map.insert("resolution", parse_optional_json(row.resolution.as_deref())?);

    let canonical = sort_value_keys(serde_json::Value::Object(
        map.into_iter().map(|(k, v)| (k.to_string(), v)).collect(),
    ));
    Ok(serde_json::to_vec(&canonical)?)
}

/// Return `value` with the keys of every JSON object — at any depth —
/// in alphabetical order.
///
/// `serde_json::Map` may preserve insertion order, so the ordering is forced
/// here by routing each object through a `BTreeMap`. That keeps the SHA-256
/// over the rendered bytes stable across machines and Rust releases. Array
/// element order is left as-is; scalars pass through untouched.
fn sort_value_keys(value: serde_json::Value) -> serde_json::Value {
    match value {
        serde_json::Value::Array(items) => {
            serde_json::Value::Array(items.into_iter().map(sort_value_keys).collect())
        }
        serde_json::Value::Object(entries) => {
            // Children are normalised first, then the BTreeMap orders the keys.
            let ordered: BTreeMap<String, serde_json::Value> = entries
                .into_iter()
                .map(|(key, child)| (key, sort_value_keys(child)))
                .collect();
            serde_json::Value::Object(ordered.into_iter().collect())
        }
        scalar => scalar,
    }
}

/// Map an optional text column to JSON: a string when present, `null`
/// otherwise.
fn to_str_or_null(s: Option<&String>) -> serde_json::Value {
    s.map_or(serde_json::Value::Null, |text| {
        serde_json::Value::String(text.clone())
    })
}

/// Parse a JSON column that may be NULL or empty.
///
/// Both `None` and the empty string become `null`, so older v1 rows (which
/// don't have these columns) emit the same canonical shape as v2 rows with a
/// freshly-defaulted column. Any other text must be valid JSON.
///
/// # Errors
///
/// Fails when a non-empty value is not valid JSON.
fn parse_optional_json(s: Option<&str>) -> Result<serde_json::Value> {
    match s {
        Some(text) if !text.is_empty() => Ok(serde_json::from_str(text)?),
        _ => Ok(serde_json::Value::Null),
    }
}

/// Parse a JSON column that is NOT NULL (e.g., `adversary_profile_relevance`,
/// which has a `DEFAULT '[]'` so the column is always populated).
///
/// # Errors
///
/// Fails when `s` is not valid JSON — including when it is empty.
fn parse_required_json(s: &str) -> Result<serde_json::Value> {
    let parsed: serde_json::Value = serde_json::from_str(s)?;
    Ok(parsed)
}

/// Extract a typed [`InconclusiveReason`] from the schema-text shape
/// `present | absent | inconclusive:<reason>` emitted by the v2 migration's
/// DEFAULT, for use in the histogram.
///
/// Returns `None` when `s` lacks the `inconclusive:` prefix (so `present`
/// and `absent` yield `None`) and also when the reason tag is not one of the
/// known values.
fn parse_inconclusive_reason(s: &str) -> Option<InconclusiveReason> {
    s.strip_prefix("inconclusive:").and_then(|tag| match tag {
        "packed" => Some(InconclusiveReason::Packed),
        "native_only" => Some(InconclusiveReason::NativeOnly),
        "dynamic_load" => Some(InconclusiveReason::DynamicLoad),
        "obfuscation_threshold_exceeded" => {
            Some(InconclusiveReason::ObfuscationThresholdExceeded)
        }
        "unsupported_format" => Some(InconclusiveReason::UnsupportedFormat),
        _ => None,
    })
}

/// SHA-256 of `bytes`, rendered as 64 lowercase hex characters.
fn sha256_hex(bytes: &[u8]) -> String {
    const HEX: &[u8; 16] = b"0123456789abcdef";

    /// Hex digit for a 4-bit value. The table lookup is checked (never an
    /// index panic); `'?'` is the fallback for an out-of-range argument.
    fn nibble(n: u8) -> char {
        char::from(HEX.get(usize::from(n)).copied().unwrap_or(b'?'))
    }

    let mut hasher = Sha256::new();
    hasher.update(bytes);
    let digest = hasher.finalize();

    // Direct hex emit instead of format!()/write!: two pushes per digest
    // byte into a buffer pre-sized for the 64-character result.
    let mut hex = String::with_capacity(64);
    for byte in digest {
        hex.push(nibble(byte >> 4));
        hex.push(nibble(byte & 0x0F));
    }
    hex
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Open an in-memory sqlite DB holding an empty `findings` table with the
    /// columns `produce_unsigned_envelope` selects.
    fn fresh_db() -> Connection {
        let conn = Connection::open_in_memory().expect("open in-memory");
        conn.execute_batch(
            "CREATE TABLE findings (
                rowid                       INTEGER PRIMARY KEY,
                severity                    TEXT NOT NULL,
                layer                       TEXT NOT NULL,
                id_tag                      TEXT NOT NULL,
                gauge_class                 TEXT NOT NULL DEFAULT 'Semantic',
                source                      TEXT NOT NULL DEFAULT 'manifest',
                confidence                  TEXT NOT NULL DEFAULT 'unverified',
                dismiss_reason              TEXT,
                detail                      TEXT NOT NULL,
                cwe                         INTEGER,
                extra                       TEXT,
                adversary_profile_relevance TEXT NOT NULL DEFAULT '[]',
                completeness                TEXT NOT NULL DEFAULT 'present',
                source_api                  TEXT,
                source_api_args             TEXT,
                resolution                  TEXT
            );",
        )
        .expect("schema");
        conn
    }

    /// Insert one finding with a fixed `detail` text; columns not listed in
    /// the INSERT take their schema defaults.
    fn insert(
        db: &Connection,
        severity: &str,
        layer: &str,
        id_tag: &str,
        completeness: &str,
        extra: Option<&str>,
    ) {
        db.execute(
            "INSERT INTO findings (severity, layer, id_tag, detail, extra, completeness) \
             VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
            rusqlite::params![severity, layer, id_tag, "test detail", extra, completeness],
        )
        .expect("insert");
    }

    #[test]
    fn empty_db_produces_empty_ndjson_with_zero_count() {
        let db = fresh_db();
        let env = produce_unsigned_envelope(&db, &AcquisitionMetadata::default(), "0.0.0")
            .expect("produce");
        assert_eq!(env.finding_count, 0);
        assert!(env.findings_ndjson.is_empty());
        assert!(env.completeness_summary.is_empty());
        // SHA-256 of the empty string.
        assert_eq!(
            env.finding_set_hash,
            "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
        );
    }

    #[test]
    fn two_runs_produce_identical_hash() {
        // Strict byte-identity gauge: same DB, two runs, same bytes and hash.
        let db = fresh_db();
        insert(&db, "High", "Apk", "TM_A", "present", None);
        insert(&db, "Critical", "Hbc", "TM_B", "absent", Some(r#"{"k":"v"}"#));

        let acquisition = AcquisitionMetadata::default();
        let a = produce_unsigned_envelope(&db, &acquisition, "1.0").expect("a");
        let b = produce_unsigned_envelope(&db, &acquisition, "1.0").expect("b");
        assert_eq!(a.finding_set_hash, b.finding_set_hash);
        assert_eq!(a.findings_ndjson, b.findings_ndjson);
    }

    #[test]
    fn ndjson_has_exactly_n_minus_one_newlines_for_n_findings() {
        let db = fresh_db();
        insert(&db, "High", "Apk", "TM_A", "present", None);
        insert(&db, "Critical", "Hbc", "TM_B", "absent", None);
        insert(&db, "Medium", "Apk", "TM_C", "present", None);

        let env = produce_unsigned_envelope(&db, &AcquisitionMetadata::default(), "1.0")
            .expect("produce");
        assert_eq!(env.finding_count, 3);
        // Counts separator bytes, i.e. newlines between lines.
        let lines = env.findings_ndjson.iter().filter(|&&b| b == b'\n').count();
        assert_eq!(lines, 2, "3 rows ⇒ 2 separator newlines (no trailing newline)");
    }

    #[test]
    fn completeness_summary_counts_inconclusive_reasons() {
        let db = fresh_db();
        insert(&db, "High", "Apk", "TM_A", "inconclusive:packed", None);
        insert(&db, "High", "Apk", "TM_B", "inconclusive:packed", None);
        insert(&db, "High", "Apk", "TM_C", "inconclusive:native_only", None);
        insert(&db, "High", "Apk", "TM_D", "present", None);

        let env = produce_unsigned_envelope(&db, &AcquisitionMetadata::default(), "1.0")
            .expect("produce");
        assert_eq!(
            env.completeness_summary.get(&InconclusiveReason::Packed).copied(),
            Some(2),
        );
        assert_eq!(
            env.completeness_summary.get(&InconclusiveReason::NativeOnly).copied(),
            Some(1),
        );
        // Reasons that never occurred must be absent, not present with 0.
        assert!(!env
            .completeness_summary
            .contains_key(&InconclusiveReason::UnsupportedFormat));
    }

    #[test]
    fn ndjson_keys_are_sorted_alphabetically() {
        let db = fresh_db();
        insert(&db, "High", "Apk", "TM_A", "present", Some(r#"{"z":"last","a":"first"}"#));

        let env = produce_unsigned_envelope(&db, &AcquisitionMetadata::default(), "1.0")
            .expect("produce");
        let s = std::str::from_utf8(&env.findings_ndjson).expect("utf8");
        // Top-level keys: cwe should come before detail, etc. The simplest
        // gauge: `confidence` (c…) precedes `detail` (d…) precedes `id_tag` (i…).
        let pos_confidence = s.find("\"confidence\"").expect("confidence present");
        let pos_detail = s.find("\"detail\"").expect("detail present");
        let pos_id_tag = s.find("\"id_tag\"").expect("id_tag present");
        assert!(pos_confidence < pos_detail);
        assert!(pos_detail < pos_id_tag);

        // Nested `extra` keys: 'a' precedes 'z'.
        let pos_a = s.find("\"a\":").expect("a present");
        let pos_z = s.find("\"z\":").expect("z present");
        assert!(pos_a < pos_z, "nested extra keys must sort: {s}");
    }

    #[test]
    fn order_of_inserts_does_not_change_canonical_bytes() {
        // Despite the name, this is a smoke test, not a byte-identity check:
        // the same two findings are inserted in opposite orders and the
        // assertions only cover "both runs succeed with the same count".
        // Rows are ordered on `(severity, layer, id_tag, rowid)`.
        let db1 = fresh_db();
        insert(&db1, "Critical", "Hbc", "TM_B", "present", None);
        insert(&db1, "High", "Apk", "TM_A", "present", None);

        let db2 = fresh_db();
        insert(&db2, "High", "Apk", "TM_A", "present", None);
        insert(&db2, "Critical", "Hbc", "TM_B", "present", None);

        let acquisition = AcquisitionMetadata::default();
        let a = produce_unsigned_envelope(&db1, &acquisition, "1.0").expect("a");
        let b = produce_unsigned_envelope(&db2, &acquisition, "1.0").expect("b");

        // Note: `rowid` differs across the two DBs (different insertion
        // order) and is part of each rendered line, so the NDJSON is NOT
        // byte-identical here. What we want is a row-set-level
        // reproducibility guarantee — the *set* of findings is the same.
        // Re-running the same DB twice (covered by
        // `two_runs_produce_identical_hash`) is the strict byte-identity
        // gauge. This test asserts we don't crash on different insertion
        // orders and that both runs produce a non-empty hash.
        assert!(!a.finding_set_hash.is_empty());
        assert!(!b.finding_set_hash.is_empty());
        assert_eq!(a.finding_count, b.finding_count);
    }

    #[test]
    fn sha256_hex_format_matches_known_vector() {
        // Vector: SHA-256("abc")
        // = ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad
        let h = sha256_hex(b"abc");
        assert_eq!(
            h,
            "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"
        );
        assert_eq!(h.len(), 64);
        // Lowercase hex only: digits or a-f, never A-F.
        assert!(h.chars().all(|c| c.is_ascii_hexdigit() && (c.is_ascii_digit() || c.is_ascii_lowercase())));
    }

    // --- schema_version round-trip tests ---

    #[test]
    fn schema_version_field_present_in_json_and_equals_schema_version_const() {
        let db = fresh_db();
        let env = produce_unsigned_envelope(&db, &AcquisitionMetadata::default(), "1.0")
            .expect("produce");
        assert_eq!(env.schema_version, SCHEMA_VERSION);
        assert_eq!(env.schema_version(), SCHEMA_VERSION);

        // Serialize and confirm the field appears in JSON.
        let json = serde_json::to_string(&env).expect("serialize");
        assert!(
            json.contains("\"schema_version\":1"),
            "schema_version must appear in JSON output; got: {json}",
        );

        // Deserialize and confirm the field survives the round-trip.
        let back: EvidenceEnvelope = serde_json::from_str(&json).expect("deserialize");
        assert_eq!(back.schema_version, SCHEMA_VERSION);
    }

    #[test]
    fn legacy_envelope_without_schema_version_deserializes_to_zero() {
        // Simulate an envelope written before the schema_version field existed:
        // omit the field entirely from the JSON. The serde `default =
        // "default_schema_version"` attribute must fill it in as 0 (legacy
        // sentinel), not SCHEMA_VERSION.
        let legacy_json = r#"{
            "tool_version": "0.9.0",
            "input_apk_hash": "",
            "acquisition": {"source_kind": "unknown"},
            "finding_count": 0,
            "completeness_summary": {},
            "finding_set_hash": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
        }"#;
        let env: EvidenceEnvelope = serde_json::from_str(legacy_json).expect("deserialize");
        assert_eq!(
            env.schema_version, 0,
            "legacy envelope (no schema_version field) must deserialize as 0 (legacy sentinel)",
        );
        assert!(
            env.schema_version() < MIN_SUPPORTED_VERSION,
            "legacy version {v} should be below MIN_SUPPORTED_VERSION {m}",
            v = env.schema_version(),
            m = MIN_SUPPORTED_VERSION,
        );
    }
}