droidsaw 2.0.0

DROIDSAW — unified Android reverse engineering CLI. Hermes, DEX, APK signing. JSON output, MCP server. Bytecode is not a security layer.
Documentation
use serde_json::{json, Value};

use crate::context::CrossLayerContext;

use super::meta;

/// Compare the Hermes bundle already loaded in `ctx` (the "old" side)
/// against the bundle parsed from `new_path` (the "new" side).
///
/// The result is a JSON object with the keys `old_version`,
/// `new_version`, `string_counts`, `function_counts`, `added_strings`,
/// `removed_strings` and `_meta`. Both count objects carry `old`, `new`
/// and a signed `delta` (new minus old). Only the Hermes layer is
/// compared; the `_meta` note records that the full APK-level diff is
/// queued as P5.
///
/// # Errors
///
/// Fails when the `ensure_hbc_parsed` check fails on either side, when
/// either side carries no Hermes bytecode, or when hashing or parsing
/// `new_path` fails.
#[allow(clippy::arithmetic_side_effects, clippy::as_conversions, reason = "string_count/function_count `as i64` widens u32→i64; diff bounded by |u32::MAX − 0| < i64::MAX/2; no overflow path.")]
pub fn diff(ctx: &CrossLayerContext, new_path: &std::path::Path) -> anyhow::Result<Value> {
    use std::collections::BTreeSet;

    // Old side. A bundle that is present but failed to parse should
    // report its recorded typed parse error, not the generic
    // "no Hermes bytecode" message below.
    ctx.ensure_hbc_parsed()?;
    let Some(baseline_owned) = ctx.hbc.as_ref() else {
        return Err(anyhow::anyhow!("no Hermes bytecode loaded — diff requires HBC in both files"));
    };
    let baseline = baseline_owned.hbc();

    // New side. Parsing runs under `new_path`'s own input hash so that a
    // parse failure or downstream panic is attributed to that file and
    // not to the caller-scoped old hash.
    let candidate_hash = CrossLayerContext::input_hash(new_path)?;
    let candidate_ctx = droidsaw_common::diag::with_input_hash(&candidate_hash, || {
        CrossLayerContext::parse(new_path, None)
    })?;
    // The APK parse can succeed while its optional bundle is broken, so
    // the recorded typed error is re-surfaced here, where the hbc layer
    // is mandatory.
    candidate_ctx.ensure_hbc_parsed()?;
    let Some(candidate_owned) = candidate_ctx.hbc.as_ref() else {
        return Err(anyhow::anyhow!("no Hermes bytecode in {}; diff requires HBC in both files", new_path.display()));
    };
    let candidate = candidate_owned.hbc();

    // Gather each side's string table into an ordered, de-duplicated set.
    let baseline_strings: BTreeSet<String> = (0..baseline.string_count)
        .map(|idx| baseline.string_as_str_or_empty(idx).into_owned())
        .collect();
    let candidate_strings: BTreeSet<String> = (0..candidate.string_count)
        .map(|idx| candidate.string_as_str_or_empty(idx).into_owned())
        .collect();

    // Strings of 3 bytes or fewer are dropped from the added/removed
    // lists: they are noise (single keywords, punctuation, numbers).
    let is_signal = |text: &&String| text.len() > 3;
    let added_strings: Vec<String> = candidate_strings
        .difference(&baseline_strings)
        .filter(is_signal)
        .cloned()
        .collect();
    let removed_strings: Vec<String> = baseline_strings
        .difference(&candidate_strings)
        .filter(is_signal)
        .cloned()
        .collect();

    // Signed deltas, new minus old, computed in i64 so the subtraction
    // of two widened counts cannot overflow.
    let string_delta = i64::from(candidate.string_count) - i64::from(baseline.string_count);
    let function_delta = i64::from(candidate.function_count) - i64::from(baseline.function_count);

    let report = json!({
        "old_version": baseline.version,
        "new_version": candidate.version,
        "string_counts": {
            "old": baseline.string_count,
            "new": candidate.string_count,
            "delta": string_delta,
        },
        "function_counts": {
            "old": baseline.function_count,
            "new": candidate.function_count,
            "delta": function_delta,
        },
        "added_strings": added_strings,
        "removed_strings": removed_strings,
        "_meta": meta(
            1,
            false,
            "Hermes-only bundle diff — strings filtered to len>3 for signal; the full APK-level diff is queued as P5",
            &["strings", "hbc functions", "xrefs"],
        ),
    });
    Ok(report)
}