droidsaw 2.0.0

DROIDSAW — unified Android reverse engineering CLI. Hermes, DEX, APK signing. JSON output, MCP server. Bytecode is not a security layer.
Documentation
use serde::Serialize;
use serde_json::{json, Value};

use crate::context::CrossLayerContext;

use super::{meta, progress};

/// One row of the `xrefs` JSON output: a referenced item together with the
/// functions or methods that reference it.
#[derive(Serialize)]
struct XrefEntry {
    /// Layer the reference was found in: `"hbc"` for the Hermes bundle, or
    /// `"dex{N}"` for a DEX file, where `N` is its 1-based position in the
    /// context's DEX list.
    layer: String,
    /// "string" — string-literal load (const-string); also the kind used
    ///            for every Hermes bundle entry.
    /// "type"   — class/type descriptor referenced via new-instance,
    ///            check-cast, instance-of, new-array, filled-new-array,
    ///            or const-class.
    /// "method" — method descriptor invoked via any invoke-* opcode.
    kind: &'static str,
    /// The referenced item itself. Depending on `kind` this is the string
    /// value, the type descriptor, or the callee rendered as `class->name`
    /// immediately followed by its prototype.
    string: String,
    /// The referencing code. Hermes entries are rendered as `name(#id)`;
    /// DEX entries as `class->name` immediately followed by the prototype.
    functions: Vec<String>,
}

/// Upper bound, in bytes, on the `search` pattern that `xrefs` will accept.
///
/// Pathological patterns already fail to compile rather than hang, because
/// the `regex` crate enforces its own DFA size cap (~10 MB by default).
/// Capping the pattern *source* at 4 KiB on top of that keeps the MCP attack
/// surface tight without rejecting any plausible analyst query.
const XREFS_PATTERN_MAX_BYTES: usize = 4 * 1024;

#[allow(clippy::arithmetic_side_effects, clippy::as_conversions, reason = "`i + 1` is usize+1 bounded by ctx.dex.len() ≤ isize::MAX.")]
pub fn xrefs(
    ctx: &CrossLayerContext,
    search: Option<&str>,
    limit: Option<usize>,
) -> anyhow::Result<Value> {
    // RAII drain guard: scanner walks hbc.function_get() which can emit
    // hermes findings on adversarial input. Without a drain on Drop,
    // findings leak into the next bundle parsed on the same tokio
    // blocking-pool worker (cross-tenant attribution).
    let _drain_guard = crate::context::HermesFindingDrainGuard::install_discard();

    if let Some(s) = search {
        if s.is_empty() {
            anyhow::bail!("xrefs: search pattern must be non-empty");
        }
        if s.len() > XREFS_PATTERN_MAX_BYTES {
            anyhow::bail!(
                "xrefs: search pattern length {} exceeds cap {}",
                s.len(),
                XREFS_PATTERN_MAX_BYTES,
            );
        }
    }
    let re = search.map(regex::Regex::new).transpose()?;
    let mut all: Vec<XrefEntry> = Vec::new();
    let mut truncated = false;

    // Hermes: string -> function references via scanner
    if let Some(hbc_owned) = ctx.hbc.as_ref() {
        let hbc = hbc_owned.hbc();
        let hbc_data = hbc_owned.bytes();
        let scan = droidsaw_hermes::scanner::scan_parsed(hbc, hbc_data);

        for (str_id, func_ids) in &scan.string_refs {
            let value = hbc.string_as_str_or_empty(*str_id);
            if let Some(ref re) = re
                && !re.is_match(&value)
            {
                continue;
            }
            let functions: Vec<String> = func_ids
                .iter()
                .map(|fid| {
                    let f = hbc.function_get(*fid);
                    let name = if f.name_id < hbc.string_count {
                        hbc.string_as_str_or_empty(f.name_id).into_owned()
                    } else {
                        format!("#{fid}")
                    };
                    format!("{name}(#{fid})")
                })
                .collect();
            all.push(XrefEntry {
                layer: "hbc".to_string(),
                kind: "string",
                string: value.into_owned(),
                functions,
            });
            if let Some(cap) = limit
                && all.len() >= cap
            {
                truncated = true;
                break;
            }
        }
    }

    // DEX: string → methods via const-string / const-string-jumbo.
    // Backed by `droidsaw_dex::xrefs::Xrefs::build`, which walks every
    // code_item once and records the loading methods per StringIdx.
    if !truncated {
        'dex: for (i, dex) in ctx.dex.iter().enumerate() {
            let label = format!("dex{}", i + 1);
            let Some(raw) = ctx.dex_bytes(i) else {
                continue;
            };
            let xrefs = match droidsaw_dex::xrefs::Xrefs::build(dex, raw) {
                Ok(x) => x,
                Err(e) => {
                    progress!("dex{:?}: xrefs build failed: {:?}", i + 1, e);
                    continue;
                }
            };
            for entry in &dex.strings {
                let s = entry.as_str_lossy();
                if let Some(ref re) = re
                    && !re.is_match(s)
                {
                    continue;
                }
                let functions: Vec<String> = xrefs
                    .string_to_methods
                    .get(s)
                    .map(|mks| {
                        mks.iter()
                            .map(|m| format!("{}->{}{}", m.class, m.name, m.proto))
                            .collect()
                    })
                    .unwrap_or_default();
                // Skip strings present in the pool but never loaded
                // by any method — they're not real xrefs.
                if functions.is_empty() {
                    continue;
                }
                all.push(XrefEntry {
                    layer: label.clone(),
                    kind: "string",
                    string: s.to_string(),
                    functions,
                });
                if let Some(cap) = limit
                    && all.len() >= cap
                {
                    truncated = true;
                    break 'dex;
                }
            }

            // Type-descriptor refs: new-instance / check-cast /
            // instance-of / new-array / filled-new-array / const-class.
            // The descriptor is matched as-is (`Ldalvik/system/Foo;`);
            // a user searching for the bare class name relies on the
            // regex being a substring matcher, which `regex::Regex::is_match`
            // is by default.
            for (desc, methods) in &xrefs.type_xrefs {
                if let Some(ref re) = re
                    && !re.is_match(desc)
                {
                    continue;
                }
                let functions: Vec<String> = methods
                    .iter()
                    .map(|m| format!("{}->{}{}", m.class, m.name, m.proto))
                    .collect();
                if functions.is_empty() {
                    continue;
                }
                all.push(XrefEntry {
                    layer: label.clone(),
                    kind: "type",
                    string: desc.clone(),
                    functions,
                });
                if let Some(cap) = limit
                    && all.len() >= cap
                {
                    truncated = true;
                    break 'dex;
                }
            }

            // Invoke-* refs: surface callees whose descriptor matches.
            // The "string" field carries the canonical callee triple
            // (`class->name+proto`) so an analyst searching for a bare
            // method name (`getStringExtra`) gets a substring hit, and
            // a search for a fully-qualified target also works.
            for (callee, callers) in &xrefs.callers_of {
                let triple = format!("{}->{}{}", callee.class, callee.name, callee.proto);
                if let Some(ref re) = re
                    && !re.is_match(&triple)
                {
                    continue;
                }
                let functions: Vec<String> = callers
                    .iter()
                    .map(|m| format!("{}->{}{}", m.class, m.name, m.proto))
                    .collect();
                if functions.is_empty() {
                    continue;
                }
                all.push(XrefEntry {
                    layer: label.clone(),
                    kind: "method",
                    string: triple,
                    functions,
                });
                if let Some(cap) = limit
                    && all.len() >= cap
                {
                    truncated = true;
                    break 'dex;
                }
            }
        }
    }

    let count = all.len();
    let out = json!({
        "xrefs": all,
        "_meta": meta(
            count,
            truncated,
            "pair with --search for a targeted query; --limit caps the output",
            &["strings", "frida", "decompile"],
        ),
    });
    Ok(out)
}