// droidsaw 1.0.0 - Docs.rs

// SPDX-License-Identifier: BSD-3-Clause

#![cfg_attr(not(test), deny(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::unreachable,
    clippy::todo,
    clippy::arithmetic_side_effects,
    clippy::indexing_slicing,
    clippy::string_slice,
    clippy::let_underscore_future,
    clippy::await_holding_lock,
    clippy::await_holding_refcell_ref,
    clippy::if_let_mutex,
    clippy::large_futures,
    clippy::as_underscore,
    clippy::cast_lossless,
    clippy::cast_possible_truncation,
    clippy::cast_sign_loss,
    clippy::cast_precision_loss,
    clippy::cast_possible_wrap,
    clippy::as_conversions,
))]

use std::path::PathBuf;
use std::process::ExitCode;

use clap::{Parser, Subcommand};

use droidsaw::commands::{
    self, CorpusCommands, DeobfStringsArgs, DexCommands, HbcCommands, InspectCommands,
    ScanCommands, TriageCommands,
};
use droidsaw::context::CrossLayerContext;
use droidsaw::error;

/// Boxed dispatch body: takes the parsed context, returns a JSON value.
/// Used by `dispatch_inspect` / `dispatch_scan` to dynamically dispatch
/// over `InspectCommands` / `ScanCommands` variants without duplicating
/// the `with_input_hash` + parse scaffolding at every match arm.
///
/// The closure is `FnOnce`, so a boxed body can be invoked at most once, and
/// the alias carries no `Send` / `Sync` bound. It only borrows the
/// `CrossLayerContext`; ownership of the context stays with the caller.
type CtxJsonFn = Box<dyn FnOnce(&CrossLayerContext) -> anyhow::Result<serde_json::Value>>;

/// DROIDSAW — Unified Android reverse engineering CLI.
///
/// JSON-only output. Designed for LLM agent consumption. Stdout is a
/// JSON document (object, array, or NDJSON stream depending on
/// subcommand). Stderr carries progress. Errors are JSON envelopes on
/// stdout and a non-zero exit code.
// NOTE(maintainers): clap's derive renders the `///` comments on the fields
// below as `--help` text, so they are user-facing output. Keep
// maintainer-only remarks in plain `//` comments like this one.
#[derive(Parser)]
#[command(
    name = "droidsaw",
    about = "DROIDSAW — Android reverse engineering toolkit. JSON output, agent-first.",
    long_about = "Unified Android reverse engineering CLI. Every subcommand writes JSON to \
stdout and progress to stderr. Exit code is non-zero on failure and stdout carries an error \
envelope. See `droidsaw <cmd> --help` for per-command return shape."
)]
struct Cli {
    // When omitted, `main` substitutes
    // `droidsaw_common::budget::DEFAULT_BUDGET_MEM_BYTES`.
    /// Memory budget in bytes for a single parse operation.
    /// Default: 4 GiB. Omit for the default; pass `usize::MAX` for no limit.
    /// Note: `--budget-mem 0` means zero bytes remaining — the first parse
    /// call will reject any non-empty input with a budget-exhausted error.
    #[arg(long = "budget-mem", global = true, value_name = "BYTES")]
    budget_mem: Option<usize>,

    // `BudgetSpec::make_budget` sets no deadline when this is absent,
    // non-finite, or not strictly positive.
    /// Wall-clock time budget in seconds for a single parse operation.
    /// Default: no time limit. Example: `--budget-time 60`.
    #[arg(long = "budget-time", global = true, value_name = "SECS")]
    budget_time_secs: Option<f64>,

    // Copied into `BudgetSpec` by `main` and forwarded to
    // `CrossLayerContext::parse_with_splits`.
    /// Disable split APK auto-discovery. By default, when the input is an
    /// APK, droidsaw scans the same directory for co-located
    /// `split_config.*.apk` files and merges them into a single analysis
    /// bundle. Pass `--no-auto-splits` to load only the named APK file
    /// without merging siblings.
    #[arg(long = "no-auto-splits", global = true)]
    no_auto_splits: bool,

    // Handled right after argument parsing in `main`, which exits with code 2
    // when a one-worker rayon pool cannot be guaranteed.
    /// Force serial execution by configuring rayon's global thread pool to a
    /// single worker. Equivalent to `RAYON_NUM_THREADS=1` but discoverable in
    /// `--help`. Use for deterministic output, differential testing
    /// (parallel-vs-serial finding equivalence), or reproducible bug
    /// repros. Doesn't change findings on production input — proven by the
    /// rayon-singlethreaded-differential gauge — but does serialize the
    /// audit pipeline; expect ~2× wall on typical APKs.
    #[arg(long = "single-thread", global = true)]
    single_thread: bool,

    // Raw names as typed by the user; validated by
    // `resolve_permissive_recovery_opts`. On an unknown name `main` prints
    // the message to stderr and exits with code 2.
    /// Opt in to one or more `permissive_recovery.*` parser tolerances. Comma-
    /// separated list; each name maps to a field on
    /// `droidsaw_apk::PermissiveRecoveryOpts`. Unknown names exit with code 2.
    ///
    /// Known opts (see `droidsaw_apk::PermissiveRecoveryOpts` doc-comments
    /// for per-opt threat models):
    /// - `multi_root` — accept_multi_root: first-wins recovery on
    ///   multiple top-level AXML elements + AXML_MULTIPLE_ROOTS finding.
    /// - `unclosed_elements` — accept_unclosed_elements: drop dangling
    ///   stack at EOF (bottom-of-stack becomes root) +
    ///   AXML_UNCLOSED_ELEMENTS finding.
    /// - `orphan_end_element` — accept_orphan_end_element: skip orphan
    ///   END_ELEMENT + AXML_ORPHAN_END_ELEMENT finding.
    /// - `all` — enable all of the above.
    ///
    /// The `permissive_recovery_` prefix is the user's acknowledgment that the
    /// chosen opt may produce a parse that diverges from runtime in
    /// over-report-allowed-only ways (audit may cry wolf on
    /// runtime-invisible content; never misses runtime-visible
    /// content).
    #[arg(long = "permissive-recovery", global = true, value_delimiter = ',', value_name = "OPT")]
    permissive_recovery: Vec<String>,

    // The selected subcommand; moved into `dispatch` by `main`.
    #[command(subcommand)]
    command: Commands,
}

/// Translate the raw `--permissive-recovery` names into a
/// `droidsaw_apk::PermissiveRecoveryOpts`.
///
/// Starts from `PermissiveRecoveryOpts::default()` and switches on the flag(s)
/// selected by each name; `all` selects every flag. Names are matched
/// exactly (no trimming or case folding) and repeats are harmless.
///
/// # Errors
///
/// Returns a usage message naming the offending entry and listing the known
/// names as soon as an unrecognised name is encountered.
fn resolve_permissive_recovery_opts(
    raw: &[String],
) -> Result<droidsaw_apk::PermissiveRecoveryOpts, String> {
    let mut resolved = droidsaw_apk::PermissiveRecoveryOpts::default();
    for requested in raw.iter().map(String::as_str) {
        // (multi_root, unclosed_elements, orphan_end_element) selected by this name.
        let (multi_root, unclosed_elements, orphan_end_element) = match requested {
            "multi_root" => (true, false, false),
            "unclosed_elements" => (false, true, false),
            "orphan_end_element" => (false, false, true),
            "all" => (true, true, true),
            other => {
                return Err(format!(
                    "unknown --permissive-recovery opt '{other}'. Known: \
                     multi_root, unclosed_elements, orphan_end_element, all"
                ));
            }
        };
        // OR-in so a later name never clears a flag an earlier one enabled.
        resolved.accept_multi_root |= multi_root;
        resolved.accept_unclosed_elements |= unclosed_elements;
        resolved.accept_orphan_end_element |= orphan_end_element;
    }
    Ok(resolved)
}

/// Resolved parse-budget configuration derived from CLI flags.
/// All fields are `Copy` so they thread freely through inner `fn` closures.
///
/// Built once in `main` and handed to `dispatch`; every parse obtains a fresh
/// budget from it through `make_budget`.
#[derive(Copy, Clone)]
struct BudgetSpec {
    /// Memory budget in bytes; becomes `ParseBudget::memory_bytes_remaining`.
    mem_bytes: usize,
    /// Optional wall-clock budget in seconds as supplied on the command line
    /// (not yet validated); `make_budget` turns it into a deadline.
    time_secs: Option<f64>,
    /// Mirrors `--no-auto-splits`; passed to
    /// `CrossLayerContext::parse_with_splits` rather than into the budget.
    no_auto_splits: bool,
}

impl BudgetSpec {
    /// Builds a fresh `droidsaw_common::budget::ParseBudget` from the resolved
    /// CLI flags.
    ///
    /// * `memory_bytes_remaining` is `mem_bytes` verbatim.
    /// * `steps_remaining` is unbounded (`usize::MAX`).
    /// * `deadline` is `now + time_secs` when `time_secs` is a finite,
    ///   strictly positive number of seconds that fits in a `Duration` and
    ///   does not overflow `Instant`; otherwise no deadline is set.
    ///
    /// Never panics, whatever `f64` the command line supplied.
    fn make_budget(self) -> droidsaw_common::budget::ParseBudget {
        use std::time::{Duration, Instant};
        droidsaw_common::budget::ParseBudget {
            memory_bytes_remaining: self.mem_bytes,
            steps_remaining: usize::MAX,
            // WHY: `Duration::from_secs_f64` panics on NaN / negative /
            // overflow, and a finite positive value such as `1e300` still
            // overflows `Duration`. Adversarial CLI input can supply any f64,
            // so use the fallible `try_from_secs_f64`; invalid or
            // unrepresentable values silently become no deadline rather than
            // a panic.
            deadline: self
                .time_secs
                .filter(|secs| secs.is_finite() && *secs > 0.0)
                .and_then(|secs| Duration::try_from_secs_f64(secs).ok())
                .and_then(|timeout| Instant::now().checked_add(timeout)),
        }
    }
}

// NOTE(maintainers): clap's derive renders the `///` comments on the variants
// and fields below as `--help` text, so they are user-facing output. Keep
// maintainer-only remarks in plain `//` comments.
#[derive(Subcommand)]
enum Commands {
    // ── Top-level (matches MCP surface) ──────────────────────────

    /// Layer summary: bytecode layers + manifest + signing.
    #[command(name = droidsaw_cli_contract::SUBCOMMAND_INFO)]
    Info {
        /// Path to APK, HBC, or DEX file.
        path: PathBuf,
    },
    /// Full security audit: findings + taint + trufflehog + semgrep + leads.
    #[command(name = droidsaw_cli_contract::SUBCOMMAND_AUDIT)]
    Audit {
        /// Path to APK, HBC, or DEX file.
        path: PathBuf,
        /// Minimum entropy threshold for high-entropy string scan (bits/char).
        #[arg(long, default_value = "4.5")]
        entropy: f32,
        /// Directory to write extracted source into (default: temp dir).
        ///
        /// When `--format unsigned-evidence` is set, this is the directory
        /// where `envelope.json` + `findings.ndjson` + the canonical
        /// `findings.db` are written. Required for that format.
        #[arg(long)]
        output: Option<PathBuf>,
        // `dispatch` applies this after parsing `--mode`: `full` is narrowed
        // to `trufflehog`, `semgrep` to `basic`, other modes are unchanged.
        /// Skip semgrep extraction and scan. Equivalent to
        /// `--mode=basic` for the semgrep half (kept for back-compat with
        /// scripts that pre-date `--mode`). When set, overrides
        /// `--mode`'s semgrep gate.
        #[arg(long)]
        no_semgrep: bool,
        // `dispatch` accepts only the literal "unsigned-evidence"; any other
        // supplied value is rejected with an "unknown --format value" error.
        /// Output format selector. Default = JSON to stdout (the
        /// existing audit shape). `unsigned-evidence` produces a canonical
        /// NDJSON evidence envelope under `--output`.
        #[arg(long, value_name = "FORMAT")]
        format: Option<String>,
        // Kept as a raw string here; `dispatch` validates it with
        // `AuditMode::from_cli_str` and reports unknown values as an error.
        /// Detector selector — `basic | full | semgrep | trufflehog`.
        /// Default = `full` — `audit` carries comprehensive semantics;
        /// the JSON output's `detectors` field surfaces which detectors
        /// actually ran vs were skipped (binary not on PATH, mode-gated,
        /// or subprocess error). `basic` is the opt-in fast path
        /// (parser-side findings + bundled YARA only; no subprocess
        /// spawns; ~10–30 sec wall on most APKs) — recommended for CI /
        /// agent loops. `semgrep` and `trufflehog` overlay one
        /// subprocess each on top of basic. Parsed by
        /// `droidsaw_cli_contract::AuditMode::from_cli_str`.
        #[arg(long = droidsaw_cli_contract::FLAG_AUDIT_MODE,
              value_name = "MODE", default_value = "full")]
        mode: String,
        // NOTE: currently parsed but not acted on by the CLI `dispatch` path
        // (it is bound and discarded there).
        /// Write a fresh findings DB instead of upserting into the
        /// existing one at the canonical path. Off by default — the
        /// audit DB is updated incrementally so re-running the same
        /// mode does not duplicate findings (UPSERT by stable identity)
        /// and re-running a different mode adds findings under a new
        /// `mode` tag. With this flag set, prior rows for the same mode
        /// are cleared before insert (one-shot semantics).
        #[arg(long = droidsaw_cli_contract::FLAG_NO_UPDATE_DB)]
        no_update_db: bool,
        // The four acquisition fields below and `stix_feed` are only read on
        // the `--format unsigned-evidence` path in `dispatch`.
        /// Threat-model: how the sample was acquired.
        /// One of `adb_pull` / `file_upload` / `download_url` /
        /// `device_image` / `unknown`. Carried into the evidence envelope.
        #[arg(long = "acquired-from", value_name = "KIND")]
        acquired_from: Option<String>,
        /// Threat-model: operator handle / username.
        #[arg(long, value_name = "HANDLE")]
        operator: Option<String>,
        /// Threat-model: authority / case reference (matter ID, ticket).
        #[arg(long = "case-ref", value_name = "REF")]
        case_ref: Option<String>,
        /// Threat-model: acquisition timestamp (RFC-3339, UTC).
        #[arg(long = "acquired-at", value_name = "RFC3339")]
        acquired_at: Option<String>,
        /// Threat-model: STIX 2.1 bundle to load alongside the audit.
        ///
        /// Repeatable. Each path is parsed; Indicator SDOs across all
        /// supplied bundles are deduplicated on `id` (first-win across
        /// the supplied list). Local file paths only — no network IO.
        ///
        /// Indicators are stored on the envelope's reference set; actual
        /// matching against APK content lands in
        /// `threat-model-third-party-inventory` (the next stream).
        #[arg(long = "stix-feed", value_name = "PATH", action = clap::ArgAction::Append)]
        stix_feed: Vec<PathBuf>,
        /// User-owned semgrep rules — repeatable `--rules <path>` and/or
        /// `DROIDSAW_SEMGREP_RULES` env var. `--no-auto` suppresses the
        /// `--config auto` registry default. Droidsaw does not ship rules.
        /// Only consumed when the audit mode runs semgrep (`full` or
        /// `semgrep`).
        #[command(flatten)]
        semgrep_args: droidsaw::semgrep::SemgrepArgs,
    },
    /// Decompile: DEX class → Java, Hermes function → JS. Auto-dispatches.
    #[command(name = droidsaw_cli_contract::SUBCOMMAND_DECOMPILE)]
    Decompile {
        /// Path to APK, HBC, or DEX file.
        path: PathBuf,
        /// Function ID (Hermes) or target identifier.
        target: Option<String>,
        /// Emit valid JS (Hermes only).
        #[arg(long)]
        js: bool,
        // `dispatch` takes the bulk path only when `--all` is given without a
        // target, `--class-index`, or `--search`.
        /// Decompile every function/class.
        #[arg(long = droidsaw_cli_contract::FLAG_ALL)]
        all: bool,
        /// DEX class index (0-based global).
        #[arg(long)]
        class_index: Option<usize>,
        /// Regex on class descriptor or function name.
        #[arg(short, long)]
        search: Option<String>,
        /// Output directory for `--all` bulk emit. Tree:
        /// `<out>/dex/<dex-name>/sources/<pkg>/<Class>.java` per DEX class,
        /// `<out>/hbc/f<id>_<name>.js` per HBC function,
        /// `<out>/strings/{dex,hbc,native}.txt` per-layer string tables,
        /// and `<out>/meta.json` with provenance + canonical counts.
        /// Without this flag, `--all` returns the JSON envelope on stdout.
        #[arg(long = droidsaw_cli_contract::FLAG_OUT)]
        out: Option<PathBuf>,
        /// Permit writing into a non-empty `--out <dir>`. Without this,
        /// `--out` against an existing non-empty directory bails to
        /// protect prior investigation output from silent clobber.
        #[arg(long)]
        overwrite: bool,
    },
    /// Search strings across all bytecode and native layers.
    Strings {
        /// Path to APK, HBC, or DEX file.
        path: PathBuf,
        /// Regex filter applied to each string.
        #[arg(short, long)]
        search: Option<String>,
        /// Minimum string length (default 4 when --layer native; 0 otherwise).
        #[arg(short = 'n', long)]
        min_length: Option<usize>,
        /// Cap on returned items.
        #[arg(long)]
        limit: Option<usize>,
        /// Layer filter: "dex", "dex1"/"dex2"/… for a specific DEX, "hbc", or
        /// "native" for ELF .rodata + .dynstr strings from .so files in an APK.
        /// Omit to search all layers.
        #[arg(long)]
        layer: Option<String>,
    },
    /// Cross-reference strings to functions.
    Xrefs {
        /// Path to APK, HBC, or DEX file.
        path: PathBuf,
        /// Regex filter over the string being referenced.
        #[arg(short, long)]
        search: Option<String>,
        /// Cap on returned xrefs.
        #[arg(long)]
        limit: Option<usize>,
    },
    /// AndroidManifest analysis.
    Manifest {
        /// Path to APK file.
        path: PathBuf,
    },
    /// APK signing info (v1 cert + v2/v3/v4 blocks + findings).
    Signing {
        /// Path to APK file.
        path: PathBuf,
    },
    /// Generate Frida hook stubs for functions matching a string pattern.
    Frida {
        /// Path to APK, HBC, or DEX file.
        path: PathBuf,
        /// Regex pattern; functions referencing matching strings get hooks.
        #[arg(short, long)]
        search: String,
    },
    /// Diff two Hermes bundles (old vs new).
    Diff {
        /// Baseline (old) APK/HBC file.
        old: PathBuf,
        /// Candidate (new) APK/HBC file.
        new: PathBuf,
    },

    /// Emulator-based string deobfuscation: run a target DEX method
    /// with concrete argument sets and collect recovered plaintext strings.
    ///
    /// Returns `{target, pairs, summary, _meta}`. Use `--int-range 0..255`
    /// to enumerate all single-integer inputs, or `--args-json` for
    /// explicit multi-argument invocations. See `--help` for examples.
    #[command(name = "deobf-strings")]
    DeobfStrings {
        #[command(flatten)]
        args: DeobfStringsArgs,
    },

    // ── Grouped subcommands (power-user, CLI-only) ───────────────

    /// Hermes bytecode primitives.
    Hbc {
        #[command(subcommand)]
        command: HbcCommands,
    },
    /// DEX bytecode primitives.
    Dex {
        #[command(subcommand)]
        command: DexCommands,
    },
    /// APK container inspection.
    Inspect {
        #[command(subcommand)]
        command: InspectCommands,
    },
    /// Scanning and extraction tools.
    Scan {
        #[command(subcommand)]
        command: ScanCommands,
    },
    /// Multi-APK corpus operations.
    Corpus {
        #[command(subcommand)]
        command: CorpusCommands,
    },
    /// Crash-bundle triage helpers (panic-hook diag → adversarial fixture).
    Triage {
        #[command(subcommand)]
        command: TriageCommands,
    },
}

fn main() -> ExitCode {
    droidsaw_common::diag::init();
    let cli = Cli::parse();
    if cli.single_thread {
        // Configure rayon to use exactly one worker thread. `build_global` can
        // only be called once per process and must run before any rayon work,
        // so it lives here at the top of main().
        //
        // `--single-thread` is a precondition, not a hint: the differential-
        // test correctness claim depends on the audit actually running
        // serially. If `build_global()` returns Err AND the resulting pool
        // is not size 1, the flag cannot be honored — fail loudly rather
        // than emit findings the operator believes are serial when they
        // aren't. Documented workaround: set `RAYON_NUM_THREADS=1` in the
        // environment, or remove the conflicting env var.
        if rayon::ThreadPoolBuilder::new()
            .num_threads(1)
            .build_global()
            .is_err()
            && rayon::current_num_threads() != 1
        {
            eprintln!(
                "error: --single-thread cannot be honored — rayon pool is already \
                 initialized with {} threads (likely RAYON_NUM_THREADS env var). Set \
                 RAYON_NUM_THREADS=1 before invoking, or remove the conflicting env var.",
                rayon::current_num_threads()
            );
            return ExitCode::from(2);
        }
    }
    let budget_spec = BudgetSpec {
        mem_bytes: cli.budget_mem.unwrap_or(
            droidsaw_common::budget::DEFAULT_BUDGET_MEM_BYTES,
        ),
        time_secs: cli.budget_time_secs,
        no_auto_splits: cli.no_auto_splits,
    };
    let permissive_recovery_opts = match resolve_permissive_recovery_opts(&cli.permissive_recovery) {
        Ok(d) => d,
        Err(msg) => {
            eprintln!("{msg}");
            return ExitCode::from(2);
        }
    };
    let (operation, result) = dispatch(cli.command, budget_spec, permissive_recovery_opts);
    match result {
        Ok(()) => ExitCode::SUCCESS,
        Err(e) => error::emit(&e, operation),
    }
}

fn dispatch(
    command: Commands,
    budget_spec: BudgetSpec,
    permissive_recovery_opts: droidsaw_apk::PermissiveRecoveryOpts,
) -> (&'static str, anyhow::Result<()>) {
    /// Shared scaffolding for subcommands that parse one input and print one
    /// JSON value: hashes the input, builds a fresh budget, parses the input
    /// inside `diag::with_input_hash`, installs the permissive-recovery
    /// options on the context, runs `f`, and prints its result via
    /// `commands::print_json`. Any failing step is returned as the error.
    fn run<F>(
        path: &std::path::Path,
        budget_spec: BudgetSpec,
        permissive_recovery_opts: droidsaw_apk::PermissiveRecoveryOpts,
        f: F,
    ) -> anyhow::Result<()>
    where
        F: FnOnce(&CrossLayerContext) -> anyhow::Result<serde_json::Value>,
    {
        let input_hash = CrossLayerContext::input_hash(path)?;
        let mut parse_budget = budget_spec.make_budget();
        let skip_split_discovery = budget_spec.no_auto_splits;
        droidsaw_common::diag::with_input_hash(&input_hash, || {
            let mut parsed = CrossLayerContext::parse_with_splits(
                path,
                skip_split_discovery,
                Some(&mut parse_budget),
            )?;
            parsed.permissive_recovery = permissive_recovery_opts;
            f(&parsed).and_then(|json| commands::print_json(&json))
        })
    }

    match command {
        // ── Top-level ────────────────────────────────────────────

        Commands::Info { path } => ("info", run(&path, budget_spec, permissive_recovery_opts, commands::info)),

        Commands::Audit {
            path,
            entropy,
            output,
            no_semgrep,
            format,
            mode,
            no_update_db,
            acquired_from,
            operator,
            case_ref,
            acquired_at,
            stix_feed,
            semgrep_args,
        } => (
            "audit",
            // `--format unsigned-evidence` short-circuits to the threat-model
            // pipeline: collect findings, write envelope + NDJSON + canonical
            // sqlite DB into `--output`. Default format keeps the existing
            // JSON-on-stdout shape.
            if format.as_deref() == Some("unsigned-evidence") {
                (|| -> anyhow::Result<()> {
                    let out_dir = output
                        .as_ref()
                        .ok_or_else(|| anyhow::anyhow!(
                            "--format=unsigned-evidence requires --output <dir>"
                        ))?;
                    let acquisition = droidsaw::threat_model::parse_acquisition_metadata(
                        acquired_from.as_deref(),
                        operator.as_deref(),
                        case_ref.as_deref(),
                        acquired_at.as_deref(),
                    )?;
                    // Load STIX bundles eagerly — surfacing parse errors
                    // here (before the audit pipeline runs) gives operators
                    // immediate feedback on a malformed feed instead of
                    // burning a full audit pass first.
                    let stix_indicators =
                        droidsaw::threat_model::stix::load_indicators_dedup(&stix_feed)?;
                    let hash = CrossLayerContext::input_hash(&path)?;
                    let mut budget = budget_spec.make_budget();
                    droidsaw_common::diag::with_input_hash(&hash, || {
                        let mut ctx = CrossLayerContext::parse_with_splits(
                            &path,
                            budget_spec.no_auto_splits,
                            Some(&mut budget),
                        )?;
                        ctx.permissive_recovery = permissive_recovery_opts;
                        let findings = commands::collect_findings(&ctx, entropy)?;
                        let paths = droidsaw::threat_model::write_unsigned_evidence(
                            &findings,
                            &acquisition,
                            env!("CARGO_PKG_VERSION"),
                            out_dir,
                        )?;
                        // One-line JSON summary on stdout — bench-friendly +
                        // agent-friendly. Includes loaded indicator count so
                        // the operator can verify the STIX feed parsed.
                        let summary = serde_json::json!({
                            "envelope_json": paths.envelope_json,
                            "findings_ndjson": paths.findings_ndjson,
                            "findings_db": paths.findings_db,
                            "finding_count": paths.finding_count,
                            "finding_set_hash": paths.finding_set_hash,
                            "stix_indicator_count": stix_indicators.len(),
                        });
                        commands::print_json(&summary)
                    })
                })()
            } else if let Some(unknown) = format {
                Err(anyhow::anyhow!("unknown --format value: {unknown:?}"))
            } else {
                // Modular-mode dispatch. `--no-semgrep` (legacy) and
                // `--mode=basic|trufflehog` both skip semgrep extraction.
                // Trufflehog subprocess + DB-write semantics live in the
                // MCP `run_core_audit` path; the CLI surface today emits
                // a JSON envelope with extraction metadata only, so the
                // CLI's full/semgrep modes match the prior "all-or-nothing"
                // shape (audit_full). The `no_update_db` flag is plumbed
                // through to the future `write_findings_db_with_run`
                // hook; today the CLI does not persist the audit JSON to
                // SQLite (that is the MCP audit handler's job), so the
                // flag is recorded for parity but does not change the
                // CLI emit. ParseBudget threads through `run` to bound
                // memory/time on adversarial input.
                (|| -> anyhow::Result<()> {
                    let parsed_mode = droidsaw_cli_contract::AuditMode::from_cli_str(&mode)
                        .ok_or_else(|| anyhow::anyhow!(
                            "unknown --mode value: {mode:?} (expected basic|full|semgrep|trufflehog)"
                        ))?;
                    // `--no-semgrep` is back-compat with scripts pre-dating
                    // `--mode`. It hard-overrides to a no-semgrep variant.
                    let effective_mode = if no_semgrep {
                        match parsed_mode {
                            droidsaw_cli_contract::AuditMode::Full => droidsaw_cli_contract::AuditMode::Trufflehog,
                            droidsaw_cli_contract::AuditMode::Semgrep => droidsaw_cli_contract::AuditMode::Basic,
                            m => m,
                        }
                    } else {
                        parsed_mode
                    };
                    let _ = no_update_db; // recorded for parity; future CLI sqlite-persist hook.
                    if effective_mode.runs_semgrep() {
                        run(&path, budget_spec, permissive_recovery_opts, |ctx| {
                            commands::audit_full_with_mode(
                                ctx,
                                entropy,
                                output.as_deref(),
                                &semgrep_args,
                                effective_mode,
                            )
                        })
                    } else {
                        // Drop semgrep_args silently when the mode skips
                        // semgrep — passing them with `--mode=basic` is
                        // not an error (caller may have a script that
                        // always sets them); they just don't fire. Pass
                        // the actual mode through so the `detectors`
                        // field reports `--mode=trufflehog` correctly
                        // (CLI dispatch for trufflehog is not yet
                        // wired; the field tags it `not_wired_into_cli`
                        // for that mode).
                        let _ = &semgrep_args;
                        run(&path, budget_spec, permissive_recovery_opts, |ctx| {
                            commands::audit_light_with_mode(ctx, entropy, effective_mode)
                        })
                    }
                })()
            },
        ),

        Commands::Decompile { path, target, js, all, class_index, search, out, overwrite } => (
            "decompile",
            // --all has three output shapes:
            // - `--out <dir>`: structured layout tree (per-DEX subdirs +
            //   per-function HBC files + meta.json + strings/), via
            //   `bulk_emit_to_dir`. Handles DEX, HBC, and hybrid inputs.
            // - `--js` (no --out): concatenated HBC JS to stdout.
            // - else: JSON envelope to stdout.
            // All other modes return JSON and go through `run`.
            if all && target.is_none() && class_index.is_none() && search.is_none() {
                (|| -> anyhow::Result<()> {
                    let hash = CrossLayerContext::input_hash(&path)?;
                    let mut budget = budget_spec.make_budget();
                    droidsaw_common::diag::with_input_hash(&hash, || {
                        let mut ctx = CrossLayerContext::parse_with_splits(
                            &path,
                            budget_spec.no_auto_splits,
                            Some(&mut budget),
                        )?;
                        ctx.permissive_recovery = permissive_recovery_opts;
                        if let Some(dir) = out.as_deref() {
                            if js {
                                anyhow::bail!(
                                    "--js outputs HBC to stdout; --out writes a layout tree to disk. \
                                     Choose one: drop --js for the layout tree, or drop --out for HBC-to-stdout."
                                );
                            }
                            let cmd_line = format!(
                                "decompile --all --out {}{}",
                                dir.display(),
                                if overwrite { " --overwrite" } else { "" },
                            );
                            let meta = commands::bulk_emit_to_dir(&ctx, dir, overwrite, &path, cmd_line)?;
                            commands::print_json(&meta)
                        } else if js {
                            // AuditFormat::HbcJs: concatenated raw JS to
                            // stdout; sentinel emitted on stderr via the
                            // `progress!` macro at end of clean run. The
                            // progress-prefix form makes the sentinel
                            // spoof-resistant against attacker-controlled
                            // string interpolation in other `progress!`
                            // calls (the prefix `droidsaw: ` is only
                            // produced by the macro, not by payload
                            // content).
                            let stdout = std::io::stdout();
                            let mut sink = stdout.lock();
                            commands::decompile_hbc_all_js_stream(&ctx, &mut sink)?;
                            drop(sink);
                            // The HbcJs sentinel is the full line
                            // `droidsaw: DROIDSAW_HBC_JS_END\n` — exactly
                            // what `commands::progress!`'s expansion
                            // (`eprintln!("droidsaw: {}", ...)`) produces
                            // for the token. Only this dedicated final
                            // call can produce that exact line;
                            // attacker-interpolated strings in other
                            // `progress!` calls get `droidsaw: <prefix>:
                            // <attacker>` which doesn't match
                            // `ends_with(sentinel)`. Emitted via
                            // `eprintln!` directly (not the macro, which
                            // is module-local to `commands`) so the
                            // byte-exact contract below is enforced here.
                            eprintln!("droidsaw: DROIDSAW_HBC_JS_END");
                            Ok(())
                        } else {
                            let value = commands::decompile(&ctx, None, js, true)?;
                            commands::print_json(&value)?;
                            // RAII sentinel emit: `SentinelGuard::new` starts a
                            // pending emit session; `commit()` writes the
                            // sentinel + flushes. Any early-return via `?`
                            // above this point leaves the guard undropped and
                            // un-committed → Drop runs silently, no sentinel
                            // on the stream, bench correctly registers the
                            // run as truncated.
                            //
                            // Layer-aware sentinel: DEX-only inputs reach this
                            // branch when `--all` runs without `--out` (the
                            // `dex_decompile_all` file-emit gate above didn't
                            // fire); HBC-only inputs reach it when `--all`
                            // runs without `--js`. Pick the matching
                            // AuditFormat so bench can distinguish envelopes
                            // by sentinel without parsing payload.
                            let stdout = std::io::stdout();
                            let mut sink = stdout.lock();
                            let format = if !ctx.dex.is_empty() {
                                droidsaw_cli_contract::AuditFormat::DexJson
                            } else {
                                droidsaw_cli_contract::AuditFormat::HbcJson
                            };
                            let guard = droidsaw_cli_contract::SentinelGuard::new(&mut sink, format);
                            guard.commit()?;
                            Ok(())
                        }
                    })
                })()
            } else {
                run(&path, budget_spec, permissive_recovery_opts, |ctx| {
                    if class_index.is_some() || search.is_some() {
                        commands::dex_decompile(ctx, class_index, search.as_deref())
                    } else if let Some(ref t) = target {
                        // Dispatch on the parsed input's namespace, not on
                        // the target string's shape. See `commands::DecompileRoute`.
                        match commands::classify_decompile_target(ctx, t)? {
                            commands::DecompileRoute::DexClass(s) => {
                                commands::dex_decompile(ctx, None, Some(s))
                            }
                            commands::DecompileRoute::HbcFunction(s) => {
                                commands::decompile(ctx, Some(s), js, all)
                            }
                        }
                    } else {
                        commands::decompile(ctx, None, js, false)
                    }
                })
            },
        ),

        Commands::Strings { path, search, min_length, limit, layer } => (
            "strings",
            run(&path, budget_spec, permissive_recovery_opts, |ctx| {
                commands::strings(ctx, search.as_deref(), min_length, limit, layer.as_deref())
            }),
        ),

        Commands::Xrefs { path, search, limit } => (
            "xrefs",
            run(&path, budget_spec, permissive_recovery_opts, |ctx| commands::xrefs(ctx, search.as_deref(), limit)),
        ),

        Commands::Manifest { path } => ("manifest", run(&path, budget_spec, permissive_recovery_opts, commands::manifest)),
        Commands::Signing { path } => ("signing", run(&path, budget_spec, permissive_recovery_opts, commands::signing)),

        Commands::Frida { path, search } => (
            "frida",
            run(&path, budget_spec, permissive_recovery_opts, |ctx| commands::frida(ctx, &search)),
        ),

        Commands::Diff { old, new } => (
            "diff",
            run(&old, budget_spec, permissive_recovery_opts, |ctx| commands::diff(ctx, &new)),
        ),

        Commands::DeobfStrings { args } => (
            "deobf-strings",
            run(&args.path.clone(), budget_spec, permissive_recovery_opts, |ctx| {
                commands::deobf_strings(ctx, &args)
            }),
        ),

        // ── Grouped subcommands ──────────────────────────────────

        Commands::Hbc { command } => (
            "hbc",
            match command {
                // Disassemble is the one Hbc subcommand that emits plain
                // text on stdout (a deterministic instruction stream
                // consumed by external-oracle differential parsers like
                // hbcdump). Everything else routes through the JSON
                // envelope.
                HbcCommands::Disassemble { path } => {
                    let stdout = std::io::stdout();
                    let mut sink = stdout.lock();
                    commands::hbc_disassemble(&path, &mut sink)
                }
                other => commands::hbc(other).and_then(|v| commands::print_json(&v)),
            },
        ),

        Commands::Dex { command } => (
            "dex",
            commands::dex(command).and_then(|v| commands::print_json(&v)),
        ),

        Commands::Inspect { command } => (
            "inspect",
            dispatch_inspect(command, budget_spec, permissive_recovery_opts),
        ),

        Commands::Scan { command } => (
            "scan",
            dispatch_scan(command, budget_spec, permissive_recovery_opts),
        ),

        Commands::Corpus { command } => (
            "corpus",
            dispatch_corpus(command),
        ),

        Commands::Triage { command } => (
            "triage",
            dispatch_triage(command),
        ),
    }
}

/// Runs a `triage` subcommand and hands its result to `commands::print_json`.
///
/// # Errors
///
/// Propagates any error returned by the subcommand itself or by
/// `commands::print_json`.
fn dispatch_triage(command: TriageCommands) -> anyhow::Result<()> {
    // Resolve the variant to its JSON payload first; the print step is
    // shared by every variant and therefore lives outside the match.
    let payload = match command {
        TriageCommands::Promote(args) => commands::triage::promote(args)?,
    };
    commands::print_json(&payload)
}

/// Runs an `inspect` subcommand against a parsed input and prints its JSON.
///
/// The `match` only decides *which* path to parse and *which* command body
/// to run against the parsed context. The scaffold shared by every variant
/// (input hash, budget, `parse_with_splits`, permissive-recovery options,
/// `print_json`) is written once, after the match.
///
/// # Errors
///
/// Propagates failures from hashing the input, parsing it, the selected
/// command body, or `commands::print_json`.
fn dispatch_inspect(
    command: InspectCommands,
    budget_spec: BudgetSpec,
    permissive_recovery_opts: droidsaw_apk::PermissiveRecoveryOpts,
) -> anyhow::Result<()> {
    // Each arm yields the input path plus a boxed body producing the JSON
    // value; the annotation lets every closure coerce to `CtxJsonFn`.
    let (owned_path, produce): (_, CtxJsonFn) = match command {
        InspectCommands::Entries { path, search, limit } => (
            path,
            Box::new(move |ctx| commands::entries(ctx, search.as_deref(), limit)),
        ),
        InspectCommands::Elf { path, search } => (
            path,
            Box::new(move |ctx| commands::elf(ctx, search.as_deref())),
        ),
        InspectCommands::Resources { path, search, limit } => (
            path,
            Box::new(move |ctx| commands::resources(ctx, search.as_deref(), limit)),
        ),
        InspectCommands::Webview { path, search, extract } => (
            path,
            Box::new(move |ctx| {
                commands::webview_assets(ctx, search.as_deref(), extract.as_deref())
            }),
        ),
    };
    let input: &std::path::Path = &owned_path;

    let input_hash = CrossLayerContext::input_hash(input)?;
    let mut budget = budget_spec.make_budget();
    // Parsing and the command body both run inside the `with_input_hash`
    // scope for this input.
    droidsaw_common::diag::with_input_hash(&input_hash, || {
        let mut ctx = CrossLayerContext::parse_with_splits(
            input,
            budget_spec.no_auto_splits,
            Some(&mut budget),
        )?;
        ctx.permissive_recovery = permissive_recovery_opts;
        let value = produce(&ctx)?;
        commands::print_json(&value)
    })
}

fn dispatch_scan(
    command: ScanCommands,
    budget_spec: BudgetSpec,
    permissive_recovery_opts: droidsaw_apk::PermissiveRecoveryOpts,
) -> anyhow::Result<()> {
    let ctx_run = |path: &std::path::Path, f: CtxJsonFn| {
        let hash = CrossLayerContext::input_hash(path)?;
        let mut budget = budget_spec.make_budget();
        droidsaw_common::diag::with_input_hash(&hash, || {
            let mut ctx = CrossLayerContext::parse_with_splits(
                path,
                budget_spec.no_auto_splits,
                Some(&mut budget),
            )?;
            ctx.permissive_recovery = permissive_recovery_opts;
            commands::print_json(&f(&ctx)?)
        })
    };
    match command {
        ScanCommands::Yara { path, rules, target, limit } =>
            ctx_run(&path, Box::new(move |ctx| commands::yara(ctx, None, rules.as_deref(), &target, limit))),
        ScanCommands::Sbom { path } =>
            ctx_run(&path, Box::new(commands::sbom)),
        ScanCommands::Trufflehog { path, min_length } => {
            let hash = CrossLayerContext::input_hash(&path)?;
            let mut budget = budget_spec.make_budget();
            droidsaw_common::diag::with_input_hash(&hash, || {
                let mut ctx = CrossLayerContext::parse_with_splits(
                    &path,
                    budget_spec.no_auto_splits,
                    Some(&mut budget),
                )?;
                ctx.permissive_recovery = permissive_recovery_opts;
                let stdout = std::io::stdout();
                let mut out = stdout.lock();
                commands::trufflehog(&ctx, min_length, &mut out)
            })
        }
        ScanCommands::Semgrep { path, output, persist, db, semgrep_args } =>
            ctx_run(&path, Box::new(move |ctx| {
                commands::scan_semgrep(ctx, output.as_deref(), &semgrep_args, persist, db.as_deref())
            })),
        ScanCommands::Export { path, output } =>
            ctx_run(&path, Box::new(move |ctx| commands::export(ctx, &output))),
    }
}

/// Runs a `corpus` subcommand.
///
/// `Ingest` returns a JSON value that is passed to `commands::print_json`;
/// `Scan` is given a locked stdout handle and writes its own output.
///
/// # Errors
///
/// Propagates any error from the selected command or from
/// `commands::print_json`.
fn dispatch_corpus(command: CorpusCommands) -> anyhow::Result<()> {
    match command {
        CorpusCommands::Scan { paths, min_severity } => {
            let stdout = std::io::stdout();
            let mut sink = stdout.lock();
            commands::scan_corpus(&paths, &min_severity, &mut sink)
        }
        CorpusCommands::Ingest { paths, output, tag, no_skip_existing } => {
            // The CLI flag is phrased negatively; the command takes the
            // positive form.
            let skip_existing = !no_skip_existing;
            let summary =
                commands::corpus_ingest(&paths, &output, tag.as_deref(), skip_existing)?;
            commands::print_json(&summary)
        }
    }
}

#[cfg(test)]
mod tests {
    use super::resolve_permissive_recovery_opts;

    /// Collapses the three recovery flags of a resolved options value into
    /// one tuple so a test can check the whole flag set in one assertion.
    /// Order: (multi_root, unclosed_elements, orphan_end_element).
    macro_rules! flags {
        ($opts:ident) => {
            (
                $opts.accept_multi_root,
                $opts.accept_unclosed_elements,
                $opts.accept_orphan_end_element,
            )
        };
    }

    #[test]
    fn permissive_recovery_empty_yields_default() {
        let resolved = resolve_permissive_recovery_opts(&[]).expect("empty must succeed");
        assert_eq!(flags!(resolved), (false, false, false));
    }

    #[test]
    fn permissive_recovery_single_known_opt() {
        let resolved = resolve_permissive_recovery_opts(&["multi_root".into()])
            .expect("known opt must succeed");
        assert_eq!(flags!(resolved), (true, false, false));
    }

    #[test]
    fn permissive_recovery_multiple_known_opts() {
        let requested = ["multi_root".into(), "orphan_end_element".into()];
        let resolved = resolve_permissive_recovery_opts(&requested)
            .expect("multiple known opts must succeed");
        assert_eq!(flags!(resolved), (true, false, true));
    }

    #[test]
    fn permissive_recovery_all_alias() {
        let resolved = resolve_permissive_recovery_opts(&["all".into()])
            .expect("'all' alias must succeed");
        assert_eq!(flags!(resolved), (true, true, true));
    }

    #[test]
    fn permissive_recovery_unknown_opt_errs_with_menu() {
        let message = resolve_permissive_recovery_opts(&["bogus".into()])
            .expect_err("unknown opt must Err");
        // The rejected name must be echoed back to the user.
        assert!(message.contains("bogus"), "error must name the bad opt: {message}");
        // Every accepted option name must appear in the error text.
        assert!(message.contains("multi_root"), "menu must list known opts: {message}");
        for known in ["unclosed_elements", "orphan_end_element"] {
            assert!(message.contains(known));
        }
    }
}