droidsaw 2.0.0

DROIDSAW — unified Android reverse engineering CLI. Hermes, DEX, APK signing. JSON output, MCP server. Bytecode is not a security layer.
Documentation
use std::path::PathBuf;

use serde_json::{json, Value};

use crate::context::CrossLayerContext;

/// Argument specification for `droidsaw deobf-strings`.
///
/// At most one of `--int-range` or `--args-json` may be supplied to
/// define the argument sets; the two flags conflict with each other.
/// When neither is given the driver runs with an empty set (which
/// produces an empty result — a "method found" sanity check).
#[derive(clap::Args, Debug)]
pub struct DeobfStringsArgs {
    /// Path to APK or DEX file.
    pub path: PathBuf,

    /// Target class in JVM descriptor form (e.g. `Lcom/example/Obf;`).
    #[arg(long, value_name = "DESCRIPTOR")]
    pub class: String,

    /// Target method name (e.g. `decrypt`).
    #[arg(long, value_name = "NAME")]
    pub method: String,

    /// Optional proto shorty string for disambiguation when the class
    /// has overloaded methods with the same name (e.g. `SI`).
    #[arg(long, value_name = "SHORTY")]
    pub proto_shorty: Option<String>,

    /// Enumerate integer arguments in the range `start..=end` (inclusive).
    /// Creates one `[Value::Int(i)]` arg-set per integer in the range.
    /// Cannot be combined with `--args-json`.
    ///
    /// Example: `--int-range 0..255` produces 256 single-int arg-sets.
    #[arg(long, value_name = "START..END", conflicts_with = "args_json")]
    pub int_range: Option<String>,

    /// Explicit arg-sets as a JSON array of arrays.
    /// Each inner array is one invocation; each element is a `Value` object:
    /// `{"Int": 42}`, `{"Wide": 42}`, `{"Str": "hello"}`, `{"Void": null}`.
    /// Cannot be combined with `--int-range`.
    ///
    /// Example: `--args-json '[[{"Int":0}],[{"Int":1}],[{"Int":2}]]'`
    #[arg(long, value_name = "JSON", conflicts_with = "int_range")]
    pub args_json: Option<String>,

    /// Per-invocation instruction-count budget. Default: 10,000.
    #[arg(long, default_value = "10000")]
    pub budget: u32,
}

/// Run the bytecode emulator over a named DEX method with the supplied
/// argument sets to recover plaintext strings.
///
/// Returns JSON:
/// ```json
/// {
///   "target": {"class": "...", "method": "...", "proto_shorty": null},
///   "pairs": [{"args": [...], "plaintext": "..."}],
///   "summary": {
///     "plaintext_count": N,
///     "halt_budget_exceeded_count": N,
///     "unsupported_count": N
///   },
///   "_meta": {"count": N, "truncated": false, "hint": "...", "related": [...]}
/// }
/// ```
///
/// When no pairs are recovered (empty result or zero arg-sets), the
/// function returns a non-error JSON object with an explicit
/// `"plaintext_count": 0` rather than a silent exit 0.
pub fn deobf_strings(
    ctx: &CrossLayerContext,
    args: &DeobfStringsArgs,
) -> anyhow::Result<Value> {
    use droidsaw_dex::emulator::driver::{DeobfDriver, DriverError, MethodTarget};

    // Build the MethodTarget.
    let target = if let Some(s) = args.proto_shorty.as_deref() {
        MethodTarget::with_proto(args.class.clone(), args.method.clone(), s.to_owned())
    } else {
        MethodTarget::new(args.class.clone(), args.method.clone())
    };

    // Build arg-sets from CLI arguments.
    let arg_sets = build_arg_sets(&args.int_range, &args.args_json)?;

    // Find the first DEX layer that contains the target class.
    // Iterate over all DEX layers; the first successful run wins.
    // If every layer returns ClassNotFound, surface that as an error.
    let mut last_err: Option<DriverError> = None;
    for dex in &ctx.dex {
        match DeobfDriver::run(dex, &target, &arg_sets, args.budget) {
            Ok(result) => {
                // Serialise pairs: args are a Vec<EmuValue>; render as JSON.
                let pairs: Vec<Value> = result
                    .plaintext_pairs
                    .iter()
                    .map(|(arg_set, plaintext)| {
                        let args_json: Vec<Value> = arg_set
                            .iter()
                            .map(emu_value_to_json)
                            .collect();
                        json!({
                            "args": args_json,
                            "plaintext": plaintext,
                        })
                    })
                    .collect();

                let plaintext_count = pairs.len();
                let hint = if result.halt_budget_exceeded_count > 0 {
                    format!(
                        "{} runs hit the budget ({} instructions); raise --budget if coverage is partial",
                        result.halt_budget_exceeded_count, args.budget
                    )
                } else if plaintext_count == 0 {
                    "No strings recovered; try --int-range or --args-json with argument values the method expects".to_owned()
                } else {
                    "Use --int-range to enumerate all inputs; check unsupported_count for partial coverage".to_owned()
                };

                return Ok(json!({
                    "target": {
                        "class": args.class,
                        "method": args.method,
                        "proto_shorty": args.proto_shorty,
                    },
                    "pairs": pairs,
                    "summary": {
                        "plaintext_count": plaintext_count,
                        "halt_budget_exceeded_count": result.halt_budget_exceeded_count,
                        "unsupported_count": result.unsupported_count,
                    },
                    "_meta": super::meta(
                        plaintext_count,
                        false,
                        &hint,
                        &["decompile", "strings", "xrefs"],
                    ),
                }));
            }
            Err(DriverError::ClassNotFound { .. }) => {
                last_err = Some(DriverError::ClassNotFound {
                    descriptor: args.class.clone(),
                });
                // Try next DEX layer.
            }
            Err(e) => {
                return Err(anyhow::anyhow!("deobf-strings driver error: {e}"));
            }
        }
    }

    // No DEX layer found the class.
    if ctx.dex.is_empty() {
        anyhow::bail!("no DEX layer found in {}", args.path.display());
    }
    match last_err {
        Some(e) => Err(anyhow::anyhow!("{e}")),
        None => anyhow::bail!("class not found: {}", args.class),
    }
}

/// Parse the `--int-range` or `--args-json` CLI arguments into a
/// `Vec<ArgSet>`.
///
/// * `int_range` — `START..END` or `START..=END`, where both bounds are
///   `i32`. Both spellings include `END`, as the `--int-range` help text
///   promises (`0..255` yields 256 single-int arg-sets). A range whose end
///   is below its start yields no arg-sets.
/// * `args_json` — a JSON array of arrays; every inner element is converted
///   with `json_to_emu_value`.
///
/// `int_range` takes precedence when both are `Some`. When both are `None`
/// an empty list is returned (class/method presence check).
///
/// # Errors
///
/// Returns an error when the range has no `..` separator, when a bound is
/// not a valid `i32`, when the range holds more than 1,000,000 values, or
/// when the JSON is malformed / not an array of arrays / contains a value
/// `json_to_emu_value` rejects.
fn build_arg_sets(
    int_range: &Option<String>,
    args_json: &Option<String>,
) -> anyhow::Result<Vec<Vec<droidsaw_dex::emulator::Value>>> {
    use droidsaw_dex::emulator::Value as EmuValue;

    // Cap to prevent accidental 2^31-element allocations. Kept as i64 so the
    // size of any i32 range can be compared against it without overflow.
    const MAX_INT_RANGE: i64 = 1_000_000;

    if let Some(range_str) = int_range.as_deref() {
        // Try the longer separator first so the `=` of "..=" never ends up
        // in the END text. `split_once` splits at the first occurrence.
        let (start_str, end_str) = range_str
            .split_once("..=")
            .or_else(|| range_str.split_once(".."))
            .ok_or_else(|| {
                anyhow::anyhow!(
                    "--int-range must be in the form START..END or START..=END; got {:?}",
                    range_str
                )
            })?;
        let start: i32 = start_str
            .parse()
            .map_err(|e| anyhow::anyhow!("--int-range: START {start_str:?} is not a valid i32: {e}"))?;
        let end: i32 = end_str
            .parse()
            .map_err(|e| anyhow::anyhow!("--int-range: END {end_str:?} is not a valid i32: {e}"))?;

        // Number of values in start..=end, computed in i64: the widest i32
        // range holds 2^32 values, which fits comfortably.
        let range_size = i64::from(end)
            .saturating_sub(i64::from(start))
            .saturating_add(1);
        if range_size > MAX_INT_RANGE {
            anyhow::bail!(
                "--int-range: range size {range_size} exceeds maximum {MAX_INT_RANGE}; narrow the range"
            );
        }
        if range_size <= 0 {
            return Ok(Vec::new());
        }

        // An inclusive range iterator emits `end` itself even when `end` is
        // `i32::MAX`, where an `end + 1` exclusive bound cannot be formed.
        Ok((start..=end).map(|i| vec![EmuValue::Int(i)]).collect())
    } else if let Some(json_str) = args_json.as_deref() {
        // Parse JSON array of arrays of Value.
        let outer: serde_json::Value = serde_json::from_str(json_str)
            .map_err(|e| anyhow::anyhow!("--args-json parse error: {e}"))?;
        let arr = outer
            .as_array()
            .ok_or_else(|| anyhow::anyhow!("--args-json must be a JSON array of arrays"))?;
        // Collecting into `Result` stops at the first element that fails.
        arr.iter()
            .map(|inner| {
                let inner_arr = inner.as_array().ok_or_else(|| {
                    anyhow::anyhow!("--args-json: each element must be a JSON array")
                })?;
                inner_arr.iter().map(json_to_emu_value).collect()
            })
            .collect()
    } else {
        // No arg-sets provided — run with empty set (class/method presence check).
        Ok(Vec::new())
    }
}

/// Convert a `droidsaw_dex::emulator::Value` to a `serde_json::Value`.
fn emu_value_to_json(v: &droidsaw_dex::emulator::Value) -> Value {
    use droidsaw_dex::emulator::Value as EmuValue;
    match v {
        EmuValue::Int(i) => json!({"Int": i}),
        EmuValue::Wide(w) => json!({"Wide": w}),
        EmuValue::Str(s) => json!({"Str": s}),
        EmuValue::Array(a) => json!({"Array": a}),
        EmuValue::Void => json!({"Void": null}),
    }
}

/// Parse a `serde_json::Value` into a `droidsaw_dex::emulator::Value`.
fn json_to_emu_value(
    v: &serde_json::Value,
) -> anyhow::Result<droidsaw_dex::emulator::Value> {
    use droidsaw_dex::emulator::Value as EmuValue;
    let obj = v
        .as_object()
        .ok_or_else(|| anyhow::anyhow!("each Value must be a JSON object like {{\"Int\":42}}"))?;
    if let Some(i) = obj.get("Int") {
        let n = i.as_i64().ok_or_else(|| anyhow::anyhow!("Int value must be an integer"))?;
        #[allow(
            clippy::cast_possible_truncation,
            clippy::as_conversions,
            reason = "INTENT: JSON-supplied seed values for the emulator's Int slot. The user-supplied JSON literal is treated as a 32-bit DEX register value (DEX `int` is 32-bit by format); high bits, if any, are explicitly discarded."
        )]
        let truncated = n as i32;
        return Ok(EmuValue::Int(truncated));
    }
    if let Some(w) = obj.get("Wide") {
        let n = w.as_i64().ok_or_else(|| anyhow::anyhow!("Wide value must be an integer"))?;
        return Ok(EmuValue::Wide(n));
    }
    if let Some(s) = obj.get("Str") {
        let s = s.as_str().ok_or_else(|| anyhow::anyhow!("Str value must be a string"))?;
        return Ok(EmuValue::Str(s.to_owned()));
    }
    if obj.contains_key("Void") {
        return Ok(EmuValue::Void);
    }
    anyhow::bail!("unrecognized Value variant in --args-json; expected Int/Wide/Str/Void")
}