droidsaw 2.0.0

DROIDSAW — unified Android reverse engineering CLI. Hermes, DEX, APK signing. JSON output, MCP server. Bytecode is not a security layer.
Documentation
use serde::Serialize;
use serde_json::{json, Value};

use crate::context::CrossLayerContext;

use super::meta;

/// Count structural `{` and `}` braces on a single line, ignoring
/// braces that appear inside `"..."` string literals, inside character
/// literals such as `'{'`, or after a `//` line comment.
///
/// Character literals are recognised conservatively: only `'x'` (a
/// single byte between the quotes) and `'\...'` (a backslash escape
/// closed by a `'` within a few bytes) are skipped. Any other
/// apostrophe is treated as an ordinary character. Without this, a
/// literal such as `'"'` would open a phantom string and hide the
/// structural `{` that follows it on the same line.
///
/// Block comments (`/* */`) are not handled — they're rare in
/// decompiled output and a stray block comment with `{` would just
/// bias the depth tracker for that one method (the outline cap still
/// bounds total emitted lines).
///
/// Returns `(opens, closes)` so callers can compute depth deltas
/// without re-scanning the line.
fn count_structural_braces(line: &str) -> (usize, usize) {
    /// Byte length of the character literal whose opening `'` sits at
    /// `start`, or `None` when the bytes there do not form one.
    fn char_literal_len(bytes: &[u8], start: usize) -> Option<usize> {
        // Longest escape tail scanned for the closing quote. Covers
        // `'\uXXXX'` with room to spare while keeping a stray
        // apostrophe from swallowing the rest of the line.
        const MAX_ESCAPE_TAIL: usize = 8;

        let first = bytes.get(start.saturating_add(1)).copied()?;
        if first == b'\\' {
            // The byte right after the backslash is escaped (it may be
            // `'` itself), so the closing-quote search starts after it.
            let from = start.saturating_add(3);
            let to = from.saturating_add(MAX_ESCAPE_TAIL).min(bytes.len());
            let tail = bytes.get(from..to)?;
            let close = tail.iter().position(|&b| b == b'\'')?;
            // Opening quote + backslash + escaped byte, then the tail
            // up to and including the closing quote.
            Some(close.saturating_add(4))
        } else if first != b'\''
            && bytes.get(start.saturating_add(2)).copied() == Some(b'\'')
        {
            // Plain `'x'` form.
            Some(3)
        } else {
            None
        }
    }

    let bytes = line.as_bytes();
    let mut opens: usize = 0;
    let mut closes: usize = 0;
    let mut in_str = false;
    let mut i: usize = 0;
    while let Some(&c) = bytes.get(i) {
        // Number of bytes consumed by this iteration.
        let mut step: usize = 1;
        if in_str {
            match c {
                // Skip the escaped byte so `\"` does not end the string.
                b'\\' => step = 2,
                b'"' => in_str = false,
                _ => {}
            }
        } else {
            match c {
                // Line-comment detection: `//` outside a string ends the line.
                b'/' if bytes.get(i.saturating_add(1)).copied() == Some(b'/') => break,
                b'"' => in_str = true,
                b'\'' => {
                    if let Some(len) = char_literal_len(bytes, i) {
                        step = len;
                    }
                }
                b'{' => opens = opens.saturating_add(1),
                b'}' => closes = closes.saturating_add(1),
                _ => {}
            }
        }
        i = i.saturating_add(step);
    }
    (opens, closes)
}

/// Pull the method name out of a Java method-signature line, i.e. the
/// line that carries the `{` opening the method body.
///
/// The text in front of the first `(` is split on whitespace and its
/// final token is taken as the name:
/// - `    public Minimal(int x) {` → `"Minimal"` (constructor)
/// - `    public static String hello(int n) {` → `"hello"`
/// - `    public int getX() {` → `"getX"`
///
/// Yields `None` when the line contains no `(` at all (so it is not a
/// method signature), or when nothing but whitespace precedes the
/// first `(`.
fn extract_method_name(signature_line: &str) -> Option<String> {
    let (head, _params) = signature_line.split_once('(')?;
    let name = head.split_whitespace().next_back()?;
    Some(name.to_owned())
}

/// Apply the outline-mode cap to a decompiled class source. Keeps the
/// class header + field decls + method signatures + first
/// `lines_per_method` body lines; replaces remaining body lines with
/// a `// ... N more lines elided` marker so the caller knows
/// truncation happened.
///
/// A line counts as "body" when the brace depth before it is 2 or
/// more, so anything nested that deep — including the members of a
/// nested class — is capped the same way. The line that closes a body
/// (depth drops below 2) is always emitted, preceded by the marker if
/// any lines were dropped.
///
/// If `source` ends while still inside a body (unbalanced or truncated
/// input), the marker is emitted at the end of the output so dropped
/// lines are never silent.
///
/// Every emitted line is terminated with `\n`, regardless of how the
/// last line of `source` ended.
///
/// Pure string-rewriter at the MCP boundary; format-specific
/// knowledge stays in the bundle crate. Brace tracking is delegated
/// to `count_structural_braces`.
pub fn apply_outline_filter(source: &str, lines_per_method: usize) -> String {
    /// Append the elision marker when at least one body line was dropped.
    fn push_elision_marker(out: &mut String, elided: usize) {
        if elided > 0 {
            out.push_str("        // ... ");
            out.push_str(&elided.to_string());
            out.push_str(" more lines elided\n");
        }
    }

    let mut out = String::new();
    let mut depth: usize = 0;
    // Body-line counters for the method currently being walked. Both
    // are zero whenever we are at class level: they are only bumped
    // inside a body and are reset on the line that leaves it.
    let mut body_kept: usize = 0;
    let mut body_total: usize = 0;

    for line in source.lines() {
        let (opens, closes) = count_structural_braces(line);
        let new_depth = depth.saturating_add(opens).saturating_sub(closes);
        let was_in_body = depth >= 2;
        let now_in_body = new_depth >= 2;

        if was_in_body && now_in_body {
            // Inside a method body: keep the first `lines_per_method`
            // lines, count the rest for the marker.
            body_total = body_total.saturating_add(1);
            if body_kept < lines_per_method {
                out.push_str(line);
                out.push('\n');
                body_kept = body_kept.saturating_add(1);
            }
        } else {
            if was_in_body {
                // Exiting a method body: report dropped lines before
                // the closing line, then reset for the next method.
                push_elision_marker(&mut out, body_total.saturating_sub(body_kept));
                body_kept = 0;
                body_total = 0;
            }
            // Class level (or above), a signature line opening a body,
            // or the line closing one: always emitted.
            out.push_str(line);
            out.push('\n');
        }

        depth = new_depth;
    }

    // Input ended inside a body: still tell the caller lines were dropped.
    push_elision_marker(&mut out, body_total.saturating_sub(body_kept));

    out
}

/// Filter a decompiled class source down to the methods named in
/// `methods_keep`. The class header, field declarations and every
/// method whose name matches are passed through; all other method
/// bodies (signature line through closing line) are dropped.
///
/// A method is recognised as a line that takes the brace depth from
/// exactly 1 to 2 or more. Its name is whatever `extract_method_name`
/// returns for that line, compared for exact equality. Lines for
/// which no name can be extracted are kept. Overloads sharing a name
/// are all kept or all dropped together — disambiguation by
/// descriptor is out of scope.
///
/// Every emitted line is terminated with `\n`.
pub fn apply_methods_filter(source: &str, methods_keep: &[String]) -> String {
    let mut kept = String::new();
    let mut depth: usize = 0;
    // `Some(d)` while inside a dropped method, where `d` is the depth
    // we must fall back to before emitting resumes.
    let mut resume_at: Option<usize> = None;

    for line in source.lines() {
        let (opens, closes) = count_structural_braces(line);
        let depth_before = depth;
        depth = depth_before.saturating_add(opens).saturating_sub(closes);

        if let Some(target) = resume_at {
            // Still swallowing a dropped method; the line that brings
            // the depth back down is swallowed as well.
            if depth <= target {
                resume_at = None;
            }
            continue;
        }

        // A line taking the depth from 1 to 2+ opens a member body.
        if depth_before == 1 && depth >= 2 {
            let wanted = match extract_method_name(line) {
                Some(name) => methods_keep.contains(&name),
                // No recognisable name: err on the side of keeping it.
                None => true,
            };
            if !wanted {
                resume_at = Some(depth_before);
                continue;
            }
        }

        kept.push_str(line);
        kept.push('\n');
    }

    kept
}

/// One row of the `strings` command output.
#[derive(Serialize)]
struct StringEntry {
    /// Source layer label: `"hbc"`, `"dexN"` (1-based), `"arsc"` or `"native"`.
    layer: String,
    /// Position of the string in its source: the string-pool index for
    /// hbc/dex/arsc rows, the ELF string's `offset` for native rows.
    index: u32,
    /// ELF section name (`.rodata` or `.dynstr`). Only set for native
    /// rows; the key is omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    section: Option<&'static str>,
    /// `.so` library file name. Only set for native rows; the key is
    /// omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    so_name: Option<String>,
    /// The string itself.
    value: String,
    /// Length of `value` in bytes.
    length: usize,
}

/// Collect strings from the layers held by `ctx` into a JSON payload.
///
/// Layers are visited in a fixed order — hbc, each DEX in turn, arsc,
/// then native — and every string that passes the filters becomes one
/// `StringEntry` under the `"strings"` key. The `"_meta"` key carries
/// whatever `meta` returns for the entry count and truncation flag.
///
/// # Parameters
/// - `search`: regex pattern (compiled with `regex::Regex::new`); only
///   strings it matches are kept.
/// - `min_length`: minimum byte length. Defaults to 4 when
///   `layer_filter` is exactly `"native"`, otherwise 0.
/// - `limit`: cap on the total number of entries. Collection stops as
///   soon as the cap is reached and the result is flagged truncated,
///   whether or not further strings would have matched. The cap is
///   checked after each push, so a cap of 0 still yields one entry.
/// - `layer_filter`: `None` for every layer, or one of `"hbc"`,
///   `"dex"` (every DEX), `"dexN"` (only the Nth DEX, 1-based),
///   `"arsc"`, `"native"`. Any other value produces no rows.
///
/// # Errors
/// Returns an error when `search` is not a valid regex, or when
/// `layer_filter` is `"hbc"` and `ctx.ensure_hbc_parsed()` fails.
#[allow(clippy::arithmetic_side_effects, clippy::as_conversions, reason = "`dex_idx + 1` is usize+1 bounded by ctx.dex.len() ≤ isize::MAX; `j as u32` valid per DEX format spec capping string pool at u32::MAX.")]
pub fn strings(
    ctx: &CrossLayerContext,
    search: Option<&str>,
    min_length: Option<usize>,
    limit: Option<usize>,
    layer_filter: Option<&str>,
) -> anyhow::Result<Value> {
    // RAII drain guard: hbc.string_as_str_or_empty / string_get can
    // emit hermes findings on adversarial input. See xrefs.rs.
    let _drain_guard = crate::context::HermesFindingDrainGuard::install_discard();

    // `--layer hbc` (and `hbc strings`) name the hbc layer explicitly:
    // a present-but-unparseable bundle is a hard typed error, not an
    // empty result. Unfiltered / other-layer requests stay contained —
    // the broken layer simply contributes no rows.
    if layer_filter == Some("hbc") {
        ctx.ensure_hbc_parsed()?;
    }

    let re = search.map(regex::Regex::new).transpose()?;
    // For native layer, default min_length to 4 (matches `strings` convention);
    // for all other layers default to 0 (preserve existing behaviour).
    let default_min = if layer_filter == Some("native") { 4 } else { 0 };
    let min_len = min_length.unwrap_or(default_min);

    let emit_hbc = layer_filter.is_none_or(|l| l == "hbc");
    let emit_dex = layer_filter.is_none_or(|l| l.starts_with("dex"));
    let emit_native = layer_filter.is_none_or(|l| l == "native");
    let emit_arsc = layer_filter.is_none_or(|l| l == "arsc");

    let mut out: Vec<StringEntry> = Vec::new();
    let mut truncated = false;

    // Labeled block so that hitting the cap in any layer abandons all
    // remaining layers with a single `break 'outer`.
    'outer: {
        if emit_hbc
            && let Some(hbc_owned) = ctx.hbc.as_ref()
        {
            let hbc = hbc_owned.hbc();
            for i in 0..hbc.string_count {
                // Lenient policy: CLI render site — corrupted entries
                // produce `""` so the rest of the strings dump still
                // surfaces. Typed signal preserved via the
                // `HermesFinding` side-channel from `string_get` for
                // `audit`-stream consumers.
                let s: String = hbc.string_as_str_or_empty(i).into_owned();
                if s.len() < min_len {
                    continue;
                }
                if let Some(ref re) = re
                    && !re.is_match(&s)
                {
                    continue;
                }
                let len = s.len();
                out.push(StringEntry {
                    layer: "hbc".to_string(),
                    index: i,
                    section: None,
                    so_name: None,
                    value: s,
                    length: len,
                });
                if let Some(cap) = limit
                    && out.len() >= cap
                {
                    truncated = true;
                    break 'outer;
                }
            }
        }
        if emit_dex {
            for (dex_idx, dex) in ctx.dex.iter().enumerate() {
                let layer_label = format!("dex{}", dex_idx + 1);
                // Filter shape (matches the `--layer` CLI help text):
                //   None        — emit every layer
                //   Some("dex") — emit every DEX layer (the wildcard form)
                //   Some("dexN")— emit only the Nth DEX (exact match)
                // The bare "dex" branch was previously absent; with only
                // the exact-match check, `--layer dex` silently emitted 0
                // even though the `emit_dex` gate above had already opted
                // into the DEX branch on the same filter value.
                if let Some(filter) = layer_filter
                    && filter != "dex"
                    && filter != layer_label.as_str()
                {
                    continue;
                }
                for (j, entry) in dex.strings.iter().enumerate() {
                    let s = entry.as_str_lossy();
                    if s.len() < min_len {
                        continue;
                    }
                    if let Some(ref re) = re
                        && !re.is_match(s)
                    {
                        continue;
                    }
                    #[allow(
                        clippy::cast_possible_truncation,
                        reason = "PROOF: j enumerates dex.strings (a DEX string pool); DEX format caps string_ids_size at u32::MAX, so usize→u32 is lossless on every supported target."
                    )]
                    let index = j as u32;
                    out.push(StringEntry {
                        layer: layer_label.clone(),
                        index,
                        section: None,
                        so_name: None,
                        value: s.to_string(),
                        length: s.len(),
                    });
                    if let Some(cap) = limit
                        && out.len() >= cap
                    {
                        truncated = true;
                        break 'outer;
                    }
                }
            }
        }
        if emit_arsc
            && let Some(apk) = ctx.apk.as_ref()
            && let Some(rt) = apk.resources.as_ref()
        {
            for (j, s) in rt.global_strings.iter().enumerate() {
                if s.len() < min_len {
                    continue;
                }
                if let Some(ref re) = re
                    && !re.is_match(s)
                {
                    continue;
                }
                // PROOF: `j` enumerates rt.global_strings; the arsc
                // format encodes pool counts in u32, so narrowing the
                // usize index to u32 cannot truncate in practice.
                #[allow(clippy::as_conversions, clippy::cast_possible_truncation, reason = "`j` is usize bounded by rt.global_strings.len(); narrowing to u32 for the StringEntry index cannot truncate in practice — pools larger than u32::MAX are unreachable because the arsc format encodes pool counts in u32.")]
                let idx = j as u32;
                out.push(StringEntry {
                    layer: "arsc".to_string(),
                    index: idx,
                    section: None,
                    so_name: None,
                    value: s.clone(),
                    length: s.len(),
                });
                if let Some(cap) = limit
                    && out.len() >= cap
                {
                    truncated = true;
                    break 'outer;
                }
            }
        }
        if emit_native
            && let Some(apk) = ctx.apk.as_ref()
        {
            for (lib_path, elf_info) in &apk.elf_info {
                // Derive the .so filename from the lib_path key (format: "apk:lib/abi/libfoo.so")
                let so_name = lib_path
                    .rsplit('/')
                    .next()
                    .unwrap_or(lib_path.as_str())
                    .to_string();
                // Emit .rodata strings then .dynstr strings for this library.
                for elf_str in elf_info
                    .rodata_strings
                    .iter()
                    .chain(elf_info.dynstr_strings.iter())
                {
                    let s = &elf_str.value;
                    if s.len() < min_len {
                        continue;
                    }
                    if let Some(ref re) = re
                        && !re.is_match(s)
                    {
                        continue;
                    }
                    out.push(StringEntry {
                        layer: "native".to_string(),
                        index: elf_str.offset,
                        section: Some(elf_str.section),
                        so_name: Some(so_name.clone()),
                        value: s.clone(),
                        length: s.len(),
                    });
                    if let Some(cap) = limit
                        && out.len() >= cap
                    {
                        truncated = true;
                        // Same exit as the other layers; native is the
                        // last section, so nothing is skipped.
                        break 'outer;
                    }
                }
            }
        }
    }

    let count = out.len();
    let payload = json!({
        "strings": out,
        "_meta": meta(
            count,
            truncated,
            "use --search to filter, --layer (hbc|dex<N>|native|arsc) to restrict, --limit to cap",
            &["xrefs", "frida", "trufflehog"],
        ),
    });
    Ok(payload)
}