use serde::Serialize;
use serde_json::{json, Value};
use crate::context::CrossLayerContext;
use super::meta;
/// One entry of the `functions` array in the JSON envelope returned by
/// `decompile`. Serialized with serde, so the field names below are the
/// JSON keys consumers see.
#[derive(Serialize)]
struct DecompiledFn {
// Originating layer: `"hbc"` for Hermes bytecode entries, `"dex<n>"`
// (1-based index into `ctx.dex`) for DEX entries.
layer: String,
// HBC function id for `hbc` entries; `class_idx.0` of the class
// definition for DEX entries.
function_id: u32,
// HBC function name from the string table (may be empty) or the DEX
// type descriptor of the class (empty when the index does not resolve).
name: String,
// Decompiled text for the function (HBC) or the whole class (DEX).
source: String,
}
#[allow(clippy::arithmetic_side_effects, clippy::as_conversions, reason = "HBC slice bounds: u64 sum `.min(hbc_data.len() as u64)` then narrow `as usize` is proven ≤ usize::MAX; `end + 256` bounded by slice cap isize::MAX; `i + 1` dex-layer label bounded by ctx.dex.len(); `class_idx.0 as usize` widens u32→usize losslessly for bounds-checked `.get()` on type_descriptors (matches `dex_decompile_filtered` and `dex_decompile_all` conventions). Note: the single typed-Err site (`fid + 1` range-end at fid..fid+1) is handled with `checked_add` in the typed-Err commit; remaining raw arith in this function is proven-by-construction per above.")]
pub fn decompile(
ctx: &CrossLayerContext,
target: Option<&str>,
js: bool,
all: bool,
) -> anyhow::Result<Value> {
// RAII drain guard: closes the I/O-`?`-before-explicit-drain gap
// (see semgrep.rs for rationale). The function explicitly drains
// findings at end and embeds them in returned JSON; the guard
// catches early-exit paths (`anyhow::bail!`, propagated `?`).
let _drain_guard = crate::context::HermesFindingDrainGuard::install_discard();
let mut out: Vec<DecompiledFn> = Vec::new();
if let Some(hbc_owned) = ctx.hbc.as_ref() {
let hbc = hbc_owned.hbc();
let range = if all {
Some(0..hbc.function_count)
} else if let Some(t) = target {
let fid: u32 = t
.parse()
.map_err(|e| anyhow::anyhow!("invalid function ID: {t}: {e}"))?;
// typed-Err: user-supplied `fid` from CLI parse; `fid == u32::MAX`
// would wrap under raw `fid + 1`, producing an empty range and a
// silent no-op. Surface the overflow as a typed error instead.
let fid_end = fid
.checked_add(1)
.ok_or_else(|| anyhow::anyhow!("function id {fid} exceeds maximum (u32::MAX - 1)"))?;
Some(fid..fid_end)
} else if !ctx.dex.is_empty() {
// Hybrid APK (DEX + HBC) with no HBC selector: skip the HBC
// contribution and let the DEX branch below run. Prevents the
// brief's "no silent drop" gate from being violated when a
// React Native APK has both layers but the operator didn't
// pick an HBC function.
None
} else {
anyhow::bail!("specify a function ID or --all");
};
let hbc_data = hbc_owned.bytes();
// `range = None` is the hybrid-APK skip path (HBC present but no
// HBC selector + DEX also present): emit zero HBC entries and
// let the DEX branch below contribute.
if let Some(range) = range {
for fid in range {
if let Some((fname, source)) = decompile_hbc_one(hbc, hbc_data, fid, js) {
out.push(DecompiledFn {
layer: "hbc".to_string(),
function_id: fid,
name: fname,
source,
});
}
}
}
}
if !ctx.dex.is_empty() {
// Per-class JSON envelope entries: `function_id = class_idx.0`,
// `name = descriptor` (JVM form, e.g. `Lcom/example/Foo;`),
// `source = full Java text for the class (all methods inline)`.
// Iteration + census + shadow gate mirror `dex_decompile_filtered`
// and `dex_decompile_all` so the emitted class set matches what
// `dex classes`, `dex_decompile` (search/class-index), and
// `dex decompile --out <dir>` enumerate. Hybrid APKs (HBC + DEX)
// contribute both layers — the HBC branch above runs first when
// ctx.hbc is Some, this branch runs unconditionally when DEX is
// non-empty, so the operator gets a union envelope rather than a
// silent drop of one layer.
let _ = all;
for (i, dex) in ctx.dex.iter().enumerate() {
let layer = format!("dex{}", i + 1);
let Some(dex_data) = ctx.dex_bytes(i) else {
continue;
};
// Amortize r8_inversion::build_trampoline_census across all
// class_defs in this DEX (mirrors `dex_decompile_filtered` /
// `dex_decompile_all`).
let census = droidsaw_dex::r8_inversion::build_trampoline_census(dex);
for (class_defs_idx, class_def) in dex.class_defs.iter().enumerate() {
// Shadow gate: duplicate-class_idx rows surface as distinct
// entries to the operator while parser-level resolution is
// first-wins. Skip shadowed rows so the envelope set matches
// the canonical resolution (mirrors `dex_decompile_filtered`
// / `dex_decompile_all`).
if dex.class_def_is_shadowed(class_defs_idx) {
continue;
}
// WHY unwrap_or_default vs ok_or_else: per-class JSON paths
// (`dex_decompile_filtered`) use empty-string fallback so a
// single malformed class_idx doesn't sink the whole envelope.
// The text-stream sibling (`dex_decompile_all`) uses
// ok_or_else because partial stdout text is worse than a
// clean Err there. JSON envelopes tolerate one empty
// descriptor; consumers see name="" and can detect the bad
// row from source content.
let descriptor = dex
.type_descriptors
.get(class_def.class_idx.0 as usize)
.cloned()
.unwrap_or_default();
let source = droidsaw_dex::classes::decompile_class_with_census(
dex, dex_data, class_def, &census,
);
out.push(DecompiledFn {
layer: layer.clone(),
function_id: class_def.class_idx.0,
name: descriptor,
source,
});
}
}
}
if ctx.hbc.is_none() && ctx.dex.is_empty() {
// Surface the recorded typed parse error for a present-but-
// unparseable bundle instead of claiming the target carries
// no bytecode at all.
ctx.ensure_hbc_parsed()?;
anyhow::bail!("no bytecode found in target");
}
// Drain decompile-time HermesFinding emissions (see semgrep.rs for
// rationale: thread-local channel, cross-tenant leak defense via
// bundling into the per-command output).
let hermes_findings = CrossLayerContext::drain_hermes_findings();
let count = out.len();
let (hint, related): (&str, &[&str]) = match (ctx.hbc.is_some(), !ctx.dex.is_empty()) {
(true, true) => (
"Hybrid APK envelope: HBC `function_id`-keyed entries (layer=`hbc`) + DEX per-class entries (layer=`dex<n>`); narrow with `droidsaw hbc functions`, `dex classes`, `--target`, `--class-index`, or `--search`",
&["hbc functions", "dex classes", "xrefs", "frida"],
),
(true, false) => (
"--all decompiles every function; use `droidsaw hbc functions` first to pick an ID",
&["hbc functions", "xrefs", "frida"],
),
(false, true) => (
"DEX envelope: one entry per class (function_id=class_idx, name=descriptor, source=Java); use `droidsaw dex classes` to browse or `--class-index`/`--search` to narrow",
&["dex classes", "xrefs", "frida"],
),
// (false, false) is structurally unreachable — the bail above
// fires when both layers are absent, so the function returns
// before reaching this match. Fall through to a no-op hint
// rather than `unreachable!` to keep the panic-class lint clean.
(false, false) => ("", &[][..]),
};
let payload = json!({
"functions": out,
"findings": hermes_findings,
"_meta": meta(count, false, hint, related),
});
Ok(payload)
}
/// Decompile a single HBC function by id.
///
/// Runs the full pipeline (decode → CFG → SSA → optimize → structure →
/// emit) and returns `Some((name, source))` on success. Returns `None`
/// when `fid` is not below `hbc.function_count`, when the function has
/// size 0, when the function's byte range cannot be sliced out of
/// `hbc_data`, or when decode / CFG / SSA construction returns `Err`.
/// This is a lenient skip-on-failure policy: callers iterate many ids and
/// simply omit the ones that do not decompile.
///
/// `js` selects the emit format: `true` → `emit_js`, `false` →
/// `structured.emit`.
///
/// This function does not install a HermesFinding drain guard; the caller
/// owns guard lifetime (typically once around the loop that calls this).
#[allow(clippy::arithmetic_side_effects, clippy::as_conversions, clippy::cast_possible_truncation, reason = "PROOF: u32 offset+size widened via u64::from cannot overflow u64; .min(hbc_data.len() as u64) bounded by an addressable buffer (≤ usize::MAX on supported targets); `end + 256` capped by `.min(hbc_data.len())`; `f.offset as usize` widens u32→usize losslessly on 64-bit targets and is bounds-checked by the subsequent `.get(...)` slice.")]
pub(super) fn decompile_hbc_one(
    hbc: &droidsaw_hermes::parser::HbcFile<'_>,
    hbc_data: &[u8],
    fid: u32,
    js: bool,
) -> Option<(String, String)> {
    // `fid` can come straight from user input. Apply the same range check
    // the `get_func_name` closure below performs before `function_get`,
    // so an out-of-range id is skipped like any other undecompilable one.
    if fid >= hbc.function_count {
        return None;
    }
    let f = hbc.function_get(fid);
    if f.size == 0 {
        return None;
    }

    // End of the function body, clamped to the buffer. Summed in u64 so
    // `offset + size` cannot wrap.
    let end = (u64::from(f.offset) + u64::from(f.size)).min(hbc_data.len() as u64) as usize;
    // CFG construction is handed up to 256 bytes past the body.
    // NOTE(review): presumably for data trailing the instruction stream
    // (e.g. jump tables) — confirm against `Cfg::build`.
    let code_end = (end + 256).min(hbc_data.len());
    // `.get` yields `None` when `offset` lies beyond the clamped end.
    let code = hbc_data.get(f.offset as usize..code_end)?;
    let decode_slice = hbc_data.get(f.offset as usize..end)?;

    let instructions = droidsaw_hermes::decompile::decode::decode_function(
        decode_slice,
        hbc.opcode_version(),
    )
    .ok()?;

    // Copy the function's exception table into the CFG builder's own type.
    let exc_handlers: Vec<_> = (0..hbc.function_exception_count(fid))
        .map(|i| {
            let eh = hbc.function_exception_get(fid, i);
            droidsaw_hermes::decompile::cfg::ExcHandler {
                start: eh.start,
                end: eh.end,
                target: eh.target,
            }
        })
        .collect();

    let cfg = droidsaw_hermes::decompile::cfg::Cfg::build(&instructions, &exc_handlers, code).ok()?;
    let ssa = droidsaw_hermes::decompile::ssa::build_ssa(&cfg, f.frame_size).ok()?;

    // Lookup callbacks handed to the optimizer / emitter. Each one guards
    // its index so a bad operand degrades to a placeholder, not a failure.
    let get_str = |id: u32| -> String {
        if id < hbc.string_count {
            hbc.string_as_str_or_empty(id).into_owned()
        } else {
            format!("<{id}>")
        }
    };
    let get_literal = |a: u8, b: u32, c: u32, d: u32| -> (u8, u32, i32, f64) {
        let v = hbc.literal_get(a, b, c, d);
        (v.tag, v.str_id, v.ival, v.dval)
    };
    let get_shape = |i: u32| -> (u32, u32) {
        match hbc.object_shape_get(i) {
            Some(s) => (s.key_buffer_offset, s.num_props),
            None => (0, 0),
        }
    };
    let get_func_name = |fid2: u32| -> String {
        if fid2 < hbc.function_count {
            let fi = hbc.function_get(fid2);
            if fi.name_id < hbc.string_count {
                return hbc.string_as_str_or_empty(fi.name_id).into_owned();
            }
        }
        String::new()
    };
    let get_bigint = |idx: u32| -> Option<String> { hbc.bigint_as_str(idx) };

    let ssa = droidsaw_hermes::decompile::optimize::optimize(
        ssa,
        &get_str,
        &get_literal,
        &get_shape,
        &get_func_name,
        &get_bigint,
    );

    // A name id outside the string table yields an empty name.
    let fname = if f.name_id < hbc.string_count {
        hbc.string_as_str_or_empty(f.name_id).into_owned()
    } else {
        String::new()
    };

    // Block id → exception-handler id, for blocks that have a handler.
    let exc_map: std::collections::BTreeMap<u32, u32> = cfg
        .blocks
        .values()
        .filter_map(|b| b.exc_handler.map(|h| (b.id, h)))
        .collect();
    let structured = droidsaw_hermes::decompile::structure::structure_function_with_exc(
        &ssa,
        fname.clone(),
        f.param_count,
        f.flags,
        &exc_map,
    );
    let source = if js {
        droidsaw_hermes::decompile::emit::emit_js(&structured, &get_str)
    } else {
        structured.emit(&get_str)
    };
    Some((fname, source))
}
/// Decompile one HBC function of `ctx` without building a JSON envelope.
///
/// Intended for callers that iterate function ids themselves: it returns
/// the `(name, source)` pair directly, so per-function calls avoid the
/// envelope construction that `decompile()` performs. `js` selects the
/// emit format exactly as in `decompile_hbc_one`.
///
/// Returns `None` when `ctx` has no HBC layer or when the function cannot
/// be decompiled. No drain guard is installed here — the caller manages
/// guard lifetime around its own loop.
pub fn decompile_hbc_function(
    ctx: &CrossLayerContext,
    fid: u32,
    js: bool,
) -> Option<(String, String)> {
    ctx.hbc
        .as_ref()
        .and_then(|owned| decompile_hbc_one(owned.hbc(), owned.bytes(), fid, js))
}
/// Streaming variant of `decompile` for the `AuditFormat::HbcJs` contract:
/// writes concatenated JavaScript source directly to `sink`, one function
/// per block with a `// ===function <fid> (<name>)===\n` delimiter header.
/// No JSON envelope. No intermediate `Vec<DecompiledFn>` collection —
/// per-function emit flushes to the caller's writer so 368 MB React
/// Native decompiles never materialize in the Rust heap.
///
/// Caller (main.rs) invokes this when `--all --js <hbc>` is requested
/// AND input is HBC (not DEX). DEX bulk-decompile routes through the
/// separate `dex_decompile_all` path.
///
/// Errors: returns anyhow Err on I/O failure writing to `sink`. Per-
/// function decode/CFG/SSA failures are silently skipped (same policy
/// as the JSON-path `decompile` function) — partial emit is acceptable
/// for HbcJs, and bench's sentinel-on-stderr gate catches absent-
/// sentinel mid-stream crashes.
//
// PROOF: `f.offset as u64 + f.size as u64` is u32+u32 → u64, cannot
// overflow u64; `.min(hbc_data.len() as u64) as usize` is bounded by
// the input's payload size (≤ usize::MAX).
#[allow(clippy::arithmetic_side_effects, clippy::as_conversions, reason = "`f.offset as u64 + f.size as u64` is u32+u32 → u64, cannot overflow u64; `.min(hbc_data.len() as u64) as usize` is bounded by the input's payload size (≤ usize::MAX).")]
pub fn decompile_hbc_all_js_stream(
ctx: &CrossLayerContext,
sink: &mut dyn std::io::Write,
) -> anyhow::Result<()> {
// RAII drain guard: critical for the streaming variant — `writeln!`
// / `sink.write_all` / `sink.flush?` can fail mid-loop (SIGPIPE
// when consumer hangs up the pipe; ENOSPC if writing to a
// bounded sink). Without this guard, the in-flight findings from
// optimize::optimize() leak into the next bundle on the same
// blocking-pool worker.
let _drain_guard = crate::context::HermesFindingDrainGuard::install_discard();
let Some(hbc_owned) = ctx.hbc.as_ref() else {
// Concatenated-JS emit names the hbc layer: a present-but-
// unparseable bundle surfaces its typed parse error.
ctx.ensure_hbc_parsed()?;
anyhow::bail!(
"no HBC payload in target; concatenated-JS emit requires Hermes bytecode"
);
};
let hbc = hbc_owned.hbc();
let hbc_data = hbc_owned.bytes();
for fid in 0..hbc.function_count {
let f = hbc.function_get(fid);
if f.size == 0 {
continue;
}
#[allow(
clippy::cast_possible_truncation,
reason = "PROOF: bounded by hbc_data.len() via .min(); usize→u64→usize roundtrip lossless on every supported target."
)]
let end = (u64::from(f.offset) + u64::from(f.size)).min(hbc_data.len() as u64) as usize;
let code_end = (end + 256).min(hbc_data.len());
let Some(code) = hbc_data.get(f.offset as usize..code_end) else {
continue;
};
let Some(decode_slice) = hbc_data.get(f.offset as usize..end) else {
continue;
};
let Ok(instructions) = droidsaw_hermes::decompile::decode::decode_function(
decode_slice,
hbc.opcode_version(),
) else {
continue;
};
let exc_count = hbc.function_exception_count(fid);
let mut exc_handlers = Vec::new();
for i in 0..exc_count {
let eh = hbc.function_exception_get(fid, i);
exc_handlers.push(droidsaw_hermes::decompile::cfg::ExcHandler {
start: eh.start,
end: eh.end,
target: eh.target,
});
}
let Ok(cfg) = droidsaw_hermes::decompile::cfg::Cfg::build(&instructions, &exc_handlers, code) else {
continue;
};
let Ok(ssa) = droidsaw_hermes::decompile::ssa::build_ssa(&cfg, f.frame_size) else {
continue;
};
let get_str = |id: u32| -> String {
if id < hbc.string_count {
hbc.string_as_str_or_empty(id).into_owned()
} else {
format!("<{id}>")
}
};
let get_literal = |a: u8, b: u32, c: u32, d: u32| -> (u8, u32, i32, f64) {
let v = hbc.literal_get(a, b, c, d);
(v.tag, v.str_id, v.ival, v.dval)
};
let get_shape = |i: u32| -> (u32, u32) {
match hbc.object_shape_get(i) {
Some(s) => (s.key_buffer_offset, s.num_props),
None => (0, 0),
}
};
let get_func_name = |fid2: u32| -> String {
if fid2 < hbc.function_count {
let fi = hbc.function_get(fid2);
if fi.name_id < hbc.string_count {
return hbc.string_as_str_or_empty(fi.name_id).into_owned();
}
}
String::new()
};
let get_bigint = |idx: u32| -> Option<String> { hbc.bigint_as_str(idx) };
let ssa = droidsaw_hermes::decompile::optimize::optimize(
ssa,
&get_str,
&get_literal,
&get_shape,
&get_func_name,
&get_bigint,
);
let fname = if f.name_id < hbc.string_count {
hbc.string_as_str_or_empty(f.name_id).into_owned()
} else {
String::new()
};
let exc_map: std::collections::BTreeMap<u32, u32> = cfg
.blocks
.values()
.filter_map(|b| b.exc_handler.map(|h| (b.id, h)))
.collect();
let structured = droidsaw_hermes::decompile::structure::structure_function_with_exc(
&ssa,
fname.clone(),
f.param_count,
f.flags,
&exc_map,
);
let source = droidsaw_hermes::decompile::emit::emit_js(&structured, &get_str);
// Per-function delimiter + body. Note: `fname` is the function
// symbol from the HBC string table — unescaped ASCII in practice.
// A malicious HBC could inject newlines or the sentinel token
// into fname; that would land on STDOUT only, not stderr, and
// the sentinel gate runs against stderr per
// `AuditFormat::terminator_channel(HbcJs) == Stderr`.
writeln!(sink, "// ===function {fid} ({fname})===")?;
sink.write_all(source.as_bytes())?;
if !source.ends_with('\n') {
writeln!(sink)?;
}
}
sink.flush()?;
// Streaming variant has no JSON envelope to attach findings to —
// discard at end-of-command to defend against cross-tenant TLS leak
// on tokio `spawn_blocking` worker reuse. The next parse on this
// thread also calls `discard_findings` at entry, so this is belt
// + suspenders.
droidsaw_hermes::finding::discard_findings();
Ok(())
}