use std::fs;
use std::path::{Path, PathBuf};
use std::time::{SystemTime, UNIX_EPOCH};
use anyhow::{anyhow, bail, Context};
use serde_json::{json, Value};
use sha2::{Digest, Sha256};
use crate::context::CrossLayerContext;
/// Minimum zero-padded width of the function id in the file names built by
/// `hbc_function_filename`.
const HBC_ID_WIDTH: usize = 6;
/// Decompiles every DEX class and Hermes (HBC) function available through `ctx`
/// into files under `dir`, dumps per-layer strings, and writes a `meta.json`
/// summary of the run.
///
/// Files produced under `dir`:
/// - `dex/<subdir>/sources/<relative class path>` for each emitted class, where
///   `<subdir>` comes from `sanitize_subdir` and the relative path from
///   `safe_class_file_rel_path`;
/// - `hbc/<name>` for each emitted Hermes function, named by
///   `hbc_function_filename`;
/// - whatever `write_strings_dump` writes;
/// - `meta.json`.
///
/// `overwrite` only permits writing into a non-empty `dir`; nothing that is
/// already there is removed by this function. `input_path` is read in full to
/// record its SHA-256 and size, and `command_line` is stored verbatim in the
/// metadata.
///
/// # Errors
/// Fails when the context holds neither HBC nor DEX data, when `dir` exists but
/// is not a directory, when `dir` is non-empty and `overwrite` is false, or
/// when any directory creation, file read/write, strings dump, or JSON
/// serialization fails.
///
/// On success, returns the same JSON value that was serialized to `meta.json`.
pub fn bulk_emit_to_dir(
ctx: &CrossLayerContext,
dir: &Path,
overwrite: bool,
input_path: &Path,
command_line: String,
) -> anyhow::Result<Value> {
// Nothing to emit. `ensure_hbc_parsed` is consulted first so that an error it
// returns is reported instead of the generic message below.
if ctx.hbc.is_none() && ctx.dex.is_empty() {
ctx.ensure_hbc_parsed()?;
bail!("no bytecode found in target");
}
// Wall-clock start of the run; also recorded in meta.json as `started_at`.
let started_at = chrono::Utc::now();
// Validate the output directory, or create it when it does not exist yet.
if dir.exists() {
if !dir.is_dir() {
bail!("--out target exists and is not a directory: {}", dir.display());
}
// A single directory entry is enough to treat the directory as non-empty.
let non_empty = fs::read_dir(dir)
.with_context(|| format!("reading --out dir: {}", dir.display()))?
.next()
.is_some();
if non_empty && !overwrite {
bail!(
"--out dir {} is not empty; pass --overwrite or pick a fresh dir",
dir.display()
);
}
} else {
fs::create_dir_all(dir)
.with_context(|| format!("creating --out dir: {}", dir.display()))?;
}
// Hash the raw input, then release the buffer before the parallel work starts.
let input_bytes = fs::read(input_path)
.with_context(|| format!("reading input for sha256: {}", input_path.display()))?;
let input_sha = hex_sha256(&input_bytes);
let input_size = input_bytes.len();
drop(input_bytes);
// Function-local import of the rayon traits used by the parallel pipelines below.
use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, IntoParallelRefIterator, ParallelIterator};
// Per-class tally; a skipped class contributes all zeros.
struct ClassEmit {
classes: u64,
methods: u64,
sources_bytes: u64,
}
// One parallel task per DEX index. Each task yields that file's metadata entry,
// or `None` when the DEX or its raw bytes cannot be obtained from the context.
let per_dex: Vec<anyhow::Result<Option<Value>>> = (0..ctx.dex.len())
.into_par_iter()
.map(|i| -> anyhow::Result<Option<Value>> {
debug_assert!(i < ctx.dex.len(), "minted dex index out of range");
let Some(dex) = ctx.dex.get(i) else {
return Ok(None);
};
let Some(dex_data) = ctx.dex_bytes(i) else {
return Ok(None);
};
// Output location for this DEX: <dir>/dex/<sanitized entry name>/sources.
let apk_entry = dex_entry_name(ctx, i);
let subdir = sanitize_subdir(&apk_entry);
let dex_root = dir.join("dex").join(&subdir);
let sources_root = dex_root.join("sources");
fs::create_dir_all(&sources_root)
.with_context(|| format!("creating {}", sources_root.display()))?;
// Built once per DEX and shared by every class decompile below.
let census = droidsaw_dex::r8_inversion::build_trampoline_census(dex);
// Classes of this DEX are decompiled and written in parallel as well.
let class_results: Vec<anyhow::Result<ClassEmit>> = dex
.class_defs
.par_iter()
.enumerate()
.map(|(class_defs_idx, class_def)| -> anyhow::Result<ClassEmit> {
// Skip class defs the DEX reports as shadowed.
if dex.class_def_is_shadowed(class_defs_idx) {
return Ok(ClassEmit { classes: 0, methods: 0, sources_bytes: 0 });
}
// Skip classes whose type descriptor cannot be looked up.
let Some(descriptor) = usize::try_from(class_def.class_idx.0)
.ok()
.and_then(|idx| dex.type_descriptors.get(idx))
.cloned()
else {
return Ok(ClassEmit { classes: 0, methods: 0, sources_bytes: 0 });
};
// Skip descriptors that do not map to a safe relative path.
let Some(rel) = safe_class_file_rel_path(&descriptor) else {
return Ok(ClassEmit { classes: 0, methods: 0, sources_bytes: 0 });
};
let source = droidsaw_dex::classes::decompile_class_with_census(
dex, dex_data, class_def, &census,
);
let full = sources_root.join(&rel);
if let Some(parent) = full.parent() {
fs::create_dir_all(parent)
.with_context(|| format!("creating {}", parent.display()))?;
}
fs::write(&full, source.as_bytes())
.with_context(|| format!("writing {}", full.display()))?;
Ok(ClassEmit {
classes: 1,
methods: count_class_methods(dex, class_def),
sources_bytes: u64::try_from(source.len()).unwrap_or(u64::MAX),
})
})
.collect();
// Sum the per-class tallies; any failed class turns this DEX's result into
// that error.
let mut classes_emitted: u64 = 0;
let mut methods_emitted: u64 = 0;
let mut sources_files: u64 = 0;
let mut sources_bytes: u64 = 0;
for r in class_results {
let emit = r?;
classes_emitted = classes_emitted.saturating_add(emit.classes);
methods_emitted = methods_emitted.saturating_add(emit.methods);
// One source file is written per emitted class, so the file count
// advances by the same amount as the class count.
sources_files = sources_files.saturating_add(emit.classes);
sources_bytes = sources_bytes.saturating_add(emit.sources_bytes);
}
let dex_sha = hex_sha256(dex_data);
let dex_size = dex_data.len();
Ok(Some(json!({
"apk_entry": apk_entry,
"subdir": subdir,
"sha256": dex_sha,
"size_bytes": dex_size,
"classes_emitted": classes_emitted,
"methods_emitted": methods_emitted,
"formats": {
"sources": {"files": sources_files, "bytes": sources_bytes},
},
})))
})
.collect();
// Collect the per-DEX entries and derive layer totals from their JSON; the
// first error encountered aborts the whole run.
let mut dex_files_meta: Vec<Value> = Vec::with_capacity(ctx.dex.len());
let mut total_dex_classes: u64 = 0;
let mut total_dex_methods: u64 = 0;
for r in per_dex {
let Some(v) = r? else {
continue;
};
let classes = v
.pointer("/classes_emitted")
.and_then(Value::as_u64)
.unwrap_or(0);
let methods = v
.pointer("/methods_emitted")
.and_then(Value::as_u64)
.unwrap_or(0);
total_dex_classes = total_dex_classes.saturating_add(classes);
total_dex_methods = total_dex_methods.saturating_add(methods);
dex_files_meta.push(v);
}
// HBC layer: one file per function that decompiles, all directly under <dir>/hbc.
let hbc_meta = if let Some(hbc_owned) = ctx.hbc.as_ref() {
let hbc_root = dir.join("hbc");
fs::create_dir_all(&hbc_root)
.with_context(|| format!("creating {}", hbc_root.display()))?;
let hbc = hbc_owned.hbc();
let function_count = hbc.function_count;
let bundle_bytes = hbc_owned.bytes();
let hbc_sha = hex_sha256(bundle_bytes);
let hbc_size = bundle_bytes.len();
// Per-function tally; a function that does not decompile contributes zeros.
struct HbcEmit {
functions: u64,
js_bytes: u64,
}
let fn_results: Vec<anyhow::Result<HbcEmit>> = (0..function_count)
.into_par_iter()
.map(|fid| -> anyhow::Result<HbcEmit> {
// Guard lives until the end of this closure invocation.
// NOTE(review): presumably discards findings raised while decompiling this
// function - confirm against `HermesFindingDrainGuard`.
let _drain_guard =
crate::context::HermesFindingDrainGuard::install_discard();
let Some((raw_name, source)) =
super::decompile::decompile_hbc_function(ctx, fid, true)
else {
return Ok(HbcEmit { functions: 0, js_bytes: 0 });
};
let fname = hbc_function_filename(fid, &raw_name, "js");
let full = hbc_root.join(&fname);
fs::write(&full, source.as_bytes())
.with_context(|| format!("writing {}", full.display()))?;
Ok(HbcEmit {
functions: 1,
js_bytes: u64::try_from(source.len()).unwrap_or(u64::MAX),
})
})
.collect();
let mut functions_emitted: u64 = 0;
let mut js_files: u64 = 0;
let mut js_bytes: u64 = 0;
for r in fn_results {
let emit = r?;
functions_emitted = functions_emitted.saturating_add(emit.functions);
// One .js file is written per emitted function.
js_files = js_files.saturating_add(emit.functions);
js_bytes = js_bytes.saturating_add(emit.js_bytes);
}
Some(json!({
"sha256": hbc_sha,
"size_bytes": hbc_size,
"bundle_version": hbc.opcode_version(),
"functions_emitted": functions_emitted,
"formats": {
"js": {"files": js_files, "bytes": js_bytes},
},
}))
} else {
None
};
let strings_meta = write_strings_dump(ctx, dir)?;
// Elapsed wall time: a negative difference is clamped to 0, and the i32
// conversion saturates at i32::MAX milliseconds.
let completed_at = chrono::Utc::now();
let wall_ms = completed_at
.signed_duration_since(started_at)
.num_milliseconds()
.max(0);
let wall_seconds = f64::from(i32::try_from(wall_ms).unwrap_or(i32::MAX)) / 1000.0;
// The `dex` and `hbc` layer entries are only present when that layer exists.
let mut layers = serde_json::Map::new();
if !ctx.dex.is_empty() {
layers.insert(
"dex".to_string(),
json!({
"files": dex_files_meta,
"totals": {
"classes_emitted": total_dex_classes,
"methods_emitted": total_dex_methods,
},
}),
);
}
if let Some(h) = hbc_meta {
layers.insert("hbc".to_string(), h);
}
// Always present; serializes as null when the context recorded no parse error.
layers.insert(
"hbc_parse_error".to_string(),
json!(ctx.hbc_parse_error.as_ref().map(|f| f.message())),
);
// APK-level metadata is only produced when the context carries an APK.
let apk_meta = ctx.apk.as_ref().map(|apk| {
let manifest = apk.decode_manifest();
let manifest_json = manifest.map(|m| {
json!({
"package": m.package,
"version_code": m.version_code,
"version_name": m.version_name,
"min_sdk": m.min_sdk,
"target_sdk": m.target_sdk,
"target_sdk_int": m.target_sdk_int,
})
});
// A failure to obtain signing info is not fatal: `.ok()` turns it into null.
let signing_json = apk.signing_info().ok().map(|s| {
json!({
"v1_present": s.v1_cert.is_some(),
"v2_present": s.v2_present,
"v3_present": s.v3_present,
"v4_present": s.v4_present,
"source_stamp_present": s.source_stamp_present,
"signers_count": s.signers.len(),
"v1_cert_sha256_fingerprint": s.v1_cert.as_ref().map(|c| c.sha256_fingerprint.clone()),
"v1_cert_subject": s.v1_cert.as_ref().map(|c| c.subject.clone()),
"v1_cert_not_before": s.v1_cert.as_ref().map(|c| c.not_before.clone()),
"v1_cert_not_after": s.v1_cert.as_ref().map(|c| c.not_after.clone()),
})
});
json!({
"manifest": manifest_json,
"signing": signing_json,
})
});
// Assemble the run summary, persist it as pretty-printed JSON, and return it.
let meta = json!({
"schema_version": 2,
"input": {
"path": input_path.to_string_lossy(),
"sha256": input_sha,
"size_bytes": input_size,
},
"apk": apk_meta,
"droidsaw": {
"version": env!("CARGO_PKG_VERSION"),
"rev": option_env!("DROIDSAW_GIT_REV").unwrap_or("unknown"),
"build_profile": if cfg!(debug_assertions) { "debug" } else { "release" },
},
"command": command_line,
"started_at": started_at.to_rfc3339(),
"completed_at": completed_at.to_rfc3339(),
"wall_seconds": wall_seconds,
"layers": layers,
"strings": strings_meta,
});
let meta_path = dir.join("meta.json");
let serialized = serde_json::to_string_pretty(&meta)?;
fs::write(&meta_path, &serialized)
.with_context(|| format!("writing {}", meta_path.display()))?;
Ok(meta)
}
/// Returns the SHA-256 digest of `bytes` rendered as lowercase hexadecimal.
fn hex_sha256(bytes: &[u8]) -> String {
    let digest = Sha256::digest(bytes);
    // Two hex characters per digest byte.
    let mut hex = String::with_capacity(digest.len().saturating_mul(2));
    for byte in digest.iter() {
        hex.push_str(&format!("{byte:02x}"));
    }
    hex
}
/// Picks the entry name to report for the DEX at index `i`.
///
/// When the context has an APK whose `dex` list has an entry at `i`, that
/// entry's recorded name is used. Otherwise the name falls back to
/// `classes.dex` for index 0 and `classes<i + 1>.dex` for every later index.
fn dex_entry_name(ctx: &CrossLayerContext, i: usize) -> String {
    let recorded = ctx
        .apk
        .as_ref()
        .and_then(|apk| apk.dex.get(i))
        .map(|entry| entry.name.clone());
    match recorded {
        Some(name) => name,
        None if i == 0 => "classes.dex".to_string(),
        None => format!("classes{}.dex", i.saturating_add(1)),
    }
}
/// Turns a DEX entry name into a single directory-name component.
///
/// A trailing `.dex` is dropped, and every character other than an ASCII
/// letter, ASCII digit, `_` or `-` becomes `_`. If nothing but underscores
/// (or nothing at all) is left, the fixed name `unnamed_dex` is returned.
fn sanitize_subdir(entry: &str) -> String {
    let stem = match entry.strip_suffix(".dex") {
        Some(without_ext) => without_ext,
        None => entry,
    };
    let mut cleaned = String::with_capacity(stem.len());
    // Becomes true once any character other than '_' has been emitted.
    let mut has_payload = false;
    for ch in stem.chars() {
        let emitted = if ch.is_ascii_alphanumeric() || matches!(ch, '_' | '-') {
            ch
        } else {
            '_'
        };
        if emitted != '_' {
            has_payload = true;
        }
        cleaned.push(emitted);
    }
    if has_payload {
        cleaned
    } else {
        "unnamed_dex".to_string()
    }
}
/// Counts the direct plus virtual methods recorded for `class_def`.
///
/// Returns 0 when `dex.class_datas` has no entry for the class's
/// `class_data_off`. The sum saturates rather than overflowing.
fn count_class_methods(
    dex: &droidsaw_dex::DexFile,
    class_def: &droidsaw_dex::ids::ClassDefItem,
) -> u64 {
    let Some(class_data) = dex.class_datas.get(&class_def.class_data_off) else {
        return 0;
    };
    let total = class_data
        .direct_methods
        .len()
        .saturating_add(class_data.virtual_methods.len());
    u64::try_from(total).unwrap_or(u64::MAX)
}
/// Maps a class type descriptor to a relative `.java` path that stays inside
/// the directory it is later joined onto.
///
/// The `L…;` wrapper is removed when both ends are present; otherwise the
/// descriptor is used as given. Returns `None` when the remaining text is
/// empty or contains any path component other than a plain name (a root, a
/// prefix, `.` or `..`), so the result can never climb out of its parent.
///
/// `.java` is appended to the last segment rather than set as its extension,
/// so a segment that already contains a dot keeps its full name:
/// `Lcom/example/Foo.Bar;` maps to `com/example/Foo.Bar.java` and does not
/// collide with the file for `Lcom/example/Foo;`.
fn safe_class_file_rel_path(descriptor: &str) -> Option<PathBuf> {
    let inner = descriptor
        .strip_prefix('L')
        .and_then(|s| s.strip_suffix(';'))
        .unwrap_or(descriptor);
    let mut out = PathBuf::new();
    for comp in Path::new(inner).components() {
        match comp {
            std::path::Component::Normal(seg) => out.push(seg),
            // Root, prefix, `.` and `..` components are all rejected.
            _ => return None,
        }
    }
    if out.as_os_str().is_empty() {
        return None;
    }
    // `set_extension` would replace everything after the last dot of the final
    // segment, merging distinct class names into one file; append instead.
    let mut with_ext = out.into_os_string();
    with_ext.push(".java");
    Some(PathBuf::from(with_ext))
}
/// Builds the output file name for a Hermes function.
///
/// The result has the form `f<id>_<name>.<ext>`: `<id>` is `fid` zero-padded
/// to at least `HBC_ID_WIDTH` digits, and `<name>` is the first 80 characters
/// of `raw_name` with every character other than an ASCII letter, ASCII
/// digit, `_` or `-` replaced by `_`.
fn hbc_function_filename(fid: u32, raw_name: &str, ext: &str) -> String {
    let mut safe = String::new();
    // Truncating before replacing is equivalent: replacement is one-for-one.
    for ch in raw_name.chars().take(80) {
        let allowed = ch.is_ascii_alphanumeric() || matches!(ch, '_' | '-');
        safe.push(if allowed { ch } else { '_' });
    }
    format!("f{fid:0width$}_{safe}.{ext}", width = HBC_ID_WIDTH)
}
fn write_strings_dump(ctx: &CrossLayerContext, dir: &Path) -> anyhow::Result<Value> {
let strings_root = dir.join("strings");
fs::create_dir_all(&strings_root)?;
let mut out = serde_json::Map::new();
for (layer, present) in [
("dex", !ctx.dex.is_empty()),
("hbc", ctx.hbc.is_some()),
] {
if !present {
out.insert(layer.to_string(), Value::Null);
continue;
}
let v = super::strings::strings(ctx, None, None, None, Some(layer))?;
let path = strings_root.join(format!("{layer}.txt"));
let entries = v
.get("strings")
.and_then(Value::as_array)
.ok_or_else(|| anyhow!("strings command did not return a `strings` array"))?;
let mut buf = String::new();
let mut lines: u64 = 0;
for entry in entries {
if let Some(s) = entry.get("value").and_then(Value::as_str) {
buf.push_str(s);
buf.push('\n');
lines = lines.saturating_add(1);
}
}
let bytes = u64::try_from(buf.len()).unwrap_or(u64::MAX);
fs::write(&path, &buf)?;
let sha = hex_sha256(buf.as_bytes());
out.insert(
layer.to_string(),
json!({"lines": lines, "bytes": bytes, "sha256": sha}),
);
}
let native_present = ctx
.apk
.as_ref()
.map(|a| !a.native_libs.is_empty())
.unwrap_or(false);
if native_present {
let v = super::strings::strings(ctx, None, None, None, Some("native"))?;
let path = strings_root.join("native.txt");
let entries = v
.get("strings")
.and_then(Value::as_array)
.ok_or_else(|| anyhow!("strings command did not return a `strings` array"))?;
let mut buf = String::new();
let mut lines: u64 = 0;
for entry in entries {
if let Some(s) = entry.get("value").and_then(Value::as_str) {
buf.push_str(s);
buf.push('\n');
lines = lines.saturating_add(1);
}
}
let bytes = u64::try_from(buf.len()).unwrap_or(u64::MAX);
fs::write(&path, &buf)?;
let sha = hex_sha256(buf.as_bytes());
out.insert(
"native".to_string(),
json!({"lines": lines, "bytes": bytes, "sha256": sha}),
);
} else {
out.insert("native".to_string(), Value::Null);
}
Ok(Value::Object(out))
}
/// Returns the current system time as whole seconds since the Unix epoch, or
/// 0 when the system clock reports a time before the epoch.
#[allow(dead_code, reason = "exported for the SystemTime epoch alias if future commits need raw epoch fallback when chrono is unavailable")]
fn unix_epoch_seconds() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used, reason = "test module")]
mod path_safety_tests {
    use super::*;

    // A plain `Lpkg/Class;` descriptor maps onto `pkg/Class.java`.
    #[test]
    fn safe_class_file_rel_path_accepts_normal_descriptor() {
        let expected = PathBuf::from("com/example/Foo.java");
        assert_eq!(safe_class_file_rel_path("Lcom/example/Foo;"), Some(expected));
    }

    // `$` in an inner-class name is kept as-is.
    #[test]
    fn safe_class_file_rel_path_accepts_inner_class_dollar() {
        let expected = PathBuf::from("com/example/Foo$Bar.java");
        assert_eq!(
            safe_class_file_rel_path("Lcom/example/Foo$Bar;"),
            Some(expected)
        );
    }

    // Any `..` component, leading or embedded, rejects the descriptor.
    #[test]
    fn safe_class_file_rel_path_rejects_parent_dir_traversal() {
        for descriptor in [
            "L../etc/passwd;",
            "L../../etc/passwd;",
            "Lcom/../../etc/passwd;",
        ] {
            assert!(safe_class_file_rel_path(descriptor).is_none());
        }
    }

    // A descriptor that resolves to an absolute path is rejected.
    #[test]
    fn safe_class_file_rel_path_rejects_root_absolute() {
        assert_eq!(safe_class_file_rel_path("L/etc/passwd;"), None);
    }

    // Empty descriptors, wrapped or bare, produce no path.
    #[test]
    fn safe_class_file_rel_path_rejects_empty() {
        for descriptor in ["L;", ""] {
            assert!(safe_class_file_rel_path(descriptor).is_none());
        }
    }

    // Dots never survive sanitizing, so no `.` / `..` directory name can result.
    #[test]
    fn sanitize_subdir_rejects_dot_traversal() {
        let double_dot = sanitize_subdir("..dex");
        assert_ne!(double_dot, "..");
        assert_ne!(double_dot, ".");
        let bare_ext = sanitize_subdir(".dex");
        assert_ne!(bare_ext, "");
        assert_eq!(bare_ext, "unnamed_dex");
        assert_eq!(sanitize_subdir("foo.bar.dex"), "foo_bar");
    }

    // Separators and colons collapse to underscores.
    #[test]
    fn sanitize_subdir_replaces_colon_and_slash() {
        let cases = [
            ("my-split:classes.dex", "my-split_classes"),
            ("assets/longtail/classes.dex", "assets_longtail_classes"),
        ];
        for (entry, expected) in cases {
            assert_eq!(sanitize_subdir(entry), expected);
        }
    }

    // Prefix `f` + 6 digits + `_` + 80 name chars + `.js` = 91 bytes.
    #[test]
    fn hbc_function_filename_caps_at_80_chars_after_sanitize() {
        let raw = "a".repeat(200);
        let f = hbc_function_filename(42, &raw, "js");
        assert_eq!(f.len(), 91, "filename should cap at f<6>_<80 chars>.<ext>; got {f}");
        assert!(f.starts_with("f000042_"));
        assert!(f.ends_with(".js"));
    }

    // Path separators, dots, NUL and non-ASCII all become underscores.
    #[test]
    fn hbc_function_filename_sanitizes_non_ascii_and_path_chars() {
        let f = hbc_function_filename(7, "../etc/passwd\0🤖", "js");
        assert_eq!(f, "f000007____etc_passwd__.js");
        assert!(!f.contains('/'));
        assert!(!f.contains(".."));
    }
}