use std::path::PathBuf;
use clap::Subcommand;
use droidsaw_apk::{Manifest, ParseConfig, UnknownChunk};
use droidsaw_common::{entropy::shannon_entropy, Finding, Layer, Severity};
use serde::Serialize;
use serde_json::{json, Value};
use crate::{analysis, context::CrossLayerContext};
pub mod audit_envelope;
pub mod credentials_fp;
pub mod honeycomb_fp;
pub mod triage;
mod frida;
pub use frida::frida;
#[doc(hidden)]
pub use frida::{
__fuzz_build_method_flags, proto_to_overload, smali_field_to_dotted, smali_split_args,
};
mod sbom;
pub use sbom::sbom;
pub mod sbom_openvex;
mod trufflehog;
pub use trufflehog::trufflehog;
mod semgrep;
pub use semgrep::{scan_semgrep, semgrep};
mod diff;
pub use diff::diff;
mod xrefs;
pub use xrefs::xrefs;
mod corpus;
pub use corpus::{corpus_ingest, scan_corpus};
mod deobf_strings;
pub use deobf_strings::{deobf_strings, DeobfStringsArgs};
mod strings;
pub use strings::{apply_methods_filter, apply_outline_filter, strings};
mod decompile;
mod bulk_emit;
pub use decompile::{decompile, decompile_hbc_all_js_stream, decompile_hbc_function};
pub use bulk_emit::bulk_emit_to_dir;
mod export;
pub use export::{
ensure_findings_db_schema, export, finding_signature_hash, migrate_findings_schema_to_current,
triage_finding, write_credentials_db, write_cross_layer_taint_flows_db,
write_finding_xrefs_db, write_findings_db, write_findings_db_with_run, write_semgrep_db,
write_taint_flows_db, write_xrefs_db, FINDINGS_SCHEMA_REV,
};
use export::hex_nibble;
pub use triage::{PromoteArgs, TriageCommands};
/// Which decompiler backend a user-supplied target string is routed to.
///
/// Returned by [`classify_decompile_target`]; each variant carries the
/// caller's original target string, borrowed for `'a`.
pub enum DecompileRoute<'a> {
/// Handle the target as a DEX class reference.
DexClass(&'a str),
/// Handle the target as a Hermes bytecode (HBC) function reference.
HbcFunction(&'a str),
}
/// Chooses the decompiler backend for `target` from the bytecode layers held by `ctx`.
///
/// * No DEX and no HBC: `ctx.ensure_hbc_parsed()` runs first so that its error
///   (if any) is the one reported; otherwise the call fails with a generic
///   "no bytecode" error.
/// * DEX only: always routed to DEX. A target that parses as `u32` first runs
///   `ctx.ensure_hbc_parsed()` and propagates its error.
/// * HBC only: always routed to HBC.
/// * Both: a `u32` target goes to HBC; a target that looks like a JVM
///   descriptor (`L…;`) or contains `.` or `/` goes to DEX; anything else is
///   rejected as ambiguous.
///
/// # Errors
/// Fails when no bytecode is available, when `ensure_hbc_parsed` fails, or
/// when the target is ambiguous for a hybrid input.
pub fn classify_decompile_target<'a>(
    ctx: &CrossLayerContext,
    target: &'a str,
) -> anyhow::Result<DecompileRoute<'a>> {
    let dex_loaded = !ctx.dex.is_empty();
    let hbc_loaded = ctx.hbc.is_some();
    let is_numeric = target.parse::<u32>().is_ok();

    if !dex_loaded && !hbc_loaded {
        ctx.ensure_hbc_parsed()?;
        anyhow::bail!("no bytecode found in target");
    }

    if !dex_loaded {
        return Ok(DecompileRoute::HbcFunction(target));
    }

    if !hbc_loaded {
        if is_numeric {
            ctx.ensure_hbc_parsed()?;
        }
        return Ok(DecompileRoute::DexClass(target));
    }

    // Hybrid input: both layers are present, so the target's shape decides.
    if is_numeric {
        return Ok(DecompileRoute::HbcFunction(target));
    }

    let descriptor_shaped = target.starts_with('L') && target.ends_with(';');
    if descriptor_shaped || target.contains('.') || target.contains('/') {
        return Ok(DecompileRoute::DexClass(target));
    }

    anyhow::bail!(
        "ambiguous target {target:?} for hybrid DEX+HBC input; \
         pass a JVM descriptor (Lcom/foo/Bar;), Java FQCN \
         (com.foo.Bar), or a numeric Hermes function id"
    )
}
/// Turns a user-supplied class search string into a regex over DEX type descriptors.
///
/// The input is returned untouched when it starts with `L` or contains any of
/// `;`, `/`, `(`, `[`, `*`, `?`, `^`. Otherwise:
///
/// * a dotted name (`com.foo.Bar`) becomes the regex-escaped descriptor
///   `Lcom/foo/Bar;`;
/// * a bare name (`Bar`) becomes the regex-escaped name followed by `;`.
#[doc(hidden)]
pub fn normalize_dex_class_search(s: &str) -> String {
    let caller_wrote_pattern = s.starts_with('L')
        || s.contains(|c: char| matches!(c, ';' | '/' | '(' | '[' | '*' | '?' | '^'));
    if caller_wrote_pattern {
        return s.to_string();
    }

    if !s.contains('.') {
        return format!("{};", regex::escape(s));
    }

    let as_descriptor = format!("L{};", s.replace('.', "/"));
    regex::escape(&as_descriptor)
}
// Subcommands of the `hbc` command group (hint strings in this file refer to
// them as `droidsaw hbc <name>`). Argument parsing is derived by clap.
// NOTE: these notes are plain `//` comments on purpose. clap's derive turns
// `///` doc comments into help text, so doc comments here would change CLI output.
#[derive(Subcommand)]
pub enum HbcCommands {
// `info <PATH>`
Info {
path: PathBuf,
},
// `functions <PATH> [-s|--search <SEARCH>]`
Functions {
path: PathBuf,
#[arg(short, long)]
search: Option<String>,
},
// `decompile <PATH> [FUNC_ID] [--js] [--all]`
// `func_id` carries no `#[arg]` attribute, so it is an optional positional.
Decompile {
path: PathBuf,
func_id: Option<u32>,
#[arg(long)]
js: bool,
#[arg(long)]
all: bool,
},
// `strings <PATH> [-s|--search <SEARCH>]`
Strings {
path: PathBuf,
#[arg(short, long)]
search: Option<String>,
},
// `disassemble <PATH>`
Disassemble {
path: PathBuf,
},
}
// Subcommands taking a DEX-bearing input path; argument parsing is derived by clap.
// NOTE: plain `//` comments are used on purpose. clap's derive turns `///`
// doc comments into help text, so doc comments here would change CLI output.
#[derive(Subcommand)]
pub enum DexCommands {
// `classes <PATH> [-s|--search <SEARCH>]`
Classes {
path: PathBuf,
#[arg(short, long)]
search: Option<String>,
},
// `methods <PATH> [-s|--search <SEARCH>] [--implementations]`
Methods {
path: PathBuf,
#[arg(short, long)]
search: Option<String>,
#[arg(long)]
implementations: bool,
},
// `strings <PATH> [-s|--search <SEARCH>]`
Strings {
path: PathBuf,
#[arg(short, long)]
search: Option<String>,
},
}
// Inspection subcommands. The variants' options mirror the parameters of the
// `entries`, `elf`, `resources` and `webview_assets` handlers in this module
// (presumably their CLI front-ends; the dispatch code is not in this file).
// NOTE: plain `//` comments are used on purpose. clap's derive turns `///`
// doc comments into help text, so doc comments here would change CLI output.
#[derive(Subcommand)]
pub enum InspectCommands {
// `entries <PATH> [-s|--search <SEARCH>] [--limit <LIMIT>]`
Entries {
path: PathBuf,
#[arg(short, long)]
search: Option<String>,
#[arg(long)]
limit: Option<usize>,
},
// `elf <PATH> [-s|--search <SEARCH>]`
Elf {
path: PathBuf,
#[arg(short, long)]
search: Option<String>,
},
// `resources <PATH> [-s|--search <SEARCH>] [--limit <LIMIT>]`
Resources {
path: PathBuf,
#[arg(short, long)]
search: Option<String>,
#[arg(long)]
limit: Option<usize>,
},
// `webview <PATH> [-s|--search <SEARCH>] [--extract <EXTRACT>]`
Webview {
path: PathBuf,
#[arg(short, long)]
search: Option<String>,
#[arg(long)]
extract: Option<String>,
},
}
// Scanner / exporter subcommands; argument parsing is derived by clap.
// NOTE: plain `//` comments are used on purpose. clap's derive turns `///`
// doc comments into help text, so doc comments here would change CLI output.
#[derive(Subcommand)]
pub enum ScanCommands {
// `yara <PATH> [-r|--rules <RULES>] [-t|--target <TARGET>] [--limit <LIMIT>]`
// `--target` defaults to "all".
Yara {
path: PathBuf,
#[arg(short, long)]
rules: Option<PathBuf>,
#[arg(short, long, default_value = "all")]
target: String,
#[arg(long)]
limit: Option<usize>,
},
// `sbom <PATH>`
Sbom { path: PathBuf },
// `trufflehog <PATH> [-n|--min-length <MIN_LENGTH>]`; the length defaults to 8.
Trufflehog {
path: PathBuf,
#[arg(short = 'n', long, default_value = "8")]
min_length: usize,
},
// `semgrep <PATH> [-o|--output <OUTPUT>] [--persist] [--db <PATH>]` plus
// every option contributed by the flattened `SemgrepArgs`.
Semgrep {
path: PathBuf,
#[arg(short, long)]
output: Option<PathBuf>,
#[arg(long)]
persist: bool,
#[arg(long, value_name = "PATH")]
db: Option<PathBuf>,
#[command(flatten)]
semgrep_args: crate::semgrep::SemgrepArgs,
},
// `export <PATH> -o|--output <OUTPUT>`; `output` is a non-optional `String`.
Export {
path: PathBuf,
#[arg(short, long)]
output: String,
},
}
// Multi-input ("corpus") subcommands; argument parsing is derived by clap.
// NOTE: plain `//` comments are used on purpose. clap's derive turns `///`
// doc comments into help text, so doc comments here would change CLI output.
#[derive(Subcommand)]
pub enum CorpusCommands {
// `ingest <PATHS>... -o|--output <OUTPUT> [--tag <TAG>] [--no-skip-existing]`
Ingest {
paths: Vec<PathBuf>,
#[arg(short, long)]
output: String,
#[arg(long)]
tag: Option<String>,
#[arg(long)]
no_skip_existing: bool,
},
// `scan <PATHS>... [--min-severity <MIN_SEVERITY>]`; the severity defaults to "info".
Scan {
paths: Vec<PathBuf>,
#[arg(long, default_value = "info")]
min_severity: String,
},
}
/// Reports whether progress lines should be printed.
///
/// The answer comes from the `DROIDSAW_PROGRESS` environment variable and is
/// computed once per process, then cached. Progress is enabled when the
/// variable is set to valid Unicode whose trimmed value is non-empty and is
/// not one of `0`, `false`, `no`, `off` (compared ASCII case-insensitively).
/// An unset or unreadable variable disables progress.
pub fn progress_enabled() -> bool {
    use std::sync::OnceLock;

    /// Trimmed values that explicitly switch progress off.
    const DISABLING_VALUES: [&str; 4] = ["0", "false", "no", "off"];
    static CACHED: OnceLock<bool> = OnceLock::new();

    *CACHED.get_or_init(|| {
        std::env::var("DROIDSAW_PROGRESS").is_ok_and(|raw| {
            let flag = raw.trim();
            !flag.is_empty()
                && !DISABLING_VALUES
                    .iter()
                    .any(|off| flag.eq_ignore_ascii_case(off))
        })
    })
}
// Prints a `droidsaw: `-prefixed line to stderr, but only when
// `progress_enabled()` returns true. Accepts the same arguments as `format!`.
macro_rules! progress {
($($arg:tt)*) => {{
// Resolved through `$crate` so the expansion works from any module of this crate.
if $crate::commands::progress_enabled() {
eprintln!("droidsaw: {}", format_args!($($arg)*));
}
}};
}
// Makes `progress!` nameable by path from the parent module as well.
pub(super) use progress;
/// Writes `value` to stdout as pretty-printed JSON followed by a newline.
///
/// # Errors
/// Returns the serializer's error when `value` cannot be encoded as JSON.
pub fn print_json<T: Serialize>(value: &T) -> anyhow::Result<()> {
    let rendered = serde_json::to_string_pretty(value)?;
    println!("{rendered}");
    Ok(())
}
pub(super) fn meta(count: usize, truncated: bool, hint: &str, related: &[&str]) -> Value {
json!({
"count": count,
"truncated": truncated,
"hint": hint,
"related": related,
"tool_version": env!("CARGO_PKG_VERSION"),
})
}
/// Payload serialized under the `"info"` key by [`info`].
#[derive(Serialize)]
struct InfoJson {
/// Copy of `ctx.path`.
path: String,
/// First value returned by `manifest_id(ctx)`.
package: Option<String>,
/// Second value returned by `manifest_id(ctx)`.
version_name: Option<String>,
/// Per-layer presence flags and counts.
layers: Layers,
/// Copy of `ctx.loaded_split_names`.
loaded_splits: Vec<String>,
}
/// Layer summary nested inside [`InfoJson`]; populated by [`info`].
#[derive(Serialize)]
struct Layers {
/// True when an APK is loaded and its `manifest_raw` is present.
has_manifest: bool,
/// True when `ctx.hbc` is populated.
hbc_present: bool,
/// Message of `ctx.hbc_parse_error`, when one was recorded.
hbc_parse_error: Option<String>,
/// Number of entries in `ctx.dex`.
dex_count: usize,
/// Keys of the APK's `native_libs` map; empty when no APK is loaded.
native_lib_abis: Vec<String>,
/// `version` of the parsed HBC, when present.
hbc_version: Option<u32>,
/// `function_count` of the parsed HBC, when present.
hbc_function_count: Option<u32>,
/// `string_count` of the parsed HBC, when present.
hbc_string_count: Option<u32>,
/// `header.version()` of each entry in `ctx.dex`, rendered as a string.
dex_versions: Vec<String>,
}
/// Produces the top-level summary of the loaded target.
///
/// The response has an `"info"` object (path, package identity, per-layer
/// presence flags and counts, loaded split names) and a `"_meta"` object.
/// Layers that are absent yield `false`, `None` or empty lists rather than an
/// error.
pub fn info(ctx: &CrossLayerContext) -> anyhow::Result<Value> {
    let (package, version_name) = manifest_id(ctx);
    let apk = ctx.apk.as_ref();

    let native_lib_abis: Vec<String> = match apk {
        Some(a) => a.native_libs.keys().cloned().collect(),
        None => Vec::new(),
    };

    let (hbc_version, hbc_function_count, hbc_string_count) = match ctx.hbc.as_ref() {
        Some(owned) => {
            let parsed = owned.hbc();
            (
                Some(parsed.version),
                Some(parsed.function_count),
                Some(parsed.string_count),
            )
        }
        None => (None, None, None),
    };

    let mut dex_versions: Vec<String> = Vec::new();
    for dex in ctx.dex.iter() {
        dex_versions.push(dex.header.version().to_string());
    }

    let layers = Layers {
        has_manifest: apk.is_some_and(|a| a.manifest_raw.is_some()),
        hbc_present: ctx.hbc.is_some(),
        hbc_parse_error: ctx.hbc_parse_error.as_ref().map(|f| f.message()),
        dex_count: ctx.dex.len(),
        native_lib_abis,
        hbc_version,
        hbc_function_count,
        hbc_string_count,
        dex_versions,
    };
    let summary = InfoJson {
        path: ctx.path.clone(),
        package,
        version_name,
        layers,
        loaded_splits: ctx.loaded_split_names.clone(),
    };

    Ok(json!({
        "info": summary,
        "_meta": meta(
            1,
            false,
            "use `droidsaw manifest` for permissions/components, `droidsaw apk-info` for a full one-shot report",
            &["manifest", "signing", "apk-info", "audit"],
        ),
    }))
}
/// Lists the APK's archive entries, optionally filtered by regex and capped.
///
/// `entry_total` is the number of entries matching `search` before `limit`
/// is applied; `_meta.count` is the number actually returned and
/// `_meta.truncated` says whether `limit` cut the list. The APK's recorded
/// zip anomalies are always included in full.
///
/// # Errors
/// Fails when `search` is not a valid regex (checked first) or when no APK is
/// loaded.
pub fn entries(
    ctx: &CrossLayerContext,
    search: Option<&str>,
    limit: Option<usize>,
) -> anyhow::Result<Value> {
    let filter = search.map(regex::Regex::new).transpose()?;
    let apk = ctx.require_apk()?;

    let mut shown: Vec<&String> = apk
        .entries
        .iter()
        .filter(|name| match filter.as_ref() {
            Some(re) => re.is_match(name),
            None => true,
        })
        .collect();
    let total = shown.len();

    let truncated = match limit {
        Some(cap) if total > cap => {
            shown.truncate(cap);
            true
        }
        _ => false,
    };
    let count = shown.len();

    let mut anomalies: Vec<Value> = Vec::new();
    for anomaly in apk.zip_anomalies.iter() {
        anomalies.push(json!({
            "kind": format!("{:?}", anomaly.kind),
            "detail": anomaly.detail,
        }));
    }

    Ok(json!({
        "entries": shown,
        "entry_total": total,
        "zip_anomalies": anomalies,
        "_meta": meta(
            count,
            truncated,
            "use --search regex to target entries (e.g. 'META-INF|assets/'); anomalies detect repackaging, master-key duplicates, and out-of-order writes",
            &["strings", "apk_info", "audit"],
        ),
    }))
}
pub fn elf(ctx: &CrossLayerContext, search: Option<&str>) -> anyhow::Result<Value> {
let filter = match search {
Some(pat) => Some(regex::Regex::new(pat)?),
None => None,
};
let mut libs: Vec<Value> = Vec::new();
for (path, info) in &ctx.require_apk()?.elf_info {
if filter.as_ref().is_some_and(|r| !r.is_match(path)) {
continue;
}
let mut jni: Vec<&String> = info.jni_exports.iter().collect();
jni.sort();
let mut imports: Vec<&String> = info.imported_symbols.iter().collect();
imports.sort();
libs.push(json!({
"path": path,
"soname": info.soname,
"sections_stripped": info.sections_stripped,
"section_count": info.section_count,
"has_text_section": info.has_text_section,
"has_rwx_segment": info.has_rwx_segment,
"is_pie": info.is_pie,
"nx_stack": info.nx_stack,
"has_relro": info.has_relro,
"has_bind_now": info.has_bind_now,
"has_stack_canary": info.has_stack_canary,
"has_fortify_source": info.has_fortify_source,
"has_symtab": info.has_symtab,
"has_debug_sections": info.has_debug_sections,
"debug_section_names": info.debug_section_names,
"needed_libraries": info.needed_libraries,
"runpath": info.runpath,
"android_ndk_version": info.android_ndk_version,
"android_target_sdk": info.android_target_sdk,
"init_array_count": info.init_array_count,
"init_array_addrs": info.init_array_addrs,
"jni_exports": jni,
"imported_symbols": imports,
}));
}
let count = libs.len();
let out = json!({
"libs": libs,
"_meta": meta(
count,
false,
"filter with --search (e.g. 'arm64' or 'libcrypto'); has_debug_sections and sections_stripped=false are the biggest RE leaks",
&["audit", "entries", "strings"],
),
});
Ok(out)
}
pub fn webview_assets(
ctx: &CrossLayerContext,
search: Option<&str>,
extract: Option<&str>,
) -> anyhow::Result<Value> {
let apk = ctx.require_apk()?;
if let Some(target) = extract {
let bytes = apk.webview_assets.get(target).ok_or_else(|| {
anyhow::anyhow!(
"no webview asset at path {target:?} — call without `extract` first to list"
)
})?;
let out = json!({
"asset": {
"path": target,
"size": bytes.len(),
"content_utf8": String::from_utf8_lossy(bytes),
},
"_meta": meta(
1,
false,
"content_utf8 is lossy — for binary payloads prefer a path on disk",
&["webview_assets", "strings"],
),
});
return Ok(out);
}
let filter = match search {
Some(pat) => Some(regex::Regex::new(pat)?),
None => None,
};
let mut assets: Vec<Value> = Vec::new();
for (path, bytes) in &apk.webview_assets {
if filter.as_ref().is_some_and(|r| !r.is_match(path)) {
continue;
}
assets.push(json!({
"path": path,
"size": bytes.len(),
}));
}
let count = assets.len();
let out = json!({
"assets": assets,
"_meta": meta(
count,
false,
"empty for non-hybrid apps; call again with `extract=<path>` to read one asset's content",
&["entries", "strings", "audit"],
),
});
Ok(out)
}
pub fn resources(
ctx: &CrossLayerContext,
search: Option<&str>,
limit: Option<usize>,
) -> anyhow::Result<Value> {
use droidsaw_apk::resources::ResourceValue;
let apk = ctx.require_apk()?;
let table = match apk.resources.as_ref() {
Some(t) => t,
None => {
return Ok(json!({
"entries": [],
"_meta": meta(
0,
false,
"no resources.arsc parsed — not an APK or arsc was malformed",
&["entries", "strings", "audit"],
),
}));
}
};
let filter = match search {
Some(pat) => Some(regex::Regex::new(pat)?),
None => None,
};
let mut rows: Vec<Value> = Vec::new();
let mut total = 0usize;
for (id, entry) in table.iter() {
let Some(val) = entry.best_value() else {
continue;
};
let value_str = match val {
ResourceValue::String(s) => s.as_str(),
_ => continue,
};
total = total.saturating_add(1);
if let Some(ref r) = filter
&& !r.is_match(&entry.key)
&& !r.is_match(value_str)
{
continue;
}
rows.push(json!({
"id": format!("0x{id:08x}"),
"key": entry.key,
"value": value_str,
}));
}
let (truncated, rows) = match limit {
Some(n) if rows.len() > n => (true, rows.into_iter().take(n).collect::<Vec<_>>()),
_ => (false, rows),
};
let count = rows.len();
let out = json!({
"entries": rows,
"entry_total": total,
"_meta": meta(
count,
truncated,
"only string-typed resources are returned; use a regex like '(api|secret|token|key)' to find credential leaks in values.xml",
&["strings", "audit", "entries"],
),
});
Ok(out)
}
/// One exported manifest component as emitted by [`manifest_with_config`].
#[derive(Serialize)]
struct ExportedComponentJson {
/// Component name, copied from the parsed manifest.
name: String,
/// String form of the component's kind.
kind: String,
/// The component's `permission` value, when it has one.
permission: Option<String>,
/// The component's intent filters.
intent_filters: Vec<IntentFilterJson>,
/// `"explicit"` when the component's `exported` value is `Some(true)`,
/// otherwise `"implicit"`.
exported_reason: String,
}
/// Serializable copy of one intent filter; every list is cloned from the
/// parsed manifest's filter by [`manifest_with_config`].
#[derive(Serialize)]
struct IntentFilterJson {
/// Copy of the filter's `actions`.
actions: Vec<String>,
/// Copy of the filter's `categories`.
categories: Vec<String>,
/// Copy of the filter's `data_schemes`.
data_schemes: Vec<String>,
/// Copy of the filter's `data_hosts`.
data_hosts: Vec<String>,
/// The filter's `data_paths` pairs, converted to [`DataPathJson`].
data_paths: Vec<DataPathJson>,
}
/// One `(kind, value)` pair taken from an intent filter's `data_paths`.
#[derive(Serialize)]
struct DataPathJson {
/// First element of the source pair.
kind: String,
/// Second element of the source pair.
value: String,
}
/// Payload serialized under the `"manifest"` key by [`manifest_with_config`].
///
/// Scalar fields are copied from the parsed manifest's fields of the same name.
#[derive(Serialize)]
struct ManifestJson {
package: String,
version_name: String,
version_code: String,
min_sdk: String,
target_sdk: String,
debuggable: bool,
uses_cleartext_traffic: Option<bool>,
allow_backup: Option<bool>,
permissions: Vec<String>,
/// Length of the manifest's `components` list.
component_count: usize,
/// Number of components returned by the manifest's `exported_components()`.
exported_count: usize,
/// Details of each exported component.
exported_components: Vec<ExportedComponentJson>,
/// Length of `findings`.
finding_count: usize,
/// Result of the manifest's `security_findings()`.
findings: Vec<Finding>,
}
/// Runs [`manifest_with_config`] with `ParseConfig::lenient()`, overriding only
/// `permissive_recovery` with the value carried by `ctx`.
///
/// # Errors
/// Propagates every error of [`manifest_with_config`].
pub fn manifest(ctx: &CrossLayerContext) -> anyhow::Result<Value> {
    let cfg = {
        let mut lenient = ParseConfig::lenient();
        lenient.permissive_recovery = ctx.permissive_recovery;
        lenient
    };
    manifest_with_config(ctx, &cfg)
}
pub fn manifest_with_config(
ctx: &CrossLayerContext,
cfg: &ParseConfig,
) -> anyhow::Result<Value> {
let raw = ctx
.require_apk()?
.manifest_raw
.as_ref()
.ok_or_else(|| anyhow::anyhow!("no AndroidManifest.xml found in APK"))?;
let (manifest, unknown_chunks, _findings) = Manifest::from_binary_xml_with_config(raw, cfg)?;
let findings = manifest.security_findings();
let exported_components_list = manifest.exported_components();
let exported_count = exported_components_list.len();
let exported_components: Vec<ExportedComponentJson> = exported_components_list
.iter()
.map(|c| {
let kind = c.kind.to_string();
let exported_reason = if c.exported == Some(true) {
"explicit".to_string()
} else {
"implicit".to_string()
};
let intent_filters: Vec<IntentFilterJson> = c
.intent_filters
.iter()
.map(|f| IntentFilterJson {
actions: f.actions.clone(),
categories: f.categories.clone(),
data_schemes: f.data_schemes.clone(),
data_hosts: f.data_hosts.clone(),
data_paths: f
.data_paths
.iter()
.map(|(k, v)| DataPathJson {
kind: k.clone(),
value: v.clone(),
})
.collect(),
})
.collect();
ExportedComponentJson {
name: c.name.clone(),
kind,
permission: c.permission.clone(),
intent_filters,
exported_reason,
}
})
.collect();
let m = ManifestJson {
package: manifest.package.clone(),
version_name: manifest.version_name.clone(),
version_code: manifest.version_code,
min_sdk: manifest.min_sdk,
target_sdk: manifest.target_sdk,
debuggable: manifest.debuggable,
uses_cleartext_traffic: manifest.uses_cleartext_traffic,
allow_backup: manifest.allow_backup,
permissions: manifest.permissions.clone(),
component_count: manifest.components.len(),
exported_count,
exported_components,
finding_count: findings.len(),
findings,
};
let warnings = unknown_chunks_to_meta(&unknown_chunks);
let mut meta_obj = meta(
1,
false,
"use `droidsaw signing` for certs, `droidsaw audit` for full cross-layer findings",
&["signing", "audit", "apk-info"],
);
if let Some(map) = meta_obj.as_object_mut()
&& !warnings.is_empty()
{
map.insert("warnings".to_string(), Value::Array(warnings));
map.insert(
"lenient_recovery".to_string(),
Value::Bool(cfg.lenient_unknown_chunks),
);
}
let out = json!({
"manifest": m,
"_meta": meta_obj,
});
Ok(out)
}
/// Renders each unknown manifest chunk as a JSON warning object.
///
/// Every object has `chunk_type` (four-digit lowercase hex with `0x` prefix)
/// and `offset`. Chunks of type `0x0104` additionally get an explanatory
/// `note`.
fn unknown_chunks_to_meta(chunks: &[UnknownChunk]) -> Vec<Value> {
    let mut warnings: Vec<Value> = Vec::with_capacity(chunks.len());
    for chunk in chunks {
        let mut fields = serde_json::Map::new();
        fields.insert(
            "chunk_type".to_string(),
            Value::String(format!("0x{:04x}", chunk.chunk_type)),
        );
        fields.insert(
            "offset".to_string(),
            Value::Number(serde_json::Number::from(chunk.offset)),
        );
        if chunk.chunk_type == 0x0104 {
            let note = "commercial-obfuscator marker (DexGuard-class); \
                        see audit `COMMERCIAL_OBFUSCATOR_DETECTED` finding";
            fields.insert("note".to_string(), Value::String(note.to_string()));
        }
        warnings.push(Value::Object(fields));
    }
    warnings
}
/// Payload serialized under the `"signing"` key by [`signing`].
///
/// All `v1_*` option fields are `None` when the signing info has no v1
/// certificate; otherwise each is copied from that certificate.
#[derive(Serialize)]
struct SigningJson {
/// True when a v1 certificate was found.
v1_present: bool,
/// Copied from the signing info's `v2_present`.
v2_present: bool,
/// Copied from the signing info's `v3_present`.
v3_present: bool,
/// Copied from the signing info's `v4_present`.
v4_present: bool,
/// The v1 certificate's `subject`.
v1_subject: Option<String>,
/// The v1 certificate's `key_algorithm`.
v1_algorithm: Option<String>,
/// The v1 certificate's `key_size_bits`.
v1_key_size_bits: Option<u32>,
/// The v1 certificate's `sha256_fingerprint`.
v1_sha256_fingerprint: Option<String>,
/// The v1 certificate's `not_before`.
v1_not_before: Option<String>,
/// The v1 certificate's `not_after`.
v1_not_after: Option<String>,
/// Length of `findings`.
finding_count: usize,
/// Signing-related security findings.
findings: Vec<Finding>,
/// One entry per element of the signing info's `signers` list.
signers: Vec<SignerJson>,
}
/// Serializable view of one v2/v3 signer, built by `build_signer_json`.
#[derive(Serialize)]
struct SignerJson {
/// `"v2"` or `"v3"`.
scheme: &'static str,
/// Copy of the signer's `cert_sha256`.
cert_sha256: Option<String>,
/// Copy of the signer's `public_key_algorithm`.
public_key_algorithm: String,
/// Copy of the signer's `public_key_size_bits`.
public_key_size_bits: u32,
/// Hex of the RSA exponent bytes; present only when the algorithm is `"RSA"`.
public_key_rsa_exponent: Option<String>,
/// EC curve name; present only when the algorithm is `"EC"`.
public_key_ec_curve: Option<String>,
/// Hex of the EC point bytes; present only when the algorithm is `"EC"`.
public_key_ec_point: Option<String>,
/// One result per signature of the signer.
signature_results: Vec<SignatureResult>,
/// Converted from the signer's `signature_verified` value.
signed_data_outcome: SignatureOutcome,
}
/// Verification result of one signature of a signer.
#[derive(Serialize)]
struct SignatureResult {
/// Numeric algorithm id taken from the signer's `signatures` entry.
algorithm: u32,
/// Matching entry of the signer's `signature_algorithms` list.
algorithm_name: String,
/// Matching entry of the signer's `signature_results` list, converted.
outcome: SignatureOutcome,
}
/// JSON form of a signature verification verdict.
///
/// Serialized as an internally tagged object: the variant name, in
/// snake_case, is stored under the `status` key next to the variant's fields.
#[derive(Serialize)]
#[serde(tag = "status", rename_all = "snake_case")]
enum SignatureOutcome {
/// Verification succeeded.
Valid,
/// Verification failed; `reason` is the source reason rendered as text.
Invalid { reason: String },
/// The scheme could not be checked; `scheme` is copied from the source value.
Unsupported { scheme: String },
}
impl From<&droidsaw_apk::signing::SignatureValidity> for SignatureOutcome {
fn from(v: &droidsaw_apk::signing::SignatureValidity) -> Self {
use droidsaw_apk::signing::SignatureValidity;
match v {
SignatureValidity::Valid => SignatureOutcome::Valid,
SignatureValidity::Invalid { reason } => {
SignatureOutcome::Invalid { reason: reason.to_string() }
}
SignatureValidity::Unsupported { scheme } => {
SignatureOutcome::Unsupported { scheme: scheme.clone() }
}
}
}
}
/// Encodes `bytes` as lowercase hexadecimal, two digits per byte, with no
/// separator or prefix. An empty slice yields an empty string.
pub(super) fn to_hex(bytes: &[u8]) -> String {
    use std::fmt::Write;

    let mut hex = String::with_capacity(bytes.len().saturating_mul(2));
    bytes.iter().for_each(|byte| {
        #[allow(
            clippy::let_underscore_must_use,
            reason = "formatting into a String cannot fail, so the Result carries no information"
        )]
        let _ = write!(hex, "{byte:02x}");
    });
    hex
}
/// Returns the signer summary of `apk` together with a status label.
///
/// On success the label is `"ok"`. When the signing info cannot be obtained
/// the error is discarded and the result is an empty list labelled
/// `"parse_failed"`.
pub fn resolve_signer_summary(apk: &droidsaw_apk::Apk) -> (Vec<Value>, &'static str) {
    apk.signing_info().map_or_else(
        |_| (Vec::new(), "parse_failed"),
        |sig| (build_signer_summary(&sig), "ok"),
    )
}
/// Flattens `sig` into one JSON row per signer.
///
/// The v1 certificate, when present, comes first and carries subject CN/O,
/// SHA-256 fingerprint and validity bounds. Each v2/v3 signer follows with
/// only its scheme and certificate SHA-256; the remaining keys are `null`.
pub fn build_signer_summary(sig: &droidsaw_apk::signing::SigningInfo) -> Vec<Value> {
    use droidsaw_apk::signing::{extract_subject_cn, extract_subject_organization, SigningScheme};

    let mut rows: Vec<Value> = Vec::new();

    rows.extend(sig.v1_cert.as_ref().map(|cert| {
        json!({
            "signing_scheme": "v1",
            "subject_cn": extract_subject_cn(&cert.subject),
            "subject_o": extract_subject_organization(&cert.subject),
            "sha256_fingerprint": cert.sha256_fingerprint,
            "not_before": cert.not_before,
            "not_after": cert.not_after,
        })
    }));

    rows.extend(sig.signers.iter().map(|signer| {
        let label = match signer.scheme {
            SigningScheme::V2 => "v2",
            SigningScheme::V3 => "v3",
        };
        json!({
            "signing_scheme": label,
            "subject_cn": Value::Null,
            "subject_o": Value::Null,
            "sha256_fingerprint": signer.cert_sha256,
            "not_before": Value::Null,
            "not_after": Value::Null,
        })
    }));

    rows
}
fn build_signer_json(signer: &droidsaw_apk::signing::SignerInfo) -> SignerJson {
use droidsaw_apk::signing::SigningScheme;
let scheme = match signer.scheme {
SigningScheme::V2 => "v2",
SigningScheme::V3 => "v3",
};
let is_rsa = signer.public_key_algorithm == "RSA";
let is_ec = signer.public_key_algorithm == "EC";
let signature_results: Vec<SignatureResult> = signer
.signatures
.iter()
.zip(signer.signature_algorithms.iter())
.zip(signer.signature_results.iter())
.map(|(((algo_id, _), name), verified)| SignatureResult {
algorithm: *algo_id,
algorithm_name: name.clone(),
outcome: verified.into(),
})
.collect();
SignerJson {
scheme,
cert_sha256: signer.cert_sha256.clone(),
public_key_algorithm: signer.public_key_algorithm.clone(),
public_key_size_bits: signer.public_key_size_bits,
public_key_rsa_exponent: is_rsa.then(|| to_hex(&signer.public_key_rsa_exponent)),
public_key_ec_curve: is_ec.then(|| signer.public_key_ec_curve.clone()),
public_key_ec_point: is_ec.then(|| to_hex(&signer.public_key_ec_point)),
signed_data_outcome: (&signer.signature_verified).into(),
signature_results,
}
}
/// Builds the signing-findings context from `ctx`: the number of loaded
/// splits, clamped to `u32::MAX` if it does not fit in a `u32`.
fn signing_findings_context_from(
    ctx: &CrossLayerContext,
) -> droidsaw_apk::signing::SigningFindingsContext {
    let split_total = ctx.loaded_split_names.len();
    droidsaw_apk::signing::SigningFindingsContext {
        loaded_split_count: u32::try_from(split_total).unwrap_or(u32::MAX),
    }
}
/// Reports the APK's signing information.
///
/// The response has a `"signing"` object (scheme presence flags, v1
/// certificate details, security findings evaluated at the current system
/// time, per-signer details) and a `"_meta"` object.
///
/// # Errors
/// Fails when no APK is loaded or when its signing info cannot be obtained.
pub fn signing(ctx: &CrossLayerContext) -> anyhow::Result<Value> {
    let sig = ctx.require_apk()?.signing_info()?;
    let findings = sig.security_findings_at_with_context(
        std::time::SystemTime::now(),
        signing_findings_context_from(ctx),
    );
    let finding_count = findings.len();

    // `Option<&_>` is `Copy`, so the same handle serves every v1 field below.
    let v1 = sig.v1_cert.as_ref();
    let report = SigningJson {
        v1_present: v1.is_some(),
        v2_present: sig.v2_present,
        v3_present: sig.v3_present,
        v4_present: sig.v4_present,
        v1_subject: v1.map(|c| c.subject.clone()),
        v1_algorithm: v1.map(|c| c.key_algorithm.clone()),
        v1_key_size_bits: v1.map(|c| c.key_size_bits),
        v1_sha256_fingerprint: v1.map(|c| c.sha256_fingerprint.clone()),
        v1_not_before: v1.map(|c| c.not_before.clone()),
        v1_not_after: v1.map(|c| c.not_after.clone()),
        finding_count,
        findings,
        signers: sig.signers.iter().map(build_signer_json).collect(),
    };

    Ok(json!({
        "signing": report,
        "_meta": meta(
            1,
            false,
            "cert details plus v2/v3/v4 presence; pair with `droidsaw manifest` for permission context",
            &["manifest", "audit", "apk-info"],
        ),
    }))
}
/// Produces the one-shot APK report: identity, layers, per-DEX hashes,
/// severity tally, top findings and signer summary.
///
/// Findings are collected with the default entropy threshold. A signing-info
/// failure does not fail the call; it is reflected in the signer summary
/// status instead.
///
/// # Errors
/// Fails when no APK is loaded.
pub fn apk_info(ctx: &CrossLayerContext) -> anyhow::Result<Value> {
    let apk = ctx.require_apk()?;
    let findings = collect_apk_findings(ctx, DEFAULT_ENTROPY_THRESHOLD_BITS);
    let (package, version_name) = manifest_id(ctx);
    let (signer_summary, signer_summary_status) = resolve_signer_summary(apk);

    let report_input = ApkInfoContext {
        path: &apk.path,
        package,
        version_name,
        hbc_present: ctx.hbc.is_some(),
        dex_count: ctx.dex.len(),
        native_lib_abis: apk.native_libs.keys().cloned().collect(),
        dex_entries: &apk.dex,
        signer_summary,
        signer_summary_status,
    };
    let report = build_apk_info_response(report_input, &findings);

    Ok(json!({
        "apk_info": report,
        "_meta": meta(
            1,
            false,
            "summary report with severity tally + top critical/high findings; full findings + queryable DB via `audit` + `query`",
            &["info", "manifest", "signing", "audit"],
        ),
    }))
}
/// Maximum number of Critical/High findings that [`build_apk_info_response`]
/// lists under `top_findings`.
pub const APK_INFO_TOP_FINDINGS_CAP: usize = 5;
/// Inputs of [`build_apk_info_response`], gathered by the caller so the
/// response builder itself needs no access to the analysis context.
pub struct ApkInfoContext<'a> {
/// Emitted as `path`.
pub path: &'a str,
/// Emitted as `package`.
pub package: Option<String>,
/// Emitted as `version_name`.
pub version_name: Option<String>,
/// Emitted as `layers.hbc_present`.
pub hbc_present: bool,
/// Emitted as `layers.dex_count`.
pub dex_count: usize,
/// Emitted as `layers.native_lib_abis`.
pub native_lib_abis: Vec<String>,
/// DEX entries; each one becomes a `layer_map` row with name, size and SHA-256.
pub dex_entries: &'a [droidsaw_apk::DexEntry],
/// Emitted as `signer_summary`.
pub signer_summary: Vec<Value>,
/// Emitted as `signer_summary_status`.
pub signer_summary_status: &'static str,
}
/// Assembles the `apk_info` JSON object from pre-gathered inputs.
///
/// * `layer_map`: one row per DEX entry, with a 1-based `dexN` id, entry name,
///   byte size and the SHA-256 of its data as hex.
/// * `severity_summary`: finding counts keyed by the `Debug` rendering of the
///   severity, in key order.
/// * `top_findings`: the first [`APK_INFO_TOP_FINDINGS_CAP`] findings, in input
///   order, whose severity is Critical or High.
pub fn build_apk_info_response(ctx: ApkInfoContext<'_>, findings: &[Finding]) -> Value {
    use droidsaw_common::Severity;
    use sha2::{Digest, Sha256};

    let mut layer_map: Vec<Value> = Vec::with_capacity(ctx.dex_entries.len());
    for (index, entry) in ctx.dex_entries.iter().enumerate() {
        let sha256 = Sha256::digest(&entry.data).iter().fold(
            String::with_capacity(64),
            |mut hex, byte| {
                hex.push(hex_nibble(byte >> 4));
                hex.push(hex_nibble(byte & 0x0f));
                hex
            },
        );
        layer_map.push(json!({
            "layer_id": format!("dex{}", index.saturating_add(1)),
            "entry": entry.name,
            "size_bytes": entry.data.len(),
            "sha256": sha256,
        }));
    }

    let mut severity_summary = std::collections::BTreeMap::<String, usize>::new();
    for finding in findings {
        let tally = severity_summary
            .entry(format!("{:?}", finding.severity))
            .or_default();
        *tally = tally.saturating_add(1);
    }

    let mut top_findings: Vec<Value> = Vec::new();
    let urgent = findings
        .iter()
        .filter(|f| matches!(f.severity, Severity::Critical | Severity::High))
        .take(APK_INFO_TOP_FINDINGS_CAP);
    for finding in urgent {
        top_findings.push(json!({
            "severity": format!("{:?}", finding.severity),
            "id": finding.id,
            "detail": finding.detail,
            "cwe": finding.cwe,
        }));
    }

    json!({
        "path": ctx.path,
        "package": ctx.package,
        "version_name": ctx.version_name,
        "layers": {
            "hbc_present": ctx.hbc_present,
            "dex_count": ctx.dex_count,
            "native_lib_abis": ctx.native_lib_abis,
        },
        "layer_map": layer_map,
        "finding_count": findings.len(),
        "severity_summary": severity_summary,
        "top_findings": top_findings,
        "signer_summary": ctx.signer_summary,
        "signer_summary_status": ctx.signer_summary_status,
    })
}
/// Payload serialized under the `"hbc"` key by [`hbc_info`]; each field is
/// copied from the parsed HBC's field of the same name.
#[derive(Serialize)]
struct HbcInfo {
version: u32,
function_count: u32,
string_count: u32,
}
/// One row of the function listing produced by [`hbc_functions`].
#[derive(Serialize)]
struct HbcFunctionEntry {
/// Index of the function, in `0..function_count`.
id: u32,
/// Function name from the string table; empty when the name id is out of range.
name: String,
/// Copied from the function header's `param_count`.
param_count: u32,
/// Copied from the function header's `offset`.
offset: u32,
/// Copied from the function header's `size`.
size: u32,
}
pub fn hbc_info(ctx: &CrossLayerContext) -> anyhow::Result<Value> {
let hbc_owned = ctx.require_hbc()?;
let hbc = hbc_owned.hbc();
Ok(json!({
"hbc": HbcInfo {
version: hbc.version,
function_count: hbc.function_count,
string_count: hbc.string_count,
},
"_meta": meta(
1,
false,
"use `droidsaw hbc functions` to list functions, `droidsaw hbc decompile` to get source",
&["hbc functions", "hbc decompile", "hbc strings"],
),
}))
}
pub fn hbc_functions(
ctx: &CrossLayerContext,
search: Option<&str>,
) -> anyhow::Result<Value> {
let hbc_owned = ctx.require_hbc()?;
let hbc = hbc_owned.hbc();
let re = search.map(regex::Regex::new).transpose()?;
let mut functions: Vec<HbcFunctionEntry> = Vec::new();
for fid in 0..hbc.function_count {
let f = hbc.function_get(fid);
let name = if f.name_id < hbc.string_count {
hbc.string_as_str_or_empty(f.name_id).into_owned()
} else {
String::new()
};
if let Some(ref re) = re
&& !re.is_match(&name)
{
continue;
}
functions.push(HbcFunctionEntry {
id: fid,
name,
param_count: f.param_count,
offset: f.offset,
size: f.size,
});
}
let count = functions.len();
Ok(json!({
"functions": functions,
"_meta": meta(
count,
false,
"use `droidsaw hbc decompile --func-id <id>` to see source",
&["hbc decompile", "hbc strings", "xrefs"],
),
}))
}
/// Lists the CommonJS module table of the Hermes bundle.
///
/// Each row has the table index, the module path (or `<symbol_id>` when the
/// symbol id lies outside the string table) and the module's `func_offset`,
/// emitted as `function_id`. Indices for which `cjs_module_get` returns
/// nothing are skipped.
///
/// # Errors
/// Fails when `ctx.require_hbc()` fails.
#[allow(clippy::arithmetic_side_effects, clippy::as_conversions, reason = "`cjs_module_count as usize` is u32→usize widening; always safe.")]
pub fn module_list(ctx: &CrossLayerContext) -> anyhow::Result<Value> {
    let hbc_owned = ctx.require_hbc()?;
    let hbc = hbc_owned.hbc();

    let mut modules: Vec<Value> = Vec::with_capacity(hbc.cjs_module_count as usize);
    modules.extend((0..hbc.cjs_module_count).filter_map(|index| {
        let module = hbc.cjs_module_get(index)?;
        let path = if module.symbol_id < hbc.string_count {
            hbc.string_as_str_or_empty(module.symbol_id).into_owned()
        } else {
            format!("<{}>", module.symbol_id)
        };
        Some(json!({
            "index": index,
            "path": path,
            "function_id": module.func_offset,
        }))
    }));

    let count = modules.len();
    Ok(json!({
        "modules": modules,
        "_meta": meta(
            count,
            false,
            "empty for Metro static-resolution bundles; use `strings --search node_modules` to find dependencies the other way",
            &["hbc functions", "npm-packages", "strings"],
        ),
    }))
}
/// Finds Hermes functions that reference the string `"NativeModules"` and
/// links the other names they reference to Java methods.
///
/// For every such function the response lists the function id and name, the
/// sorted, de-duplicated names it references (`modules`), and — for each name
/// found in the bridge resolver's `by_method` map — the Java class, method and
/// 1-based `dexN` label of every target (`java_targets`).
///
/// # Errors
/// Fails when `ctx.require_hbc()` fails.
#[allow(clippy::arithmetic_side_effects, clippy::as_conversions, reason = "DEX struct-field indices (m_idx.0, class_idx.0) widen u32→usize for bounds-checked `.get()`; `dex_idx + 1` is usize+1 bounded by Vec::len.")]
pub fn native_modules(ctx: &CrossLayerContext) -> anyhow::Result<Value> {
let hbc_owned = ctx.require_hbc()?;
let hbc = hbc_owned.hbc();
let hbc_data = hbc_owned.bytes();
// Scan with cross-references enabled and the call graph disabled.
let scan = droidsaw_hermes::scanner::scan_parsed_with_mode(
hbc,
hbc_data,
&droidsaw_hermes::scanner::ScanMode {
xrefs: true,
callgraph: false,
},
);
// Ids of every string-table entry that is exactly "NativeModules".
let mut nm_ids: Vec<u32> = Vec::new();
for i in 0..hbc.string_count {
if hbc.string_as_str_or_empty(i) == "NativeModules" {
nm_ids.push(i);
}
}
use std::collections::BTreeMap;
// function id -> (function name, names referenced by that function).
let mut results: BTreeMap<u32, (String, Vec<String>)> = BTreeMap::new();
for &nm_id in &nm_ids {
// `string_refs` maps a string id to the ids of the functions referencing it.
let Some(func_ids) = scan.string_refs.get(&nm_id) else {
continue;
};
for &fid in func_ids {
// Ignore references attributed to a function id outside the table.
if fid >= hbc.function_count {
continue;
}
let f = hbc.function_get(fid);
let fname = if f.name_id < hbc.string_count {
hbc.string_as_str_or_empty(f.name_id).into_owned()
} else {
String::new()
};
let entry = results.entry(fid).or_insert_with(|| (fname.clone(), Vec::new()));
// Walk every referenced string and keep the ones this function references.
for (&sid, refs) in &scan.string_refs {
if sid >= hbc.string_count || !refs.contains(&fid) {
continue;
}
let Ok(Some(sd)) = hbc.string_get(sid) else {
continue;
};
// NOTE(review): kind 1 presumably marks identifier strings — confirm
// against the Hermes string-kind definition.
if sd.kind == 1 {
let name = hbc.string_as_str_or_empty(sid).into_owned();
if name != "NativeModules" && !name.is_empty() {
entry.1.push(name);
}
}
}
}
}
let bridge = analysis::bridge::BridgeResolver::resolve(ctx);
let bridges: Vec<Value> = results
.into_iter()
.map(|(fid, (fname, mut modules))| {
// A function reached through several "NativeModules" ids is scanned once per
// id above, so duplicates are removed here.
modules.sort();
modules.dedup();
let java_targets: Vec<Value> = modules.iter().flat_map(|module_name| {
// `by_method` is looked up with the JS-side name; a miss contributes nothing.
let targets = bridge.by_method.get(module_name.as_str());
targets.into_iter().flat_map(move |ts| {
ts.iter().filter_map(move |(dex_idx, m_idx)| {
// Targets whose DEX or method index is out of range are dropped.
let dex = ctx.dex.get(*dex_idx)?;
let m_id_item = dex.methods.get(m_idx.0 as usize)?;
// A missing class descriptor or method name falls back to its default value.
let class = dex.type_descriptors
.get(m_id_item.class_idx.0 as usize)
.cloned()
.unwrap_or_default();
let method = dex.get_string(m_id_item.name_idx)
.unwrap_or_default()
.to_string();
Some(json!({
"js_module": module_name,
"java_class": class,
"java_method": method,
"dex": format!("dex{}", dex_idx + 1),
}))
})
})
}).collect();
json!({
"function_id": fid,
"function_name": fname,
"modules": modules,
"java_targets": java_targets,
})
})
.collect();
let count = bridges.len();
Ok(json!({
"bridges": bridges,
"_meta": meta(
count,
false,
"java_targets maps each JS module name to its @ReactMethod Java class+method. \
Empty if the bundle doesn't use NativeModules or no @ReactMethod annotations found.",
&["xrefs", "dex_classes", "dex_decompile", "hbc_functions"],
),
}))
}
/// Disassembles one Hermes function into a flat instruction listing.
///
/// Each instruction is reported with its offset, opcode name and rendered
/// operands: registers as `rN`, jump targets as an absolute `→0x…` address,
/// and unsigned immediates as a quoted string-table entry whenever the value
/// is a valid string id (otherwise as a plain number).
///
/// # Errors
/// Fails when `ctx.require_hbc()` fails, when `func_id` is out of range, when
/// the function's byte range does not fit inside the bundle, or when the
/// bytecode cannot be decoded.
#[allow(clippy::arithmetic_side_effects, clippy::as_conversions, reason = "HBC bytecode region validated in u64 — `end = f.offset as u64 + f.size as u64` + `if end > hbc_data.len() as u64 { bail }` guard — before narrowing `as usize`; u32→u64/i64 widening cannot overflow.")]
pub fn disasm(ctx: &CrossLayerContext, func_id: u32) -> anyhow::Result<Value> {
    use droidsaw_hermes::decompile::decode::Operand;

    let hbc_owned = ctx.require_hbc()?;
    let hbc = hbc_owned.hbc();
    let hbc_data = hbc_owned.bytes();

    if func_id >= hbc.function_count {
        anyhow::bail!(
            "function index {func_id} out of range (valid: 0..{}); use `hbc_functions` to list available ids",
            hbc.function_count.saturating_sub(1)
        );
    }

    let f = hbc.function_get(func_id);
    let fname = match f.name_id < hbc.string_count {
        true => hbc.string_as_str_or_empty(f.name_id).into_owned(),
        false => String::new(),
    };

    // Computed in u64 so that offset + size cannot wrap.
    let end = u64::from(f.offset) + u64::from(f.size);
    #[allow(
        clippy::cast_possible_truncation,
        reason = "PROOF: `.get()` returns None on out-of-range, propagated as the typed error below. On 32-bit targets `end as usize` may truncate, but the truncated slice is still bounds-checked by `.get()` — a truncated end past hbc_data.len() yields None, never UB."
    )]
    let code = hbc_data
        .get(f.offset as usize..end as usize)
        .ok_or_else(|| {
            anyhow::anyhow!("function bytecode region extends past buffer end — corrupt bundle?")
        })?;

    let instructions =
        droidsaw_hermes::decompile::decode::decode_function(code, hbc.opcode_version())
            .map_err(|e| anyhow::anyhow!("failed to decode Hermes function {func_id}: {e}"))?;

    let mut insts: Vec<Value> = Vec::new();
    for inst in instructions.iter() {
        let mut operands: Vec<String> = Vec::new();
        for op in inst.operands.iter() {
            let rendered = match op {
                Operand::Reg(r) => format!("r{r}"),
                Operand::Reg32(r) => format!("r{r}"),
                Operand::UInt(v) if *v < hbc.string_count => {
                    format!("\"{}\"", hbc.string_as_str_or_empty(*v))
                }
                Operand::UInt(v) => v.to_string(),
                Operand::Int(v) => v.to_string(),
                Operand::Double(v) => v.to_string(),
                Operand::Addr(rel) => {
                    let absolute = i64::from(inst.offset) + i64::from(*rel);
                    format!("→0x{:04x}", absolute)
                }
            };
            operands.push(rendered);
        }
        insts.push(json!({
            "offset": inst.offset,
            "opcode": inst.name,
            "operands": operands,
        }));
    }

    let count = insts.len();
    Ok(json!({
        "function_id": func_id,
        "name": fname,
        "param_count": f.param_count,
        "size": f.size,
        "instructions": insts,
        "_meta": meta(
            count,
            false,
            "raw instruction stream — use `decompile --func-id <id>` for higher-level view, `hbc_functions --search <re>` to find targets",
            &["decompile", "hbc_functions", "xrefs"],
        ),
    }))
}
/// Lists npm package names referenced from the Hermes string table.
///
/// Every string is matched against a `node_modules/<pkg>` pattern (scoped
/// `@scope/name` packages are captured as one name). The result holds one row
/// per distinct package with the number of strings that referenced it,
/// ordered case-insensitively by name.
///
/// # Errors
///
/// Fails when no Hermes bundle is available.
pub fn npm_packages(ctx: &CrossLayerContext) -> anyhow::Result<Value> {
    use std::collections::BTreeMap;

    let hbc_owned = ctx.require_hbc()?;
    let hbc = hbc_owned.hbc();
    let node_modules_re = regex::Regex::new(r"(?:^|/)node_modules/(@[^/]+/[^/]+|[^/]+)")?;

    let mut ref_counts: BTreeMap<String, usize> = BTreeMap::new();
    for string_id in 0..hbc.string_count {
        let text = hbc.string_as_str_or_empty(string_id);
        // Only the first `node_modules/` occurrence in a string is counted.
        let Some(pkg) = node_modules_re
            .captures(&text)
            .and_then(|caps| caps.get(1))
        else {
            continue;
        };
        let counter = ref_counts.entry(pkg.as_str().to_string()).or_insert(0);
        *counter = counter.saturating_add(1);
    }

    // The map iterates in byte order; the stable sort re-orders by lowercased
    // name while keeping byte order among names that differ only in case.
    let mut ordered: Vec<(String, usize)> = ref_counts.into_iter().collect();
    ordered.sort_by_key(|(name, _)| name.to_lowercase());

    let rows: Vec<Value> = ordered
        .into_iter()
        .map(|(name, count)| json!({ "name": name, "ref_count": count }))
        .collect();
    let count = rows.len();
    Ok(json!({
        "packages": rows,
        "_meta": meta(
            count,
            false,
            "Hermes-only SBOM extracted from the string pool; for the full APK-level SBOM use `droidsaw sbom`",
            &["sbom", "strings", "hbc functions"],
        ),
    }))
}
/// Builds the Hermes call graph as a list of `caller -> callees` edges.
///
/// * `search` — optional regex applied to the caller's function name; callers
///   that do not match are omitted.
/// * `limit` — maximum number of edges to return (defaults to 50).
///
/// Callers with no recorded callees are skipped. Callee ids that are not valid
/// function indices are dropped from the edge. `_meta.truncated` is `true`
/// only when at least one further matching caller existed beyond the cap, so
/// a result that exactly fills the cap is not reported as truncated and
/// `limit = 0` returns no edges.
///
/// # Errors
///
/// Fails when no Hermes bundle is available or `search` is not a valid regex.
#[allow(clippy::arithmetic_side_effects, clippy::as_conversions, reason = "`fi`/`ci` widen u32→usize for `func_names` index, each bounded by the `ci < hbc.function_count` filter + func_names.len() == function_count.")]
pub fn call_graph(
    ctx: &CrossLayerContext,
    search: Option<&str>,
    limit: Option<usize>,
) -> anyhow::Result<Value> {
    let hbc_owned = ctx.require_hbc()?;
    let hbc = hbc_owned.hbc();
    let hbc_data = hbc_owned.bytes();
    let scan = droidsaw_hermes::scanner::scan_parsed_with_mode(
        hbc,
        hbc_data,
        &droidsaw_hermes::scanner::ScanMode {
            xrefs: false,
            callgraph: true,
        },
    );
    let re = search.map(regex::Regex::new).transpose()?;
    let cap = limit.unwrap_or(50);

    // Resolve every function name once; ids outside the string table map to "".
    let func_names: Vec<String> = (0..hbc.function_count)
        .map(|i| {
            let f = hbc.function_get(i);
            if f.name_id < hbc.string_count {
                hbc.string_as_str_or_empty(f.name_id).into_owned()
            } else {
                String::new()
            }
        })
        .collect();

    let mut edges: Vec<Value> = Vec::new();
    let mut truncated = false;
    for fi in 0..hbc.function_count {
        let Some(callees) = scan.call_graph.get(&fi) else {
            continue;
        };
        if callees.is_empty() {
            continue;
        }
        let Some(caller_name) = func_names.get(fi as usize) else {
            continue;
        };
        if let Some(ref re) = re
            && !re.is_match(caller_name)
        {
            continue;
        }
        // Checked only once another qualifying caller has been found, so the
        // flag means "results were actually dropped" rather than "cap reached".
        if edges.len() >= cap {
            truncated = true;
            break;
        }
        let callees_json: Vec<Value> = callees
            .iter()
            .filter(|&&ci| ci < hbc.function_count)
            .filter_map(|&ci| {
                func_names.get(ci as usize).map(|name| {
                    json!({
                        "id": ci,
                        "name": name,
                    })
                })
            })
            .collect();
        edges.push(json!({
            "caller": {
                "id": fi,
                "name": caller_name,
            },
            "callees": callees_json,
        }));
    }

    let count = edges.len();
    Ok(json!({
        "edges": edges,
        "_meta": meta(
            count,
            truncated,
            "Hermes call graph via scanner::scan_parsed_with_mode(callgraph=true); anonymous callers have empty `name`",
            &["hbc_functions", "decompile", "xrefs"],
        ),
    }))
}
/// Per-method line budget handed to `apply_outline_filter` when a class is
/// decompiled in [`DecompileMode::Outline`].
pub const OUTLINE_LINES_PER_METHOD: usize = 20;
/// Controls how much of each decompiled class `dex_decompile_filtered` emits.
#[derive(Debug, Clone, Copy, Default)]
pub enum DecompileMode {
/// Emit the decompiled source unchanged (after any methods filter). Default.
#[default]
Full,
/// Pass the source through `apply_outline_filter` with
/// `OUTLINE_LINES_PER_METHOD` before emitting it.
Outline,
}
/// Decompiles DEX classes selected by `class_index` or by `search`.
///
/// Convenience wrapper around [`dex_decompile_filtered`] that always emits the
/// full source and applies no methods filter.
pub fn dex_decompile(
    ctx: &CrossLayerContext,
    class_index: Option<usize>,
    search: Option<&str>,
) -> anyhow::Result<Value> {
    let mode = DecompileMode::Full;
    let methods_filter: Option<&[String]> = None;
    dex_decompile_filtered(ctx, class_index, search, mode, methods_filter)
}
#[allow(clippy::arithmetic_side_effects, clippy::as_conversions, reason = "`i + 1` (dex-layer 1-based label) is usize+1 bounded by ctx.dex.len() ≤ isize::MAX; `class_idx.0 as usize` widens u32→usize for `.get()` lookup.")]
pub fn dex_decompile_filtered(
ctx: &CrossLayerContext,
class_index: Option<usize>,
search: Option<&str>,
mode: DecompileMode,
methods_filter: Option<&[String]>,
) -> anyhow::Result<Value> {
if class_index.is_none() && search.is_none() {
anyhow::bail!(
"specify `class_index` (from `dex classes`) or `search` (regex on class descriptor); class descriptors use JVM format: Lcom/example/Foo;"
);
}
let normalized_search: Option<String> = search.map(normalize_dex_class_search);
let re = normalized_search.as_deref().map(regex::Regex::new).transpose()?;
let mut decompiled: Vec<Value> = Vec::new();
let mut close_matches: Vec<String> = Vec::new();
let mut global_idx: usize = 0;
for (i, dex) in ctx.dex.iter().enumerate() {
let layer = format!("dex{}", i + 1);
let Some(dex_data) = ctx.dex_bytes(i) else {
continue;
};
let census = droidsaw_dex::r8_inversion::build_trampoline_census(dex);
for (class_defs_idx, class_def) in dex.class_defs.iter().enumerate() {
if dex.class_def_is_shadowed(class_defs_idx) {
continue;
}
let descriptor = dex
.type_descriptors
.get(class_def.class_idx.0 as usize)
.cloned()
.unwrap_or_default();
let matched = match (class_index, re.as_ref()) {
(Some(target), _) => global_idx == target,
(None, Some(re)) => re.is_match(&descriptor),
_ => false,
};
if matched {
let raw_source =
droidsaw_dex::classes::decompile_class_with_census(dex, dex_data, class_def, &census);
let after_methods = match methods_filter {
Some(list) => apply_methods_filter(&raw_source, list),
None => raw_source,
};
let source = match mode {
DecompileMode::Full => after_methods,
DecompileMode::Outline => {
apply_outline_filter(&after_methods, OUTLINE_LINES_PER_METHOD)
}
};
decompiled.push(json!({
"layer": layer,
"class_index": global_idx,
"descriptor": descriptor,
"source": source,
}));
} else if let Some(raw) = search {
if close_matches.len() < 5
&& descriptor.to_lowercase().contains(&raw.to_lowercase())
{
close_matches.push(descriptor.clone());
}
}
global_idx = global_idx.saturating_add(1);
}
}
if decompiled.is_empty() {
let hint = if close_matches.is_empty() {
format!(
"no matching class for {search:?}; \
use JVM descriptor format (Lcom/example/Foo;) or a bare class name. \
Call `dex_classes` with a broad search to browse available descriptors."
)
} else {
format!(
"no matching class for {search:?}; \
use JVM descriptor format (Lcom/example/Foo;) or a bare class name. \
Close matches:\n{}",
close_matches
.iter()
.map(|s| format!(" {s}"))
.collect::<Vec<_>>()
.join("\n")
)
};
anyhow::bail!("{hint}");
}
let count = decompiled.len();
Ok(json!({
"classes": decompiled,
"_meta": meta(
count,
false,
"search decompiles every match — narrow with a tighter regex to avoid large output",
&["dex classes", "dex strings", "decompile"],
),
}))
}
/// Reports which classes a decompile request would select, without emitting
/// any source.
///
/// Selection follows the same rules as `dex_decompile_filtered`: a global
/// `class_index` over non-shadowed classes wins over `search`, which is
/// normalized and compiled as a regex on the class descriptor. Each selected
/// class is reported with its declared method count (direct + virtual) and a
/// rough output-size estimate of 80 tokens per method.
///
/// # Errors
///
/// Fails when neither selector is given or `search` is not a valid regex.
/// An empty selection is not an error here.
#[allow(clippy::arithmetic_side_effects, clippy::as_conversions, reason = "`i + 1` label is usize+1 bounded by ctx.dex.len() ≤ isize::MAX; `class_idx.0 as usize` widens u32→usize for bounds-checked `.get()`. method_count arithmetic: saturating_add + saturating_mul throughout; no wrapping or overflow possible on any platform.")]
pub fn dex_decompile_dry_run(
    ctx: &CrossLayerContext,
    class_index: Option<usize>,
    search: Option<&str>,
) -> anyhow::Result<Value> {
    if class_index.is_none() && search.is_none() {
        anyhow::bail!(
            "specify `class_index` (from `dex classes`) or `search` (regex on class descriptor); class descriptors use JVM format: Lcom/example/Foo;"
        );
    }
    let normalized_search: Option<String> = search.map(normalize_dex_class_search);
    let pattern = normalized_search.as_deref().map(regex::Regex::new).transpose()?;

    // Index selection takes precedence; the regex is consulted only without one.
    let is_selected = |position: usize, descriptor: &str| -> bool {
        match class_index {
            Some(target) => position == target,
            None => pattern.as_ref().is_some_and(|re| re.is_match(descriptor)),
        }
    };

    let mut matches: Vec<Value> = Vec::new();
    let mut global_idx: usize = 0;
    for (layer_no, dex) in ctx.dex.iter().enumerate() {
        let layer = format!("dex{}", layer_no + 1);
        for (class_defs_idx, class_def) in dex.class_defs.iter().enumerate() {
            if dex.class_def_is_shadowed(class_defs_idx) {
                continue;
            }
            let descriptor = dex
                .type_descriptors
                .get(class_def.class_idx.0 as usize)
                .cloned()
                .unwrap_or_default();
            if is_selected(global_idx, &descriptor) {
                // Classes without a class-data entry count as having no methods.
                let method_count: usize = dex
                    .class_datas
                    .get(&class_def.class_data_off)
                    .map_or(0, |cd| {
                        cd.direct_methods
                            .len()
                            .saturating_add(cd.virtual_methods.len())
                    });
                let estimated_output_tokens: usize = method_count.saturating_mul(80);
                matches.push(json!({
                    "layer": layer,
                    "class_index": global_idx,
                    "class_descriptor": descriptor,
                    "estimated_method_count": method_count,
                    "estimated_output_tokens": estimated_output_tokens,
                }));
            }
            global_idx = global_idx.saturating_add(1);
        }
    }

    let count = matches.len();
    Ok(json!({
        "classes": matches,
        "_meta": meta(
            count,
            false,
            "dry_run=true — no source emitted; tighten regex then call decompile without dry_run",
            &["decompile", "dex classes", "xrefs"],
        ),
    }))
}
#[allow(clippy::arithmetic_side_effects, clippy::as_conversions, reason = "`class_idx.0 as usize` widens u32→usize for `.get()` bounds-checked lookup on type_descriptors.")]
pub fn dex_decompile_all(
ctx: &CrossLayerContext,
out: &mut dyn std::io::Write,
out_dir: Option<&std::path::Path>,
) -> anyhow::Result<()> {
if ctx.dex.is_empty() {
anyhow::bail!("no DEX layers to decompile; this command requires an APK or raw .dex input");
}
let mut count = 0usize;
for (i, dex) in ctx.dex.iter().enumerate() {
let Some(dex_data) = ctx.dex_bytes(i) else {
continue;
};
let census = droidsaw_dex::r8_inversion::build_trampoline_census(dex);
for (class_defs_idx, class_def) in dex.class_defs.iter().enumerate() {
if dex.class_def_is_shadowed(class_defs_idx) {
continue;
}
let descriptor = dex
.type_descriptors
.get(class_def.class_idx.0 as usize)
.cloned()
.ok_or_else(|| {
anyhow::anyhow!(
"class_idx {} OOB in type_descriptors",
class_def.class_idx.0
)
})?;
let source = droidsaw_dex::classes::decompile_class_with_census(dex, dex_data, class_def, &census);
if let Some(dir) = out_dir {
let rel = class_file_rel_path(&descriptor);
let full = dir.join(&rel);
if let Some(parent) = full.parent() {
std::fs::create_dir_all(parent)?;
}
std::fs::write(&full, &source)?;
} else {
writeln!(out, "// ==class {descriptor}==")?;
out.write_all(source.as_bytes())?;
if !source.ends_with('\n') {
writeln!(out)?;
}
}
count = count.saturating_add(1);
}
}
if let Some(dir) = out_dir {
writeln!(out, "{{\"decompiled\":{count},\"out_dir\":{:?}}}", dir.display().to_string())?;
}
Ok(())
}
/// Maps a class descriptor to a relative `.java` output path.
///
/// `Lcom/example/Foo;` becomes `com/example/Foo.java`; input without the
/// `L…;` wrapper is used as-is.
///
/// Descriptors come from the analysed (untrusted) input, so the result is
/// restricted to plain path components: root markers, drive prefixes, `.` and
/// `..` are dropped. This keeps the path relative and prevents it from
/// escaping the directory it is later joined onto. A descriptor with no
/// usable component maps to `_unnamed.java`.
fn class_file_rel_path(descriptor: &str) -> std::path::PathBuf {
    use std::path::{Component, Path, PathBuf};

    let inner = descriptor
        .strip_prefix('L')
        .and_then(|s| s.strip_suffix(';'))
        .unwrap_or(descriptor);

    let mut rel: PathBuf = Path::new(inner)
        .components()
        .filter_map(|component| match component {
            Component::Normal(part) => Some(part),
            // RootDir / Prefix would make `dir.join(rel)` discard `dir`;
            // ParentDir would climb out of it.
            _ => None,
        })
        .collect();

    if rel.as_os_str().is_empty() {
        rel.push("_unnamed");
    }
    rel.set_extension("java");
    rel
}
/// Dispatches an `hbc` subcommand that produces JSON.
///
/// Each variant parses the input at `path` inside a diagnostics scope and runs
/// the matching handler. `Disassemble` emits plain text and therefore has its
/// own entry point; reaching it here is reported as an error.
pub fn hbc(cmd: HbcCommands) -> anyhow::Result<Value> {
    match cmd {
        HbcCommands::Info { path } => parse_with_diag_scope(&path, hbc_info),
        HbcCommands::Functions { path, search } => {
            let pattern = search.as_deref();
            parse_with_diag_scope(&path, |ctx| hbc_functions(ctx, pattern))
        }
        HbcCommands::Decompile { path, func_id, js, all } => {
            // `decompile` takes its target as text, so the numeric id is stringified.
            let target = func_id.map(|id| id.to_string());
            parse_with_diag_scope(&path, |ctx| {
                ctx.ensure_hbc_parsed()?;
                decompile(ctx, target.as_deref(), js, all)
            })
        }
        HbcCommands::Strings { path, search } => {
            let pattern = search.as_deref();
            parse_with_diag_scope(&path, |ctx| strings(ctx, pattern, None, None, Some("hbc")))
        }
        HbcCommands::Disassemble { .. } => {
            anyhow::bail!(
                "hbc disassemble must be dispatched via hbc_disassemble_to (plain-text output, not JSON)"
            )
        }
    }
}
/// Writes a plain-text disassembly of every Hermes function to `sink`.
///
/// For each function a header line is written, followed by one line per
/// decoded instruction (`<offset> <opcode> <operands…>`). Problems with an
/// individual function — unrecognized region, inconsistent bounds, decode
/// failure — are reported as `;`-prefixed comment lines and processing moves
/// on to the next function. The sink is flushed at the end.
///
/// # Errors
///
/// Fails when no Hermes bundle is available or when writing to `sink` fails.
pub fn hbc_disassemble_to<W: std::io::Write>(
    ctx: &CrossLayerContext,
    sink: &mut W,
) -> anyhow::Result<()> {
    use droidsaw_hermes::decompile::decode::decode_function;
    use droidsaw_hermes::parser::UnrecognizedReason;

    let hbc_owned = ctx.require_hbc()?;
    let hbc = hbc_owned.hbc();
    let data = hbc_owned.bytes();
    let version = hbc.opcode_version();

    for fid in 0..hbc.function_count {
        let f = hbc.function_get(fid);

        // Out-of-table name ids and empty names both print as "<unnamed>".
        let name = Some(f.name_id)
            .filter(|&id| id < hbc.string_count)
            .map(|id| hbc.string_as_str_or_empty(id).into_owned())
            .filter(|s| !s.is_empty())
            .unwrap_or_else(|| String::from("<unnamed>"));
        writeln!(
            sink,
            "function {} {:?} arity={} registers={} @ pc={}",
            fid, name, f.param_count, f.frame_size, f.offset,
        )?;

        if hbc.is_function_unrecognized(fid) {
            let unrec = hbc.unrecognized_functions();
            let reason = unrec
                .binary_search_by_key(&fid, |u| u.func_idx)
                .ok()
                .and_then(|pos| unrec.get(pos))
                .map(|u| u.reason);
            if let Some(UnrecognizedReason::OverflowedHeaderOutOfBounds { large_off, buf_len }) =
                reason
            {
                writeln!(
                    sink,
                    "; unrecognized — function region not decoded \
                     (overflow large-header OOB: large_off={large_off}, buf_len={buf_len})"
                )?;
            } else {
                writeln!(sink, "; unrecognized — function region not decoded")?;
            }
            continue;
        }

        // Validate the body range step by step; each failure gets its own note.
        let Some(end) = f.offset.checked_add(f.size) else {
            writeln!(sink, "; function-body bounds overflow (offset+size)")?;
            continue;
        };
        let (Ok(start_us), Ok(end_us)) = (usize::try_from(f.offset), usize::try_from(end)) else {
            writeln!(sink, "; function-body bounds out of usize range")?;
            continue;
        };
        if start_us > end_us || end_us > data.len() {
            writeln!(sink, "; function-body bounds exceed file size")?;
            continue;
        }
        let Some(code) = data.get(start_us..end_us) else {
            writeln!(sink, "; function-body slice unavailable")?;
            continue;
        };

        let insts = match decode_function(code, version) {
            Ok(decoded) => decoded,
            Err(e) => {
                writeln!(sink, "; decode failed: {e}")?;
                continue;
            }
        };
        for inst in &insts {
            write!(sink, "{} {}", inst.offset, inst.name)?;
            for operand in &inst.operands {
                sink.write_all(b" ")?;
                write_operand(sink, operand)?;
            }
            writeln!(sink)?;
        }
    }

    sink.flush()?;
    Ok(())
}
/// Writes the plain-text form of a single operand to `sink`.
///
/// Registers are prefixed with `r`; integers and addresses are written as
/// decimal numbers; doubles use their `Debug` representation. Unlike the JSON
/// disassembly, no string-table lookup or address resolution is performed.
fn write_operand<W: std::io::Write>(
    sink: &mut W,
    operand: &droidsaw_hermes::decompile::decode::Operand,
) -> std::io::Result<()> {
    use droidsaw_hermes::decompile::decode::Operand;

    match *operand {
        Operand::Reg(reg) => write!(sink, "r{reg}"),
        Operand::Reg32(reg) => write!(sink, "r{reg}"),
        Operand::UInt(value) => write!(sink, "{value}"),
        Operand::Int(value) => write!(sink, "{value}"),
        Operand::Addr(value) => write!(sink, "{value}"),
        Operand::Double(value) => write!(sink, "{value:?}"),
    }
}
pub fn hbc_disassemble<W: std::io::Write>(
path: &std::path::Path,
sink: &mut W,
) -> anyhow::Result<()> {
parse_with_diag_scope(path, |ctx| hbc_disassemble_to(ctx, sink))
}
/// Parses `path` and runs `f` on the resulting context, all inside a
/// diagnostics scope keyed by the input's hash.
///
/// The hash is computed first; parsing and the callback then both execute
/// within `diag::with_input_hash`, so errors from either are returned to the
/// caller unchanged.
fn parse_with_diag_scope<F, R>(path: &std::path::Path, f: F) -> anyhow::Result<R>
where
    F: FnOnce(&CrossLayerContext) -> anyhow::Result<R>,
{
    let input_digest = CrossLayerContext::input_hash(path)?;
    let parse_then_run = || -> anyhow::Result<R> {
        let parsed = CrossLayerContext::parse(path, None)?;
        f(&parsed)
    };
    droidsaw_common::diag::with_input_hash(&input_digest, parse_then_run)
}
/// One row of the `dex_classes` listing.
#[derive(Serialize)]
struct DexClassEntry {
// Label of the DEX layer the class came from ("dex1", "dex2", ...).
layer: String,
// Class type descriptor; empty when the class's type index was out of range.
name: String,
// Superclass descriptor; `None` when the class has no superclass index or
// that index is out of range.
superclass: Option<String>,
}
/// One row of the `dex_methods` listing.
#[derive(Serialize)]
struct DexMethodEntry {
// Label of the DEX layer the method came from ("dex1", "dex2", ...).
layer: String,
// Descriptor of the method's class; empty when the index was out of range.
class: String,
// Method name (lossily decoded); empty when the string index was out of range.
name: String,
}
#[allow(clippy::arithmetic_side_effects, clippy::as_conversions, reason = "`i + 1` is usize+1 bounded by ctx.dex.len(); struct-field indices widen u32→usize for `.get()` bounds-checked lookup.")]
pub fn dex_classes(
ctx: &CrossLayerContext,
search: Option<&str>,
) -> anyhow::Result<Value> {
let re = search.map(regex::Regex::new).transpose()?;
let mut classes: Vec<DexClassEntry> = Vec::new();
for (i, dex) in ctx.dex.iter().enumerate() {
let layer = format!("dex{}", i + 1);
for (class_defs_idx, cd) in dex.class_defs.iter().enumerate() {
if dex.class_def_is_shadowed(class_defs_idx) {
continue;
}
let name = dex
.type_descriptors
.get(cd.class_idx.0 as usize)
.cloned()
.unwrap_or_default();
if let Some(ref re) = re
&& !re.is_match(&name)
{
continue;
}
let superclass = cd
.superclass_idx
.and_then(|s| dex.type_descriptors.get(s.0 as usize))
.cloned();
classes.push(DexClassEntry {
layer: layer.clone(),
name,
superclass,
});
}
}
let count = classes.len();
Ok(json!({
"classes": classes,
"_meta": meta(
count,
false,
"use `droidsaw dex methods` to list methods, `droidsaw xrefs -s <name>` to find string refs",
&["dex methods", "dex strings", "xrefs"],
),
}))
}
#[allow(clippy::arithmetic_side_effects, clippy::as_conversions, reason = "`i + 1` is usize+1 bounded by ctx.dex.len(); struct-field indices (class_idx.0, name_idx.0) widen u32→usize for `.get()` lookup.")]
pub fn dex_methods(
ctx: &CrossLayerContext,
search: Option<&str>,
implementations: bool,
) -> anyhow::Result<Value> {
let re = search.map(regex::Regex::new).transpose()?;
let mut methods: Vec<DexMethodEntry> = Vec::new();
for (i, dex) in ctx.dex.iter().enumerate() {
let layer = format!("dex{}", i + 1);
if implementations {
for (class_defs_idx, cd_def) in dex.class_defs.iter().enumerate() {
if dex.class_def_is_shadowed(class_defs_idx) {
continue;
}
let Some(cd) = dex.class_datas.get(&cd_def.class_data_off) else {
continue;
};
for em in cd.direct_methods.iter().chain(cd.virtual_methods.iter()) {
let Some(m) = dex.methods.get(em.method_idx.0 as usize) else {
continue;
};
let class = dex
.type_descriptors
.get(m.class_idx.0 as usize)
.cloned()
.unwrap_or_default();
let name = dex
.strings
.get(m.name_idx.0 as usize)
.map(|e| e.as_str_lossy().to_string())
.unwrap_or_default();
let full = format!("{class}->{name}");
if let Some(ref re) = re
&& !re.is_match(&full)
{
continue;
}
methods.push(DexMethodEntry {
layer: layer.clone(),
class,
name,
});
}
}
} else {
for m in &dex.methods {
let class = dex
.type_descriptors
.get(m.class_idx.0 as usize)
.cloned()
.unwrap_or_default();
let name = dex
.strings
.get(m.name_idx.0 as usize)
.map(|e| e.as_str_lossy().to_string())
.unwrap_or_default();
let full = format!("{class}->{name}");
if let Some(ref re) = re
&& !re.is_match(&full)
{
continue;
}
methods.push(DexMethodEntry {
layer: layer.clone(),
class,
name,
});
}
}
}
let count = methods.len();
let hint = if implementations {
"implementations view: declared methods via class_data_item (includes abstract/native; compare against jadx)"
} else {
"use `droidsaw xrefs -s <name>` to find string references to a method"
};
Ok(json!({
"methods": methods,
"_meta": meta(
count,
false,
hint,
&["dex classes", "dex strings", "xrefs"],
),
}))
}
/// Dispatches a `dex` subcommand.
///
/// Every variant parses the input at `path` inside a diagnostics scope and
/// then runs the matching listing function.
pub fn dex(cmd: DexCommands) -> anyhow::Result<Value> {
    match cmd {
        DexCommands::Classes { path, search } => {
            let pattern = search.as_deref();
            parse_with_diag_scope(&path, |ctx| dex_classes(ctx, pattern))
        }
        DexCommands::Methods {
            path,
            search,
            implementations,
        } => {
            let pattern = search.as_deref();
            parse_with_diag_scope(&path, |ctx| dex_methods(ctx, pattern, implementations))
        }
        DexCommands::Strings { path, search } => {
            let pattern = search.as_deref();
            parse_with_diag_scope(&path, |ctx| strings(ctx, pattern, None, None, None))
        }
    }
}
/// Scans an APK with YARA rules and returns the matches as JSON.
///
/// Rule source, in order of precedence:
/// 1. `rules_src` — inline rule text, compiled in restricted mode;
/// 2. `rules_path` — a rule file, or a directory of rule files;
/// 3. neither — the bundled ruleset.
///
/// Assets that start with the Hermes magic are skipped by the archive-level
/// scan and instead scanned string by string, so matches point at individual
/// string-table entries. With the bundled ruleset, strings are additionally
/// prefiltered by credential prefix.
///
/// * `target` — one of `manifest`, `dex`, `resources`, `native`, `assets`,
///   `all`; restricts which match targets are reported. It is validated before
///   any rule compilation or scanning so that a typo fails immediately.
/// * `limit` — maximum number of matches returned; `_meta.truncated` is set
///   when matches were cut off.
///
/// # Errors
///
/// Fails on an unknown `target`, when the rules cannot be loaded or compiled,
/// when `rules_path` does not exist, or when the context holds no APK.
pub fn yara(
    ctx: &CrossLayerContext,
    rules_src: Option<&str>,
    rules_path: Option<&std::path::Path>,
    target: &str,
    limit: Option<usize>,
) -> anyhow::Result<Value> {
    use droidsaw_apk::yara_scan::{
        bundled_rules, compile_rules_str, compile_rules_str_restricted, load_rules_from_dir,
    };
    // Lets freshly compiled rules and the process-wide bundled rules be used
    // through the same `&Rules` reference.
    enum RulesHolder {
        Owned(Box<droidsaw_apk::yara_scan::Rules>),
        Borrowed(&'static droidsaw_apk::yara_scan::Rules),
    }
    impl std::ops::Deref for RulesHolder {
        type Target = droidsaw_apk::yara_scan::Rules;
        fn deref(&self) -> &Self::Target {
            match self {
                RulesHolder::Owned(r) => r,
                RulesHolder::Borrowed(r) => r,
            }
        }
    }

    // Reject a bad target up front instead of after the (expensive) scan.
    if !matches!(
        target,
        "all" | "manifest" | "dex" | "resources" | "native" | "assets"
    ) {
        anyhow::bail!(
            "--target must be one of manifest|dex|resources|native|assets|all (got {target:?})"
        );
    }

    let mut rules_are_bundled = false;
    let holder = if let Some(src) = rules_src {
        progress!("compiling inline yara rules ({:?} bytes) [restricted]", src.len());
        RulesHolder::Owned(Box::new(compile_rules_str_restricted(&[("inline", src)])?))
    } else {
        match rules_path {
            None => {
                progress!("no --rules supplied, using bundled ruleset (credential + packer)");
                let r = bundled_rules()
                    .ok_or_else(|| anyhow::anyhow!("bundled rules failed to compile"))?;
                rules_are_bundled = true;
                RulesHolder::Borrowed(r)
            }
            Some(p) => {
                if !p.exists() {
                    anyhow::bail!(
                        "rules path does not exist: {} — pass --rules <file-or-dir>",
                        p.display()
                    );
                }
                if p.is_dir() {
                    progress!("loading yara rules from directory {:?}", p);
                    RulesHolder::Owned(Box::new(load_rules_from_dir(p)?))
                } else {
                    progress!("loading yara rule file {:?}", p);
                    let src = std::fs::read_to_string(p)?;
                    // The file stem doubles as the rule namespace.
                    let ns = p.file_stem().and_then(|s| s.to_str()).unwrap_or("user");
                    RulesHolder::Owned(Box::new(compile_rules_str(&[(ns, &src)])?))
                }
            }
        }
    };
    let rules: &droidsaw_apk::yara_scan::Rules = &holder;

    let apk = ctx.require_apk()?;
    progress!("scanning {:?} with yara-x", apk.path);
    let hbc_asset_paths = collect_hbc_asset_paths(&apk.path);
    progress!(
        "yara: {:?} HBC asset paths detected for per-string-table scanning",
        hbc_asset_paths.len()
    );
    let (all, _yara_overflow_finding) = apk.yara_scan_with_skip(rules, &hbc_asset_paths);
    progress!(
        "yara produced {:?} raw matches (HBC assets skipped here, scanned per-string below: {:?})",
        all.len(),
        hbc_asset_paths.len()
    );

    // Drop any whole-asset match on a Hermes bundle, then add per-string matches.
    let mut bundle_aware: Vec<_> = all
        .into_iter()
        .filter(|m| !is_hbc_asset_match(&m.target, &hbc_asset_paths))
        .collect();
    for hbc_path in &hbc_asset_paths {
        let per_string = scan_hbc_per_string(rules, &apk.path, hbc_path, rules_are_bundled);
        progress!(
            "yara: hermes-bundle {:?} produced {:?} per-string matches",
            hbc_path,
            per_string.len()
        );
        bundle_aware.extend(per_string);
    }

    // `target` was validated above, so anything other than "all" is a scope name.
    let filtered: Vec<_> = if target == "all" {
        bundle_aware
    } else {
        bundle_aware
            .into_iter()
            .filter(|m| yara_target_matches(target, &m.target))
            .collect()
    };
    let (truncated, matches) = match limit {
        Some(n) if filtered.len() > n => (true, filtered.into_iter().take(n).collect::<Vec<_>>()),
        _ => (false, filtered),
    };

    let count = matches.len();
    let rows: Vec<Value> = matches
        .iter()
        .map(|m| {
            let metadata: serde_json::Map<String, Value> = m
                .metadata
                .iter()
                .map(|(k, v)| (k.clone(), Value::String(v.clone())))
                .collect();
            json!({
                "rule": m.rule,
                "namespace": m.namespace,
                "target": m.target,
                "tags": m.tags,
                "metadata": metadata,
            })
        })
        .collect();
    let out = json!({
        "matches": rows,
        "_meta": meta(
            count,
            truncated,
            "pass --rules <.yar|.yara|dir>; scope with --target manifest|dex|resources|native|assets|all",
            &["strings", "audit", "sbom", "trufflehog"],
        ),
    });
    Ok(out)
}
// Four-byte prefix that `collect_hbc_asset_paths` compares against the first
// bytes of each candidate archive entry; entries starting with it are treated
// as Hermes bytecode bundles.
const HERMES_MAGIC_4: [u8; 4] = [0xc6, 0x1f, 0xbc, 0x03];
/// Returns the names of archive entries that look like Hermes bytecode.
///
/// An entry is a candidate when it lives under `assets/`, or when it is none
/// of: a `lib/*.so` library, a `.dex`, `.arsc` or `.xml` file, or anything
/// under `META-INF/`. A candidate is reported when its first four bytes equal
/// `HERMES_MAGIC_4`.
///
/// Best effort: if the file cannot be opened or is not a readable ZIP the
/// result is empty, and unreadable entries are skipped.
fn collect_hbc_asset_paths(apk_path: &str) -> Vec<String> {
    use std::io::Read;

    let Ok(file) = std::fs::File::open(apk_path) else {
        return Vec::new();
    };
    let Ok(mut archive) = zip::ZipArchive::new(file) else {
        return Vec::new();
    };

    let mut hbc_entries = Vec::new();
    for index in 0..archive.len() {
        let Ok(mut entry) = archive.by_index(index) else {
            continue;
        };
        let name = entry.name().to_string();

        // Entry kinds that are handled by other scanners, unless under assets/.
        let excluded = (name.starts_with("lib/") && name.ends_with(".so"))
            || name.ends_with(".dex")
            || name.ends_with(".arsc")
            || name.ends_with(".xml")
            || name.starts_with("META-INF/");
        let candidate = name.starts_with("assets/") || !excluded;
        if !candidate {
            continue;
        }

        // Entries shorter than four bytes fail the read and are ignored.
        let mut magic = [0u8; 4];
        if entry.read_exact(&mut magic).is_ok() && magic == HERMES_MAGIC_4 {
            hbc_entries.push(name);
        }
    }
    hbc_entries
}
/// Tells whether a YARA match target refers to one of the Hermes assets.
///
/// Only targets of the form `asset:<path>` qualify, and `<path>` must equal
/// one of `hbc_paths` exactly.
fn is_hbc_asset_match(match_target: &str, hbc_paths: &[String]) -> bool {
    match_target
        .strip_prefix("asset:")
        .is_some_and(|asset_name| hbc_paths.iter().any(|known| known.as_str() == asset_name))
}
/// Scans one Hermes asset string by string with the given YARA rules.
///
/// The asset is read from the APK (at most 128 MiB of it) and parsed as a
/// Hermes bundle. Every non-empty string-table entry is scanned on its own and
/// reported under the target `hermes-string:<asset>:<index>`. When
/// `bundled_anchor_prefilter` is set, strings without a credential prefix are
/// skipped before scanning.
///
/// If the bundle cannot be parsed, the raw bytes are scanned once under
/// `hermes-fallback-raw:<asset>`. Any failure to open or read the archive
/// entry yields an empty result.
fn scan_hbc_per_string(
    rules: &droidsaw_apk::yara_scan::Rules,
    apk_path: &str,
    hbc_asset_path: &str,
    bundled_anchor_prefilter: bool,
) -> Vec<droidsaw_apk::yara_scan::YaraMatch> {
    use droidsaw_apk::yara_scan::scan_bytes_with_scanner;
    use std::io::Read;
    const HBC_ENTRY_MAX: u64 = 128 * 1024 * 1024;

    // Installed before any Hermes parsing and held until the function returns.
    let _drain_guard = crate::context::HermesFindingDrainGuard::install_discard();

    let Ok(file) = std::fs::File::open(apk_path) else {
        return Vec::new();
    };
    let Ok(mut archive) = zip::ZipArchive::new(file) else {
        return Vec::new();
    };
    let Ok(mut entry) = archive.by_name(hbc_asset_path) else {
        return Vec::new();
    };

    let mut raw = Vec::new();
    let read_ok = entry
        .by_ref()
        .take(HBC_ENTRY_MAX)
        .read_to_end(&mut raw)
        .is_ok();
    if !read_ok || raw.is_empty() {
        return Vec::new();
    }

    let mut scanner = droidsaw_apk::yara_scan::Scanner::new(rules);
    let Ok(hbc) = droidsaw_hermes::parser::HbcFile::parse(&raw, None) else {
        let label = format!("hermes-fallback-raw:{}", hbc_asset_path);
        return scan_bytes_with_scanner(&mut scanner, &label, &raw);
    };

    let mut matches = Vec::new();
    for index in 0..hbc.string_count {
        let text = hbc.string_as_str_or_empty(index);
        let skip = text.is_empty()
            || (bundled_anchor_prefilter
                && !droidsaw_apk::yara_scan::string_contains_credential_prefix(&text));
        if skip {
            continue;
        }
        let target = format!("hermes-string:{}:{}", hbc_asset_path, index);
        matches.extend(scan_bytes_with_scanner(&mut scanner, &target, text.as_bytes()));
    }
    matches
}
/// Decides whether a YARA match target belongs to the requested scope.
///
/// `manifest` requires an exact match; every other scope is identified by a
/// target prefix. The `assets` scope also covers the per-string and raw
/// fallback targets produced for Hermes bundles. Unknown scopes match nothing.
fn yara_target_matches(filter: &str, match_target: &str) -> bool {
    let prefixes: &[&str] = match filter {
        "manifest" => return match_target == "manifest",
        "dex" => &["dex:"],
        "resources" => &["resources.arsc"],
        "native" => &["so:"],
        "assets" => &["asset:", "hermes-string:", "hermes-fallback-raw:"],
        _ => &[],
    };
    prefixes.iter().any(|prefix| match_target.starts_with(*prefix))
}
/// Builds the `detectors` section of the audit envelope: one status object
/// each for yara, semgrep and trufflehog.
///
/// * `mode` — the audit mode; decides whether semgrep counts as "extracted"
///   or "skipped_by_mode".
/// * `trufflehog_result` — the JSON returned by the trufflehog run, or `None`
///   when the mode did not invoke trufflehog.
///
/// The yara entry is reported as "ran" unconditionally.
// NOTE(review): the `reason` below mentions an `i + 1` dex-layer label, but
// this function contains no arithmetic or `as` casts — the attribute looks
// copied from a sibling function; confirm whether it is still needed.
#[allow(clippy::arithmetic_side_effects, clippy::as_conversions, reason = "`i + 1` dex-layer label in progress! format; bounded by ctx.dex.len().")]
fn build_detectors_status(
mode: droidsaw_cli_contract::AuditMode,
trufflehog_result: Option<&Value>,
) -> Value {
// PATH probes are informational; they feed `binary_on_path` and the notes.
let semgrep_on_path = which_binary_in_path("semgrep");
let trufflehog_on_path = which_binary_in_path("trufflehog");
let yara = json!({
"status": "ran",
"note": "bundled credential / packer / crypto / antianalysis rules; \
Aho-Corasick prefilter on per-string HBC scan; \
results coalesced by (rule, asset)",
});
let semgrep = if mode.runs_semgrep() {
json!({
"status": "extracted",
"binary_on_path": semgrep_on_path,
"note": if semgrep_on_path {
"source extracted; run the returned `command` hint to scan, \
or use the MCP `audit` tool for end-to-end subprocess invocation"
} else {
"source extracted; `semgrep` binary NOT on PATH — \
install semgrep, then run the returned `command` hint"
},
})
} else {
json!({
"status": "skipped_by_mode",
"mode": mode.as_cli_str(),
"binary_on_path": semgrep_on_path,
"note": "pass `--mode=full` or `--mode=semgrep` to extract source for semgrep",
})
};
// Arm order matters: a result with `ran == true` wins, then one carrying an
// `error` field, then a missing binary, then "ran but had no input".
let trufflehog = match trufflehog_result {
Some(r) if r.get("ran").and_then(|b| b.as_bool()).unwrap_or(false) => {
json!({
"status": "ran",
"binary_on_path": trufflehog_on_path,
"hit_count": r.get("hit_count").cloned().unwrap_or(json!(0)),
"verified_count": r.get("verified_count").cloned().unwrap_or(json!(0)),
"credentials_written": r.get("credentials_written").cloned().unwrap_or(json!(0)),
"note": "credentials persisted to per-input ./droidsaw-<stem>.db; \
query with `droidsaw query credentials`",
})
}
Some(r) if r.get("error").is_some() => {
json!({
"status": "error",
"binary_on_path": trufflehog_on_path,
"error": r.get("error").cloned().unwrap_or(Value::Null),
"note": "trufflehog subprocess invocation failed; see error field",
})
}
Some(_) if !trufflehog_on_path => {
json!({
"status": "binary_missing",
"binary_on_path": false,
"note": "trufflehog binary NOT on PATH — install trufflehog, \
then re-run with `--mode=full` or `--mode=trufflehog`",
})
}
// A result exists, it did not run, reported no error, and the binary is
// present: this is reported as "nothing to scan".
Some(_) => {
json!({
"status": "no_strings_extracted",
"binary_on_path": trufflehog_on_path,
"note": "no strings ≥ min_length extracted from input layers; \
nothing for trufflehog to consume",
})
}
None => {
json!({
"status": "skipped_by_mode",
"mode": mode.as_cli_str(),
"binary_on_path": trufflehog_on_path,
"note": "pass `--mode=full` or `--mode=trufflehog` to invoke \
the trufflehog subprocess",
})
}
};
json!({
"yara": yara,
"semgrep": semgrep,
"trufflehog": trufflehog,
})
}
/// Reports whether a regular file called `name` exists in any `PATH` directory.
///
/// Returns `false` when `PATH` is unset. Only the exact file name is probed:
/// no executable-permission check is made and no platform-specific suffix is
/// appended.
fn which_binary_in_path(name: &str) -> bool {
    let Some(search_path) = std::env::var_os("PATH") else {
        return false;
    };
    std::env::split_paths(&search_path).any(|dir| dir.join(name).is_file())
}
/// Runs the light audit in `AuditMode::Basic`.
///
/// Thin wrapper around [`audit_light_with_mode`].
pub fn audit_light(ctx: &CrossLayerContext, entropy_threshold: f32) -> anyhow::Result<Value> {
    let mode = droidsaw_cli_contract::AuditMode::Basic;
    audit_light_with_mode(ctx, entropy_threshold, mode)
}
/// Runs the audit pipeline and returns the serialized audit envelope.
///
/// Steps, in order:
/// 1. collect findings via `collect_apk_findings`;
/// 2. if a Hermes bundle is present, run the Hermes scanner and report
///    high-entropy strings through `progress!`;
/// 3. report high-entropy DEX strings through `progress!`;
/// 4. sort findings by severity, then id;
/// 5. when `mode.runs_trufflehog()`, run trufflehog and persist credentials to
///    `./droidsaw-<input stem>.db`;
/// 6. compute the summaries and assemble the envelope.
///
/// The entropy passes only emit progress messages; they do not add findings.
///
/// # Errors
///
/// Propagates failures from the trufflehog run and from serializing the
/// envelope.
#[allow(clippy::arithmetic_side_effects, clippy::as_conversions, reason = "`i + 1` dex-layer label in progress! format; bounded by ctx.dex.len().")]
pub fn audit_light_with_mode(
ctx: &CrossLayerContext,
entropy_threshold: f32,
mode: droidsaw_cli_contract::AuditMode,
) -> anyhow::Result<Value> {
let mut findings = collect_apk_findings(ctx, entropy_threshold);
if let Some(hbc_owned) = ctx.hbc.as_ref() {
let hbc = hbc_owned.hbc();
let hbc_data = hbc_owned.bytes();
// NOTE(review): the scan result is discarded here — presumably the call is
// kept for a side effect; confirm it is still required.
let _ = droidsaw_hermes::scanner::scan_parsed(hbc, hbc_data);
for i in 0..hbc.string_count {
let s = hbc.string_as_str_or_empty(i);
let ent = shannon_entropy(s.as_bytes());
if ent >= entropy_threshold {
progress!(
"hbc high-entropy string #{i:?}: entropy={ent:.3} value={s:?}"
);
}
}
}
for (i, dex) in ctx.dex.iter().enumerate() {
for (j, entry) in dex.strings.iter().enumerate() {
// Entropy is measured on the raw bytes; the lossy decode is only used
// for the progress message.
let ent = shannon_entropy(entry.raw_bytes());
if ent >= entropy_threshold {
let s = entry.as_str_lossy();
progress!(
"dex{:?} high-entropy string #{j:?}: entropy={ent:.3} value={s:?}",
i + 1
);
}
}
}
findings.sort_by(|a, b| a.severity.cmp(&b.severity).then_with(|| a.id.cmp(&b.id)));
let trufflehog_result: Option<Value> = if mode.runs_trufflehog() {
let input = std::path::PathBuf::from(&ctx.path);
let stem = input
.file_stem()
.and_then(|s| s.to_str())
.unwrap_or("unknown");
// The credentials database is created relative to the current directory.
let db_path = std::path::PathBuf::from(format!("./droidsaw-{stem}.db"));
Some(crate::trufflehog::run::run_and_persist(
ctx,
crate::trufflehog::run::DEFAULT_MIN_LENGTH,
&db_path,
None,
None,
)?)
} else {
None
};
let count = findings.len();
// Keys are the `Debug` rendering of each severity.
let mut severity_summary = std::collections::BTreeMap::<String, u64>::new();
for f in &findings {
let c = severity_summary.entry(format!("{:?}", f.severity)).or_insert(0);
*c = c.saturating_add(1);
}
// Size summary of the input. Present for an APK, for raw DEX input, or for a
// bare Hermes bundle; `None` when none of those is available.
let apk_summary: Option<audit_envelope::ApkSummary> = {
let has_hbc = ctx.hbc.is_some();
let hbc_bytes: u64 = ctx
.hbc
.as_ref()
.map(|h| h.bytes().len())
.unwrap_or(0)
.try_into()
.unwrap_or(u64::MAX);
let hbc_function_count: u32 = ctx
.hbc
.as_ref()
.map(|h| h.hbc().function_count)
.unwrap_or(0);
// Declared methods (direct + virtual) summed over every class-data entry.
let dex_methods_total: u64 = ctx.dex.iter().fold(0u64, |acc, df| {
let per_dex: u64 = df.class_datas.values().fold(0u64, |a, cd| {
a.saturating_add(cd.direct_methods.len() as u64)
.saturating_add(cd.virtual_methods.len() as u64)
});
acc.saturating_add(per_dex)
});
let dex_classes_total: u64 = ctx
.dex
.iter()
.fold(0u64, |acc, df| acc.saturating_add(df.class_defs.len() as u64));
if let Some(apk) = ctx.apk.as_ref() {
// APK input: DEX count and byte total come from the archive's DEX entries.
let dex_count = apk.dex.len().try_into().unwrap_or(u32::MAX);
let dex_total_bytes: u64 = apk
.dex
.iter()
.fold(0u64, |acc, entry| acc.saturating_add(entry.data.len() as u64));
Some(audit_envelope::ApkSummary {
has_hbc,
hbc_bytes,
hbc_function_count,
dex_count,
dex_total_bytes,
dex_methods_total,
dex_classes_total,
})
} else if !ctx.dex.is_empty() {
// No APK but parsed DEX: the byte total comes from `dex_direct_bytes`.
let dex_count = ctx.dex.len().try_into().unwrap_or(u32::MAX);
let dex_total_bytes: u64 = ctx
.dex_direct_bytes
.as_ref()
.map(|b| b.len() as u64)
.unwrap_or(0);
Some(audit_envelope::ApkSummary {
has_hbc,
hbc_bytes,
hbc_function_count,
dex_count,
dex_total_bytes,
dex_methods_total,
dex_classes_total,
})
} else if has_hbc {
// Hermes bundle only: all DEX figures are zero.
Some(audit_envelope::ApkSummary {
has_hbc,
hbc_bytes,
hbc_function_count,
dex_count: 0,
dex_total_bytes: 0,
dex_methods_total: 0,
dex_classes_total: 0,
})
} else {
None
}
};
// These are computed before `findings` is moved into the envelope.
let taint_flow_count =
audit_envelope::AuditEnvelope::count_taint_flow_findings(&findings);
let severity_by_gauge = audit_envelope::AuditEnvelope::stratify_by_gauge(&findings);
let top_findings = audit_envelope::AuditEnvelope::rank_top_findings(
&findings,
audit_envelope::TOP_FINDINGS_CAP,
);
let envelope = audit_envelope::AuditEnvelope {
schema_version: audit_envelope::AUDIT_ENVELOPE_VERSION,
findings,
finding_count: count as u64,
findings_emitted: count as u64,
taint_flow_count,
severity_summary,
severity_by_gauge,
top_findings,
truncated: false,
db_path: None,
db_queries: None,
finding_xrefs_written: None,
detectors: Some(build_detectors_status(mode, trufflehog_result.as_ref())),
trufflehog: trufflehog_result,
// Left empty here; `audit_full_with_mode` inserts the semgrep result afterwards.
semgrep: None,
timings_ms: None,
apk_summary,
meta: audit_envelope::AuditMeta {
count: count as u64,
truncated: false,
hint: "filter by severity via `jq '.findings[] | select(.severity==\"High\")'`; \
pair with `export` to query in sqlite"
.to_string(),
related: vec![
"scan-corpus".to_string(),
"export".to_string(),
"manifest".to_string(),
"signing".to_string(),
],
thread_pool_size: rayon::current_num_threads(),
},
};
Ok(serde_json::to_value(&envelope)?)
}
/// Runs the full audit (light audit plus semgrep extraction) in
/// `AuditMode::Full`.
///
/// Thin wrapper around [`audit_full_with_mode`].
pub fn audit_full(
    ctx: &CrossLayerContext,
    entropy_threshold: f32,
    output: Option<&std::path::Path>,
    semgrep_args: &crate::semgrep::SemgrepArgs,
) -> anyhow::Result<Value> {
    let mode = droidsaw_cli_contract::AuditMode::Full;
    audit_full_with_mode(ctx, entropy_threshold, output, semgrep_args, mode)
}
/// Runs the light audit for `mode`, then the semgrep step, and stores the
/// semgrep result under the envelope's `semgrep` key.
///
/// The semgrep step is invoked regardless of `mode`; if the light-audit value
/// is not a JSON object the semgrep result is dropped.
///
/// # Errors
///
/// Propagates failures from either step.
pub fn audit_full_with_mode(
    ctx: &CrossLayerContext,
    entropy_threshold: f32,
    output: Option<&std::path::Path>,
    semgrep_args: &crate::semgrep::SemgrepArgs,
    mode: droidsaw_cli_contract::AuditMode,
) -> anyhow::Result<Value> {
    let mut envelope = audit_light_with_mode(ctx, entropy_threshold, mode)?;
    let semgrep_report = semgrep(ctx, output, semgrep_args)?;
    match envelope.as_object_mut() {
        Some(fields) => {
            fields.insert(String::from("semgrep"), semgrep_report);
        }
        None => {}
    }
    Ok(envelope)
}
/// Collects all findings for `ctx` and returns them ordered by severity,
/// with ties broken by finding id.
///
/// The result is always `Ok`; the `anyhow::Result` wrapper is part of the
/// public signature.
pub fn collect_findings(ctx: &CrossLayerContext, entropy_threshold: f32) -> anyhow::Result<Vec<Finding>> {
    let mut collected = collect_apk_findings(ctx, entropy_threshold);
    collected.sort_by(|lhs, rhs| {
        let by_severity = lhs.severity.cmp(&rhs.severity);
        by_severity.then_with(|| lhs.id.cmp(&rhs.id))
    });
    Ok(collected)
}
/// Maximum number of findings `cap_findings` keeps from a single list before
/// truncating it and appending a truncation marker.
const FINDING_CAP_PER_KIND: usize = 256;
/// Returns a copy of `s` in which every character other than tab, newline,
/// or printable ASCII (`' '` through `'~'`) is replaced by `'?'`.
///
/// The replacement is one-for-one per character, so the character count of
/// the output equals that of the input.
fn sanitize_detail(s: &str) -> String {
    // Every output char is ASCII, so the byte length never exceeds the input's.
    let mut cleaned = String::with_capacity(s.len());
    for ch in s.chars() {
        let keep = matches!(ch, '\t' | '\n' | ' '..='~');
        cleaned.push(if keep { ch } else { '?' });
    }
    cleaned
}
/// Limits `v` to at most `FINDING_CAP_PER_KIND` findings.
///
/// Lists at or under the cap are returned unchanged. A longer list is
/// truncated to the cap and one extra `Severity::Info` finding (identified by
/// `id`, attributed to `layer`, confidence `Verified`) is appended that
/// states how many entries were dropped. A capped list therefore holds
/// `FINDING_CAP_PER_KIND + 1` entries.
fn cap_findings(mut v: Vec<Finding>, id: &'static str, layer: Layer) -> Vec<Finding> {
    if v.len() <= FINDING_CAP_PER_KIND {
        return v;
    }
    let dropped = v.len().saturating_sub(FINDING_CAP_PER_KIND);
    v.truncate(FINDING_CAP_PER_KIND);
    let mut marker = Finding::new(
        id,
        layer,
        Severity::Info,
        format!(
            "truncated to {FINDING_CAP_PER_KIND} entries; {dropped} additional Findings of the same kind dropped to defend the audit envelope against unbounded-input DoS"
        ),
    );
    marker.confidence = droidsaw_common::finding::Confidence::Verified;
    v.push(marker);
    v
}
/// Default entropy threshold, in bits.
// NOTE(review): presumably the default for the `entropy_threshold` argument
// taken by the audit entry points in this module — confirm at the call sites.
pub const DEFAULT_ENTROPY_THRESHOLD_BITS: f32 = 4.5;
/// Interprets `s` as a class reference and returns it as a
/// `(type descriptor, dotted name)` pair, or `None` when `s` does not look
/// like a class reference.
///
/// Recognition order:
/// 1. an exact match on one of the known class loaders, in descriptor or
///    dotted form;
/// 2. the bare (unqualified) name of a known class loader;
/// 3. any `L…;` descriptor with a non-empty body — slashes become dots;
/// 4. any string containing a dot but no slash or space — dots become
///    slashes and the result is wrapped in `L…;`.
fn classify_dynamic_loading_token(s: &str) -> Option<(String, String)> {
    const KNOWN: &[(&str, &str)] = &[
        ("Ldalvik/system/DexClassLoader;", "dalvik.system.DexClassLoader"),
        ("Ldalvik/system/PathClassLoader;", "dalvik.system.PathClassLoader"),
        (
            "Ldalvik/system/InMemoryDexClassLoader;",
            "dalvik.system.InMemoryDexClassLoader",
        ),
    ];

    // Fully-qualified spellings take priority over bare class names.
    let known_match = KNOWN
        .iter()
        .copied()
        .find(|&(desc, dot)| s == desc || s == dot)
        .or_else(|| {
            KNOWN.iter().copied().find(|&(_, dot)| {
                let bare = dot.rsplit('.').next().unwrap_or(dot);
                s == bare
            })
        });
    if let Some((desc, dot)) = known_match {
        return Some((desc.to_string(), dot.to_string()));
    }

    // Generic `L<body>;` descriptor; an empty body falls through.
    let descriptor_body = s
        .strip_prefix('L')
        .and_then(|rest| rest.strip_suffix(';'))
        .filter(|body| !body.is_empty());
    if let Some(body) = descriptor_body {
        return Some((s.to_string(), body.replace('/', ".")));
    }

    // Generic dotted name.
    let has_separator = s.contains('/') || s.contains(' ');
    if s.contains('.') && !has_separator {
        let as_descriptor = format!("L{};", s.replace('.', "/"));
        return Some((as_descriptor, s.to_string()));
    }
    None
}
/// Reports whether the xref index holds any reference to the token `s`.
///
/// The token counts as referenced when `s` itself is a key of
/// `string_to_methods`, or — once `s` classifies as a class reference —
/// when its descriptor is a key of `type_xrefs`, or either its dotted form
/// or its descriptor is a key of `string_to_methods`.
fn dynamic_loading_token_referenced(
    xrefs: &droidsaw_dex::xrefs::Xrefs,
    s: &str,
) -> bool {
    let referenced_as_string = |needle: &str| xrefs.string_to_methods.contains_key(needle);

    if referenced_as_string(s) {
        return true;
    }
    let Some((descriptor, dot_form)) = classify_dynamic_loading_token(s) else {
        return false;
    };
    xrefs.type_xrefs.contains_key(&descriptor)
        || referenced_as_string(&dot_form)
        || referenced_as_string(&descriptor)
}
/// Splits the dynamic-loading strings into three sorted, de-duplicated lists.
///
/// Returns, in order:
/// 1. class-reference tokens that the xref index references,
/// 2. class-reference tokens with no reference in the index,
/// 3. strings that do not classify as class references at all.
fn partition_dynamic_loading_strings(
    xrefs: &droidsaw_dex::xrefs::Xrefs,
    strings: &[String],
) -> (Vec<String>, Vec<String>, Vec<String>) {
    // An ordered set yields the same sequence as sort-then-dedup.
    let unique: std::collections::BTreeSet<String> = strings.iter().cloned().collect();

    let mut referenced_class_load = Vec::new();
    let mut unreferenced_class_load = Vec::new();
    let mut non_class_load = Vec::new();
    for token in unique {
        let bucket = if classify_dynamic_loading_token(&token).is_none() {
            &mut non_class_load
        } else if dynamic_loading_token_referenced(xrefs, &token) {
            &mut referenced_class_load
        } else {
            &mut unreferenced_class_load
        };
        bucket.push(token);
    }
    (referenced_class_load, unreferenced_class_load, non_class_load)
}
/// Rewrites a dynamic-loading finding according to the xref partition.
///
/// * Nothing survives (both `referenced_class_load` and `non_class_load`
///   are empty): severity is lowered to `Severity::Info` and the detail
///   lists the unreferenced descriptors.
/// * Otherwise severity is left alone and the detail lists the surviving
///   strings (referenced class tokens first, then non-class strings),
///   followed by the unreferenced descriptors when there are any.
fn apply_dynamic_loading_partition_to_finding(
    finding: &mut Finding,
    referenced_class_load: &[String],
    unreferenced_class_load: &[String],
    non_class_load: &[String],
) {
    if referenced_class_load.is_empty() && non_class_load.is_empty() {
        let unreferenced_list = unreferenced_class_load.join(", ");
        finding.severity = Severity::Info;
        finding.detail = format!(
            "Type descriptor(s) present in DEX string pool but no function references \
             (no allocation, cast, const-class, or reflective load) — likely metadata / \
             type-pool artefact, not a dynamic-load call site: {unreferenced_list}"
        );
        return;
    }

    let surviving_joined = referenced_class_load
        .iter()
        .chain(non_class_load)
        .map(String::as_str)
        .collect::<Vec<_>>()
        .join(", ");

    finding.detail = if unreferenced_class_load.is_empty() {
        format!(
            "Dynamic code loading in DEX strings (function references confirmed via \
             dex xref index): {surviving_joined}"
        )
    } else {
        let unreferenced_joined = unreferenced_class_load.join(", ");
        format!(
            "Dynamic code loading in DEX strings (function references confirmed via \
             dex xref index): {surviving_joined}; additionally, the following type \
             descriptors appeared in the string pool but had no function references \
             and were not counted toward this finding: {unreferenced_joined}"
        )
    };
}
/// Re-evaluates the first `DYNAMIC_LOADING` finding in `findings` against a
/// merged DEX xref index.
///
/// Does nothing when there is no such finding, or when no xref index could
/// be built for any DEX file. A DEX whose bytes are unavailable is skipped
/// silently; a DEX whose index fails to build is reported via `progress!`
/// and skipped.
// NOTE(review): when only some DEX files index successfully the gate still
// runs on the partial index, so a reference living in a skipped DEX can make
// a token look unreferenced — confirm this is the intended trade-off.
fn gate_dynamic_loading(
    ctx: &CrossLayerContext,
    apk: &droidsaw_apk::Apk,
    findings: &mut [Finding],
) {
    let Some(target) = findings.iter_mut().find(|f| f.id == "DYNAMIC_LOADING") else {
        return;
    };

    let mut merged = droidsaw_dex::xrefs::Xrefs::default();
    let mut built_any = false;
    for (dex_index, dex) in ctx.dex.iter().enumerate() {
        let Some(raw) = ctx.dex_bytes(dex_index) else {
            continue;
        };
        let index = match droidsaw_dex::xrefs::Xrefs::build(dex, raw) {
            Ok(index) => index,
            Err(e) => {
                progress!(
                    "dex{:?}: xrefs build failed (DYNAMIC_LOADING gate skipped for this dex): {:?}",
                    dex_index.saturating_add(1),
                    e
                );
                continue;
            }
        };
        merged.merge(index);
        built_any = true;
    }
    if !built_any {
        return;
    }

    let (referenced, unreferenced, other) =
        partition_dynamic_loading_strings(&merged, &apk.dynamic_loading_strings);
    apply_dynamic_loading_partition_to_finding(target, &referenced, &unreferenced, &other);
}
/// Gathers every finding the audit can derive from `ctx` into one vector,
/// sorted by severity and then by finding id.
///
/// Stages, in order:
/// 1. Hermes findings already stored on the context (capped), plus a finding
///    for a recorded HBC parse failure.
/// 2. APK-level checks when an APK is present: ZIP anomalies, the APK
///    security audit with YARA matches from HBC assets, the DYNAMIC_LOADING
///    gate, signing findings, and the AXML `0x0104` unknown-chunk marker.
/// 3. DEX structural diagnostics for every DEX file.
/// 4. A summary finding for high-entropy Hermes strings.
/// 5. Per-function HBC taint analysis.
/// 6. Per-method DEX taint analysis, run in parallel with rayon, including a
///    second bridge-seeded run for methods that are React bridge targets.
/// 7. Cross-layer stitching of HBC and DEX bridge results.
/// 8. Informational findings when analysis caches reported truncations.
///
/// `entropy_threshold` is the minimum `shannon_entropy` value an HBC string
/// must reach to be counted as high-entropy in stage 4.
#[allow(clippy::arithmetic_side_effects, clippy::as_conversions, reason = "HBC bytecode region validated by `end_u64 = f.offset as u64 + f.size as u64` + `if end_u64 > hbc_data.len() as u64 { continue }` guard before narrowing `as usize`.")]
pub(super) fn collect_apk_findings(ctx: &CrossLayerContext, entropy_threshold: f32) -> Vec<Finding> {
let mut findings = Vec::new();
// Hermes findings already attached to the context, capped per kind.
findings.extend(cap_findings(
ctx.hermes_findings.clone(),
"HBC_FINDINGS_TRUNCATED",
Layer::Hbc,
));
// A recorded HBC parse failure is surfaced as its own Medium finding.
if let Some(failure) = ctx.hbc_parse_error.as_ref() {
findings.push(Finding::new(
"HBC_BUNDLE_UNPARSEABLE",
Layer::Hbc,
Severity::Medium,
sanitize_detail(&format!(
"Hermes bundle present but unparseable; HBC-layer analysis is blind on a live code path: {}",
failure.message()
)),
));
}
// APK-level checks run only when the context carries an APK.
if let Some(apk) = ctx.apk.as_ref() {
use droidsaw_apk::apk::ZipAnomalyKind;
use droidsaw_common::finding::{Confidence, Layer, Severity};
let mut zip_findings: Vec<Finding> = Vec::new();
// Map each ZIP structural anomaly to a finding id and severity.
for anomaly in &apk.zip_anomalies {
let (id, severity) = match &anomaly.kind {
ZipAnomalyKind::DuplicateEntry => ("APK_ZIP_DUPLICATE_ENTRY", Severity::High),
ZipAnomalyKind::ManifestNotFirst => ("APK_ZIP_MANIFEST_NOT_FIRST", Severity::Low),
ZipAnomalyKind::OutOfOrderEntries => ("APK_ZIP_OUT_OF_ORDER_ENTRIES", Severity::Low),
ZipAnomalyKind::SuspiciousEocdComment { .. } => {
("APK_ZIP_SUSPICIOUS_EOCD_COMMENT", Severity::Medium)
}
ZipAnomalyKind::DoubleEocd { .. } => ("APK_ZIP_DOUBLE_EOCD", Severity::High),
ZipAnomalyKind::NonZeroDiskNumber { .. } => {
("APK_ZIP_NON_ZERO_DISK_NUMBER", Severity::Medium)
}
ZipAnomalyKind::FeatureModuleOverflow { .. } => {
("APK_ZIP_FEATURE_MODULE_OVERFLOW", Severity::Low)
}
ZipAnomalyKind::CrossSplitTrustMismatch { .. } => {
("APK_CROSS_SPLIT_TRUST_MISMATCH", Severity::High)
}
ZipAnomalyKind::DuplicateSplitPrefix { .. } => {
("APK_DUPLICATE_SPLIT_PREFIX", Severity::Medium)
}
ZipAnomalyKind::MalformedCertEnvelope { .. } => {
("APK_MALFORMED_CERT_ENVELOPE", Severity::Medium)
}
ZipAnomalyKind::BaseSplitNotFirst { .. } => {
("APK_BASE_SPLIT_NOT_FIRST", Severity::High)
}
ZipAnomalyKind::LfhCentralSizeMismatch { .. } => {
("APK_ZIP_LFH_SIZE_MISMATCH", Severity::High)
}
ZipAnomalyKind::LfhCentralMethodMismatch { .. } => {
("APK_ZIP_LFH_METHOD_MISMATCH", Severity::High)
}
ZipAnomalyKind::EncryptionFlagSet { .. } => {
("APK_ZIP_ENCRYPTION_FLAG_SET", Severity::Critical)
}
ZipAnomalyKind::LeadingNonZipBytes { .. } => {
("APK_ZIP_LEADING_NON_ZIP_BYTES", Severity::High)
}
};
let mut f = Finding::new(id, Layer::Apk, severity, sanitize_detail(&anomaly.detail));
f.confidence = Confidence::Verified;
zip_findings.push(f);
}
findings.extend(cap_findings(
zip_findings,
"APK_ZIP_ANOMALIES_TRUNCATED",
Layer::Apk,
));
// Scan each HBC asset with the bundled credential YARA rules (when both
// exist) and hand the matches to the APK security audit.
let hbc_asset_paths = collect_hbc_asset_paths(&apk.path);
let mut hbc_extra_yara_matches: Vec<droidsaw_apk::yara_scan::YaraMatch> = Vec::new();
if !hbc_asset_paths.is_empty()
&& let Some(rules) = droidsaw_apk::yara_scan::bundled_credential_rules()
{
for hbc_path in &hbc_asset_paths {
hbc_extra_yara_matches.extend(scan_hbc_per_string(
rules, &apk.path, hbc_path, true,
));
}
}
let mut apk_findings =
apk.audit_security_with_yara_overrides(&hbc_asset_paths, &hbc_extra_yara_matches);
// Re-evaluate any DYNAMIC_LOADING finding against the DEX xref index.
gate_dynamic_loading(ctx, apk, &mut apk_findings);
findings.extend(apk_findings);
// Signing findings are added only when signing info can be read; a
// failure to read it is ignored here.
if let Ok(signing) = apk.signing_info() {
findings.extend(signing.security_findings_at_with_context(
std::time::SystemTime::now(),
signing_findings_context_from(ctx),
));
}
// Lenient re-parse of the binary manifest; each unknown chunk of type
// 0x0104 yields an informational obfuscator-marker finding.
if let Some(raw) = apk.manifest_raw.as_ref()
&& let Ok((_, unknown_chunks, _)) =
Manifest::from_binary_xml_with_config(raw, &ParseConfig::lenient())
{
for uc in &unknown_chunks {
if uc.chunk_type == 0x0104 {
findings.push(
Finding::new(
"COMMERCIAL_OBFUSCATOR_DETECTED",
Layer::Apk,
Severity::Info,
format!(
"AXML chunk type 0x0104 at offset {} — \
commercial-obfuscator marker (DexGuard-class). \
This is an enrichment signal, not a vulnerability; \
companion `UNKNOWN_SIGNING_BLOCK 0xfa1cfabe` \
promotes confidence when present.",
uc.offset,
),
)
.with_extra(format!(
"{{\"chunk_type\":\"0x0104\",\"offset\":{}}}",
uc.offset,
)),
);
}
}
}
}
// DEX structural diagnostics, one accumulator per kind. Each kind is
// capped per DEX inside the loop and capped again across all DEX files
// when the accumulators are merged into `findings` below.
let mut unrecognized: Vec<Finding> = Vec::new();
let mut header_map: Vec<Finding> = Vec::new();
let mut string_length: Vec<Finding> = Vec::new();
let mut code_item: Vec<Finding> = Vec::new();
let mut detector_indeterminate: Vec<Finding> = Vec::new();
let mut duplicate_class_def: Vec<Finding> = Vec::new();
let mut class_data_off_collision: Vec<Finding> = Vec::new();
let mut spec_invariant: Vec<Finding> = Vec::new();
for (i, dex) in ctx.dex.iter().enumerate() {
// A DEX without available bytes contributes no diagnostics.
let Some(dex_data) = ctx.dex_bytes(i) else {
continue;
};
unrecognized.extend(cap_findings(
droidsaw_dex::diag::collect_unrecognized_findings(dex, dex_data),
"DEX_UNRECOGNIZED_TRUNCATED",
Layer::Dex,
));
header_map.extend(cap_findings(
droidsaw_dex::diag::collect_header_map_findings(dex),
"DEX_HEADER_MAP_TRUNCATED",
Layer::Dex,
));
string_length.extend(cap_findings(
droidsaw_dex::diag::collect_string_length_findings(dex),
"DEX_STRING_LENGTH_TRUNCATED",
Layer::Dex,
));
code_item.extend(cap_findings(
droidsaw_dex::diag::collect_code_item_findings(dex),
"DEX_CODE_ITEM_TRUNCATED",
Layer::Dex,
));
detector_indeterminate.extend(cap_findings(
droidsaw_dex::diag::collect_detector_indeterminate_findings(dex),
"DEX_DETECTOR_INDETERMINATE_TRUNCATED",
Layer::Dex,
));
duplicate_class_def.extend(cap_findings(
droidsaw_dex::diag::collect_duplicate_class_def_findings(dex),
"DEX_DUPLICATE_CLASS_DEF_TRUNCATED",
Layer::Dex,
));
class_data_off_collision.extend(cap_findings(
droidsaw_dex::diag::collect_class_data_off_collision_findings(dex),
"DEX_CLASS_DATA_OFF_COLLISION_TRUNCATED",
Layer::Dex,
));
spec_invariant.extend(cap_findings(
droidsaw_dex::diag::collect_spec_invariant_findings(dex),
"DEX_SPEC_INVARIANT_TRUNCATED",
Layer::Dex,
));
}
findings.extend(cap_findings(unrecognized, "DEX_UNRECOGNIZED_TRUNCATED", Layer::Dex));
findings.extend(cap_findings(header_map, "DEX_HEADER_MAP_TRUNCATED", Layer::Dex));
findings.extend(cap_findings(string_length, "DEX_STRING_LENGTH_TRUNCATED", Layer::Dex));
findings.extend(cap_findings(code_item, "DEX_CODE_ITEM_TRUNCATED", Layer::Dex));
findings.extend(cap_findings(
detector_indeterminate,
"DEX_DETECTOR_INDETERMINATE_TRUNCATED",
Layer::Dex,
));
findings.extend(cap_findings(
duplicate_class_def,
"DEX_DUPLICATE_CLASS_DEF_TRUNCATED",
Layer::Dex,
));
findings.extend(cap_findings(
class_data_off_collision,
"DEX_CLASS_DATA_OFF_COLLISION_TRUNCATED",
Layer::Dex,
));
findings.extend(cap_findings(
spec_invariant,
"DEX_SPEC_INVARIANT_TRUNCATED",
Layer::Dex,
));
// High-entropy Hermes strings: count strings of at least HBC_MIN_STR_LEN
// bytes at or above the threshold, excluding URLs and strings starting
// with `com.`/`org.`/`net.`/`io.`, and emit one summary finding with up
// to three samples.
if let Some(hbc_owned) = ctx.hbc.as_ref() {
let hbc = hbc_owned.hbc();
const HBC_MIN_STR_LEN: usize = 8;
let mut high_entropy: Vec<String> = Vec::new();
for i in 0..hbc.string_count {
let s = hbc.string_as_str_or_empty(i);
if s.len() >= HBC_MIN_STR_LEN
&& shannon_entropy(s.as_bytes()) >= entropy_threshold
&& !s.contains("://")
&& !s.starts_with("com.")
&& !s.starts_with("org.")
&& !s.starts_with("net.")
&& !s.starts_with("io.")
{
high_entropy.push(s.into_owned());
}
}
if !high_entropy.is_empty() {
let sample = high_entropy.iter().take(3)
.map(|s| format!("{:?}", s))
.collect::<Vec<_>>()
.join(", ");
findings.push(Finding::new(
"HBC_HIGH_ENTROPY_STRINGS",
Layer::Hbc,
Severity::Info,
format!(
"{} high-entropy strings in Hermes bundle \
(entropy ≥ {entropy_threshold} bits/char, len ≥ {HBC_MIN_STR_LEN}); \
samples: {sample}. Use apk_trufflehog or apk_yara for credential patterns.",
high_entropy.len(),
),
));
}
}
// HBC taint pass. Results that need a DEX counterpart are parked in these
// two vectors and consumed by the cross-layer stitcher further down.
let mut hbc_stitch_payloads: Vec<analysis::cross_layer_stitch::HbcStitchPayload> = Vec::new();
let mut hbc_backwalk_failures: Vec<analysis::cross_layer_stitch::HbcBackwalkFailurePayload> =
Vec::new();
if let Some(hbc_owned) = ctx.hbc.as_ref() {
use analysis::hbc_taint::HbcTaintAnalysis;
use analysis::dex_taint::TaintSource;
let hbc = hbc_owned.hbc();
let hbc_data = hbc_owned.bytes();
let scan = droidsaw_hermes::scanner::scan_parsed(hbc, hbc_data);
// Function ids that the scanner lists under the "NativeModules" string;
// empty when that string is absent from the bundle.
let nm_id =
(0..hbc.string_count).find(|&i| hbc.string_as_str_or_empty(i) == "NativeModules");
let bridge_func_ids: std::collections::BTreeSet<u32> = nm_id
.and_then(|id| scan.string_refs.get(&id))
.map(|fids| fids.iter().copied().collect())
.unwrap_or_default();
for func_id in 0..hbc.function_count {
let f = hbc.function_get(func_id);
// Skip empty functions and functions whose byte range ends past the
// buffer; any later decode/CFG/SSA failure also skips the function.
if f.size == 0 { continue; }
let end_u64 = u64::from(f.offset) + u64::from(f.size);
if end_u64 > hbc_data.len() as u64 { continue; }
#[allow(
clippy::cast_possible_truncation,
reason = "PROOF: just verified `end_u64 <= hbc_data.len() as u64`; downcast is bounded by the addressable buffer length on every supported target."
)]
let end = end_u64 as usize;
let Some(decode_slice) = hbc_data.get(f.offset as usize..end) else {
continue;
};
let Ok(instructions) = droidsaw_hermes::decompile::decode::decode_function(
decode_slice,
hbc.opcode_version(),
) else {
continue;
};
let exc_count = hbc.function_exception_count(func_id);
let exc_handlers: Vec<droidsaw_hermes::decompile::cfg::ExcHandler> = (0..exc_count)
.map(|i| {
let eh = hbc.function_exception_get(func_id, i);
droidsaw_hermes::decompile::cfg::ExcHandler {
start: eh.start, end: eh.end, target: eh.target,
}
})
.collect();
// The CFG builder receives up to 256 bytes beyond the function's end,
// clamped to the buffer length.
let code_end = (end + 256).min(hbc_data.len());
let Some(code) = hbc_data.get(f.offset as usize..code_end) else {
continue;
};
let Ok(cfg) = droidsaw_hermes::decompile::cfg::Cfg::build(&instructions, &exc_handlers, code) else {
continue;
};
let Ok(ssa) = droidsaw_hermes::decompile::ssa::build_ssa(&cfg, f.frame_size) else {
continue;
};
// Every parameter variable is seeded as user input; a function without
// parameter variables has nothing to track.
let seeds: std::collections::BTreeMap<droidsaw_hermes::decompile::ssa::VarId, TaintSource> =
ssa.param_vars
.iter()
.map(|&v| (v, TaintSource::UserInput))
.collect();
if seeds.is_empty() {
continue;
}
// Functions in `bridge_func_ids` are optimized first and get the full
// analysis; all others get the eval-only analysis on the raw SSA.
let result = if bridge_func_ids.contains(&func_id) {
let get_str = |id: u32| -> String {
if id < hbc.string_count {
hbc.string_as_str_or_empty(id).into_owned()
} else {
String::new()
}
};
let get_literal = |a: u8, b: u32, c: u32, d: u32| -> (u8, u32, i32, f64) {
let v = hbc.literal_get(a, b, c, d);
(v.tag, v.str_id, v.ival, v.dval)
};
let get_shape = |i: u32| -> (u32, u32) {
match hbc.object_shape_get(i) {
Some(s) => (s.key_buffer_offset, s.num_props),
None => (0, 0),
}
};
let get_func_name = |fid2: u32| -> String {
if fid2 < hbc.function_count {
let fi = hbc.function_get(fid2);
if fi.name_id < hbc.string_count {
return hbc.string_as_str_or_empty(fi.name_id).into_owned();
}
}
String::new()
};
let get_bigint = |idx: u32| -> Option<String> { hbc.bigint_as_str(idx) };
let ssa = droidsaw_hermes::decompile::optimize::optimize(
ssa,
&get_str,
&get_literal,
&get_shape,
&get_func_name,
&get_bigint,
);
HbcTaintAnalysis::run_full(
&ssa,
func_id,
droidsaw_common::finding::Layer::Hbc,
seeds,
&bridge_func_ids,
)
} else {
HbcTaintAnalysis::run_eval_only(
&ssa,
func_id,
droidsaw_common::finding::Layer::Hbc,
seeds,
)
};
// NativeModuleArg sinks are deferred to the stitcher; every other sink
// becomes an HBC taint finding immediately.
for tf in result.findings {
match tf.sink {
droidsaw_common::analysis::TaintSink::NativeModuleArg { .. } => {
hbc_stitch_payloads.push(
analysis::cross_layer_stitch::HbcStitchPayload { tf },
);
}
_ => {
findings.push(hbc_taint_to_finding(&tf));
}
}
}
// Encode each backwalk failure reason as a small hop count for the
// stitcher payload.
for fs in result.backwalk_failures {
let hop_count = match fs.reason {
analysis::hbc_taint::BackwalkFailureReason::CalleeNotVar => 0u8,
analysis::hbc_taint::BackwalkFailureReason::ChainExtractionFailed => 1,
analysis::hbc_taint::BackwalkFailureReason::TerminalHopIsGetById => 2,
};
hbc_backwalk_failures.push(
analysis::cross_layer_stitch::HbcBackwalkFailurePayload {
func_id: fs.func_id,
hop_count,
},
);
}
}
}
// DEX taint pass set-up: raw bytes of every DEX that has them, the unified
// code index, and the resolved JS-to-native bridge.
// NOTE(review): `filter_map` drops DEX files without bytes, so positions
// in `dex_data_vecs` can shift relative to `ctx.dex` if any are missing —
// confirm `dex_bytes` cannot return `None` for a loaded DEX.
let dex_data_vecs: Vec<&[u8]> = (0..ctx.dex.len())
.filter_map(|i| ctx.dex_bytes(i))
.collect();
let class_analysis = analysis::dex_taint::DexTaintAnalysis::collect_unified_code_index(
&ctx.dex, &dex_data_vecs,
);
let bridge = analysis::bridge::BridgeResolver::resolve(ctx);
// Invert the bridge map: (dex index, method index) -> JS method name.
let bridge_targets: std::collections::BTreeMap<(usize, droidsaw_dex::ids::MethodIdx), String> =
bridge.by_method.iter()
.flat_map(|(js_name, targets)| {
targets.iter().map(move |(dex_idx, m_idx)| ((*dex_idx, *m_idx), js_name.clone()))
})
.collect();
// Work list: every method with a code item in a class that is not
// shadowed and has class data.
let mut method_tasks: Vec<(usize, droidsaw_dex::ids::MethodIdx, u32)> = Vec::new();
for ((i, dex), &data) in ctx.dex.iter().enumerate().zip(dex_data_vecs.iter()) {
for (class_defs_idx, cd) in dex.class_defs.iter().enumerate() {
if dex.class_def_is_shadowed(class_defs_idx) {
continue;
}
if cd.class_data_off == 0 {
continue;
}
let Ok(class_data) = droidsaw_dex::decode::parse_class_data(data, cd.class_data_off)
else {
continue;
};
for m in class_data
.direct_methods
.iter()
.chain(class_data.virtual_methods.iter())
{
if m.code_off == 0 {
continue;
}
method_tasks.push((i, m.method_idx, m.code_off));
}
}
}
use rayon::prelude::*;
// Per-task output: regular DEX findings plus bridge payloads for stitching.
type DexTaskOut = (
Vec<Finding>,
Vec<analysis::cross_layer_stitch::DexBridgeStitchPayload>,
);
let native_methods_per_dex =
analysis::dex_taint::collect_native_methods_per_dex(&ctx.dex, &dex_data_vecs);
let app_package: Option<String> = manifest_id(ctx).0;
let app_pkg: Option<&str> = app_package.as_deref();
// Analyse every method in parallel; a method whose code item, CFG, or SSA
// cannot be built yields an empty result.
let task_outputs: Vec<DexTaskOut> = method_tasks
.par_iter()
.map(|&(i, m_idx, code_off)| {
let empty: DexTaskOut = (Vec::new(), Vec::new());
let Some(&data) = dex_data_vecs.get(i) else {
return empty;
};
let Ok(code) = droidsaw_dex::decode::parse_code_item(data, code_off) else {
return empty;
};
let Ok(cfg) = droidsaw_dex::cfg::Cfg::build(&code) else {
return empty;
};
let Ok(ssa) = droidsaw_dex::ssa::SsaBody::build(&code, &cfg) else {
return empty;
};
let mut out: Vec<Finding> = Vec::new();
let mut bridge_out:
Vec<analysis::cross_layer_stitch::DexBridgeStitchPayload> = Vec::new();
let containing_key = ctx.dex.get(i)
.and_then(|dex| droidsaw_dex::method_key_for_idx(dex, m_idx));
// First run: no explicit seeds.
let result = analysis::dex_taint::DexTaintAnalysis::run_interprocedural(
&ctx.dex,
&dex_data_vecs,
&class_analysis,
i,
&ssa,
droidsaw_common::finding::Layer::Dex,
std::collections::BTreeMap::new(),
&native_methods_per_dex,
4,
);
for mut tf in result.findings {
// Attach the containing class and method signature when known.
if let Some(ref key) = containing_key {
tf.class_descriptor = Some(key.class.to_string());
tf.method_signature = Some(format!(
"{}{}", key.name, key.proto
));
}
out.push(taint_finding_to_finding(&tf, app_pkg));
}
// Second run, only for bridge targets: every parameter variable is
// seeded as a ReactBridgeParam of the JS method.
if let Some(js_method) = bridge_targets.get(&(i, m_idx)) {
use analysis::dex_taint::TaintSource;
let seeds =
analysis::dex_taint::DexTaintAnalysis::seeds_from_sources(
ssa.param_vars
.iter()
.map(|v| (v.clone(), TaintSource::ReactBridgeParam { method: js_method.clone() }))
.collect(),
);
#[allow(
clippy::cast_possible_truncation,
reason = "DEX method param count never exceeds 255 in practice; truncation is safe — downstream analysis intersects against HBC arg_positions which are also u8-range."
)]
let seed_positions: std::collections::BTreeMap<droidsaw_dex::ssa::VarId, u8> = ssa
.param_vars
.iter()
.enumerate()
.filter_map(|(idx, v)| {
// `checked_sub(1)` drops parameter index 0 and shifts the rest
// down by one — presumably index 0 is the receiver; TODO confirm.
idx.checked_sub(1).map(|pos| (v.clone(), pos as u8))
})
.collect();
if !seeds.is_empty() {
let bridge_result =
analysis::dex_taint::DexTaintAnalysis::run_interprocedural_bridge(
&ctx.dex,
&dex_data_vecs,
&class_analysis,
i,
&ssa,
droidsaw_common::finding::Layer::Dex,
seeds,
seed_positions,
&native_methods_per_dex,
4,
);
// `positions` is looked up by the finding's index; a missing entry
// falls back to the default (empty) value.
let positions = bridge_result.bridge_sink_reachable_positions;
for (idx, mut tf) in bridge_result.findings.into_iter().enumerate() {
if let Some(ref key) = containing_key {
tf.class_descriptor = Some(key.class.to_string());
tf.method_signature = Some(format!(
"{}{}", key.name, key.proto
));
}
let pos_set = positions.get(idx).cloned().unwrap_or_default();
bridge_out.push(
analysis::cross_layer_stitch::DexBridgeStitchPayload {
tf,
js_method: js_method.clone(),
dex_idx: i,
method_idx: m_idx,
sink_reachable_seed_positions: pos_set,
},
);
}
}
}
(out, bridge_out)
})
.collect();
// Merge the per-task results in work-list order.
let mut dex_bridge_payloads:
Vec<analysis::cross_layer_stitch::DexBridgeStitchPayload> = Vec::new();
for (regular, bridge_payloads) in task_outputs {
findings.extend(regular);
dex_bridge_payloads.extend(bridge_payloads);
}
// Cross-layer stitching. Input sizes are recorded first for the
// debug-only partition check below.
let n_hbc_in = hbc_stitch_payloads.len();
let n_dex_in = dex_bridge_payloads.len();
let n_bw_in = hbc_backwalk_failures.len();
let stitch_outcome = analysis::cross_layer_stitch::stitch_cross_layer_taint(
hbc_stitch_payloads,
dex_bridge_payloads,
hbc_backwalk_failures,
&bridge,
);
// Debug builds only: each composite is counted twice on the output side,
// and the output total must equal the number of inputs.
debug_assert_eq!(
stitch_outcome.composites.len() * 2
+ stitch_outcome.unjoined_hbc.len()
+ stitch_outcome.unjoined_dex.len()
+ stitch_outcome.ambiguous.len(),
n_hbc_in + n_dex_in + n_bw_in,
"cross-layer stitcher partition invariant violated: input-sum != output-sum",
);
for composite in stitch_outcome.composites {
findings.push(composite_to_finding(composite));
}
for tf in stitch_outcome.unjoined_hbc {
findings.push(hbc_taint_to_finding(&tf));
}
for tf in stitch_outcome.unjoined_dex {
// The JS method name comes from the taint source; any other source
// kind yields an empty name.
let js_method = match &tf.source {
droidsaw_common::analysis::TaintSource::ReactBridgeParam { method } => {
method.as_str()
}
_ => "",
};
findings.push(bridge_taint_to_finding(&tf, js_method));
}
findings.extend(stitch_outcome.ambiguous);
// Report cache-cap hits as informational findings.
// NOTE(review): the cap values in the messages (65536, 4096) are literals
// here — confirm they match the caps enforced in `analysis::dex_taint`.
let cha_truncations = class_analysis
.cha_cache_truncations
.load(std::sync::atomic::Ordering::Relaxed);
if cha_truncations > 0 {
findings.push(Finding::new(
analysis::dex_taint::CHA_CACHE_ENTRY_CAP_HIT,
Layer::Dex,
Severity::Info,
format!(
"CHA cache exceeded the {}-entry cap; {cha_truncations} memoization \
inserts were skipped. Lookups past the cap re-iterate the candidate \
set, so virtual-call resolution remains correct but slower.",
65536_usize,
),
));
}
let build_truncations = class_analysis
.build_cache_truncations
.load(std::sync::atomic::Ordering::Relaxed);
if build_truncations > 0 {
findings.push(Finding::new(
analysis::dex_taint::BUILD_CACHE_ENTRY_CAP_HIT,
Layer::Dex,
Severity::Info,
format!(
"Per-call BuildCache exceeded the {}-entry cap; {build_truncations} \
(CodeItem, Cfg, SsaBody) memoization inserts were skipped across the \
audit's interproc fixpoint. Re-build cost is paid on subsequent \
lookups; no semantic regression.",
4096_usize,
),
));
}
// Final ordering: by severity, then by finding id.
findings.sort_by(|a, b| a.severity.cmp(&b.severity).then_with(|| a.id.cmp(&b.id)));
findings
}
/// Decides whether a class descriptor belongs to the app's own package.
///
/// The first three dot-separated segments of `app_package` (fewer if the
/// package is shorter) are turned into a descriptor prefix such as
/// `Lorg/example/app/`. Leading `[` array markers on the descriptor are
/// ignored before the prefix comparison.
///
/// Returns `None` when either input is missing or the package is empty,
/// otherwise `Some(true)` / `Some(false)`.
fn is_first_party_class(class_descriptor: Option<&str>, app_package: Option<&str>) -> Option<bool> {
    let (descriptor, package) = (class_descriptor?, app_package?);
    if package.is_empty() {
        return None;
    }
    // A non-empty package always produces a non-empty root, so no further
    // emptiness check is required after this point.
    let mut prefix = String::from("L");
    for segment in package.split('.').take(3) {
        prefix.push_str(segment);
        prefix.push('/');
    }
    let element_type = descriptor.trim_start_matches('[');
    Some(element_type.starts_with(prefix.as_str()))
}
/// Picks the severity for a file-path-traversal taint flow.
///
/// Flows sourced from an intent extra or a network response are always
/// `Medium`. For every other source the flow is `Low` only when the
/// containing class is known to be outside the app's own package; when
/// ownership is first-party or cannot be determined it stays `Medium`.
fn file_path_traversal_severity(
    source: &analysis::dex_taint::TaintSource,
    class_descriptor: Option<&str>,
    app_package: Option<&str>,
) -> Severity {
    use analysis::dex_taint::TaintSource;
    let crosses_boundary = matches!(
        source,
        TaintSource::IntentExtra { .. } | TaintSource::NetworkResponse { .. }
    );
    if crosses_boundary || is_first_party_class(class_descriptor, app_package) != Some(false) {
        Severity::Medium
    } else {
        Severity::Low
    }
}
/// Converts a DEX-layer taint result into a reportable `Finding`.
///
/// The sink kind selects the finding id, base severity, and CWE. Native
/// method sinks are reported as `JNI_TAINTED_NATIVE_CALL`; everything else
/// as `DEX_TAINT_FLOW`. File-path-traversal severity is refined by
/// `file_path_traversal_severity` using the source kind and `app_package`.
/// Source and sink offsets are carried in the finding's JSON `extra`, and
/// the detail text is passed through `sanitize_detail`.
fn taint_finding_to_finding(tf: &analysis::dex_taint::TaintFinding, app_package: Option<&str>) -> Finding {
    use analysis::dex_taint::TaintSink;

    /// Debug rendering of `value`, cut at the first `{` and trimmed.
    fn variant_label(value: &impl std::fmt::Debug) -> String {
        let rendered = format!("{value:?}");
        let head = rendered.split('{').next().unwrap_or("Unknown");
        head.trim().to_string()
    }

    let owning_class = tf.class_descriptor.as_deref();
    let (id_tag, severity, cwe) = match &tf.sink {
        TaintSink::NativeMethod { .. } => ("JNI_TAINTED_NATIVE_CALL", Severity::Medium, Some(111u16)),
        TaintSink::RuntimeExec | TaintSink::Eval => ("DEX_TAINT_FLOW", Severity::Critical, Some(78u16)),
        TaintSink::SqlExecute => ("DEX_TAINT_FLOW", Severity::High, Some(89)),
        TaintSink::WebViewLoadUrl => ("DEX_TAINT_FLOW", Severity::High, Some(79)),
        TaintSink::LogOutput => ("DEX_TAINT_FLOW", Severity::Medium, Some(532)),
        TaintSink::ReflectionInvoke { .. } => ("DEX_TAINT_FLOW", Severity::Medium, Some(470)),
        // Severity here depends on where the data came from and who owns the class.
        TaintSink::FilePathTraversal { .. } => (
            "DEX_TAINT_FLOW",
            file_path_traversal_severity(&tf.source, owning_class, app_package),
            Some(22),
        ),
        TaintSink::FileWrite { .. } => ("DEX_TAINT_FLOW", Severity::Medium, Some(22)),
        TaintSink::ContentProviderInsert { .. } => ("DEX_TAINT_FLOW", Severity::Medium, Some(862)),
        TaintSink::NativeModuleArg { .. } => ("DEX_TAINT_FLOW", Severity::Medium, Some(20)),
        TaintSink::CryptoInput { .. } => ("DEX_TAINT_FLOW", Severity::Low, Some(327)),
        TaintSink::NetworkFetch
        | TaintSink::HttpRequest { .. } => ("DEX_TAINT_FLOW", Severity::Low, Some(918)),
        _ => ("DEX_TAINT_FLOW", Severity::Low, None),
    };

    let source_str = variant_label(&tf.source);
    let sink_str = variant_label(&tf.sink);
    let class_tag = match owning_class {
        Some(c) => format!(" in {c}"),
        None => String::new(),
    };
    let detail = match &tf.sink {
        TaintSink::NativeMethod { class, method } => format!(
            "jni boundary: {source_str} → native {class}->{method}{class_tag} (func #{:#x}; native side unanalyzed)",
            tf.func_id,
        ),
        _ => format!(
            "intra-method taint: {source_str} → {sink_str}{class_tag} (func #{:#x}; interprocedural cross-DEX depth 4)",
            tf.func_id,
        ),
    };
    let extra = serde_json::json!({
        "source_offset": tf.source_offset,
        "sink_offset": tf.sink_offset,
    }).to_string();

    let finding = Finding::new(id_tag, tf.layer, severity, sanitize_detail(&detail))
        .with_func(tf.func_id)
        .with_extra(extra);
    if let Some(code) = cwe {
        finding.with_cwe(code)
    } else {
        finding
    }
}
/// Converts a Hermes-layer taint result into an `HBC_TAINT_FLOW` finding.
///
/// Severity and CWE are chosen from the sink kind (`Medium` with no CWE for
/// sinks not listed). The detail text always names `JsParam` as the source
/// and is passed through `sanitize_detail`.
fn hbc_taint_to_finding(tf: &analysis::dex_taint::TaintFinding) -> Finding {
    use analysis::dex_taint::TaintSink;

    /// Debug rendering of `value`, cut at the first `{` and trimmed.
    fn variant_label(value: &impl std::fmt::Debug) -> String {
        let rendered = format!("{value:?}");
        let head = rendered.split('{').next().unwrap_or("Unknown");
        head.trim().to_string()
    }

    let (severity, cwe) = match &tf.sink {
        TaintSink::Eval => (Severity::Critical, Some(95u16)),
        TaintSink::NativeModuleArg { .. } => (Severity::High, Some(20)),
        TaintSink::WebViewLoadUrl => (Severity::High, Some(79)),
        TaintSink::SqlExecute => (Severity::High, Some(89)),
        _ => (Severity::Medium, None),
    };
    let sink_str = variant_label(&tf.sink);
    let detail = format!(
        "hbc taint: JsParam → {sink_str} (func #{:#x})",
        tf.func_id
    );

    let finding = Finding::new("HBC_TAINT_FLOW", tf.layer, severity, sanitize_detail(&detail))
        .with_func(tf.func_id);
    if let Some(code) = cwe {
        finding.with_cwe(code)
    } else {
        finding
    }
}
/// Converts a bridge-seeded DEX taint result into a `BRIDGE_TAINT_FLOW`
/// finding.
///
/// `js_method` is the JS-side method name shown as the source in the detail
/// text. Severity and CWE are chosen from the sink kind (`Medium` with no
/// CWE for sinks not listed); the detail is passed through
/// `sanitize_detail`.
fn bridge_taint_to_finding(tf: &analysis::dex_taint::TaintFinding, js_method: &str) -> Finding {
    use analysis::dex_taint::TaintSink;

    /// Debug rendering of `value`, cut at the first `{` and trimmed.
    fn variant_label(value: &impl std::fmt::Debug) -> String {
        let rendered = format!("{value:?}");
        let head = rendered.split('{').next().unwrap_or("Unknown");
        head.trim().to_string()
    }

    let (severity, cwe) = match &tf.sink {
        TaintSink::RuntimeExec | TaintSink::Eval => (Severity::Critical, Some(78u16)),
        TaintSink::SqlExecute => (Severity::Critical, Some(89)),
        TaintSink::WebViewLoadUrl => (Severity::High, Some(79)),
        TaintSink::LogOutput => (Severity::Medium, Some(532)),
        TaintSink::ReflectionInvoke { .. } => (Severity::High, Some(470)),
        TaintSink::FilePathTraversal { .. } => (Severity::High, Some(22)),
        TaintSink::FileWrite { .. } => (Severity::High, Some(22)),
        TaintSink::ContentProviderInsert { .. } => (Severity::Medium, Some(862)),
        TaintSink::NativeModuleArg { .. } => (Severity::Medium, Some(20)),
        TaintSink::CryptoInput { .. } => (Severity::Medium, Some(327)),
        TaintSink::NetworkFetch
        | TaintSink::HttpRequest { .. } => (Severity::Low, Some(918)),
        _ => (Severity::Medium, None),
    };
    let sink_str = variant_label(&tf.sink);
    let class_tag = match tf.class_descriptor.as_deref() {
        Some(c) => format!(" in {c}"),
        None => String::new(),
    };
    let detail = format!(
        "bridge taint: ReactBridgeParam[{js_method}] → {sink_str}{class_tag} (func #{:#x})",
        tf.func_id
    );

    let finding = Finding::new("BRIDGE_TAINT_FLOW", tf.layer, severity, sanitize_detail(&detail))
        .with_func(tf.func_id);
    if let Some(code) = cwe {
        finding.with_cwe(code)
    } else {
        finding
    }
}
/// Converts a stitched JS-to-native taint flow into a
/// `CROSS_LAYER_TAINT_FLOW` finding.
///
/// The finding is attributed to the DEX layer and the native function id.
/// It carries the composite serialised as JSON in `extra` (falling back to
/// `"{}"` if serialisation fails) and a provenance record naming HBC as the
/// source layer, the React bridge method as the bridge, and DEX as the sink
/// layer. Severity and CWE are taken from the composite. In debug builds the
/// serialised form is parsed back and compared with the original.
fn composite_to_finding(
    c: droidsaw_common::cross_layer_taint::CrossLayerTaintFinding<droidsaw_dex::ids::MethodIdx>,
) -> Finding {
    use droidsaw_common::finding::{FindingProvenance, Layer};

    /// Debug rendering of `value`, cut at the first `{` and trimmed.
    fn variant_label(value: &impl std::fmt::Debug) -> String {
        let rendered = format!("{value:?}");
        let head = rendered.split('{').next().unwrap_or("Unknown");
        head.trim().to_string()
    }

    let class_tag = match c.native_class_descriptor.as_deref() {
        Some(cd) => format!(" in {cd}"),
        None => String::new(),
    };
    let sink_str = variant_label(&c.native_sink);
    let source_str = variant_label(&c.js_source);
    let detail = format!(
        "cross-layer taint: {source_str} → {}.{} → {sink_str}{class_tag} \
         (js func #{:#x} → dex func #{:#x})",
        c.bridge.js_module.as_str(),
        c.bridge.js_method.as_str(),
        c.js_func_id,
        c.native_func_id,
    );
    let provenance = FindingProvenance {
        source_layer: Layer::Hbc,
        bridge: format!(
            "ReactBridge:{}.{}",
            c.bridge.js_module.as_str(),
            c.bridge.js_method.as_str(),
        ),
        sink_layer: Layer::Dex,
    };

    let extra: String = serde_json::to_string(&c).unwrap_or_else(|_| String::from("{}"));
    // Debug-only guard against serialisation drift.
    #[cfg(debug_assertions)]
    {
        if let Ok(back) = serde_json::from_str::<
            droidsaw_common::cross_layer_taint::CrossLayerTaintFinding<
                droidsaw_dex::ids::MethodIdx,
            >,
        >(&extra)
        {
            debug_assert_eq!(c, back, "CrossLayerTaintFinding serde roundtrip drift");
        }
    }

    let finding = Finding::new(
        "CROSS_LAYER_TAINT_FLOW",
        Layer::Dex,
        c.severity,
        sanitize_detail(&detail),
    )
    .with_extra(extra)
    .with_func(c.native_func_id)
    .with_provenance(provenance);
    match c.cwe {
        Some(code) => finding.with_cwe(code),
        None => finding,
    }
}
/// Reads the package name and version name from the APK's binary manifest.
///
/// Returns `(None, None)` when the context has no APK, the APK has no raw
/// manifest, or the manifest fails to parse; otherwise both fields are
/// `Some`.
fn manifest_id(ctx: &CrossLayerContext) -> (Option<String>, Option<String>) {
    ctx.apk
        .as_ref()
        .and_then(|apk| apk.manifest_raw.as_ref())
        .and_then(|raw| Manifest::from_binary_xml(raw).ok())
        .map_or((None, None), |manifest| {
            (
                Some(manifest.package.clone()),
                Some(manifest.version_name.clone()),
            )
        })
}
/// Tests for the factories that turn taint-analysis results into `Finding`s,
/// plus the severity / first-party heuristics they rely on.
#[cfg(test)]
mod taint_factory_tests {
    use super::*;
    use droidsaw_common::analysis::{TaintFinding, TaintSink, TaintSource};
    use droidsaw_common::finding::Layer;

    /// Synthetic DEX-layer flow: `IntentExtra("url")` into `WebViewLoadUrl`,
    /// with no class/method metadata and no instruction offsets.
    fn synth_dex_tf(func_id: u32) -> TaintFinding {
        let source = TaintSource::IntentExtra { key: "url".into() };
        TaintFinding {
            source,
            sink: TaintSink::WebViewLoadUrl,
            layer: Layer::Dex,
            func_id,
            class_descriptor: None,
            method_signature: None,
            source_offset: None,
            sink_offset: None,
        }
    }

    /// Synthetic HBC-layer flow: `NativeModuleCall(M.m)` into `Eval`,
    /// with no class/method metadata and no instruction offsets.
    fn synth_hbc_tf(func_id: u32) -> TaintFinding {
        let source = TaintSource::NativeModuleCall {
            module: "M".into(),
            method: "m".into(),
        };
        TaintFinding {
            source,
            sink: TaintSink::Eval,
            layer: Layer::Hbc,
            func_id,
            class_descriptor: None,
            method_signature: None,
            source_offset: None,
            sink_offset: None,
        }
    }

    /// `IntentExtra` and `NetworkResponse` sources keep `Medium`, both for a
    /// third-party class and when class and package are unknown.
    #[test]
    fn file_path_traversal_cross_boundary_stays_medium() {
        let intent = TaintSource::IntentExtra { key: String::new() };
        let in_library = file_path_traversal_severity(
            &intent,
            Some("Lcom/bumptech/glide/Foo;"),
            Some("org.example.app"),
        );
        assert_eq!(in_library, Severity::Medium);

        let network = TaintSource::NetworkResponse { endpoint: String::new() };
        let unknown_origin = file_path_traversal_severity(&network, None, None);
        assert_eq!(unknown_origin, Severity::Medium);
    }

    /// `SharedPreferencesRead` / `ContentProviderQuery` sources inside a class
    /// outside the app package are rated `Low`.
    #[test]
    fn file_path_traversal_internal_in_library_drops_to_low() {
        let pkg = Some("org.example.app");
        let cases = [
            (
                TaintSource::SharedPreferencesRead { key: String::new() },
                "Lcom/bumptech/glide/Foo;",
            ),
            (
                TaintSource::ContentProviderQuery { uri: String::new() },
                "Lcom/facebook/imagepipeline/Bar;",
            ),
        ];
        for (source, class) in &cases {
            assert_eq!(
                file_path_traversal_severity(source, Some(*class), pkg),
                Severity::Low
            );
        }
    }

    /// The same `ContentProviderQuery` source stays `Medium` in a class under
    /// the app package, and also when the package is unknown.
    #[test]
    fn file_path_traversal_first_party_stays_medium() {
        let source = TaintSource::ContentProviderQuery { uri: String::new() };
        for (class, pkg) in [
            ("Lorg/example/app/FileResolver;", Some("org.example.app")),
            ("Lcom/x/Y;", None),
        ] {
            assert_eq!(
                file_path_traversal_severity(&source, Some(class), pkg),
                Severity::Medium
            );
        }
    }

    /// Table of `(class descriptor, package, expected)` triples for
    /// `is_first_party_class`, including the missing/empty-input rows.
    #[test]
    fn is_first_party_class_reverse_domain_match() {
        let cases: [(Option<&str>, Option<&str>, Option<bool>); 7] = [
            (Some("Lorg/example/app/Foo;"), Some("org.example.app"), Some(true)),
            (
                Some("Lorg/example/app/sub/Foo;"),
                Some("org.example.app.feature"),
                Some(true),
            ),
            (Some("Lcom/bumptech/glide/Foo;"), Some("org.example.app"), Some(false)),
            (Some("Lcom/apple/Foo;"), Some("com.app.thing"), Some(false)),
            (None, Some("org.example.app"), None),
            (Some("Lorg/example/app/Foo;"), None, None),
            (Some("Lorg/example/app/Foo;"), Some(""), None),
        ];
        for (class, package, expected) in cases {
            assert_eq!(is_first_party_class(class, package), expected);
        }
    }

    /// The DEX factory copies `func_id` onto the emitted finding.
    #[test]
    fn taint_finding_to_finding_propagates_func_id() {
        let tf = synth_dex_tf(0x176d5);
        let finding = taint_finding_to_finding(&tf, None);
        assert_eq!(finding.func_id, Some(0x176d5));
    }

    /// The HBC factory copies `func_id` onto the emitted finding.
    #[test]
    fn hbc_taint_to_finding_propagates_func_id() {
        let tf = synth_hbc_tf(0xdeadbeef);
        let finding = hbc_taint_to_finding(&tf);
        assert_eq!(finding.func_id, Some(0xdeadbeef));
    }

    /// The bridge factory copies `func_id` onto the emitted finding.
    #[test]
    fn bridge_taint_to_finding_propagates_func_id() {
        let tf = synth_dex_tf(0x42);
        let finding = bridge_taint_to_finding(&tf, "fetchSecret");
        assert_eq!(finding.func_id, Some(0x42));
    }

    /// Source/sink offsets of a DEX finding show up as numbers in `extra`.
    #[test]
    fn dex_taint_finding_offsets_stored_in_extra() {
        // Same flow as `synth_dex_tf`, with both offsets filled in.
        let tf = TaintFinding {
            source_offset: Some(0x04),
            sink_offset: Some(0x1a),
            ..synth_dex_tf(0x100)
        };
        let finding = taint_finding_to_finding(&tf, None);
        let raw_extra = finding.extra.as_deref().unwrap_or("");
        let parsed: serde_json::Value =
            serde_json::from_str(raw_extra).expect("extra must be valid JSON");
        assert_eq!(parsed["source_offset"], serde_json::json!(4));
        assert_eq!(parsed["sink_offset"], serde_json::json!(26));
    }

    /// A `NativeMethod` sink yields the `JNI_TAINTED_NATIVE_CALL` id with
    /// `Medium` severity, CWE 111 and a detail naming the class and method.
    #[test]
    fn taint_finding_native_method_emits_jni_id_tag() {
        let tf = TaintFinding {
            source: TaintSource::IntentExtra { key: "key".into() },
            sink: TaintSink::NativeMethod {
                class: "Lcom/example/JniBridge;".into(),
                method: "nativeDecrypt".into(),
            },
            source_offset: Some(0x10),
            sink_offset: Some(0x20),
            ..synth_dex_tf(0xa00)
        };
        let finding = taint_finding_to_finding(&tf, None);
        assert_eq!(finding.id, "JNI_TAINTED_NATIVE_CALL");
        assert_eq!(finding.severity, Severity::Medium);
        assert_eq!(finding.cwe, Some(111));
        assert!(
            finding.detail.starts_with("jni boundary:"),
            "detail must carry the JNI prefix, got: {}",
            finding.detail,
        );
        for needle in ["Lcom/example/JniBridge;", "nativeDecrypt"] {
            assert!(finding.detail.contains(needle));
        }
    }

    /// Findings produced by the HBC factory carry no `extra` payload.
    #[test]
    fn hbc_taint_finding_offsets_remain_none_in_extra() {
        let finding = hbc_taint_to_finding(&synth_hbc_tf(0xbeef));
        assert!(finding.extra.is_none(), "HBC findings must not carry extra JSON");
    }

    /// Offsets survive the trip through `write_findings_db` and
    /// `write_taint_flows_db` into the `taint_flows` table.
    #[test]
    fn dex_taint_offsets_persist_to_db() {
        let tf = TaintFinding {
            source: TaintSource::IntentExtra { key: String::new() },
            source_offset: Some(0x08),
            sink_offset: Some(0x2c),
            ..synth_dex_tf(0x200)
        };
        let finding = taint_finding_to_finding(&tf, None);
        let batch = std::slice::from_ref(&finding);

        let db = tempfile::NamedTempFile::new().expect("tempfile");
        write_findings_db(batch, db.path()).expect("write_findings_db");
        let written = write_taint_flows_db(batch, db.path()).expect("write_taint_flows_db");
        assert_eq!(written, 1);

        let conn = rusqlite::Connection::open(db.path()).expect("open");
        let (src_off, snk_off): (Option<i64>, Option<i64>) = conn
            .query_row(
                "SELECT source_offset, sink_offset FROM taint_flows LIMIT 1",
                [],
                |row| Ok((row.get(0)?, row.get(1)?)),
            )
            .expect("query");
        assert_eq!(src_off, Some(0x08), "source_offset must be non-None for DEX taint");
        assert_eq!(snk_off, Some(0x2c), "sink_offset must be non-None for DEX taint");
    }

    /// The source/sink parser copes with a `; …` suffix inside the
    /// parenthesised trailer of the detail string.
    #[test]
    fn parse_taint_source_sink_handles_semicolon_suffix() {
        let detail = "intra-method taint: IntentExtra → WebViewLoadUrl (func #0x176d5; interprocedural cross-DEX depth 4)";
        let (source, sink) = super::export::parse_taint_source_sink(detail);
        assert_eq!(source, "IntentExtra");
        assert_eq!(sink, "WebViewLoadUrl");
    }
}
/// Tests for the DYNAMIC_LOADING gate: token classification, xref lookup,
/// string partitioning, and how the partition rewrites the finding.
#[cfg(test)]
mod dynamic_loading_gate_tests {
    use super::*;
    use droidsaw_dex::xrefs::{MethodKey, Xrefs};

    const DEX_CLASS_LOADER_DESC: &str = "Ldalvik/system/DexClassLoader;";
    const DEX_CLASS_LOADER_DOTTED: &str = "dalvik.system.DexClassLoader";
    const PATH_CLASS_LOADER_DESC: &str = "Ldalvik/system/PathClassLoader;";

    /// Builds a `MethodKey` for `class.name` with the fixed prototype `()V`.
    fn mk_method_key(class: &str, name: &str) -> MethodKey {
        MethodKey {
            proto: "()V".into(),
            name: name.into(),
            class: class.into(),
        }
    }

    /// The caller used by most xref fixtures: `Lcom/app/Loader;->load`.
    fn loader_caller() -> MethodKey {
        mk_method_key("Lcom/app/Loader;", "load")
    }

    /// Copies string slices into the owned `Vec<String>` the helpers under test take.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().map(|s| (*s).to_string()).collect()
    }

    /// The `(descriptor, dotted)` pair expected for `DexClassLoader`.
    fn dex_class_loader_pair() -> Option<(String, String)> {
        Some((
            DEX_CLASS_LOADER_DESC.to_string(),
            DEX_CLASS_LOADER_DOTTED.to_string(),
        ))
    }

    /// Xref index with a single `type_xrefs` entry for `descriptor`.
    fn xrefs_with_type_ref(descriptor: &str) -> Xrefs {
        let mut xrefs = Xrefs::default();
        xrefs
            .type_xrefs
            .entry(descriptor.to_string())
            .or_default()
            .push(loader_caller());
        xrefs
    }

    /// Xref index with a single `string_to_methods` entry mapping `literal` to `caller`.
    fn xrefs_with_string_ref(literal: &str, caller: MethodKey) -> Xrefs {
        let mut xrefs = Xrefs::default();
        xrefs
            .string_to_methods
            .entry(literal.to_string())
            .or_default()
            .push(caller);
        xrefs
    }

    /// Creates a `Medium` DYNAMIC_LOADING finding with `detail`, runs the
    /// partition rewrite over it with the given groups, and returns it.
    fn partitioned_finding(
        detail: &'static str,
        referenced: &[&str],
        unreferenced: &[&str],
        non_class: &[&str],
    ) -> Finding {
        let referenced = owned(referenced);
        let unreferenced = owned(unreferenced);
        let non_class = owned(non_class);
        let mut finding = Finding::new(
            "DYNAMIC_LOADING",
            droidsaw_common::Layer::Apk,
            Severity::Medium,
            detail,
        );
        apply_dynamic_loading_partition_to_finding(
            &mut finding,
            &referenced,
            &unreferenced,
            &non_class,
        );
        finding
    }

    /// A descriptor-form token classifies to the descriptor/dotted pair.
    #[test]
    fn classify_descriptor_form_yields_descriptor_and_dot_form() {
        assert_eq!(
            classify_dynamic_loading_token(DEX_CLASS_LOADER_DESC),
            dex_class_loader_pair()
        );
    }

    /// A dotted-form token classifies to the same pair.
    #[test]
    fn classify_dot_form_yields_descriptor_and_dot_form() {
        assert_eq!(
            classify_dynamic_loading_token(DEX_CLASS_LOADER_DOTTED),
            dex_class_loader_pair()
        );
    }

    /// The bare class name `DexClassLoader` classifies to the same pair.
    #[test]
    fn classify_bare_class_name_resolves_via_known_table() {
        assert_eq!(
            classify_dynamic_loading_token("DexClassLoader"),
            dex_class_loader_pair()
        );
    }

    /// `loadLibrary` and `System` are not classified as class-load tokens.
    #[test]
    fn classify_loadlibrary_token_returns_none() {
        for token in ["loadLibrary", "System"] {
            assert!(classify_dynamic_loading_token(token).is_none());
        }
    }

    /// An arbitrary well-formed descriptor is converted to its dotted form.
    #[test]
    fn classify_generic_descriptor_form_works() {
        let expected = Some(("Lcom/foo/Bar;".to_string(), "com.foo.Bar".to_string()));
        assert_eq!(classify_dynamic_loading_token("Lcom/foo/Bar;"), expected);
    }

    /// Degenerate descriptors and unknown bare names yield `None`.
    #[test]
    fn classify_malformed_descriptor_returns_none() {
        for token in ["L;", "L", "RandomClassName"] {
            assert!(classify_dynamic_loading_token(token).is_none());
        }
    }

    /// With an empty xref index no spelling of the token counts as referenced.
    #[test]
    fn unreferenced_token_returns_false_against_empty_xrefs() {
        let xrefs = Xrefs::default();
        for token in [DEX_CLASS_LOADER_DOTTED, DEX_CLASS_LOADER_DESC, "DexClassLoader"] {
            assert!(!dynamic_loading_token_referenced(&xrefs, token));
        }
    }

    /// A `type_xrefs` entry for the descriptor satisfies every spelling.
    #[test]
    fn type_xref_hit_resolves_descriptor_and_dot_form() {
        let xrefs = xrefs_with_type_ref(DEX_CLASS_LOADER_DESC);
        for token in [DEX_CLASS_LOADER_DESC, DEX_CLASS_LOADER_DOTTED, "DexClassLoader"] {
            assert!(dynamic_loading_token_referenced(&xrefs, token));
        }
    }

    /// A dotted-form string reference satisfies both descriptor and dotted tokens.
    #[test]
    fn string_to_methods_dot_form_hit_resolves() {
        let xrefs = xrefs_with_string_ref(DEX_CLASS_LOADER_DOTTED, loader_caller());
        for token in [DEX_CLASS_LOADER_DESC, DEX_CLASS_LOADER_DOTTED] {
            assert!(dynamic_loading_token_referenced(&xrefs, token));
        }
    }

    /// A descriptor-form string reference satisfies the descriptor token.
    #[test]
    fn string_to_methods_descriptor_form_hit_resolves() {
        let xrefs = xrefs_with_string_ref(DEX_CLASS_LOADER_DESC, loader_caller());
        assert!(dynamic_loading_token_referenced(&xrefs, DEX_CLASS_LOADER_DESC));
    }

    /// A token that does not classify still matches on its raw string reference.
    #[test]
    fn raw_string_match_in_string_to_methods_resolves() {
        let caller = mk_method_key("Lcom/app/Reflective;", "call");
        let xrefs = xrefs_with_string_ref("loadLibrary", caller);
        assert!(dynamic_loading_token_referenced(&xrefs, "loadLibrary"));
    }

    /// With no xrefs, every class-load string lands in the unreferenced group.
    #[test]
    fn partition_empty_xrefs_groups_all_class_load_as_unreferenced() {
        let xrefs = Xrefs::default();
        let strings = owned(&[DEX_CLASS_LOADER_DESC, DEX_CLASS_LOADER_DOTTED]);
        let (referenced, unreferenced, non_class) =
            partition_dynamic_loading_strings(&xrefs, &strings);
        assert!(referenced.is_empty(), "no xref entries ⇒ no referenced");
        assert_eq!(unreferenced.len(), 2);
        assert!(non_class.is_empty());
    }

    /// Only the descriptor with a type xref is grouped as referenced.
    #[test]
    fn partition_with_xref_hit_groups_class_load_as_referenced() {
        let xrefs = xrefs_with_type_ref(DEX_CLASS_LOADER_DESC);
        let strings = owned(&[DEX_CLASS_LOADER_DESC, PATH_CLASS_LOADER_DESC]);
        let (referenced, unreferenced, non_class) =
            partition_dynamic_loading_strings(&xrefs, &strings);
        assert_eq!(referenced, vec![DEX_CLASS_LOADER_DESC.to_string()]);
        assert_eq!(unreferenced, vec![PATH_CLASS_LOADER_DESC.to_string()]);
        assert!(non_class.is_empty());
    }

    /// A `loadLibrary` string goes to the non-class-load group.
    #[test]
    fn partition_loadlibrary_grouped_as_non_class_load() {
        let xrefs = Xrefs::default();
        let strings = owned(&["loadLibrary in System", DEX_CLASS_LOADER_DESC]);
        let (_referenced, unreferenced, non_class) =
            partition_dynamic_loading_strings(&xrefs, &strings);
        assert_eq!(non_class, vec!["loadLibrary in System".to_string()]);
        assert_eq!(unreferenced, vec![DEX_CLASS_LOADER_DESC.to_string()]);
    }

    /// All-unreferenced input downgrades the finding to `Info` and explains why.
    #[test]
    fn apply_partition_downgrades_when_all_unreferenced() {
        let finding = partitioned_finding(
            "Dynamic code loading in DEX strings: Ldalvik/system/DexClassLoader;",
            &[],
            &[DEX_CLASS_LOADER_DESC],
            &[],
        );
        assert_eq!(finding.severity, Severity::Info);
        assert!(
            finding
                .detail
                .contains("present in DEX string pool but no function references"),
            "detail must explain the type-pool-only case; got {:?}",
            finding.detail
        );
        assert!(
            finding.detail.contains(DEX_CLASS_LOADER_DESC),
            "detail must list the unreferenced descriptors"
        );
    }

    /// Any referenced descriptor keeps the finding at `Medium`.
    #[test]
    fn apply_partition_keeps_medium_when_any_referenced() {
        let finding = partitioned_finding(
            "Dynamic code loading in DEX strings: Ldalvik/system/DexClassLoader;",
            &[DEX_CLASS_LOADER_DESC],
            &[],
            &[],
        );
        assert_eq!(finding.severity, Severity::Medium);
        assert!(
            finding
                .detail
                .contains("function references confirmed via dex xref index"),
            "detail must reflect xref confirmation; got {:?}",
            finding.detail
        );
    }

    /// A mixed partition stays `Medium` and calls out the gated descriptors.
    #[test]
    fn apply_partition_keeps_medium_with_unreferenced_note_when_partial() {
        let finding = partitioned_finding(
            "Dynamic code loading in DEX strings: A, B",
            &[DEX_CLASS_LOADER_DESC],
            &[PATH_CLASS_LOADER_DESC],
            &[],
        );
        assert_eq!(finding.severity, Severity::Medium);
        assert!(finding.detail.contains(DEX_CLASS_LOADER_DESC));
        assert!(finding.detail.contains(PATH_CLASS_LOADER_DESC));
        assert!(
            finding.detail.contains("not counted toward this finding"),
            "detail must call out the gated descriptors; got {:?}",
            finding.detail
        );
    }

    /// Non-class-load strings alone keep the finding at `Medium`.
    #[test]
    fn apply_partition_keeps_medium_when_only_non_class_load() {
        let finding = partitioned_finding(
            "Dynamic code loading in DEX strings: System loadLibrary call",
            &[],
            &[],
            &["System loadLibrary call"],
        );
        assert_eq!(finding.severity, Severity::Medium);
        assert!(finding.detail.contains("System loadLibrary call"));
    }
}
/// Tests for the `detectors` status envelope and assorted audit helpers
/// (`collect_apk_findings`, `sanitize_detail`, `cap_findings`, and the Hermes
/// finding-channel hygiene in `CrossLayerContext`).
#[cfg(test)]
mod detector_status_tests {
    use super::*;
    use droidsaw_cli_contract::AuditMode;

    /// Returns `detectors[detector].status` as reported by
    /// `build_detectors_status` for `mode` and the optional trufflehog envelope.
    ///
    /// Panics when the detector entry or its string `status` field is missing.
    fn detector_status(
        mode: AuditMode,
        trufflehog_result: Option<&Value>,
        detector: &str,
    ) -> String {
        let v = build_detectors_status(mode, trufflehog_result);
        v.get(detector)
            .and_then(|d| d.get("status"))
            .and_then(|s| s.as_str())
            .expect("detector status field present")
            .to_string()
    }

    /// Status of `detector` under `mode` when no trufflehog envelope is supplied.
    fn status_for(mode: AuditMode, detector: &str) -> String {
        detector_status(mode, None, detector)
    }

    /// Status of `detector` under `mode` given a trufflehog result envelope.
    fn status_for_with_trufflehog(
        mode: AuditMode,
        trufflehog_result: &Value,
        detector: &str,
    ) -> String {
        detector_status(mode, Some(trufflehog_result), detector)
    }

    /// Basic mode runs yara and skips both subprocess detectors.
    #[test]
    fn basic_mode_skips_both_subprocess_detectors() {
        assert_eq!(status_for(AuditMode::Basic, "yara"), "ran");
        assert_eq!(status_for(AuditMode::Basic, "semgrep"), "skipped_by_mode");
        assert_eq!(status_for(AuditMode::Basic, "trufflehog"), "skipped_by_mode");
    }

    /// Full mode with a `ran: true` envelope reports trufflehog as `ran`.
    #[test]
    fn full_mode_with_ran_trufflehog_envelope_reports_ran() {
        let envelope = json!({
            "ran": true,
            "hit_count": 5,
            "verified_count": 2,
            "unverified_count": 3,
            "credentials_written": 5,
            "db_table": "credentials",
            "note": "…",
        });
        for (detector, expected) in [
            ("yara", "ran"),
            ("semgrep", "extracted"),
            ("trufflehog", "ran"),
        ] {
            assert_eq!(
                status_for_with_trufflehog(AuditMode::Full, &envelope, detector),
                expected,
            );
        }
    }

    /// Semgrep mode extracts for semgrep and skips trufflehog.
    #[test]
    fn semgrep_mode_extracts_only_semgrep() {
        assert_eq!(status_for(AuditMode::Semgrep, "yara"), "ran");
        assert_eq!(status_for(AuditMode::Semgrep, "semgrep"), "extracted");
        assert_eq!(status_for(AuditMode::Semgrep, "trufflehog"), "skipped_by_mode");
    }

    /// An envelope carrying an `error` field surfaces as status `error`.
    #[test]
    fn trufflehog_mode_with_error_envelope_reports_error() {
        let envelope = json!({
            "ran": false,
            "error": "spawn failed: ENOENT",
        });
        for (detector, expected) in [
            ("yara", "ran"),
            ("semgrep", "skipped_by_mode"),
            ("trufflehog", "error"),
        ] {
            assert_eq!(
                status_for_with_trufflehog(AuditMode::Trufflehog, &envelope, detector),
                expected,
            );
        }
    }

    /// A `ran: false` envelope without `error` resolves to one of the two
    /// short-circuit statuses; which one depends on the host, so either passes.
    #[test]
    fn trufflehog_short_circuit_envelope_disambiguates_via_path_check() {
        let envelope = json!({
            "ran": false,
            "strings_file": "/tmp/droidsaw-strings.txt",
            "command": "trufflehog filesystem /tmp/droidsaw-strings.txt --json --no-verification",
        });
        let status = status_for_with_trufflehog(AuditMode::Trufflehog, &envelope, "trufflehog");
        assert!(
            status == "binary_missing" || status == "no_strings_extracted",
            "expected binary_missing or no_strings_extracted, got {status:?}",
        );
    }

    /// Findings stored in `ctx.hermes_findings` appear in `collect_apk_findings` output.
    #[test]
    fn collect_apk_findings_surfaces_hermes_findings_from_context() {
        use crate::context::CrossLayerContext;
        use droidsaw_common::finding::{Confidence, Finding, Layer, Severity};
        let mut f = Finding::new(
            "HERMES_V98_FORM_AMBIGUOUS",
            Layer::Hbc,
            Severity::Low,
            "synthetic regression test".to_string(),
        );
        f.confidence = Confidence::Verified;
        let ctx = CrossLayerContext {
            path: "test".to_string(),
            apk: None,
            hbc: None,
            hbc_parse_error: None,
            dex: Vec::new(),
            dex_direct_bytes: None,
            loaded_split_names: Vec::new(),
            // `f` is not needed afterwards, so it is moved rather than cloned.
            hermes_findings: vec![f],
            permissive_recovery: droidsaw_apk::PermissiveRecoveryOpts::default(),
        };
        let out = collect_apk_findings(&ctx, 4.5);
        assert!(
            out.iter().any(|x| x.id == "HERMES_V98_FORM_AMBIGUOUS"),
            "collect_apk_findings must surface ctx.hermes_findings into the audit envelope; got {:?}",
            out.iter().map(|x| &x.id).collect::<Vec<_>>()
        );
    }

    /// ESC, BEL and NUL are replaced with `?`; tab and newline pass through.
    #[test]
    fn sanitize_detail_strips_terminal_escape_sequences() {
        let attacker_input = "evil.apk\x1b[2Jcleared\x07bell\x00null";
        let sanitized = super::sanitize_detail(attacker_input);
        assert_eq!(
            sanitized, "evil.apk?[2Jcleared?bell?null",
            "control bytes (0x1b, 0x07, 0x00) must be replaced with `?`"
        );
        let ok_input = "META-INF/MANIFEST.MF\tat line\n123";
        assert_eq!(super::sanitize_detail(ok_input), ok_input);
    }

    /// 300 findings are cut to the cap plus one trailing truncation marker
    /// that reports how many were dropped.
    #[test]
    fn cap_findings_truncates_above_threshold_with_synthetic_marker() {
        use droidsaw_common::finding::{Layer, Severity};
        let v: Vec<Finding> = (0..300)
            .map(|i| {
                Finding::new(
                    "DEX_DETECTOR_INDETERMINATE",
                    Layer::Dex,
                    Severity::Low,
                    format!("detector-indeterminate at offset {i}"),
                )
            })
            .collect();
        let capped = super::cap_findings(v, "DEX_DETECTOR_INDETERMINATE_TRUNCATED", Layer::Dex);
        assert_eq!(
            capped.len(),
            super::FINDING_CAP_PER_KIND + 1,
            "capped Vec must be cap + 1 (truncation marker)"
        );
        let last = capped.last().expect("non-empty after cap");
        assert_eq!(last.id, "DEX_DETECTOR_INDETERMINATE_TRUNCATED");
        assert!(
            last.detail.contains("44 additional"),
            "truncation Finding must report the dropped count (300 - 256 = 44); got {:?}",
            last.detail
        );
    }

    /// Ten findings pass through with no marker appended.
    #[test]
    fn cap_findings_below_threshold_is_unchanged() {
        use droidsaw_common::finding::{Layer, Severity};
        let v: Vec<Finding> = (0..10)
            .map(|i| {
                Finding::new(
                    "DEX_HEADER_MAP",
                    Layer::Dex,
                    Severity::Low,
                    format!("hdr-map at offset {i}"),
                )
            })
            .collect();
        // The input is not inspected again, so it is handed over by value.
        let capped = super::cap_findings(v, "DEX_HEADER_MAP_TRUNCATED", Layer::Dex);
        assert_eq!(capped.len(), 10);
        assert!(capped.iter().all(|f| f.id == "DEX_HEADER_MAP"));
    }

    /// A finding left in the Hermes channel before `CrossLayerContext::parse`
    /// is gone afterwards, even though the parse target does not exist.
    #[test]
    fn context_parse_clears_residual_hermes_findings_on_entry() {
        use droidsaw_hermes::finding::{
            discard_findings, drain_findings_for_test, emit_finding, HermesFinding,
        };
        let residual = || HermesFinding::V98FormAmbiguous {
            early_options: 0x00,
            late_options: 0x00,
            function_count: 1,
            debug_with: 0,
            debug_without: 0,
            picked_late: false,
        };
        discard_findings();
        // First prove that emitting actually lands in the channel.
        emit_finding(residual());
        assert_eq!(
            drain_findings_for_test().len(),
            1,
            "preconditions: emit_finding must populate the channel"
        );
        // Then leave one finding behind and let `parse` run over it.
        emit_finding(residual());
        let nonexistent = std::path::Path::new("/tmp/droidsaw-test-finding-drain-nonexistent.bin");
        let _ = crate::context::CrossLayerContext::parse(nonexistent, None);
        let leftover = drain_findings_for_test();
        assert!(
            leftover.is_empty(),
            "Context::parse must call discard_findings() on entry to clear cross-tenant residual; got {leftover:?}"
        );
    }

    /// Dropping a `HermesFindingDrainGuard` empties the channel without an explicit drain.
    #[test]
    fn hermes_finding_drain_guard_drops_drains_channel_on_scope_exit() {
        use crate::context::HermesFindingDrainGuard;
        use droidsaw_hermes::finding::{
            discard_findings, drain_findings_for_test, emit_finding, HermesFinding,
        };
        discard_findings();
        {
            let _guard = HermesFindingDrainGuard::install_discard();
            emit_finding(HermesFinding::OverflowIndexOutOfRange {
                index: 7,
                count: 3,
            });
        }
        let leftover = drain_findings_for_test();
        assert!(
            leftover.is_empty(),
            "HermesFindingDrainGuard's Drop must discard findings even when caller does not drain explicitly; got {leftover:?}"
        );
    }

    /// Under every audit mode, both subprocess detectors expose a boolean `binary_on_path`.
    #[test]
    fn detectors_field_carries_binary_on_path_for_subprocess_detectors() {
        for mode in [AuditMode::Basic, AuditMode::Full, AuditMode::Semgrep, AuditMode::Trufflehog] {
            let v = build_detectors_status(mode, None);
            for detector in ["semgrep", "trufflehog"] {
                assert!(
                    v.get(detector)
                        .and_then(|d| d.get("binary_on_path"))
                        .and_then(|b| b.as_bool())
                        .is_some(),
                    "{detector}.binary_on_path must be a bool under mode={mode:?}",
                );
            }
        }
    }
}
/// Pins the `tool_version` field of the `_meta` envelope to the crate version.
#[cfg(test)]
mod meta_tool_version_tests {
    use super::meta;

    /// `meta(..)` must report `tool_version` equal to `CARGO_PKG_VERSION`
    /// as resolved at compile time.
    #[test]
    fn meta_tool_version_matches_cargo_pkg_version() {
        let envelope = meta(1, false, "hint", &["strings"]);
        let reported = envelope.get("tool_version").and_then(|v| v.as_str());
        assert_eq!(
            reported,
            Some(env!("CARGO_PKG_VERSION")),
            "_meta.tool_version must equal CARGO_PKG_VERSION; got: {envelope}",
        );
    }
}