droidsaw 2.0.0

DROIDSAW — unified Android reverse engineering CLI. Hermes, DEX, APK signing. JSON output, MCP server. Bytecode is not a security layer.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
use std::io::Read;
use std::path::Path;

use anyhow::Result;
use droidsaw_apk::Apk;
use droidsaw_dex::DexFile;
use droidsaw_hermes::parser::HbcFile;
use self_cell::self_cell;

self_cell!(
    /// Owned Hermes bytecode: holds the raw byte buffer alongside the
    /// parsed `HbcFile<'_>` view that borrows from it. Replaces the prior
    /// `Box::leak`-based `&'static [u8]` trick, which leaked one buffer
    /// per APK load and made long-running services (MCP) unusable.
    ///
    /// Construct via `HbcOwned::parse`; read through `hbc()` (parsed
    /// view) and `bytes()` (raw buffer).
    pub struct HbcOwned {
        // Raw bundle bytes; the buffer the dependent view is parsed from.
        owner: Vec<u8>,

        // Parsed view built from `&owner` by the `try_new` builder closure.
        // `#[covariant]` is self_cell's marker for a dependent that is
        // covariant over its borrow lifetime; it is what makes the
        // `borrow_dependent()` accessor used by `hbc()` available.
        #[covariant]
        dependent: HbcFile,
    }
);

impl HbcOwned {
    /// Parse a byte buffer into an owned `HbcFile`, optionally charging
    /// `budget` for the input size. See [`HbcFile::parse`] for details.
    ///
    /// Pass `None` to skip budget enforcement; pass `Some(&mut budget)` at
    /// trust boundaries (MCP `load`, server loops). This is the single
    /// canonical parse entry — the prior `parse_budgeted` parallel sibling
    /// was collapsed; the single canonical entry makes the budget
    /// choice explicit at the public call site.
    pub fn parse(
        data: Vec<u8>,
        budget: Option<&mut droidsaw_common::budget::ParseBudget>,
    ) -> Result<Self> {
        HbcOwned::try_new(data, |buf| {
            // `anyhow::Error::new` preserves the concrete `HermesError` type
            // in the error chain, which `classify()` downcasts to choose an
            // `ErrorClass::UserInput` arm without substring matching.
            HbcFile::parse(buf, budget).map_err(anyhow::Error::new)
        })
    }

    /// Borrow the parsed `HbcFile` view.
    pub fn hbc(&self) -> &HbcFile<'_> {
        self.borrow_dependent()
    }

    /// Borrow the raw bytes backing the parsed view.
    pub fn bytes(&self) -> &[u8] {
        self.borrow_owner()
    }
}

/// Recorded parse failure for an optionally-discovered Hermes bundle.
///
/// When an APK carries `assets/index.android.bundle` (or another
/// recognized HBC asset) that `HbcOwned::parse` rejects, the failure is
/// stored here instead of aborting the whole context — the dex/apk
/// layers stay serviceable. The inner `anyhow::Error` is kept intact
/// (Arc-shared) so re-surfacing it through an hbc-targeted command
/// preserves the concrete `HermesError` in the chain, which
/// `error::classify()` downcasts without substring matching.
#[derive(Debug, Clone)]
pub struct HbcParseFailure {
    // The original parse error. `Arc` makes the struct `Clone` (needed by
    // `to_error`, which clones `self` into a fresh `anyhow::Error`) while
    // every clone keeps pointing at the same underlying error.
    error: std::sync::Arc<anyhow::Error>,
}

impl HbcParseFailure {
    fn new(error: anyhow::Error) -> Self {
        Self { error: std::sync::Arc::new(error) }
    }

    /// Full failure message including the inner error chain
    /// (`{:#}`-formatted), without the "present but unparseable"
    /// prefix — callers add their own framing.
    pub fn message(&self) -> String {
        format!("{:#}", self.error)
    }

    /// Build a fresh `anyhow::Error` that carries this failure (and,
    /// transitively, the original `HermesError`) in its source chain.
    pub fn to_error(&self) -> anyhow::Error {
        anyhow::Error::new(self.clone())
    }
}

impl std::fmt::Display for HbcParseFailure {
    /// Writes the fixed one-line summary; the underlying cause is exposed
    /// separately through `Error::source` and `HbcParseFailure::message`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        const SUMMARY: &str = "Hermes bundle present in target but unparseable";
        f.write_str(SUMMARY)
    }
}

impl std::error::Error for HbcParseFailure {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        // `&anyhow::Error` → `&(dyn Error + Send + Sync)` → coerce.
        Some(self.error.as_ref().as_ref())
    }
}

/// Parsed view of one input file across the APK, Hermes bytecode (HBC),
/// and DEX layers. Built by [`Self::parse`] / [`Self::parse_with_splits`];
/// which fields are populated depends on whether the input was a
/// container, a raw `.hbc`, or a raw `.dex`.
pub struct CrossLayerContext {
    /// Input file path the context was loaded from. Mirrors `apk.path`
    /// when an APK was parsed; set independently for raw HBC inputs so
    /// bytecode-only commands still have a path to report.
    pub path: String,
    /// Parsed APK container, when the input was an APK / AAB / XAPK.
    /// `None` for raw bytecode inputs (e.g. a standalone `.hbc` extracted
    /// from a bundle). Commands that need APK metadata (manifest, signing,
    /// native libs, audit, yara) call [`Self::require_apk`] to surface a
    /// clear error instead of unwrapping; bytecode-only commands (`hbc`
    /// variants, `decompile`, etc.) read `hbc` / `dex` directly.
    pub apk: Option<Apk>,
    /// Parsed Hermes bytecode. `Some` for a raw `.hbc` input, or when the
    /// container carried a bundle that parsed successfully; `None` when
    /// there is no bundle or its parse failed (see `hbc_parse_error`).
    pub hbc: Option<HbcOwned>,
    /// Parse failure for an APK's optionally-discovered Hermes bundle.
    /// `Some` exactly when the container carried a bundle that
    /// `HbcOwned::parse` rejected (then `hbc` is `None` and the dex/apk
    /// layers still serve). Always `None` for raw `.hbc` inputs — an
    /// explicitly-targeted bundle failing to parse is a hard error at
    /// [`Self::parse`], not recorded state. Surfaced by `info`
    /// (`layers.hbc_parse_error`), the audit envelope
    /// (`HBC_BUNDLE_UNPARSEABLE` finding), and every hbc-targeted
    /// command via [`Self::require_hbc`] / [`Self::ensure_hbc_parsed`].
    pub hbc_parse_error: Option<HbcParseFailure>,
    /// Parsed DEX files: one per `apk.dex` entry for container inputs, a
    /// single element for a raw `.dex` input, empty for raw HBC inputs.
    pub dex: Vec<DexFile>,
    /// Raw bytes of `dex[0]` when the input was a single standalone `.dex`
    /// file. `None` for APK/AAB inputs (those route raw bytes through
    /// `apk.dex[i].data`). Callers that need bytes (xrefs builder, semgrep
    /// emit, dex decompile, finding xrefs indexer) use [`Self::dex_bytes`]
    /// to dispatch.
    pub dex_direct_bytes: Option<Vec<u8>>,
    /// Names of co-located split APKs that were auto-discovered and merged
    /// into `apk` via `Apk::parse_multiple`. Populated by
    /// [`Self::parse_with_splits`] when `--no-auto-splits` is not set;
    /// empty for raw HBC / DEX inputs and for AAB / XAPK containers (those
    /// already carry their own multi-module structure). The `info` command
    /// surfaces this list so analysts know the full bundle scope.
    pub loaded_split_names: Vec<String>,
    /// Per-bundle Findings drained from `droidsaw_hermes`'s thread-local
    /// channel after each `HbcFile::parse`. Stored already translated:
    /// every HermesFinding variant is mapped to a workspace-shared
    /// `droidsaw_common::Finding` via
    /// [`droidsaw_hermes::finding::findings_as_common`]. Consumed by
    /// `collect_apk_findings` so the per-HBC mitigations surface to
    /// operators via the audit envelope rather than dying in the
    /// thread-local.
    pub hermes_findings: Vec<droidsaw_common::finding::Finding>,
    /// CLI-resolved `permissive_recovery.*` opt-ins. Threaded from the
    /// top-binary `--permissive-recovery=foo,bar` flag into per-command parse
    /// sites that build `ParseConfig` (e.g., `commands::manifest`).
    /// Defaults to no opts enabled — matches `ParseConfig::strict`'s
    /// `permissive_recovery` field.
    pub permissive_recovery: droidsaw_apk::PermissiveRecoveryOpts,
}

impl CrossLayerContext {
    /// Leading 8 bytes of a Hermes bytecode bundle: the 4-byte magic
    /// followed by the 4-byte sub-magic. Every HBC version (v40–v100
    /// documented in droidsaw-hermes) starts with it. [`Self::parse`]
    /// compares against all 8 bytes to tell raw HBC input apart from
    /// APK / AAB zip input — 8 bytes is unambiguous against any contrived
    /// corrupted ZIP prefix.
    const HBC_MAGIC: [u8; 8] = [0xc6, 0x1f, 0xbc, 0x03, 0xc1, 0x03, 0x19, 0x1f];

    /// ZIP local file header magic (`PK\x03\x04`); matches APK / AAB /
    /// XAPK zip containers.
    const ZIP_MAGIC: [u8; 4] = *b"PK\x03\x04";

    /// First 4 bytes of every Dalvik executable (`dex\n`). The 3-byte
    /// ASCII version that follows (e.g. `035`, `039`, `041`) and the null
    /// terminator at byte 7 are validated by `DexFile::parse`; the sniff
    /// only confirms this prefix.
    const DEX_MAGIC_PREFIX: [u8; 4] = *b"dex\n";

    /// Empty droidsaw_hermes's per-thread Finding channel and convert the
    /// per-bundle records into workspace-shared `Finding` payloads.
    ///
    /// Call-site discipline: invoke unconditionally after every
    /// `HbcOwned::parse` (Ok and Err alike) on the same thread. The
    /// channel is thread-local, so findings emitted by a parse that then
    /// returned `Err` via `?` would otherwise leak into the next parse on
    /// the same blocking-pool worker (tokio `spawn_blocking` reuses
    /// workers across tasks).
    pub(crate) fn drain_hermes_findings() -> Vec<droidsaw_common::finding::Finding> {
        use droidsaw_hermes::finding::{drain_findings, findings_as_common};

        let pending = drain_findings();
        findings_as_common(pending)
    }
}

/// RAII guard that discards the per-thread
/// `droidsaw_hermes::finding::FINDINGS` channel on Drop. Use at command
/// entry to defend against cross-tenant attribution leaks on tokio
/// `spawn_blocking` workers reused across tasks.
///
/// **Why RAII instead of explicit-drain.** The prior approach installed
/// explicit `drain_findings()` calls at end-of-command in `semgrep`,
/// `decompile`, and `decompile_hbc_all_js_stream`. Those drains sit AFTER
/// `?` / `bail!` / `fs::write?` / `create_dir_all?` I/O operations, so
/// SIGPIPE / ENOSPC / any other I/O failure between the emit-bearing
/// `optimize::optimize()` and the explicit drain strands findings in
/// TLS, reopening the cross-tenant leak. Additionally, four other
/// commands (xrefs / export / frida / strings) invoke emit-capable
/// hermes functions but never drain.
///
/// RAII closes both gaps: `Drop` fires on every exit path (Ok / Err /
/// panic unwind / propagated `?`), and installing the guard at command
/// entry is one line per command — uniform discipline regardless of the
/// command's internal control flow.
pub struct HermesFindingDrainGuard {
    // Private unit field: keeps the struct from being built with a struct
    // literal outside this module, so `install_discard` is the only
    // constructor visible here.
    _private: (),
}

impl Drop for HermesFindingDrainGuard {
    /// Throws away whatever findings are still queued on this thread's
    /// channel when the guard goes out of scope.
    fn drop(&mut self) {
        use droidsaw_hermes::finding::discard_findings;

        discard_findings();
    }
}

impl HermesFindingDrainGuard {
    /// Install a guard that discards findings on Drop. Pair with
    /// `discard_findings()` at entry as belt-and-suspenders against
    /// any prior tenant's stranded findings from before this guard
    /// was installed.
    ///
    /// Use at the top of every CrossLayerContext-bearing command that
    /// might trigger hermes emit_finding (directly via parse / scan, or
    /// transitively via decompile / optimize / scanner re-parse).
    #[must_use = "RAII guard must be bound to a let-binding; otherwise it Drops immediately"]
    pub fn install_discard() -> Self {
        droidsaw_hermes::finding::discard_findings();
        Self { _private: () }
    }
}

impl CrossLayerContext {

    /// 16 hex-digit digest of the bytes of the file at `path`, computed
    /// with std's `DefaultHasher` (currently SipHash; std does not
    /// guarantee the algorithm across releases).
    ///
    /// Matches the bundle-naming shape `droidsaw_common::diag` uses when
    /// the top-level binary wraps its decompile path in `with_input_hash`,
    /// so post-parse panics land in `<16-hex>/` instead of
    /// `unknown-<siphash>/`. Complements the DexFile parser's
    /// `DexFile::parse` wrapper; this wider wrap covers cfg / ssa /
    /// structure / emit.
    ///
    /// # Errors
    ///
    /// Returns the I/O error when the file cannot be read.
    pub fn input_hash(path: &Path) -> Result<String> {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};

        let contents = std::fs::read(path)?;
        let mut hasher = DefaultHasher::new();
        contents.hash(&mut hasher);
        let digest = hasher.finish();
        Ok(format!("{:016x}", digest))
    }

    /// Parse an input file, optionally charging `budget` for each byte
    /// slice read.
    ///
    /// The input kind is chosen from the file's leading bytes:
    /// - the 8-byte Hermes magic loads a raw HBC bundle (`apk` is `None`);
    /// - the 4-byte `dex\n` prefix loads a raw DEX (`apk` is `None`, the
    ///   raw bytes are kept in `dex_direct_bytes`);
    /// - anything else is handed to `Apk::parse`, whatever its leading
    ///   bytes are.
    ///
    /// When `budget` is `Some`, the budget is consumed incrementally:
    /// - Dex parse: input size charged before `DexFile::parse`.
    /// - HBC parse: input size charged before `HbcFile::parse`.
    /// - APK parse: delegates to `Apk::parse` (APK-level byte-cap guards
    ///   are already in place via `MAX_ENTRY_BYTES`).
    ///
    /// Pass `None` to skip budget enforcement (test contexts, one-shot CLI
    /// commands, or any call site where unbounded parsing is acceptable).
    /// Pass `Some(&mut budget)` at trust boundaries (MCP `load`, server
    /// loops) to prevent adversarial inputs from consuming unbounded
    /// resources.
    ///
    /// This is the single canonical parse entry. The prior `parse_budgeted`
    /// parallel sibling was collapsed — parallel public twins at the
    /// parser layer were silently letting MCP `load` thread no budget
    /// across the trust boundary.
    ///
    /// # Errors
    ///
    /// - I/O errors opening or reading `path`.
    /// - A raw HBC or raw DEX input that fails to parse.
    /// - An input `Apk::parse` rejects; when its leading bytes are not
    ///   the ZIP magic either, the error is the "unrecognized input"
    ///   format message instead of the APK parser's error.
    /// - A DEX entry inside the container that fails to parse.
    /// - An inner `BudgetExhausted` when `budget` is `Some` and any parse
    ///   step exceeds the remaining budget.
    ///
    /// A Hermes bundle discovered inside a container that fails to parse
    /// is NOT an error here; it is recorded in `hbc_parse_error`.
    pub fn parse(
        path: &Path,
        mut budget: Option<&mut droidsaw_common::budget::ParseBudget>,
    ) -> Result<Self> {
        // Cross-tenant defense: this thread may have hosted a prior
        // parse that emitted findings then returned Err via `?` before
        // its caller reached the drain. tokio's `spawn_blocking` worker
        // pool reuses threads across tasks, so a prior tenant's stranded
        // findings would otherwise be drained into THIS bundle's
        // `hermes_findings` field and mis-attributed.
        droidsaw_hermes::finding::discard_findings();

        // Sniff the first 8 bytes so a raw `.hbc` extracted from an APK
        // (or fetched from a bundle server) loads without an APK wrapper.
        // RE workflows routinely produce standalone bytecode dumps and
        // forcing a re-zip is a kludge — we model raw bytecode as a
        // context with no `apk` rather than fabricating an empty APK.
        //
        // The read is bounded with `take` + `read_to_end` rather than
        // `read_exact`: after `read_exact` fails with `UnexpectedEof` the
        // buffer contents are unspecified, so a short file's header could
        // not be relied on. Here bytes past EOF are guaranteed to stay
        // zero. A short file therefore never matches the 8-byte HBC
        // magic; the 4-byte prefix checks below still apply to whatever
        // bytes it does have.
        let mut header = [0u8; 8];
        let mut sniffed = Vec::with_capacity(header.len());
        std::fs::File::open(path)?
            .take(8)
            .read_to_end(&mut sniffed)?;
        for (slot, byte) in header.iter_mut().zip(&sniffed) {
            *slot = *byte;
        }

        if header == Self::HBC_MAGIC {
            // Raw HBC: parse whole-file into an owned HbcFile; the APK /
            // DEX slots stay empty.
            let bytes = std::fs::read(path)?;
            // Drain BEFORE propagating Err: parser-path emit sites fire
            // before typed-Err returns on adversarial input. Drain on Err
            // discards the findings (parse failed — Ok-tenant only gets
            // findings from a parse it owns); the unwrapped channel is
            // still empty afterward, which is the load-bearing property.
            let result = HbcOwned::parse(bytes, budget.as_deref_mut());
            let hermes_findings = Self::drain_hermes_findings();
            let hbc = result?;
            return Ok(Self {
                path: path.to_string_lossy().into_owned(),
                apk: None,
                hbc: Some(hbc),
                hbc_parse_error: None,
                dex: Vec::new(),
                dex_direct_bytes: None,
                loaded_split_names: Vec::new(),
                hermes_findings,
                permissive_recovery: droidsaw_apk::PermissiveRecoveryOpts::default(),
            });
        }

        if header[..4] == Self::DEX_MAGIC_PREFIX {
            // Raw DEX: parse whole-file into a single-element `dex` Vec so
            // commands iterating `ctx.dex` keep working unchanged. Raw
            // bytes live in `dex_direct_bytes` for the (dex_decompile /
            // xrefs / semgrep / finding-xrefs / trufflehog) sites that
            // need the bytes for a second pass. Version validation is
            // delegated to `DexFile::parse`, which rejects unsupported
            // version strings with a typed `DexError::UnsupportedVersion`.
            let bytes = std::fs::read(path)?;
            let dex = DexFile::parse(&bytes, budget.as_deref_mut())
                .map_err(|e| anyhow::anyhow!(e))?;
            return Ok(Self {
                path: path.to_string_lossy().into_owned(),
                apk: None,
                hbc: None,
                hbc_parse_error: None,
                dex: vec![dex],
                dex_direct_bytes: Some(bytes),
                loaded_split_names: Vec::new(),
                hermes_findings: Vec::new(),
                permissive_recovery: droidsaw_apk::PermissiveRecoveryOpts::default(),
            });
        }

        // Non-PK leading magic is NOT an early reject. Android's libziparchive is
        // EOCD-anchored: it installs a file with forged leading bytes as long as the
        // central directory is valid (polymock / Janus content-type evasion). Mirror
        // that — attempt the EOCD-anchored APK parse; a forged-magic-but-valid-zip
        // parses and `Apk::parse` emits APK_ZIP_LEADING_NON_ZIP_BYTES. Only when the
        // parse also fails, and the leading bytes weren't a ZIP signature, is the
        // input truly unrecognized and the format error surfaced.
        let apk = match Apk::parse(path) {
            Ok(apk) => apk,
            Err(e) => {
                if header[..4] != Self::ZIP_MAGIC {
                    anyhow::bail!(
                        "unrecognized input: file begins with {:02x} {:02x} {:02x} {:02x}; \
                         expected APK / AAB / XAPK (PK\\x03\\x04), raw HBC (c6 1f bc 03 c1 03 19 1f), \
                         or raw DEX (64 65 78 0a 'dex\\n')",
                        header[0], header[1], header[2], header[3],
                    );
                }
                return Err(e.into());
            }
        };

        // Order matters: parse first, then drain unconditionally, then
        // record. Closes the parser-emit-then-Err leak shape.
        let hbc_result = apk
            .hbc
            .as_ref()
            .map(|data| HbcOwned::parse(data.clone(), budget.as_deref_mut()));
        let hermes_findings = Self::drain_hermes_findings();
        // Contain, honestly: the bundle was discovered optionally (the
        // user targeted the APK, not the bundle), so a bundle parse
        // failure is recorded state — not a context abort that would
        // take the healthy dex/apk layers down with it. Findings the
        // failed parse emitted stay on the context: partial signal from
        // a broken bundle belongs in the audit envelope. Budget already
        // consumed by the failed parse is not refunded — the cap holds.
        let (hbc, hbc_parse_error) = match hbc_result {
            Some(Ok(h)) => (Some(h), None),
            Some(Err(e)) => (None, Some(HbcParseFailure::new(e))),
            None => (None, None),
        };

        // `DexFile` is fully owned (no lifetime parameter) — it copies
        // everything out of the byte slice at parse time, so we can
        // borrow `dex_entry.data` directly without any lifetime trick.
        // Unlike the bundle above, a DEX failure aborts the whole parse.
        let mut dex = Vec::new();
        for dex_entry in &apk.dex {
            dex.push(DexFile::parse(&dex_entry.data, budget.as_deref_mut())
                .map_err(|e| anyhow::anyhow!(e))?);
        }

        Ok(Self {
            path: apk.path.clone(),
            apk: Some(apk),
            hbc,
            hbc_parse_error,
            dex,
            dex_direct_bytes: None,
            loaded_split_names: Vec::new(),
            hermes_findings,
            permissive_recovery: droidsaw_apk::PermissiveRecoveryOpts::default(),
        })
    }

    /// Parse an APK (or HBC / DEX) with optional split auto-discovery.
    ///
    /// When `no_auto_splits` is `false` and the input is an APK / AAB /
    /// XAPK (ZIP magic), co-located split APK files are discovered via
    /// [`droidsaw_apk::split::discover_splits`] and merged into a single
    /// logical `Apk` via [`Apk::parse_multiple`]. The discovered split
    /// names are recorded in [`Self::loaded_split_names`] so the `info`
    /// command can surface them.
    ///
    /// When `no_auto_splits` is `true`, or when discovery finds no
    /// splits, the container is parsed on its own with `Apk::parse`. Any
    /// input whose leading bytes are not the ZIP magic (raw HBC, raw DEX,
    /// or a content-type-disguised container) is delegated to
    /// [`Self::parse`] unchanged.
    ///
    /// `budget` has the same meaning as in [`Self::parse`].
    ///
    /// # Errors
    ///
    /// I/O errors opening or reading `path`, container parse errors from
    /// `Apk::parse` / `Apk::parse_multiple`, DEX entry parse errors, and
    /// everything [`Self::parse`] returns on the delegated paths. A
    /// Hermes bundle that fails to parse is recorded in
    /// `hbc_parse_error` rather than returned.
    pub fn parse_with_splits(
        path: &Path,
        no_auto_splits: bool,
        mut budget: Option<&mut droidsaw_common::budget::ParseBudget>,
    ) -> Result<Self> {
        // Cross-tenant defense: a prior task on this reused worker thread
        // may have stranded findings in the thread-local channel.
        droidsaw_hermes::finding::discard_findings();

        // Only the 4-byte ZIP magic decides the route here. The read is
        // bounded with `take` + `read_to_end` rather than `read_exact`:
        // after `read_exact` fails with `UnexpectedEof` the buffer
        // contents are unspecified, whereas here bytes past EOF are
        // guaranteed to stay zero.
        let mut magic = [0u8; 4];
        let mut sniffed = Vec::with_capacity(magic.len());
        std::fs::File::open(path)?
            .take(4)
            .read_to_end(&mut sniffed)?;
        for (slot, byte) in magic.iter_mut().zip(&sniffed) {
            *slot = *byte;
        }

        // Everything without the ZIP magic goes to `Self::parse`:
        // - raw HBC / raw DEX: split discovery is only meaningful for
        //   APKs (neither magic can start with `PK\x03\x04`, so this one
        //   check covers both);
        // - non-PK leading magic: a content-type-disguised APK (polymock /
        //   Janus) has no `split_config.*` siblings, so split discovery is
        //   meaningless. `Self::parse` attempts the EOCD-anchored parse and
        //   emits APK_ZIP_LEADING_NON_ZIP_BYTES, or bails as unrecognized
        //   if the parse fails.
        if magic != Self::ZIP_MAGIC {
            return Self::parse(path, budget);
        }

        // APK path: optionally discover splits.
        let split_paths = if no_auto_splits {
            Default::default()
        } else {
            // NOTE(review): a discovery error is treated the same as "no
            // splits found" — presumably deliberate best-effort; confirm.
            droidsaw_apk::split::discover_splits(path).unwrap_or_default()
        };
        let (apk, split_names) = if split_paths.is_empty() {
            (Apk::parse(path)?, Vec::new())
        } else {
            let names: Vec<String> = split_paths
                .iter()
                .map(|p| droidsaw_apk::split::split_name(p))
                .collect();
            // Build the full path list: base first, then splits.
            let mut all_paths = vec![path.to_path_buf()];
            all_paths.extend(split_paths);
            (Apk::parse_multiple(&all_paths)?, names)
        };

        // Parse the optional bundle, then drain the findings channel
        // unconditionally BEFORE inspecting the result, so findings
        // emitted by a failed parse cannot leak into the next parse on
        // this thread. A bundle parse failure is contained as recorded
        // state; the dex/apk layers stay usable.
        let hbc_result = apk
            .hbc
            .as_ref()
            .map(|data| HbcOwned::parse(data.clone(), budget.as_deref_mut()));
        let hermes_findings = Self::drain_hermes_findings();
        let (hbc, hbc_parse_error) = match hbc_result {
            Some(Ok(h)) => (Some(h), None),
            Some(Err(e)) => (None, Some(HbcParseFailure::new(e))),
            None => (None, None),
        };

        // A DEX entry that fails to parse aborts the whole context.
        let mut dex = Vec::new();
        for dex_entry in &apk.dex {
            dex.push(DexFile::parse(&dex_entry.data, budget.as_deref_mut())
                .map_err(|e| anyhow::anyhow!(e))?);
        }

        Ok(Self {
            path: apk.path.clone(),
            apk: Some(apk),
            hbc,
            hbc_parse_error,
            dex,
            dex_direct_bytes: None,
            loaded_split_names: split_names,
            hermes_findings,
            permissive_recovery: droidsaw_apk::PermissiveRecoveryOpts::default(),
        })
    }

    /// The parsed APK container, or a descriptive error when this context
    /// was built from raw bytecode and has no APK wrapper. Intended for
    /// commands that need manifest, signing, native libs, audit, yara, or
    /// any other APK-specific data.
    pub fn require_apk(&self) -> Result<&Apk> {
        match self.apk.as_ref() {
            Some(container) => Ok(container),
            None => Err(anyhow::anyhow!(
                "this command requires an APK / AAB / XAPK input; got raw bytecode (.hbc / .dex)"
            )),
        }
    }

    /// The parsed HBC layer for an hbc-targeted operation, or an error
    /// that tells the two "absent" cases apart:
    /// - bundle present but unparseable: the recorded typed parse error
    ///   is re-surfaced (the `HermesError` chain survives for
    ///   `error::classify()`);
    /// - no Hermes bytecode at all: the established "no Hermes bytecode
    ///   found" message.
    pub fn require_hbc(&self) -> Result<&HbcOwned> {
        match &self.hbc {
            Some(parsed) => Ok(parsed),
            None => {
                // A recorded parse failure takes precedence over the
                // generic "not found" message.
                self.ensure_hbc_parsed()?;
                Err(anyhow::anyhow!("no Hermes bytecode found in target"))
            }
        }
    }

    /// Fails with the recorded typed parse failure when the input carried
    /// a Hermes bundle that did not parse; returns `Ok(())` in every
    /// other case (bundle parsed fine, or there was no bundle). A guard
    /// for commands that explicitly name the hbc layer but tolerate its
    /// absence.
    pub fn ensure_hbc_parsed(&self) -> Result<()> {
        if let Some(recorded) = &self.hbc_parse_error {
            return Err(recorded.to_error());
        }
        Ok(())
    }

    /// 16-hex hash of the input file at `path`; never fails. When the
    /// bytes can't be read (permissions race, missing file, truncation
    /// between walk and parse) the hash of the path string is used
    /// instead. Used by multi-path iterators (`corpus_ingest`, etc.) and
    /// the MCP `load` / `with_ctx` scopes so each input still gets a
    /// unique bundle dir on panic.
    ///
    /// Bundles named by the path-string fallback have no relationship to
    /// file bytes; triage via the `metadata.json` `path` field +
    /// `input_hash`.
    pub fn hash_path(path: &Path) -> String {
        match Self::input_hash(path) {
            Ok(content_digest) => content_digest,
            Err(_) => {
                use std::collections::hash_map::DefaultHasher;
                use std::hash::{Hash, Hasher};

                // Fallback: hash the (lossily decoded) path text.
                let mut hasher = DefaultHasher::new();
                path.to_string_lossy().hash(&mut hasher);
                format!("{:016x}", hasher.finish())
            }
        }
    }

    /// Raw bytes of DEX index `i`. With an APK container the bytes come
    /// from `apk.dex[i].data`; without one, only index 0 can resolve, to
    /// the standalone raw-DEX buffer. Returns `None` when no DEX source
    /// exists at the requested index.
    pub fn dex_bytes(&self, i: usize) -> Option<&[u8]> {
        match (self.apk.as_ref(), i) {
            // Container input: the APK is the only source consulted.
            (Some(container), _) => container.dex.get(i).map(|entry| entry.data.as_slice()),
            // Standalone input: index 0 is the directly loaded DEX, if any.
            (None, 0) => self.dex_direct_bytes.as_deref(),
            (None, _) => None,
        }
    }
}