//! CycloneDX 1.5-compliant SBOM emit.
//!
//! Wires `apk.sdk_inventory` (54-rule fingerprint table from
//! `droidsaw_apk::sdk_inventory`) + `apk.native_libs` into the strict
//! CycloneDX `components` array. Each component carries `type`, `name`,
//! `bom-ref`, `purl`, optional `version` (from
//! `META-INF/<group>_<artifact>.version`), optional `hashes[].SHA-256`
//! (over source bytes; Path Y: ZIP re-open at sbom time), and optional
//! `licenses[]` (SPDX-License-Identifier parse of `META-INF/LICENSE*`).
//!
//! Heuristic detections — npm package names from Hermes string-table
//! scan, class-prefix-derived maven package names — carry no purl and
//! route to a top-level `_meta.heuristic_detections` envelope extension.
//! They are useful for analysts but cannot ride in the strict CycloneDX
//! `components` array per spec compliance.
//!
//! Validation: `droidsaw/tests/corpus_sbom_cyclonedx_validate.rs`
//! re-parses the strict-CycloneDX subset (`_meta` envelope key stripped)
//! through `cyclonedx_bom::Bom::parse_from_json_v1_5` +
//! `validate_version(SpecVersion::V1_5)`.
use std::collections::{BTreeMap, BTreeSet};
use std::fs::File;
use std::io::Read;
use std::path::Path;
use sha2::{Digest, Sha256};
use serde_json::{json, Value};
use zip::ZipArchive;
use crate::context::CrossLayerContext;
/// Cap per ZIP entry when hashing or scanning a license body. Bounds
/// memory under adversarial input — a 4 GiB declared-size entry would
/// otherwise allocate 4 GiB into the sbom command path.
///
/// This is the hashing cap: 64 MiB. Entries whose reported size exceeds
/// it are not hashed.
const MAX_HASH_ENTRY_BYTES: u64 = 64 * 1024 * 1024;
/// License-scan counterpart of [`MAX_HASH_ENTRY_BYTES`]: 256 KiB.
/// `META-INF/LICENSE*` / `META-INF/NOTICE*` entries whose reported size
/// exceeds it are skipped by the SPDX header scan.
const MAX_LICENSE_ENTRY_BYTES: u64 = 256 * 1024;
/// Lowercase-hex SHA-256 digest of `bytes` (always 64 characters).
///
/// The hex string is built by `write!`-ing each digest octet into one
/// pre-sized `String`, so no per-byte temporary `String` is allocated.
fn sha256_hex(bytes: &[u8]) -> String {
    use std::fmt::Write as _;

    let mut hasher = Sha256::new();
    hasher.update(bytes);
    hasher
        .finalize()
        .iter()
        .fold(String::with_capacity(64), |mut hex, octet| {
            #[allow(
                clippy::let_underscore_must_use,
                reason = "write! to String is infallible; the unit Result must be discarded"
            )]
            let _ = write!(hex, "{octet:02x}");
            hex
        })
}
/// Drop the leading `<file_stem>:` from an `SdkMarker.evidence` string.
///
/// Evidence is stored as `<file_stem>:<inner_zip_path>` by `Apk::parse`
/// (see `apk/mod.rs:706`); the stem is the parsed APK's basename without
/// extension and is not part of the ZIP entry name. Everything after the
/// FIRST `:` is returned; input without a `:` is returned unchanged.
fn strip_apk_prefix(evidence: &str) -> &str {
    match evidence.split_once(':') {
        Some((_file_stem, inner_path)) => inner_path,
        None => evidence,
    }
}
/// Resolve the version string for an `SdkMarker`.
///
/// Walks `meta_inf_versions` in key order (keys are full ZIP entry names
/// such as `"META-INF/com.google.firebase_firebase-analytics.version"`)
/// and returns the value of the first key that contains the marker's
/// `path_contains` as a substring, or `None` when no key matches.
/// Mirrors the lookup used by `droidsaw_apk::sdk_inventory::to_finding`.
fn lookup_version<'a>(
    marker: &droidsaw_apk::sdk_inventory::SdkMarker,
    meta_inf_versions: &'a BTreeMap<String, String>,
) -> Option<&'a str> {
    let needle = marker.path_contains;
    for (entry_name, version) in meta_inf_versions {
        if entry_name.contains(needle) {
            return Some(version.as_str());
        }
    }
    None
}
/// Derive the `"group:artifact"` key from a marker whose rule path has
/// the `META-INF/<group>_<artifact>` shape.
///
/// The split happens at the FIRST `_` after the `META-INF/` prefix.
/// Returns `None` when `path_contains` lacks the prefix or has no `_`
/// (native `.so` markers, asset paths, `META-INF/com.braintreepayments.api`).
///
/// The resulting string is the canonical key into
/// `apk.manifest_meta_data_versions`, which is what lets the META-INF and
/// meta-data version sources be merged.
fn marker_group_artifact(marker: &droidsaw_apk::sdk_inventory::SdkMarker) -> Option<String> {
    marker
        .path_contains
        .strip_prefix("META-INF/")
        .and_then(|coords| coords.split_once('_'))
        .map(|(group, artifact)| format!("{group}:{artifact}"))
}
/// Render a CPE 2.3 formatted string for a vendor/product/version triple.
///
/// CPE 2.3 layout:
///
/// ```text
/// cpe:2.3:part:vendor:product:version:update:edition:language:sw_edition:target_sw:target_hw:other
/// ```
///
/// `part` is fixed to `a` (application) and the seven trailing attributes
/// are emitted as `*` (any). The inputs are inserted verbatim — no CPE
/// escaping is applied. Kept as the single construction site so callers
/// cannot drift apart on the format.
fn format_cpe(vendor: &str, product: &str, version: &str) -> String {
    [
        "cpe", "2.3", "a", vendor, product, version, "*", "*", "*", "*", "*", "*", "*",
    ]
    .join(":")
}
/// Build a CycloneDX component for an `apk.manifest_meta_data_versions`
/// entry that was NOT already emitted via the sdk_inventory path. Used
/// for SDKs whose presence is detectable through AndroidManifest
/// meta-data but not through a ZIP-level file path (most Play Services
/// modules, when the META-INF/*.version markers are absent).
///
/// Purl shape: `pkg:maven/<group>/<artifact>@<version>`. `bom-ref` is
/// the purl (no per-blob digest — meta-data-derived components have no
/// single attributable ZIP entry to hash). Evidence carries the
/// `manifest-analysis` technique only.
fn manifest_meta_data_component(
group_artifact: &str,
version: &str,
rule: &droidsaw_apk::sdk_inventory_metadata::MetaDataEntry,
) -> Option<Value> {
let (group, artifact) = group_artifact.split_once(':')?;
if group.is_empty() || artifact.is_empty() {
return None;
}
let purl = format!("pkg:maven/{group}/{artifact}@{version}");
let mut obj = serde_json::Map::new();
obj.insert("type".into(), json!("library"));
obj.insert("bom-ref".into(), json!(purl));
obj.insert("name".into(), json!(artifact));
obj.insert("version".into(), json!(version));
obj.insert("purl".into(), json!(purl));
if let Some((vendor, product)) = rule.cpe_vendor_product {
obj.insert("cpe".into(), json!(format_cpe(vendor, product, version)));
}
obj.insert(
"evidence".into(),
json!({
"identity": {
"field": "purl",
"confidence": 0.85,
"methods": [{
"technique": "manifest-analysis",
"confidence": 0.85,
"value": format!("<meta-data android:name=\"{}\" />", rule.meta_data_key),
}],
},
}),
);
obj.insert(
"properties".into(),
Value::Array(vec![json!({
"name": "droidsaw:source",
"value": "manifest-meta-data",
})]),
);
Some(Value::Object(obj))
}
/// Turn a free-text SDK canonical name into a purl-safe slug.
///
/// The name is cut into maximal runs of ASCII alphanumerics; every run
/// is lowercased and the runs are joined with a single `-`. Anything
/// else (whitespace, punctuation, non-ASCII characters) only acts as a
/// separator, so the result never starts or ends with `-`. Idempotent.
fn slugify(name: &str) -> String {
    name.split(|c: char| !c.is_ascii_alphanumeric())
        .filter(|run| !run.is_empty())
        .map(str::to_ascii_lowercase)
        .collect::<Vec<String>>()
        .join("-")
}
/// Classify an SdkMarker into a CycloneDX purl per
/// https://github.com/package-url/purl-spec/blob/master/PURL-TYPES.rst.
///
/// - `META-INF/<group>_<artifact>` → `pkg:maven/<group>/<artifact>[@<version>]`
/// - `META-INF/<group>` (no `_<artifact>` separator) → `pkg:generic/<slug(name)>`
///   fallthrough; without an artifact half, the maven coord is undefined.
///   Rule `META-INF/com.braintreepayments.api` is the only current example.
/// - `lib<name>.so` → `pkg:generic/<name>` (native bare name)
/// - other asset-shaped marker → `pkg:generic/<slug(canonical-name)>`
///
/// The native branch removes exactly ONE leading `lib` and ONE trailing
/// `.so`. (`trim_start_matches` / `trim_end_matches` strip repeated
/// occurrences, which would turn `liblibrary.so` into `rary`.)
///
/// Returns `None` only when no branch matches and `slugify(marker.name)`
/// is empty; a marker whose name contains at least one ASCII alphanumeric
/// character always yields `Some(...)` via the slug fallthrough.
fn marker_purl(
    marker: &droidsaw_apk::sdk_inventory::SdkMarker,
    version: Option<&str>,
) -> Option<String> {
    let pc = marker.path_contains;
    // META-INF/ marker without a group_artifact separator falls through
    // to the asset-slug path below, so we never emit `pkg:maven/<group>/`
    // with no artifact half.
    if let Some(suffix) = pc.strip_prefix("META-INF/")
        && let Some((group, artifact)) = suffix.split_once('_')
    {
        let v = version.map(|v| format!("@{v}")).unwrap_or_default();
        return Some(format!("pkg:maven/{group}/{artifact}{v}"));
    }
    // Native library: peel a single `lib` prefix and a single `.so`
    // suffix. An empty remainder (`lib.so`) falls through to the slug.
    if let Some(bare) = pc
        .strip_prefix("lib")
        .and_then(|rest| rest.strip_suffix(".so"))
        && !bare.is_empty()
    {
        return Some(format!("pkg:generic/{bare}"));
    }
    let slug = slugify(marker.name);
    if slug.is_empty() {
        return None;
    }
    Some(format!("pkg:generic/{slug}"))
}
/// OmniBOR gitoid (`gitoid:blob:sha256:<hex>`) of `bytes`, computed with
/// the OmniBOR-blob-sha256 construction:
///
/// ```text
/// gitoid = SHA-256(b"blob " || ascii(content.len()) || b"\0" || content)
/// ```
///
/// A content-addressable identifier that accompanies the conventional
/// SHA-256 (`hashes[]`) on a component. It matters most for
/// `pkg:generic/<slug>` components, whose purl is only a placeholder:
/// the gitoid identifies the bytes regardless of the name we assigned.
fn gitoid_blob_sha256(bytes: &[u8]) -> String {
    use std::fmt::Write as _;

    const PREFIX: &str = "gitoid:blob:sha256:";

    // Git-style blob header: "blob <decimal length>\0".
    let header = format!("blob {}\0", bytes.len());
    let mut hasher = Sha256::new();
    hasher.update(header.as_bytes());
    hasher.update(bytes);
    let digest = hasher.finalize();

    let mut gitoid = String::with_capacity(PREFIX.len().saturating_add(64));
    gitoid.push_str(PREFIX);
    for octet in digest {
        #[allow(
            clippy::let_underscore_must_use,
            reason = "write! to String is infallible; the unit Result must be discarded"
        )]
        let _ = write!(gitoid, "{octet:02x}");
    }
    gitoid
}
/// Per-component digest pair: conventional SHA-256 (rides in CycloneDX
/// `hashes[]`) and OmniBOR gitoid (rides in `properties[]` as
/// `name="omniborId"`). Bundled so both are computed in a single
/// decompress pass per ZIP entry.
#[derive(Debug, Clone)]
struct ComponentDigest {
    /// Lowercase-hex SHA-256 of the entry bytes; also appended to the
    /// component's `bom-ref` as `#sha256:<hex>`.
    sha256_hex: String,
    /// `gitoid:blob:sha256:<hex>` identifier over the same bytes.
    omnibor_gitoid: String,
}
/// Read the named ZIP entry (decompressed) and return both the SHA-256
/// and the OmniBOR gitoid over the bytes.
///
/// Returns `None` on any failure — entry missing, entry larger than
/// [`MAX_HASH_ENTRY_BYTES`], or a read/decompression error. Digest
/// absence is spec-clean (CycloneDX `hashes[]` is zero-or-more and
/// `properties[]` is optional).
///
/// The size cap is enforced twice: once against the size the archive
/// reports for the entry, and once against the bytes actually produced.
/// The reported size comes from archive metadata, which a crafted file
/// may understate, so the read itself is bounded as well.
fn hash_archive_entry(
    archive: &mut ZipArchive<File>,
    entry_name: &str,
) -> Option<ComponentDigest> {
    let mut entry = archive.by_name(entry_name).ok()?;
    let declared = entry.size();
    if declared > MAX_HASH_ENTRY_BYTES {
        return None;
    }
    // Pre-size from the declared length, but never trust it for more
    // than 1 MiB up front.
    let cap = usize::try_from(declared).unwrap_or(0).min(1024 * 1024);
    let mut buf = Vec::with_capacity(cap);
    // Read at most cap + 1 bytes: the extra byte is what lets us tell
    // "exactly at the cap" apart from "over the cap".
    let read_limit = MAX_HASH_ENTRY_BYTES.saturating_add(1);
    Read::take(&mut entry, read_limit)
        .read_to_end(&mut buf)
        .ok()?;
    let within_cap = u64::try_from(buf.len()).is_ok_and(|n| n <= MAX_HASH_ENTRY_BYTES);
    if !within_cap {
        return None;
    }
    Some(ComponentDigest {
        sha256_hex: sha256_hex(&buf),
        omnibor_gitoid: gitoid_blob_sha256(&buf),
    })
}
/// SHA-256 (lowercase hex) over the entire file at `path`.
///
/// Streamed through a 64 KiB buffer so memory stays bounded regardless
/// of file size. sha2 0.11's `Sha256` does not impl `std::io::Write`, so
/// the hasher is driven explicitly via `update`. Used to attribute the
/// attestation's `buildDefinition.externalParameters.input` to the input
/// APK's content identity.
///
/// # Errors
///
/// Returns an error when the file cannot be opened or a read fails.
/// `ErrorKind::Interrupted` is not treated as a failure: per the
/// `Read::read` contract it is non-fatal, so the read is retried.
fn hash_file_sha256(path: &Path) -> anyhow::Result<String> {
    let mut file = File::open(path)
        .map_err(|e| anyhow::anyhow!("open APK for attestation hash: {e}"))?;
    let mut hasher = Sha256::new();
    let mut buf = vec![0u8; 64 * 1024];
    loop {
        let n = match file.read(&mut buf) {
            Ok(0) => break,
            Ok(n) => n,
            // A signal arrived before any data was transferred; retry.
            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(e) => {
                return Err(anyhow::anyhow!("read APK for attestation hash: {e}"));
            }
        };
        // Read::read contract: 0 <= n <= buf.len(). Direct slice with
        // a debug_assert tripwire — a silent fallback would skip a hash
        // chunk on contract violation and yield a silently-wrong digest.
        debug_assert!(n <= buf.len(), "Read::read returned n={n} > buf.len()={}", buf.len());
        #[allow(
            clippy::indexing_slicing,
            reason = "PROOF: Read::read trait contract bounds n by buf.len(); debug_assert above catches violation in dev builds"
        )]
        let chunk = &buf[..n];
        hasher.update(chunk);
    }
    let digest = hasher.finalize();
    let mut out = String::with_capacity(64);
    use std::fmt::Write as _;
    for byte in digest {
        #[allow(
            clippy::let_underscore_must_use,
            reason = "write! to String is infallible; the unit Result must be discarded"
        )]
        let _ = write!(out, "{byte:02x}");
    }
    Ok(out)
}
/// Assemble an unsigned in-toto Statement v1 whose predicate is
/// SLSA-Provenance v1.0.
///
/// The Statement records the BOM build event — input APK identity (file
/// name + SHA-256), droidsaw version, start/finish timestamps — without
/// claiming authenticity. It is unsigned; once signing exists it becomes
/// an Attestation by being wrapped in a DSSE envelope per the in-toto
/// spec. As emitted today it detects accidental BOM corruption (subject
/// digest mismatch) but offers no tamper-evidence against intentional
/// rewrites.
///
/// `buildType` is the URN `urn:droidsaw:sbom-from-apk:v1` — URNs are
/// stable names not expected to dereference, so SLSA-aware validators
/// skip HTTP lookup. A schema doc at a dereferenceable URL may be added
/// in future versions for validator interop.
///
/// The subject is the BOM's strict-CycloneDX subset (envelope minus
/// `_meta` and `_statement`), so a consumer can:
/// 1. take the envelope JSON,
/// 2. strip `_meta` and `_statement`,
/// 3. re-serialize with sorted keys,
/// 4. SHA-256 those bytes,
/// 5. compare to `subject[0].digest.sha256` on the Statement.
///
/// `serde_json::Map` defaults to `BTreeMap` (sorted-key iteration), so
/// step 3 matches our emit serialization byte-for-byte.
///
/// When `apk_path` has no UTF-8 file name, the input is recorded under
/// the placeholder name `input.apk`.
fn build_provenance_statement(
    apk_path: &Path,
    apk_sha256: &str,
    bom_sha256: &str,
    started_on: &str,
    finished_on: &str,
) -> Value {
    let apk_name = match apk_path.file_name().and_then(|os| os.to_str()) {
        Some(file_name) => file_name,
        None => "input.apk",
    };
    let tool_version = env!("CARGO_PKG_VERSION");

    let subject = json!([{
        "name": "bom.json",
        "digest": { "sha256": bom_sha256 },
    }]);
    let build_definition = json!({
        "buildType": "urn:droidsaw:sbom-from-apk:v1",
        "externalParameters": {
            "input": {
                "name": apk_name,
                "digest": { "sha256": apk_sha256 },
            },
        },
        "internalParameters": {},
        "resolvedDependencies": [],
    });
    let run_details = json!({
        "builder": {
            "id": "https://droidsaw.com/builder/sbom@v1",
            "version": { "droidsaw": tool_version },
            "builderDependencies": [],
        },
        "metadata": {
            "startedOn": started_on,
            "finishedOn": finished_on,
        },
        "byproducts": [],
    });

    json!({
        "_type": "https://in-toto.io/Statement/v1",
        "subject": subject,
        "predicateType": "https://slsa.dev/provenance/v1",
        "predicate": {
            "buildDefinition": build_definition,
            "runDetails": run_details,
        },
    })
}
/// Extract SPDX-License-Identifier values from any `META-INF/LICENSE*` or
/// `META-INF/NOTICE*` (case-insensitive) entries in the APK.
///
/// Best-effort: only the explicit `SPDX-License-Identifier: <expr>`
/// header is recognized (leading whitespace on the line is ignored).
/// Implicit license bodies (full Apache 2.0 text without the header) are
/// deliberately not inferred. Values that are empty or longer than 128
/// bytes are dropped. Returns a deduplicated set in sorted order.
///
/// Entries that cannot be opened or read, or that are larger than
/// [`MAX_LICENSE_ENTRY_BYTES`], are skipped. The cap is checked against
/// the size the archive reports AND against the bytes actually read: the
/// reported size comes from archive metadata, which a crafted file may
/// understate, so the read itself is bounded too.
fn extract_spdx_licenses(archive: &mut ZipArchive<File>) -> BTreeSet<String> {
    let mut out = BTreeSet::new();
    // Collect candidate names first; `by_name` below needs `&mut archive`.
    let names: Vec<String> = (0..archive.len())
        .filter_map(|i| archive.by_index(i).ok().map(|e| e.name().to_string()))
        .filter(|n| {
            let up = n.to_ascii_uppercase();
            up.starts_with("META-INF/LICENSE") || up.starts_with("META-INF/NOTICE")
        })
        .collect();
    for name in names {
        let Ok(mut entry) = archive.by_name(&name) else {
            continue;
        };
        let declared = entry.size();
        if declared > MAX_LICENSE_ENTRY_BYTES {
            continue;
        }
        let cap = usize::try_from(declared).unwrap_or(0).min(64 * 1024);
        let mut buf = Vec::with_capacity(cap);
        // Read at most cap + 1 bytes so an entry that produces more data
        // than it declared is detected instead of buffered in full.
        let read_limit = MAX_LICENSE_ENTRY_BYTES.saturating_add(1);
        if Read::take(&mut entry, read_limit)
            .read_to_end(&mut buf)
            .is_err()
        {
            continue;
        }
        let within_cap = u64::try_from(buf.len()).is_ok_and(|n| n <= MAX_LICENSE_ENTRY_BYTES);
        if !within_cap {
            continue;
        }
        let text = String::from_utf8_lossy(&buf);
        for line in text.lines() {
            let trimmed = line.trim_start();
            let Some(rest) = trimmed.strip_prefix("SPDX-License-Identifier:") else {
                continue;
            };
            let v = rest.trim();
            if !v.is_empty() && v.len() <= 128 {
                out.insert(v.to_string());
            }
        }
    }
    out
}
/// Deterministic `urn:uuid:…` string derived from `seed_bytes`.
///
/// The first 16 bytes of `SHA-256("droidsaw-sbom-serial-namespace-v1" ||
/// seed_bytes)` are used as the UUID octets, with the RFC-4122 version
/// nibble forced to 4 and the variant bits forced to `10`, so the output
/// satisfies the CycloneDX `serialNumber` pattern without a `uuid`
/// dependency. Same seed → same serial number, which keeps re-runs on
/// the same APK diff-stable.
fn synthetic_uuid(seed_bytes: &[u8]) -> String {
    use std::fmt::Write as _;

    let mut hasher = Sha256::new();
    hasher.update(b"droidsaw-sbom-serial-namespace-v1");
    hasher.update(seed_bytes);
    let digest = hasher.finalize();

    // "urn:uuid:" (9) + 32 hex digits + 4 hyphens.
    let mut serial = String::with_capacity(45);
    serial.push_str("urn:uuid:");
    // The digest is 32 bytes, so `take(16)` always yields 16 octets;
    // iterating avoids any indexing into the digest.
    for (idx, &raw) in digest.iter().take(16).enumerate() {
        let octet = match idx {
            6 => (raw & 0x0f) | 0x40, // version nibble = 4
            8 => (raw & 0x3f) | 0x80, // variant bits = 10
            _ => raw,
        };
        // 8-4-4-4-12 grouping: a hyphen precedes octets 4, 6, 8 and 10.
        if matches!(idx, 4 | 6 | 8 | 10) {
            serial.push('-');
        }
        #[allow(
            clippy::let_underscore_must_use,
            reason = "write! to String is infallible; the unit Result must be discarded"
        )]
        let _ = write!(serial, "{octet:02x}");
    }
    serial
}
/// Build the CycloneDX component object for one SdkMarker.
///
/// Always present: `type` (`library`), `bom-ref`, `name`, `purl`,
/// `evidence`, `properties`. Optional, emitted only when supplied:
/// - `version` — from `META-INF/*.version` or `<meta-data>`;
/// - `cpe` — CPE 2.3 string when the rule's vendor/product mapping is
///   unambiguous;
/// - `hashes` — SHA-256 from `digest`, which also adds an `omniborId`
///   property and turns the `bom-ref` into `<purl>#sha256:<hex>`
///   (otherwise the `bom-ref` is the bare purl).
///
/// Identity confidence is 0.95 when a version is known and 0.7
/// otherwise. When `manifest_meta_data_evidence` is `Some(key)` — the
/// version came from `<meta-data android:name=key>` rather than a
/// `META-INF/*.version` marker — a `manifest-analysis` method is listed
/// after the `filename` method.
///
/// Markers with no purl are dropped at the call site and routed to the
/// `_meta.heuristic_detections` envelope.
fn sdk_marker_component(
    marker: &droidsaw_apk::sdk_inventory::SdkMarker,
    version: Option<&str>,
    purl: &str,
    digest: Option<&ComponentDigest>,
    cpe: Option<&str>,
    manifest_meta_data_evidence: Option<&str>,
) -> Value {
    let bom_ref = digest.map_or_else(
        || purl.to_string(),
        |d| format!("{purl}#sha256:{}", d.sha256_hex),
    );
    let confidence = if version.is_some() { 0.95 } else { 0.7 };

    // Evidence methods: the filename hit first, then the optional
    // manifest hit.
    let filename_method = json!({
        "technique": "filename",
        "confidence": confidence,
        "value": strip_apk_prefix(&marker.evidence),
    });
    let manifest_method = manifest_meta_data_evidence.map(|key| {
        json!({
            "technique": "manifest-analysis",
            "confidence": 0.85,
            "value": format!("<meta-data android:name=\"{key}\" />"),
        })
    });
    let methods: Vec<Value> = std::iter::once(filename_method)
        .chain(manifest_method)
        .collect();

    // Rides in properties[] under CycloneDX 1.5 (1.5 has no top-level
    // omniborId field). When cyclonedx-bom 0.9+ ships 1.6 support, a
    // follow-up stream promotes this to the top-level `omniborId` field
    // standardized in 1.6.
    let omnibor_prop =
        digest.map(|d| json!({ "name": "omniborId", "value": d.omnibor_gitoid }));
    let properties: Vec<Value> =
        std::iter::once(json!({ "name": "droidsaw:category", "value": marker.category }))
            .chain(omnibor_prop)
            .collect();

    let mut component = serde_json::Map::new();
    component.insert("type".to_owned(), json!("library"));
    component.insert("bom-ref".to_owned(), json!(bom_ref));
    component.insert("name".to_owned(), json!(marker.name));
    if let Some(v) = version {
        component.insert("version".to_owned(), json!(v));
    }
    component.insert("purl".to_owned(), json!(purl));
    if let Some(c) = cpe {
        component.insert("cpe".to_owned(), json!(c));
    }
    if let Some(d) = digest {
        component.insert(
            "hashes".to_owned(),
            json!([{ "alg": "SHA-256", "content": d.sha256_hex }]),
        );
    }
    component.insert(
        "evidence".to_owned(),
        json!({
            "identity": {
                "field": "purl",
                "confidence": confidence,
                "methods": Value::Array(methods),
            },
        }),
    );
    component.insert("properties".to_owned(), Value::Array(properties));
    Value::Object(component)
}
/// Build the CycloneDX component for one raw native library entry (from
/// `apk.native_libs`).
///
/// Without version info the purl is `pkg:generic/<abi>/<lib>` (identity
/// confidence 0.9) and no `version` is emitted. With version info (SBOM
/// Stream D: a `.rodata` banner gave a canonical upstream version —
/// OpenSSL, libcurl, libpng, ...) the purl becomes
/// `pkg:generic/<canonical_name>@<version>` (confidence 0.95), a
/// `version` is set, and a CPE 2.3 string is attached when the rule
/// carried a vendor/product mapping.
///
/// The digest is computed by `hash_archive_entry` over the entry at
/// `lib.path`; when it is available the component gets `hashes[]`, an
/// `omniborId` property and a `<purl>#sha256:<hex>` `bom-ref`, otherwise
/// the `bom-ref` is the bare purl. The original lib path always stays in
/// `evidence.identity.methods[]` so the provenance trail is intact.
fn native_lib_component(
    lib: &droidsaw_apk::apk::NativeLib,
    archive: &mut ZipArchive<File>,
) -> Value {
    let version_info = lib.version_info.as_ref();

    // Promote to the canonical-name purl when a version was recovered so
    // OSV-Scanner / Grype can match against NVD ecosystem entries.
    let (purl, identity_confidence) = if let Some(info) = version_info {
        (
            format!("pkg:generic/{}@{}", info.canonical_name, info.version),
            0.95,
        )
    } else {
        (format!("pkg:generic/{}/{}", lib.abi, lib.name), 0.9)
    };

    let digest = hash_archive_entry(archive, &lib.path);
    let bom_ref = digest.as_ref().map_or_else(
        || purl.clone(),
        |d| format!("{purl}#sha256:{}", d.sha256_hex),
    );

    let mut component = serde_json::Map::new();
    component.insert("type".to_owned(), json!("library"));
    component.insert("bom-ref".to_owned(), json!(bom_ref));
    component.insert("name".to_owned(), json!(lib.name));
    if let Some(info) = version_info {
        component.insert("version".to_owned(), json!(info.version));
        // CPE is the load-bearing identifier for Grype's NVD matcher on
        // native libs — `pkg:generic` purls alone don't route to the
        // per-ecosystem matchers, so without CPE the OpenSSL/curl/png
        // CVE corpus is invisible even with a correct version. Confirmed
        // on real APK (Pokemon TCG libssl.so@1.1.1a → 24 Grype CVE hits
        // with CPE attached vs 0 without).
        if let Some((vendor, product)) = info.cpe_vendor_product.as_ref() {
            component.insert(
                "cpe".to_owned(),
                json!(format_cpe(vendor, product, &info.version)),
            );
        }
    }
    component.insert("purl".to_owned(), json!(purl));
    if let Some(d) = digest.as_ref() {
        component.insert(
            "hashes".to_owned(),
            json!([{ "alg": "SHA-256", "content": d.sha256_hex }]),
        );
    }

    // Evidence: the file path hit, then the banner string when present.
    let filename_method = json!({
        "technique": "filename",
        "confidence": 0.9,
        "value": lib.path,
    });
    let banner_method = version_info.map(|info| {
        json!({
            "technique": "binary-string-scan",
            "confidence": 0.95,
            "value": info.matched_string,
        })
    });
    let methods: Vec<Value> = std::iter::once(filename_method)
        .chain(banner_method)
        .collect();
    component.insert(
        "evidence".to_owned(),
        json!({
            "identity": {
                "field": "purl",
                "confidence": identity_confidence,
                "methods": Value::Array(methods),
            },
        }),
    );

    // Properties, in order: abi, soname?, canonical-name?, omniborId?.
    let mut properties = Vec::with_capacity(4);
    properties.push(json!({ "name": "droidsaw:abi", "value": lib.abi }));
    if let Some(soname) = lib.soname.as_ref() {
        properties.push(json!({ "name": "soname", "value": soname }));
    }
    if let Some(info) = version_info {
        properties.push(json!({
            "name": "droidsaw:canonical-name",
            "value": info.canonical_name,
        }));
    }
    if let Some(d) = digest.as_ref() {
        properties.push(json!({ "name": "omniborId", "value": d.omnibor_gitoid }));
    }
    component.insert("properties".to_owned(), Value::Array(properties));
    Value::Object(component)
}
/// Hermes string-table heuristic scan: collect the distinct npm package
/// names referenced as `node_modules/<name>` in the HBC string pool.
///
/// Only the FIRST `node_modules/` occurrence in each string is examined.
/// For a scoped reference (`node_modules/@scope/name/...`) the result is
/// `@scope/name`, and the string is skipped when either half is empty;
/// otherwise the result is the path segment right after `node_modules/`.
/// Returns the names sorted and deduplicated, or an empty vector when the
/// context has no Hermes bundle.
///
/// Per the file's notes these names are emitted through
/// [`hermes_npm_component`] with property
/// `droidsaw:source: hermes-string-scan`, so consumers can filter by
/// provenance; name-only purls (`pkg:npm/<name>`) are
/// CycloneDX-1.5-valid and still usable for name-level vulnerability
/// queries.
fn hermes_npm_heuristic(ctx: &CrossLayerContext) -> Vec<String> {
    let Some(hbc_owned) = ctx.hbc.as_ref() else {
        return Vec::new();
    };
    let hbc = hbc_owned.hbc();

    let packages: BTreeSet<String> = (0..hbc.string_count)
        .filter_map(|idx| {
            let pooled = hbc.string_as_str_or_empty(idx);
            let (_, tail) = pooled.split_once("node_modules/")?;
            let name = match tail.strip_prefix('@') {
                Some(scoped) => {
                    let (scope, rest) = scoped.split_once('/').unwrap_or((scoped, ""));
                    let package = rest.split('/').next().unwrap_or("");
                    if scope.is_empty() || package.is_empty() {
                        return None;
                    }
                    format!("@{scope}/{package}")
                }
                None => tail.split('/').next().unwrap_or("").to_string(),
            };
            (!name.is_empty()).then_some(name)
        })
        .collect();

    packages.into_iter().collect()
}
/// SBOM Stream G — Hermes bundle versioned-npm scan.
///
/// Scans every string in the HBC string pool for `[@scope/]name@<semver>`
/// literals and returns the deduplicated `(name, version)` set (empty
/// when the context has no Hermes bundle). Every `@` in a string is
/// tried as a version anchor via `parse_npm_versioned_at`; a hit is kept
/// only when `is_likely_npm_name` accepts the name.
///
/// Per the file's notes the output rides in `components[]` as
/// `pkg:npm/<name>@<version>` purls with provenance property
/// `droidsaw:source: hermes-version-scan` (vs Stream A's
/// `hermes-string-scan`, which marks unversioned heuristic detections).
///
/// Production RN bundles are heavily minified — an empirical check on
/// Coinbase + Tesla yielded 0-3 versioned coords per app, vs Stream A's
/// ~4 unversioned ones. When only the name is recoverable,
/// `hermes_npm_heuristic` already covers that case.
fn hermes_npm_versioned(ctx: &CrossLayerContext) -> BTreeSet<(String, String)> {
    let mut found: BTreeSet<(String, String)> = BTreeSet::new();
    let Some(hbc_owned) = ctx.hbc.as_ref() else {
        return found;
    };
    let hbc = hbc_owned.hbc();

    for idx in 0..hbc.string_count {
        let pooled = hbc.string_as_str_or_empty(idx);
        let text: &str = &pooled;
        // Production HBC strings are often long concatenations, so every
        // '@' is a candidate anchor, not just the first one.
        for (at_pos, _) in text.match_indices('@') {
            let Some((name, version)) = parse_npm_versioned_at(text, at_pos) else {
                continue;
            };
            if is_likely_npm_name(&name) {
                found.insert((name, version));
            }
        }
    }
    found
}
/// Try to parse `[@scope/]name@<semver>` anchored at the version-`@`
/// position `at_pos` in `s`. Returns `(name, version)` on a clean parse.
///
/// Name (walks LEFT from `at_pos`): a non-empty run of npm name bytes —
/// lowercase ASCII letters, digits, `-`, `_`, `.`. A `/` is accepted only
/// as the separator of a scoped name: when the run is preceded by
/// `@<scope>/` (with a non-empty scope of the same byte class) the name
/// is `@scope/name`. Any other path prefix is NOT part of the name, so
/// `node_modules/react@18.2.0` yields `react`.
///
/// Version (walks RIGHT from `at_pos + 1`): dot-separated digit groups
/// with at most three dots, optionally followed by a single ASCII letter.
/// At least two dots are required to qualify as semver. A dot that is
/// not followed by a digit is left out of the version, so the sentence
/// `foo@1.2.3.` and the path `foo@1.2.3.js` both yield `1.2.3`.
///
/// Returns `None` when `at_pos` is past the end of `s` or not on a
/// character boundary, when no name precedes it, or when the text after
/// it is not version-shaped.
fn parse_npm_versioned_at(s: &str, at_pos: usize) -> Option<(String, String)> {
    let bytes = s.as_bytes();
    if at_pos > bytes.len() {
        return None;
    }

    let is_name_byte =
        |c: u8| c.is_ascii_lowercase() || c.is_ascii_digit() || matches!(c, b'-' | b'_' | b'.');
    // Length of the maximal run of name bytes ending just before `end`.
    let run_len_before = |end: usize| -> usize {
        bytes.get(..end).map_or(0, |head| {
            head.iter().rev().take_while(|&&c| is_name_byte(c)).count()
        })
    };

    // Left-walk: the bare package name directly before the '@'.
    let name_len = run_len_before(at_pos);
    if name_len == 0 {
        return None;
    }
    let name_start = at_pos.checked_sub(name_len)?;

    // Scoped form: `@scope/` immediately before the bare name.
    let mut start = name_start;
    if let Some(slash_pos) = name_start.checked_sub(1) {
        if bytes.get(slash_pos) == Some(&b'/') {
            let scope_len = run_len_before(slash_pos);
            let sigil_pos = slash_pos.checked_sub(scope_len.saturating_add(1));
            if let Some(sigil_pos) = sigil_pos {
                if scope_len > 0 && bytes.get(sigil_pos) == Some(&b'@') {
                    start = sigil_pos;
                }
            }
        }
    }
    let name = s.get(start..at_pos)?.to_owned();

    // Right-walk: collect the semver-shaped token after the '@'.
    let version_start = at_pos.saturating_add(1);
    let mut end = version_start;
    let mut dots = 0usize;
    let mut last_was_digit = false;
    let mut saw_letter = false;
    while let Some(&c) = bytes.get(end) {
        if c.is_ascii_digit() {
            last_was_digit = true;
            end = end.saturating_add(1);
        } else if c == b'.' && last_was_digit && dots < 3 {
            dots = dots.saturating_add(1);
            last_was_digit = false;
            end = end.saturating_add(1);
        } else if c.is_ascii_alphabetic() && last_was_digit && dots >= 1 && !saw_letter {
            saw_letter = true;
            end = end.saturating_add(1);
            break;
        } else {
            break;
        }
    }
    // A '.' is only consumed after a digit, so if the walk stopped right
    // after one, that dot is trailing punctuation (or the start of a file
    // extension), not part of the version. Give it back.
    if dots > 0 && !last_was_digit && !saw_letter {
        end = end.saturating_sub(1);
        dots = dots.saturating_sub(1);
        last_was_digit = true;
    }
    if dots < 2 || !(last_was_digit || saw_letter) {
        return None;
    }
    let version = s.get(version_start..end)?.to_owned();
    Some((name, version))
}
/// Cheap plausibility filter for a name recovered from the string pool.
///
/// Accepts a name only when its length is within 2..=214 bytes and, after
/// dropping one leading `@` (the scope sigil), it contains at least one
/// lowercase ASCII letter. This discards single-character strings and
/// digit/punctuation-only "names" that slip through the left-walk on
/// digit-rich JS sources.
fn is_likely_npm_name(name: &str) -> bool {
    let bare = name.strip_prefix('@').unwrap_or(name);
    (2..=214).contains(&name.len()) && bare.bytes().any(|b| b.is_ascii_lowercase())
}
/// Build the CycloneDX component for a Hermes string-pool detection that
/// DID carry a version.
///
/// Counterpart of `hermes_npm_component`: the purl (also used as the
/// `bom-ref`) is `pkg:npm/<name>@<version>`, identity confidence is 0.7
/// with the matched `<name>@<version>` literal as `code-string-scan`
/// evidence, and the properties mark the provenance as
/// `hermes-version-scan` with `droidsaw:versioned = "true"`.
fn hermes_npm_versioned_component(name: &str, version: &str) -> Value {
    let coordinate = format!("{name}@{version}");
    let purl = format!("pkg:npm/{coordinate}");

    let evidence = json!({
        "identity": {
            "field": "purl",
            "confidence": 0.7,
            "methods": [{
                "technique": "code-string-scan",
                "confidence": 0.7,
                "value": coordinate,
            }],
        },
    });
    let properties = json!([
        { "name": "droidsaw:source", "value": "hermes-version-scan" },
        { "name": "droidsaw:versioned", "value": "true" },
    ]);

    json!({
        "type": "library",
        "bom-ref": purl.as_str(),
        "name": name,
        "version": version,
        "purl": purl.as_str(),
        "evidence": evidence,
        "properties": properties,
    })
}
/// DEX class-prefix heuristic scan: collect every distinct
/// `<top>.<second>` prefix seen among the class descriptors of all DEX
/// files in the context, sorted and deduplicated.
///
/// A prefix is kept only when its first segment is in the reverse-DNS
/// allowlist below, its second segment is at least two bytes long, and
/// it differs from the first two segments of the app's own manifest
/// package (when the manifest decodes to a non-empty package name).
///
/// Per the file's notes this output is meant for
/// `_meta.heuristic_detections` only — `com.google` is not a Maven
/// coordinate, so emitting it as a `pkg:maven/` component would claim
/// version/hash provenance the heuristic cannot back.
fn dex_class_prefix_heuristic(ctx: &CrossLayerContext) -> Vec<String> {
    // Reverse-DNS roots that signal "real third-party library, not
    // app-internal class". `kotlin` / `kotlinx` are included because
    // they ARE meaningful ecosystem prefixes (the Kotlin stdlib and the
    // coroutine library both have real CVE histories). The allowlist
    // plus the length check below screens out R8-minified classes
    // (`a.a`, `b.c`), Kotlin coroutine debug frames (`_COROUTINE.*`)
    // and single-letter package noise.
    const TLD_ALLOW: &[&str] = &[
        "com", "io", "org", "net", "androidx", "kotlin", "kotlinx",
        "edu", "gov", "uk", "de", "jp", "cn", "fr",
    ];

    // First two segments of the app's own package, if known.
    let own_prefix: Option<String> = ctx
        .apk
        .as_ref()
        .and_then(|apk| apk.decode_manifest())
        .map(|manifest| manifest.package)
        .filter(|package| !package.is_empty())
        .and_then(|package| {
            let mut segments = package.split('.');
            match (segments.next(), segments.next()) {
                (Some(top), Some(second)) => Some(format!("{top}.{second}")),
                _ => None,
            }
        });

    let mut prefixes: BTreeSet<String> = BTreeSet::new();
    for dex in &ctx.dex {
        for class_def in &dex.class_defs {
            let Ok(desc) = dex.get_type_descriptor(class_def.class_idx) else {
                continue;
            };
            // `Lcom/example/Foo;` → `com/example/Foo`.
            let inner = desc.trim_start_matches('L').trim_end_matches(';');
            let mut segments = inner.split('/');
            let (Some(top), Some(second)) = (segments.next(), segments.next()) else {
                continue;
            };
            if !TLD_ALLOW.contains(&top) || second.len() < 2 {
                continue;
            }
            let prefix = format!("{top}.{second}");
            if own_prefix.as_deref() != Some(prefix.as_str()) {
                prefixes.insert(prefix);
            }
        }
    }
    prefixes.into_iter().collect()
}
/// Build the CycloneDX component for an npm package detected via the
/// Hermes string-table scan.
///
/// The purl (also used as the `bom-ref`) is name-only — `pkg:npm/<name>`
/// — because the heuristic recovers package names but not versions; no
/// `version` field is emitted. Name-level vulnerability queries (e.g.
/// OSV-Scanner against OSV.dev) still work without a version.
///
/// Provenance: property `droidsaw:source: hermes-string-scan` (plus
/// `droidsaw:versioned = "false"`) lets downstream consumers separate
/// heuristic name-only components from version-bearing ones. Evidence
/// confidence is 0.5 — a heuristic detection, not a positive
/// identification — with `node_modules/<name>/` as the recorded value.
fn hermes_npm_component(name: &str) -> Value {
    let purl = format!("pkg:npm/{name}");

    let evidence = json!({
        "identity": {
            "field": "name",
            "confidence": 0.5,
            "methods": [{
                "technique": "code-string-scan",
                "confidence": 0.5,
                "value": format!("node_modules/{name}/"),
            }],
        },
    });
    let properties = json!([
        { "name": "droidsaw:source", "value": "hermes-string-scan" },
        { "name": "droidsaw:versioned", "value": "false" },
    ]);

    json!({
        "type": "library",
        "bom-ref": purl.as_str(),
        "name": name,
        "purl": purl.as_str(),
        "evidence": evidence,
        "properties": properties,
    })
}
/// SBOM Stream E — emit one CycloneDX component per recovered
/// DEX-only-SDK version constant.
///
/// Purl shape: `pkg:generic/<canonical-name>@<version>` per the
/// closed `apk-sbom-class-prefix-purl-shape` decision (no Maven
/// provenance, so the generic ecosystem keeps the purl honest). The
/// same string is used as `bom-ref`. A `cpe` field is added only when
/// `sdk.cpe_vendor_product` is present.
///
/// `evidence.identity.methods[].technique` distinguishes the two
/// recovery paths:
/// - `ast-fingerprint` — `DexSdkVersionTechnique::ExactStaticField`:
///   the curated `(class_descriptor, field_name)` pair resolved
///   exactly in the DEX.
/// - `source-code-analysis` — `DexSdkVersionTechnique::ObfuscationFallback`:
///   string-pool name+version proximity fallback for R8-renamed apps.
///   Per CycloneDX 1.5 §evidence.identity.method this is the spec tag
///   for matches recovered from program-source-shaped strings retained
///   in the binary, without structural verification of the matched
///   call surface.
///
/// Confidence is not chosen here: both `evidence.identity.confidence`
/// and the method's `confidence` are copied from `sdk.confidence`
/// (the recovery table is expected to report a higher value for the
/// exact-match path than for the fallback path).
fn dex_sdk_version_component(
    sdk: &droidsaw_apk::dex_sdk_versions::DexSdkVersion,
) -> Value {
    use droidsaw_apk::dex_sdk_versions::DexSdkVersionTechnique;
    let purl = format!("pkg:generic/{}@{}", sdk.canonical_name, sdk.version);
    let mut obj = serde_json::Map::new();
    obj.insert("type".into(), json!("library"));
    obj.insert("bom-ref".into(), json!(purl));
    obj.insert("name".into(), json!(sdk.canonical_name));
    obj.insert("version".into(), json!(sdk.version));
    obj.insert("purl".into(), json!(purl));
    if let Some((vendor, product)) = sdk.cpe_vendor_product {
        obj.insert("cpe".into(), json!(format_cpe(vendor, product, &sdk.version)));
    }
    // Map the recovery path onto a CycloneDX technique tag plus a
    // human-readable evidence string.
    let (technique, evidence_value) = match sdk.technique {
        DexSdkVersionTechnique::ExactStaticField => (
            "ast-fingerprint",
            format!("static-field initializer: {}", sdk.canonical_name),
        ),
        DexSdkVersionTechnique::ObfuscationFallback => (
            "source-code-analysis",
            format!("string-pool name+version proximity: {}", sdk.canonical_name),
        ),
    };
    obj.insert(
        "evidence".into(),
        json!({
            "identity": {
                "field": "purl",
                "confidence": sdk.confidence,
                "methods": [{
                    "technique": technique,
                    "confidence": sdk.confidence,
                    "value": evidence_value,
                }],
            },
        }),
    );
    obj.insert(
        "properties".into(),
        Value::Array(vec![json!({
            "name": "droidsaw:source",
            "value": "dex-static-field-version",
        })]),
    );
    Value::Object(obj)
}
/// Build a CycloneDX component for a class-prefix detection from the
/// DEX type-descriptor scan. Emits as `pkg:generic/<class-prefix>` —
/// the generic ecosystem is the honest claim: we observed a class
/// prefix but do NOT know what maven/gradle coord (if any) it
/// originated from. A single prefix like `com.google` can map to
/// many distinct artifacts (firebase, gms, guava, gson, ...) shipped
/// from different repositories; asserting `pkg:maven/com.google/...`
/// would be a false provenance claim and `pkg:maven/<group>/UNKNOWN`
/// is purl-spec-illegal (the path-after-type segment must be a
/// concrete identifier).
///
/// Provenance is preserved via property k/v entries:
/// - `droidsaw:source: dex-class-prefix` — which detector fired
/// - `droidsaw:detection-shape: class-prefix-only` — explicit
///   "we don't know the artifact" signal for downstream filters
/// - `droidsaw:versioned: false` — no version was recovered
///
/// Evidence confidence 0.4 — lower than hermes-string-scan because
/// class prefixes are even less specific than `node_modules/` paths.
/// Stream C (JVM nested-JAR cataloger) will promote some of these to
/// full `pkg:maven/<group>/<artifact>@<version>` when concrete
/// coords are recoverable; this remains the honest fallback for the
/// rest.
///
/// NOTE(review): `dex-class-prefix-scan` does not appear to be one of
/// the technique values enumerated by the CycloneDX 1.5 JSON schema —
/// confirm strict schema validators accept it.
fn dex_class_prefix_component(group: &str) -> Value {
    let purl = format!("pkg:generic/{group}");
    // `json!` borrows expression operands, so `purl` can be named twice.
    json!({
        "type": "library",
        "bom-ref": purl,
        "name": group,
        "purl": purl,
        "evidence": {
            "identity": {
                "field": "name",
                "confidence": 0.4,
                "methods": [{
                    "technique": "dex-class-prefix-scan",
                    "confidence": 0.4,
                    // Render the dotted prefix back into descriptor form.
                    "value": format!("L{}/...;", group.replace('.', "/")),
                }],
            },
        },
        "properties": [
            { "name": "droidsaw:source", "value": "dex-class-prefix" },
            { "name": "droidsaw:detection-shape", "value": "class-prefix-only" },
            { "name": "droidsaw:versioned", "value": "false" },
        ],
    })
}
/// SBOM Stream C — emit one CycloneDX component per JVM nested JAR.
///
/// Purl shape, by descending confidence:
/// - `pkg:maven/<group>/<artifact>@<version>` when pom.properties was
///   present (ground-truth maven coord; confidence 0.95).
/// - `pkg:generic/<jar-basename>@<version>` when only the MANIFEST.MF
///   version attribute was present (confidence 0.6). The component
///   `name` prefers the manifest title and falls back to the basename.
///   A basename that is missing the `.jar` suffix, or that is empty
///   once the suffix is stripped (an entry literally named `.jar`),
///   falls back to the literal `jar` so the purl always has a name.
/// - `None` returned when neither source yielded data, or when the
///   manifest carried no version; caller skips emission (honest
///   absence — the JAR shipped classes-only with no upstream
///   attribution).
///
/// `jar.path` is `<apk_prefix>:<inner-path>`; the part after the first
/// `:` is what is hashed and what is reported as evidence `value`.
/// When `hash_archive_entry` succeeds, the SHA-256 rides in `hashes[]`,
/// is appended to `bom-ref` as `#sha256:<hex>`, and the OmniBOR gitoid
/// is added as an `omniborId` property; otherwise `bom-ref` is the bare
/// purl and both are omitted.
fn nested_jar_component(
    jar: &droidsaw_apk::nested_jar_cataloger::NestedJarMetadata,
    archive: &mut ZipArchive<File>,
) -> Option<Value> {
    let (purl, name, version, confidence, source_tag) =
        match (jar.maven_coord.as_ref(), jar.manifest_meta.as_ref()) {
            (Some(coord), _) => (
                format!(
                    "pkg:maven/{}/{}@{}",
                    coord.group_id, coord.artifact_id, coord.version
                ),
                coord.artifact_id.clone(),
                coord.version.clone(),
                0.95,
                "nested-jar-pom-properties",
            ),
            (None, Some(mm)) => {
                let v = mm.version.as_deref()?;
                let basename = jar
                    .path
                    .rsplit('/')
                    .next()
                    .and_then(|s| s.strip_suffix(".jar"))
                    // An empty name would render `pkg:generic/@<v>`,
                    // which is not a well-formed purl.
                    .filter(|s| !s.is_empty())
                    .unwrap_or("jar");
                let name = mm.title.clone().unwrap_or_else(|| basename.to_owned());
                (
                    format!("pkg:generic/{basename}@{v}"),
                    name,
                    v.to_owned(),
                    0.6,
                    "nested-jar-manifest-mf",
                )
            }
            (None, None) => return None,
        };
    // Strip the `<apk_prefix>:` qualifier to get the ZIP entry name.
    let inner_path = jar
        .path
        .split_once(':')
        .map_or(jar.path.as_str(), |(_, rest)| rest);
    let digest = hash_archive_entry(archive, inner_path);
    let bom_ref = match digest.as_ref() {
        Some(d) => format!("{purl}#sha256:{}", d.sha256_hex),
        None => purl.clone(),
    };
    let mut obj = serde_json::Map::new();
    obj.insert("type".into(), json!("library"));
    obj.insert("bom-ref".into(), json!(bom_ref));
    obj.insert("name".into(), json!(name));
    // Both emitting arms above carry a version, so it is always present.
    obj.insert("version".into(), json!(version));
    obj.insert("purl".into(), json!(purl));
    if let Some(d) = digest.as_ref() {
        obj.insert(
            "hashes".into(),
            json!([{ "alg": "SHA-256", "content": d.sha256_hex }]),
        );
    }
    obj.insert(
        "evidence".into(),
        json!({
            "identity": {
                "field": "purl",
                "confidence": confidence,
                "methods": [{
                    "technique": "manifest-analysis",
                    "confidence": confidence,
                    "value": inner_path,
                }],
            },
        }),
    );
    let mut props = vec![
        json!({ "name": "droidsaw:source", "value": source_tag }),
    ];
    if let Some(d) = digest.as_ref() {
        props.push(json!({ "name": "omniborId", "value": d.omnibor_gitoid }));
    }
    obj.insert("properties".into(), Value::Array(props));
    Some(Value::Object(obj))
}
/// Render one bundled ML model file as a CycloneDX 1.5
/// `components[type=file]` entry.
///
/// v1-minimal: identity is extension-based only — no magic-byte
/// verification and no model-name inference. Path, format, and size
/// travel as `droidsaw:ml-model:*` properties; the SHA-256 goes into
/// `hashes[]` and into the `bom-ref` suffix. CDX 1.6 support
/// (`type: "machine-learning-model"` + `mlInfo`/`modelCard` block) is
/// planned for future enhancement.
fn ml_model_component(model: &droidsaw_apk::ml_models::MlModelFile) -> Value {
    // `model.path` is `<apk_prefix>:<inner-path>`; everything emitted
    // below works from the part after the first `:` (or the whole
    // string when there is no `:`).
    let inner_path = match model.path.split_once(':') {
        Some((_, rest)) => rest,
        None => model.path.as_str(),
    };
    let last_segment = inner_path
        .rsplit_once('/')
        .map_or(inner_path, |(_, tail)| tail);
    // An attacker-controlled entry name such as `<prefix>:` or `:` can
    // leave nothing after stripping. Substitute a name built from the
    // leading 16 characters of the SHA-256 so purl and bom-ref stay
    // well-formed and distinct.
    let basename = if last_segment.is_empty() {
        // Checked `.get` instead of slicing: the hash is hex ASCII by
        // construction, but the lint floor wants the non-panicking
        // form; an off-boundary cut falls back to the whole string.
        let cut = 16.min(model.sha256.len());
        let short_hash = model
            .sha256
            .get(..cut)
            .unwrap_or(model.sha256.as_str());
        format!("model-{short_hash}")
    } else {
        last_segment.to_owned()
    };
    // There is no standard purl type for ML models, so this follows
    // the same `pkg:generic/<basename>` convention droidsaw uses for
    // its other unversioned native-lib and nested-JAR emits. The hash
    // anchors `bom-ref` (purl + `#sha256:<hex>`) and is kept out of the
    // purl itself so cross-BOM consumers see a clean package URL.
    let purl = format!("pkg:generic/{basename}");
    let bom_ref = format!("{purl}#sha256:{}", model.sha256);
    let identity = json!({
        "field": "filename",
        "confidence": 0.6,
        "methods": [{
            "technique": "filename",
            "confidence": 0.6,
            "value": inner_path,
        }],
    });
    json!({
        "type": "file",
        "bom-ref": bom_ref,
        "name": basename,
        "purl": purl,
        "hashes": [{ "alg": "SHA-256", "content": model.sha256 }],
        "evidence": { "identity": identity },
        "properties": [
            { "name": "droidsaw:source", "value": "apk-ml-model" },
            { "name": "droidsaw:ml-model:format", "value": model.format.as_str() },
            { "name": "droidsaw:ml-model:path", "value": inner_path },
            { "name": "droidsaw:ml-model:size-bytes", "value": model.size_bytes.to_string() },
        ],
    })
}
/// Assemble the CycloneDX 1.5 BOM envelope for the APK held in `ctx`.
///
/// Components are collected in a fixed sequence of passes — sdk_inventory
/// markers, manifest meta-data-only SDKs, native libs, nested JARs, ML
/// model files, DEX SDK version constants, versioned Hermes npm hits,
/// name-only Hermes npm hits, DEX class prefixes. Every pass checks one
/// shared `emitted_purls` set, so an earlier pass wins when two passes
/// produce the same purl string. The final array is sorted by `bom-ref`.
///
/// The returned object is the strict CycloneDX document plus droidsaw
/// extension keys added after the strict subset has been serialized and
/// hashed: `_meta`, `_statement`, and — only when VEX claims exist —
/// `_openvex`.
///
/// # Errors
///
/// Returns an error when `ctx.require_apk()` fails, when the APK file
/// cannot be re-opened or re-parsed as a ZIP, when the strict subset
/// cannot be serialized, or when `hash_file_sha256` fails on the APK.
pub fn sbom(ctx: &CrossLayerContext) -> anyhow::Result<Value> {
let apk = ctx.require_apk()?;
// Capture started_on at function entry; finished_on at the end. The
// attestation's predicate records both as the build event's span.
// These vary per run by design — the attestation documents the build
// event. `metadata.timestamp` (set further down) is wall-clock too and
// sits inside the strict subset, so the serialized BOM bytes and the
// subject digest derived from them also differ from run to run.
let started_on = chrono::Utc::now().to_rfc3339_opts(chrono::SecondsFormat::Secs, true);
// Path Y: re-open the APK ZIP once at sbom time for hash + license
// capture. Trade-off vs Path X (parse-time capture in apk crate):
// smaller diff, no apk-crate changes, sbom is one-shot CLI not hot
// loop. Promotes to Path X when a second consumer surfaces.
let archive_path = Path::new(&apk.path);
let archive_file = File::open(archive_path)
.map_err(|e| anyhow::anyhow!("re-open APK for sbom hash pass failed: {e}"))?;
let mut archive = ZipArchive::new(archive_file)
.map_err(|e| anyhow::anyhow!("re-parse APK ZIP for sbom hash pass failed: {e}"))?;
let licenses_spdx = extract_spdx_licenses(&mut archive);
let mut components: Vec<Value> = Vec::new();
let mut emitted_purls: BTreeSet<String> = BTreeSet::new();
// Track which group:artifact keys the sdk_inventory loop covered, so
// the standalone meta-data loop below doesn't double-emit. The
// emitted_purls set already de-dupes per purl, but we additionally
// need per-group:artifact attribution because the meta-data loop
// builds purls with `@<version>` baked in (different string from a
// META-INF-derived purl that may have None version).
let mut emitted_group_artifacts: BTreeSet<String> = BTreeSet::new();
for marker in &apk.sdk_inventory {
let meta_inf_version = lookup_version(marker, &apk.meta_inf_versions);
let group_artifact = marker_group_artifact(marker);
// Per acceptance gate: META-INF is preferred when both present;
// meta-data fills in when META-INF lookup returned None.
let manifest_version = group_artifact
.as_deref()
.and_then(|ga| apk.manifest_meta_data_versions.get(ga).map(String::as_str));
// `manifest_evidence` is only set when the version actually came from
// the manifest meta-data path; it names the originating meta-data key.
let (version, manifest_evidence) = match (meta_inf_version, manifest_version) {
(Some(v), _) => (Some(v), None),
(None, Some(v)) => {
let key = group_artifact
.as_deref()
.and_then(droidsaw_apk::sdk_inventory_metadata::rule_for_group_artifact)
.map(|r| r.meta_data_key);
(Some(v), key)
}
(None, None) => (None, None),
};
let Some(purl) = marker_purl(marker, version) else {
continue;
};
if !emitted_purls.insert(purl.clone()) {
continue;
}
if let Some(ref ga) = group_artifact {
emitted_group_artifacts.insert(ga.clone());
}
// A CPE needs both a version and a rule that carries a
// vendor/product template; anything else emits no CPE.
let cpe = match (version, group_artifact.as_deref()) {
(Some(v), Some(ga)) => droidsaw_apk::sdk_inventory_metadata::rule_for_group_artifact(ga)
.and_then(|r| r.cpe_vendor_product)
.map(|(vendor, product)| format_cpe(vendor, product, v)),
_ => None,
};
let inner_path = strip_apk_prefix(&marker.evidence);
let digest = hash_archive_entry(&mut archive, inner_path);
components.push(sdk_marker_component(
marker,
version,
&purl,
digest.as_ref(),
cpe.as_deref(),
manifest_evidence,
));
}
// Standalone meta-data-only components for SDKs whose presence is
// declared in AndroidManifest `<meta-data>` but whose group:artifact
// was not detected through the sdk_inventory ZIP-path matcher (most
// Play Services modules, when the META-INF/*.version markers are
// absent or stripped). One component per resolved group:artifact;
// version comes from the meta-data ARSC resolution path.
for (group_artifact, version) in &apk.manifest_meta_data_versions {
if emitted_group_artifacts.contains(group_artifact) {
continue;
}
let Some(rule) =
droidsaw_apk::sdk_inventory_metadata::rule_for_group_artifact(group_artifact)
else {
continue;
};
let Some(component) =
manifest_meta_data_component(group_artifact, version, rule)
else {
continue;
};
// Rebuild the versioned purl locally for the dedupe check.
// NOTE(review): presumably this matches the purl string the helper
// put inside `component` — verify against manifest_meta_data_component.
let purl = format!(
"pkg:maven/{}",
group_artifact.replacen(':', "/", 1),
);
let versioned_purl = format!("{purl}@{version}");
if !emitted_purls.insert(versioned_purl) {
continue;
}
components.push(component);
}
for libs in apk.native_libs.values() {
for lib in libs {
let purl = format!("pkg:generic/{}/{}", lib.abi, lib.name);
if !emitted_purls.insert(purl.clone()) {
continue;
}
components.push(native_lib_component(lib, &mut archive));
}
}
// SBOM Stream C — JVM nested-JAR cataloger. One component per
// `libs/*.jar` or `assets/**/*.jar` entry with recovered
// metadata. Purl preference:
// - `pkg:maven/<group>/<artifact>@<version>` when pom.properties
// was present (the ground-truth maven coord)
// - `pkg:generic/<jar-basename>@<version>` when only manifest
// attributes yielded a version
// - skip emission when neither source yielded data (honest
// absence; the JAR shipped classes-only with no upstream
// attribution)
for jar in &apk.nested_jars {
let Some(component) = nested_jar_component(jar, &mut archive) else {
continue;
};
// Dedup by purl against everything already emitted.
let Some(purl) = component.get("purl").and_then(Value::as_str) else {
continue;
};
if !emitted_purls.insert(purl.to_string()) {
continue;
}
components.push(component);
}
// SBOM — bundled ML model files. v1-minimal: extension-based,
// type=file, droidsaw:ml-model:* properties carry format + path +
// size. CDX 1.6 type promotion + magic-byte sniffing is planned
// for future enhancement.
// NOTE(review): the dedupe key is the purl, which is built from the
// file basename only — two models with the same basename in different
// directories collapse to the first one even when their hashes differ.
// Confirm that is intended.
for model in &apk.ml_models {
let component = ml_model_component(model);
let Some(purl) = component.get("purl").and_then(Value::as_str) else {
continue;
};
if !emitted_purls.insert(purl.to_string()) {
continue;
}
components.push(component);
}
// SBOM Stream E — DEX-only-SDK version constants. Walk each parsed
// DEX, run the curated `(class_descriptor, field_name)` table, and
// emit one component per recovered SDK. When the same canonical
// SDK shows up in more than one DEX (multi-DEX APK with split
// classes), the first occurrence wins — `match_against` returns
// exact-match results first, so an ExactStaticField hit in
// classes.dex shadows an ObfuscationFallback in classes2.dex.
let mut emitted_dex_sdks: BTreeSet<String> = BTreeSet::new();
for dex in &ctx.dex {
let recovered = droidsaw_apk::dex_sdk_versions::match_against(dex);
for (canonical, sdk) in recovered {
if !emitted_dex_sdks.insert(canonical.clone()) {
continue;
}
let component = dex_sdk_version_component(&sdk);
let Some(purl) = component.get("purl").and_then(Value::as_str) else {
continue;
};
if !emitted_purls.insert(purl.to_string()) {
continue;
}
components.push(component);
}
}
// Promote the two heuristic detection lists out of the `_meta`
// envelope extension into the strict `components[]` array.
// Visibility wins over the prior "strict-CDX components require
// queryable maven coords" stance: OSV-Scanner accepts name-only
// `pkg:npm/<name>` purls and matches at the package level.
// SBOM Stream G — versioned Hermes npm scan FIRST so a versioned
// hit shadows the unversioned heuristic for the same name. The
// emitted_purls set distinguishes by purl string, so
// `pkg:npm/foo@1.2.3` and `pkg:npm/foo` are separate entries —
// dedupe by name explicitly via `versioned_names` so the
// name-only fallback doesn't double-emit.
let hermes_versioned = hermes_npm_versioned(ctx);
// Counts for `_meta` are taken from the raw scan results, before the
// purl dedupe below; they can exceed the number of components pushed.
let hermes_versioned_count = hermes_versioned.len();
let versioned_names: BTreeSet<String> = hermes_versioned
.iter()
.map(|(name, _)| name.clone())
.collect();
for (name, version) in &hermes_versioned {
let purl = format!("pkg:npm/{name}@{version}");
if !emitted_purls.insert(purl) {
continue;
}
components.push(hermes_npm_versioned_component(name, version));
}
let heuristic_npm = hermes_npm_heuristic(ctx);
let heuristic_npm_count = heuristic_npm.len();
for name in &heuristic_npm {
if versioned_names.contains(name) {
continue;
}
let purl = format!("pkg:npm/{name}");
if !emitted_purls.insert(purl) {
continue;
}
components.push(hermes_npm_component(name));
}
let heuristic_maven_prefix = dex_class_prefix_heuristic(ctx);
let heuristic_maven_prefix_count = heuristic_maven_prefix.len();
for group in &heuristic_maven_prefix {
let purl = format!("pkg:generic/{group}");
if !emitted_purls.insert(purl) {
continue;
}
components.push(dex_class_prefix_component(group));
}
// Order the array by `bom-ref` so it does not depend on the order the
// passes above happened to push in; a missing bom-ref sorts as "".
components.sort_by(|a, b| {
let an = a.get("bom-ref").and_then(Value::as_str).unwrap_or("");
let bn = b.get("bom-ref").and_then(Value::as_str).unwrap_or("");
an.cmp(bn)
});
let component_count = components.len();
// The serial number is derived from the APK path bytes rather than
// from randomness.
let serial_number = synthetic_uuid(apk.path.as_bytes());
let tool_version = env!("CARGO_PKG_VERSION");
// Subject component for the BOM — names the APK being inventoried
// and self-attests the extraction methodology. `metadata.component`
// is the canonical CycloneDX field for "what this BOM is about".
let subject_pkg = apk
.decode_manifest()
.map(|m| m.package)
.filter(|p| !p.is_empty());
// Name preference: manifest package, then the APK file stem, then a
// fixed placeholder.
let subject_name = subject_pkg
.clone()
.or_else(|| {
Path::new(&apk.path)
.file_stem()
.and_then(|s| s.to_str())
.map(str::to_owned)
})
.unwrap_or_else(|| "android-application".to_owned());
let mut subject_obj = serde_json::Map::new();
subject_obj.insert("type".into(), json!("application"));
subject_obj.insert("bom-ref".into(), json!(format!("subject:{subject_name}")));
subject_obj.insert("name".into(), json!(subject_name));
subject_obj.insert(
"description".into(),
json!(
"Android application SBOM reconstructed from a compiled APK by \
droidsaw via static binary analysis. The component set is a \
best-effort inventory pieced together from META-INF/*.version \
markers, AndroidManifest <meta-data> tags, native shared-library \
enumeration, and string-scan heuristics over Hermes and DEX. \
Build-time manifests, lockfiles, and source are unavailable; \
completeness is partial by construction. See \
`metadata.properties[].droidsaw:*` for the methodology flags."
),
);
// The subject purl is only emitted when the manifest yielded a
// non-empty package name.
if let Some(pkg) = subject_pkg {
subject_obj.insert("purl".into(), json!(format!("pkg:android/{pkg}")));
}
let mut metadata = serde_json::Map::new();
// Wall-clock timestamp: this is the field that makes the strict-subset
// bytes differ between runs.
metadata.insert(
"timestamp".into(),
json!(chrono::Utc::now().to_rfc3339_opts(chrono::SecondsFormat::Secs, true)),
);
// Tools list: droidsaw self-attestation + (SBOM Stream H) any
// build-toolchain identifiers recovered from `META-INF/MANIFEST.MF`
// main attributes (`Created-By: 8.5.2 (Gradle)` etc). Multiple
// entries when both Created-By and Build-Jdk are present.
let mut tools_components = vec![json!({
"type": "application",
"name": "droidsaw",
"version": tool_version,
})];
if let Some(mf_bytes) = apk.manifest_mf_raw.as_deref() {
let attrs = droidsaw_apk::manifest_mf::parse_main_attributes(mf_bytes);
if let Some(created_by) = attrs.get("Created-By")
&& let Some((name, version)) =
droidsaw_apk::manifest_mf::parse_toolchain_created_by(created_by)
{
tools_components.push(json!({
"type": "application",
"name": name,
"version": version,
"properties": [
{ "name": "droidsaw:source", "value": "manifest-mf-created-by" },
],
}));
}
// At most one JDK entry: Build-Jdk-Spec wins over Build-Jdk.
if let Some(jdk_spec) = attrs.get("Build-Jdk-Spec") {
tools_components.push(json!({
"type": "application",
"name": "JDK",
"version": jdk_spec,
"properties": [
{ "name": "droidsaw:source", "value": "manifest-mf-build-jdk-spec" },
],
}));
} else if let Some(jdk) = attrs.get("Build-Jdk") {
// Fall back to Build-Jdk when Build-Jdk-Spec is absent.
tools_components.push(json!({
"type": "application",
"name": "JDK",
"version": jdk,
"properties": [
{ "name": "droidsaw:source", "value": "manifest-mf-build-jdk" },
],
}));
}
}
metadata.insert(
"tools".into(),
json!({ "components": tools_components }),
);
metadata.insert("component".into(), Value::Object(subject_obj));
// CycloneDX 1.5 field (added in 1.5; 1.6 standardizes the same key).
// Tags the BOM as reconstructed by a downstream binary analyzer —
// not produced by the original build pipeline. Honest about provenance.
metadata.insert(
"lifecycles".into(),
json!([{ "phase": "post-build" }]),
);
// Partial-visibility disclosure — machine-readable flags so
// downstream consumers (humans + tools) know they are looking at
// a reverse-engineered, best-effort inventory, not a build-time
// ground truth. Per-component evidence.identity.confidence
// already varies (0.95 META-INF, 0.85 meta-data, 0.5 hermes,
// 0.4 class-prefix); these flags surface the *why* at the
// BOM-document level.
metadata.insert(
"properties".into(),
json!([
{ "name": "droidsaw:sbom-completeness", "value": "partial" },
{ "name": "droidsaw:extraction-method", "value": "static-binary-analysis" },
{ "name": "droidsaw:source-availability", "value": "none" },
]),
);
// `metadata.licenses` is omitted entirely when no SPDX identifier was
// extracted from the archive.
if !licenses_spdx.is_empty() {
metadata.insert(
"licenses".into(),
Value::Array(
licenses_spdx
.iter()
.map(|id| json!({ "license": { "id": id } }))
.collect(),
),
);
}
let mut bom = serde_json::Map::new();
bom.insert("bomFormat".into(), json!("CycloneDX"));
bom.insert("specVersion".into(), json!("1.5"));
bom.insert("serialNumber".into(), json!(serial_number));
bom.insert("version".into(), json!(1));
bom.insert("metadata".into(), Value::Object(metadata));
bom.insert("components".into(), Value::Array(components));
// SBOM Stream — OpenVEX claims inline as CycloneDX 1.5
// `vulnerabilities[]`. One entry per (CVE, native-lib) pair that
// droidsaw has an opinion on. CVEs NOT in the curated table emit
// no entry — silence over fabrication. Native libs without
// version_info also emit no entries (no canonical_name to match
// against the curated rules).
let vex_claims =
crate::commands::sbom_openvex::gather_claims(&apk.native_libs);
if !vex_claims.is_empty() {
let vulnerabilities: Vec<Value> = vex_claims
.iter()
.map(crate::commands::sbom_openvex::cyclonedx_vulnerability)
.collect();
bom.insert("vulnerabilities".into(), Value::Array(vulnerabilities));
}
// Compute the strict-CycloneDX subset bytes BEFORE adding _meta or
// _statement, so the Statement's subject digest is over exactly
// what a strict-CDX consumer sees (envelope minus the droidsaw
// extensions). serde_json::Map defaults to BTreeMap → sorted-key
// serialization → byte-identical to what a consumer re-derives by
// sorted-key re-serializing the same envelope minus the extensions.
// Note: `metadata.timestamp` lives inside the strict subset, so the
// subject digest changes per run (the Statement documents that
// run's build event). Within a single run, producer and consumer
// see the same bytes — that's the verification property.
let bom_bytes = serde_json::to_vec(&Value::Object(bom.clone()))
.map_err(|e| anyhow::anyhow!("serialize strict-CycloneDX subset for in-toto Statement: {e}"))?;
let bom_sha256 = sha256_hex(&bom_bytes);
let apk_sha256 = hash_file_sha256(archive_path)?;
let finished_on = chrono::Utc::now().to_rfc3339_opts(chrono::SecondsFormat::Secs, true);
let statement = build_provenance_statement(
archive_path,
&apk_sha256,
&bom_sha256,
&started_on,
&finished_on,
);
// Saturating adds: the total is a summary figure, so clamp rather than
// risk an overflow panic.
let heuristic_total = heuristic_npm_count
.saturating_add(heuristic_maven_prefix_count)
.saturating_add(hermes_versioned_count);
let meta = json!({
"count": component_count,
"truncated": false,
"hint": "components[] holds purl-bearing fingerprints from META-INF markers, AndroidManifest meta-data, native libs, Hermes string-scan (pkg:npm/<name>), Hermes version-scan (pkg:npm/<name>@<version>), and DEX class-prefix scan (pkg:generic/<class-prefix>). _statement is an unsigned in-toto Statement carrying a SLSA-Provenance v1.0 predicate over the strict CycloneDX subset (not a signed Attestation). Per-component confidence + droidsaw:source property classify provenance; metadata.properties carries BOM-level completeness flags.",
"related": ["info", "manifest", "audit"],
"heuristic_detections": {
"npm_from_hermes_strings_count": heuristic_npm_count,
"npm_from_hermes_versioned_count": hermes_versioned_count,
"maven_class_prefix_count": heuristic_maven_prefix_count,
"count": heuristic_total,
"note": "Heuristic entries are now first-class components[] in this BOM; these counts are summary-only. Filter by component.properties[].name == 'droidsaw:source' to extract per-source subsets.",
},
});
// Extension keys go in only now, after the strict subset was hashed.
bom.insert("_meta".into(), meta);
bom.insert("_statement".into(), statement);
if !vex_claims.is_empty() {
// Standalone OpenVEX 0.2.0 envelope under `_openvex`, mirroring
// the `_statement` extension pattern. Consumers that need a
// file extract with `jq '._openvex' bom.json > bom.openvex.json`
// — keeps the BOM single-file while letting OpenVEX-native
// tools (Trivy `--vex`, Grype `--vex-add`) consume the
// statement set verbatim.
let openvex =
crate::commands::sbom_openvex::openvex_envelope(&vex_claims, &serial_number);
bom.insert("_openvex".into(), openvex);
}
Ok(Value::Object(bom))
}
#[cfg(test)]
mod tests {
use super::*;
use droidsaw_apk::sdk_inventory::SdkMarker;
use droidsaw_apk::sdk_inventory_metadata::{
IntegerPacking, MetaDataEntry, MetaDataValueKind, rule_for_group_artifact,
};
/// Test fixture: build an `SdkMarker` whose `evidence` looks like a
/// `base:`-prefixed `<path_contains>.version` ZIP entry.
fn marker(path_contains: &'static str, name: &'static str, category: &'static str) -> SdkMarker {
    let evidence = format!("base:{path_contains}.version");
    SdkMarker {
        evidence,
        path_contains,
        name,
        category,
    }
}
// ── SBOM Stream A — meta-data version + CPE plumbing ──────────────
#[test]
fn marker_group_artifact_extracts_maven_coord() {
    // A `META-INF/<group>_<artifact>` rule path maps to `<group>:<artifact>`.
    let firebase = marker(
        "META-INF/com.google.firebase_firebase-analytics",
        "Firebase Analytics",
        "analytics",
    );
    let coord = marker_group_artifact(&firebase);
    assert_eq!(
        coord.as_deref(),
        Some("com.google.firebase:firebase-analytics"),
    );
}
#[test]
fn marker_group_artifact_returns_none_for_native_so_rule() {
    // A native `.so` rule has no maven coordinate to extract.
    let hermes = marker("libhermes.so", "React Native (Hermes)", "runtime");
    let coord = marker_group_artifact(&hermes);
    assert!(coord.is_none());
}
#[test]
fn marker_group_artifact_returns_none_for_no_underscore_meta_inf() {
    // Braintree's rule path stops at the group: there is no
    // `_<artifact>` half to split on.
    let braintree = marker("META-INF/com.braintreepayments.api", "Braintree", "payments");
    let coord = marker_group_artifact(&braintree);
    assert!(coord.is_none());
}
#[test]
fn format_cpe_emits_canonical_2_3_shape() {
    // CPE 2.3 formatted string: `cpe:2.3:<part>:<vendor>:<product>:<version>`
    // followed by seven more fields — 13 segments, hence 12 colons.
    let rendered = format_cpe("google", "play_services", "22.89.0");
    let colon_count = rendered.matches(':').count();
    assert_eq!(rendered, "cpe:2.3:a:google:play_services:22.89.0:*:*:*:*:*:*:*");
    assert_eq!(colon_count, 12);
}
#[test]
fn manifest_meta_data_component_well_formed() {
    // Happy path for a rule that carries a CPE template: every identity
    // field is populated and the evidence names the meta-data key.
    let coord = "com.google.android.gms:play-services-base";
    let rule = rule_for_group_artifact(coord).expect("rule present");
    let component =
        manifest_meta_data_component(coord, "22.89.0", rule).expect("well-formed");

    assert_eq!(component["type"], "library");
    assert_eq!(component["name"], "play-services-base");
    assert_eq!(component["version"], "22.89.0");
    assert_eq!(
        component["purl"],
        "pkg:maven/com.google.android.gms/play-services-base@22.89.0",
    );
    assert_eq!(
        component["cpe"],
        "cpe:2.3:a:google:play_services:22.89.0:*:*:*:*:*:*:*",
    );

    let methods = component["evidence"]["identity"]["methods"]
        .as_array()
        .expect("methods array");
    assert_eq!(methods.len(), 1);
    let only_method = &methods[0];
    assert_eq!(only_method["technique"], "manifest-analysis");
    let evidence_value = only_method["value"].as_str().unwrap_or("");
    assert!(
        evidence_value.contains("com.google.android.gms.version"),
        "evidence value must name the originating meta-data key",
    );
}
#[test]
fn manifest_meta_data_component_omits_cpe_when_template_absent() {
    // play-services-maps is keyed off the same meta-data entry but its
    // rule has no CPE template — the SDK family is too broad for a
    // single CPE — so the component must not carry a `cpe` field.
    let coord = "com.google.android.gms:play-services-maps";
    let rule = rule_for_group_artifact(coord).expect("rule present");
    let component =
        manifest_meta_data_component(coord, "22.89.0", rule).expect("well-formed");
    let cpe_field = component.get("cpe");
    assert!(cpe_field.is_none(), "no CPE when template absent");
}
#[test]
fn manifest_meta_data_component_rejects_malformed_group_artifact() {
    // Defensive case: a hand-built rule whose group_artifact has an
    // empty group half must be dropped, not rendered into a
    // half-populated purl.
    let bogus_rule = MetaDataEntry {
        meta_data_key: "x",
        group_artifact: ":artifact-only",
        cpe_vendor_product: None,
        integer_packing: None,
        value_kind: MetaDataValueKind::Version,
    };
    let outcome = manifest_meta_data_component(":artifact-only", "1.0", &bogus_rule);
    assert!(outcome.is_none());
}
#[test]
fn sdk_marker_component_emits_cpe_and_manifest_evidence() {
    // With a CPE and a meta-data key supplied, the component carries
    // the CPE and a second evidence method after the filename one.
    let firebase = marker(
        "META-INF/com.google.firebase_firebase-analytics",
        "Firebase Analytics",
        "analytics",
    );
    let cpe = format_cpe("google", "firebase", "21.5.0");
    let component = sdk_marker_component(
        &firebase,
        Some("21.5.0"),
        "pkg:maven/com.google.firebase/firebase-analytics@21.5.0",
        None,
        Some(&cpe),
        Some("com.google.firebase.messaging.default_notification_icon"),
    );
    assert_eq!(component["cpe"], cpe);

    let methods = component["evidence"]["identity"]["methods"]
        .as_array()
        .expect("methods array");
    assert_eq!(methods.len(), 2, "filename + manifest-analysis entries");
    let (first, second) = (&methods[0], &methods[1]);
    assert_eq!(first["technique"], "filename");
    assert_eq!(second["technique"], "manifest-analysis");
}
#[test]
fn sdk_marker_component_without_meta_data_evidence_keeps_filename_only() {
    // No digest, no CPE, no meta-data key: the component has no `cpe`
    // field and exactly one evidence method, the filename one.
    let bugsnag = marker(
        "META-INF/com.bugsnag_bugsnag-android",
        "Bugsnag",
        "crash",
    );
    let component = sdk_marker_component(
        &bugsnag,
        Some("5.0.0"),
        "pkg:maven/com.bugsnag/bugsnag-android@5.0.0",
        None,
        None,
        None,
    );
    let cpe_field = component.get("cpe");
    assert!(cpe_field.is_none());

    let methods = component["evidence"]["identity"]["methods"]
        .as_array()
        .expect("methods array");
    assert_eq!(methods.len(), 1);
    assert_eq!(methods[0]["technique"], "filename");
}
#[test]
fn play_services_packing_integration_round_trip() {
    // Chains the two halves of the Play Services path: decode the
    // packed integer, then feed the decoded string to the standalone
    // component builder and check the version survives unchanged. The
    // rule used must be the one flagged with the Play Services packing.
    use droidsaw_apk::sdk_inventory_metadata::decode_play_services_version;

    let coord = "com.google.android.gms:play-services-base";
    let decoded = decode_play_services_version(22_890_000);
    assert_eq!(decoded, "22.89.0");

    let rule = rule_for_group_artifact(coord).expect("rule present");
    let component =
        manifest_meta_data_component(coord, &decoded, rule).expect("well-formed");
    assert_eq!(component["version"], "22.89.0");
    assert_eq!(rule.integer_packing, Some(IntegerPacking::PlayServices));
}
#[test]
fn purl_maven_with_version() {
    // A maven-shaped marker plus a version renders `...@<version>`.
    let firebase = marker(
        "META-INF/com.google.firebase_firebase-analytics",
        "Firebase Analytics",
        "analytics",
    );
    let purl = marker_purl(&firebase, Some("21.5.0"));
    assert_eq!(
        purl.as_deref(),
        Some("pkg:maven/com.google.firebase/firebase-analytics@21.5.0"),
    );
}
#[test]
fn purl_maven_without_version() {
    // Without a version the maven purl simply has no `@` suffix.
    let bugsnag = marker(
        "META-INF/com.bugsnag_bugsnag-android",
        "Bugsnag",
        "crash",
    );
    let purl = marker_purl(&bugsnag, None);
    assert_eq!(
        purl.as_deref(),
        Some("pkg:maven/com.bugsnag/bugsnag-android"),
    );
}
#[test]
fn purl_generic_native_so() {
    // `libhermes.so` is expected to reduce to the generic name `hermes`.
    let hermes = marker("libhermes.so", "React Native (Hermes)", "runtime");
    let purl = marker_purl(&hermes, None);
    assert_eq!(purl.as_deref(), Some("pkg:generic/hermes"));
}
/// An asset-path marker maps to a `pkg:generic` purl.
#[test]
fn purl_generic_asset_slug() {
    let flutter = marker("assets/flutter_assets", "Flutter", "runtime");
    let purl = marker_purl(&flutter, None);
    assert_eq!(purl.as_deref(), Some("pkg:generic/flutter"));
}
/// The bare-substring `mlkit` rule still resolves to a `pkg:generic`
/// purl.
#[test]
fn purl_generic_mlkit_substring_rule() {
    let ml_kit = marker("mlkit", "ML Kit", "ml");
    let purl = marker_purl(&ml_kit, None);
    assert_eq!(purl.as_deref(), Some("pkg:generic/ml-kit"));
}
/// Regression guard for `META-INF/` rules that have no `_<artifact>`
/// half.
#[test]
fn purl_meta_inf_without_artifact_separator_falls_to_generic() {
    // `META-INF/com.braintreepayments.api` contains no `_`, so
    // `split_once('_')` yields None. If the classifier did not fall
    // through to the asset-slug path, the marker would vanish from
    // BOTH `components[]` AND `_meta.heuristic_detections` — losing an
    // SDK that was successfully detected.
    let braintree = marker("META-INF/com.braintreepayments.api", "Braintree", "payments");
    let purl = marker_purl(&braintree, None);
    assert_eq!(
        purl.as_deref(),
        Some("pkg:generic/braintree"),
        "META-INF/ without `_<artifact>` separator must fall through to asset-slug, not return None",
    );
}
/// Every marker produced by scanning a synthetic entry list that hits
/// the production rule table must resolve to some purl.
#[test]
fn purl_resolves_for_every_rule_in_production_sdk_inventory_table() {
    use droidsaw_apk::sdk_inventory::scan_entries;

    // Each string below is a synthetic ZIP entry name crafted so that a
    // rule's `path_contains` in `droidsaw_apk::sdk_inventory::RULES`
    // substring-matches it. This is a regression test for the
    // META-INF/com.braintreepayments.api class of data-loss bugs: a
    // newly added rule the classifier cannot handle shows up here.
    //
    // Note: `scan_entries` dedupes by `(name, category)`, so rules that
    // share a canonical name (Flutter has 2; Mapbox has 2; AppsFlyer
    // has 2; Stripe has 2; Sentry has 3; Firebase Crashlytics has 2)
    // fold into one marker. Every distinct (name, category) is still
    // covered, and therefore every rule's unique path_contains, since
    // dedup picks the FIRST hit.
    let synth_entries = [
        "assets/flutter_assets/x",
        "lib/arm64-v8a/libflutter.so",
        "assets/www/cordova.js",
        "assets/capacitor.config.json",
        "assets/index.android.bundle",
        "lib/arm64-v8a/libhermes.so",
        "lib/arm64-v8a/libreactnativejni.so",
        "lib/arm64-v8a/libjsc.so",
        "lib/arm64-v8a/libil2cpp.so",
        "lib/arm64-v8a/libmonosgen-2.0.so",
        "assets/bin/Data/Managed/x",
        "lib/arm64-v8a/libunity.so",
        "assets/com/appsflyer/x",
        "META-INF/com.appsflyer_af-android-sdk.version",
        "META-INF/com.adjust.sdk_adjust-android.version",
        "META-INF/io.branch.sdk.android_library.version",
        "META-INF/io.sentry_sentry-android.version",
        "assets/microgram/runtime/sentry/x",
        "lib/arm64-v8a/libsentry.so",
        "META-INF/com.google.firebase_firebase-crashlytics.version",
        "lib/arm64-v8a/libcrashlytics.so",
        "META-INF/com.bugsnag_bugsnag-android.version",
        "lib/arm64-v8a/libbugsnag-ndk.so",
        "META-INF/com.google.firebase_firebase-analytics.version",
        "META-INF/com.google.firebase_firebase-messaging.version",
        "META-INF/com.google.firebase_firebase-auth.version",
        "META-INF/com.google.firebase_firebase-database.version",
        "META-INF/com.google.firebase_firebase-firestore.version",
        "META-INF/com.google.firebase_firebase-storage.version",
        "META-INF/com.google.firebase_firebase-config.version",
        "META-INF/com.google.android.gms_play-services-maps.version",
        "META-INF/com.google.android.gms_play-services-ads.version",
        "META-INF/com.google.android.gms_play-services-auth.version",
        "META-INF/com.google.android.gms_play-services-location.version",
        "META-INF/com.applovin_applovin-sdk.version",
        "lib/arm64-v8a/libapplovin_sdk.so",
        "META-INF/com.ironsource.sdk_mediationsdk.version",
        "lib/arm64-v8a/libunityads.so",
        "lib/arm64-v8a/libfmod.so",
        "lib/arm64-v8a/libstripe.so",
        "META-INF/com.stripe_stripe-android.version",
        "META-INF/com.braintreepayments.api",
        "lib/arm64-v8a/libmapbox-gl.so",
        "lib/arm64-v8a/libmapbox-common.so",
        "lib/arm64-v8a/librealm-jni.so",
        "lib/arm64-v8a/libsqlcipher.so",
        "lib/arm64-v8a/libtensorflowlite_jni.so",
        "lib/arm64-v8a/libopencv_java.so",
        "assets/google-mlkit/x",
        "META-INF/com.onesignal_OneSignal.version",
        "META-INF/com.urbanairship.android_urbanairship.version",
        "META-INF/com.facebook.android_facebook-login.version",
        "META-INF/com.facebook.android_facebook-core.version",
    ];

    let markers = scan_entries(synth_entries.iter().copied());
    let found = markers.len();
    assert!(
        found >= 40,
        "expected ~45+ distinct SDK markers from synthesized entries; got {}",
        found,
    );

    for m in &markers {
        let purl = marker_purl(m, None);
        assert!(
            purl.is_some(),
            "purl classifier returned None for marker name={:?} path_contains={:?}",
            m.name, m.path_contains,
        );
    }
}
/// One hand-built marker per rule shape (META-INF coordinates, native
/// libraries, asset paths, bare substring) must each classify to some
/// purl.
#[test]
fn purl_classifier_covers_every_ecosystem_class() {
    // (path_contains, name, category)
    let samples = [
        ("META-INF/com.google.firebase_firebase-analytics", "Firebase Analytics", "analytics"),
        ("META-INF/com.appsflyer_af-android-sdk", "AppsFlyer", "attribution"),
        ("META-INF/com.facebook.android_facebook-login", "Facebook Login", "auth"),
        ("META-INF/com.onesignal_OneSignal", "OneSignal", "push"),
        ("libflutter.so", "Flutter", "runtime"),
        ("libsentry.so", "Sentry (native)", "crash"),
        ("libtensorflowlite_jni.so", "TensorFlow Lite", "ml"),
        ("libopencv_java", "OpenCV", "ml"),
        ("assets/flutter_assets", "Flutter", "runtime"),
        ("assets/com/appsflyer", "AppsFlyer", "attribution"),
        ("mlkit", "ML Kit", "ml"),
    ];

    for (pc, name, cat) in samples {
        let m = marker(pc, name, cat);
        let purl = marker_purl(&m, None);
        assert!(
            purl.is_some(),
            "purl classifier returned None for rule {pc:?}",
        );
    }
}
/// Table of display names and the slugs `slugify` must produce for
/// them.
#[test]
fn slugify_collapses_runs() {
    let cases = [
        ("Firebase Analytics", "firebase-analytics"),
        ("React Native (Hermes)", "react-native-hermes"),
        ("Airship (Urban Airship)", "airship-urban-airship"),
    ];
    for (display_name, expected_slug) in cases {
        assert_eq!(slugify(display_name), expected_slug);
    }
}
/// `synthetic_uuid` is deterministic for a given input and produces a
/// `urn:uuid:` string whose version and variant nibbles look like a
/// v4 / RFC-4122 UUID.
#[test]
fn synthetic_uuid_is_stable() {
    let a = synthetic_uuid(b"/tmp/foo.apk");
    let b = synthetic_uuid(b"/tmp/foo.apk");
    assert_eq!(a, b);
    assert!(a.starts_with("urn:uuid:"));
    let after = a.strip_prefix("urn:uuid:").expect("prefix present");

    // Checked positional access: a too-short string fails the
    // assertion with the offending UUID in the message instead of
    // panicking on an out-of-bounds index.
    //
    // In `xxxxxxxx-xxxx-Vxxx-Nxxx-…` the version nibble `V` sits at
    // offset 14 and the variant nibble `N` at offset 19.
    assert_eq!(
        after.chars().nth(14),
        Some('4'),
        "v4 marker missing in {a}",
    );
    assert!(
        matches!(after.chars().nth(19), Some('8' | '9' | 'a' | 'b')),
        "RFC-4122 variant nibble wrong in {a}",
    );
}
/// Different seed bytes must yield different synthetic UUIDs.
#[test]
fn synthetic_uuid_differs_by_input() {
    let from_a = synthetic_uuid(b"a");
    let from_b = synthetic_uuid(b"b");
    assert_ne!(from_a, from_b);
}
/// `strip_apk_prefix` drops a leading `<apk>:` qualifier and leaves
/// colon-free input untouched.
#[test]
fn strip_apk_prefix_handles_split_form() {
    let cases = [
        ("base:lib/arm64-v8a/libfoo.so", "lib/arm64-v8a/libfoo.so"),
        ("config.arm64_v8a:lib/x", "lib/x"),
        ("no-prefix-here", "no-prefix-here"),
    ];
    for (qualified, inner) in cases {
        assert_eq!(strip_apk_prefix(qualified), inner);
    }
}
// ── OmniBOR gitoid (Phase 2) ───────────────────────────────────────
/// Known-answer test for the empty blob.
#[test]
fn gitoid_empty_input_matches_canonical_kat() {
    // Canonical OmniBOR-blob-sha256 vector for empty input, i.e.
    // SHA-256(b"blob 0\0"). It equals git's SHA-256 empty-blob hash and
    // is documented in git's SHA-256 transition spec + the OmniBOR
    // test suite.
    let empty_blob = gitoid_blob_sha256(b"");
    assert_eq!(
        empty_blob,
        "gitoid:blob:sha256:473a0f4c3be8a93681a267e3b1e9a7dcda1185436fe141f7749120a303721813",
    );
}
/// Shape check: each gitoid is the `gitoid:blob:sha256:` scheme prefix
/// followed by exactly 64 lowercase hex characters.
#[test]
fn gitoid_format_is_well_formed() {
    let inputs: [&[u8]; 4] = [b"", b"hello\n", &[0u8; 64], &[0xffu8; 1024]];
    for input in inputs {
        let g = gitoid_blob_sha256(input);
        let suffix = g
            .strip_prefix("gitoid:blob:sha256:")
            .expect("scheme prefix present");
        assert_eq!(suffix.len(), 64, "hex digest must be 64 chars: {g}");

        let all_lower_hex = suffix
            .chars()
            .all(|c| matches!(c, '0'..='9' | 'a'..='f'));
        assert!(all_lower_hex, "hex digest must be lowercase: {g}");
    }
}
/// Hashing the same bytes twice gives the same gitoid.
#[test]
fn gitoid_is_deterministic_per_content() {
    let inputs: [&[u8]; 2] = [b"hello\n", &[0u8; 128]];
    for input in inputs {
        assert_eq!(gitoid_blob_sha256(input), gitoid_blob_sha256(input));
    }
}
/// Different content gives a different gitoid, including when the only
/// difference is a single trailing byte.
#[test]
fn gitoid_differs_by_content() {
    // `b"hello"` differs from the baseline only by the missing final
    // newline.
    let others: [&[u8]; 2] = [b"world\n", b"hello"];
    for other in others {
        assert_ne!(gitoid_blob_sha256(b"hello\n"), gitoid_blob_sha256(other));
    }
}
/// Empty content and a single NUL byte must not collide.
#[test]
fn gitoid_length_separator_is_load_bearing() {
    // The framing is `b"blob " + len + b"\0" + content`. Because the
    // length field differs (0 vs 1), empty content and content `b"\0"`
    // hash to different values; length-extension-style ambiguity is
    // ruled out.
    let empty = gitoid_blob_sha256(b"");
    let single_nul = gitoid_blob_sha256(b"\0");
    assert_ne!(empty, single_nul);
}
// ── in-toto Statement w/ SLSA-Provenance v1.0 predicate ───────────
/// The statement must echo the caller-supplied digests, file name and
/// timestamps into the expected in-toto / SLSA fields.
#[test]
fn provenance_statement_subject_digest_matches_input() {
    let apk_sha = "deadbeef".repeat(8);
    let bom_sha = "feedface".repeat(8);
    let env = build_provenance_statement(
        Path::new("/tmp/example.apk"),
        apk_sha.as_str(),
        bom_sha.as_str(),
        "2026-05-23T19:30:00Z",
        "2026-05-23T19:30:01Z",
    );

    // Statement header.
    assert_eq!(env["_type"], "https://in-toto.io/Statement/v1");
    assert_eq!(env["predicateType"], "https://slsa.dev/provenance/v1");

    // Subject: exactly one entry, the BOM with its digest.
    let subject = env["subject"].as_array().expect("subject is array");
    assert_eq!(subject.len(), 1, "subject must have exactly one entry");
    let bom = &subject[0];
    assert_eq!(bom["name"], "bom.json");
    assert_eq!(bom["digest"]["sha256"], bom_sha.as_str());

    // Predicate: the input APK and the run details.
    let pred = &env["predicate"];
    let input = &pred["buildDefinition"]["externalParameters"]["input"];
    assert_eq!(input["digest"]["sha256"], apk_sha.as_str());
    assert_eq!(input["name"], "example.apk");

    let run = &pred["runDetails"];
    assert_eq!(run["metadata"]["startedOn"], "2026-05-23T19:30:00Z");
    assert_eq!(run["metadata"]["finishedOn"], "2026-05-23T19:30:01Z");
    assert_eq!(run["builder"]["id"], "https://droidsaw.com/builder/sbom@v1");
}
/// The emitted value is a bare Statement, not a DSSE-wrapped
/// Attestation.
#[test]
fn provenance_statement_is_unsigned() {
    // An Attestation would be a Statement inside a DSSE envelope
    // carrying payloadType + base64(payload) + signatures[]. The
    // current emit is unsigned, so none of those keys may appear at
    // the top level.
    let env = build_provenance_statement(
        Path::new("/tmp/x.apk"), "a", "b", "t1", "t2",
    );
    let forbidden = [
        ("payloadType", "must not be DSSE-wrapped"),
        ("payload", "must not carry base64 payload"),
        ("signatures", "unsigned Statement must omit signatures"),
    ];
    for (key, why) in forbidden {
        assert!(env.get(key).is_none(), "{why}");
    }
}
/// `invocationId` must be absent from the run metadata.
#[test]
fn provenance_statement_omits_empty_invocation_id() {
    // In SLSA v1.0 `invocationId` is OPTIONAL and URI-typed; an empty
    // string would violate the type constraint. With no invocation URI
    // to record, the key is left out altogether.
    let env = build_provenance_statement(
        Path::new("/tmp/x.apk"), "a", "b", "t1", "t2",
    );
    let invocation_id = env["predicate"]["runDetails"]["metadata"].get("invocationId");
    assert!(
        invocation_id.is_none(),
        "empty invocationId is non-conformant; omit instead",
    );
}
/// `buildType` is the droidsaw URN.
#[test]
fn provenance_statement_build_type_is_urn() {
    // SLSA v1.0 permits any URI form for buildType; a URN avoids the
    // validator-noise of a vapor HTTP URL.
    let env = build_provenance_statement(
        Path::new("/tmp/x.apk"), "a", "b", "t1", "t2",
    );
    let build_type = &env["predicate"]["buildDefinition"]["buildType"];
    assert_eq!(*build_type, "urn:droidsaw:sbom-from-apk:v1");
}
// ── Bundled ML model components ───────────────────────────────────
/// A bundled model becomes a `file` component with an identity-only
/// purl, a sha-anchored bom-ref, a hash entry, and `droidsaw:`
/// namespaced properties.
#[test]
fn ml_model_component_emits_type_file_with_namespaced_props() {
    use droidsaw_apk::ml_models::{MlModelFile, MlModelFormat};

    let sha = "a".repeat(64);
    let model = MlModelFile {
        path: "base.apk:assets/models/sample.tflite".to_owned(),
        format: MlModelFormat::Tflite,
        size_bytes: 12_345,
        sha256: sha.clone(),
    };
    let c = ml_model_component(&model);

    assert_eq!(c["type"], "file");
    assert_eq!(c["name"], "sample.tflite");
    // Per droidsaw's existing convention the purl stays identity-only;
    // the `#sha256:` anchor lives on bom-ref, not on purl.
    assert_eq!(c["purl"].as_str(), Some("pkg:generic/sample.tflite"));
    let expected_ref = format!("pkg:generic/sample.tflite#sha256:{}", sha);
    assert_eq!(c["bom-ref"].as_str(), Some(expected_ref.as_str()));
    assert_eq!(c["hashes"][0]["content"].as_str(), Some(sha.as_str()));

    let props = c["properties"].as_array().expect("properties array");
    let has_prop = |name: &str, value: &str| {
        props
            .iter()
            .any(|p| p["name"].as_str() == Some(name) && p["value"].as_str() == Some(value))
    };
    assert!(has_prop("droidsaw:source", "apk-ml-model"));
    assert!(has_prop("droidsaw:ml-model:format", "tflite"));
    assert!(has_prop("droidsaw:ml-model:path", "assets/models/sample.tflite"));
    assert!(has_prop("droidsaw:ml-model:size-bytes", "12345"));
}
/// The `<apk>:` qualifier of a split-APK path is removed before the
/// path is recorded as a property.
#[test]
fn ml_model_component_strips_apk_prefix_from_inner_path() {
    use droidsaw_apk::ml_models::{MlModelFile, MlModelFormat};

    let model = MlModelFile {
        path: "splits/feature1.apk:assets/llm/weights.gguf".to_owned(),
        format: MlModelFormat::Gguf,
        size_bytes: 1_500_000_000,
        sha256: "b".repeat(64),
    };
    let c = ml_model_component(&model);

    assert_eq!(c["name"], "weights.gguf");
    assert_eq!(c["purl"].as_str(), Some("pkg:generic/weights.gguf"));

    let props = c["properties"].as_array().expect("properties array");
    let has_prop = |name: &str, value: &str| {
        props
            .iter()
            .any(|p| p["name"].as_str() == Some(name) && p["value"].as_str() == Some(value))
    };
    // Consumers see the canonical APK-internal location: the full
    // `splits/feature1.apk:...` form is reduced to the inner path.
    assert!(has_prop("droidsaw:ml-model:path", "assets/llm/weights.gguf"));
    assert!(has_prop("droidsaw:ml-model:size-bytes", "1500000000"));
}
/// A path whose basename is empty still yields a named component and a
/// matching purl.
#[test]
fn ml_model_component_empty_basename_fallback_uses_sha_prefix() {
    use droidsaw_apk::ml_models::{MlModelFile, MlModelFormat};

    // Pathological case: an entry name that strips to an empty
    // basename (e.g., attacker-controlled APK with entry name ":").
    // The component must still have a well-formed purl + bom-ref, so
    // the name falls back to a sha256-derived identifier.
    let model = MlModelFile {
        path: "base.apk:".to_owned(),
        format: MlModelFormat::Tflite,
        size_bytes: 0,
        sha256: "c".repeat(64),
    };
    let c = ml_model_component(&model);

    let name = c["name"].as_str().expect("name str");
    assert!(
        name.starts_with("model-"),
        "empty-basename must fall back to sha-derived name; got {name}",
    );

    // The purl remains a valid pkg:generic with that non-empty name.
    let purl = c["purl"].as_str().expect("purl str");
    assert_eq!(purl, format!("pkg:generic/{name}"));
}
// ── Heuristic-detection promotion ─────────────────────────────────
/// A name-only Hermes detection becomes a `library` component with an
/// unversioned npm purl, provenance properties, and a single
/// `code-string-scan` evidence method.
#[test]
fn hermes_npm_component_uses_name_only_purl_and_marks_source() {
    let c = hermes_npm_component("@datadog/mobile-react-native");

    assert_eq!(c["type"], "library");
    assert_eq!(c["purl"], "pkg:npm/@datadog/mobile-react-native");
    assert!(
        c.get("version").is_none(),
        "name-only heuristic must not synthesize a version",
    );

    let props = c["properties"].as_array().expect("properties");
    let has_prop_named = |wanted: &str| props.iter().any(|p| p["name"].as_str() == Some(wanted));
    assert!(has_prop_named("droidsaw:source"));
    assert!(has_prop_named("droidsaw:versioned"));

    let methods = c["evidence"]["identity"]["methods"]
        .as_array()
        .expect("methods array");
    assert_eq!(methods.len(), 1);
    assert_eq!(methods[0]["technique"], "code-string-scan");
}
/// An exact static-field match emits a versioned generic purl, a CPE,
/// the `ast-fingerprint` technique, and the input confidence.
#[test]
fn dex_sdk_version_component_exact_match_emits_generic_purl_and_ast_fingerprint() {
    use droidsaw_apk::dex_sdk_versions::{DexSdkVersion, DexSdkVersionTechnique};

    let detected = DexSdkVersion {
        canonical_name: "onesignal-android-sdk".into(),
        version: "5.1.6".into(),
        confidence: 0.95,
        technique: DexSdkVersionTechnique::ExactStaticField,
        cpe_vendor_product: Some(("onesignal", "onesignal_sdk")),
    };
    let c = dex_sdk_version_component(&detected);

    assert_eq!(c["purl"], "pkg:generic/onesignal-android-sdk@5.1.6");
    assert_eq!(c["name"], "onesignal-android-sdk");
    assert_eq!(c["version"], "5.1.6");
    assert_eq!(c["cpe"], "cpe:2.3:a:onesignal:onesignal_sdk:5.1.6:*:*:*:*:*:*:*");

    let identity = &c["evidence"]["identity"];
    assert_eq!(identity["methods"][0]["technique"], "ast-fingerprint");
    let confidence = identity["confidence"].as_f64().unwrap_or(0.0);
    assert!((confidence - 0.95).abs() < 1e-6);
}
/// An obfuscation-fallback match with no CPE vendor/product emits no
/// `cpe`, the `source-code-analysis` technique, and the input
/// confidence.
#[test]
fn dex_sdk_version_component_obfuscation_fallback_emits_source_code_analysis_technique() {
    use droidsaw_apk::dex_sdk_versions::{DexSdkVersion, DexSdkVersionTechnique};

    let detected = DexSdkVersion {
        canonical_name: "amplitude-android-sdk".into(),
        version: "2.39.0".into(),
        confidence: 0.6,
        technique: DexSdkVersionTechnique::ObfuscationFallback,
        cpe_vendor_product: None,
    };
    let c = dex_sdk_version_component(&detected);

    assert_eq!(c["purl"], "pkg:generic/amplitude-android-sdk@2.39.0");
    assert!(c.get("cpe").is_none(), "cpe must be absent when vendor/product unknown");

    let identity = &c["evidence"]["identity"];
    assert_eq!(identity["methods"][0]["technique"], "source-code-analysis");
    let confidence = identity["confidence"].as_f64().unwrap_or(0.0);
    assert!((confidence - 0.6).abs() < 1e-6);
}
/// A bare class-prefix detection is emitted as `pkg:generic/<prefix>`
/// with low confidence and explicit provenance properties.
#[test]
fn dex_class_prefix_component_uses_generic_purl_shape() {
    let c = dex_class_prefix_component("com.facebook");

    // Only a class prefix was observed; the maven coordinate is not
    // known, so pkg:generic/<prefix> is the honest claim. The prior
    // shape pkg:maven/<group>/UNKNOWN asserted a false maven
    // provenance and used a purl-spec-illegal sentinel for the
    // artifact half.
    assert_eq!(c["purl"], "pkg:generic/com.facebook");
    assert_eq!(c["name"], "com.facebook");

    // The UNKNOWN sentinel must not survive anywhere in the output.
    let serialized = serde_json::to_string(&c).expect("serialize");
    let leaks_sentinel = serialized.contains("UNKNOWN");
    assert!(
        !leaks_sentinel,
        "class-prefix component must not emit the UNKNOWN sentinel anywhere",
    );

    // A prefix alone is more ambiguous than a hermes string-scan hit,
    // so its confidence is intentionally lower.
    let identity = &c["evidence"]["identity"];
    let confidence = identity["confidence"].as_f64().unwrap_or(1.0);
    assert!(confidence < 0.5, "class-prefix confidence must be < hermes scan");

    // Provenance properties give downstream filters an explicit "we
    // don't know the artifact" signal.
    let props = c["properties"].as_array().expect("properties array");
    let has_prop = |name: &str, value: &str| {
        props
            .iter()
            .any(|p| p["name"].as_str() == Some(name) && p["value"].as_str() == Some(value))
    };
    assert!(has_prop("droidsaw:source", "dex-class-prefix"));
    assert!(has_prop("droidsaw:detection-shape", "class-prefix-only"));

    let methods = identity["methods"].as_array().expect("methods array");
    let first = &methods[0];
    assert_eq!(first["technique"], "dex-class-prefix-scan");
    let evidence_value = first["value"].as_str().unwrap_or("");
    assert!(
        evidence_value.contains("Lcom/facebook"),
        "evidence value should show JVM-internal class path form",
    );
}
// ── Native lib version emit (Stream D) ────────────────────────────
/// Fixture: an arm64 `libssl.so` entry with version info attached.
/// `version` and `canonical` populate the version info; `soname` is
/// copied through as given. The CPE vendor/product pair is fixed to
/// `openssl`/`openssl`.
fn native_lib_with_version(version: &str, canonical: &str, soname: Option<&str>) -> droidsaw_apk::apk::NativeLib {
    use droidsaw_apk::apk::NativeLib;
    use droidsaw_apk::native_lib_versions::NativeLibVersion;

    let version_info = NativeLibVersion {
        canonical_name: String::from(canonical),
        version: String::from(version),
        matched_string: format!("{canonical} {version} stub matched-string"),
        cpe_vendor_product: Some((String::from("openssl"), String::from("openssl"))),
    };
    NativeLib {
        path: String::from("base.apk:lib/arm64-v8a/libssl.so"),
        name: String::from("libssl.so"),
        abi: String::from("arm64-v8a"),
        size: 1024,
        soname: soname.map(String::from),
        version_info: Some(version_info),
    }
}
/// Fixture: an arm64 `libfoo.so` entry with neither a soname nor
/// version info.
fn native_lib_without_version() -> droidsaw_apk::apk::NativeLib {
    use droidsaw_apk::apk::NativeLib;

    NativeLib {
        path: String::from("base.apk:lib/arm64-v8a/libfoo.so"),
        name: String::from("libfoo.so"),
        abi: String::from("arm64-v8a"),
        size: 1024,
        soname: None,
        version_info: None,
    }
}
/// Build a tiny single-entry ZIP in the system temp directory and
/// reopen it for reading, so `hash_archive_entry` has a real archive
/// to read from.
///
/// `entry_path` is the name of the one stored (uncompressed) entry and
/// `content` is its body. Returns a `ZipArchive<File>` over the temp
/// file. The file itself is not removed by this helper.
///
/// The temp path includes a per-call counter in addition to the pid
/// and entry name: tests in one binary run concurrently, so a path
/// derived only from pid + entry name would be shared by any two
/// callers asking for the same entry, and their create/read pairs
/// would race on a single file.
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    reason = "test-only helper; not subject to deny-lint floor"
)]
fn synth_archive_with_entry(entry_path: &str, content: &[u8]) -> ZipArchive<File> {
    use std::io::Write as _;
    // Relaxed is enough: only the uniqueness of each fetched value
    // matters, not ordering relative to other memory operations.
    static NEXT: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0);
    let n = NEXT.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    let pid = std::process::id();
    let path = std::env::temp_dir().join(format!(
        "sbom-streamd-{pid}-{n}-{}.zip",
        entry_path.replace('/', "_"),
    ));
    let file = File::create(&path).unwrap();
    let mut zw = zip::ZipWriter::new(file);
    let opts = zip::write::SimpleFileOptions::default()
        .compression_method(zip::CompressionMethod::Stored);
    zw.start_file(entry_path, opts).unwrap();
    zw.write_all(content).unwrap();
    zw.finish().unwrap();
    ZipArchive::new(File::open(&path).unwrap()).unwrap()
}
/// With version info present the component gains a version, a
/// canonical-name purl, a CPE, a second evidence method, and the
/// soname / canonical-name properties.
#[test]
fn native_lib_component_with_version_promotes_purl_and_adds_evidence() {
    let lib = native_lib_with_version("1.1.1a", "openssl", Some("libssl.so.1.1"));
    let mut archive = synth_archive_with_entry("lib/arm64-v8a/libssl.so", &[0u8; 32]);
    let emitted = native_lib_component(&lib, &mut archive);

    assert_eq!(emitted["version"], "1.1.1a");
    assert_eq!(emitted["purl"], "pkg:generic/openssl@1.1.1a");
    assert_eq!(
        emitted["cpe"],
        "cpe:2.3:a:openssl:openssl:1.1.1a:*:*:*:*:*:*:*",
        "CPE 2.3 is the load-bearing identifier for Grype NVD matching",
    );

    let methods = emitted["evidence"]["identity"]["methods"]
        .as_array()
        .expect("methods array");
    assert_eq!(methods.len(), 2, "filename + binary-string-scan entries");
    assert_eq!(methods[1]["technique"], "binary-string-scan");

    let props = emitted["properties"].as_array().expect("properties array");
    let has_prop = |name: &str, value: &str| {
        props
            .iter()
            .any(|p| p["name"].as_str() == Some(name) && p["value"].as_str() == Some(value))
    };
    assert!(has_prop("soname", "libssl.so.1.1"));
    assert!(has_prop("droidsaw:canonical-name", "openssl"));
}
/// Without version info the component keeps the `<abi>/<file>` purl,
/// has a single evidence method, and omits the soname /
/// canonical-name properties.
#[test]
fn native_lib_component_without_version_keeps_legacy_purl() {
    let lib = native_lib_without_version();
    let mut archive = synth_archive_with_entry("lib/arm64-v8a/libfoo.so", &[0u8; 32]);
    let emitted = native_lib_component(&lib, &mut archive);

    assert!(emitted.get("version").is_none());
    assert_eq!(emitted["purl"], "pkg:generic/arm64-v8a/libfoo.so");

    let methods = emitted["evidence"]["identity"]["methods"]
        .as_array()
        .expect("methods array");
    assert_eq!(methods.len(), 1, "filename entry only");

    let props = emitted["properties"].as_array().expect("properties array");
    let has_prop_named = |wanted: &str| props.iter().any(|p| p["name"].as_str() == Some(wanted));
    assert!(!has_prop_named("soname"));
    assert!(!has_prop_named("droidsaw:canonical-name"));
}
// ── Hermes version scan (Stream G) ────────────────────────────────
/// A scoped `@scope/name@version` coordinate embedded in surrounding
/// text is split into its name and version.
#[test]
fn parse_npm_versioned_finds_scoped_coord() {
    let s = "...@eppo/buffer@6.2.0 trailing";
    // The string holds two '@': the scope marker and the version
    // separator. The parser is anchored on the version one, which is
    // the last '@' (immediately before "6").
    let at_v = s.rfind('@').unwrap();
    let (name, version) = parse_npm_versioned_at(s, at_v).expect("parsed");
    assert_eq!(name, "@eppo/buffer");
    assert_eq!(version, "6.2.0");
}
/// An unscoped `name@version` coordinate embedded in surrounding text
/// is split into its name and version.
#[test]
fn parse_npm_versioned_finds_unscoped_coord() {
    let haystack = "leading text buffer@6.2.0 rest";
    let version_at = haystack.rfind('@').unwrap();
    let (pkg, ver) = parse_npm_versioned_at(haystack, version_at).expect("parsed");
    assert_eq!(pkg, "buffer");
    assert_eq!(ver, "6.2.0");
}
/// A two-component version is rejected.
#[test]
fn parse_npm_versioned_rejects_short_version() {
    // `1.5` has a single dot, which is not semver, so no coordinate
    // should come back.
    let haystack = "foo@1.5";
    let version_at = haystack.rfind('@').unwrap();
    let parsed = parse_npm_versioned_at(haystack, version_at);
    assert!(parsed.is_none());
}
/// A version with a trailing letter (`1.1.1a`) is accepted and
/// returned whole.
#[test]
fn parse_npm_versioned_handles_letter_suffix() {
    let haystack = "openssl@1.1.1a tail";
    let version_at = haystack.rfind('@').unwrap();
    let (pkg, ver) = parse_npm_versioned_at(haystack, version_at).expect("parsed");
    assert_eq!(pkg, "openssl");
    assert_eq!(ver, "1.1.1a");
}
/// Plausible package names are accepted; empty, single-character,
/// all-digit and all-punctuation strings are rejected.
#[test]
fn is_likely_npm_name_filters_noise() {
    for accepted in ["react-native-firebase", "@react-navigation/elements"] {
        assert!(is_likely_npm_name(accepted));
    }
    for rejected in ["", "a", "123", "--"] {
        assert!(!is_likely_npm_name(rejected));
    }
}
/// A versioned Hermes detection carries a versioned npm purl, the
/// `code-string-scan` technique, and the version-scan provenance
/// properties.
#[test]
fn hermes_npm_versioned_component_emits_proper_purl_and_evidence() {
    let c = hermes_npm_versioned_component("@eppo/buffer", "6.2.0");

    assert_eq!(c["purl"], "pkg:npm/@eppo/buffer@6.2.0");
    assert_eq!(c["version"], "6.2.0");

    let methods = c["evidence"]["identity"]["methods"]
        .as_array()
        .expect("methods array");
    assert_eq!(methods[0]["technique"], "code-string-scan");

    let props = c["properties"].as_array().expect("properties array");
    let has_prop = |name: &str, value: &str| {
        props
            .iter()
            .any(|p| p["name"].as_str() == Some(name) && p["value"].as_str() == Some(value))
    };
    assert!(has_prop("droidsaw:source", "hermes-version-scan"));
    assert!(has_prop("droidsaw:versioned", "true"));
}
// ── Stream C — nested-JAR cataloger ───────────────────────────────
/// Write a ZIP containing one zero-length `placeholder` entry to the
/// system temp directory and reopen it for reading.
#[allow(
    clippy::unwrap_used,
    reason = "test-only helper; not subject to deny-lint floor"
)]
fn empty_archive() -> ZipArchive<File> {
    use std::io::Write as _;
    use std::sync::atomic::{AtomicUsize, Ordering};

    // Each call gets its own file. Tests in this binary run
    // concurrently, so a pid-only key would be shared by every caller
    // and the create/read pair would race (observed as UnexpectedEof
    // in ZipArchive::new).
    static NEXT: AtomicUsize = AtomicUsize::new(0);
    let n = NEXT.fetch_add(1, Ordering::Relaxed);
    let pid = std::process::id();

    let mut path = std::env::temp_dir();
    path.push(format!("sbom-streamc-{pid}-{n}.zip"));

    let mut writer = zip::ZipWriter::new(File::create(&path).unwrap());
    let stored = zip::write::SimpleFileOptions::default()
        .compression_method(zip::CompressionMethod::Stored);
    writer.start_file("placeholder", stored).unwrap();
    writer.write_all(b"").unwrap();
    writer.finish().unwrap();

    let reopened = File::open(&path).unwrap();
    ZipArchive::new(reopened).unwrap()
}
/// A nested JAR with a maven coordinate emits a full `pkg:maven` purl
/// and is tagged as coming from pom.properties.
#[test]
fn nested_jar_component_with_maven_coord_emits_full_purl() {
    use droidsaw_apk::nested_jar_cataloger::{MavenCoord, NestedJarMetadata};

    let coord = MavenCoord {
        group_id: "com.example".to_owned(),
        artifact_id: "foo".to_owned(),
        version: "1.2.3".to_owned(),
    };
    let jar = NestedJarMetadata {
        sha256: "0000000000000000000000000000000000000000000000000000000000000000".to_owned(),
        path: "base.apk:libs/example.jar".to_owned(),
        maven_coord: Some(coord),
        manifest_meta: None,
    };
    let mut archive = empty_archive();
    let c = nested_jar_component(&jar, &mut archive).expect("emitted");

    assert_eq!(c["purl"], "pkg:maven/com.example/foo@1.2.3");
    assert_eq!(c["name"], "foo");
    assert_eq!(c["version"], "1.2.3");

    let props = c["properties"].as_array().expect("properties array");
    let tagged = props.iter().any(|p| {
        p["name"].as_str() == Some("droidsaw:source")
            && p["value"].as_str() == Some("nested-jar-pom-properties")
    });
    assert!(tagged);
}
/// With only manifest title + version available, the component uses a
/// `pkg:generic` purl and is tagged as coming from MANIFEST.MF.
#[test]
fn nested_jar_component_with_manifest_only_falls_back_to_generic() {
    use droidsaw_apk::nested_jar_cataloger::{ManifestMeta, NestedJarMetadata};

    let manifest = ManifestMeta {
        title: Some("LegacyLib".to_owned()),
        version: Some("9.8.7".to_owned()),
    };
    let jar = NestedJarMetadata {
        sha256: "0000000000000000000000000000000000000000000000000000000000000000".to_owned(),
        path: "base.apk:libs/legacy.jar".to_owned(),
        maven_coord: None,
        manifest_meta: Some(manifest),
    };
    let mut archive = empty_archive();
    let c = nested_jar_component(&jar, &mut archive).expect("emitted");

    assert_eq!(c["purl"], "pkg:generic/legacy@9.8.7");
    assert_eq!(c["name"], "LegacyLib");

    let props = c["properties"].as_array().expect("properties array");
    let tagged = props.iter().any(|p| {
        p["name"].as_str() == Some("droidsaw:source")
            && p["value"].as_str() == Some("nested-jar-manifest-mf")
    });
    assert!(tagged);
}
/// A nested JAR with neither maven coordinate nor manifest metadata
/// produces no component.
#[test]
fn nested_jar_component_with_no_metadata_emits_none() {
    use droidsaw_apk::nested_jar_cataloger::NestedJarMetadata;

    let jar = NestedJarMetadata {
        sha256: "0000000000000000000000000000000000000000000000000000000000000000".to_owned(),
        path: "base.apk:assets/main.jar".to_owned(),
        maven_coord: None,
        manifest_meta: None,
    };
    let mut archive = empty_archive();
    let emitted = nested_jar_component(&jar, &mut archive);
    assert!(
        emitted.is_none(),
        "JAR with no attribution must not emit a component (honest absence)",
    );
}
/// A manifest title with no version produces no component.
#[test]
fn nested_jar_component_with_manifest_title_but_no_version_emits_none() {
    use droidsaw_apk::nested_jar_cataloger::{ManifestMeta, NestedJarMetadata};

    // A usable purl needs the @version suffix (Stream-A discipline),
    // so a title on its own is not enough to synthesize one.
    let title_only = ManifestMeta {
        title: Some("X".to_owned()),
        version: None,
    };
    let jar = NestedJarMetadata {
        sha256: "0000000000000000000000000000000000000000000000000000000000000000".to_owned(),
        path: "base.apk:libs/x.jar".to_owned(),
        maven_coord: None,
        manifest_meta: Some(title_only),
    };
    let mut archive = empty_archive();
    let emitted = nested_jar_component(&jar, &mut archive);
    assert!(emitted.is_none());
}
}