use droidsaw_apk::Manifest;
use droidsaw_common::{Finding, Severity};
use serde_json::{json, Value};
use crate::context::CrossLayerContext;
use super::{collect_apk_findings, meta, progress, to_hex, DEFAULT_ENTROPY_THRESHOLD_BITS};
/// Converts a user-supplied severity name into a [`Severity`].
///
/// The input is lowercased before matching, so the comparison is
/// case-insensitive. `med` is accepted as a shorthand for `medium`.
///
/// # Errors
///
/// Returns an error listing the accepted names when `s` matches none of them.
/// The value echoed in the message is the lowercased form of the input.
fn parse_severity(s: &str) -> anyhow::Result<Severity> {
    let normalized = s.to_lowercase();
    let level = match normalized.as_str() {
        "critical" => Severity::Critical,
        "high" => Severity::High,
        "medium" | "med" => Severity::Medium,
        "low" => Severity::Low,
        "info" => Severity::Info,
        other => anyhow::bail!(
            "--min-severity must be one of: critical, high, medium, low, info (got {other})"
        ),
    };
    Ok(level)
}
/// Ingests APK metadata and signer information into a SQLite corpus database.
///
/// Each entry of `paths` that is a directory is walked recursively and every
/// regular file whose extension is exactly `apk` is collected; any other entry
/// is taken as an APK path as-is. The collected paths are sorted and exact
/// duplicates are dropped, so overlapping inputs are processed only once.
///
/// The database at `output` is opened with `rusqlite::Connection::open`, the
/// `apks` and `signers` tables (plus their indexes) are created if missing,
/// and all rows are written inside one transaction that is committed at the
/// end. For every APK:
///
/// * an `apks` row is inserted or replaced; when the APK cannot be parsed the
///   row carries only path, tag, file size, timestamp and the error text;
/// * `signers` rows previously stored for that path are deleted, then one row
///   is written for the V1 certificate (if any) and one per entry in
///   `signing.signers` that has a certificate fingerprint.
///
/// # Parameters
///
/// * `paths` - APK files and/or directories to scan.
/// * `output` - path of the SQLite database file.
/// * `tag` - value stored in the `tag` column; `None` is stored as `""`.
/// * `skip_existing` - when `true`, paths that already have an `apks` row with
///   `parse_error IS NULL` are counted as skipped and left untouched.
///
/// # Returns
///
/// A JSON object with `output`, `scanned`, `ingested`, `skipped`, `failed`,
/// `signer_rows` and a `_meta` entry.
///
/// # Errors
///
/// Propagates failures from opening the database, creating the schema, any
/// insert/delete statement, and the final commit. When an error is returned
/// the transaction is never committed.
#[allow(clippy::arithmetic_side_effects, clippy::as_conversions, reason = "`u32 → i64` widening (target_sdk_int, key_size_bits); `bool → i64` well-defined as 0/1; `u64 → i64` narrowing for SystemTime::as_secs (safe until year 2262) and fs::metadata::len (file sizes ≪ i64::MAX). Skipped /failed/ingested counters are display-only and handled in the saturating commit.")]
pub fn corpus_ingest(
    paths: &[std::path::PathBuf],
    output: &str,
    tag: Option<&str>,
    skip_existing: bool,
) -> anyhow::Result<Value> {
    // Expand directories into the `.apk` files they contain; unreadable
    // directory entries are silently skipped.
    let mut apk_paths: Vec<std::path::PathBuf> = Vec::new();
    for p in paths {
        if p.is_dir() {
            for entry in walkdir::WalkDir::new(p).into_iter().filter_map(|e| e.ok()) {
                if entry.file_type().is_file()
                    && entry.path().extension().map(|e| e == "apk").unwrap_or(false)
                {
                    apk_paths.push(entry.path().to_path_buf());
                }
            }
        } else {
            apk_paths.push(p.clone());
        }
    }
    // Sorting gives a stable processing order; dedup (adjacent after the sort)
    // keeps overlapping inputs from being ingested and counted twice.
    apk_paths.sort();
    apk_paths.dedup();
    progress!("corpus_ingest: {:?} apk paths", apk_paths.len());

    let mut db = rusqlite::Connection::open(output)?;
    db.execute_batch(
        "
CREATE TABLE IF NOT EXISTS apks (
path TEXT PRIMARY KEY,
tag TEXT,
package TEXT,
version_code TEXT,
version_name TEXT,
min_sdk TEXT,
target_sdk_int INTEGER,
debuggable INTEGER,
allow_backup INTEGER,
file_size INTEGER,
ingested_at INTEGER,
parse_error TEXT
);
CREATE TABLE IF NOT EXISTS signers (
apk_path TEXT,
scheme TEXT,
cert_sha256 TEXT,
subject TEXT,
issuer TEXT,
serial TEXT,
not_before TEXT,
not_after TEXT,
is_expired INTEGER,
is_self_signed INTEGER,
key_algorithm TEXT,
key_size_bits INTEGER,
ec_curve TEXT,
signature_algorithm TEXT,
rsa_modulus_hex TEXT,
PRIMARY KEY (apk_path, scheme, cert_sha256)
);
CREATE INDEX IF NOT EXISTS idx_signers_fingerprint ON signers(cert_sha256);
CREATE INDEX IF NOT EXISTS idx_signers_key ON signers(key_algorithm, key_size_bits);
CREATE INDEX IF NOT EXISTS idx_apks_package ON apks(package);
",
    )?;

    let tag_str = tag.unwrap_or("");
    // One timestamp for the whole run; falls back to 0 if the clock is before
    // the Unix epoch.
    #[allow(
        clippy::cast_possible_wrap,
        reason = "INTENT: SystemTime::now().duration_since(UNIX_EPOCH) returns Duration with u64 secs; SQLite INTEGER columns are i64, so widening reinterpretation is the required encoding. Realistic timestamps (year 2026+) are well below i64::MAX (year ~292277024596)."
    )]
    let now = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map(|d| d.as_secs() as i64)
        .unwrap_or(0);

    let mut ingested = 0usize;
    let mut skipped = 0usize;
    let mut failed = 0usize;
    let mut signer_rows = 0usize;

    let tx = db.transaction()?;
    for path in &apk_paths {
        let per_path_hash = CrossLayerContext::hash_path(path);
        droidsaw_common::diag::with_input_hash(&per_path_hash, || -> anyhow::Result<()> {
            let path_str = path.to_string_lossy().into_owned();

            if skip_existing {
                // Best-effort lookup: a failed query is treated as "not
                // present", so the APK is simply (re-)ingested.
                let exists: i64 = tx
                    .query_row(
                        "SELECT COUNT(*) FROM apks WHERE path = ?1 AND parse_error IS NULL",
                        rusqlite::params![&path_str],
                        |row| row.get(0),
                    )
                    .unwrap_or(0);
                if exists > 0 {
                    skipped = skipped.saturating_add(1);
                    return Ok(());
                }
            }

            // The `apks` row for this path is about to be replaced. Signer rows
            // are keyed by (apk_path, scheme, cert_sha256), so rows from an
            // earlier ingest would survive if the APK was re-signed, no longer
            // parses, or lost its signing info. Remove them so the signer
            // table always reflects the current state of the file.
            tx.execute(
                "DELETE FROM signers WHERE apk_path = ?1",
                rusqlite::params![&path_str],
            )?;

            // A failed metadata read is recorded as size 0 rather than
            // aborting the run.
            #[allow(
                clippy::cast_possible_wrap,
                reason = "INTENT: file_size encoded as i64 for SQLite INTEGER column. APK files are bounded by ZIP central-directory addressability (<2^48 in practice); always within i64::MAX."
            )]
            let file_size = std::fs::metadata(path).map(|m| m.len() as i64).unwrap_or(0);

            let apk = match droidsaw_apk::Apk::parse(path) {
                Ok(a) => a,
                Err(e) => {
                    // Record the failure so later queries can distinguish
                    // "not scanned" from "could not parse".
                    tx.execute(
                        "INSERT OR REPLACE INTO apks (path, tag, file_size, ingested_at, parse_error) VALUES (?1, ?2, ?3, ?4, ?5)",
                        rusqlite::params![&path_str, tag_str, file_size, now, e.to_string()],
                    )?;
                    failed = failed.saturating_add(1);
                    return Ok(());
                }
            };

            // Manifest fields are all NULL when the manifest is missing or
            // cannot be decoded.
            let (package, version_code, version_name, min_sdk, target_sdk_int, debuggable, allow_backup) =
                match apk
                    .manifest_raw
                    .as_ref()
                    .and_then(|raw| droidsaw_apk::Manifest::from_binary_xml(raw).ok())
                {
                    Some(m) => (
                        Some(m.package),
                        Some(m.version_code),
                        Some(m.version_name),
                        Some(m.min_sdk),
                        Some(i64::from(m.target_sdk_int)),
                        Some(i64::from(m.debuggable)),
                        m.allow_backup.map(i64::from),
                    ),
                    None => (None, None, None, None, None, None, None),
                };
            tx.execute(
                "INSERT OR REPLACE INTO apks (path, tag, package, version_code, version_name, min_sdk, target_sdk_int, debuggable, allow_backup, file_size, ingested_at, parse_error) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, NULL)",
                rusqlite::params![
                    &path_str,
                    tag_str,
                    package,
                    version_code,
                    version_name,
                    min_sdk,
                    target_sdk_int,
                    debuggable,
                    allow_backup,
                    file_size,
                    now,
                ],
            )?;

            // An APK whose signing info cannot be read still counts as
            // ingested; it just contributes no signer rows.
            let signing = match apk.signing_info() {
                Ok(s) => s,
                Err(_) => {
                    ingested = ingested.saturating_add(1);
                    return Ok(());
                }
            };

            // The V1 certificate row carries the full certificate columns.
            if let Some(cert) = &signing.v1_cert {
                tx.execute(
                    "INSERT OR REPLACE INTO signers (apk_path, scheme, cert_sha256, subject, issuer, serial, not_before, not_after, is_expired, is_self_signed, key_algorithm, key_size_bits, ec_curve, signature_algorithm, rsa_modulus_hex) VALUES (?1, 'V1', ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14)",
                    rusqlite::params![
                        &path_str,
                        &cert.sha256_fingerprint,
                        &cert.subject,
                        &cert.issuer,
                        &cert.serial,
                        &cert.not_before,
                        &cert.not_after,
                        i64::from(cert.is_expired_at(std::time::SystemTime::now())),
                        i64::from(signing.v1_self_signed),
                        &cert.key_algorithm,
                        i64::from(cert.key_size_bits),
                        // An empty curve name is stored as NULL.
                        if cert.ec_curve.is_empty() {
                            None
                        } else {
                            Some(cert.ec_curve.as_str())
                        },
                        &cert.signature_algorithm,
                        to_hex(&cert.rsa_modulus),
                    ],
                )?;
                signer_rows = signer_rows.saturating_add(1);
            }

            // Remaining signer entries store public-key details only; the
            // certificate-specific columns are written as NULL.
            for s in &signing.signers {
                let scheme = format!("{:?}", s.scheme);
                let fp = s.cert_sha256.clone().unwrap_or_default();
                // The fingerprint is part of the primary key; skip signers
                // that do not have one.
                if fp.is_empty() {
                    continue;
                }
                tx.execute(
                    "INSERT OR REPLACE INTO signers (apk_path, scheme, cert_sha256, subject, issuer, serial, not_before, not_after, is_expired, is_self_signed, key_algorithm, key_size_bits, ec_curve, signature_algorithm, rsa_modulus_hex) VALUES (?1, ?2, ?3, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ?4, ?5, ?6, ?7, ?8)",
                    rusqlite::params![
                        &path_str,
                        &scheme,
                        &fp,
                        &s.public_key_algorithm,
                        i64::from(s.public_key_size_bits),
                        if s.public_key_ec_curve.is_empty() {
                            None
                        } else {
                            Some(s.public_key_ec_curve.as_str())
                        },
                        // Only the first listed signature algorithm is stored.
                        s.signature_algorithms.first().cloned().unwrap_or_default(),
                        to_hex(&s.public_key_rsa_modulus),
                    ],
                )?;
                signer_rows = signer_rows.saturating_add(1);
            }

            ingested = ingested.saturating_add(1);
            Ok(())
        })?;
    }
    tx.commit()?;

    progress!(
        "corpus_ingest: ingested={ingested:?} skipped={skipped:?} failed={failed:?} signer_rows={signer_rows:?}"
    );
    Ok(json!({
        "output": output,
        "scanned": apk_paths.len(),
        "ingested": ingested,
        "skipped": skipped,
        "failed": failed,
        "signer_rows": signer_rows,
        "_meta": meta(
            1,
            false,
            "query with `sqlite3 <output>`; idx_signers_fingerprint indexes cert_sha256 for batch-GCD and signer-pivot queries",
            &["scan-corpus", "signing", "sbom"],
        ),
    }))
}
/// Scans every APK reachable from `paths` and streams matching findings to
/// `out`, one JSON object per line.
///
/// Directories in `paths` are walked recursively and every regular file whose
/// extension is exactly `apk` is scanned; any other entry is scanned as given.
/// Each emitted line has the keys `path`, `package` and `finding`. `package`
/// is the empty string when the manifest is missing or cannot be decoded.
///
/// A finding is emitted when `finding.severity <= threshold`, where the
/// threshold is parsed from `min_severity`.
/// NOTE(review): this relies on how `Severity` is ordered (presumably more
/// severe variants compare lower) - confirm against its definition.
///
/// APKs that fail to parse are reported through `progress!` and skipped; they
/// do not abort the scan.
///
/// # Errors
///
/// Returns an error when `min_severity` is not a recognised severity name, or
/// when serialising or writing a line to `out` fails.
pub fn scan_corpus(
    paths: &[std::path::PathBuf],
    min_severity: &str,
    out: &mut dyn std::io::Write,
) -> anyhow::Result<()> {
    let threshold = parse_severity(min_severity)?;

    // Resolve the inputs into the concrete list of files to scan.
    let mut targets: Vec<std::path::PathBuf> = Vec::new();
    for root in paths {
        if !root.is_dir() {
            targets.push(root.clone());
            continue;
        }
        // Directory entries that cannot be read are dropped from the walk.
        let discovered = walkdir::WalkDir::new(root)
            .into_iter()
            .filter_map(Result::ok)
            .filter(|entry| {
                entry.file_type().is_file()
                    && entry.path().extension().is_some_and(|ext| ext == "apk")
            })
            .map(|entry| entry.path().to_path_buf());
        targets.extend(discovered);
    }

    for apk_path in &targets {
        let input_hash = CrossLayerContext::hash_path(apk_path);
        droidsaw_common::diag::with_input_hash(&input_hash, || -> anyhow::Result<()> {
            // A file that cannot be parsed is logged and skipped.
            let ctx = match CrossLayerContext::parse(apk_path, None) {
                Ok(ctx) => ctx,
                Err(e) => {
                    progress!("could not parse {:?}: {:?}", apk_path, e);
                    return Ok(());
                }
            };

            let manifest = ctx
                .apk
                .as_ref()
                .and_then(|apk| apk.manifest_raw.as_ref())
                .and_then(|raw| Manifest::from_binary_xml(raw).ok());
            let package = manifest.map(|m| m.package).unwrap_or_default();

            let findings: Vec<Finding> = collect_apk_findings(&ctx, DEFAULT_ENTROPY_THRESHOLD_BITS);
            for finding in findings.into_iter().filter(|f| f.severity <= threshold) {
                let record = json!({
                    "path": apk_path.to_string_lossy(),
                    "package": package,
                    "finding": finding,
                });
                writeln!(out, "{}", serde_json::to_string(&record)?)?;
            }
            Ok(())
        })?;
    }
    Ok(())
}