// droidsaw 2.0.0
//
// DROIDSAW — unified Android reverse engineering CLI. Hermes, DEX, APK signing. JSON output, MCP server. Bytecode is not a security layer.
// Documentation
use droidsaw_apk::Manifest;
use droidsaw_common::{Finding, Severity};
use serde_json::{json, Value};

use crate::context::CrossLayerContext;

use super::{collect_apk_findings, meta, progress, to_hex, DEFAULT_ENTROPY_THRESHOLD_BITS};

/// Translate the textual `--min-severity` value into a [`Severity`] threshold.
///
/// The input is lower-cased before matching, and `med` is accepted as a
/// shorthand for `medium`. Used by `scan_corpus` to build its severity filter.
///
/// # Errors
///
/// Returns an error listing the accepted values when `s` matches none of them.
fn parse_severity(s: &str) -> anyhow::Result<Severity> {
    let normalized = s.to_lowercase();
    let level = match normalized.as_str() {
        "critical" => Severity::Critical,
        "high" => Severity::High,
        "medium" | "med" => Severity::Medium,
        "low" => Severity::Low,
        "info" => Severity::Info,
        other => {
            return Err(anyhow::anyhow!(
                "--min-severity must be one of: critical, high, medium, low, info (got {other})"
            ));
        }
    };
    Ok(level)
}

/// Ingest a directory of APKs into a SQLite corpus database. Writes
/// two tables: `apks` (one row per APK with package/version/sdk/flags)
/// and `signers` (one row per signer with raw key material). Re-runs
/// are safe: rows are upserted, and APKs already present are skipped
/// unless `skip_existing` is false.
///
/// Exposed symmetrically via CLI (`droidsaw corpus-ingest`) and MCP.
/// Designed for batch-GCD and supply-chain analysis across an APK corpus.
///
/// # Arguments
///
/// * `paths` — files and/or directories. Directories are walked recursively
///   and only regular files with an `apk` extension are kept; any other
///   argument is taken as an APK path as-is.
/// * `output` — path of the SQLite database to open (tables and indexes are
///   created if missing).
/// * `tag` — free-form label stored in `apks.tag`; `None` is stored as `""`.
/// * `skip_existing` — when true, a path that already has an `apks` row with
///   `parse_error IS NULL` is left untouched.
///
/// # Behaviour
///
/// * All writes happen in a single transaction that is committed at the end.
/// * An APK that fails to parse still gets an `apks` row, with the error text
///   in `parse_error`, and is counted under `failed`.
/// * Whenever a path is (re-)ingested, its existing `signers` rows are removed
///   first so that certificates the file no longer carries do not linger.
/// * One wall-clock reading is used for both `ingested_at` and every
///   `is_expired` evaluation of the run.
///
/// # Returns
///
/// A JSON object with `output`, `scanned`, `ingested`, `skipped`, `failed`,
/// `signer_rows` and a `_meta` entry.
///
/// # Errors
///
/// Fails if the database cannot be opened or initialised, if any insert or
/// delete fails, or if the final commit fails. An error returned before the
/// commit means the transaction is never committed.
#[allow(clippy::arithmetic_side_effects, clippy::as_conversions, reason = "`u32 → i64` widening (target_sdk_int, key_size_bits); `bool → i64` well-defined as 0/1; `u64 → i64` narrowing for SystemTime::as_secs (safe until year 2262) and fs::metadata::len (file sizes ≪ i64::MAX). Skipped /failed/ingested counters are display-only and handled in the saturating commit.")]
pub fn corpus_ingest(
    paths: &[std::path::PathBuf],
    output: &str,
    tag: Option<&str>,
    skip_existing: bool,
) -> anyhow::Result<Value> {
    // Expand directories via walkdir — same pattern as scan_corpus.
    let mut apk_paths: Vec<std::path::PathBuf> = Vec::new();
    for p in paths {
        if p.is_dir() {
            // Unreadable directory entries are dropped silently (best-effort walk).
            for entry in walkdir::WalkDir::new(p).into_iter().filter_map(|e| e.ok()) {
                if entry.file_type().is_file()
                    && entry.path().extension().map(|e| e == "apk").unwrap_or(false)
                {
                    apk_paths.push(entry.path().to_path_buf());
                }
            }
        } else {
            apk_paths.push(p.clone());
        }
    }
    // Sorted so the processing order does not depend on directory walk order.
    apk_paths.sort();
    progress!("corpus_ingest: {:?} apk paths", apk_paths.len());

    let mut db = rusqlite::Connection::open(output)?;
    db.execute_batch(
        "
        CREATE TABLE IF NOT EXISTS apks (
            path TEXT PRIMARY KEY,
            tag TEXT,
            package TEXT,
            version_code TEXT,
            version_name TEXT,
            min_sdk TEXT,
            target_sdk_int INTEGER,
            debuggable INTEGER,
            allow_backup INTEGER,
            file_size INTEGER,
            ingested_at INTEGER,
            parse_error TEXT
        );
        CREATE TABLE IF NOT EXISTS signers (
            apk_path TEXT,
            scheme TEXT,
            cert_sha256 TEXT,
            subject TEXT,
            issuer TEXT,
            serial TEXT,
            not_before TEXT,
            not_after TEXT,
            is_expired INTEGER,
            is_self_signed INTEGER,
            key_algorithm TEXT,
            key_size_bits INTEGER,
            ec_curve TEXT,
            signature_algorithm TEXT,
            rsa_modulus_hex TEXT,
            PRIMARY KEY (apk_path, scheme, cert_sha256)
        );
        CREATE INDEX IF NOT EXISTS idx_signers_fingerprint ON signers(cert_sha256);
        CREATE INDEX IF NOT EXISTS idx_signers_key ON signers(key_algorithm, key_size_bits);
        CREATE INDEX IF NOT EXISTS idx_apks_package ON apks(package);
        ",
    )?;

    let tag_str = tag.unwrap_or("");
    // Single wall-clock reading for the whole run: `ingested_at` and every
    // `is_expired` flag are evaluated against the same instant, so rows written
    // by one invocation agree with each other.
    let run_started = std::time::SystemTime::now();
    #[allow(
        clippy::cast_possible_wrap,
        reason = "INTENT: SystemTime::now().duration_since(UNIX_EPOCH) returns Duration with u64 secs; SQLite INTEGER columns are i64, so widening reinterpretation is the required encoding. Realistic timestamps (year 2026+) are well below i64::MAX (year ~292277024596)."
    )]
    let now = run_started
        .duration_since(std::time::UNIX_EPOCH)
        .map(|d| d.as_secs() as i64)
        .unwrap_or(0);

    let mut ingested = 0usize;
    let mut skipped = 0usize;
    let mut failed = 0usize;
    let mut signer_rows = 0usize;

    let tx = db.transaction()?;
    for path in &apk_paths {
        // Per-path scope so a panic on path N lands in a bundle named for
        // N's hash, not the first-path's hash or `unknown-*`.
        let per_path_hash = CrossLayerContext::hash_path(path);
        droidsaw_common::diag::with_input_hash(&per_path_hash, || -> anyhow::Result<()> {
            let path_str = path.to_string_lossy().into_owned();

            if skip_existing {
                // Only a previous *successful* ingest counts as "existing";
                // rows carrying a parse_error are retried. A failing probe is
                // treated as "not present" (best-effort) and the APK is
                // re-ingested.
                let exists: i64 = tx
                    .query_row(
                        "SELECT COUNT(*) FROM apks WHERE path = ?1 AND parse_error IS NULL",
                        rusqlite::params![&path_str],
                        |row| row.get(0),
                    )
                    .unwrap_or(0);
                if exists > 0 {
                    // DISPLAY-ONLY: corpus-ingest progress counter.
                    skipped = skipped.saturating_add(1);
                    return Ok(());
                }
            }

            // The signer upserts below only overwrite rows with the same
            // (apk_path, scheme, cert_sha256) key. Without this purge, rows for
            // certificates the file no longer carries (re-signed APK, or an APK
            // that now fails to parse) would survive a re-ingest and pollute
            // signer-pivot queries.
            tx.execute(
                "DELETE FROM signers WHERE apk_path = ?1",
                rusqlite::params![&path_str],
            )?;

            #[allow(
                clippy::cast_possible_wrap,
                reason = "INTENT: file_size encoded as i64 for SQLite INTEGER column. APK files are bounded by ZIP central-directory addressability (<2^48 in practice); always within i64::MAX."
            )]
            let file_size = std::fs::metadata(path).map(|m| m.len() as i64).unwrap_or(0);

            let apk = match droidsaw_apk::Apk::parse(path) {
                Ok(a) => a,
                Err(e) => {
                    // Record the failure so it is visible in the corpus and is
                    // retried on the next run.
                    tx.execute(
                        "INSERT OR REPLACE INTO apks (path, tag, file_size, ingested_at, parse_error) VALUES (?1, ?2, ?3, ?4, ?5)",
                        rusqlite::params![&path_str, tag_str, file_size, now, e.to_string()],
                    )?;
                    // DISPLAY-ONLY: corpus-ingest progress counter.
                    failed = failed.saturating_add(1);
                    return Ok(());
                }
            };

            // A missing or undecodable manifest yields NULL columns rather than
            // failing the APK.
            let (package, version_code, version_name, min_sdk, target_sdk_int, debuggable, allow_backup) =
                match apk
                    .manifest_raw
                    .as_ref()
                    .and_then(|raw| droidsaw_apk::Manifest::from_binary_xml(raw).ok())
                {
                    Some(m) => (
                        Some(m.package),
                        Some(m.version_code),
                        Some(m.version_name),
                        Some(m.min_sdk),
                        Some(i64::from(m.target_sdk_int)),
                        Some(i64::from(m.debuggable)),
                        m.allow_backup.map(i64::from),
                    ),
                    None => (None, None, None, None, None, None, None),
                };

            tx.execute(
                "INSERT OR REPLACE INTO apks (path, tag, package, version_code, version_name, min_sdk, target_sdk_int, debuggable, allow_backup, file_size, ingested_at, parse_error) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, NULL)",
                rusqlite::params![
                    &path_str,
                    tag_str,
                    package,
                    version_code,
                    version_name,
                    min_sdk,
                    target_sdk_int,
                    debuggable,
                    allow_backup,
                    file_size,
                    now,
                ],
            )?;

            // Signing info is optional: when it cannot be read the APK still
            // counts as ingested, just without signer rows.
            let signing = match apk.signing_info() {
                Ok(s) => s,
                Err(_) => {
                    // DISPLAY-ONLY: corpus-ingest progress counter.
                    ingested = ingested.saturating_add(1);
                    return Ok(());
                }
            };

            if let Some(cert) = &signing.v1_cert {
                tx.execute(
                    "INSERT OR REPLACE INTO signers (apk_path, scheme, cert_sha256, subject, issuer, serial, not_before, not_after, is_expired, is_self_signed, key_algorithm, key_size_bits, ec_curve, signature_algorithm, rsa_modulus_hex) VALUES (?1, 'V1', ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14)",
                    rusqlite::params![
                        &path_str,
                        &cert.sha256_fingerprint,
                        &cert.subject,
                        &cert.issuer,
                        &cert.serial,
                        &cert.not_before,
                        &cert.not_after,
                        i64::from(cert.is_expired_at(run_started)),
                        i64::from(signing.v1_self_signed),
                        &cert.key_algorithm,
                        i64::from(cert.key_size_bits),
                        // Empty curve name is stored as NULL.
                        if cert.ec_curve.is_empty() {
                            None
                        } else {
                            Some(cert.ec_curve.as_str())
                        },
                        &cert.signature_algorithm,
                        to_hex(&cert.rsa_modulus),
                    ],
                )?;
                // DISPLAY-ONLY: V1 cert row counter.
                signer_rows = signer_rows.saturating_add(1);
            }

            for s in &signing.signers {
                let scheme = format!("{:?}", s.scheme);
                let fp = s.cert_sha256.clone().unwrap_or_default();
                // The fingerprint is part of the primary key; signers without
                // one cannot be stored.
                if fp.is_empty() {
                    continue;
                }
                // Certificate-level columns (subject, issuer, validity, ...) are
                // written as NULL for these rows; only key material is recorded.
                tx.execute(
                    "INSERT OR REPLACE INTO signers (apk_path, scheme, cert_sha256, subject, issuer, serial, not_before, not_after, is_expired, is_self_signed, key_algorithm, key_size_bits, ec_curve, signature_algorithm, rsa_modulus_hex) VALUES (?1, ?2, ?3, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ?4, ?5, ?6, ?7, ?8)",
                    rusqlite::params![
                        &path_str,
                        &scheme,
                        &fp,
                        &s.public_key_algorithm,
                        i64::from(s.public_key_size_bits),
                        if s.public_key_ec_curve.is_empty() {
                            None
                        } else {
                            Some(s.public_key_ec_curve.as_str())
                        },
                        s.signature_algorithms.first().cloned().unwrap_or_default(),
                        to_hex(&s.public_key_rsa_modulus),
                    ],
                )?;
                // DISPLAY-ONLY: V2/V3 signer row counter.
                signer_rows = signer_rows.saturating_add(1);
            }

            // DISPLAY-ONLY: corpus-ingest progress counter.
            ingested = ingested.saturating_add(1);
            Ok(())
        })?;
    }
    tx.commit()?;

    progress!(
        "corpus_ingest: ingested={ingested:?} skipped={skipped:?} failed={failed:?} signer_rows={signer_rows:?}"
    );

    Ok(json!({
        "output": output,
        "scanned": apk_paths.len(),
        "ingested": ingested,
        "skipped": skipped,
        "failed": failed,
        "signer_rows": signer_rows,
        "_meta": meta(
            1,
            false,
            "query with `sqlite3 <output>`; idx_signers_fingerprint indexes cert_sha256 for batch-GCD and signer-pivot queries",
            &["scan-corpus", "signing", "sbom"],
        ),
    }))
}

/// Scan every APK reachable from `paths` and stream the selected findings to
/// `out`, one JSON object per line.
///
/// Directory arguments are walked recursively and only regular files with an
/// `apk` extension are kept; any other argument is taken as an APK path
/// as-is. Each emitted object has the keys `path`, `package` and `finding`.
/// A finding is emitted when `finding.severity <= threshold`, where the
/// threshold is parsed from `min_severity`. APKs that cannot be parsed are
/// reported through `progress!` and skipped.
///
/// # Errors
///
/// Fails when `min_severity` is not a recognised level, or when serialising
/// or writing an output line fails.
pub fn scan_corpus(
    paths: &[std::path::PathBuf],
    min_severity: &str,
    out: &mut dyn std::io::Write,
) -> anyhow::Result<()> {
    let threshold = parse_severity(min_severity)?;

    // Resolve the arguments into a flat list of candidate APK files.
    let mut targets: Vec<std::path::PathBuf> = Vec::new();
    for root in paths {
        if !root.is_dir() {
            targets.push(root.clone());
            continue;
        }
        let discovered = walkdir::WalkDir::new(root)
            .into_iter()
            .filter_map(Result::ok)
            .filter(|entry| {
                entry.file_type().is_file()
                    && entry.path().extension().is_some_and(|ext| ext == "apk")
            })
            .map(|entry| entry.path().to_path_buf());
        targets.extend(discovered);
    }

    for apk_path in &targets {
        // Each APK runs under its own input hash so that diagnostics for a
        // failure on this path are attributed to this path.
        let input_hash = CrossLayerContext::hash_path(apk_path);
        let outcome: anyhow::Result<()> =
            droidsaw_common::diag::with_input_hash(&input_hash, || -> anyhow::Result<()> {
                let ctx = match CrossLayerContext::parse(apk_path, None) {
                    Ok(parsed) => parsed,
                    Err(e) => {
                        // Unparseable APKs are reported and skipped, not fatal.
                        progress!("could not parse {:?}: {:?}", apk_path, e);
                        return Ok(());
                    }
                };

                // Package name from the manifest, or empty when unavailable.
                let package = ctx
                    .apk
                    .as_ref()
                    .and_then(|a| a.manifest_raw.as_ref())
                    .and_then(|r| Manifest::from_binary_xml(r).ok())
                    .map(|m| m.package)
                    .unwrap_or_default();

                let findings: Vec<Finding> =
                    collect_apk_findings(&ctx, DEFAULT_ENTROPY_THRESHOLD_BITS);
                for f in findings.into_iter().filter(|f| f.severity <= threshold) {
                    let line = json!({
                        "path": apk_path.to_string_lossy(),
                        "package": package,
                        "finding": f,
                    });
                    writeln!(out, "{}", serde_json::to_string(&line)?)?;
                }
                Ok(())
            });
        outcome?;
    }
    Ok(())
}