use droidsaw_common::Finding;
use serde_json::{json, Value};
use crate::analysis;
use crate::context::CrossLayerContext;
use super::{credentials_fp, honeycomb_fp, meta, progress};
/// Exports the parsed layers of `ctx` into the SQLite database at `output`.
///
/// Creates (if missing) the `strings`, `functions`, `classes` and `edges`
/// tables plus the `strings_fts` full-text index, then fills them inside a
/// single transaction:
///
/// * Hermes bytecode strings and functions, tagged with layer `hbc`;
/// * per-DEX strings, non-shadowed class definitions and methods, tagged
///   `dex1`, `dex2`, … in `ctx.dex` order;
/// * `bridge` edges from every Hermes function that references the string
///   `NativeModules` to each DEX method the bridge resolver associates with
///   one of the other strings that function references.
///
/// Returns a JSON summary holding the output path, the table names, the
/// per-table row counters and a `_meta` block.
///
/// # Errors
///
/// Propagates any SQLite error raised while opening the database, creating
/// the schema, inserting rows, committing, or rebuilding the FTS index.
#[allow(
    clippy::arithmetic_side_effects,
    clippy::as_conversions,
    clippy::cast_possible_wrap,
    clippy::cast_possible_truncation,
    reason = "DEX struct-field indices widen u32→usize for `.get()` bounds-checked lookup; `i + 1` / `dex_idx + 1` dex-layer labels bounded by ctx.dex.len(); `j as i64` / `len() as i64` casts widen for SQLite bind params (realistic row counts ≪ i64::MAX). Row counters (string_rows/class_rows/etc.) are display-only and handled in the saturating commit."
)]
pub fn export(ctx: &CrossLayerContext, output: &str) -> anyhow::Result<Value> {
    // NOTE(review): presumably discards Hermes findings raised while the
    // export runs; confirm against `HermesFindingDrainGuard`.
    let _drain_guard = crate::context::HermesFindingDrainGuard::install_discard();
    let mut db = rusqlite::Connection::open(output)?;
    db.execute_batch(
        "
CREATE TABLE IF NOT EXISTS strings (id INTEGER, kind TEXT, value TEXT, length INTEGER, layer TEXT, PRIMARY KEY (id, layer));
CREATE TABLE IF NOT EXISTS functions (id INTEGER, name TEXT, param_count INTEGER, offset INTEGER, size INTEGER, layer TEXT, PRIMARY KEY (id, layer));
CREATE TABLE IF NOT EXISTS classes (id INTEGER, name TEXT, superclass TEXT, layer TEXT, PRIMARY KEY (id, layer));
CREATE TABLE IF NOT EXISTS edges (caller_id INTEGER, caller_layer TEXT, callee_id INTEGER, callee_layer TEXT, bridge_type TEXT, PRIMARY KEY (caller_id, caller_layer, callee_id, callee_layer));
CREATE VIRTUAL TABLE IF NOT EXISTS strings_fts USING fts5(value, content=strings, content_rowid=rowid);
",
    )?;
    let tx = db.transaction()?;
    let mut string_rows = 0u64;
    let mut function_rows = 0u64;
    let mut class_rows = 0u64;
    let mut edge_rows = 0u64;

    // Hermes layer: string table and function table.
    if let Some(hbc_owned) = ctx.hbc.as_ref() {
        let hbc = hbc_owned.hbc();
        for i in 0..hbc.string_count {
            // Strings whose descriptor is missing or unreadable are skipped.
            let kind = match hbc.string_get(i) {
                Ok(Some(sd)) => match sd.kind {
                    0 => "literal",
                    1 => "ident",
                    _ => "other",
                },
                Ok(None) | Err(_) => continue,
            };
            let value = hbc.string_as_str_or_empty(i);
            tx.execute(
                "INSERT OR REPLACE INTO strings VALUES (?1,?2,?3,?4,'hbc')",
                rusqlite::params![i, kind, &*value, value.len() as i64],
            )?;
            string_rows = string_rows.saturating_add(1);
        }
        for fid in 0..hbc.function_count {
            let f = hbc.function_get(fid);
            // An out-of-range name id is exported as an empty name.
            let name = if f.name_id < hbc.string_count {
                hbc.string_as_str_or_empty(f.name_id).into_owned()
            } else {
                String::new()
            };
            tx.execute(
                "INSERT OR REPLACE INTO functions VALUES (?1,?2,?3,?4,?5,'hbc')",
                rusqlite::params![fid, name, f.param_count, f.offset, f.size],
            )?;
            function_rows = function_rows.saturating_add(1);
        }
    }

    // DEX layers: strings, classes and methods, one layer label per DEX file.
    for (i, dex) in ctx.dex.iter().enumerate() {
        let layer = format!("dex{}", i + 1);
        for (j, entry) in dex.strings.iter().enumerate() {
            let s = entry.as_str_lossy();
            tx.execute(
                "INSERT OR REPLACE INTO strings VALUES (?1,'dex',?2,?3,?4)",
                rusqlite::params![j as i64, s, s.len() as i64, layer],
            )?;
            string_rows = string_rows.saturating_add(1);
        }
        for (j, cd) in dex.class_defs.iter().enumerate() {
            // Shadowed (duplicate) class definitions are not persisted.
            if dex.class_def_is_shadowed(j) {
                continue;
            }
            let name = dex
                .type_descriptors
                .get(cd.class_idx.0 as usize)
                .cloned()
                .unwrap_or_default();
            let super_name = cd
                .superclass_idx
                .and_then(|s| dex.type_descriptors.get(s.0 as usize))
                .cloned()
                .unwrap_or_default();
            tx.execute(
                "INSERT OR REPLACE INTO classes VALUES (?1,?2,?3,?4)",
                rusqlite::params![j as i64, name, super_name, layer],
            )?;
            class_rows = class_rows.saturating_add(1);
        }
        for (j, m) in dex.methods.iter().enumerate() {
            let name = dex
                .strings
                .get(m.name_idx.0 as usize)
                .map(|e| e.as_str_lossy().to_string())
                .unwrap_or_default();
            // DEX methods carry no param_count/offset/size here; zeros are stored.
            tx.execute(
                "INSERT OR REPLACE INTO functions VALUES (?1,?2,0,0,0,?3)",
                rusqlite::params![j as i64, name, layer],
            )?;
            function_rows = function_rows.saturating_add(1);
        }
    }

    // Bridge edges: Hermes functions that mention `NativeModules` → DEX methods.
    if let Some(hbc_owned) = ctx.hbc.as_ref() {
        let hbc = hbc_owned.hbc();
        let hbc_data = hbc_owned.bytes();
        let scan = droidsaw_hermes::scanner::scan_parsed(hbc, hbc_data);
        let bridge = analysis::bridge::BridgeResolver::resolve(ctx);
        let nm_id =
            (0..hbc.string_count).find(|&i| hbc.string_as_str_or_empty(i) == "NativeModules");
        if let Some(nm_id) = nm_id
            && let Some(ref_fids) = scan.string_refs.get(&nm_id)
        {
            for &fid in ref_fids {
                for (&str_id, fids) in &scan.string_refs {
                    if fids.contains(&fid) && str_id != nm_id {
                        let method_name = hbc.string_as_str_or_empty(str_id).into_owned();
                        if let Some(targets) = bridge.by_method.get(&method_name) {
                            for (dex_idx, m_idx) in targets {
                                let layer = format!("dex{}", dex_idx + 1);
                                let inserted = tx.execute(
                                    "INSERT OR IGNORE INTO edges VALUES (?1,'hbc',?2,?3,'bridge')",
                                    rusqlite::params![fid, m_idx.0, layer],
                                )?;
                                // `INSERT OR IGNORE` changes 0 rows when the edge
                                // already exists, so count what was actually
                                // written rather than every attempt.
                                edge_rows = edge_rows
                                    .saturating_add(u64::try_from(inserted).unwrap_or(u64::MAX));
                            }
                        }
                    }
                }
            }
        }
    }
    tx.commit()?;
    // `strings_fts` is an external-content index; rebuild it from `strings`.
    db.execute("INSERT INTO strings_fts(strings_fts) VALUES('rebuild')", [])?;
    progress!("exported to {output:?}");
    let out = json!({
        "output": output,
        "tables": ["strings", "functions", "classes", "edges", "strings_fts"],
        "row_counts": {
            "strings": string_rows,
            "functions": function_rows,
            "classes": class_rows,
            "edges": edge_rows,
        },
        "_meta": meta(
            1,
            false,
            "query with `sqlite3 <output>`; strings_fts is a full-text search index over strings.value",
            &["audit", "info", "sbom"],
        ),
    });
    Ok(out)
}
/// Current revision of the findings-database schema. `migrate_findings_schema_to_current`
/// upgrades any database whose recorded `schema_meta.schema_rev` is lower than this value.
pub const FINDINGS_SCHEMA_REV: i64 = 6;
/// Brings the findings database behind `db` up to `FINDINGS_SCHEMA_REV`.
///
/// Creates the `schema_meta` table if needed and reads the highest recorded
/// revision (an empty table counts as revision 0). When that revision is
/// already at or past `FINDINGS_SCHEMA_REV` the function returns without
/// touching anything else. Otherwise every outstanding step runs inside one
/// transaction and a `schema_meta` row for `FINDINGS_SCHEMA_REV` is inserted
/// just before the commit.
///
/// The `ALTER TABLE` steps require the `findings` table to exist already; in
/// this file `ensure_findings_schema` creates it before calling this function.
///
/// # Errors
///
/// Returns the first SQLite error raised by any step; in that case the
/// function returns before `commit` is reached.
pub fn migrate_findings_schema_to_current(db: &mut rusqlite::Connection) -> anyhow::Result<()> {
db.execute(
"CREATE TABLE IF NOT EXISTS schema_meta (
schema_rev INTEGER PRIMARY KEY,
applied_at TEXT NOT NULL
)",
[],
)?;
// Highest revision applied so far; COALESCE maps "no rows" to 0.
let current: i64 = db.query_row(
"SELECT COALESCE(MAX(schema_rev), 0) FROM schema_meta",
[],
|r| r.get(0),
)?;
if current >= FINDINGS_SCHEMA_REV {
return Ok(());
}
let tx = db.transaction()?;
// Revision 2: extra `findings` columns plus the third_parties,
// acquisition_metadata and declared_claims tables.
if current < 2 {
// The five columns are added together; the presence of the first one is
// used as the marker that the whole group already exists.
if !findings_has_column(&tx, "adversary_profile_relevance")? {
tx.execute_batch(
"ALTER TABLE findings ADD COLUMN adversary_profile_relevance TEXT NOT NULL DEFAULT '[]';
ALTER TABLE findings ADD COLUMN completeness TEXT NOT NULL DEFAULT 'present';
ALTER TABLE findings ADD COLUMN source_api TEXT;
ALTER TABLE findings ADD COLUMN source_api_args TEXT;
ALTER TABLE findings ADD COLUMN resolution TEXT;",
)?;
}
tx.execute_batch(
"CREATE TABLE IF NOT EXISTS third_parties (
rowid INTEGER PRIMARY KEY,
identity TEXT NOT NULL,
version TEXT,
first_seen_location TEXT,
jurisdictions TEXT NOT NULL DEFAULT '[]'
);
CREATE TABLE IF NOT EXISTS acquisition_metadata (
rowid INTEGER PRIMARY KEY CHECK (rowid = 1),
source_kind TEXT NOT NULL DEFAULT 'unknown',
operator TEXT,
authority_ref TEXT,
acquired_at TEXT,
pre_analysis_hash TEXT
);
CREATE TABLE IF NOT EXISTS declared_claims (
rowid INTEGER PRIMARY KEY,
source TEXT NOT NULL,
category TEXT NOT NULL,
declared TEXT NOT NULL,
hash TEXT NOT NULL
);",
)?;
}
// Revision 3: drop and recreate the `credentials` view so existing
// databases pick up the current definition.
if current < 3 {
tx.execute_batch(
"DROP VIEW IF EXISTS credentials;
CREATE VIEW credentials AS
SELECT rowid, severity, confidence, detail,
json_extract(extra, '$.detector') AS detector,
json_extract(extra, '$.raw') AS raw,
json_extract(extra, '$.verified') AS verified,
extra
FROM findings WHERE source = 'trufflehog';",
)?;
}
// Revision 4: run bookkeeping columns and a partial unique index that
// de-duplicates findings by `signature_hash` (NULL hashes are exempt).
if current < 4 {
if !findings_has_column(&tx, "run_id")? {
tx.execute_batch("ALTER TABLE findings ADD COLUMN run_id TEXT;")?;
}
if !findings_has_column(&tx, "mode")? {
tx.execute_batch("ALTER TABLE findings ADD COLUMN mode TEXT;")?;
}
if !findings_has_column(&tx, "signature_hash")? {
tx.execute_batch("ALTER TABLE findings ADD COLUMN signature_hash TEXT;")?;
}
tx.execute_batch(
"CREATE UNIQUE INDEX IF NOT EXISTS findings_signature_hash_uniq \
ON findings(signature_hash) WHERE signature_hash IS NOT NULL;",
)?;
}
// Revision 5: offset columns on `taint_flows`. The `func_id` probe doubles
// as an existence check: when the table (or that column) is absent the
// step is skipped entirely.
if current < 5 {
if taint_flows_has_column(&tx, "func_id")? {
if !taint_flows_has_column(&tx, "source_offset")? {
tx.execute_batch("ALTER TABLE taint_flows ADD COLUMN source_offset INTEGER;")?;
}
if !taint_flows_has_column(&tx, "sink_offset")? {
tx.execute_batch("ALTER TABLE taint_flows ADD COLUMN sink_offset INTEGER;")?;
}
}
}
// Revision 6: cross-layer taint-flow table, FTS index, indexes and view.
if current < 6 {
tx.execute_batch(CROSS_LAYER_TAINT_FLOWS_SCHEMA)?;
}
// Record only the target revision; intermediate revisions get no row.
tx.execute(
"INSERT INTO schema_meta(schema_rev, applied_at) VALUES (?1, ?2)",
rusqlite::params![FINDINGS_SCHEMA_REV, chrono::Utc::now().to_rfc3339()],
)?;
tx.commit()?;
Ok(())
}
/// DDL batch executed by the revision-6 step of `migrate_findings_schema_to_current`.
///
/// Creates the `cross_layer_taint_flows` table, its external-content FTS5
/// index `cross_layer_taint_flows_fts`, two lookup indexes (bridge
/// module/method and severity) and the `cross_layer_taint_critical` view that
/// lists Critical/High rows, Critical first. Every statement uses
/// `IF NOT EXISTS`, so re-running the batch is harmless.
const CROSS_LAYER_TAINT_FLOWS_SCHEMA: &str = "
CREATE TABLE IF NOT EXISTS cross_layer_taint_flows (
rowid INTEGER PRIMARY KEY,
js_module TEXT NOT NULL,
js_method TEXT NOT NULL,
dex_idx INTEGER NOT NULL,
method_idx INTEGER NOT NULL,
js_func_id INTEGER NOT NULL,
native_func_id INTEGER NOT NULL,
native_class_descriptor TEXT,
native_method_signature TEXT,
source_type TEXT NOT NULL,
sink_type TEXT NOT NULL,
severity TEXT NOT NULL,
cwe INTEGER
);
CREATE VIRTUAL TABLE IF NOT EXISTS cross_layer_taint_flows_fts
USING fts5(js_module, js_method, native_class_descriptor,
native_method_signature, source_type, sink_type,
content=cross_layer_taint_flows, content_rowid=rowid,
tokenize='porter unicode61');
CREATE INDEX IF NOT EXISTS idx_cross_layer_bridge
ON cross_layer_taint_flows(js_module, js_method);
CREATE INDEX IF NOT EXISTS idx_cross_layer_severity
ON cross_layer_taint_flows(severity);
CREATE VIEW IF NOT EXISTS cross_layer_taint_critical AS
SELECT rowid, js_module, js_method, native_class_descriptor,
native_method_signature, source_type, sink_type, severity, cwe
FROM cross_layer_taint_flows
WHERE severity IN ('Critical', 'High')
ORDER BY CASE severity WHEN 'Critical' THEN 0 WHEN 'High' THEN 1 END,
rowid;
";
/// Reports whether `table` currently has a column named exactly `column`.
///
/// Uses SQLite's `pragma_table_info` table-valued function so the table name
/// is passed as a bound parameter rather than spliced into the SQL text. A
/// table that does not exist yields no rows and therefore `Ok(false)`. The
/// name comparison is done in Rust and is case-sensitive.
fn table_has_column(
    db: &rusqlite::Connection,
    table: &str,
    column: &str,
) -> rusqlite::Result<bool> {
    let mut stmt = db.prepare("SELECT name FROM pragma_table_info(?1)")?;
    let mut rows = stmt.query(rusqlite::params![table])?;
    while let Some(row) = rows.next()? {
        let name: String = row.get(0)?;
        if name == column {
            return Ok(true);
        }
    }
    Ok(false)
}

/// Reports whether the `findings` table has a column named `column`.
fn findings_has_column(
    db: &rusqlite::Connection,
    column: &str,
) -> rusqlite::Result<bool> {
    table_has_column(db, "findings", column)
}

/// Reports whether the `taint_flows` table has a column named `column`.
/// Also returns `Ok(false)` when the table itself is absent.
fn taint_flows_has_column(
    db: &rusqlite::Connection,
    column: &str,
) -> rusqlite::Result<bool> {
    table_has_column(db, "taint_flows", column)
}
/// Computes the lowercase-hex SHA-256 signature of a finding.
///
/// The digest covers `layer`, `id`, `source` and `detail`, in that order,
/// with a single NUL byte after each of the first three fields. The result is
/// always 64 hexadecimal characters.
pub fn finding_signature_hash(f: &Finding) -> String {
    use sha2::{Digest, Sha256};

    let mut hasher = Sha256::new();
    // Each identity field is followed by a NUL separator; `detail` comes last
    // and is left unterminated.
    for field in [
        f.layer.as_str().as_bytes(),
        f.id.as_bytes(),
        f.source.as_str().as_bytes(),
    ] {
        hasher.update(field);
        hasher.update(b"\0");
    }
    hasher.update(f.detail.as_bytes());

    // Two hex digits per digest byte, high nibble first.
    hasher
        .finalize()
        .iter()
        .fold(String::with_capacity(64), |mut hex, byte| {
            hex.push(hex_nibble(byte >> 4));
            hex.push(hex_nibble(byte & 0x0f));
            hex
        })
}
/// Maps a nibble (`0..=15`) to its lowercase hexadecimal digit.
///
/// Any value above 15 yields the sentinel `'?'` instead of panicking.
#[inline]
pub(super) const fn hex_nibble(n: u8) -> char {
    // A literal table needs no arithmetic or casts, so no lint exemptions.
    match n {
        0 => '0',
        1 => '1',
        2 => '2',
        3 => '3',
        4 => '4',
        5 => '5',
        6 => '6',
        7 => '7',
        8 => '8',
        9 => '9',
        10 => 'a',
        11 => 'b',
        12 => 'c',
        13 => 'd',
        14 => 'e',
        15 => 'f',
        _ => '?',
    }
}
/// Writes `findings` into the findings database at `output` and returns how
/// many rows were actually inserted.
///
/// The schema is created/migrated first. When `update_db` is `false`,
/// existing rows are removed before inserting: only the rows whose `mode`
/// equals the given `mode`, or every row when `mode` is `None`. When
/// `update_db` is `true` nothing is deleted.
///
/// Rows are inserted with `INSERT OR IGNORE`, so a finding whose
/// `signature_hash` is already present is skipped and not counted. `run_id`
/// and `mode` are stored verbatim on every inserted row.
///
/// The delete and the inserts share one transaction, so a failure while
/// inserting cannot leave the table emptied.
///
/// # Errors
///
/// Propagates SQLite errors from opening the database, preparing the schema,
/// deleting, inserting, committing, or rebuilding `findings_fts`.
pub fn write_findings_db_with_run(
    findings: &[Finding],
    output: &std::path::Path,
    run_id: Option<&str>,
    mode: Option<&str>,
    update_db: bool,
) -> anyhow::Result<usize> {
    let mut db = rusqlite::Connection::open(output)?;
    ensure_findings_schema(&mut db)?;
    let tx = db.transaction()?;
    if !update_db {
        // Replace semantics: clear the rows this run is about to rewrite.
        match mode {
            Some(m) => {
                tx.execute("DELETE FROM findings WHERE mode = ?1", rusqlite::params![m])?;
            }
            None => {
                tx.execute("DELETE FROM findings", [])?;
            }
        }
    }
    let mut inserted = 0usize;
    {
        let mut stmt = tx.prepare(
            "INSERT OR IGNORE INTO findings \
             (severity, layer, id_tag, gauge_class, source, confidence, detail, cwe, extra, run_id, mode, signature_hash) \
             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
        )?;
        for f in findings {
            let sig = finding_signature_hash(f);
            // `n` is the number of rows changed: 0 when the insert was ignored.
            let n = stmt.execute(rusqlite::params![
                format!("{:?}", f.severity),
                format!("{:?}", f.layer),
                f.id,
                f.gauge_class.as_str(),
                f.source.as_str(),
                f.confidence.as_str(),
                f.detail,
                f.cwe,
                f.extra,
                run_id,
                mode,
                sig,
            ])?;
            inserted = inserted.saturating_add(n);
        }
    }
    tx.commit()?;
    // `findings_fts` is an external-content index; rebuild it from `findings`.
    db.execute("INSERT INTO findings_fts(findings_fts) VALUES('rebuild')", [])?;
    Ok(inserted)
}
/// Creates the base findings schema on `db` (idempotent) and then applies any
/// outstanding migrations.
fn ensure_findings_schema(db: &mut rusqlite::Connection) -> anyhow::Result<()> {
    db.execute_batch(WRITE_FINDINGS_DB_SCHEMA)?;
    migrate_findings_schema_to_current(db)
}
/// Opens the database at `output` and makes sure the findings schema exists
/// and is migrated to the current revision.
pub fn ensure_findings_db_schema(output: &std::path::Path) -> anyhow::Result<()> {
    let mut conn = rusqlite::Connection::open(output)?;
    ensure_findings_schema(&mut conn)?;
    Ok(())
}
/// Base DDL for the findings database, executed on every schema check.
///
/// Defines the `findings`, `xrefs`, `taint_flows`, `cross_layer_taint_flows`
/// and `finding_xrefs` tables, their external-content FTS5 indexes, and the
/// reporting views built on top of them. Statements use `IF NOT EXISTS`, so
/// re-running the batch leaves existing objects alone.
///
/// The one exception is `semgrep_hotspots`, which is dropped and recreated on
/// each run. It used to select from `semgrep_results`, a table this schema
/// never creates, so querying the view failed. It now reads from the
/// `semgrep_hits` view, which exposes the `class_name` and `check_id` columns
/// it aggregates. Dropping first also repairs databases that already hold the
/// stale definition; a view stores no data, so nothing is lost.
const WRITE_FINDINGS_DB_SCHEMA: &str =
"
CREATE TABLE IF NOT EXISTS findings (
rowid INTEGER PRIMARY KEY,
severity TEXT NOT NULL,
layer TEXT NOT NULL,
id_tag TEXT NOT NULL,
gauge_class TEXT NOT NULL DEFAULT 'Semantic',
source TEXT NOT NULL DEFAULT 'manifest',
confidence TEXT NOT NULL DEFAULT 'unverified',
dismiss_reason TEXT,
detail TEXT NOT NULL,
cwe INTEGER,
extra TEXT
);
CREATE VIRTUAL TABLE IF NOT EXISTS findings_fts
USING fts5(detail, id_tag, source, content=findings, content_rowid=rowid, tokenize='porter unicode61');
CREATE VIEW IF NOT EXISTS credentials AS
SELECT rowid, severity, confidence, detail,
json_extract(extra, '$.detector') AS detector,
json_extract(extra, '$.raw') AS raw,
json_extract(extra, '$.verified') AS verified,
extra
FROM findings WHERE source = 'trufflehog';
CREATE TABLE IF NOT EXISTS xrefs (
rowid INTEGER PRIMARY KEY,
layer TEXT NOT NULL,
string_value TEXT NOT NULL,
function_id INTEGER,
function_name TEXT
);
CREATE VIRTUAL TABLE IF NOT EXISTS xrefs_fts
USING fts5(string_value, function_name, content=xrefs, content_rowid=rowid, tokenize='porter unicode61');
CREATE TABLE IF NOT EXISTS taint_flows (
rowid INTEGER PRIMARY KEY,
layer TEXT NOT NULL,
func_id INTEGER NOT NULL,
source_type TEXT NOT NULL,
sink_type TEXT NOT NULL,
severity TEXT NOT NULL,
cwe INTEGER,
source_offset INTEGER,
sink_offset INTEGER
);
CREATE VIRTUAL TABLE IF NOT EXISTS taint_flows_fts
USING fts5(source_type, sink_type, content=taint_flows, content_rowid=rowid, tokenize='porter unicode61');
CREATE TABLE IF NOT EXISTS cross_layer_taint_flows (
rowid INTEGER PRIMARY KEY,
js_module TEXT NOT NULL,
js_method TEXT NOT NULL,
dex_idx INTEGER NOT NULL,
method_idx INTEGER NOT NULL,
js_func_id INTEGER NOT NULL,
native_func_id INTEGER NOT NULL,
native_class_descriptor TEXT,
native_method_signature TEXT,
source_type TEXT NOT NULL,
sink_type TEXT NOT NULL,
severity TEXT NOT NULL,
cwe INTEGER
);
CREATE VIRTUAL TABLE IF NOT EXISTS cross_layer_taint_flows_fts
USING fts5(js_module, js_method, native_class_descriptor,
native_method_signature, source_type, sink_type,
content=cross_layer_taint_flows, content_rowid=rowid,
tokenize='porter unicode61');
CREATE INDEX IF NOT EXISTS idx_cross_layer_bridge
ON cross_layer_taint_flows(js_module, js_method);
CREATE INDEX IF NOT EXISTS idx_cross_layer_severity
ON cross_layer_taint_flows(severity);
CREATE VIEW IF NOT EXISTS cross_layer_taint_critical AS
SELECT rowid, js_module, js_method, native_class_descriptor,
native_method_signature, source_type, sink_type, severity, cwe
FROM cross_layer_taint_flows
WHERE severity IN ('Critical', 'High')
ORDER BY CASE severity WHEN 'Critical' THEN 0 WHEN 'High' THEN 1 END,
rowid;
CREATE VIEW IF NOT EXISTS semgrep_hits AS
SELECT rowid, severity, id_tag, detail,
json_extract(extra, '$.class_name') AS class_name,
json_extract(extra, '$.check_id') AS check_id,
json_extract(extra, '$.message') AS message
FROM findings WHERE source = 'semgrep';
CREATE TABLE IF NOT EXISTS finding_xrefs (
rowid INTEGER PRIMARY KEY,
finding_rowid INTEGER NOT NULL,
layer TEXT NOT NULL,
string_value TEXT NOT NULL,
function_name TEXT NOT NULL
);
CREATE VIRTUAL TABLE IF NOT EXISTS finding_xrefs_fts
USING fts5(string_value, function_name, content=finding_xrefs, content_rowid=rowid, tokenize='porter unicode61');
CREATE VIEW IF NOT EXISTS actionable_findings AS
SELECT rowid, severity, id_tag, source, confidence, detail, cwe
FROM findings
WHERE severity IN ('Critical', 'High') AND gauge_class = 'Semantic'
AND confidence != 'dismissed'
ORDER BY CASE severity WHEN 'Critical' THEN 0 WHEN 'High' THEN 1 END, rowid;
CREATE VIEW IF NOT EXISTS dismissed_findings AS
SELECT rowid, severity, id_tag, source, detail, dismiss_reason
FROM findings WHERE confidence = 'dismissed';
CREATE VIEW IF NOT EXISTS confirmed_findings AS
SELECT rowid, severity, id_tag, source, detail, cwe
FROM findings WHERE confidence = 'confirmed';
CREATE VIEW IF NOT EXISTS confidence_summary AS
SELECT source, confidence, COUNT(*) AS n
FROM findings GROUP BY source, confidence ORDER BY source, confidence;
CREATE VIEW IF NOT EXISTS finding_context AS
SELECT f.rowid AS finding_rowid, f.severity, f.id_tag, f.detail,
fx.string_value, fx.function_name, fx.layer
FROM findings f
JOIN finding_xrefs fx ON f.rowid = fx.finding_rowid
ORDER BY f.severity, fx.string_value;
CREATE VIEW IF NOT EXISTS finding_urls AS
SELECT DISTINCT f.id_tag, f.severity, fx.string_value AS url
FROM findings f
JOIN finding_xrefs fx ON f.rowid = fx.finding_rowid
WHERE fx.string_value LIKE 'http%' OR fx.string_value LIKE '%://%'
ORDER BY f.severity, fx.string_value;
CREATE VIEW IF NOT EXISTS taint_critical AS
SELECT func_id, source_type, sink_type, severity, cwe, layer
FROM taint_flows
WHERE severity IN ('Critical', 'High')
ORDER BY CASE severity WHEN 'Critical' THEN 0 WHEN 'High' THEN 1 END;
DROP VIEW IF EXISTS semgrep_hotspots;
CREATE VIEW semgrep_hotspots AS
SELECT class_name, COUNT(*) AS n, GROUP_CONCAT(DISTINCT check_id) AS rules
FROM semgrep_hits
GROUP BY class_name
HAVING n > 1
ORDER BY n DESC;
CREATE VIEW IF NOT EXISTS audit_summary AS
SELECT
(SELECT COUNT(*) FROM findings) AS total_findings,
(SELECT COUNT(*) FROM findings WHERE severity IN ('Critical','High') AND gauge_class='Semantic' AND confidence!='dismissed') AS actionable,
(SELECT COUNT(*) FROM findings WHERE source='taint') AS taint_flows,
(SELECT COUNT(*) FROM findings WHERE source='trufflehog') AS credential_hits,
(SELECT COUNT(*) FROM findings WHERE source='semgrep') AS semgrep_hits,
(SELECT COUNT(*) FROM findings WHERE confidence='confirmed') AS confirmed,
(SELECT COUNT(*) FROM findings WHERE confidence='dismissed') AS dismissed,
(SELECT COUNT(*) FROM finding_xrefs) AS finding_xrefs;
";
/// Appends `findings` to the database at `output` without a run id or mode
/// and without deleting existing rows; returns the number of rows inserted.
pub fn write_findings_db(
    findings: &[Finding],
    output: &std::path::Path,
) -> anyhow::Result<usize> {
    // No run id, no mode, and `update_db = true` (keep what is already stored).
    let (run_id, mode, update_db) = (None, None, true);
    write_findings_db_with_run(findings, output, run_id, mode, update_db)
}
fn trufflehog_is_known_fp(
detector: &str,
raw: &str,
square_set: ®ex::RegexSet,
) -> bool {
match detector {
"Square" => !square_set.is_match(raw),
_ => false,
}
}
/// Builds the set of fully anchored patterns that a genuine Square credential
/// must match: production access and refresh tokens, sandbox tokens and
/// application ids.
fn square_key_regex_set() -> Result<regex::RegexSet, regex::Error> {
    const SQUARE_KEY_PATTERNS: [&str; 4] = [
        r"^sq0atp-[A-Za-z0-9_-]{43}$",
        r"^sq0rtp-[A-Za-z0-9_-]{43}$",
        r"^sandbox-sq0[a-z]tb-[A-Za-z0-9_-]{22}$",
        r"^sq0idp-[A-Za-z0-9_-]{22}$",
    ];
    regex::RegexSet::new(SQUARE_KEY_PATTERNS)
}
pub fn write_credentials_db(
trufflehog_ndjson: &str,
output: &std::path::Path,
) -> anyhow::Result<usize> {
use crate::detectors::{calibrate, Calibration};
let square_set = square_key_regex_set()?;
let mut db = rusqlite::Connection::open(output)?;
let tx = db.transaction()?;
let mut count = 0usize;
{
let mut stmt = tx.prepare(
"INSERT INTO findings (severity, layer, id_tag, gauge_class, source, confidence, detail, cwe, extra) \
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)",
)?;
for line in trufflehog_ndjson.lines() {
let Ok(v): Result<serde_json::Value, _> = serde_json::from_str(line) else {
continue;
};
let detector = v.get("DetectorName").and_then(|s| s.as_str()).unwrap_or("unknown");
let raw = v.get("Raw").and_then(|s| s.as_str()).unwrap_or("");
if trufflehog_is_known_fp(detector, raw, &square_set) {
continue;
}
let verified = v.get("Verified").and_then(|s| s.as_bool()).unwrap_or(false);
let circle_fp_shape = if !verified && credentials_fp::is_circle_detector(detector) {
credentials_fp::classify_circle_raw_shape(raw)
} else {
None
};
let honeycomb_fp_shape = if !verified && honeycomb_fp::is_honeycomb_detector(detector) {
honeycomb_fp::classify_honeycomb_raw_shape(raw)
} else {
None
};
let honeycomb_anchor = if !verified && honeycomb_fp::is_honeycomb_detector(detector) {
honeycomb_fp::classify_honeycomb_raw_anchor(raw)
} else {
None
};
let (severity, id_tag, detail, extra) = if let Some(shape) = circle_fp_shape {
let id_tag = format!(
"CREDENTIAL_{}_FP_{}",
detector.to_uppercase(),
shape.as_tag()
);
let detail = format!(
"{}: pattern match downgraded — raw matches well-known FP shape ({})",
detector,
shape.as_tag()
);
let extra = serde_json::json!({
"detector": detector,
"raw": raw,
"verified": verified,
"fp_shape": shape.as_tag(),
})
.to_string();
("Info", id_tag, detail, extra)
} else if let Some(shape) = honeycomb_fp_shape {
let id_tag = format!(
"CREDENTIAL_{}_FP_{}",
detector.to_uppercase(),
shape.as_tag()
);
let detail = format!(
"{}: pattern match downgraded — raw matches AOSP/SDK class identifier shape ({})",
detector,
shape.as_tag()
);
let extra = serde_json::json!({
"detector": detector,
"raw": raw,
"verified": verified,
"fp_shape": shape.as_tag(),
})
.to_string();
("Info", id_tag, detail, extra)
} else if let Some(anchor) = honeycomb_anchor {
let id_tag = format!("CREDENTIAL_{}", detector.to_uppercase());
let detail = format!(
"{}: pattern match on credential material (unverified; raw matches documented key anchor {})",
detector,
anchor.as_tag()
);
let extra = serde_json::json!({
"detector": detector,
"raw": raw,
"verified": verified,
"anchor": anchor.as_tag(),
})
.to_string();
("Critical", id_tag, detail, extra)
} else if verified {
let id_tag = format!("CREDENTIAL_{}", detector.to_uppercase());
let detail = format!(
"{}: pattern match on credential material (verified)",
detector
);
let extra = serde_json::json!({
"detector": detector,
"raw": raw,
"verified": true,
})
.to_string();
("Critical", id_tag, detail, extra)
} else {
match calibrate(detector, raw) {
Calibration::Skip => continue,
Calibration::Emit(sev) => {
let id_tag = format!("CREDENTIAL_{}", detector.to_uppercase());
let detail = format!(
"{}: pattern match on credential material (unverified)",
detector
);
let extra = serde_json::json!({
"detector": detector,
"raw": raw,
"verified": false,
})
.to_string();
(sev.as_str(), id_tag, detail, extra)
}
}
};
stmt.execute(rusqlite::params![
severity, "Apk", &id_tag, "Semantic", "trufflehog", "unverified",
&detail, Option::<u16>::None, &extra,
])?;
count = count.saturating_add(1);
}
}
tx.commit()?;
db.execute("INSERT INTO findings_fts(findings_fts) VALUES('rebuild')", [])?;
Ok(count)
}
/// Inserts string cross-references into the `xrefs` table of the database at
/// `output`, rebuilds `xrefs_fts`, and returns `entries.len()`.
///
/// Each entry is `(layer, string_value, function_id, function_name)`.
pub fn write_xrefs_db(
    entries: &[(String, String, Option<u32>, String)],
    output: &std::path::Path,
) -> anyhow::Result<usize> {
    let mut conn = rusqlite::Connection::open(output)?;
    let txn = conn.transaction()?;
    {
        let mut insert = txn.prepare(
            "INSERT INTO xrefs (layer, string_value, function_id, function_name) VALUES (?1, ?2, ?3, ?4)",
        )?;
        for entry in entries {
            let (layer, string_value, function_id, function_name) = entry;
            insert.execute(rusqlite::params![
                layer,
                string_value,
                function_id,
                function_name
            ])?;
        }
    }
    txn.commit()?;
    conn.execute("INSERT INTO xrefs_fts(xrefs_fts) VALUES('rebuild')", [])?;
    Ok(entries.len())
}
pub fn write_semgrep_db(
semgrep_json: &str,
output: &std::path::Path,
) -> anyhow::Result<usize> {
let parsed: serde_json::Value = serde_json::from_str(semgrep_json)?;
let results = parsed
.get("results")
.and_then(|v| v.as_array())
.cloned()
.unwrap_or_default();
let mut db = rusqlite::Connection::open(output)?;
let tx = db.transaction()?;
let mut count = 0usize;
{
let mut stmt = tx.prepare(
"INSERT INTO findings (severity, layer, id_tag, gauge_class, source, confidence, detail, cwe, extra) \
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)",
)?;
for r in &results {
let check_id = r["check_id"].as_str().unwrap_or("");
let path = r.get("path").and_then(Value::as_str).unwrap_or("");
let class_name = std::path::Path::new(path)
.file_stem()
.and_then(|s| s.to_str())
.map(|s| s.replace('_', "."));
let start_line = r.get("start").and_then(|v| v.get("line")).and_then(Value::as_i64);
let end_line = r.get("end").and_then(|v| v.get("line")).and_then(Value::as_i64);
let extra = r.get("extra");
let sg_severity = extra
.and_then(|e| e.get("severity"))
.and_then(Value::as_str)
.unwrap_or("INFO");
let message = extra
.and_then(|e| e.get("message"))
.and_then(Value::as_str)
.unwrap_or("");
let metadata = extra.and_then(|e| e.get("metadata"));
let cwe_str = metadata
.and_then(|m| m.get("cwe"))
.and_then(Value::as_array)
.map(|arr| arr.iter().filter_map(Value::as_str).collect::<Vec<_>>().join("; "));
let category = metadata.and_then(|m| m.get("category")).and_then(Value::as_str);
let severity = match sg_severity {
"ERROR" => "High",
"WARNING" => "Medium",
_ => "Info",
};
let id_tag = format!("SEMGREP_{}", check_id);
let detail = format!("{}: {} ({})", check_id, message, class_name.as_deref().unwrap_or(path));
let extra = serde_json::json!({
"check_id": check_id, "path": path, "class_name": class_name,
"start_line": start_line, "end_line": end_line,
"message": message, "cwe": cwe_str, "category": category,
}).to_string();
stmt.execute(rusqlite::params![
severity, "Dex", &id_tag, "Semantic", "semgrep", "unverified",
&detail, Option::<u16>::None, &extra,
])?;
count = count.saturating_add(1);
}
}
tx.commit()?;
if count > 0 {
db.execute("INSERT INTO findings_fts(findings_fts) VALUES('rebuild')", [])?;
}
Ok(count)
}
/// Records an analyst decision on the finding `rowid` in the database at
/// `db_path` and returns the updated row as JSON.
///
/// `action` must be `"confirm"` (sets confidence to `confirmed`) or
/// `"dismiss"` (sets confidence to `dismissed` and stores `reason`, which
/// defaults to `"dismissed by analyst"`).
///
/// # Errors
///
/// * `action` is neither `"confirm"` nor `"dismiss"`. This is checked before
///   the database is opened, so a bad action never creates a database file.
/// * No finding has the given `rowid`.
/// * Any SQLite error.
pub fn triage_finding(
    db_path: &std::path::Path,
    rowid: i64,
    action: &str,
    reason: Option<&str>,
) -> anyhow::Result<serde_json::Value> {
    // `None` means confirm; `Some(reason)` means dismiss with that reason.
    let dismiss_reason = match action {
        "confirm" => None,
        "dismiss" => Some(reason.unwrap_or("dismissed by analyst")),
        _ => anyhow::bail!("action must be 'confirm' or 'dismiss'"),
    };
    let db = rusqlite::Connection::open(db_path)?;
    let updated = match dismiss_reason {
        None => db.execute(
            "UPDATE findings SET confidence = 'confirmed' WHERE rowid = ?1",
            rusqlite::params![rowid],
        )?,
        Some(why) => db.execute(
            "UPDATE findings SET confidence = 'dismissed', dismiss_reason = ?1 WHERE rowid = ?2",
            rusqlite::params![why, rowid],
        )?,
    };
    if updated == 0 {
        // Report the missing row directly instead of letting the follow-up
        // SELECT fail with an opaque "no rows" error.
        anyhow::bail!("no finding with rowid {rowid}");
    }
    let mut stmt = db.prepare(
        "SELECT rowid, severity, id_tag, source, confidence, dismiss_reason, detail FROM findings WHERE rowid = ?1",
    )?;
    let row = stmt.query_row(rusqlite::params![rowid], |r| {
        Ok(serde_json::json!({
            "rowid": r.get::<_, i64>(0)?,
            "severity": r.get::<_, String>(1)?,
            "id_tag": r.get::<_, String>(2)?,
            "source": r.get::<_, String>(3)?,
            "confidence": r.get::<_, String>(4)?,
            "dismiss_reason": r.get::<_, Option<String>>(5)?,
            "detail": r.get::<_, String>(6)?,
        }))
    })?;
    Ok(row)
}
/// Links High/Critical findings to the strings used by the classes they name.
///
/// Scans the `detail` text of every High/Critical row in `findings` for DEX
/// class descriptors (`Lpkg/Name;`), then walks the string→method xrefs of
/// each DEX file in `ctx`. For every method whose class was named by a
/// finding, one `finding_xrefs` row is written per (finding, string) pair.
/// Returns the number of rows written; `finding_xrefs_fts` is rebuilt only
/// when that number is non-zero.
///
/// Returns `Ok(0)` without requiring an APK when no finding names a class.
/// DEX files whose xref table cannot be built are skipped. The `_findings`
/// argument is unused; the findings are read back from the database.
#[allow(clippy::arithmetic_side_effects, clippy::as_conversions, reason = "`i + 1` dex-layer label bounded by ctx.dex.len() ≤ isize::MAX.")]
pub fn write_finding_xrefs_db(
    ctx: &CrossLayerContext,
    _findings: &[Finding],
    output: &std::path::Path,
) -> anyhow::Result<usize> {
    use std::collections::HashMap;

    let descriptor_re = regex::Regex::new(r"L([a-zA-Z0-9_$/]+);")?;
    let mut conn = rusqlite::Connection::open(output)?;

    // Dotted class name → rowids of the findings whose detail mentions it.
    let mut rowids_by_class: HashMap<String, Vec<i64>> = HashMap::new();
    {
        let mut select = conn.prepare(
            "SELECT rowid, detail FROM findings WHERE severity IN ('High', 'Critical')",
        )?;
        let mut cursor = select.query([])?;
        while let Some(record) = cursor.next()? {
            let finding_rowid: i64 = record.get(0)?;
            let detail: String = record.get(1)?;
            for caps in descriptor_re.captures_iter(&detail) {
                let dotted = caps[1].replace('/', ".");
                rowids_by_class.entry(dotted).or_default().push(finding_rowid);
            }
        }
    }
    if rowids_by_class.is_empty() {
        return Ok(0);
    }

    let apk = ctx.require_apk()?;
    let txn = conn.transaction()?;
    let mut written = 0usize;
    {
        let mut insert = txn.prepare(
            "INSERT INTO finding_xrefs (finding_rowid, layer, string_value, function_name) \
             VALUES (?1, ?2, ?3, ?4)",
        )?;
        for ((dex_pos, dex), apk_dex) in ctx.dex.iter().enumerate().zip(apk.dex.iter()) {
            let label = format!("dex{}", dex_pos + 1);
            let Ok(xrefs) = droidsaw_dex::xrefs::Xrefs::build(dex, &apk_dex.data) else {
                continue;
            };
            for (string_value, methods) in &xrefs.string_to_methods {
                for method in methods {
                    // `Lcom/foo/Bar;` → `com.foo.Bar`, matching the map keys.
                    let dotted = method
                        .class
                        .trim_start_matches('L')
                        .trim_end_matches(';')
                        .replace('/', ".");
                    if let Some(rowids) = rowids_by_class.get(&dotted) {
                        let function_name =
                            format!("{}->{}{}", method.class, method.name, method.proto);
                        for &finding_rowid in rowids {
                            insert.execute(rusqlite::params![
                                finding_rowid,
                                &label,
                                string_value,
                                &function_name
                            ])?;
                            written = written.saturating_add(1);
                        }
                    }
                }
            }
        }
    }
    txn.commit()?;
    if written > 0 {
        conn.execute(
            "INSERT INTO finding_xrefs_fts(finding_xrefs_fts) VALUES('rebuild')",
            [],
        )?;
    }
    Ok(written)
}
/// Writes the taint-flow findings among `findings` into the `taint_flows`
/// table of the database at `output` and returns how many rows were written.
///
/// Only findings whose id is `DEX_TAINT_FLOW`, `BRIDGE_TAINT_FLOW` or
/// `HBC_TAINT_FLOW` are considered. Source and sink types are parsed from the
/// finding's `detail`; a missing `func_id` is stored as 0; `source_offset` and
/// `sink_offset` come from the finding's `extra` JSON when present and are
/// NULL otherwise. `taint_flows_fts` is rebuilt only when rows were written.
pub fn write_taint_flows_db(
    findings: &[Finding],
    output: &std::path::Path,
) -> anyhow::Result<usize> {
    let mut conn = rusqlite::Connection::open(output)?;
    let txn = conn.transaction()?;
    let mut written = 0usize;
    {
        let mut insert = txn.prepare(
            "INSERT INTO taint_flows \
             (layer, func_id, source_type, sink_type, severity, cwe, source_offset, sink_offset) \
             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
        )?;
        for finding in findings {
            if !matches!(
                finding.id.as_str(),
                "DEX_TAINT_FLOW" | "BRIDGE_TAINT_FLOW" | "HBC_TAINT_FLOW"
            ) {
                continue;
            }
            let (source_type, sink_type) = parse_taint_source_sink(&finding.detail);
            let func_id = finding.func_id.unwrap_or(0);

            // `extra` is optional and may not be valid JSON; either way the
            // offsets fall back to NULL.
            let extra_json = finding
                .extra
                .as_deref()
                .and_then(|text| serde_json::from_str::<serde_json::Value>(text).ok());
            let offset_of = |key: &str| -> Option<i64> {
                extra_json
                    .as_ref()
                    .and_then(|json| json.get(key))
                    .and_then(serde_json::Value::as_i64)
            };
            let source_offset = offset_of("source_offset");
            let sink_offset = offset_of("sink_offset");

            insert.execute(rusqlite::params![
                format!("{:?}", finding.layer),
                func_id,
                source_type,
                sink_type,
                format!("{:?}", finding.severity),
                finding.cwe,
                source_offset,
                sink_offset,
            ])?;
            written = written.saturating_add(1);
        }
    }
    txn.commit()?;
    if written > 0 {
        conn.execute("INSERT INTO taint_flows_fts(taint_flows_fts) VALUES('rebuild')", [])?;
    }
    Ok(written)
}
/// Writes the cross-layer taint findings among `findings` into the
/// `cross_layer_taint_flows` table of the database at `output` and returns
/// how many rows were written.
///
/// Only findings with id `CROSS_LAYER_TAINT_FLOW` are considered, and only
/// when their `extra` field deserializes as a `CrossLayerTaintFinding`; the
/// rest are skipped silently. The stored source/sink type is the `Debug`
/// rendering of the value cut at the first `{` and trimmed. The FTS index is
/// rebuilt only when rows were written.
pub fn write_cross_layer_taint_flows_db(
    findings: &[Finding],
    output: &std::path::Path,
) -> anyhow::Result<usize> {
    use droidsaw_common::cross_layer_taint::CrossLayerTaintFinding;
    use droidsaw_dex::ids::MethodIdx;

    // Reduces a `Debug` rendering such as `Variant { .. }` to `Variant`.
    let variant_name = |debug_repr: String| -> String {
        debug_repr
            .split('{')
            .next()
            .unwrap_or("Unknown")
            .trim()
            .to_string()
    };

    let mut conn = rusqlite::Connection::open(output)?;
    let txn = conn.transaction()?;
    let mut written = 0usize;
    {
        let mut insert = txn.prepare(
            "INSERT INTO cross_layer_taint_flows \
             (js_module, js_method, dex_idx, method_idx, js_func_id, \
             native_func_id, native_class_descriptor, native_method_signature, \
             source_type, sink_type, severity, cwe) \
             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
        )?;
        for finding in findings {
            if finding.id.as_str() != "CROSS_LAYER_TAINT_FLOW" {
                continue;
            }
            let Some(payload) = finding.extra.as_deref() else {
                continue;
            };
            let flow = match serde_json::from_str::<CrossLayerTaintFinding<MethodIdx>>(payload) {
                Ok(flow) => flow,
                Err(_) => continue,
            };
            let source_type = variant_name(format!("{:?}", flow.js_source));
            let sink_type = variant_name(format!("{:?}", flow.native_sink));
            // A DEX index that does not fit in i64 is clamped to i64::MAX.
            let dex_idx = i64::try_from(flow.bridge.dex_idx).unwrap_or(i64::MAX);
            insert.execute(rusqlite::params![
                flow.bridge.js_module.as_str(),
                flow.bridge.js_method.as_str(),
                dex_idx,
                i64::from(flow.bridge.method_idx.0),
                flow.js_func_id,
                flow.native_func_id,
                flow.native_class_descriptor.as_deref(),
                flow.native_method_signature.as_deref(),
                source_type,
                sink_type,
                format!("{:?}", flow.severity),
                flow.cwe,
            ])?;
            written = written.saturating_add(1);
        }
    }
    txn.commit()?;
    if written > 0 {
        conn.execute(
            "INSERT INTO cross_layer_taint_flows_fts(cross_layer_taint_flows_fts) VALUES('rebuild')",
            [],
        )?;
    }
    Ok(written)
}
/// Splits a taint finding's `detail` text into `(source, sink)`.
///
/// Strips the first matching prefix among `intra-method taint: `,
/// `bridge taint: ` and `hbc taint: ` (if any), drops everything from the
/// first ` (func` onward, and splits the remainder at the first ` → `. Both
/// halves are trimmed. When there is no arrow, the whole remainder is the
/// source and the sink is `"Unknown"`.
pub(super) fn parse_taint_source_sink(detail: &str) -> (String, String) {
    const PREFIXES: [&str; 3] = ["intra-method taint: ", "bridge taint: ", "hbc taint: "];

    let body = PREFIXES
        .iter()
        .find_map(|prefix| detail.strip_prefix(*prefix))
        .unwrap_or(detail);
    let flow = match body.split_once(" (func") {
        Some((head, _)) => head,
        None => body,
    };
    match flow.split_once(" → ") {
        Some((source, sink)) => (source.trim().to_string(), sink.trim().to_string()),
        None => (flow.trim().to_string(), "Unknown".to_string()),
    }
}
/// Tests for the Square false-positive gate applied to trufflehog hits.
#[cfg(test)]
mod trufflehog_fp_tests {
    use super::{square_key_regex_set, trufflehog_is_known_fp};

    /// Runs the known-FP gate against a freshly built Square pattern set.
    fn flagged_as_fp(detector: &str, raw: &str) -> bool {
        let patterns = square_key_regex_set().expect("regex set");
        trufflehog_is_known_fp(detector, raw, &patterns)
    }

    #[test]
    fn square_real_production_access_token_admitted() {
        let token = "sq0atp-AbCdEfGhIjKlMnOpQrStUvWxYz0123456789_-aBcDe";
        assert_eq!(token.len(), 7 + 43, "test fixture length sanity");
        assert!(
            !flagged_as_fp("Square", token),
            "real production access token must be admitted"
        );
    }

    #[test]
    fn square_real_production_refresh_token_admitted() {
        let token = "sq0rtp-AbCdEfGhIjKlMnOpQrStUvWxYz0123456789_-aBcDe";
        assert_eq!(token.len(), 7 + 43);
        assert!(!flagged_as_fp("Square", token));
    }

    #[test]
    fn square_real_sandbox_access_token_admitted() {
        let token = "sandbox-sq0atb-AbCdEfGhIjKlMnOpQrStUv";
        assert_eq!(token.len(), 8 + 3 + 4 + 22);
        assert!(!flagged_as_fp("Square", token));
    }

    #[test]
    fn square_real_application_id_admitted() {
        let app_id = "sq0idp-AbCdEfGhIjKlMnOpQrStUv";
        assert_eq!(app_id.len(), 7 + 22);
        assert!(!flagged_as_fp("Square", app_id));
    }

    #[test]
    fn square_substring_in_class_metadata_rejected() {
        // Starts with `sq0` but has none of the documented key prefixes.
        let lookalike = "sq0abcXYZdefGHI012345jkl_-mnopqr";
        assert!(
            flagged_as_fp("Square", lookalike),
            "substring-shaped FP must be suppressed"
        );
    }

    #[test]
    fn square_wrong_length_rejected() {
        let too_short = "sq0atp-AbCdEfGhIj";
        let too_long = "sq0idp-AbCdEfGhIjKlMnOpQrStUvWxYz0123456789";
        assert!(flagged_as_fp("Square", too_short));
        assert!(flagged_as_fp("Square", too_long));
    }

    #[test]
    fn square_anchored_no_substring_match() {
        // A valid token with junk on either side must not match.
        let leading = "junk_sq0atp-AbCdEfGhIjKlMnOpQrStUvWxYz0123456789_-aBcDe";
        let trailing = "sq0atp-AbCdEfGhIjKlMnOpQrStUvWxYz0123456789_-aBcDe_junk";
        assert!(flagged_as_fp("Square", leading));
        assert!(flagged_as_fp("Square", trailing));
    }

    #[test]
    fn other_detectors_pass_through() {
        let samples = [
            ("AWS", "AKIAIOSFODNN7EXAMPLE"),
            ("PrivateKey", "-----BEGIN"),
            ("Slack", "xoxb-1234"),
            ("UnknownDetector", "anything"),
        ];
        for (detector, raw) in samples {
            assert!(!flagged_as_fp(detector, raw));
        }
    }
}
/// Tests that `export` does not persist shadowed duplicate class definitions.
#[cfg(test)]
mod shadow_gate_tests {
    use super::export;
    use crate::analysis::dup_class_fixture;
    use crate::context::CrossLayerContext;

    #[test]
    fn export_persists_only_canonical_class_row() {
        // A DEX-only context built from the duplicate-class fixture.
        let ctx = CrossLayerContext {
            path: "test://dup-class".to_string(),
            apk: None,
            hbc: None,
            hbc_parse_error: None,
            dex: vec![dup_class_fixture::for_export()],
            dex_direct_bytes: None,
            loaded_split_names: Vec::new(),
            hermes_findings: Vec::new(),
            permissive_recovery: droidsaw_apk::PermissiveRecoveryOpts::default(),
        };

        let scratch = tempfile::NamedTempFile::new().expect("temp db");
        let db_path = scratch.path().to_str().expect("utf-8 path").to_string();
        export(&ctx, &db_path).expect("export succeeds");

        let conn = rusqlite::Connection::open(&db_path).expect("open db");
        let persisted: i64 = conn
            .query_row(
                "SELECT COUNT(*) FROM classes WHERE layer = 'dex1'",
                [],
                |row| row.get(0),
            )
            .expect("count classes");
        assert_eq!(
            persisted, 1,
            "only the canonical (first-wins) class row may persist; \
             the shadow duplicate-class_idx row must be gated out"
        );
    }
}