droidsaw 2.0.0

DROIDSAW — unified Android reverse engineering CLI. Hermes, DEX, APK signing. JSON output, MCP server. Bytecode is not a security layer.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
use droidsaw_common::Finding;
use serde_json::{json, Value};

use crate::analysis;
use crate::context::CrossLayerContext;

use super::{credentials_fp, honeycomb_fp, meta, progress};

#[allow(
    clippy::arithmetic_side_effects,
    clippy::as_conversions,
    clippy::cast_possible_wrap,
    clippy::cast_possible_truncation,
    reason = "DEX struct-field indices widen u32→usize for `.get()` bounds-checked lookup; `i + 1` / `dex_idx + 1` dex-layer labels bounded by ctx.dex.len(); `j as i64` / `len() as i64` casts widen for SQLite bind params (realistic row counts ≪ i64::MAX). Row counters (string_rows/class_rows/etc.) are display-only and handled in the saturating commit."
)]
pub fn export(ctx: &CrossLayerContext, output: &str) -> anyhow::Result<Value> {
    // RAII drain guard: hbc.string_get / function_get emit hermes
    // findings on adversarial input. Without a drain on Drop, findings
    // leak into the next bundle on the same blocking-pool worker.
    let _drain_guard = crate::context::HermesFindingDrainGuard::install_discard();

    let mut db = rusqlite::Connection::open(output)?;

    db.execute_batch(
        "
        CREATE TABLE IF NOT EXISTS strings (id INTEGER, kind TEXT, value TEXT, length INTEGER, layer TEXT, PRIMARY KEY (id, layer));
        CREATE TABLE IF NOT EXISTS functions (id INTEGER, name TEXT, param_count INTEGER, offset INTEGER, size INTEGER, layer TEXT, PRIMARY KEY (id, layer));
        CREATE TABLE IF NOT EXISTS classes (id INTEGER, name TEXT, superclass TEXT, layer TEXT, PRIMARY KEY (id, layer));
        CREATE TABLE IF NOT EXISTS edges (caller_id INTEGER, caller_layer TEXT, callee_id INTEGER, callee_layer TEXT, bridge_type TEXT, PRIMARY KEY (caller_id, caller_layer, callee_id, callee_layer));
        CREATE VIRTUAL TABLE IF NOT EXISTS strings_fts USING fts5(value, content=strings, content_rowid=rowid);
    ",
    )?;

    let tx = db.transaction()?;

    let mut string_rows = 0u64;
    let mut function_rows = 0u64;
    let mut class_rows = 0u64;
    let mut edge_rows = 0u64;

    if let Some(hbc_owned) = ctx.hbc.as_ref() {
        let hbc = hbc_owned.hbc();
        for i in 0..hbc.string_count {
            // Index-build: corrupt entries get skipped so the audit
            // index has only well-formed rows. Typed signal preserved
            // via the predecessor stream's `HermesFinding` channel
            // for OOR (still emitted by `string_get`).
            let kind = match hbc.string_get(i) {
                Ok(Some(sd)) => match sd.kind {
                    0 => "literal",
                    1 => "ident",
                    _ => "other",
                },
                Ok(None) | Err(_) => continue,
            };
            let value = hbc.string_as_str_or_empty(i);
            tx.execute(
                "INSERT OR REPLACE INTO strings VALUES (?1,?2,?3,?4,'hbc')",
                rusqlite::params![i, kind, &*value, value.len() as i64],
            )?;
            // DISPLAY-ONLY: row counter for `row_counts` JSON field.
            string_rows = string_rows.saturating_add(1);
        }
        for fid in 0..hbc.function_count {
            let f = hbc.function_get(fid);
            let name = if f.name_id < hbc.string_count {
                hbc.string_as_str_or_empty(f.name_id).into_owned()
            } else {
                String::new()
            };
            tx.execute(
                "INSERT OR REPLACE INTO functions VALUES (?1,?2,?3,?4,?5,'hbc')",
                rusqlite::params![fid, name, f.param_count, f.offset, f.size],
            )?;
            function_rows = function_rows.saturating_add(1);
        }
    }

    for (i, dex) in ctx.dex.iter().enumerate() {
        let layer = format!("dex{}", i + 1);
        for (j, entry) in dex.strings.iter().enumerate() {
            let s = entry.as_str_lossy();
            tx.execute(
                "INSERT OR REPLACE INTO strings VALUES (?1,'dex',?2,?3,?4)",
                rusqlite::params![j as i64, s, s.len() as i64, layer],
            )?;
            string_rows = string_rows.saturating_add(1);
        }
        for (j, cd) in dex.class_defs.iter().enumerate() {
            // Shadow gate: the `classes` table is keyed by the enumerate
            // position `j`, so a duplicate-`class_idx` shadow row gets a
            // distinct key and BOTH rows persist — amplifying class counts
            // and introducing `class_idx`-join ambiguity for downstream
            // SQLite consumers (xrefs queries, finding correlation). Skip
            // the shadow row before the INSERT so the export reflects the
            // first-wins canonical class that `class_def_for_type` pins.
            if dex.class_def_is_shadowed(j) {
                continue;
            }
            let name = dex
                .type_descriptors
                .get(cd.class_idx.0 as usize)
                .cloned()
                .unwrap_or_default();
            let super_name = cd
                .superclass_idx
                .and_then(|s| dex.type_descriptors.get(s.0 as usize))
                .cloned()
                .unwrap_or_default();
            tx.execute(
                "INSERT OR REPLACE INTO classes VALUES (?1,?2,?3,?4)",
                rusqlite::params![j as i64, name, super_name, layer],
            )?;
            class_rows = class_rows.saturating_add(1);
        }
        for (j, m) in dex.methods.iter().enumerate() {
            let name = dex
                .strings
                .get(m.name_idx.0 as usize)
                .map(|e| e.as_str_lossy().to_string())
                .unwrap_or_default();
            tx.execute(
                "INSERT OR REPLACE INTO functions VALUES (?1,?2,0,0,0,?3)",
                rusqlite::params![j as i64, name, layer],
            )?;
            function_rows = function_rows.saturating_add(1);
        }
    }

    if let Some(hbc_owned) = ctx.hbc.as_ref() {
        let hbc = hbc_owned.hbc();
        let hbc_data = hbc_owned.bytes();
        let scan = droidsaw_hermes::scanner::scan_parsed(hbc, hbc_data);
        let bridge = analysis::bridge::BridgeResolver::resolve(ctx);

        let nm_id =
            (0..hbc.string_count).find(|&i| hbc.string_as_str_or_empty(i) == "NativeModules");
        if let Some(nm_id) = nm_id
            && let Some(ref_fids) = scan.string_refs.get(&nm_id)
        {
            for &fid in ref_fids {
                for (&str_id, fids) in &scan.string_refs {
                    if fids.contains(&fid) && str_id != nm_id {
                        let method_name = hbc.string_as_str_or_empty(str_id).into_owned();
                        // Legacy by-method-name lookup — this xref export
                        // builder has only the JS-side method-name string
                        // from the string-table scan, no (module, method)
                        // pair, so the per-(module, method) `bridge.mappings`
                        // index can't be keyed against it.
                        if let Some(targets) = bridge.by_method.get(&method_name) {
                            for (dex_idx, m_idx) in targets {
                                let layer = format!("dex{}", dex_idx + 1);
                                tx.execute(
                                    "INSERT OR IGNORE INTO edges VALUES (?1,'hbc',?2,?3,'bridge')",
                                    rusqlite::params![fid, m_idx.0, layer],
                                )?;
                                edge_rows = edge_rows.saturating_add(1);
                            }
                        }
                    }
                }
            }
        }
    }

    tx.commit()?;
    db.execute("INSERT INTO strings_fts(strings_fts) VALUES('rebuild')", [])?;

    progress!("exported to {output:?}");
    let out = json!({
        "output": output,
        "tables": ["strings", "functions", "classes", "edges", "strings_fts"],
        "row_counts": {
            "strings": string_rows,
            "functions": function_rows,
            "classes": class_rows,
            "edges": edge_rows,
        },
        "_meta": meta(
            1,
            false,
            "query with `sqlite3 <output>`; strings_fts is a full-text search index over strings.value",
            &["audit", "info", "sbom"],
        ),
    });
    Ok(out)
}

/// Write a slice of findings to a SQLite database.
///
/// Schema:
/// - `findings(id, severity, layer, id_tag, detail, cwe)` — structured rows
/// - `findings_fts` — FTS5 content table over `detail`, ranked by BM25
///
/// Returns the path written and the row count.
/// Write findings, credentials, and xrefs to a single audit SQLite database.
///
/// Tables:
/// Creates (or opens) the audit SQLite database and ensures all tables exist:
/// - `findings(severity, layer, id_tag, detail, cwe)` + `findings_fts` BM25
/// - `credentials(detector, raw, verified, extra)` + `credentials_fts` BM25
/// - `xrefs(layer, string_value, function_id, function_name)` + `xrefs_fts` BM25
/// - `taint_flows(layer, func_id, source_type, sink_type, severity, cwe)` + `taint_flows_fts` BM25
///
/// `taint_flows` is separate from `xrefs`: xrefs are string→function edges
/// (what strings appear in what functions), while taint_flows are structured
/// source→sink propagation results (user input reaching a dangerous call).
/// They answer different questions and are best queried independently.
/// Schema revision recorded in `schema_meta`. Bumped whenever
/// `migrate_findings_schema_to_current` adds a migration step.
///
/// **v2** — threat-model foundations: adds new columns on
/// `findings` (`adversary_profile_relevance`, `completeness`, `source_api`,
/// `source_api_args`, `resolution`) plus three new tables (`third_parties`,
/// `acquisition_metadata`, `declared_claims`). Strictly additive: every v1
/// reader continues to read its old columns at the old indices.
///
/// **v3** — credentials-view extension: drops + recreates the
/// `credentials` view so the documented audit-response queries
/// (`SELECT detector,raw,verified FROM credentials WHERE verified=1`)
/// actually parse + execute. The new projection exposes
/// `detector` / `raw` / `verified` columns via `json_extract` over the
/// underlying `findings.extra` JSON. Strictly additive at the SQL level
/// (DROP VIEW + CREATE VIEW preserves the underlying findings rows).
///
/// **v4** — modular-modes incremental DB: adds `run_id`,
/// `mode`, `signature_hash` columns to `findings` (all nullable to keep
/// the v1/v2/v3 reader backward-compatible) plus a partial UNIQUE index
/// `findings_signature_hash_uniq` on `signature_hash WHERE signature_hash
/// IS NOT NULL`. Powers the `audit --mode=<basic|full|semgrep|trufflehog>`
/// CLI surface and the matching MCP `mode`/`update_db` params: re-running
/// the same mode against an existing DB upserts by `signature_hash`
/// (`INSERT OR IGNORE` returns 0 inserts on duplicate identity); running a
/// different mode adds rows without clobbering. Strictly additive: legacy
/// `write_findings_db` callers leave `run_id`/`mode` NULL and the partial
/// index ignores those rows, so legacy readers are unaffected.
///
/// **v5** — taint offset wiring: adds `source_offset` and
/// `sink_offset` (both nullable `INTEGER`) to the `taint_flows` table.
/// DEX taint findings populate both from the Dalvik instruction address at
/// the source invoke and the sink invoke respectively. HBC and bridge-seeded
/// findings leave them NULL by design (see `hbc_taint.rs` doc-comment).
/// Strictly additive: existing `taint_flows` readers see the new columns
/// as NULL until re-audited with the v5 binary.
///
/// **v6** — cross-layer taint composite: adds the
/// `cross_layer_taint_flows` table + `cross_layer_taint_flows_fts` FTS5
/// virtual table + two indexes (`idx_cross_layer_bridge` on `(js_module,
/// js_method)`, `idx_cross_layer_severity` on `(severity)`) + the
/// `cross_layer_taint_critical` view. One row per
/// `CROSS_LAYER_TAINT_FLOW` finding stitched by `cross_layer_stitch.rs`:
/// columns project the `CrossLayerTaintFinding<MethodIdx>` payload that
/// rides `Finding.extra` (js_module / js_method / dex_idx / method_idx /
/// js_func_id / native_func_id / native_class_descriptor /
/// native_method_signature / source_type / sink_type / severity / cwe).
/// Strictly additive: legacy `taint_flows` rows unchanged; pre-v6
/// readers ignore the new table.
pub const FINDINGS_SCHEMA_REV: i64 = 6;

/// Apply additive schema migrations on `db` so it is at the current
/// `FINDINGS_SCHEMA_REV`. Reads `schema_meta` to determine the starting
/// revision; runs every step strictly greater than that revision; records
/// the new revision in `schema_meta` on success.
///
/// Called by `write_findings_db` after the initial `CREATE TABLE IF NOT
/// EXISTS` batch creates the v1 tables. The migrations land additively:
/// `ADD COLUMN` for findings; `CREATE TABLE IF NOT EXISTS` for the new
/// threat-model tables. Reads of the v1 schema continue to work unchanged.
pub fn migrate_findings_schema_to_current(db: &mut rusqlite::Connection) -> anyhow::Result<()> {
    db.execute(
        "CREATE TABLE IF NOT EXISTS schema_meta (
            schema_rev INTEGER PRIMARY KEY,
            applied_at TEXT NOT NULL
        )",
        [],
    )?;
    let current: i64 = db.query_row(
        "SELECT COALESCE(MAX(schema_rev), 0) FROM schema_meta",
        [],
        |r| r.get(0),
    )?;
    if current >= FINDINGS_SCHEMA_REV {
        return Ok(());
    }

    let tx = db.transaction()?;

    if current < 2 {
        // Findings columns — additive. ALTER TABLE … ADD COLUMN has no IF NOT
        // EXISTS form in SQLite, so we guard via PRAGMA table_info to keep the
        // call idempotent across re-runs that somehow saw schema_meta blanked.
        if !findings_has_column(&tx, "adversary_profile_relevance")? {
            tx.execute_batch(
                "ALTER TABLE findings ADD COLUMN adversary_profile_relevance TEXT NOT NULL DEFAULT '[]';
                 ALTER TABLE findings ADD COLUMN completeness               TEXT NOT NULL DEFAULT 'present';
                 ALTER TABLE findings ADD COLUMN source_api                 TEXT;
                 ALTER TABLE findings ADD COLUMN source_api_args            TEXT;
                 ALTER TABLE findings ADD COLUMN resolution                 TEXT;",
            )?;
        }

        // New threat-model tables. CREATE TABLE IF NOT EXISTS is idempotent,
        // so a second migration pass leaves the tables alone.
        tx.execute_batch(
            "CREATE TABLE IF NOT EXISTS third_parties (
                rowid               INTEGER PRIMARY KEY,
                identity            TEXT NOT NULL,
                version             TEXT,
                first_seen_location TEXT,
                jurisdictions       TEXT NOT NULL DEFAULT '[]'
            );
            CREATE TABLE IF NOT EXISTS acquisition_metadata (
                rowid              INTEGER PRIMARY KEY CHECK (rowid = 1),
                source_kind        TEXT NOT NULL DEFAULT 'unknown',
                operator           TEXT,
                authority_ref      TEXT,
                acquired_at        TEXT,
                pre_analysis_hash  TEXT
            );
            CREATE TABLE IF NOT EXISTS declared_claims (
                rowid      INTEGER PRIMARY KEY,
                source     TEXT NOT NULL,
                category   TEXT NOT NULL,
                declared   TEXT NOT NULL,
                hash       TEXT NOT NULL
            );",
        )?;
    }

    if current < 3 {
        // Replace the v2 credentials view (which projected only
        // `rowid, severity, confidence, detail, extra`) with a v3 view
        // that exposes the documented `detector`/`raw`/`verified` columns
        // via `json_extract` over the underlying `findings.extra` JSON.
        // The trufflehog ingestion path (`write_credentials_db`) writes
        // `extra = {"detector":..., "raw":..., "verified":...}`; v3 makes
        // those queryable by name to match the audit-response query
        // examples emitted at `mcp/mod.rs:768/770/801`.
        tx.execute_batch(
            "DROP VIEW IF EXISTS credentials;
             CREATE VIEW credentials AS
                 SELECT rowid, severity, confidence, detail,
                        json_extract(extra, '$.detector') AS detector,
                        json_extract(extra, '$.raw')      AS raw,
                        json_extract(extra, '$.verified') AS verified,
                        extra
                 FROM findings WHERE source = 'trufflehog';",
        )?;
    }

    if current < 4 {
        // Modular-modes columns. Nullable so legacy `write_findings_db`
        // (which doesn't know about runs/modes) can keep inserting
        // without re-tagging every row. PRAGMA-guard each column for
        // idempotency (a partially-applied v4 leaves us free to re-run).
        if !findings_has_column(&tx, "run_id")? {
            tx.execute_batch("ALTER TABLE findings ADD COLUMN run_id TEXT;")?;
        }
        if !findings_has_column(&tx, "mode")? {
            tx.execute_batch("ALTER TABLE findings ADD COLUMN mode TEXT;")?;
        }
        if !findings_has_column(&tx, "signature_hash")? {
            tx.execute_batch("ALTER TABLE findings ADD COLUMN signature_hash TEXT;")?;
        }
        // Partial UNIQUE index: legacy NULL-signature rows (from the
        // `write_findings_db` shim, threat-model envelope writer, etc.)
        // coexist freely; only modular-mode rows participate in
        // upsert-by-identity. `IF NOT EXISTS` makes the call idempotent
        // across re-runs that saw the column-add already applied.
        tx.execute_batch(
            "CREATE UNIQUE INDEX IF NOT EXISTS findings_signature_hash_uniq \
             ON findings(signature_hash) WHERE signature_hash IS NOT NULL;",
        )?;
    }

    if current < 5 {
        // Taint-offset wiring: add nullable source_offset / sink_offset to
        // taint_flows for existing DBs. Fresh DBs get the columns from the
        // CREATE TABLE DDL; only v1–v4 DBs that already have taint_flows
        // need ALTER TABLE. Skip entirely when taint_flows doesn't exist
        // yet — it will be created with the correct schema by write_findings_db.
        if taint_flows_has_column(&tx, "func_id")? {
            // Table exists (has at least one old column). Add offset columns.
            if !taint_flows_has_column(&tx, "source_offset")? {
                tx.execute_batch("ALTER TABLE taint_flows ADD COLUMN source_offset INTEGER;")?;
            }
            if !taint_flows_has_column(&tx, "sink_offset")? {
                tx.execute_batch("ALTER TABLE taint_flows ADD COLUMN sink_offset INTEGER;")?;
            }
        }
        // If taint_flows doesn't exist yet, skip: write_findings_db will
        // CREATE it with source_offset + sink_offset already in the DDL.
    }

    if current < 6 {
        // Cross-layer taint composite: new table + FTS5 + 2 indexes + view.
        // Every DDL is `IF NOT EXISTS` so a fresh DB (which gets the same
        // batch from WRITE_FINDINGS_DB_SCHEMA on first creation) is a no-op
        // here.
        tx.execute_batch(CROSS_LAYER_TAINT_FLOWS_SCHEMA)?;
    }

    tx.execute(
        "INSERT INTO schema_meta(schema_rev, applied_at) VALUES (?1, ?2)",
        rusqlite::params![FINDINGS_SCHEMA_REV, chrono::Utc::now().to_rfc3339()],
    )?;
    tx.commit()?;
    Ok(())
}

/// DDL batch for the v6 cross-layer-taint-flows surface. Shared between the
/// fresh-DB path (embedded in [`WRITE_FINDINGS_DB_SCHEMA`]) and the v5→v6
/// migration step in [`migrate_findings_schema_to_current`]. Every statement
/// is `IF NOT EXISTS` so applying twice is a no-op.
const CROSS_LAYER_TAINT_FLOWS_SCHEMA: &str = "
    CREATE TABLE IF NOT EXISTS cross_layer_taint_flows (
        rowid                    INTEGER PRIMARY KEY,
        js_module                TEXT NOT NULL,
        js_method                TEXT NOT NULL,
        dex_idx                  INTEGER NOT NULL,
        method_idx               INTEGER NOT NULL,
        js_func_id               INTEGER NOT NULL,
        native_func_id           INTEGER NOT NULL,
        native_class_descriptor  TEXT,
        native_method_signature  TEXT,
        source_type              TEXT NOT NULL,
        sink_type                TEXT NOT NULL,
        severity                 TEXT NOT NULL,
        cwe                      INTEGER
    );
    CREATE VIRTUAL TABLE IF NOT EXISTS cross_layer_taint_flows_fts
        USING fts5(js_module, js_method, native_class_descriptor,
                   native_method_signature, source_type, sink_type,
                   content=cross_layer_taint_flows, content_rowid=rowid,
                   tokenize='porter unicode61');
    CREATE INDEX IF NOT EXISTS idx_cross_layer_bridge
        ON cross_layer_taint_flows(js_module, js_method);
    CREATE INDEX IF NOT EXISTS idx_cross_layer_severity
        ON cross_layer_taint_flows(severity);
    CREATE VIEW IF NOT EXISTS cross_layer_taint_critical AS
        SELECT rowid, js_module, js_method, native_class_descriptor,
               native_method_signature, source_type, sink_type, severity, cwe
        FROM cross_layer_taint_flows
        WHERE severity IN ('Critical', 'High')
        ORDER BY CASE severity WHEN 'Critical' THEN 0 WHEN 'High' THEN 1 END,
                 rowid;
";

fn findings_has_column(
    db: &rusqlite::Connection,
    column: &str,
) -> rusqlite::Result<bool> {
    let mut stmt = db.prepare("PRAGMA table_info(findings)")?;
    let mut rows = stmt.query([])?;
    while let Some(row) = rows.next()? {
        let name: String = row.get(1)?;
        if name == column {
            return Ok(true);
        }
    }
    Ok(false)
}

fn taint_flows_has_column(
    db: &rusqlite::Connection,
    column: &str,
) -> rusqlite::Result<bool> {
    let mut stmt = db.prepare("PRAGMA table_info(taint_flows)")?;
    let mut rows = stmt.query([])?;
    while let Some(row) = rows.next()? {
        let name: String = row.get(1)?;
        if name == column {
            return Ok(true);
        }
    }
    Ok(false)
}

/// Stable finding-identity hash used by the v4 modular-modes upsert path.
///
/// The identity tuple is `(layer, id_tag, source, detail)`. Fields fed
/// through ASCII labels (`Layer::as_str`, `Source::as_str`) so a finding's
/// hash does not depend on `Debug`-formatting (which is allowed to change
/// freely for diagnostics). Severity is intentionally NOT part of the
/// identity — re-classification under a calibration update should still
/// upsert as the same finding rather than landing twice.
///
/// Fields are joined with a NUL byte (`0x00`) which can never appear in
/// any production input (id_tag is a Rust identifier, layer/source are
/// closed lowercase enums, detail is human-readable text). Any input
/// that managed to embed a NUL would still produce a deterministic
/// hash; collisions across distinct finding tuples require finding two
/// inputs that differ only in NUL placement, which is not feasible
/// against SHA-256.
///
/// SHA-256 hex output is 64 characters, sized for a TEXT column without
/// growing past sqlite's row-size budget for the rest of `findings`.
pub fn finding_signature_hash(f: &Finding) -> String {
    use sha2::{Digest, Sha256};
    let mut h = Sha256::new();
    h.update(f.layer.as_str().as_bytes());
    h.update(b"\0");
    h.update(f.id.as_bytes());
    h.update(b"\0");
    h.update(f.source.as_str().as_bytes());
    h.update(b"\0");
    h.update(f.detail.as_bytes());
    let digest = h.finalize();
    let mut out = String::with_capacity(64);
    for byte in digest.iter() {
        // Two-hex-char render. `format!` would allocate; this preallocates.
        let hi = byte >> 4;
        let lo = byte & 0x0f;
        out.push(hex_nibble(hi));
        out.push(hex_nibble(lo));
    }
    out
}

#[inline]
pub(super) const fn hex_nibble(n: u8) -> char {
    #[allow(clippy::arithmetic_side_effects, reason = "every arithmetic expression below is bounded by the match arm. `0..=9` ⇒ `b'0' + n ∈ 0x30..=0x39` (no u8 overflow). `10..=15` ⇒ `b'a' + n ∈ 0x6b..=0x70` then `- 10` ⇒ `0x61..=0x66` (no u8 overflow on either step). `_` arm returns a sentinel since `unreachable!()` would trip the panic-deny lint; callers feed `byte >> 4` or `byte & 0x0f` so this arm is never reached.")]
    #[allow(clippy::as_conversions, reason = "PROOF: u8 → char widening; the arm guards confine the u8 to the 0x30..=0x39 (digits) or 0x61..=0x66 (lowercase hex) range, which are all valid ASCII code points. `as char` is the canonical byte-to-ASCII-char conversion.")]
    match n {
        0..=9 => (b'0' + n) as char,
        10..=15 => (b'a' + (n - 10)) as char,
        _ => '?',
    }
}

/// Modular-modes incremental-DB writer: appends `findings` to the audit
/// SQLite at `output` under the supplied `run_id` / `mode` tags, with
/// upsert-by-identity semantics.
///
/// - **`update_db = true` (default for `audit --mode=<mode>`)**: opens
///   the DB additively. `INSERT OR IGNORE` against the partial UNIQUE
///   index on `signature_hash` means re-running the same mode does not
///   accumulate duplicate rows. Running a different mode adds new rows;
///   prior modes' rows remain untouched (the index's IGNORE collision
///   set is bounded to actual signature collisions, not mode-keyed).
///
/// - **`update_db = false` (`--no-update-db`)**: clears prior rows
///   filtered by `(mode == self.mode)` before inserting the new rows.
///   When `mode` is `None`, clears every row in `findings` (legacy
///   one-shot mode — equivalent to `write_findings_db` semantics on a
///   cleared file). The supporting tables (`finding_xrefs`,
///   `taint_flows`, etc.) are not touched here; the caller is
///   responsible for re-populating them.
///
/// Returns the count of newly-inserted rows (i.e. excluding rows
/// suppressed by the UNIQUE-index IGNORE on duplicate identity). The
/// `findings_fts` virtual table is rebuilt at the end so FTS reads see
/// the freshly-inserted rows.
pub fn write_findings_db_with_run(
    findings: &[Finding],
    output: &std::path::Path,
    run_id: Option<&str>,
    mode: Option<&str>,
    update_db: bool,
) -> anyhow::Result<usize> {
    let mut db = rusqlite::Connection::open(output)?;
    ensure_findings_schema(&mut db)?;

    if !update_db {
        // One-shot semantics. If `mode` is set, scope the delete; else
        // clear the whole table (legacy `write_findings_db` shape).
        match mode {
            Some(m) => {
                db.execute("DELETE FROM findings WHERE mode = ?1", rusqlite::params![m])?;
            }
            None => {
                db.execute("DELETE FROM findings", [])?;
            }
        }
    }

    let tx = db.transaction()?;
    let mut inserted = 0usize;
    {
        let mut stmt = tx.prepare(
            "INSERT OR IGNORE INTO findings \
             (severity, layer, id_tag, gauge_class, source, confidence, detail, cwe, extra, run_id, mode, signature_hash) \
             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
        )?;
        for f in findings {
            let sig = finding_signature_hash(f);
            let n = stmt.execute(rusqlite::params![
                format!("{:?}", f.severity),
                format!("{:?}", f.layer),
                f.id,
                f.gauge_class.as_str(),
                f.source.as_str(),
                f.confidence.as_str(),
                f.detail,
                f.cwe,
                f.extra,
                run_id,
                mode,
                sig,
            ])?;
            // `INSERT OR IGNORE` returns 0 on conflict-suppress, 1 on
            // actual insert. Saturating-add is exact for any realistic
            // findings.len() < usize::MAX.
            //
            // PROOF: `n` is `INSERT OR IGNORE` result, in {0, 1}; bounded.
            #[allow(clippy::arithmetic_side_effects, reason = "`n` is `INSERT OR IGNORE` result, in {0, 1}; bounded.")]
            { inserted += n; }
        }
    }
    tx.commit()?;
    db.execute("INSERT INTO findings_fts(findings_fts) VALUES('rebuild')", [])?;
    Ok(inserted)
}

/// Apply the `write_findings_db` schema-create batch to `db`. Factored
/// out so `write_findings_db_with_run` can opt into the same DDL on a
/// fresh DB without duplicating the SQL string.
fn ensure_findings_schema(db: &mut rusqlite::Connection) -> anyhow::Result<()> {
    db.execute_batch(WRITE_FINDINGS_DB_SCHEMA)?;
    migrate_findings_schema_to_current(db)?;
    Ok(())
}

/// Path-taking wrapper around `ensure_findings_schema` for callers
/// that hold a `&Path` rather than an open `Connection`. Used by
/// `crate::semgrep::run::run_and_persist` to ensure the findings table
/// plus `semgrep_hits` / `semgrep_hotspots` views exist before
/// [`write_semgrep_db`] inserts into them. Idempotent — safe to call
/// against a DB that already has the schema (every DDL is
/// `CREATE … IF NOT EXISTS` and the migration table tracks applied revs).
pub fn ensure_findings_db_schema(output: &std::path::Path) -> anyhow::Result<()> {
    let mut db = rusqlite::Connection::open(output)?;
    ensure_findings_schema(&mut db)
}

/// Schema-create batch shared by `write_findings_db` (legacy shim) and
/// `write_findings_db_with_run` (modular-modes path). Kept verbatim to
/// `IF NOT EXISTS` form for idempotency across both callers.
const WRITE_FINDINGS_DB_SCHEMA: &str =
    "
        CREATE TABLE IF NOT EXISTS findings (
            rowid          INTEGER PRIMARY KEY,
            severity       TEXT NOT NULL,
            layer          TEXT NOT NULL,
            id_tag         TEXT NOT NULL,
            gauge_class    TEXT NOT NULL DEFAULT 'Semantic',
            source         TEXT NOT NULL DEFAULT 'manifest',
            confidence     TEXT NOT NULL DEFAULT 'unverified',
            dismiss_reason TEXT,
            detail         TEXT NOT NULL,
            cwe            INTEGER,
            extra          TEXT
        );
        CREATE VIRTUAL TABLE IF NOT EXISTS findings_fts
            USING fts5(detail, id_tag, source, content=findings, content_rowid=rowid, tokenize='porter unicode61');

        CREATE VIEW IF NOT EXISTS credentials AS
            SELECT rowid, severity, confidence, detail,
                   json_extract(extra, '$.detector') AS detector,
                   json_extract(extra, '$.raw')      AS raw,
                   json_extract(extra, '$.verified') AS verified,
                   extra
            FROM findings WHERE source = 'trufflehog';

        CREATE TABLE IF NOT EXISTS xrefs (
            rowid         INTEGER PRIMARY KEY,
            layer         TEXT NOT NULL,
            string_value  TEXT NOT NULL,
            function_id   INTEGER,
            function_name TEXT
        );
        CREATE VIRTUAL TABLE IF NOT EXISTS xrefs_fts
            USING fts5(string_value, function_name, content=xrefs, content_rowid=rowid, tokenize='porter unicode61');

        CREATE TABLE IF NOT EXISTS taint_flows (
            rowid          INTEGER PRIMARY KEY,
            layer          TEXT NOT NULL,
            func_id        INTEGER NOT NULL,
            source_type    TEXT NOT NULL,
            sink_type      TEXT NOT NULL,
            severity       TEXT NOT NULL,
            cwe            INTEGER,
            source_offset  INTEGER,
            sink_offset    INTEGER
        );
        CREATE VIRTUAL TABLE IF NOT EXISTS taint_flows_fts
            USING fts5(source_type, sink_type, content=taint_flows, content_rowid=rowid, tokenize='porter unicode61');

        CREATE TABLE IF NOT EXISTS cross_layer_taint_flows (
            rowid                    INTEGER PRIMARY KEY,
            js_module                TEXT NOT NULL,
            js_method                TEXT NOT NULL,
            dex_idx                  INTEGER NOT NULL,
            method_idx               INTEGER NOT NULL,
            js_func_id               INTEGER NOT NULL,
            native_func_id           INTEGER NOT NULL,
            native_class_descriptor  TEXT,
            native_method_signature  TEXT,
            source_type              TEXT NOT NULL,
            sink_type                TEXT NOT NULL,
            severity                 TEXT NOT NULL,
            cwe                      INTEGER
        );
        CREATE VIRTUAL TABLE IF NOT EXISTS cross_layer_taint_flows_fts
            USING fts5(js_module, js_method, native_class_descriptor,
                       native_method_signature, source_type, sink_type,
                       content=cross_layer_taint_flows, content_rowid=rowid,
                       tokenize='porter unicode61');
        CREATE INDEX IF NOT EXISTS idx_cross_layer_bridge
            ON cross_layer_taint_flows(js_module, js_method);
        CREATE INDEX IF NOT EXISTS idx_cross_layer_severity
            ON cross_layer_taint_flows(severity);
        CREATE VIEW IF NOT EXISTS cross_layer_taint_critical AS
            SELECT rowid, js_module, js_method, native_class_descriptor,
                   native_method_signature, source_type, sink_type, severity, cwe
            FROM cross_layer_taint_flows
            WHERE severity IN ('Critical', 'High')
            ORDER BY CASE severity WHEN 'Critical' THEN 0 WHEN 'High' THEN 1 END,
                     rowid;

        CREATE VIEW IF NOT EXISTS semgrep_hits AS
            SELECT rowid, severity, id_tag, detail,
                   json_extract(extra, '$.class_name') AS class_name,
                   json_extract(extra, '$.check_id') AS check_id,
                   json_extract(extra, '$.message') AS message
            FROM findings WHERE source = 'semgrep';

        CREATE TABLE IF NOT EXISTS finding_xrefs (
            rowid         INTEGER PRIMARY KEY,
            finding_rowid INTEGER NOT NULL,
            layer         TEXT NOT NULL,
            string_value  TEXT NOT NULL,
            function_name TEXT NOT NULL
        );
        CREATE VIRTUAL TABLE IF NOT EXISTS finding_xrefs_fts
            USING fts5(string_value, function_name, content=finding_xrefs, content_rowid=rowid, tokenize='porter unicode61');

        CREATE VIEW IF NOT EXISTS actionable_findings AS
            SELECT rowid, severity, id_tag, source, confidence, detail, cwe
            FROM findings
            WHERE severity IN ('Critical', 'High') AND gauge_class = 'Semantic'
              AND confidence != 'dismissed'
            ORDER BY CASE severity WHEN 'Critical' THEN 0 WHEN 'High' THEN 1 END, rowid;

        CREATE VIEW IF NOT EXISTS dismissed_findings AS
            SELECT rowid, severity, id_tag, source, detail, dismiss_reason
            FROM findings WHERE confidence = 'dismissed';

        CREATE VIEW IF NOT EXISTS confirmed_findings AS
            SELECT rowid, severity, id_tag, source, detail, cwe
            FROM findings WHERE confidence = 'confirmed';

        CREATE VIEW IF NOT EXISTS confidence_summary AS
            SELECT source, confidence, COUNT(*) AS n
            FROM findings GROUP BY source, confidence ORDER BY source, confidence;

        CREATE VIEW IF NOT EXISTS finding_context AS
            SELECT f.rowid AS finding_rowid, f.severity, f.id_tag, f.detail,
                   fx.string_value, fx.function_name, fx.layer
            FROM findings f
            JOIN finding_xrefs fx ON f.rowid = fx.finding_rowid
            ORDER BY f.severity, fx.string_value;

        CREATE VIEW IF NOT EXISTS finding_urls AS
            SELECT DISTINCT f.id_tag, f.severity, fx.string_value AS url
            FROM findings f
            JOIN finding_xrefs fx ON f.rowid = fx.finding_rowid
            WHERE fx.string_value LIKE 'http%' OR fx.string_value LIKE '%://%'
            ORDER BY f.severity, fx.string_value;

        CREATE VIEW IF NOT EXISTS taint_critical AS
            SELECT func_id, source_type, sink_type, severity, cwe, layer
            FROM taint_flows
            WHERE severity IN ('Critical', 'High')
            ORDER BY CASE severity WHEN 'Critical' THEN 0 WHEN 'High' THEN 1 END;

        CREATE VIEW IF NOT EXISTS semgrep_hotspots AS
            SELECT class_name, COUNT(*) AS n, GROUP_CONCAT(DISTINCT check_id) AS rules
            FROM semgrep_results
            GROUP BY class_name
            HAVING n > 1
            ORDER BY n DESC;

        CREATE VIEW IF NOT EXISTS audit_summary AS
            SELECT
                (SELECT COUNT(*) FROM findings) AS total_findings,
                (SELECT COUNT(*) FROM findings WHERE severity IN ('Critical','High') AND gauge_class='Semantic' AND confidence!='dismissed') AS actionable,
                (SELECT COUNT(*) FROM findings WHERE source='taint') AS taint_flows,
                (SELECT COUNT(*) FROM findings WHERE source='trufflehog') AS credential_hits,
                (SELECT COUNT(*) FROM findings WHERE source='semgrep') AS semgrep_hits,
                (SELECT COUNT(*) FROM findings WHERE confidence='confirmed') AS confirmed,
                (SELECT COUNT(*) FROM findings WHERE confidence='dismissed') AS dismissed,
                (SELECT COUNT(*) FROM finding_xrefs) AS finding_xrefs;
        ";

/// Legacy single-shot findings writer. Backed by the modular-modes
/// upsert path with `(run_id=None, mode=None, update_db=true)`, which
/// matches the prior "open or create the DB, append findings"
/// semantics. Rows inserted via this path leave `run_id` and `mode`
/// NULL and DO carry a `signature_hash` (so callers that later switch
/// to `write_findings_db_with_run` get upsert-by-identity for free).
///
/// Returns the number of newly-inserted rows. Repeated calls with the
/// same `findings` slice on the same `output` path return 0 on
/// subsequent calls because the partial UNIQUE index suppresses
/// re-inserts of identical signature hashes — a strictly stronger
/// guarantee than the prior "always reports findings.len()" shape, but
/// useful for callers that want the actual delta.
pub fn write_findings_db(
    findings: &[Finding],
    output: &std::path::Path,
) -> anyhow::Result<usize> {
    write_findings_db_with_run(findings, output, None, None, true)
}

/// Detector-local FP suppressor for trufflehog NDJSON ingestion.
///
/// Returns `true` if the `(detector, raw)` pair is a known false-positive
/// shape that should be dropped before insertion into the `findings`
/// table. Returns `false` (admit the row) for every detector this
/// function does not have an explicit calibration for — i.e. the default
/// is fail-open, NOT fail-closed.
///
/// **Why this lives at ingestion, not as a CLI flag.** Trufflehog's
/// upstream regex for the Square detector matches the substring `sq0`
/// followed by 30-43 chars of base64-ish content (`[A-Za-z0-9_-]`).
/// On a 4.8 MB DEX with thousands of long base64-encoded resource
/// identifiers (Facebook Audience Network's `assets/audience_network.dex`
/// is the motivating case), substring hits are inevitable and
/// meaningless: a class-metadata entry like `Lcom/foo/sq0abcXYZ...`
/// trips the upstream regex but is not a Square API key.
///
/// The fix is detector-local regex anchoring per Square's documented
/// key shapes (Square API documentation):
///
/// - Production access token: `^sq0atp-[A-Za-z0-9_-]{43}$` (50 chars total).
/// - Production refresh token: `^sq0rtp-[A-Za-z0-9_-]{43}$` (50 chars total).
/// - Sandbox access token: `^sandbox-sq0[a-z]tb-[A-Za-z0-9_-]{22}$`.
/// - Application ID: `^sq0idp-[A-Za-z0-9_-]{22}$` (29 chars total).
///
/// A `Raw` value emitted by trufflehog under `DetectorName: "Square"`
/// that does NOT match any of those four anchored shapes is a
/// substring-shaped FP and is suppressed.
///
/// Siblings (Circle, Honeycomb) extend this function with their own
/// detector branches as they land. The shape is intentionally
/// extension-friendly: each detector gets its own branch, no shared
/// state across detectors.
fn trufflehog_is_known_fp(
    detector: &str,
    raw: &str,
    square_set: &regex::RegexSet,
) -> bool {
    match detector {
        "Square" => !square_set.is_match(raw),
        _ => false,
    }
}

/// Build the anchored Square-key regex set used by
/// [`trufflehog_is_known_fp`]. Constructed once per
/// [`write_credentials_db`] call, outside the per-row loop. Returns the
/// typed `regex::Error` via `?` (typed `Err` on regex-eval failures,
/// no `.unwrap()` on regex construction).
fn square_key_regex_set() -> Result<regex::RegexSet, regex::Error> {
    // Square API key shapes per Square's documentation. Anchored on
    // both ends so substring `sq0` hits inside other strings (DEX class
    // metadata, base64-encoded resource identifiers) are rejected.
    regex::RegexSet::new([
        // Production access token: sq0atp- + 43 chars.
        r"^sq0atp-[A-Za-z0-9_-]{43}$",
        // Production refresh token: sq0rtp- + 43 chars.
        r"^sq0rtp-[A-Za-z0-9_-]{43}$",
        // Sandbox access token: sandbox-sq0[a-z]tb- + 22 chars.
        // The internal `[a-z]` covers documented variants (atb/rtb).
        r"^sandbox-sq0[a-z]tb-[A-Za-z0-9_-]{22}$",
        // Application ID: sq0idp- + 22 chars.
        r"^sq0idp-[A-Za-z0-9_-]{22}$",
    ])
}

/// Append trufflehog NDJSON output to the `credentials` table in the audit DB.
/// Append trufflehog NDJSON output as findings with source='trufflehog'.
///
/// Detector-local FP suppression applies before insertion. Four filters
/// run in sequence per line:
///
/// 1. `trufflehog_is_known_fp` (Square) — substring-shaped FPs are
///    dropped silently and do NOT contribute to the returned count.
/// 2. `credentials_fp::classify_circle_raw_shape` (Circle) — Circle
///    /CircleCI hits whose `Raw` matches an EIP-55 Ethereum address or
///    ASN.1 DER ECDSA signature prefix are downgraded Critical → Info
///    with `id_tag` suffix `_FP_<SHAPE>` and `extra.fp_shape` recorded.
/// 3. `honeycomb_fp` (Honeycomb) — Honeycomb hits whose `Raw` matches
///    a Java class-name shape (`Honeycomb[A-Z][A-Za-z0-9_$]*`) are
///    downgraded Critical → Info with `id_tag` suffix `_FP_ANDROID_CLASS`.
///    Honeycomb hits whose `Raw` matches a documented key anchor (32
///    lowercase hex / `hc[abc]i[ack]_<28-alphanum>` ingest key) are
///    re-promoted to Critical, overriding row-1a Low downgrade,
///    with `extra.anchor` recording which shape matched.
/// 4. [`crate::detectors::calibrate`] (detector-pattern-noise-calibration)
///    — per-line `(detector, raw)` pair routed through:
///    - `Calibration::Skip` — line dropped (class-name FP shapes).
///    - `Calibration::Emit(CalibratedSeverity::{Critical, Low})`.
///
/// Real-positive surface preserved: `Verified: true` findings bypass the
/// Circle + Honeycomb FP shape suppressors + calibration entirely;
/// verified credentials always emit at Critical.
pub fn write_credentials_db(
    trufflehog_ndjson: &str,
    output: &std::path::Path,
) -> anyhow::Result<usize> {
    use crate::detectors::{calibrate, Calibration};

    // Pre-compile the Square anchored-prefix regex set once outside the
    // row loop. `?` propagates any regex-construction error typed.
    let square_set = square_key_regex_set()?;

    let mut db = rusqlite::Connection::open(output)?;
    let tx = db.transaction()?;
    let mut count = 0usize;
    {
        let mut stmt = tx.prepare(
            "INSERT INTO findings (severity, layer, id_tag, gauge_class, source, confidence, detail, cwe, extra) \
             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)",
        )?;
        for line in trufflehog_ndjson.lines() {
            let Ok(v): Result<serde_json::Value, _> = serde_json::from_str(line) else {
                continue;
            };
            let detector = v.get("DetectorName").and_then(|s| s.as_str()).unwrap_or("unknown");
            let raw = v.get("Raw").and_then(|s| s.as_str()).unwrap_or("");
            // Detector-local FP suppression. Known substring-shaped FPs
            // (e.g. Square's `sq0` substring without the full anchored
            // prefix) are dropped before insertion.
            if trufflehog_is_known_fp(detector, raw, &square_set) {
                continue;
            }
            // Trufflehog's `Verified: bool` field — survives the trip into
            // `extra` JSON so the v3 `credentials` view's `verified` column
            // (json_extract(extra,'$.verified')) returns 1/0 for the
            // documented `WHERE verified=1` query.
            let verified = v.get("Verified").and_then(|s| s.as_bool()).unwrap_or(false);

            // Filter 2: Circle/CircleCI FP shape — downgrade Critical → Info
            // with shape tag. Only applies when verified=false (active-verification
            // confirmation overrides any shape heuristic).
            let circle_fp_shape = if !verified && credentials_fp::is_circle_detector(detector) {
                credentials_fp::classify_circle_raw_shape(raw)
            } else {
                None
            };

            // Filter 3: Honeycomb FP shape + real-key anchor. Two outcomes:
            //   - class-name shape → Info downgrade with `_FP_ANDROID_CLASS` tag
            //   - real-key anchor → Critical re-promotion (overrides row-1a Low)
            // Verified=true bypasses both. Anchor is checked first; if a raw
            // matches the anchor we keep it Critical even though the class-name
            // gate would not match anyway.
            let honeycomb_fp_shape = if !verified && honeycomb_fp::is_honeycomb_detector(detector) {
                honeycomb_fp::classify_honeycomb_raw_shape(raw)
            } else {
                None
            };
            let honeycomb_anchor = if !verified && honeycomb_fp::is_honeycomb_detector(detector) {
                honeycomb_fp::classify_honeycomb_raw_anchor(raw)
            } else {
                None
            };

            // Filter 4 (compose with circle + honeycomb): #5 multi-detector
            // calibration runs for non-Circle non-Honeycomb-anchor non-verified
            // hits. Verified bypasses all filters.
            let (severity, id_tag, detail, extra) = if let Some(shape) = circle_fp_shape {
                // Circle FP shape: Info with shape tag (overrides #5 for these hits)
                let id_tag = format!(
                    "CREDENTIAL_{}_FP_{}",
                    detector.to_uppercase(),
                    shape.as_tag()
                );
                let detail = format!(
                    "{}: pattern match downgraded — raw matches well-known FP shape ({})",
                    detector,
                    shape.as_tag()
                );
                let extra = serde_json::json!({
                    "detector": detector,
                    "raw": raw,
                    "verified": verified,
                    "fp_shape": shape.as_tag(),
                })
                .to_string();
                ("Info", id_tag, detail, extra)
            } else if let Some(shape) = honeycomb_fp_shape {
                // Honeycomb class-name FP: Info with shape tag (overrides #5
                // row-1a Low). Mirrors the Circle `_FP_<SHAPE>` convention —
                // preserves the audit trail rather than dropping the line.
                let id_tag = format!(
                    "CREDENTIAL_{}_FP_{}",
                    detector.to_uppercase(),
                    shape.as_tag()
                );
                let detail = format!(
                    "{}: pattern match downgraded — raw matches AOSP/SDK class identifier shape ({})",
                    detector,
                    shape.as_tag()
                );
                let extra = serde_json::json!({
                    "detector": detector,
                    "raw": raw,
                    "verified": verified,
                    "fp_shape": shape.as_tag(),
                })
                .to_string();
                ("Info", id_tag, detail, extra)
            } else if let Some(anchor) = honeycomb_anchor {
                // Honeycomb real-key anchor: Critical re-promotion (overrides
                // #5's row-1a blanket Low downgrade for the documented key
                // shapes). The detectors module's docstring (`src/detectors/mod.rs:18-23`)
                // explicitly anticipates this re-promotion.
                let id_tag = format!("CREDENTIAL_{}", detector.to_uppercase());
                let detail = format!(
                    "{}: pattern match on credential material (unverified; raw matches documented key anchor {})",
                    detector,
                    anchor.as_tag()
                );
                let extra = serde_json::json!({
                    "detector": detector,
                    "raw": raw,
                    "verified": verified,
                    "anchor": anchor.as_tag(),
                })
                .to_string();
                ("Critical", id_tag, detail, extra)
            } else if verified {
                // Verified=true: Critical regardless of calibration.
                let id_tag = format!("CREDENTIAL_{}", detector.to_uppercase());
                let detail = format!(
                    "{}: pattern match on credential material (verified)",
                    detector
                );
                let extra = serde_json::json!({
                    "detector": detector,
                    "raw": raw,
                    "verified": true,
                })
                .to_string();
                ("Critical", id_tag, detail, extra)
            } else {
                // Unverified non-Circle: run through #5 multi-detector calibration.
                match calibrate(detector, raw) {
                    Calibration::Skip => continue,
                    Calibration::Emit(sev) => {
                        let id_tag = format!("CREDENTIAL_{}", detector.to_uppercase());
                        let detail = format!(
                            "{}: pattern match on credential material (unverified)",
                            detector
                        );
                        let extra = serde_json::json!({
                            "detector": detector,
                            "raw": raw,
                            "verified": false,
                        })
                        .to_string();
                        (sev.as_str(), id_tag, detail, extra)
                    }
                }
            };


            stmt.execute(rusqlite::params![
                severity, "Apk", &id_tag, "Semantic", "trufflehog", "unverified",
                &detail, Option::<u16>::None, &extra,
            ])?;
            // DISPLAY-ONLY: returned row count; saturates to stay monotonic.
            count = count.saturating_add(1);
        }
    }
    tx.commit()?;
    db.execute("INSERT INTO findings_fts(findings_fts) VALUES('rebuild')", [])?;
    Ok(count)
}


/// Append xref rows to the audit DB.
/// `entries` is a slice of (layer, string_value, function_id, function_name).
pub fn write_xrefs_db(
    entries: &[(String, String, Option<u32>, String)],
    output: &std::path::Path,
) -> anyhow::Result<usize> {
    let mut db = rusqlite::Connection::open(output)?;
    let tx = db.transaction()?;
    {
        let mut stmt = tx.prepare(
            "INSERT INTO xrefs (layer, string_value, function_id, function_name) VALUES (?1, ?2, ?3, ?4)",
        )?;
        for (layer, sv, fid, fname) in entries {
            stmt.execute(rusqlite::params![layer, sv, fid, fname])?;
        }
    }
    tx.commit()?;
    db.execute("INSERT INTO xrefs_fts(xrefs_fts) VALUES('rebuild')", [])?;
    Ok(entries.len())
}

/// Persist semgrep JSON results as findings with source='semgrep'.
pub fn write_semgrep_db(
    semgrep_json: &str,
    output: &std::path::Path,
) -> anyhow::Result<usize> {
    let parsed: serde_json::Value = serde_json::from_str(semgrep_json)?;
    let results = parsed
        .get("results")
        .and_then(|v| v.as_array())
        .cloned()
        .unwrap_or_default();
    let mut db = rusqlite::Connection::open(output)?;
    let tx = db.transaction()?;
    let mut count = 0usize;
    {
        let mut stmt = tx.prepare(
            "INSERT INTO findings (severity, layer, id_tag, gauge_class, source, confidence, detail, cwe, extra) \
             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)",
        )?;
        for r in &results {
            let check_id = r["check_id"].as_str().unwrap_or("");
            let path = r.get("path").and_then(Value::as_str).unwrap_or("");
            let class_name = std::path::Path::new(path)
                .file_stem()
                .and_then(|s| s.to_str())
                .map(|s| s.replace('_', "."));
            let start_line = r.get("start").and_then(|v| v.get("line")).and_then(Value::as_i64);
            let end_line = r.get("end").and_then(|v| v.get("line")).and_then(Value::as_i64);
            let extra = r.get("extra");
            let sg_severity = extra
                .and_then(|e| e.get("severity"))
                .and_then(Value::as_str)
                .unwrap_or("INFO");
            let message = extra
                .and_then(|e| e.get("message"))
                .and_then(Value::as_str)
                .unwrap_or("");
            let metadata = extra.and_then(|e| e.get("metadata"));
            let cwe_str = metadata
                .and_then(|m| m.get("cwe"))
                .and_then(Value::as_array)
                .map(|arr| arr.iter().filter_map(Value::as_str).collect::<Vec<_>>().join("; "));
            let category = metadata.and_then(|m| m.get("category")).and_then(Value::as_str);

            let severity = match sg_severity {
                "ERROR" => "High",
                "WARNING" => "Medium",
                _ => "Info",
            };
            let id_tag = format!("SEMGREP_{}", check_id);
            let detail = format!("{}: {} ({})", check_id, message, class_name.as_deref().unwrap_or(path));
            let extra = serde_json::json!({
                "check_id": check_id, "path": path, "class_name": class_name,
                "start_line": start_line, "end_line": end_line,
                "message": message, "cwe": cwe_str, "category": category,
            }).to_string();

            stmt.execute(rusqlite::params![
                severity, "Dex", &id_tag, "Semantic", "semgrep", "unverified",
                &detail, Option::<u16>::None, &extra,
            ])?;
            // DISPLAY-ONLY: returned row count; saturates to stay monotonic.
            count = count.saturating_add(1);
        }
    }
    tx.commit()?;
    if count > 0 {
        db.execute("INSERT INTO findings_fts(findings_fts) VALUES('rebuild')", [])?;
    }
    Ok(count)
}

/// Triage a finding: confirm or dismiss.
pub fn triage_finding(
    db_path: &std::path::Path,
    rowid: i64,
    action: &str,
    reason: Option<&str>,
) -> anyhow::Result<serde_json::Value> {
    let db = rusqlite::Connection::open(db_path)?;
    match action {
        "confirm" => {
            db.execute(
                "UPDATE findings SET confidence = 'confirmed' WHERE rowid = ?1",
                rusqlite::params![rowid],
            )?;
        }
        "dismiss" => {
            let reason = reason.unwrap_or("dismissed by analyst");
            db.execute(
                "UPDATE findings SET confidence = 'dismissed', dismiss_reason = ?1 WHERE rowid = ?2",
                rusqlite::params![reason, rowid],
            )?;
        }
        _ => anyhow::bail!("action must be 'confirm' or 'dismiss'"),
    }
    // Return the updated finding
    let mut stmt = db.prepare(
        "SELECT rowid, severity, id_tag, source, confidence, dismiss_reason, detail FROM findings WHERE rowid = ?1",
    )?;
    let row = stmt.query_row(rusqlite::params![rowid], |r| {
        Ok(serde_json::json!({
            "rowid": r.get::<_, i64>(0)?,
            "severity": r.get::<_, String>(1)?,
            "id_tag": r.get::<_, String>(2)?,
            "source": r.get::<_, String>(3)?,
            "confidence": r.get::<_, String>(4)?,
            "dismiss_reason": r.get::<_, Option<String>>(5)?,
            "detail": r.get::<_, String>(6)?,
        }))
    })?;
    Ok(row)
}

/// Persist finding-scoped xrefs to the audit DB.
///
/// For each High/Critical finding, extracts the Dalvik class descriptor
/// embedded in the `detail` string (pattern `Lcom/foo/Bar;`) and writes
/// every string constant loaded by any method of that class as a
/// `finding_xrefs` row, joined to the finding via its DB rowid.
///
/// **Root-cause fix:** DEX_TAINT_FLOW / BRIDGE_TAINT_FLOW findings
/// previously stored no class descriptor in their detail string —
/// only `func #{:#x}` — so `target_classes` was always empty and the
/// function returned 0.  The fix adds `class_descriptor` (the containing
/// method's Dalvik descriptor) to the `TaintFinding` struct and embeds it
/// in the detail string via `taint_finding_to_finding`, so the regex
/// extraction here works for taint-flow findings as well as manifest/APK
/// findings that always carried descriptors.
///
/// # Failure modes
/// - `require_apk` returns `Err` → propagated (no APK context, no DEX bytes).
/// - `Xrefs::build` fails for one DEX layer → that layer is skipped.
/// - No High/Critical findings with extractable class refs → returns `Ok(0)`.
///
// PROOF: `i + 1` dex-layer label bounded by ctx.dex.len() ≤ isize::MAX.
#[allow(clippy::arithmetic_side_effects, clippy::as_conversions, reason = "`i + 1` dex-layer label bounded by ctx.dex.len() ≤ isize::MAX.")]
pub fn write_finding_xrefs_db(
    ctx: &CrossLayerContext,
    _findings: &[Finding],
    output: &std::path::Path,
) -> anyhow::Result<usize> {
    use std::collections::HashMap;

    // Regex matches Dalvik class descriptors embedded in detail strings:
    //   "... in Lcom/foo/Bar; ..."  →  cap[1] = "com/foo/Bar"
    let class_ref_re = regex::Regex::new(r"L([a-zA-Z0-9_$/]+);")?;

    // ── Step 1: read finding rowids and extract target classes from DB ──────
    // class_to_rowids: dotted class name → list of DB rowids for findings
    // that mention that class.  One class can match multiple findings (e.g.
    // two taint flows in the same class), and one finding can add multiple
    // classes.
    let mut db = rusqlite::Connection::open(output)?;
    let mut class_to_rowids: HashMap<String, Vec<i64>> = HashMap::new();

    {
        let mut stmt = db.prepare(
            "SELECT rowid, detail FROM findings WHERE severity IN ('High', 'Critical')",
        )?;
        let mut rows = stmt.query([])?;
        while let Some(row) = rows.next()? {
            let rowid: i64 = row.get(0)?;
            let detail: String = row.get(1)?;
            for cap in class_ref_re.captures_iter(&detail) {
                let class_dotted = cap[1].replace('/', ".");
                class_to_rowids.entry(class_dotted).or_default().push(rowid);
            }
        }
    }

    if class_to_rowids.is_empty() {
        // No High/Critical findings carry extractable class descriptors.
        return Ok(0);
    }

    // ── Step 2: walk xrefs per dex layer and insert finding_xrefs rows ─────
    let apk = ctx.require_apk()?;
    let tx = db.transaction()?;
    let mut count = 0usize;
    {
        let mut stmt = tx.prepare(
            "INSERT INTO finding_xrefs (finding_rowid, layer, string_value, function_name) \
             VALUES (?1, ?2, ?3, ?4)",
        )?;

        for ((li, dex), apk_dex) in ctx.dex.iter().enumerate().zip(apk.dex.iter()) {
            let label = format!("dex{}", li + 1);
            let raw = &apk_dex.data;
            let xrefs = match droidsaw_dex::xrefs::Xrefs::build(dex, raw) {
                Ok(x) => x,
                Err(_) => continue,
            };
            // For each (string, method) pair: if the method's class appears in
            // class_to_rowids, emit a finding_xrefs row for each matching finding.
            for (s, methods) in &xrefs.string_to_methods {
                for m in methods {
                    let class_dotted = m.class.trim_start_matches('L')
                        .trim_end_matches(';')
                        .replace('/', ".");
                    let Some(rowids) = class_to_rowids.get(&class_dotted) else {
                        continue;
                    };
                    let func_name = format!("{}->{}{}", m.class, m.name, m.proto);
                    for &rowid in rowids {
                        stmt.execute(rusqlite::params![rowid, &label, s, &func_name])?;
                        count = count.saturating_add(1);
                    }
                }
            }
        }
    }
    tx.commit()?;
    if count > 0 {
        db.execute(
            "INSERT INTO finding_xrefs_fts(finding_xrefs_fts) VALUES('rebuild')",
            [],
        )?;
    }
    Ok(count)
}

/// Append taint flow rows to the audit DB.
/// Each row records a source→sink edge: the layer, function ID, source type,
/// sink type, severity, and CWE. Use `db_query` with FTS5 to search:
///   `SELECT * FROM taint_flows_fts WHERE taint_flows_fts MATCH 'SqlExecute'`
pub fn write_taint_flows_db(
    findings: &[Finding],
    output: &std::path::Path,
) -> anyhow::Result<usize> {
    let mut db = rusqlite::Connection::open(output)?;
    let tx = db.transaction()?;
    let mut count = 0usize;
    {
        let mut stmt = tx.prepare(
            "INSERT INTO taint_flows \
             (layer, func_id, source_type, sink_type, severity, cwe, source_offset, sink_offset) \
             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
        )?;
        for f in findings.iter().filter(|f| matches!(f.id.as_str(), "DEX_TAINT_FLOW" | "BRIDGE_TAINT_FLOW" | "HBC_TAINT_FLOW")) {
            // Source/sink come from the detail string; func_id rides the
            // typed `Finding.func_id` field (set by the 3 taint factories
            // via `.with_func()`). Round-tripping the func_id through
            // `(func #{:#x}; ...)` text was lossy on the `;` suffix —
            // the v2 fix reads it directly off the typed column. See
            // `tests/db_taint_flows_func_id.rs` for the regression.
            //
            // source_offset / sink_offset are extracted from Finding.extra JSON
            // (set by `taint_finding_to_finding` for DEX_TAINT_FLOW; NULL for
            // HBC_TAINT_FLOW and BRIDGE_TAINT_FLOW which have no instruction addr).
            let (source_type, sink_type) = parse_taint_source_sink(&f.detail);
            let func_id = f.func_id.unwrap_or(0);
            let (source_offset, sink_offset): (Option<i64>, Option<i64>) = f.extra
                .as_deref()
                .and_then(|s| serde_json::from_str::<serde_json::Value>(s).ok())
                .map(|v| {
                    let src = v.get("source_offset").and_then(|x| x.as_i64());
                    let snk = v.get("sink_offset").and_then(|x| x.as_i64());
                    (src, snk)
                })
                .unwrap_or((None, None));
            stmt.execute(rusqlite::params![
                format!("{:?}", f.layer),
                func_id,
                source_type,
                sink_type,
                format!("{:?}", f.severity),
                f.cwe,
                source_offset,
                sink_offset,
            ])?;
            // DISPLAY-ONLY: returned row count.
            count = count.saturating_add(1);
        }
    }
    tx.commit()?;
    if count > 0 {
        db.execute("INSERT INTO taint_flows_fts(taint_flows_fts) VALUES('rebuild')", [])?;
    }
    Ok(count)
}

/// Append cross-layer taint composite rows to the audit DB.
///
/// One row per `CROSS_LAYER_TAINT_FLOW` `Finding` produced by
/// [`crate::analysis::cross_layer_stitch`]. The full
/// `CrossLayerTaintFinding<MethodIdx>` payload rides
/// `Finding.extra` (composite_to_finding writes it via
/// `serde_json::to_string`); this writer deserializes it back symmetrically
/// and projects each field into a typed column. Findings with absent or
/// unparseable `extra` are skipped (no panic — pre-Phase-5 DBs sometimes
/// carried bytes that were trimmed; survival over panic is the
/// hardening contract).
///
/// Use `db_query` with FTS5 to search across modules / methods / sinks /
/// native targets:
///   `SELECT * FROM cross_layer_taint_flows_fts WHERE cross_layer_taint_flows_fts MATCH 'Storage'`
///
/// Wraps the insert loop in `db.transaction()` — rusqlite's canonical
/// explicit-BEGIN/COMMIT pattern, avoiding the 100× fsync penalty of
/// per-row autocommit. The `cross_layer_taint_flows_fts` virtual table
/// is rebuilt at the end so FTS reads see the freshly-inserted rows.
///
/// Returns the count of inserted rows (excluding skipped ones).
pub fn write_cross_layer_taint_flows_db(
    findings: &[Finding],
    output: &std::path::Path,
) -> anyhow::Result<usize> {
    use droidsaw_common::cross_layer_taint::CrossLayerTaintFinding;
    use droidsaw_dex::ids::MethodIdx;

    let mut db = rusqlite::Connection::open(output)?;
    let tx = db.transaction()?;
    let mut count = 0usize;
    {
        let mut stmt = tx.prepare(
            "INSERT INTO cross_layer_taint_flows \
             (js_module, js_method, dex_idx, method_idx, js_func_id, \
              native_func_id, native_class_descriptor, native_method_signature, \
              source_type, sink_type, severity, cwe) \
             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
        )?;
        for f in findings.iter().filter(|f| f.id.as_str() == "CROSS_LAYER_TAINT_FLOW") {
            // Parser-path read: Finding.extra is attacker-untrusted only
            // in the sense that the same byte channel carries other findings'
            // serde payloads; stitcher-emitted composites are guaranteed
            // round-trippable by the debug-time gauge in `composite_to_finding`.
            // Silently skipping malformed entries matches the hardening
            // discipline: survival over panic on the audit-DB write path.
            let Some(extra) = f.extra.as_deref() else {
                continue;
            };
            let Ok(c) = serde_json::from_str::<CrossLayerTaintFinding<MethodIdx>>(extra) else {
                continue;
            };
            // Variant-tag string for source/sink columns — matches the
            // `{:?}.split('{').next()` shape used by `composite_to_finding`
            // when building Finding.detail, so FTS searches on either
            // detail or the column return the same hits.
            let source_type = format!("{:?}", c.js_source);
            let source_type = source_type.split('{').next().unwrap_or("Unknown").trim();
            let sink_type = format!("{:?}", c.native_sink);
            let sink_type = sink_type.split('{').next().unwrap_or("Unknown").trim();
            // dex_idx is a usize from the multi-DEX bundle index; saturating
            // to i64::MAX is safe-by-construction since realistic apks carry
            // a handful of DEX entries (Android packaging caps at 100).
            let dex_idx_i64 = i64::try_from(c.bridge.dex_idx).unwrap_or(i64::MAX);
            stmt.execute(rusqlite::params![
                c.bridge.js_module.as_str(),
                c.bridge.js_method.as_str(),
                dex_idx_i64,
                i64::from(c.bridge.method_idx.0),
                c.js_func_id,
                c.native_func_id,
                c.native_class_descriptor.as_deref(),
                c.native_method_signature.as_deref(),
                source_type,
                sink_type,
                format!("{:?}", c.severity),
                c.cwe,
            ])?;
            count = count.saturating_add(1);
        }
    }
    tx.commit()?;
    if count > 0 {
        db.execute(
            "INSERT INTO cross_layer_taint_flows_fts(cross_layer_taint_flows_fts) VALUES('rebuild')",
            [],
        )?;
    }
    Ok(count)
}

/// Parse the source/sink strings out of a taint finding's detail field.
/// Handles all three detail-string shapes emitted by the factory functions:
/// - "intra-method taint: SourceType → SinkType (func #0x1234; ...)"
/// - "bridge taint: ReactBridgeParam[method] → SinkType (func #0x1234)"
/// - "hbc taint: JsParam → SinkType (func #0x1234)"
///
/// Note: func_id is NOT re-parsed from the `(func #...)` substring here
/// — that round-trip is lossy on the `;` suffix and would silently drop
/// to 0. The typed `Finding.func_id` field carries the value; this
/// helper is source/sink-only.
pub(super) fn parse_taint_source_sink(detail: &str) -> (String, String) {
    let inner = detail
        .strip_prefix("intra-method taint: ")
        .or_else(|| detail.strip_prefix("bridge taint: "))
        .or_else(|| detail.strip_prefix("hbc taint: "))
        .unwrap_or(detail);
    let (flow_part, _) = inner.split_once(" (func").unwrap_or((inner, ""));
    let (source, sink) = flow_part.split_once("").unwrap_or((flow_part, "Unknown"));
    (source.trim().to_string(), sink.trim().to_string())
}
#[cfg(test)]
mod trufflehog_fp_tests {
    use super::{square_key_regex_set, trufflehog_is_known_fp};

    #[test]
    fn square_real_production_access_token_admitted() {
        // Real Square production access token shape: `sq0atp-` + 43
        // chars from `[A-Za-z0-9_-]`. Must NOT be flagged as an FP.
        let set = square_key_regex_set().expect("regex set");
        let real = "sq0atp-AbCdEfGhIjKlMnOpQrStUvWxYz0123456789_-aBcDe";
        assert_eq!(real.len(), 7 + 43, "test fixture length sanity");
        assert!(
            !trufflehog_is_known_fp("Square", real, &set),
            "real production access token must be admitted"
        );
    }

    #[test]
    fn square_real_production_refresh_token_admitted() {
        let set = square_key_regex_set().expect("regex set");
        let real = "sq0rtp-AbCdEfGhIjKlMnOpQrStUvWxYz0123456789_-aBcDe";
        assert_eq!(real.len(), 7 + 43);
        assert!(!trufflehog_is_known_fp("Square", real, &set));
    }

    #[test]
    fn square_real_sandbox_access_token_admitted() {
        // Sandbox shape: `sandbox-sq0[a-z]tb-` + 22 chars.
        let set = square_key_regex_set().expect("regex set");
        let real = "sandbox-sq0atb-AbCdEfGhIjKlMnOpQrStUv";
        // 8 + 4 + 3 + 22 = 37 chars total ("sandbox-" = 8, "sq0" = 3, "atb-" = 4, body = 22).
        assert_eq!(real.len(), 8 + 3 + 4 + 22);
        assert!(!trufflehog_is_known_fp("Square", real, &set));
    }

    #[test]
    fn square_real_application_id_admitted() {
        // Application ID shape: `sq0idp-` + 22 chars.
        let set = square_key_regex_set().expect("regex set");
        let real = "sq0idp-AbCdEfGhIjKlMnOpQrStUv";
        assert_eq!(real.len(), 7 + 22);
        assert!(!trufflehog_is_known_fp("Square", real, &set));
    }

    #[test]
    fn square_substring_in_class_metadata_rejected() {
        // The motivating FP: `sq0` substring inside a longer
        // base64-shaped string (no `sq0[a-z]tp-` prefix anchor).
        // Trufflehog's upstream regex would match this; our anchored
        // filter drops it.
        let set = square_key_regex_set().expect("regex set");
        let fp = "sq0abcXYZdefGHI012345jkl_-mnopqr"; // no `-` after `sq0[a-z]tp/idp`
        assert!(
            trufflehog_is_known_fp("Square", fp, &set),
            "substring-shaped FP must be suppressed"
        );
    }

    #[test]
    fn square_wrong_length_rejected() {
        // Right prefix, wrong body length → not a real Square key.
        let set = square_key_regex_set().expect("regex set");
        // sq0atp- needs exactly 43 chars after the dash; this has 10.
        let too_short = "sq0atp-AbCdEfGhIj";
        assert!(trufflehog_is_known_fp("Square", too_short, &set));
        // sq0idp- needs exactly 22 chars; this has 30.
        let too_long = "sq0idp-AbCdEfGhIjKlMnOpQrStUvWxYz0123456789";
        assert!(trufflehog_is_known_fp("Square", too_long, &set));
    }

    #[test]
    fn square_anchored_no_substring_match() {
        // Anchored regex must reject content that contains a valid-looking
        // Square key with leading/trailing junk (not a full-string match).
        let set = square_key_regex_set().expect("regex set");
        let leading = "junk_sq0atp-AbCdEfGhIjKlMnOpQrStUvWxYz0123456789_-aBcDe";
        let trailing = "sq0atp-AbCdEfGhIjKlMnOpQrStUvWxYz0123456789_-aBcDe_junk";
        assert!(trufflehog_is_known_fp("Square", leading, &set));
        assert!(trufflehog_is_known_fp("Square", trailing, &set));
    }

    #[test]
    fn other_detectors_pass_through() {
        // Non-Square detectors are NOT subject to Square's anchored
        // filter; they pass through unchanged until siblings extend
        // the dispatch.
        let set = square_key_regex_set().expect("regex set");
        assert!(!trufflehog_is_known_fp("AWS", "AKIAIOSFODNN7EXAMPLE", &set));
        assert!(!trufflehog_is_known_fp("PrivateKey", "-----BEGIN", &set));
        assert!(!trufflehog_is_known_fp("Slack", "xoxb-1234", &set));
        // Unknown detector — fail-open per the function contract.
        assert!(!trufflehog_is_known_fp("UnknownDetector", "anything", &set));
    }
}

#[cfg(test)]
mod shadow_gate_tests {
    use super::export;
    use crate::analysis::dup_class_fixture;
    use crate::context::CrossLayerContext;

    /// The SQLite `classes` table is keyed by the enumerate position
    /// `j`, so a duplicate-`class_idx` pair gets two distinct keys and
    /// — without the shadow gate — BOTH rows persist via
    /// `INSERT OR REPLACE`. This is the highest-stakes site: a persisted
    /// shadow row amplifies class counts and introduces `class_idx`-join
    /// ambiguity for downstream consumers. The gate must skip the shadow
    /// row before its INSERT, leaving exactly one `classes` row.
    #[test]
    fn export_persists_only_canonical_class_row() {
        let dex = dup_class_fixture::for_export();
        let ctx = CrossLayerContext {
            path: "test://dup-class".to_string(),
            apk: None,
            hbc: None,
            hbc_parse_error: None,
            dex: vec![dex],
            dex_direct_bytes: None,
            loaded_split_names: Vec::new(),
            hermes_findings: Vec::new(),
            permissive_recovery: droidsaw_apk::PermissiveRecoveryOpts::default(),
        };

        let tmp = tempfile::NamedTempFile::new().expect("temp db");
        let db_path = tmp.path().to_str().expect("utf-8 path").to_string();
        export(&ctx, &db_path).expect("export succeeds");

        let conn = rusqlite::Connection::open(&db_path).expect("open db");
        let class_count: i64 = conn
            .query_row(
                "SELECT COUNT(*) FROM classes WHERE layer = 'dex1'",
                [],
                |row| row.get(0),
            )
            .expect("count classes");
        assert_eq!(
            class_count, 1,
            "only the canonical (first-wins) class row may persist; \
             the shadow duplicate-class_idx row must be gated out"
        );
    }
}