nyx-scanner 0.5.0

A multi-language static analysis tool for detecting security vulnerabilities
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
use super::conditions::unwrap_parens;
use super::{
    anon_fn_name, collect_idents, collect_idents_with_paths, find_constructor_type_child,
    first_call_ident, root_receiver_text, text_of,
};
use crate::labels::{Cap, Kind, lookup};
use tree_sitter::Node;

/// Locate the call node (`CallFn` / `CallMethod` / `CallMacro`) that `n`
/// either is or wraps.
///
/// Search order:
/// 1. `n` itself;
/// 2. every direct child of `n`, in order;
/// 3. the children of each direct child (grandchildren of `n`), visited
///    child by child.
///
/// The first node classified as a call by [`lookup`] is returned; `None`
/// means nothing within two levels below `n` is a call.
pub(super) fn find_call_node<'a>(n: Node<'a>, lang: &str) -> Option<Node<'a>> {
    let is_call = |kind: &str| {
        matches!(
            lookup(lang, kind),
            Kind::CallFn | Kind::CallMethod | Kind::CallMacro
        )
    };

    if is_call(n.kind()) {
        return Some(n);
    }

    // Level 1: a wrapper whose direct child is the call.
    let mut level_one = n.walk();
    let direct_hit = n
        .children(&mut level_one)
        .find(|child| is_call(child.kind()));
    if direct_hit.is_some() {
        return direct_hit;
    }

    // Level 2: handles shapes such as
    // `expression_statement > variable_declarator > call`.
    let mut parents = n.walk();
    for child in n.children(&mut parents) {
        let mut level_two = child.walk();
        let nested_hit = child
            .children(&mut level_two)
            .find(|grandchild| is_call(grandchild.kind()));
        if nested_hit.is_some() {
            return nested_hit;
        }
    }
    None
}

/// Collect the identifiers bound to selected fields of an object-literal
/// argument.
///
/// `arg_index` selects a positional argument (0-based, over the named
/// children of the call's `arguments` field); `fields` lists the key names
/// that count as destination fields (compared case-sensitively).
///
/// Returns:
/// * `Some(names)` when the selected argument is an `object` or
///   `dictionary` node.  `names` holds, without duplicates, the identifiers
///   and member paths found in the values of pairs whose key is listed in
///   `fields`, plus any shorthand property whose name is listed in
///   `fields`.  The vector is empty when no such field is present.
/// * `None` when `fields` is empty, the call has no `arguments` field, the
///   index is out of range, the argument is some other kind of node, or the
///   literal contains a `spread_element` / `dictionary_splat` child (spread
///   contents cannot be attributed to individual fields).  Callers are
///   expected to fall back to treating the whole argument as destination.
pub(super) fn extract_destination_field_idents(
    call_node: Node,
    arg_index: usize,
    fields: &[&str],
    code: &[u8],
) -> Option<Vec<String>> {
    if fields.is_empty() {
        return None;
    }
    let arg_list = call_node.child_by_field_name("arguments")?;
    let mut arg_cursor = arg_list.walk();
    let literal = arg_list.named_children(&mut arg_cursor).nth(arg_index)?;

    // Per-field reasoning only makes sense for object / dict literals; any
    // other argument shape is handled by the caller as a whole.
    if literal.kind() != "object" && literal.kind() != "dictionary" {
        return None;
    }

    let is_destination = |candidate: &str| fields.contains(&candidate);
    let mut collected: Vec<String> = Vec::new();
    let mut member_cursor = literal.walk();
    for member in literal.named_children(&mut member_cursor) {
        let member_kind = member.kind();

        // Spread / splat: the contributed keys are unknown statically.
        if member_kind == "spread_element" || member_kind == "dictionary_splat" {
            return None;
        }

        // Shorthand `{ url }`: the field name and the bound identifier are
        // the same text, so it counts iff that text is a destination field.
        if member_kind == "shorthand_property_identifier"
            || member_kind == "shorthand_property_identifier_pattern"
        {
            if let Some(binding) = text_of(member, code) {
                if is_destination(binding.as_str()) && !collected.contains(&binding) {
                    collected.push(binding);
                }
            }
            continue;
        }

        if member_kind != "pair" {
            continue;
        }
        let Some(key_node) = member.child_by_field_name("key") else {
            continue;
        };
        let key = match key_node.kind() {
            // `[someVar]` keys cannot be resolved statically: never a match.
            "computed_property_name" => None,
            // Drop the surrounding quotes so `"url"` and `url` compare equal.
            "string" | "string_literal" => text_of(key_node, code).map(|quoted| {
                if quoted.len() < 2 {
                    quoted
                } else {
                    quoted[1..quoted.len() - 1].to_string()
                }
            }),
            _ => text_of(key_node, code),
        };
        let Some(key) = key else {
            continue;
        };
        if !is_destination(key.as_str()) {
            continue;
        }
        let Some(value_node) = member.child_by_field_name("value") else {
            continue;
        };

        let mut bare_idents: Vec<String> = Vec::new();
        let mut member_paths: Vec<String> = Vec::new();
        collect_idents_with_paths(value_node, code, &mut bare_idents, &mut member_paths);
        // Paths are appended before bare identifiers; duplicates are dropped.
        for name in member_paths.into_iter().chain(bare_idents) {
            if !collected.contains(&name) {
                collected.push(name);
            }
        }
    }
    Some(collected)
}

/// Extract the string-literal content at argument position `index` (0-based).
/// Returns `None` if the argument is not a string literal or the index is out of range.
pub(super) fn extract_const_string_arg(
    call_node: Node,
    index: usize,
    code: &[u8],
) -> Option<String> {
    let args = call_node.child_by_field_name("arguments")?;
    let mut cursor = args.walk();
    let arg = args.named_children(&mut cursor).nth(index)?;
    match arg.kind() {
        // `string` / `string_literal` cover JS/TS, Python, Java, PHP, C/C++, Ruby, Rust;
        // `interpreted_string_literal` / `raw_string_literal` cover Go's
        // tree-sitter grammar (double-quoted vs. backtick-quoted forms).
        "string" | "string_literal" | "interpreted_string_literal" | "raw_string_literal" => {
            let raw = text_of(arg, code)?;
            if raw.len() >= 2 {
                Some(raw[1..raw.len() - 1].to_string())
            } else {
                None
            }
        }
        "template_string" => {
            // Only treat as constant if no interpolation (no template_substitution children)
            let mut c = arg.walk();
            if arg
                .named_children(&mut c)
                .any(|ch| ch.kind() == "template_substitution")
            {
                return None; // dynamic
            }
            let raw = text_of(arg, code)?;
            if raw.len() >= 2 {
                Some(raw[1..raw.len() - 1].to_string())
            } else {
                None
            }
        }
        _ => None,
    }
}

/// Return the literal source text of the keyword argument named
/// `keyword_name` (e.g. Python `shell=True`).
///
/// Scans the named children of the call's `arguments` field for the first
/// `keyword_argument` / `named_argument` whose `name` field equals
/// `keyword_name`.  The decision is made on that first match only:
///
/// * value kind `true`, `false`, `none`, `integer`, `float`, `string` or
///   `string_literal` → `Some(source text of the value)`;
/// * value kind `identifier` → `Some(text)` only when the text is exactly
///   `True`, `False` or `None`;
/// * anything else, or a missing `value` field → `None` (dynamic).
///
/// `None` is also returned when the call has no `arguments` field or no
/// argument carries the requested name.
pub(super) fn extract_const_keyword_arg(
    call_node: Node,
    keyword_name: &str,
    code: &[u8],
) -> Option<String> {
    let arg_list = call_node.child_by_field_name("arguments")?;
    let mut walker = arg_list.walk();
    for entry in arg_list.named_children(&mut walker) {
        if !matches!(entry.kind(), "keyword_argument" | "named_argument") {
            continue;
        }
        // Entries lacking a readable `name` can never match.
        let declared = entry
            .child_by_field_name("name")
            .and_then(|name_node| text_of(name_node, code));
        if declared.as_deref() != Some(keyword_name) {
            continue;
        }

        let value = entry.child_by_field_name("value")?;
        return match value.kind() {
            "true" | "false" | "none" | "integer" | "float" | "string" | "string_literal" => {
                text_of(value, code)
            }
            // The boolean / null constants may surface as plain identifiers;
            // every other identifier is a dynamic value.
            "identifier" => text_of(value, code)
                .filter(|text| matches!(text.as_str(), "True" | "False" | "None")),
            _ => None,
        };
    }
    None
}

/// Report whether the call carries a keyword/named argument called
/// `keyword_name`, whatever its value looks like.
///
/// Only `keyword_argument` / `named_argument` children of the call's
/// `arguments` field are considered, and they match when the text of their
/// `name` field equals `keyword_name`.  A call without an `arguments` field
/// yields `false`.
///
/// Lets gated-sink classification tell an absent kwarg (language default
/// applies) from one that is present but dynamic.
pub(super) fn has_keyword_arg(call_node: Node, keyword_name: &str, code: &[u8]) -> bool {
    let Some(arg_list) = call_node.child_by_field_name("arguments") else {
        return false;
    };
    let mut walker = arg_list.walk();
    let present = arg_list
        .named_children(&mut walker)
        .filter(|entry| matches!(entry.kind(), "keyword_argument" | "named_argument"))
        .filter_map(|entry| entry.child_by_field_name("name"))
        .any(|name_node| text_of(name_node, code).as_deref() == Some(keyword_name));
    present
}

/// Describe the first argument of a call: its tree-sitter `kind()` and
/// whether its subtree contains an interpolation node.
///
/// The first named child of the call's `arguments` field is taken and
/// passed through `unwrap_parens`, so `(arg0)` is reported as `arg0`.
/// Returns `None` when the call has no `arguments` field or the argument
/// list has no named children.
///
/// Used by per-language shape-aware sink suppression — for example, Ruby
/// ActiveRecord query methods (`where`, `order`, `pluck`, …) are
/// intrinsically parameterised when arg 0 is a hash/symbol/array/
/// non-interpolated string, regardless of taint reaching that argument.
pub(super) fn arg0_kind_and_interpolation(call_node: Node) -> Option<(String, bool)> {
    let arg_list = call_node.child_by_field_name("arguments")?;
    let mut walker = arg_list.walk();
    let first = arg_list.named_children(&mut walker).next()?;
    let target = unwrap_parens(first);
    Some((target.kind().to_owned(), subtree_has_interpolation(target)))
}

/// Follow the receiver spine of a Java method chain until a
/// `method_invocation` named in `target_methods` (e.g. `createQuery`,
/// `prepareStatement`) is reached, and return the kind of that call's first
/// argument (parentheses unwrapped).
///
/// Lets callers verify that the SQL-bearing call further up the chain was
/// given a string literal rather than a concatenation or method call.
///
/// Returns `None` when:
/// * `expr` (after `unwrap_parens`) is not a `method_invocation`;
/// * the matching call has no `arguments` field or no named argument — the
///   walk stops there and does not continue down the chain;
/// * the chain ends (no `object` field) without a match.
///
/// Only the `object` (receiver) field is followed; arguments of unrelated
/// calls are never searched.
pub(super) fn java_chain_arg0_kind_for_method(
    expr: Node,
    target_methods: &[&str],
    code: &[u8],
) -> Option<String> {
    let call = unwrap_parens(expr);
    if call.kind() != "method_invocation" {
        return None;
    }

    let is_target = call
        .child_by_field_name("name")
        .and_then(|ident| text_of(ident, code))
        .is_some_and(|name| target_methods.contains(&name.as_str()));
    if is_target {
        let arg_list = call.child_by_field_name("arguments")?;
        let mut walker = arg_list.walk();
        let first = arg_list.named_children(&mut walker).next()?;
        return Some(unwrap_parens(first).kind().to_string());
    }

    // Not the call we want: step to its receiver (`object` field in the
    // Java grammar) and try again.
    let receiver = call.child_by_field_name("object")?;
    java_chain_arg0_kind_for_method(receiver, target_methods, code)
}

/// Walk a Ruby method-chain receiver-side looking for the inner call whose
/// method identifier matches one of `target_methods`, then return that
/// inner call's [`arg0_kind_and_interpolation`].  Used when the CFG node
/// represents a chained expression like `Model.where(...).preload(...).to_a`
/// — the outermost call (`to_a`) has no arguments, so the shape suppressor
/// must reach down the chain to inspect `where`'s arg 0.
///
/// Search order at each node (after `unwrap_parens`):
/// 1. if the node is a `call` whose `method` text is in `target_methods`,
///    the answer is that call's arg-0 description (possibly `None`; the
///    search does not continue past a matching call);
/// 2. otherwise, for a `call`, the `receiver` field (or `object` as a
///    fallback) is searched;
/// 3. finally every remaining named child is searched, which handles
///    wrapping nodes (assignment RHS, begin-end blocks, …).
///
/// The receiver explored in step 2 is skipped in step 3.  Searching it a
/// second time could only reproduce the same `None`, and doing so at every
/// level made the cost of an unmatched chain double with each link.
///
/// Conservative: returns `None` if the chain doesn't contain a matching
/// method, so callers fall through to the no-suppression path.
pub(super) fn ruby_chain_arg0_for_method(
    expr: Node,
    target_methods: &[&str],
    code: &[u8],
) -> Option<(String, bool)> {
    let n = unwrap_parens(expr);
    // `id()` of the receiver already searched without success, if any.
    let mut explored_receiver: Option<usize> = None;

    if n.kind() == "call" {
        let is_target = n
            .child_by_field_name("method")
            .and_then(|method| text_of(method, code))
            .is_some_and(|name| target_methods.contains(&name.as_str()));
        if is_target {
            return arg0_kind_and_interpolation(n);
        }

        // Recurse into the receiver chain (`call.receiver` → next call up).
        let receiver = n
            .child_by_field_name("receiver")
            .or_else(|| n.child_by_field_name("object"));
        if let Some(recv) = receiver {
            if let Some(found) = ruby_chain_arg0_for_method(recv, target_methods, code) {
                return Some(found);
            }
            explored_receiver = Some(recv.id());
        }
    }

    // Also descend into named children to handle wrapping (assignment RHS,
    // begin-end blocks, parenthesised expressions, etc.).
    let mut cursor = n.walk();
    for c in n.named_children(&mut cursor) {
        if explored_receiver == Some(c.id()) {
            continue;
        }
        if let Some(found) = ruby_chain_arg0_for_method(c, target_methods, code) {
            return Some(found);
        }
    }
    None
}

/// `true` when `n`, or any node reachable from it through named children,
/// has kind `interpolation` or `string_interpolation`.
fn subtree_has_interpolation(n: Node) -> bool {
    // Explicit work stack instead of recursion; visiting order does not
    // matter because only the existence of a match is reported.
    let mut pending = vec![n];
    while let Some(current) = pending.pop() {
        if matches!(current.kind(), "interpolation" | "string_interpolation") {
            return true;
        }
        let mut walker = current.walk();
        pending.extend(current.named_children(&mut walker));
    }
    false
}

/// For a chained call such as `a.b().c().d()`, descend the receiver chain
/// and return the innermost call node together with its callee text
/// (e.g. `"http.get"`), with all whitespace removed from that text.
///
/// A link of the chain is followed when the current call's `function`
/// (or `method`) field has an `object` field that is itself a `CallFn` /
/// `CallMethod` node.
///
/// Returns `None` when:
/// * `outer` is not a `CallFn` / `CallMethod` node, or
/// * `outer` has no chained call receiver at all, or
/// * no call in the chain yields a callee text.
///
/// The deepest receiver is preferred; when its callee cannot be read
/// (no `function` / `method` / `name` field, or no text) the next
/// shallower receiver is tried instead.
///
/// Whitespace is stripped because multi-line chains (`http\n  .get(...)`)
/// carry formatting — including `\r` in CRLF sources — inside the callee
/// slice, while the label keys are written as plain `"http.get"`.
///
/// Motivated by CVE-2025-64430 (Parse Server SSRF via
/// `http.get(uri, cb).on('error', e => ...)`): without this, the outer
/// `.on(...)` call swallows classification of the inner gated sink.
pub(super) fn find_chained_inner_call<'a>(
    outer: Node<'a>,
    lang: &str,
    code: &[u8],
) -> Option<(Node<'a>, String)> {
    if !matches!(lookup(lang, outer.kind()), Kind::CallFn | Kind::CallMethod) {
        return None;
    }

    // Gather every call receiver below `outer`, shallowest first.
    let mut receivers: Vec<Node<'a>> = Vec::new();
    let mut current = outer;
    loop {
        let callee = current
            .child_by_field_name("function")
            .or_else(|| current.child_by_field_name("method"));
        let Some(callee) = callee else { break };
        // For a chained call the callee is a member-style expression whose
        // `object` field is the receiver.
        let Some(receiver) = callee.child_by_field_name("object") else {
            break;
        };
        if !matches!(
            lookup(lang, receiver.kind()),
            Kind::CallFn | Kind::CallMethod
        ) {
            break;
        }
        receivers.push(receiver);
        current = receiver;
    }

    // Deepest receiver first; fall back outward when a callee is unreadable.
    receivers.into_iter().rev().find_map(|call| {
        let inner_callee = call
            .child_by_field_name("function")
            .or_else(|| call.child_by_field_name("method"))
            .or_else(|| call.child_by_field_name("name"))?;
        let raw = text_of(inner_callee, code)?;
        let compact: String = raw.chars().filter(|ch| !ch.is_whitespace()).collect();
        Some((call, compact))
    })
}

/// Append to `out` the named arguments of every call found along the
/// receiver chain of `outer` (a `CallFn` / `CallMethod` node).
///
/// `outer`'s own arguments are NOT appended — only those of the calls
/// nested in its receiver position, shallowest call first.
///
/// For `json.NewDecoder(r.Body).Decode(emoji)`:
///   outer  = `.Decode(emoji)`           — nothing appended for `emoji`
///   inner  = `json.NewDecoder(r.Body)`  — appended: `r.Body`
///
/// A receiver is located through the callee (`function` or `method` field)
/// and then its `object`, `operand` or `value` field, tried in that order.
/// The walk stops at the first node that is not a call, has no callee, or
/// has no call receiver.
///
/// Only each inner call's `arguments` field is read, never its callee or
/// receiver expression: the callee text is already covered by the
/// classification of the outer chain text, so emitting it again would
/// double-count.
///
/// The helper is language-neutral (it was written for Go chains like the
/// one above), but callers should gate it per language until regression
/// coverage exists.
pub(super) fn walk_chain_inner_call_args<'a>(outer: Node<'a>, lang: &str, out: &mut Vec<Node<'a>>) {
    let mut current = outer;
    loop {
        if !matches!(
            lookup(lang, current.kind()),
            Kind::CallFn | Kind::CallMethod
        ) {
            return;
        }
        let callee = current
            .child_by_field_name("function")
            .or_else(|| current.child_by_field_name("method"));
        let Some(callee) = callee else { return };
        let receiver = callee
            .child_by_field_name("object")
            .or_else(|| callee.child_by_field_name("operand"))
            .or_else(|| callee.child_by_field_name("value"));
        let Some(inner) = receiver else { return };
        if !matches!(lookup(lang, inner.kind()), Kind::CallFn | Kind::CallMethod) {
            return;
        }
        if let Some(arg_list) = inner.child_by_field_name("arguments") {
            let mut walker = arg_list.walk();
            out.extend(arg_list.named_children(&mut walker));
        }
        // Continue with the receiver's own receiver.
        current = inner;
    }
}

/// Depth-first search for the first call node (`CallFn` / `CallMethod` /
/// `CallMacro`) in the subtree rooted at `n`.
///
/// `depth` is the number of levels that may be inspected, counting `n`
/// itself: `0` always yields `None`, `1` only checks `n`, and so on.
/// Unlike `find_call_node`, which looks at most two levels below its
/// argument, this reaches calls wrapped more deeply — e.g. `await`-wrapped
/// calls inside declarations.
pub(super) fn find_call_node_deep<'a>(n: Node<'a>, lang: &str, depth: u8) -> Option<Node<'a>> {
    // An exhausted budget ends the search before `n` is even classified.
    let remaining = depth.checked_sub(1)?;
    if matches!(
        lookup(lang, n.kind()),
        Kind::CallFn | Kind::CallMethod | Kind::CallMacro
    ) {
        return Some(n);
    }
    let mut walker = n.walk();
    let hit = n
        .children(&mut walker)
        .find_map(|child| find_call_node_deep(child, lang, remaining));
    hit
}

/// Detect whether a call node is a parameterized SQL query.
///
/// Returns `true` when:
/// 1. The first argument (arg 0) is a string literal (including template
///    strings without interpolation) containing SQL placeholder patterns:
///    `$1`..`$N`, `?`, `%s`, or `:identifier`.
/// 2. The call has at least 2 arguments (the second being the params
///    array/tuple).
///
/// This is intentionally conservative: if arg 0 is dynamic (variable,
/// concatenation, template with interpolation), returns `false`.
pub(super) fn is_parameterized_query_call(call_node: Node, code: &[u8]) -> bool {
    let Some(args) = call_node.child_by_field_name("arguments") else {
        return false;
    };
    let mut cursor = args.walk();
    let named: Vec<_> = args.named_children(&mut cursor).collect();
    // Need at least 2 arguments: query string + params
    if named.len() < 2 {
        return false;
    }
    let first_arg = named[0];
    // Extract the raw text of arg 0 — must be a string literal or
    // template string without interpolation.
    let query_text = match first_arg.kind() {
        "string" | "string_literal" | "interpreted_string_literal" | "raw_string_literal" => {
            text_of(first_arg, code)
        }
        "template_string" => {
            // Only constant templates (no interpolation)
            let mut c = first_arg.walk();
            if first_arg
                .named_children(&mut c)
                .any(|ch| ch.kind() == "template_substitution")
            {
                return false; // dynamic — not safe
            }
            text_of(first_arg, code)
        }
        // Python concatenated strings: "SELECT" "..." are implicit concat
        "concatenated_string" => {
            // If it's a concatenated_string, get the full text
            text_of(first_arg, code)
        }
        _ => return false, // not a literal
    };
    let Some(qt) = query_text else {
        return false;
    };
    has_sql_placeholders(&qt)
}

/// Report whether `s` contains at least one SQL bind-parameter placeholder.
///
/// Recognised forms (byte-wise scan, first match wins):
/// - `$1` … `$9…` — a `$` immediately followed by a digit other than `0`
///   (PostgreSQL positional);
/// - `?` anywhere (MySQL / SQLite positional);
/// - `%s` (Python DB-API / psycopg2);
/// - `:identifier` (Oracle / named parameters) — the colon must not be the
///   first byte, must directly follow a space, `=`, `(` or `,`, and must be
///   directly followed by an ASCII letter.  The prefix requirement avoids
///   matching ternaries and object-literal keys.
pub(super) fn has_sql_placeholders(s: &str) -> bool {
    let bytes = s.as_bytes();
    bytes.iter().enumerate().any(|(idx, &byte)| {
        let next = bytes.get(idx + 1).copied();
        match byte {
            b'?' => true,
            b'$' => next.is_some_and(|digit| digit.is_ascii_digit() && digit != b'0'),
            b'%' => next == Some(b's'),
            b':' => {
                let introduced =
                    idx > 0 && matches!(bytes[idx - 1], b' ' | b'=' | b'(' | b',');
                introduced && next.is_some_and(|letter| letter.is_ascii_alphabetic())
            }
            _ => false,
        }
    })
}

/// Decide whether a tree-sitter node is, purely by its syntax, a literal
/// value.
///
/// Accepted shapes:
/// * string kinds without interpolation children, PHP `encapsed_string`
///   without interpolated variables, and `template_string` without a
///   `template_substitution` child;
/// * number, boolean and null-like kinds;
/// * an `argument` / `unary_expression` / `unary_op` wrapper with exactly
///   one named child that is itself a literal (e.g. `-42`);
/// * `binary_expression` / `concatenated_string` with at least two named
///   children, all literals (e.g. `"a" + "b"`);
/// * list / array / tuple containers and `pair` entries whose named
///   children are all literals (an empty container qualifies).
///
/// Everything else is rejected.  The bias is deliberate: missing a
/// suppression opportunity is preferable to suppressing a real tainted
/// flow.
///
/// NOTE: Literal-kind classification also exists in `ast.rs::is_literal_node`.
/// The two must stay aligned across languages. TODO: consider extracting a
/// shared literal-kind helper if a third call site appears.
#[allow(clippy::only_used_in_recursion)]
pub(super) fn is_syntactic_literal(node: Node, code: &[u8]) -> bool {
    /// `true` when every named child of `parent` is a syntactic literal
    /// (vacuously `true` when there are no named children).
    fn named_children_are_literals(parent: Node, code: &[u8]) -> bool {
        let mut walker = parent.walk();
        for kid in parent.named_children(&mut walker) {
            if !is_syntactic_literal(kid, code) {
                return false;
            }
        }
        true
    }

    match node.kind() {
        // Booleans / null / nil / none
        "true" | "false" | "null" | "nil" | "none" | "null_literal" | "boolean"
        | "boolean_literal" => true,

        // Numbers
        "integer" | "integer_literal" | "int_literal" | "float" | "float_literal" | "number" => {
            true
        }

        // Scalar strings — rejected when they carry interpolation
        // (e.g. Ruby `"hello #{name}"`, Python f-strings).
        "string"
        | "string_literal"
        | "interpreted_string_literal"
        | "raw_string_literal"
        | "string_content"
        | "string_fragment" => !has_string_interpolation(node),

        // PHP encapsed_string: safe only if no variable interpolation
        "encapsed_string" => !has_interpolation_cfg(node),

        // JS/TS template string: only if no interpolation substitution
        "template_string" => {
            let mut walker = node.walk();
            let substituted = node
                .named_children(&mut walker)
                .any(|part| part.kind() == "template_substitution");
            !substituted
        }

        // Single-child wrappers: the per-argument `argument` node some
        // grammars use, and unary operators on a literal (`-42`).
        "argument" | "unary_expression" | "unary_op" => {
            node.named_child_count() == 1 && named_children_are_literals(node, code)
        }

        // String concatenation of literals: `"a" + "b"` or `"a" . "b"`
        "binary_expression" | "concatenated_string" => {
            node.named_child_count() >= 2 && named_children_are_literals(node, code)
        }

        // Containers and `{"key": "value"}` style entries: every element
        // must be a syntactic literal.
        "list"
        | "array"
        | "array_expression"
        | "array_creation_expression"
        | "tuple"
        | "tuple_expression"
        | "pair" => named_children_are_literals(node, code),

        _ => false,
    }
}

/// Report whether `node` has a direct child whose kind name contains
/// `"interpolation"`.
///
/// The motivating cases are Ruby strings such as `"hello #{name}"` and
/// Python f-strings, which may carry `interpolation` children.  Only the
/// immediate children (named and anonymous) are inspected; the check does
/// not descend further into the tree.
pub(super) fn has_string_interpolation(node: Node) -> bool {
    let mut walker = node.walk();
    // Bound to a local so the child iterator is dropped before `walker`.
    let found = node
        .children(&mut walker)
        .any(|piece| piece.kind().contains("interpolation"));
    found
}

/// Report whether an `encapsed_string` node (PHP) carries interpolation.
///
/// A direct child counts as interpolation when its kind is `variable_name`
/// or `simple_variable`, or when the kind name contains `"interpolation"`.
/// Only immediate children are examined.
pub(super) fn has_interpolation_cfg(node: Node) -> bool {
    let total = node.child_count() as u32;
    (0..total).filter_map(|idx| node.child(idx)).any(|part| {
        let kind = part.kind();
        matches!(kind, "variable_name" | "simple_variable") || kind.contains("interpolation")
    })
}

/// Return the source text of the literal on the right-hand side of a
/// declaration, assignment or return node, if there is one.
///
/// Three shapes are probed in order, and the first syntactic literal found
/// (per `is_syntactic_literal`) wins:
///
/// 1. a `value` / `right` field directly on `ast`;
/// 2. for nodes classified as `Kind::CallWrapper` or `Kind::Assignment`, a
///    `value` field on any child (or a `right` field when that child is
///    itself classified as `Kind::Assignment`);
/// 3. for nodes classified as `Kind::Return`, any named child.
///
/// The result is whatever `text_of` yields for that literal node; `None`
/// is returned when no literal is found.  Per the original documentation
/// the result populates `NodeInfo::const_text`.
pub(super) fn extract_literal_rhs(ast: Node, lang: &str, code: &[u8]) -> Option<String> {
    use crate::labels::lookup;

    // Shape 1: the value hangs directly off the node (Rust `let`, Go
    // short variable declarations, ...).
    let direct_rhs = ast
        .child_by_field_name("value")
        .or_else(|| ast.child_by_field_name("right"));
    match direct_rhs {
        Some(rhs) if is_syntactic_literal(rhs, code) => return text_of(rhs, code),
        _ => {}
    }

    let ast_kind = lookup(lang, ast.kind());

    // Shape 2: the value lives one level down, inside a declarator child
    // (JS `let`/`const` -> `variable_declarator`, ...).
    if matches!(ast_kind, Kind::CallWrapper | Kind::Assignment) {
        let mut walker = ast.walk();
        let nested_literal = ast
            .children(&mut walker)
            .filter_map(|decl| {
                decl.child_by_field_name("value").or_else(|| {
                    // Only assignment-like children expose their RHS as `right`.
                    if matches!(lookup(lang, decl.kind()), Kind::Assignment) {
                        decl.child_by_field_name("right")
                    } else {
                        None
                    }
                })
            })
            .find(|rhs| is_syntactic_literal(*rhs, code));
        if let Some(rhs) = nested_literal {
            return text_of(rhs, code);
        }
    }

    // Shape 3: `return []` / `return {}`.  Surfacing the literal text lets
    // the SSA const-return lowering (in `crate::ssa::lower`) emit
    // `SsaOp::Const(Some(text))` rather than `Const(None)`, so downstream
    // container-literal detection (heap points-to, fresh-alloc summary) can
    // recognise the fresh allocation.
    if matches!(ast_kind, Kind::Return) {
        let mut walker = ast.walk();
        let returned_literal = ast
            .named_children(&mut walker)
            .find(|operand| is_syntactic_literal(*operand, code));
        if let Some(lit) = returned_literal {
            return text_of(lit, code);
        }
    }

    None
}

/// Returns true when the call has at least one argument and every argument
/// in its `arguments` list is a syntactic literal (per
/// `is_syntactic_literal`), including the argument lists of any calls
/// nested in the callee chain.
///
/// Returns false when:
/// * the node has no `arguments` field;
/// * the argument list is empty — a zero-arg call is not "all literal",
///   because taint can still flow through a non-literal receiver (e.g.
///   `tainted.readObject()`) and the sink-suppression gate
///   (`info.all_args_literal`) must not skip such calls;
/// * any argument, here or deeper in the chain, is not a literal.
///
/// For method chains like `a("x").b(y).c()`, the outermost call node
/// represents the entire chain, so the callee side is walked via
/// `check_inner_call_args` to cover every argument list in the chain.
pub(super) fn has_only_literal_args(call_node: Node, code: &[u8]) -> bool {
    let Some(arg_list) = call_node.child_by_field_name("arguments") else {
        return false;
    };

    let mut walker = arg_list.walk();
    let mut seen = 0usize;
    // `all` stops at the first non-literal argument, like an early return.
    let every_arg_literal = arg_list.named_children(&mut walker).all(|arg| {
        seen += 1;
        is_syntactic_literal(arg, code)
    });

    // The callee chain is walked only once this call's own arguments pass.
    every_arg_literal && seen > 0 && check_inner_call_args(call_node, code)
}

/// Walk the non-argument children of `node` and verify that every nested
/// call expression in the method chain has only literal arguments.
///
/// A child that exposes an `arguments` field is treated as a call and is
/// handed to `has_only_literal_args`; any other child is recursed into as
/// a structural node (`field_expression` and the like).  Argument-list
/// children are skipped because the caller is responsible for those.
pub(super) fn check_inner_call_args(node: Node, code: &[u8]) -> bool {
    let mut walker = node.walk();
    let chain_ok = node
        .children(&mut walker)
        .filter(|part| {
            !matches!(
                part.kind(),
                "arguments" | "argument_list" | "actual_parameters"
            )
        })
        .all(|part| {
            if part.child_by_field_name("arguments").is_some() {
                has_only_literal_args(part, code)
            } else {
                check_inner_call_args(part, code)
            }
        });
    chain_ok
}

/// Collect, per positional argument of a call, the identifiers that
/// argument references.
///
/// The result holds one `Vec<String>` per positional argument, in argument
/// order; within each entry dotted paths come first and bare identifiers
/// follow as a fallback.
///
/// * Keyword / named arguments are skipped: they are tracked separately in
///   `CallMeta.kwargs` and must not shift positional indices.
/// * Any splat (`spread_element`, `dictionary_splat`, `list_splat`,
///   `splat_argument`, `hash_splat_argument`) invalidates positional
///   mapping, so the whole result is empty.
/// * A call without an `arguments` field yields an empty result.
/// * A Ruby `subshell` (backticks) has no `arguments` field; each
///   `interpolation` child that references at least one identifier becomes
///   one synthetic positional argument so taint flows from `#{var}` into
///   the synthetic "subshell" sink.
pub(super) fn extract_arg_uses(call_node: Node, code: &[u8]) -> Vec<Vec<String>> {
    /// Identifier references under `expr`: dotted paths, then bare idents.
    fn refs_of(expr: Node, code: &[u8]) -> Vec<String> {
        let mut idents = Vec::new();
        let mut paths = Vec::new();
        collect_idents_with_paths(expr, code, &mut idents, &mut paths);
        paths.extend(idents);
        paths
    }

    if call_node.kind() == "subshell" {
        let mut walker = call_node.walk();
        let lifted: Vec<Vec<String>> = call_node
            .named_children(&mut walker)
            .filter(|part| part.kind() == "interpolation")
            .map(|part| refs_of(part, code))
            .filter(|refs| !refs.is_empty())
            .collect();
        return lifted;
    }

    let Some(arg_list) = call_node.child_by_field_name("arguments") else {
        return Vec::new();
    };

    let mut positional = Vec::new();
    let mut walker = arg_list.walk();
    for arg in arg_list.named_children(&mut walker) {
        match arg.kind() {
            // Splats break the argument-to-parameter mapping entirely.
            "spread_element"
            | "dictionary_splat"
            | "list_splat"
            | "splat_argument"
            | "hash_splat_argument" => return Vec::new(),
            // Handled by the kwargs extraction; not positional.
            "keyword_argument" | "named_argument" => {}
            _ => positional.push(refs_of(arg, code)),
        }
    }
    positional
}

/// Collect the keyword / named argument bindings of a call.
///
/// Each entry is `(name, uses)`: `name` is the source text of the
/// argument's name node and `uses` lists the identifier references found in
/// its value expression, shaped like an `arg_uses` entry (dotted paths
/// first, then bare identifiers).
///
/// The result is empty when the call has no `arguments` field or no
/// `keyword_argument` / `named_argument` children — which, per the original
/// documentation, is the case for grammars such as C, Java and Go.
pub(super) fn extract_kwargs(call_node: Node, code: &[u8]) -> Vec<(String, Vec<String>)> {
    let Some(arg_list) = call_node.child_by_field_name("arguments") else {
        return Vec::new();
    };

    let mut walker = arg_list.walk();
    let bindings: Vec<(String, Vec<String>)> = arg_list
        .named_children(&mut walker)
        .filter(|arg| matches!(arg.kind(), "keyword_argument" | "named_argument"))
        .filter_map(|arg| {
            // Prefer the `name` / `value` fields; when a grammar does not
            // label them, fall back to the first and last named children.
            let last_idx = arg.named_child_count().saturating_sub(1) as u32;
            let key = arg
                .child_by_field_name("name")
                .or_else(|| arg.named_child(0))?;
            let value = arg
                .child_by_field_name("value")
                .or_else(|| arg.named_child(last_idx))?;
            let name = text_of(key, code)?;

            let mut idents = Vec::new();
            let mut uses = Vec::new();
            collect_idents_with_paths(value, code, &mut idents, &mut uses);
            uses.extend(idents);
            Some((name, uses))
        })
        .collect();
    bindings
}

/// Map a literal search pattern to the caps that removing it is taken to
/// strip.
///
/// The policy is deliberately narrow: only literals containing a
/// *known-dangerous* payload earn a strip credit, so an arbitrary
/// `.replace("foo", "bar")` is never promoted to a sanitizer.  Callers use
/// the same function on the replacement text to check that it does not
/// reintroduce a dangerous sequence.
///
/// | literal contains                | class            | cap                 |
/// |---------------------------------|------------------|---------------------|
/// | `..`, `/` or a backslash        | path traversal   | `Cap::FILE_IO`      |
/// | `<` or `>`                      | HTML metachars   | `Cap::HTML_ESCAPE`  |
/// | `;`, `\|`, `&`, `$` or backtick | shell metachars  | `Cap::SHELL_ESCAPE` |
/// | `'`, `"` or `--`                | SQL metachars    | `Cap::SQL_QUERY`    |
pub(super) fn caps_stripped_by_literal_pattern(search: &str) -> Cap {
    let has_any = |set: &[char]| search.contains(set);

    let classes = [
        (
            search.contains("..") || has_any(&['/', '\\']),
            Cap::FILE_IO,
        ),
        (has_any(&['<', '>']), Cap::HTML_ESCAPE),
        (has_any(&[';', '|', '&', '$', '`']), Cap::SHELL_ESCAPE),
        (
            has_any(&['\'', '"']) || search.contains("--"),
            Cap::SQL_QUERY,
        ),
    ];

    let mut caps = Cap::empty();
    for (matched, cap) in classes {
        if matched {
            caps |= cap;
        }
    }
    caps
}

/// Maximum number of `.replace(LIT, LIT)` hops we'll walk on a single chain.
///
/// Used as the iteration bound of the chain walk in
/// `detect_rust_replace_chain_sanitizer`; a chain that is not resolved
/// within this many loop iterations earns no sanitizer credit.
const MAX_REPLACE_CHAIN_HOPS: usize = 16;

/// Recognise a Rust `param.replace(LIT, LIT)[.replace(LIT, LIT)]*` chain whose
/// receiver bottoms out at a plain identifier, and infer which caps the chain
/// provably strips.
///
/// In tree-sitter-rust a method call is encoded as a `call_expression` whose
/// `function` field is a `field_expression` (`receiver.method`). Chained method
/// calls therefore nest `call_expression` nodes recursively through the
/// `field_expression.value` slot.  The detector walks that nest, requiring
/// every hop to be a pure literal-to-literal `replace` / `replacen` call and
/// the innermost receiver to be a bare identifier.
///
/// Literal arguments are unquoted and their escape sequences decoded before
/// classification, so the caps reflect the characters the program actually
/// searches for / inserts (`"\n"` is a newline, not a backslash; `"\""` is a
/// double quote).  A literal that cannot be decoded yields `None`.
///
/// Returns the union of caps stripped across the chain when at least one
/// search literal contains a recognised dangerous pattern, or `None` when the
/// pattern doesn't apply (so the caller falls back to normal unresolved-call
/// propagation).  `None` is also returned when any replacement literal itself
/// contains a dangerous sequence.
///
/// NOTE(review): `replacen` only replaces the first `count` matches, and the
/// count argument is not inspected here — confirm that crediting it as a
/// full strip is intended.
/// NOTE(review): the chain base is checked inside the bounded loop, so a
/// chain of exactly `MAX_REPLACE_CHAIN_HOPS` hops is rejected.
pub(super) fn detect_rust_replace_chain_sanitizer(call_ast: Node, code: &[u8]) -> Option<Cap> {
    fn is_rust_str_literal(k: &str) -> bool {
        matches!(k, "string_literal" | "raw_string_literal")
    }

    /// Decode the escape sequences of a non-raw Rust string body (the text
    /// between the quotes).  Returns `None` on a malformed or unknown escape.
    fn decode_rust_escapes(body: &str) -> Option<String> {
        let mut out = String::with_capacity(body.len());
        let mut chars = body.chars().peekable();
        while let Some(ch) = chars.next() {
            if ch != '\\' {
                out.push(ch);
                continue;
            }
            match chars.next()? {
                'n' => out.push('\n'),
                'r' => out.push('\r'),
                't' => out.push('\t'),
                '0' => out.push('\0'),
                '\\' => out.push('\\'),
                '\'' => out.push('\''),
                '"' => out.push('"'),
                // `\xNN`: exactly two hex digits.
                'x' => {
                    let hi = chars.next()?.to_digit(16)?;
                    let lo = chars.next()?.to_digit(16)?;
                    out.push(char::from_u32(hi * 16 + lo)?);
                }
                // `\u{...}`: hex digits, `_` separators allowed.
                'u' => {
                    if chars.next()? != '{' {
                        return None;
                    }
                    let mut value: u32 = 0;
                    loop {
                        match chars.next()? {
                            '}' => break,
                            '_' => {}
                            digit => {
                                value = value.checked_mul(16)?.checked_add(digit.to_digit(16)?)?;
                            }
                        }
                    }
                    out.push(char::from_u32(value)?);
                }
                // Line continuation: the newline and the whitespace that
                // follows it are not part of the string value.
                '\n' => {
                    while chars
                        .next_if(|c| matches!(c, ' ' | '\t' | '\n' | '\r'))
                        .is_some()
                    {}
                }
                _ => return None,
            }
        }
        Some(out)
    }

    /// Return the value of a `string_literal` / `raw_string_literal` node:
    /// delimiters removed and, for non-raw literals, escapes decoded.
    fn extract_rust_str_content<'a>(n: Node<'a>, code: &'a [u8]) -> Option<String> {
        // Work from the whole-node text so that content split across several
        // `string_content` / `escape_sequence` children is never truncated.
        let raw = text_of(n, code)?;
        // Optional byte-string / C-string prefix.
        let text = raw
            .strip_prefix('b')
            .or_else(|| raw.strip_prefix('c'))
            .unwrap_or(raw.as_str());

        if n.kind() == "raw_string_literal" {
            // r"…", r#"…"#, r##"…"##, … — no escape processing.
            let after_r = text.strip_prefix('r')?;
            let after_hashes = after_r.trim_start_matches('#');
            let hashes = after_r.len() - after_hashes.len();
            let body = after_hashes.strip_prefix('"')?;
            let inner_len = body.len().checked_sub(hashes + 1)?;
            let inner = body.get(..inner_len)?;
            let closing = body.get(inner_len..)?;
            // The closing delimiter must be `"` followed only by `#`s.
            let well_formed = closing
                .strip_prefix('"')
                .is_some_and(|rest| rest.bytes().all(|b| b == b'#'));
            return well_formed.then(|| inner.to_string());
        }

        let body = text.strip_prefix('"')?.strip_suffix('"')?;
        decode_rust_escapes(body)
    }

    let mut current = call_ast;
    let mut earned = Cap::empty();

    for _ in 0..MAX_REPLACE_CHAIN_HOPS {
        if current.kind() != "call_expression" {
            // Chain base: must be a plain identifier (parameter / local) to
            // qualify.  A base that's another expression (field access,
            // nested non-method call, …) breaks the sanitizer invariant.
            if current.kind() == "identifier" && !earned.is_empty() {
                return Some(earned);
            }
            return None;
        }

        // Must be a method-style call: function is a field_expression whose
        // `field` names a `replace`-like method.
        let func = current.child_by_field_name("function")?;
        if func.kind() != "field_expression" {
            return None;
        }
        let method_ident = func.child_by_field_name("field")?;
        let method_name = text_of(method_ident, code)?;
        if method_name != "replace" && method_name != "replacen" {
            return None;
        }

        // The first two positional arguments are (search, replacement); any
        // further argument (e.g. `replacen`'s count) is ignored.
        let args_node = current.child_by_field_name("arguments")?;
        let mut cursor = args_node.walk();
        let positional: Vec<Node<'_>> = args_node
            .named_children(&mut cursor)
            .filter(|c| {
                !matches!(
                    c.kind(),
                    "keyword_argument"
                        | "named_argument"
                        | "spread_element"
                        | "list_splat"
                        | "dictionary_splat"
                        | "splat_argument"
                        | "hash_splat_argument"
                )
            })
            .collect();
        let (arg0, arg1) = match positional.as_slice() {
            [a, b, ..] => (*a, *b),
            _ => return None,
        };
        if !is_rust_str_literal(arg0.kind()) || !is_rust_str_literal(arg1.kind()) {
            return None;
        }
        let search = extract_rust_str_content(arg0, code)?;
        let replacement = extract_rust_str_content(arg1, code)?;

        // If the replacement itself contains a dangerous sequence, this hop
        // can reintroduce the pattern that a later hop tries to strip.  Be
        // conservative: abandon all credit.
        if !caps_stripped_by_literal_pattern(&replacement).is_empty() {
            return None;
        }
        earned |= caps_stripped_by_literal_pattern(&search);

        // Walk to receiver via field_expression.value.
        current = func.child_by_field_name("value")?;
    }

    None
}

/// Recognise a Go `strings.Replace(s, OLD, NEW, n)` /
/// `strings.ReplaceAll(s, OLD, NEW)` call whose `OLD` literal belongs to one
/// of the known-dangerous metacharacter classes, and report the caps the
/// call is taken to strip from its first argument.
///
/// Returns `None` — so the caller falls back to normal unresolved-call
/// propagation — when the node is not a `call_expression` on the
/// `strings.Replace` / `strings.ReplaceAll` selector, when arguments 1 and 2
/// are not constant strings (per `extract_const_string_arg`), when `NEW`
/// itself contains a dangerous sequence, or when `OLD` maps to no cap.
///
/// This is the single-call Go counterpart of
/// [`detect_rust_replace_chain_sanitizer`].  Per the original documentation
/// the caller wires the resulting cap into the call's
/// [`crate::labels::DataLabel::Sanitizer`] label, which the taint engine
/// consumes via the standard sanitizer pathway.
///
/// NOTE(review): the `n` argument of `strings.Replace` is not inspected; a
/// non-negative `n` limits the number of replacements — confirm that
/// crediting such calls is intended.
pub(super) fn detect_go_replace_call_sanitizer(call_ast: Node, code: &[u8]) -> Option<Cap> {
    if call_ast.kind() != "call_expression" {
        return None;
    }

    // Callee must be the selector `strings.<method>`: `operand` is the
    // package identifier, `field` the function name.
    let callee = call_ast.child_by_field_name("function")?;
    if callee.kind() != "selector_expression" {
        return None;
    }
    let package = text_of(callee.child_by_field_name("operand")?, code);
    if package.as_deref() != Some("strings") {
        return None;
    }
    let method = text_of(callee.child_by_field_name("field")?, code)?;
    if !matches!(method.as_str(), "Replace" | "ReplaceAll") {
        return None;
    }

    // Argument layout is (s, old, new[, n]); positions 1 and 2 must both be
    // string literals.
    let old_lit = extract_const_string_arg(call_ast, 1, code)?;
    let new_lit = extract_const_string_arg(call_ast, 2, code)?;

    // A replacement that reintroduces a dangerous sequence forfeits the
    // credit, mirroring the Rust chain detector's policy.
    if !caps_stripped_by_literal_pattern(&new_lit).is_empty() {
        return None;
    }

    let stripped = caps_stripped_by_literal_pattern(&old_lit);
    (!stripped.is_empty()).then_some(stripped)
}

/// Resolve the callee name for `n`, treating `n` itself as a potential call
/// node before falling back to `first_call_ident`.
///
/// Per the original documentation, `first_call_ident` only searches
/// children, so when `n` IS the call expression (e.g. the argument
/// `sanitize(cmd)`) this function is what catches it.
///
/// Resolution by node classification (`lookup(lang, n.kind())`):
/// * C++ `new_expression` / `delete_expression` → `"new"` / `"delete"`;
/// * `Kind::Function` → the `name` field's text, else the synthetic
///   anonymous-function name for the node's start byte;
/// * `Kind::CallFn` → the first of the `function` / `method` / `name` /
///   `type` fields or the constructor type child; when that callee
///   (parentheses unwrapped) is a function expression the synthetic
///   anonymous name is returned, otherwise the callee's text;
/// * `Kind::CallMethod` → `receiver.method` when a root receiver can be
///   resolved, else just the method name;
/// * `Kind::CallMacro` → the `macro` field's text;
/// * anything else → `first_call_ident(n, lang, code)`.
pub(super) fn call_ident_of<'a>(n: Node<'a>, lang: &str, code: &'a [u8]) -> Option<String> {
    // C++ new/delete: normalize the callee before any field extraction.
    if lang == "cpp" {
        match n.kind() {
            "new_expression" => return Some(String::from("new")),
            "delete_expression" => return Some(String::from("delete")),
            _ => {}
        }
    }

    match lookup(lang, n.kind()) {
        Kind::Function => {
            // A function/closure expression passed as an argument gets the
            // same synthetic anon name used by build_sub, so
            // callback_bindings and source_to_callback can match it to the
            // extracted BodyCfg.
            let declared = n
                .child_by_field_name("name")
                .and_then(|ident| text_of(ident, code));
            Some(declared.unwrap_or_else(|| anon_fn_name(n.start_byte())))
        }
        Kind::CallFn => {
            let callee = n
                .child_by_field_name("function")
                .or_else(|| n.child_by_field_name("method"))
                .or_else(|| n.child_by_field_name("name"))
                .or_else(|| n.child_by_field_name("type"))
                .or_else(|| find_constructor_type_child(n))?;
            let inner = unwrap_parens(callee);
            if lookup(lang, inner.kind()) == Kind::Function {
                Some(anon_fn_name(inner.start_byte()))
            } else {
                text_of(callee, code)
            }
        }
        Kind::CallMethod => {
            let method = n
                .child_by_field_name("method")
                .or_else(|| n.child_by_field_name("name"))
                .and_then(|ident| text_of(ident, code));
            let receiver = n
                .child_by_field_name("object")
                .or_else(|| n.child_by_field_name("receiver"))
                .or_else(|| n.child_by_field_name("scope"))
                .and_then(|expr| root_receiver_text(expr, lang, code));
            // Without a method name there is nothing to report, whether or
            // not a receiver was found.
            method.map(|m| match receiver {
                Some(r) => format!("{r}.{m}"),
                None => m,
            })
        }
        Kind::CallMacro => {
            let macro_ident = n.child_by_field_name("macro")?;
            text_of(macro_ident, code)
        }
        _ => first_call_ident(n, lang, code),
    }
}

/// For each argument of `call_node`, return `Some(s)` when the argument is a
/// syntactic string literal (unquoted contents) and `None` otherwise.  The
/// returned vector is parallel to [`extract_arg_uses`] / [`extract_arg_callees`].
///
/// Bails on splats so that a variadic call (`f(*args)`, `f(...xs)`) produces
/// an empty vector — positional indices past the splat are meaningless and
/// downstream passes already treat an empty vector as "no info".
pub(super) fn extract_arg_string_literals(call_node: Node, code: &[u8]) -> Vec<Option<String>> {
    let Some(args_node) = call_node.child_by_field_name("arguments") else {
        return Vec::new();
    };
    let mut result = Vec::new();
    let mut cursor = args_node.walk();
    for child in args_node.named_children(&mut cursor) {
        let kind = child.kind();
        // Splat → positional indexing breaks; bail.
        if kind == "spread_element"
            || kind == "dictionary_splat"
            || kind == "list_splat"
            || kind == "splat_argument"
            || kind == "hash_splat_argument"
        {
            return Vec::new();
        }
        // Named / keyword arguments are tracked separately in `kwargs` and
        // don't participate in positional indexing — skip them here so this
        // vector stays aligned with `arg_uses`.
        if kind == "keyword_argument" || kind == "named_argument" {
            continue;
        }
        // PHP wraps each call argument in an `argument` node whose first
        // named child is the actual expression.  Unwrap one level so the
        // string-literal arm below sees the literal directly rather than
        // the wrapper kind, otherwise PHP `f("https://…")` records
        // `None` for arg 0 and downstream prefix-aware suppressions miss.
        let target = if kind == "argument" {
            child.named_child(0).unwrap_or(child)
        } else {
            child
        };
        let target_kind = target.kind();
        let literal = match target_kind {
            "string"
            | "string_literal"
            | "interpreted_string_literal"
            | "raw_string_literal"
            // PHP's double-quoted form (single-quoted maps to `string`).
            // Only safe to lift when there is no `encapsed_string` /
            // `embedded_expression` interpolation child — checked below.
            | "encapsed_string" => {
                let raw = text_of(target, code);
                raw.and_then(|s| strip_literal_quotes(&s, target, code))
            }
            _ => None,
        };
        result.push(literal);
    }
    result
}

/// Strip surrounding quotes from a syntactic string literal.  Returns the
/// raw inner text (no escape-sequence processing) — sufficient for whitelist
/// matching against shell-metachar sets.
///
/// Two strategies are tried in order:
///
/// 1. If `node` has at least one `string_content` child (Rust-style
///    two-level string nodes), the text of ALL its `string_content` and
///    `escape_sequence` children is concatenated in source order.  Grammars
///    split the content around escapes, so taking only the first
///    `string_content` child would truncate `"a\nb"` to `a`.  This also
///    spares the caller from pairing quotes for raw strings (`r"..."`,
///    `r#"..."#`, etc.).  Other children (delimiters, interpolation) are
///    skipped; callers should reject interpolated strings beforehand.
/// 2. Otherwise, if `raw` is wrapped in a matching pair of `"` or `'`, the
///    text between them is returned.
///
/// Returns `None` when neither strategy applies or a child's text cannot be
/// read.
pub(super) fn strip_literal_quotes(raw: &str, node: Node, code: &[u8]) -> Option<String> {
    let mut cursor = node.walk();
    let mut inner = String::new();
    let mut saw_content = false;
    for child in node.named_children(&mut cursor) {
        match child.kind() {
            "string_content" => {
                saw_content = true;
                inner.push_str(&text_of(child, code)?);
            }
            // Kept verbatim (still escaped) so text after an escape is not lost.
            "escape_sequence" => inner.push_str(&text_of(child, code)?),
            _ => {}
        }
    }
    if saw_content {
        return Some(inner);
    }

    // No structured content: fall back to peeling one matching quote pair.
    raw.strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'))
        .or_else(|| {
            raw.strip_prefix('\'')
                .and_then(|rest| rest.strip_suffix('\''))
        })
        .map(str::to_string)
}

/// For each positional argument of `call_node`, find the callee name if that
/// argument is itself a call expression (e.g. `sanitize(x)` in
/// `os.system(sanitize(x))`), as resolved by `call_ident_of`.
///
/// Returns a `Vec<Option<String>>` parallel to `extract_arg_uses` output:
/// * keyword / named arguments are skipped, exactly as `extract_arg_uses`
///   and `extract_arg_string_literals` do, so index `i` refers to the same
///   positional argument in all three vectors;
/// * any splat invalidates positional mapping and yields an empty vector;
/// * a call without an `arguments` field yields an empty vector.
pub(super) fn extract_arg_callees(call_node: Node, lang: &str, code: &[u8]) -> Vec<Option<String>> {
    let Some(args_node) = call_node.child_by_field_name("arguments") else {
        return Vec::new();
    };
    let mut result = Vec::new();
    let mut cursor = args_node.walk();
    for child in args_node.named_children(&mut cursor) {
        match child.kind() {
            // Bail on spread/splat like extract_arg_uses does.
            "spread_element"
            | "dictionary_splat"
            | "list_splat"
            | "splat_argument"
            | "hash_splat_argument" => return Vec::new(),
            // Not positional: skip rather than bail, otherwise a single
            // keyword argument would discard callee info for every
            // positional argument and break alignment with `arg_uses`.
            "keyword_argument" | "named_argument" => {}
            _ => result.push(call_ident_of(child, lang, code)),
        }
    }
    result
}

/// Return `(defines, uses)` for the AST fragment `ast`.
/// Returns (defines, uses, extra_defines) where extra_defines captures additional
/// bindings from destructuring patterns beyond the primary define.
pub(super) fn def_use(
    ast: Node,
    lang: &str,
    code: &[u8],
) -> (Option<String>, Vec<String>, Vec<String>) {
    match lookup(lang, ast.kind()) {
        // Declaration wrappers (let, var, short_var_declaration, etc.)
        Kind::CallWrapper => {
            let mut defs = None;
            let mut extra_defs = Vec::new();
            let mut uses = Vec::new();

            // Try direct field names first (Rust `let_declaration`, Go `short_var_declaration`)
            let def_node = ast
                .child_by_field_name("pattern")
                .or_else(|| ast.child_by_field_name("name"))
                .or_else(|| ast.child_by_field_name("left"))
                // Python `with_item`: value is `as_pattern` whose `alias` holds the target
                .or_else(|| {
                    ast.child_by_field_name("value")
                        .and_then(|v| v.child_by_field_name("alias"))
                });

            let val_node = ast
                .child_by_field_name("value")
                .or_else(|| ast.child_by_field_name("right"));

            if def_node.is_some() || val_node.is_some() {
                if let Some(pat) = def_node {
                    let mut idents = Vec::new();
                    let mut paths = Vec::new();
                    collect_idents_with_paths(pat, code, &mut idents, &mut paths);
                    let first = paths.pop().or_else(|| idents.first().cloned());
                    // Remaining idents are extra defines (for destructuring)
                    for ident in &idents {
                        if first.as_ref() != Some(ident) {
                            extra_defs.push(ident.clone());
                        }
                    }
                    defs = first;
                }
                if let Some(val) = val_node {
                    let mut idents = Vec::new();
                    let mut paths = Vec::new();
                    collect_idents_with_paths(val, code, &mut idents, &mut paths);
                    uses.extend(paths);
                    uses.extend(idents);
                }
            } else {
                // Try nested declarator pattern (JS/TS `lexical_declaration` → `variable_declarator`,
                // Java `local_variable_declaration` → `variable_declarator`,
                // C/C++ `declaration` → `init_declarator`,
                // Python/Ruby `expression_statement` → `assignment`)
                let mut cursor = ast.walk();
                for child in ast.children(&mut cursor) {
                    // Only use left/right fields for actual assignment nodes — binary
                    // expressions also have left/right but are not definitions.
                    let is_assign = matches!(lookup(lang, child.kind()), Kind::Assignment);
                    let child_name = child
                        .child_by_field_name("name")
                        .or_else(|| child.child_by_field_name("declarator"))
                        .or_else(|| {
                            if is_assign {
                                child.child_by_field_name("left")
                            } else {
                                None
                            }
                        });
                    let child_value = child.child_by_field_name("value").or_else(|| {
                        if is_assign {
                            child.child_by_field_name("right")
                        } else {
                            None
                        }
                    });

                    // Only treat this child as a declarator if it has BOTH a name
                    // and a value (or at least a value). This prevents method_invocation
                    // nodes (which have a `name` field) from being misinterpreted.
                    if child_value.is_some() {
                        if let Some(name_node) = child_name
                            && defs.is_none()
                        {
                            let mut idents = Vec::new();
                            let mut paths = Vec::new();
                            collect_idents_with_paths(name_node, code, &mut idents, &mut paths);
                            let first = paths.pop().or_else(|| idents.first().cloned());
                            for ident in &idents {
                                if first.as_ref() != Some(ident) {
                                    extra_defs.push(ident.clone());
                                }
                            }
                            defs = first;
                        }
                        if let Some(val_node) = child_value {
                            let mut idents = Vec::new();
                            let mut paths = Vec::new();
                            collect_idents_with_paths(val_node, code, &mut idents, &mut paths);
                            uses.extend(paths);
                            uses.extend(idents);
                        }
                    }
                }

                // Fallback: if still nothing found, collect all idents as uses.
                // This handles expression_statement wrappers.
                if defs.is_none() && uses.is_empty() {
                    let mut idents = Vec::new();
                    let mut paths = Vec::new();
                    collect_idents_with_paths(ast, code, &mut idents, &mut paths);
                    uses.extend(paths);
                    uses.extend(idents);
                }
            }
            (defs, uses, extra_defs)
        }

        // Plain assignment `x = y`
        Kind::Assignment => {
            let mut defs = None;
            let mut uses = Vec::new();
            if let Some(lhs) = ast.child_by_field_name("left") {
                let mut idents = Vec::new();
                let mut paths = Vec::new();
                collect_idents_with_paths(lhs, code, &mut idents, &mut paths);
                // Prefer dotted path (member expression) over last ident
                defs = paths.pop().or_else(|| idents.pop());
            }
            if let Some(rhs) = ast.child_by_field_name("right") {
                let mut idents = Vec::new();
                let mut paths = Vec::new();
                collect_idents_with_paths(rhs, code, &mut idents, &mut paths);
                uses.extend(paths);
                uses.extend(idents);
            }
            (defs, uses, vec![])
        }

        // if‑let / while‑let — the `let_condition` binds a variable from
        // the value expression.  E.g. `if let Ok(cmd) = env::var("CMD")`
        // defines `cmd` and uses `env`, `var`, `CMD`.
        Kind::If | Kind::While => {
            let cond = ast.child_by_field_name("condition");
            if let Some(c) = cond
                && c.kind() == "let_condition"
            {
                let mut defs = None;
                let mut uses = Vec::new();

                if let Some(pat) = c.child_by_field_name("pattern") {
                    let mut tmp = Vec::<String>::new();
                    collect_idents(pat, code, &mut tmp);
                    // The first plain identifier in the pattern is the binding.
                    // Skip type identifiers (e.g. "Ok" in Ok(cmd)) — take the
                    // last ident which is the inner binding name.
                    defs = tmp.into_iter().last();
                }
                if let Some(val) = c.child_by_field_name("value") {
                    collect_idents(val, code, &mut uses);
                }
                return (defs, uses, vec![]);
            }

            let mut idents = Vec::new();
            let mut paths = Vec::new();
            collect_idents_with_paths(ast, code, &mut idents, &mut paths);
            let mut uses = paths;
            uses.extend(idents);
            (None, uses, vec![])
        }

        // everything else – no definition, but may read vars
        _ => {
            let mut idents = Vec::new();
            let mut paths = Vec::new();
            collect_idents_with_paths(ast, code, &mut idents, &mut paths);
            let mut uses = paths;
            uses.extend(idents);
            (None, uses, vec![])
        }
    }
}