Skip to main content

mir_extractor/rules/
injection.rs

1//! Injection vulnerability rules.
2//!
3//! Rules detecting injection vulnerabilities:
4//! - Untrusted env input (RUSTCOLA006)
5//! - Command injection (RUSTCOLA007, RUSTCOLA098)
6//! - Command argument concatenation (RUSTCOLA031)
7//! - Log injection (RUSTCOLA076)
8//! - SQL injection (RUSTCOLA087)
9//! - Path traversal (RUSTCOLA086)
10//! - SSRF (RUSTCOLA088)
11//! - Regex injection (RUSTCOLA079)
12//! - Unchecked index (RUSTCOLA080)
13
14use std::collections::{HashMap, HashSet};
15
16use crate::dataflow::taint::TaintAnalysis;
17use crate::rules::utils::{command_rule_should_skip, INPUT_SOURCE_PATTERNS, LOG_SINK_PATTERNS};
18use crate::{
19    detect_command_invocations, extract_span_from_mir_line, Confidence, Exploitability, Finding,
20    MirPackage, Rule, RuleMetadata, RuleOrigin, Severity,
21};
22
23// ============================================================================
24// RUSTCOLA006 - UntrustedEnvInputRule
25// ============================================================================
26
27pub struct UntrustedEnvInputRule {
28    metadata: RuleMetadata,
29}
30
31impl UntrustedEnvInputRule {
32    pub fn new() -> Self {
33        Self {
34            metadata: RuleMetadata {
35                id: "RUSTCOLA006".to_string(),
36                name: "untrusted-env-input".to_string(),
37                short_description: "Reads environment-provided input".to_string(),
38                full_description: "Highlights reads from environment variables or command-line arguments which should be validated before use.".to_string(),
39                help_uri: None,
40                default_severity: Severity::Medium,
41                origin: RuleOrigin::BuiltIn,
42                cwe_ids: Vec::new(),
43                fix_suggestion: None,
44                exploitability: Exploitability::default(),
45            },
46        }
47    }
48}
49
50impl Rule for UntrustedEnvInputRule {
51    fn metadata(&self) -> &RuleMetadata {
52        &self.metadata
53    }
54
55    fn evaluate(
56        &self,
57        package: &MirPackage,
58        _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
59    ) -> Vec<Finding> {
60        let taint_analysis = TaintAnalysis::new();
61        let mut findings = Vec::new();
62
63        for function in &package.functions {
64            let (_tainted_vars, flows) = taint_analysis.analyze(function);
65
66            for flow in flows {
67                if !flow.sanitized {
68                    let sink_span = extract_span_from_mir_line(&flow.sink.sink_line);
69                    let span = sink_span.or(function.span.clone());
70
71                    let finding =
72                        flow.to_finding(&self.metadata, &function.name, &function.signature, span);
73                    findings.push(finding);
74                }
75            }
76        }
77
78        findings
79    }
80}
81
82// ============================================================================
83// RUSTCOLA007 - CommandInjectionRiskRule
84// ============================================================================
85
86pub struct CommandInjectionRiskRule {
87    metadata: RuleMetadata,
88}
89
90impl CommandInjectionRiskRule {
91    pub fn new() -> Self {
92        Self {
93            metadata: RuleMetadata {
94                id: "RUSTCOLA007".to_string(),
95                name: "process-command-execution".to_string(),
96                short_description: "Spawns external commands".to_string(),
97                full_description: "Detects uses of std::process::Command which should carefully sanitize inputs to avoid command injection.".to_string(),
98                help_uri: None,
99                default_severity: Severity::High,
100                origin: RuleOrigin::BuiltIn,
101                cwe_ids: Vec::new(),
102                fix_suggestion: None,
103                exploitability: Exploitability::default(),
104            },
105        }
106    }
107}
108
109impl Rule for CommandInjectionRiskRule {
110    fn metadata(&self) -> &RuleMetadata {
111        &self.metadata
112    }
113
114    fn evaluate(
115        &self,
116        package: &MirPackage,
117        _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
118    ) -> Vec<Finding> {
119        let mut findings = Vec::new();
120
121        for function in &package.functions {
122            if command_rule_should_skip(function, package) {
123                continue;
124            }
125
126            let invocations = detect_command_invocations(function);
127            if invocations.is_empty() {
128                continue;
129            }
130
131            for invocation in invocations {
132                let mut evidence = vec![invocation.command_line.clone()];
133                if !invocation.tainted_args.is_empty() {
134                    evidence.push(format!(
135                        "tainted arguments: {}",
136                        invocation.tainted_args.join(", ")
137                    ));
138                }
139
140                let (severity, message) = if invocation.tainted_args.is_empty() {
141                    (
142                        Severity::Medium,
143                        format!(
144                            "Process command execution detected in `{}`; review argument construction",
145                            function.name
146                        ),
147                    )
148                } else {
149                    (
150                        Severity::High,
151                        format!(
152                            "Potential command injection: tainted arguments reach Command::arg in `{}`",
153                            function.name
154                        ),
155                    )
156                };
157
158                findings.push(Finding {
159                    rule_id: self.metadata.id.clone(),
160                    rule_name: self.metadata.name.clone(),
161                    severity,
162                    message,
163                    function: function.name.clone(),
164                    function_signature: function.signature.clone(),
165                    evidence,
166                    span: function.span.clone(),
167                    confidence: Confidence::Medium,
168                    cwe_ids: Vec::new(),
169                    fix_suggestion: None,
170                    code_snippet: None,
171                    exploitability: Exploitability::default(),
172                    exploitability_score: Exploitability::default().score(),
173                ..Default::default()
174                });
175            }
176        }
177
178        findings
179    }
180}
181
182// ============================================================================
183// RUSTCOLA031 - CommandArgConcatenationRule
184// ============================================================================
185
186pub struct CommandArgConcatenationRule {
187    metadata: RuleMetadata,
188}
189
190impl CommandArgConcatenationRule {
191    pub fn new() -> Self {
192        Self {
193            metadata: RuleMetadata {
194                id: "RUSTCOLA031".to_string(),
195                name: "command-arg-concatenation".to_string(),
196                short_description: "Command built with string concatenation or formatting".to_string(),
197                full_description: "Detects Command::new or Command::arg calls that use format!, format_args!, concat!, or string concatenation operators, which can enable command injection if user input is involved.".to_string(),
198                help_uri: Some("https://cwe.mitre.org/data/definitions/78.html".to_string()),
199                default_severity: Severity::High,
200                origin: RuleOrigin::BuiltIn,
201                cwe_ids: Vec::new(),
202                fix_suggestion: None,
203                exploitability: Exploitability::default(),
204            },
205        }
206    }
207
208    fn concatenation_patterns() -> &'static [&'static str] {
209        &[
210            "format!",
211            "format_args!",
212            "concat!",
213            "std::format",
214            "core::format",
215            "alloc::format",
216            "String::from",
217            "+ &str",
218            "+ String",
219        ]
220    }
221
222    fn command_construction_patterns() -> &'static [&'static str] {
223        &["Command::new(", "Command::arg(", "Command::args("]
224    }
225}
226
227impl Rule for CommandArgConcatenationRule {
228    fn metadata(&self) -> &RuleMetadata {
229        &self.metadata
230    }
231
232    fn evaluate(
233        &self,
234        package: &MirPackage,
235        _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
236    ) -> Vec<Finding> {
237        if package.crate_name == "mir-extractor" {
238            return Vec::new();
239        }
240
241        let mut findings = Vec::new();
242
243        for function in &package.functions {
244            let mut concat_lines: Vec<(usize, String)> = Vec::new();
245            let mut command_lines: Vec<(usize, String)> = Vec::new();
246
247            // First pass: collect concatenation and command lines
248            for (idx, line) in function.body.iter().enumerate() {
249                let trimmed = line.trim();
250
251                // Check for concatenation patterns
252                for pattern in Self::concatenation_patterns() {
253                    if trimmed.contains(pattern) {
254                        concat_lines.push((idx, trimmed.to_string()));
255                        break;
256                    }
257                }
258
259                // Check for command construction
260                for pattern in Self::command_construction_patterns() {
261                    if trimmed.contains(pattern) {
262                        command_lines.push((idx, trimmed.to_string()));
263                        break;
264                    }
265                }
266            }
267
268            // Second pass: check if command lines use concatenated values
269            for (cmd_idx, cmd_line) in &command_lines {
270                // Look for concatenation that happens before or near this command
271                let relevant_concat: Vec<&String> = concat_lines
272                    .iter()
273                    .filter(|(concat_idx, _)| concat_idx < cmd_idx && cmd_idx - concat_idx < 10)
274                    .map(|(_, line)| line)
275                    .collect();
276
277                if relevant_concat.is_empty() {
278                    continue;
279                }
280
281                let mut evidence = vec![cmd_line.clone()];
282                evidence.extend(relevant_concat.iter().map(|s| (*s).clone()));
283
284                findings.push(Finding {
285                    rule_id: self.metadata.id.clone(),
286                    rule_name: self.metadata.name.clone(),
287                    severity: self.metadata.default_severity,
288                    message: format!(
289                        "Command argument uses string concatenation in `{}`, potential injection risk",
290                        function.name
291                    ),
292                    function: function.name.clone(),
293                    function_signature: function.signature.clone(),
294                    evidence,
295                    span: function.span.clone(),
296                    confidence: Confidence::Medium,
297                    cwe_ids: Vec::new(),
298                    fix_suggestion: None,
299                    code_snippet: None,
300                exploitability: Exploitability::default(),
301                exploitability_score: Exploitability::default().score(),
302                ..Default::default()
303                });
304            }
305        }
306
307        findings
308    }
309}
310
311// ============================================================================
312// RUSTCOLA076 - LogInjectionRule
313// ============================================================================
314
315pub struct LogInjectionRule {
316    metadata: RuleMetadata,
317}
318
319impl LogInjectionRule {
320    pub fn new() -> Self {
321        Self {
322            metadata: RuleMetadata {
323                id: "RUSTCOLA076".to_string(),
324                name: "log-injection".to_string(),
325                short_description: "Untrusted input may enable log injection".to_string(),
326                full_description: "Detects environment variables or command-line arguments \
327                    that flow to logging functions without newline sanitization. Attackers can \
328                    inject newline characters to forge log entries, evade detection, or corrupt \
329                    log analysis. Sanitize by replacing or escaping \\n, \\r characters, or use \
330                    structured logging formats (JSON) that properly escape special characters."
331                    .to_string(),
332                help_uri: Some("https://cwe.mitre.org/data/definitions/117.html".to_string()),
333                default_severity: Severity::Medium,
334                origin: RuleOrigin::BuiltIn,
335                cwe_ids: Vec::new(),
336                fix_suggestion: None,
337                exploitability: Exploitability::default(),
338            },
339        }
340    }
341
342    /// Sanitizer patterns that remove/escape newlines
343    fn newline_sanitizer_patterns() -> &'static [&'static str] {
344        &[
345            "::replace", // .replace() - MIR format: str::replace::<...>
346            "::trim(",   // .trim() removes trailing newlines
347            "::trim_end(",
348            "::trim_matches(",
349            "escape_",    // escape_default, escape_debug
350            "::lines(",   // .lines() splits on newlines
351            "::split(",   // .split('\n')
352            "::parse::<", // .parse::<T>() converts to different type (no newlines)
353        ]
354    }
355
356    /// Track untrusted input variables
357    fn track_untrusted_vars(body: &[String]) -> HashSet<String> {
358        let mut untrusted_vars = HashSet::new();
359
360        for line in body {
361            let trimmed = line.trim();
362
363            // Check if this line contains an input source
364            let is_source = INPUT_SOURCE_PATTERNS.iter().any(|p| trimmed.contains(p));
365
366            if is_source {
367                // Extract target variable
368                if let Some(eq_pos) = trimmed.find(" = ") {
369                    let target = trimmed[..eq_pos].trim();
370                    if let Some(var) = target
371                        .split(|c: char| !c.is_alphanumeric() && c != '_')
372                        .find(|s| s.starts_with('_'))
373                    {
374                        untrusted_vars.insert(var.to_string());
375                    }
376                }
377            }
378
379            // Propagate through assignments (but check for sanitizers)
380            if trimmed.contains(" = ") && !is_source {
381                if let Some(eq_pos) = trimmed.find(" = ") {
382                    let target = trimmed[..eq_pos].trim();
383                    let source = trimmed[eq_pos + 3..].trim();
384
385                    // Check if source uses an untrusted var (with word boundaries)
386                    let uses_untrusted =
387                        untrusted_vars.iter().any(|v| Self::contains_var(source, v));
388
389                    if uses_untrusted {
390                        // Check if there's a sanitizer on this line
391                        let has_sanitizer = Self::newline_sanitizer_patterns()
392                            .iter()
393                            .any(|p| source.contains(p));
394
395                        if !has_sanitizer {
396                            // Propagate taint
397                            if let Some(target_var) = target
398                                .split(|c: char| !c.is_alphanumeric() && c != '_')
399                                .find(|s| s.starts_with('_'))
400                            {
401                                untrusted_vars.insert(target_var.to_string());
402                            }
403                        }
404                    }
405                }
406            }
407        }
408
409        untrusted_vars
410    }
411
412    /// Check if a MIR line contains a specific variable with proper word boundaries
413    fn contains_var(line: &str, var: &str) -> bool {
414        for (idx, _) in line.match_indices(var) {
415            let after_pos = idx + var.len();
416            if after_pos >= line.len() {
417                return true;
418            }
419            let next_char = line[after_pos..].chars().next().unwrap();
420            if !next_char.is_ascii_digit() {
421                return true;
422            }
423        }
424        false
425    }
426
427    /// Find log sinks using untrusted variables
428    fn find_log_injections(body: &[String], untrusted_vars: &HashSet<String>) -> Vec<String> {
429        let mut evidence = Vec::new();
430
431        for line in body {
432            let trimmed = line.trim();
433
434            // Check if this is a log sink
435            let is_log_sink = LOG_SINK_PATTERNS.iter().any(|p| trimmed.contains(p));
436
437            if is_log_sink {
438                // Check if any untrusted variable is used (with proper word boundaries)
439                for var in untrusted_vars {
440                    if Self::contains_var(trimmed, var) {
441                        evidence.push(trimmed.to_string());
442                        break;
443                    }
444                }
445            }
446        }
447
448        evidence
449    }
450
451    /// Find helper functions that log their parameters
452    fn find_logging_helpers(package: &MirPackage) -> HashSet<String> {
453        let mut helpers = HashSet::new();
454
455        for function in &package.functions {
456            // Skip closures
457            if function.name.contains("{closure") {
458                continue;
459            }
460
461            // Check if function has a parameter (look for "debug X => _1")
462            let has_param = function.body.iter().any(|line| {
463                let trimmed = line.trim();
464                trimmed.starts_with("debug ") && trimmed.contains(" => _1")
465            });
466
467            if !has_param {
468                continue;
469            }
470
471            // Check if the parameter flows to a log sink
472            let mut param_vars: HashSet<String> = HashSet::new();
473            param_vars.insert("_1".to_string());
474
475            // Propagate through simple assignments
476            for line in &function.body {
477                let trimmed = line.trim();
478                if let Some(eq_pos) = trimmed.find(" = ") {
479                    let target = trimmed[..eq_pos].trim();
480                    let source = trimmed[eq_pos + 3..].trim();
481
482                    let uses_param = param_vars.iter().any(|v| Self::contains_var(source, v));
483                    if uses_param {
484                        if let Some(target_var) = target
485                            .split(|c: char| !c.is_alphanumeric() && c != '_')
486                            .find(|s| s.starts_with('_'))
487                        {
488                            param_vars.insert(target_var.to_string());
489                        }
490                    }
491                }
492            }
493
494            // Check if any param-derived var reaches a log sink
495            for line in &function.body {
496                let trimmed = line.trim();
497                let is_log_sink = LOG_SINK_PATTERNS.iter().any(|p| trimmed.contains(p));
498                if is_log_sink {
499                    for var in &param_vars {
500                        if Self::contains_var(trimmed, var) {
501                            helpers.insert(function.name.clone());
502                            break;
503                        }
504                    }
505                }
506            }
507        }
508
509        helpers
510    }
511
512    /// Find calls to logging helper functions with untrusted data
513    fn find_helper_log_injections(
514        body: &[String],
515        untrusted_vars: &HashSet<String>,
516        logging_helpers: &HashSet<String>,
517    ) -> Vec<String> {
518        let mut evidence = Vec::new();
519
520        for line in body {
521            let trimmed = line.trim();
522
523            // Check if this is a call to a logging helper
524            for helper in logging_helpers {
525                let helper_name = helper.split("::").last().unwrap_or(helper);
526                if trimmed.contains(&format!("{}(", helper_name)) {
527                    for var in untrusted_vars {
528                        if Self::contains_var(trimmed, var) {
529                            evidence.push(trimmed.to_string());
530                            break;
531                        }
532                    }
533                }
534            }
535        }
536
537        evidence
538    }
539}
540
541impl Rule for LogInjectionRule {
542    fn metadata(&self) -> &RuleMetadata {
543        &self.metadata
544    }
545
546    fn evaluate(
547        &self,
548        package: &MirPackage,
549        _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
550    ) -> Vec<Finding> {
551        let mut findings = Vec::new();
552
553        let logging_helpers = Self::find_logging_helpers(package);
554
555        for function in &package.functions {
556            if function.name.contains("mir_extractor") || function.name.contains("mir-extractor") {
557                continue;
558            }
559
560            let untrusted_vars = Self::track_untrusted_vars(&function.body);
561
562            if untrusted_vars.is_empty() {
563                continue;
564            }
565
566            let mut injections = Self::find_log_injections(&function.body, &untrusted_vars);
567            let helper_injections =
568                Self::find_helper_log_injections(&function.body, &untrusted_vars, &logging_helpers);
569            injections.extend(helper_injections);
570
571            if !injections.is_empty() {
572                findings.push(Finding {
573                    rule_id: self.metadata.id.clone(),
574                    rule_name: self.metadata.name.clone(),
575                    severity: self.metadata.default_severity,
576                    message: format!(
577                        "Untrusted input flows to logging in `{}` without newline sanitization. \
578                        Attackers may inject newlines to forge log entries.",
579                        function.name
580                    ),
581                    function: function.name.clone(),
582                    function_signature: function.signature.clone(),
583                    evidence: injections.into_iter().take(3).collect(),
584                    span: function.span.clone(),
585                    confidence: Confidence::Medium,
586                    cwe_ids: Vec::new(),
587                    fix_suggestion: None,
588                    code_snippet: None,
589                    exploitability: Exploitability::default(),
590                    exploitability_score: Exploitability::default().score(),
591                ..Default::default()
592                });
593            }
594        }
595
596        findings
597    }
598}
599
600// ============================================================================
601// RUSTCOLA079 - RegexInjectionRule
602// ============================================================================
603
604pub struct RegexInjectionRule {
605    metadata: RuleMetadata,
606}
607
608impl RegexInjectionRule {
609    pub fn new() -> Self {
610        Self {
611            metadata: RuleMetadata {
612                id: "RUSTCOLA079".to_string(),
613                name: "regex-injection".to_string(),
614                short_description: "Untrusted input used to construct regex pattern".to_string(),
615                full_description: "Detects environment variables, command-line arguments, or other \
616                    untrusted input flowing to Regex::new(), RegexBuilder::new(), or regex! macro \
617                    without sanitization. Attackers can craft malicious patterns causing catastrophic \
618                    backtracking (ReDoS), consuming excessive CPU and causing denial of service. \
619                    Validate regex patterns, use timeouts, limit pattern complexity, or use \
620                    regex crates with ReDoS protection (e.g., `regex` crate's default is safe, \
621                    but user-controlled patterns can still match unexpectedly)."
622                    .to_string(),
623                help_uri: Some("https://cwe.mitre.org/data/definitions/1333.html".to_string()),
624                default_severity: Severity::High,
625                origin: RuleOrigin::BuiltIn,
626                cwe_ids: Vec::new(),
627                fix_suggestion: None,
628                exploitability: Exploitability::default(),
629            },
630        }
631    }
632
633    /// Input source patterns (untrusted data origins)
634    fn input_source_patterns() -> &'static [&'static str] {
635        INPUT_SOURCE_PATTERNS
636    }
637
638    /// Sanitizer patterns that validate or escape regex input
639    fn sanitizer_patterns() -> &'static [&'static str] {
640        &[
641            "escape(",   // regex::escape()
642            "is_match(", // Pre-validated pattern
643            "validate",  // Custom validation
644            "sanitize",
645            "whitelist",
646            "allowlist",
647            "allowed_pattern",
648            "safe_pattern",
649        ]
650    }
651
652    /// Regex sink patterns where injection can occur
653    fn regex_sink_patterns() -> &'static [&'static str] {
654        &[
655            "Regex::new",        // Matches both Regex::new( and Regex::new::<
656            "RegexBuilder::new", // Matches regex::RegexBuilder::new
657            "RegexSet::new",     // Matches regex::RegexSet::new::<...>
658            "regex!(",
659            "Regex::from_str",
660            "RegexBuilder::from_str",
661            "RegexBuilder::build", // The final build call
662        ]
663    }
664
665    /// Check if there's a validation guard pattern in the MIR body
666    fn has_validation_guard(body: &[String], untrusted_vars: &HashSet<String>) -> bool {
667        let validation_funcs = ["validate", "sanitize", "is_valid", "check_pattern"];
668        let mut validation_result_var: Option<String> = None;
669
670        for line in body {
671            let trimmed = line.trim();
672
673            for validator in &validation_funcs {
674                if trimmed.to_lowercase().contains(validator) {
675                    for var in untrusted_vars {
676                        if Self::contains_var(trimmed, var) {
677                            if let Some(eq_pos) = trimmed.find(" = ") {
678                                let lhs = trimmed[..eq_pos].trim();
679                                if let Some(result_var) = lhs
680                                    .split(|c: char| !c.is_alphanumeric() && c != '_')
681                                    .find(|s| s.starts_with('_'))
682                                {
683                                    validation_result_var = Some(result_var.to_string());
684                                }
685                            }
686                        }
687                    }
688                }
689            }
690
691            if let Some(ref result_var) = validation_result_var {
692                if trimmed.contains("switchInt") && Self::contains_var(trimmed, result_var) {
693                    return true;
694                }
695            }
696        }
697
698        false
699    }
700
701    fn contains_var(line: &str, var: &str) -> bool {
702        for (idx, _) in line.match_indices(var) {
703            let after_pos = idx + var.len();
704            if after_pos >= line.len() {
705                return true;
706            }
707            let next_char = line[after_pos..].chars().next().unwrap();
708            if !next_char.is_ascii_digit() {
709                return true;
710            }
711        }
712        false
713    }
714
715    fn track_untrusted_vars(body: &[String]) -> HashSet<String> {
716        let mut untrusted_vars = HashSet::new();
717        let source_patterns = Self::input_source_patterns();
718        let sanitizer_patterns = Self::sanitizer_patterns();
719
720        let mut ref_aliases: HashMap<String, String> = HashMap::new();
721        for line in body {
722            let trimmed = line.trim();
723            if let Some(eq_pos) = trimmed.find(" = &") {
724                let lhs = trimmed[..eq_pos].trim();
725                let rhs = &trimmed[eq_pos + 3..].trim();
726                let rhs_clean = rhs.trim_start_matches("mut ");
727
728                if let Some(lhs_var) = lhs
729                    .split(|c: char| !c.is_alphanumeric() && c != '_')
730                    .find(|s| s.starts_with('_'))
731                {
732                    if let Some(rhs_var) = rhs_clean
733                        .split(|c: char| !c.is_alphanumeric() && c != '_')
734                        .find(|s| s.starts_with('_'))
735                    {
736                        ref_aliases.insert(lhs_var.to_string(), rhs_var.to_string());
737                    }
738                }
739            }
740        }
741
742        for line in body {
743            let trimmed = line.trim();
744            let is_source = source_patterns.iter().any(|p| trimmed.contains(p));
745
746            if is_source {
747                if let Some(eq_pos) = trimmed.find(" = ") {
748                    let target = trimmed[..eq_pos].trim();
749                    if let Some(var) = target
750                        .split(|c: char| !c.is_alphanumeric() && c != '_')
751                        .find(|s| s.starts_with('_'))
752                    {
753                        untrusted_vars.insert(var.to_string());
754                    }
755                }
756
757                if trimmed.contains("read_line(") {
758                    if let Some(start) = trimmed.find("read_line(") {
759                        let after = &trimmed[start..];
760                        if let Some(copy_pos) = after.rfind("copy _") {
761                            let var_start = &after[copy_pos + 5..];
762                            if let Some(end) =
763                                var_start.find(|c: char| !c.is_alphanumeric() && c != '_')
764                            {
765                                let var = &var_start[..end];
766                                if var.starts_with('_') {
767                                    untrusted_vars.insert(var.to_string());
768                                    if let Some(aliased) = ref_aliases.get(var) {
769                                        untrusted_vars.insert(aliased.clone());
770                                    }
771                                }
772                            }
773                        }
774                    }
775                }
776            }
777        }
778
779        let mut changed = true;
780        while changed {
781            changed = false;
782            for line in body {
783                let trimmed = line.trim();
784
785                if trimmed.contains(" = ") {
786                    if let Some(eq_pos) = trimmed.find(" = ") {
787                        let target = trimmed[..eq_pos].trim();
788                        let source = trimmed[eq_pos + 3..].trim();
789
790                        let uses_untrusted =
791                            untrusted_vars.iter().any(|v| Self::contains_var(source, v));
792
793                        if uses_untrusted {
794                            let has_sanitizer = sanitizer_patterns
795                                .iter()
796                                .any(|p| source.to_lowercase().contains(&p.to_lowercase()));
797
798                            if !has_sanitizer {
799                                if let Some(target_var) = target
800                                    .split(|c: char| !c.is_alphanumeric() && c != '_')
801                                    .find(|s| s.starts_with('_'))
802                                {
803                                    if !untrusted_vars.contains(target_var) {
804                                        untrusted_vars.insert(target_var.to_string());
805                                        changed = true;
806                                    }
807                                }
808                            }
809                        }
810                    }
811                }
812            }
813        }
814
815        untrusted_vars
816    }
817
818    fn propagate_taint_in_body(body: &[String], untrusted_vars: &mut HashSet<String>) {
819        let sanitizer_patterns = Self::sanitizer_patterns();
820
821        let mut changed = true;
822        while changed {
823            changed = false;
824            for line in body {
825                let trimmed = line.trim();
826
827                if trimmed.contains(" = ") {
828                    if let Some(eq_pos) = trimmed.find(" = ") {
829                        let target = trimmed[..eq_pos].trim();
830                        let source = trimmed[eq_pos + 3..].trim();
831
832                        let uses_untrusted =
833                            untrusted_vars.iter().any(|v| Self::contains_var(source, v));
834
835                        if uses_untrusted {
836                            let has_sanitizer = sanitizer_patterns
837                                .iter()
838                                .any(|p| source.to_lowercase().contains(&p.to_lowercase()));
839
840                            if !has_sanitizer {
841                                if let Some(target_var) = target
842                                    .split(|c: char| !c.is_alphanumeric() && c != '_')
843                                    .find(|s| s.starts_with('_'))
844                                {
845                                    if !untrusted_vars.contains(target_var) {
846                                        untrusted_vars.insert(target_var.to_string());
847                                        changed = true;
848                                    }
849                                }
850                            }
851                        }
852                    }
853                }
854            }
855        }
856    }
857
858    fn find_regex_injections(body: &[String], untrusted_vars: &HashSet<String>) -> Vec<String> {
859        let mut evidence = Vec::new();
860        let regex_sinks = Self::regex_sink_patterns();
861
862        for line in body {
863            let trimmed = line.trim();
864            let is_regex_sink = regex_sinks.iter().any(|p| trimmed.contains(p));
865
866            if is_regex_sink {
867                for var in untrusted_vars {
868                    if Self::contains_var(trimmed, var) {
869                        evidence.push(trimmed.to_string());
870                        break;
871                    }
872                }
873            }
874        }
875
876        evidence
877    }
878}
879
880impl Rule for RegexInjectionRule {
881    fn metadata(&self) -> &RuleMetadata {
882        &self.metadata
883    }
884
885    fn evaluate(
886        &self,
887        package: &MirPackage,
888        _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
889    ) -> Vec<Finding> {
890        let mut findings = Vec::new();
891
892        let mut tainted_closures: HashSet<String> = HashSet::new();
893
894        for function in &package.functions {
895            if function.name.contains("{closure") {
896                continue;
897            }
898
899            let untrusted_vars = Self::track_untrusted_vars(&function.body);
900            if untrusted_vars.is_empty() {
901                continue;
902            }
903
904            let combinator_patterns = [
905                "and_then",
906                "map(",
907                "filter(",
908                "filter_map(",
909                "unwrap_or_else(",
910            ];
911            for line in &function.body {
912                let trimmed = line.trim();
913                for pattern in &combinator_patterns {
914                    if trimmed.contains(pattern) {
915                        for var in &untrusted_vars {
916                            if Self::contains_var(trimmed, var) {
917                                tainted_closures.insert(function.name.clone());
918                                break;
919                            }
920                        }
921                    }
922                }
923            }
924        }
925
926        for function in &package.functions {
927            if function.name.contains("mir_extractor") || function.name.contains("mir-extractor") {
928                continue;
929            }
930
931            let is_closure = function.name.contains("{closure");
932            let mut untrusted_vars = if is_closure {
933                let parent_name = function.name.split("::{closure").next().unwrap_or("");
934                if tainted_closures.contains(parent_name) {
935                    let mut vars = HashSet::new();
936                    for line in &function.body {
937                        let trimmed = line.trim();
938                        if trimmed.starts_with("debug ") && trimmed.contains(" => _") {
939                            if let Some(var) = trimmed.split(" => _").nth(1) {
940                                let var = var.trim_end_matches(';');
941                                vars.insert(format!("_{}", var));
942                            }
943                        }
944                    }
945                    if vars.is_empty() {
946                        vars.insert("_2".to_string());
947                    }
948                    vars
949                } else {
950                    HashSet::new()
951                }
952            } else {
953                Self::track_untrusted_vars(&function.body)
954            };
955
956            if is_closure && !untrusted_vars.is_empty() {
957                Self::propagate_taint_in_body(&function.body, &mut untrusted_vars);
958            }
959
960            if untrusted_vars.is_empty() {
961                continue;
962            }
963
964            if Self::has_validation_guard(&function.body, &untrusted_vars) {
965                continue;
966            }
967
968            let injections = Self::find_regex_injections(&function.body, &untrusted_vars);
969
970            if !injections.is_empty() {
971                let report_name = if is_closure {
972                    function
973                        .name
974                        .split("::{closure")
975                        .next()
976                        .unwrap_or(&function.name)
977                        .to_string()
978                } else {
979                    function.name.clone()
980                };
981
982                findings.push(Finding {
983                    rule_id: self.metadata.id.clone(),
984                    rule_name: self.metadata.name.clone(),
985                    severity: self.metadata.default_severity,
986                    message: format!(
987                        "Untrusted input flows to regex construction in `{}`. \
988                        Attackers may craft patterns causing ReDoS (catastrophic backtracking) \
989                        or unexpected matches. Use regex::escape() for literal matching or \
990                        validate patterns against an allowlist.",
991                        report_name
992                    ),
993                    function: function.name.clone(),
994                    function_signature: function.signature.clone(),
995                    evidence: injections.into_iter().take(3).collect(),
996                    span: function.span.clone(),
997                    confidence: Confidence::Medium,
998                    cwe_ids: Vec::new(),
999                    fix_suggestion: None,
1000                    code_snippet: None,
1001                    exploitability: Exploitability::default(),
1002                    exploitability_score: Exploitability::default().score(),
1003                ..Default::default()
1004                });
1005            }
1006        }
1007
1008        findings
1009    }
1010}
1011
1012// ============================================================================
1013// RUSTCOLA080 - UncheckedIndexRule
1014// ============================================================================
1015
1016pub struct UncheckedIndexRule {
1017    metadata: RuleMetadata,
1018}
1019
1020impl UncheckedIndexRule {
1021    pub fn new() -> Self {
1022        Self {
1023            metadata: RuleMetadata {
1024                id: "RUSTCOLA080".to_string(),
1025                name: "unchecked-indexing".to_string(),
1026                short_description: "Untrusted input used as array index without bounds check"
1027                    .to_string(),
1028                full_description: "Detects array or slice indexing operations where the index \
1029                    originates from untrusted sources (environment variables, command-line \
1030                    arguments, file contents, network input) without bounds validation. Direct \
1031                    indexing with [] can panic if the index is out of bounds, causing denial of \
1032                    service. Use .get() for safe access that returns Option, or validate the \
1033                    index against the array length before indexing."
1034                    .to_string(),
1035                help_uri: Some("https://cwe.mitre.org/data/definitions/129.html".to_string()),
1036                default_severity: Severity::Medium,
1037                origin: RuleOrigin::BuiltIn,
1038                cwe_ids: Vec::new(),
1039                fix_suggestion: None,
1040                exploitability: Exploitability::default(),
1041            },
1042        }
1043    }
1044
1045    fn input_source_patterns() -> &'static [&'static str] {
1046        INPUT_SOURCE_PATTERNS
1047    }
1048
/// Returns `true` when `var` occurs in `line` and the occurrence is not
/// immediately followed by an ASCII digit.
///
/// The trailing-digit check keeps `_1` from matching inside `_10`. The
/// character before the occurrence is not inspected.
fn contains_var(line: &str, var: &str) -> bool {
    line.match_indices(var).any(|(start, hit)| {
        line[start + hit.len()..]
            .chars()
            .next()
            .map_or(true, |following| !following.is_ascii_digit())
    })
}
1062
1063    fn track_untrusted_indices(
1064        body: &[String],
1065        tainted_return_funcs: &HashSet<String>,
1066    ) -> HashSet<String> {
1067        let mut untrusted_vars = HashSet::new();
1068        let source_patterns = Self::input_source_patterns();
1069
1070        let mut mut_refs: HashMap<String, String> = HashMap::new();
1071        for line in body {
1072            let trimmed = line.trim();
1073            if trimmed.contains("= &mut _") {
1074                if let Some(eq_pos) = trimmed.find(" = ") {
1075                    let target = trimmed[..eq_pos].trim();
1076                    let source = trimmed[eq_pos + 3..].trim();
1077                    if let Some(target_var) = target
1078                        .split(|c: char| !c.is_alphanumeric() && c != '_')
1079                        .find(|s| s.starts_with('_'))
1080                    {
1081                        if let Some(src_start) = source.find('_') {
1082                            let src_var: String = source[src_start..]
1083                                .chars()
1084                                .take_while(|c| c.is_alphanumeric() || *c == '_')
1085                                .collect();
1086                            if !src_var.is_empty() {
1087                                mut_refs.insert(target_var.to_string(), src_var);
1088                            }
1089                        }
1090                    }
1091                }
1092            }
1093        }
1094
1095        for line in body {
1096            let trimmed = line.trim();
1097            let is_source = source_patterns.iter().any(|p| trimmed.contains(p));
1098
1099            if is_source {
1100                if let Some(eq_pos) = trimmed.find(" = ") {
1101                    let target = trimmed[..eq_pos].trim();
1102                    if let Some(var) = target
1103                        .split(|c: char| !c.is_alphanumeric() && c != '_')
1104                        .find(|s| s.starts_with('_'))
1105                    {
1106                        untrusted_vars.insert(var.to_string());
1107                    }
1108                }
1109
1110                if trimmed.contains("read_line") {
1111                    for (ref_var, target_var) in &mut_refs {
1112                        if trimmed.contains(ref_var) {
1113                            untrusted_vars.insert(target_var.clone());
1114                        }
1115                    }
1116                }
1117            }
1118
1119            if !tainted_return_funcs.is_empty() {
1120                if let Some(eq_pos) = trimmed.find(" = ") {
1121                    let source = trimmed[eq_pos + 3..].trim();
1122                    for func_name in tainted_return_funcs {
1123                        let short_name = func_name.split("::").last().unwrap_or(func_name);
1124                        if source.contains(&format!("{}(", short_name))
1125                            || source.contains(&format!("{}::", short_name))
1126                        {
1127                            let target = trimmed[..eq_pos].trim();
1128                            if let Some(var) = target
1129                                .split(|c: char| !c.is_alphanumeric() && c != '_')
1130                                .find(|s| s.starts_with('_'))
1131                            {
1132                                untrusted_vars.insert(var.to_string());
1133                            }
1134                        }
1135                    }
1136                }
1137            }
1138        }
1139
1140        let mut changed = true;
1141        while changed {
1142            changed = false;
1143            for line in body {
1144                let trimmed = line.trim();
1145                let uses_untrusted = untrusted_vars
1146                    .iter()
1147                    .any(|v| Self::contains_var(trimmed, v));
1148
1149                if !uses_untrusted {
1150                    continue;
1151                }
1152
1153                if let Some(eq_pos) = trimmed.find(" = ") {
1154                    let target = trimmed[..eq_pos].trim();
1155                    if let Some(target_var) = target
1156                        .split(|c: char| !c.is_alphanumeric() && c != '_')
1157                        .find(|s| s.starts_with('_'))
1158                    {
1159                        if !untrusted_vars.contains(target_var) {
1160                            let dominated_by_untrusted = trimmed.contains("::parse")
1161                                || trimmed.contains("parse::")
1162                                || trimmed.contains("from_str")
1163                                || trimmed.contains("::unwrap(")
1164                                || trimmed.contains("::expect(")
1165                                || {
1166                                    let source = trimmed[eq_pos + 3..].trim();
1167                                    untrusted_vars.iter().any(|v| Self::contains_var(source, v))
1168                                };
1169
1170                            if dominated_by_untrusted {
1171                                untrusted_vars.insert(target_var.to_string());
1172                                changed = true;
1173                            }
1174                        }
1175                    }
1176                }
1177            }
1178        }
1179
1180        untrusted_vars
1181    }
1182
1183    fn has_bounds_validation(body: &[String], untrusted_vars: &HashSet<String>) -> bool {
1184        let mut comparison_vars: HashSet<String> = HashSet::new();
1185
1186        for line in body {
1187            let trimmed = line.trim();
1188
1189            if trimmed.contains("::get(")
1190                || trimmed.contains("::get_mut(")
1191                || trimmed.contains("::get::<")
1192            {
1193                continue;
1194            }
1195
1196            if trimmed.contains(".len()") || trimmed.contains("::len(") {
1197                for var in untrusted_vars {
1198                    if Self::contains_var(trimmed, var) {
1199                        return true;
1200                    }
1201                }
1202            }
1203
1204            if (trimmed.contains("::min(") || trimmed.contains("::max("))
1205                && (trimmed.contains("len") || trimmed.contains("_"))
1206            {
1207                for var in untrusted_vars {
1208                    if Self::contains_var(trimmed, var) {
1209                        return true;
1210                    }
1211                }
1212            }
1213
1214            let has_comparison = trimmed.contains("Lt(")
1215                || trimmed.contains("Le(")
1216                || trimmed.contains("Gt(")
1217                || trimmed.contains("Ge(");
1218            if has_comparison {
1219                for var in untrusted_vars {
1220                    if Self::contains_var(trimmed, var) {
1221                        if let Some(eq_pos) = trimmed.find(" = ") {
1222                            let target = trimmed[..eq_pos].trim();
1223                            if let Some(target_var) = target
1224                                .split(|c: char| !c.is_alphanumeric() && c != '_')
1225                                .find(|s| s.starts_with('_'))
1226                            {
1227                                comparison_vars.insert(target_var.to_string());
1228                            }
1229                        }
1230                    }
1231                }
1232            }
1233
1234            if trimmed.contains("switchInt(") {
1235                for comp_var in &comparison_vars {
1236                    if Self::contains_var(trimmed, comp_var) {
1237                        return true;
1238                    }
1239                }
1240            }
1241        }
1242
1243        false
1244    }
1245
1246    fn find_unsafe_indexing(body: &[String], untrusted_vars: &HashSet<String>) -> Vec<String> {
1247        let mut evidence = Vec::new();
1248
1249        for line in body {
1250            let trimmed = line.trim();
1251
1252            if trimmed.contains("::index(") || trimmed.contains("::index_mut(") {
1253                if trimmed.contains("::get(") || trimmed.contains("::get_mut(") {
1254                    continue;
1255                }
1256
1257                if let Some(idx_start) = trimmed.find("::index") {
1258                    let after_index = &trimmed[idx_start..];
1259                    if let Some(comma_pos) = after_index.find(", ") {
1260                        let index_arg = &after_index[comma_pos + 2..];
1261                        for var in untrusted_vars {
1262                            if Self::contains_var(index_arg, var) {
1263                                evidence.push(trimmed.to_string());
1264                                break;
1265                            }
1266                        }
1267                    }
1268                }
1269            }
1270
1271            if trimmed.contains('[') && trimmed.contains(']') {
1272                if trimmed.contains("= [") {
1273                    continue;
1274                }
1275
1276                if trimmed.contains("let ") || trimmed.contains("::get") {
1277                    continue;
1278                }
1279
1280                if let Some(bracket_start) = trimmed.find('[') {
1281                    if let Some(bracket_end) = trimmed[bracket_start..].find(']') {
1282                        let index_content =
1283                            &trimmed[bracket_start + 1..bracket_start + bracket_end];
1284
1285                        for var in untrusted_vars {
1286                            if Self::contains_var(index_content, var) {
1287                                evidence.push(trimmed.to_string());
1288                                break;
1289                            }
1290                        }
1291                    }
1292                }
1293            }
1294        }
1295
1296        evidence
1297    }
1298
1299    fn find_tainted_return_functions(package: &MirPackage) -> HashSet<String> {
1300        let mut tainted_funcs = HashSet::new();
1301        let source_patterns = Self::input_source_patterns();
1302
1303        for function in &package.functions {
1304            if function.name.contains("mir_extractor")
1305                || function.name.contains("mir-extractor")
1306                || function.name.contains("__")
1307            {
1308                continue;
1309            }
1310
1311            let has_source = function
1312                .body
1313                .iter()
1314                .any(|line| source_patterns.iter().any(|p| line.contains(p)));
1315
1316            if !has_source {
1317                continue;
1318            }
1319
1320            let empty_set = HashSet::new();
1321            let tainted = Self::track_untrusted_indices(&function.body, &empty_set);
1322
1323            let returns_tainted = function.body.iter().any(|line| {
1324                let trimmed = line.trim();
1325                if trimmed.starts_with("_0 = ") || trimmed.starts_with("_0 =") {
1326                    tainted.iter().any(|v| Self::contains_var(trimmed, v))
1327                } else {
1328                    false
1329                }
1330            });
1331
1332            if returns_tainted {
1333                tainted_funcs.insert(function.name.clone());
1334            }
1335        }
1336
1337        tainted_funcs
1338    }
1339}
1340
1341impl Rule for UncheckedIndexRule {
1342    fn metadata(&self) -> &RuleMetadata {
1343        &self.metadata
1344    }
1345
1346    fn evaluate(
1347        &self,
1348        package: &MirPackage,
1349        _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
1350    ) -> Vec<Finding> {
1351        let mut findings = Vec::new();
1352
1353        let tainted_return_funcs = Self::find_tainted_return_functions(package);
1354
1355        for function in &package.functions {
1356            if function.name.contains("mir_extractor")
1357                || function.name.contains("mir-extractor")
1358                || function.name.contains("__")
1359            {
1360                continue;
1361            }
1362
1363            let untrusted_vars =
1364                Self::track_untrusted_indices(&function.body, &tainted_return_funcs);
1365
1366            if untrusted_vars.is_empty() {
1367                continue;
1368            }
1369
1370            if Self::has_bounds_validation(&function.body, &untrusted_vars) {
1371                continue;
1372            }
1373
1374            let unsafe_indexing = Self::find_unsafe_indexing(&function.body, &untrusted_vars);
1375
1376            if !unsafe_indexing.is_empty() {
1377                findings.push(Finding {
1378                    rule_id: self.metadata.id.clone(),
1379                    rule_name: self.metadata.name.clone(),
1380                    severity: self.metadata.default_severity,
1381                    message: format!(
1382                        "Untrusted input used as array index in `{}` without bounds checking. \
1383                        This can cause panic if index is out of bounds. Use .get() for safe \
1384                        access or validate index < array.len() before indexing.",
1385                        function.name
1386                    ),
1387                    function: function.name.clone(),
1388                    function_signature: function.signature.clone(),
1389                    evidence: unsafe_indexing.into_iter().take(3).collect(),
1390                    span: function.span.clone(),
1391                    confidence: Confidence::Medium,
1392                    cwe_ids: Vec::new(),
1393                    fix_suggestion: None,
1394                    code_snippet: None,
1395                    exploitability: Exploitability::default(),
1396                    exploitability_score: Exploitability::default().score(),
1397                ..Default::default()
1398                });
1399            }
1400        }
1401
1402        findings
1403    }
1404}
1405
1406/// Register all injection rules with the rule engine.
1407pub fn register_injection_rules(engine: &mut crate::RuleEngine) {
1408    engine.register_rule(Box::new(UntrustedEnvInputRule::new()));
1409    engine.register_rule(Box::new(CommandInjectionRiskRule::new()));
1410    engine.register_rule(Box::new(CommandArgConcatenationRule::new()));
1411    engine.register_rule(Box::new(LogInjectionRule::new()));
1412    engine.register_rule(Box::new(RegexInjectionRule::new()));
1413    engine.register_rule(Box::new(UncheckedIndexRule::new()));
1414    engine.register_rule(Box::new(PathTraversalRule::new()));
1415    engine.register_rule(Box::new(SsrfRule::new()));
1416    engine.register_rule(Box::new(SqlInjectionRule::new()));
1417    engine.register_rule(Box::new(InterProceduralCommandInjectionRule::new()));
1418}
1419
1420// ============================================================================
1421// RUSTCOLA086 - PathTraversalRule
1422// ============================================================================
1423
1424pub struct PathTraversalRule {
1425    metadata: RuleMetadata,
1426}
1427
1428impl PathTraversalRule {
1429    pub fn new() -> Self {
1430        Self {
1431            metadata: RuleMetadata {
1432                id: "RUSTCOLA086".to_string(),
1433                name: "path-traversal".to_string(),
1434                short_description: "Untrusted input used in filesystem path".to_string(),
1435                full_description: "Detects when user-controlled input flows to filesystem \
1436                    operations without proper validation. Attackers can use path traversal \
1437                    sequences like '../' or absolute paths to access files outside intended \
1438                    directories. Use canonicalize() + starts_with() validation, or strip \
1439                    dangerous path components before use."
1440                    .to_string(),
1441                help_uri: Some(
1442                    "https://owasp.org/www-community/attacks/Path_Traversal".to_string(),
1443                ),
1444                default_severity: Severity::High,
1445                origin: RuleOrigin::BuiltIn,
1446                cwe_ids: Vec::new(),
1447                fix_suggestion: None,
1448                exploitability: Exploitability::default(),
1449            },
1450        }
1451    }
1452
/// Substrings identifying filesystem operations treated as sinks by this rule.
///
/// Entries are plain substrings, so a shorter entry such as `fs::read` also
/// covers longer names that contain it. Entry order is kept as declared.
const FS_SINKS: &'static [&'static str] = &[
    // Reading and opening files.
    "fs::read_to_string",
    "fs::read",
    "File::open",
    "std::fs::read_to_string",
    "std::fs::read",
    "std::fs::File::open",
    "OpenOptions::open",
    "read_to_string(",
    "read_to_string::<",
    // Writing files and creating files or directories.
    "fs::write",
    "fs::create_dir",
    "fs::create_dir_all",
    "std::fs::write",
    "std::fs::create_dir",
    "std::fs::create_dir_all",
    "File::create",
    "std::fs::File::create",
    "create_dir_all::<",
    "create_dir::<",
    // Removing files and directories.
    "fs::remove_file",
    "fs::remove_dir",
    "fs::remove_dir_all",
    "std::fs::remove_file",
    "std::fs::remove_dir",
    "std::fs::remove_dir_all",
    "remove_file::<",
    "remove_dir::<",
    "remove_dir_all::<",
    // Copying and renaming.
    "fs::copy",
    "fs::rename",
    "std::fs::copy",
    "std::fs::rename",
    "copy::<",
    "rename::<",
    // Building paths from components.
    "Path::join",
    "PathBuf::push",
    "PathBuf::join",
];
1492
/// Substrings marking a MIR line as producing untrusted input.
///
/// `track_untrusted_paths` taints the assignment target of any line that
/// contains one of these. Entry order is kept as declared.
const UNTRUSTED_SOURCES: &'static [&'static str] = &[
    // Environment variables.
    "env::var(",
    "env::var_os(",
    "std::env::var(",
    "std::env::var_os(",
    " = var(",
    " = var::",
    // Command-line arguments.
    "env::args()",
    "std::env::args()",
    " = args(",
    "Args>::next(",
    // Standard input.
    " = stdin()",
    "Stdin::lock(",
    "BufRead>::read_line(",
    "read_line(move",
    "io::stdin()",
];
1510
/// Substrings taken as evidence that a path has been validated or cleaned.
///
/// NOTE(review): the code consuming this list is outside the visible part of
/// the file; confirm how matches are applied before changing entries.
/// Entry order is kept as declared.
const SANITIZERS: &'static [&'static str] = &[
    // Path normalisation and prefix checks.
    "canonicalize(",
    "starts_with(",
    "strip_prefix(",
    "is_relative(",
    "is_absolute(",
    // Substring and slice checks as they appear in MIR.
    "::contains(move",
    "::contains(copy",
    "slice::<impl",
    // String rewriting and character filtering.
    "String::replace",
    "str::replace",
    ".filter(",
    "chars().all(",
    "is_alphanumeric",
    // Name fragments of validation helpers.
    "validate",
    "sanitize",
    "check_path",
    "is_safe",
    "safe_join",
];
1531
1532    fn track_untrusted_paths(&self, body: &[String]) -> HashSet<String> {
1533        let mut untrusted_vars = HashSet::new();
1534
1535        for line in body {
1536            let trimmed = line.trim();
1537            for source in Self::UNTRUSTED_SOURCES {
1538                if trimmed.contains(source) {
1539                    if let Some(target) = self.extract_assignment_target(trimmed) {
1540                        untrusted_vars.insert(target);
1541                    }
1542                }
1543            }
1544        }
1545
1546        let mut changed = true;
1547        let mut iterations = 0;
1548
1549        while changed && iterations < 20 {
1550            changed = false;
1551            iterations += 1;
1552
1553            for line in body {
1554                let trimmed = line.trim();
1555                if !trimmed.contains(" = ") {
1556                    continue;
1557                }
1558
1559                if let Some(target) = self.extract_assignment_target(trimmed) {
1560                    for untrusted in untrusted_vars.clone() {
1561                        if self.contains_var(trimmed, &untrusted) {
1562                            if !untrusted_vars.contains(&target) {
1563                                untrusted_vars.insert(target.clone());
1564                                changed = true;
1565                            }
1566                        }
1567                    }
1568                }
1569            }
1570        }
1571
1572        // Handle read_line buffer tainting
1573        for line in body {
1574            if line.contains("read_line(") {
1575                if let Some(buffer_ref) = Self::extract_read_line_buffer(line) {
1576                    if let Some(actual_var) = Self::resolve_reference(body, &buffer_ref) {
1577                        untrusted_vars.insert(actual_var);
1578                    } else {
1579                        untrusted_vars.insert(buffer_ref);
1580                    }
1581                }
1582            }
1583        }
1584
1585        untrusted_vars
1586    }
1587
1588    fn resolve_reference(body: &[String], ref_var: &str) -> Option<String> {
1589        for line in body {
1590            let trimmed = line.trim();
1591            if trimmed.starts_with(ref_var) && trimmed.contains(" = &") {
1592                if let Some(amp_idx) = trimmed.find('&') {
1593                    let after_amp = &trimmed[amp_idx + 1..];
1594                    let target = if after_amp.starts_with("mut ") {
1595                        after_amp[4..].trim_end_matches(';')
1596                    } else {
1597                        after_amp.trim_end_matches(';')
1598                    };
1599                    let target = target.trim();
1600                    if target.starts_with('_') {
1601                        return Some(target.to_string());
1602                    }
1603                }
1604            }
1605        }
1606        None
1607    }
1608
1609    fn extract_read_line_buffer(line: &str) -> Option<String> {
1610        if let Some(idx) = line.find("read_line(") {
1611            let after = &line[idx..];
1612            if let Some(comma_idx) = after.find(',') {
1613                let second_arg = &after[comma_idx + 1..];
1614                for word in second_arg.split_whitespace() {
1615                    let clean = word.trim_matches(|c| c == ')' || c == '(' || c == '&');
1616                    if clean.starts_with('_') && clean.len() > 1 {
1617                        return Some(clean.to_string());
1618                    }
1619                }
1620            }
1621        }
1622        None
1623    }
1624
1625    fn has_path_sanitization(&self, body: &[String], _untrusted_vars: &HashSet<String>) -> bool {
1626        let body_str = body.join("\n");
1627
1628        for sanitizer in Self::SANITIZERS {
1629            if body_str.contains(sanitizer) {
1630                return true;
1631            }
1632        }
1633
1634        if body_str.contains("switchInt(") {
1635            if body_str.contains("contains(")
1636                || body_str.contains("starts_with(")
1637                || body_str.contains("is_relative()")
1638                || body_str.contains("strip_prefix(")
1639            {
1640                return true;
1641            }
1642        }
1643
1644        if body_str.contains("Err(")
1645            && (body_str.contains("Permission")
1646                || body_str.contains("Invalid")
1647                || body_str.contains("traversal")
1648                || body_str.contains("not in allow"))
1649        {
1650            return true;
1651        }
1652
1653        false
1654    }
1655
1656    fn find_unsafe_fs_operations(
1657        &self,
1658        body: &[String],
1659        untrusted_vars: &HashSet<String>,
1660    ) -> Vec<String> {
1661        let mut evidence = Vec::new();
1662
1663        for line in body {
1664            let trimmed = line.trim();
1665            for sink in Self::FS_SINKS {
1666                if trimmed.contains(sink) {
1667                    for var in untrusted_vars {
1668                        if trimmed.contains(&format!("move {}", var))
1669                            || trimmed.contains(&format!("copy {}", var))
1670                            || trimmed.contains(&format!("&{}", var))
1671                            || trimmed.contains(&format!("({}", var))
1672                        {
1673                            evidence.push(trimmed.to_string());
1674                            break;
1675                        }
1676                    }
1677                }
1678            }
1679        }
1680
1681        evidence
1682    }
1683
1684    fn extract_assignment_target(&self, line: &str) -> Option<String> {
1685        let parts: Vec<&str> = line.split('=').collect();
1686        if parts.len() >= 2 {
1687            let target = parts[0].trim();
1688            if target.starts_with('_') && target.chars().skip(1).all(|c| c.is_ascii_digit()) {
1689                return Some(target.to_string());
1690            }
1691            if let Some(var) = target.split_whitespace().find(|s| s.starts_with('_')) {
1692                let var_clean = var.trim_end_matches(':');
1693                if var_clean.starts_with('_') {
1694                    return Some(var_clean.to_string());
1695                }
1696            }
1697        }
1698        None
1699    }
1700
1701    fn contains_var(&self, line: &str, var: &str) -> bool {
1702        line.contains(&format!("move {}", var))
1703            || line.contains(&format!("copy {}", var))
1704            || line.contains(&format!("&{}", var))
1705            || line.contains(&format!("({})", var))
1706            || line.contains(&format!("{},", var))
1707            || line.contains(&format!(" {} ", var))
1708            || line.contains(&format!("[{}]", var))
1709    }
1710}
1711
1712impl Rule for PathTraversalRule {
1713    fn metadata(&self) -> &RuleMetadata {
1714        &self.metadata
1715    }
1716
1717    fn evaluate(
1718        &self,
1719        package: &MirPackage,
1720        inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
1721    ) -> Vec<Finding> {
1722        let mut findings = Vec::new();
1723
1724        for function in &package.functions {
1725            if function.name.contains("mir_extractor")
1726                || function.name.contains("mir-extractor")
1727                || function.name.contains("__")
1728                || function.name.contains("test_")
1729                || function.name.contains("detect_rustup")
1730                || function.name.contains("find_rust_toolchain")
1731                || function.name.contains("detect_toolchain")
1732                || function.name.contains("find_cargo_cola_workspace")
1733            {
1734                continue;
1735            }
1736
1737            let untrusted_vars = self.track_untrusted_paths(&function.body);
1738
1739            if untrusted_vars.is_empty() {
1740                continue;
1741            }
1742
1743            if self.has_path_sanitization(&function.body, &untrusted_vars) {
1744                continue;
1745            }
1746
1747            let unsafe_ops = self.find_unsafe_fs_operations(&function.body, &untrusted_vars);
1748
1749            if !unsafe_ops.is_empty() {
1750                let severity = if unsafe_ops.iter().any(|op| {
1751                    op.contains("remove")
1752                        || op.contains("write")
1753                        || op.contains("create")
1754                        || op.contains("rename")
1755                }) {
1756                    Severity::High
1757                } else {
1758                    Severity::Medium
1759                };
1760
1761                findings.push(Finding {
1762                    rule_id: self.metadata.id.clone(),
1763                    rule_name: self.metadata.name.clone(),
1764                    severity,
1765                    message: format!(
1766                        "Untrusted input used in filesystem path in `{}`. \
1767                        User-controlled paths can enable access to files outside \
1768                        intended directories using '../' sequences or absolute paths. \
1769                        Use canonicalize() + starts_with() validation, or sanitize \
1770                        path input to remove dangerous components.",
1771                        function.name
1772                    ),
1773                    function: function.name.clone(),
1774                    function_signature: function.signature.clone(),
1775                    evidence: unsafe_ops.into_iter().take(3).collect(),
1776                    span: function.span.clone(),
1777                    confidence: Confidence::Medium,
1778                    cwe_ids: Vec::new(),
1779                    fix_suggestion: None,
1780                    code_snippet: None,
1781                    exploitability: Exploitability::default(),
1782                    exploitability_score: Exploitability::default().score(),
1783                ..Default::default()
1784                });
1785            }
1786        }
1787
1788        // Inter-procedural analysis (use shared analysis if available)
1789        if let Some(analysis) = inter_analysis {
1790            let flows = analysis.detect_inter_procedural_flows(package);
1791
1792            let mut reported_functions: HashSet<String> =
1793                findings.iter().map(|f| f.function.clone()).collect();
1794
1795            for flow in flows {
1796                if flow.sink_type != "filesystem" {
1797                    continue;
1798                }
1799
1800                let is_internal = flow.sink_function.contains("mir_extractor")
1801                    || flow.sink_function.contains("mir-extractor")
1802                    || flow.sink_function.contains("cache_envelope")
1803                    || flow.sink_function.contains("detect_toolchain")
1804                    || flow.sink_function.contains("extract_artifacts")
1805                    || flow.sink_function.contains("__")
1806                    || flow.source_function.contains("mir_extractor")
1807                    || flow.source_function.contains("mir-extractor")
1808                    || flow.source_function.contains("cache_envelope")
1809                    || flow.source_function.contains("fingerprint")
1810                    || flow.source_function.contains("toolchain");
1811                if is_internal {
1812                    continue;
1813                }
1814
1815                if reported_functions.contains(&flow.sink_function) {
1816                    continue;
1817                }
1818
1819                if flow.sanitized {
1820                    continue;
1821                }
1822
1823                let sink_func = package
1824                    .functions
1825                    .iter()
1826                    .find(|f| f.name == flow.sink_function);
1827
1828                let span = sink_func.map(|f| f.span.clone()).unwrap_or_default();
1829                let signature = sink_func.map(|f| f.signature.clone()).unwrap_or_default();
1830
1831                findings.push(Finding {
1832                    rule_id: self.metadata.id.clone(),
1833                    rule_name: self.metadata.name.clone(),
1834                    severity: Severity::High,
1835                    message: format!(
1836                        "Inter-procedural path traversal: untrusted input from `{}` \
1837                        flows through {} to filesystem operation in `{}`. \
1838                        User-controlled paths can enable access to files outside \
1839                        intended directories.",
1840                        flow.source_function,
1841                        if flow.call_chain.len() > 2 {
1842                            format!("{} function calls", flow.call_chain.len() - 1)
1843                        } else {
1844                            "helper function".to_string()
1845                        },
1846                        flow.sink_function
1847                    ),
1848                    function: flow.sink_function.clone(),
1849                    function_signature: signature,
1850                    evidence: vec![flow.describe()],
1851                    span,
1852                    ..Default::default()
1853                });
1854
1855                reported_functions.insert(flow.sink_function.clone());
1856            }
1857        }
1858
1859        findings
1860    }
1861}
1862
1863// ============================================================================
1864// RUSTCOLA088 - SsrfRule
1865// ============================================================================
1866
1867pub struct SsrfRule {
1868    metadata: RuleMetadata,
1869}
1870
1871impl SsrfRule {
1872    pub fn new() -> Self {
1873        Self {
1874            metadata: RuleMetadata {
1875                id: "RUSTCOLA088".to_string(),
1876                name: "server-side-request-forgery".to_string(),
1877                short_description: "Untrusted input used as HTTP request URL".to_string(),
1878                full_description: "Detects when user-controlled input is used directly as \
1879                    an HTTP request URL without validation. This enables attackers to make \
1880                    the server send requests to arbitrary destinations, potentially accessing \
1881                    internal services (localhost, cloud metadata at 169.254.169.254), scanning \
1882                    internal networks, or exfiltrating data. Validate URLs against an allowlist \
1883                    of permitted hosts and schemes before making requests."
1884                    .to_string(),
1885                help_uri: Some(
1886                    "https://owasp.org/www-community/attacks/Server_Side_Request_Forgery"
1887                        .to_string(),
1888                ),
1889                default_severity: Severity::High,
1890                origin: RuleOrigin::BuiltIn,
1891                cwe_ids: Vec::new(),
1892                fix_suggestion: None,
1893                exploitability: Exploitability::default(),
1894            },
1895        }
1896    }
1897
1898    // Sinks: Outbound HTTP request methods that actually make network calls
1899    // NOTE: http::Request and hyper::Request are types for INCOMING requests,
1900    // not outbound calls - reading Request::uri() is just parsing, not SSRF
1901    const HTTP_SINKS: &'static [&'static str] = &[
1902        // reqwest - the most common HTTP client
1903        "reqwest::blocking::get",
1904        "reqwest::get",
1905        "blocking::get",
1906        "Client>::get",
1907        "Client>::post",
1908        "Client>::put",
1909        "Client>::delete",
1910        "Client>::patch",
1911        "Client>::head",
1912        "RequestBuilder>::send",
1913        // ureq - lightweight HTTP client
1914        "ureq::get",
1915        "ureq::post",
1916        "ureq::put",
1917        "ureq::delete",
1918        "ureq::request",
1919        "Agent>::get",
1920        "Agent>::post",
1921        "Request>::call",
1922        // hyper client (outbound)
1923        "hyper::Client",
1924        // Generic patterns for constructing outbound requests
1925        "get::<&String>",
1926        "get::<&str>",
1927        "post::<&String>",
1928        "post::<&str>",
1929    ];
1930
1931    // Sources: User-controlled input that could contain malicious URLs
1932    // NOTE: Removed generic "Request" to avoid flagging every HTTP handler
1933    const UNTRUSTED_SOURCES: &'static [&'static str] = &[
1934        "env::var(",
1935        "env::var_os(",
1936        "std::env::var(",
1937        "std::env::var_os(",
1938        " = var(",
1939        " = var::",
1940        "var::<&str>",
1941        "var_os::<",
1942        "env::args()",
1943        "std::env::args()",
1944        " = args()",
1945        "Args>::next(",
1946        "args().collect",
1947        " = stdin()",
1948        "Stdin::lock(",
1949        "Stdin>::lock",
1950        "BufRead>::read_line(",
1951        "read_line(move",
1952        "io::stdin()",
1953        "Lines>::next(",
1954        "fs::read_to_string(",
1955        "read_to_string(move",
1956        "read_to_string::",
1957        "BufReader>::read",
1958        "Read>::read",
1959        // Web framework extractors that contain user input
1960        "axum::extract::Query",
1961        "axum::extract::Path",
1962        "axum::extract::Form",
1963        "axum::Json",
1964        "actix_web::web::Query",
1965        "actix_web::web::Path",
1966        "actix_web::web::Form",
1967        "actix_web::web::Json",
1968        // Request body parsing (user data)
1969        "body::to_bytes",
1970        "hyper::body::to_bytes",
1971        "BodyExt>::collect",
1972    ];
1973
1974    const SANITIZERS: &'static [&'static str] = &[
1975        "Url::parse(",
1976        "url::Url::parse(",
1977        "Uri::from_str(",
1978        "host_str(",
1979        "scheme(",
1980        "starts_with(",
1981        "ends_with(",
1982        "contains(",
1983        "allowed",
1984        "whitelist",
1985        "allowlist",
1986        "trusted",
1987        "permitted",
1988        "localhost",
1989        "127.0.0.1",
1990        "169.254.169.254",
1991        "192.168.",
1992        "10.",
1993        "172.",
1994        ".internal",
1995        "== \"https\"",
1996        "== \"http\"",
1997        "is_alphanumeric",
1998        "chars().all(",
1999        " as Iterator>::all::<",
2000        "Eq>::eq::<",
2001        "PartialEq>::eq::<",
2002        "match ",
2003        "Some(\"",
2004    ];
2005
2006    fn track_untrusted_vars(&self, body: &[String]) -> HashSet<String> {
2007        let mut untrusted_vars = HashSet::new();
2008
2009        for line in body {
2010            let trimmed = line.trim();
2011            for source in Self::UNTRUSTED_SOURCES {
2012                if trimmed.contains(source) {
2013                    if let Some(target) = self.extract_assignment_target(trimmed) {
2014                        untrusted_vars.insert(target);
2015                    }
2016                }
2017            }
2018        }
2019
2020        let mut changed = true;
2021        let mut iterations = 0;
2022
2023        while changed && iterations < 20 {
2024            changed = false;
2025            iterations += 1;
2026
2027            for line in body {
2028                let trimmed = line.trim();
2029                if !trimmed.contains(" = ") {
2030                    continue;
2031                }
2032
2033                if let Some(target) = self.extract_assignment_target(trimmed) {
2034                    for untrusted in untrusted_vars.clone() {
2035                        if self.contains_var(trimmed, &untrusted) {
2036                            if !untrusted_vars.contains(&target) {
2037                                untrusted_vars.insert(target.clone());
2038                                changed = true;
2039                            }
2040                        }
2041                    }
2042                }
2043            }
2044        }
2045
2046        untrusted_vars
2047    }
2048
2049    fn has_ssrf_sanitization(&self, body: &[String]) -> bool {
2050        let body_str = body.join("\n");
2051        for sanitizer in Self::SANITIZERS {
2052            if body_str.contains(sanitizer) {
2053                return true;
2054            }
2055        }
2056        false
2057    }
2058
2059    fn find_unsafe_http_operations(
2060        &self,
2061        body: &[String],
2062        untrusted_vars: &HashSet<String>,
2063    ) -> Vec<String> {
2064        let mut evidence = Vec::new();
2065
2066        for line in body {
2067            let trimmed = line.trim();
2068            for sink in Self::HTTP_SINKS {
2069                if trimmed.contains(sink) {
2070                    for var in untrusted_vars {
2071                        if self.contains_var(trimmed, var) {
2072                            evidence.push(trimmed.to_string());
2073                            break;
2074                        }
2075                    }
2076                }
2077            }
2078        }
2079
2080        evidence
2081    }
2082
2083    fn extract_assignment_target(&self, line: &str) -> Option<String> {
2084        let parts: Vec<&str> = line.split('=').collect();
2085        if parts.len() >= 2 {
2086            let target = parts[0].trim();
2087            if target.starts_with('_') && target.chars().skip(1).all(|c| c.is_ascii_digit()) {
2088                return Some(target.to_string());
2089            }
2090            if let Some(var) = target.split_whitespace().find(|s| s.starts_with('_')) {
2091                let var_clean = var.trim_end_matches(':');
2092                if var_clean.starts_with('_') {
2093                    return Some(var_clean.to_string());
2094                }
2095            }
2096        }
2097        None
2098    }
2099
2100    fn contains_var(&self, line: &str, var: &str) -> bool {
2101        line.contains(&format!("move {}", var))
2102            || line.contains(&format!("copy {}", var))
2103            || line.contains(&format!("&{}", var))
2104            || line.contains(&format!("({})", var))
2105            || line.contains(&format!("{},", var))
2106            || line.contains(&format!(" {} ", var))
2107            || line.contains(&format!("[{}]", var))
2108            || line.contains(&format!("(({} as", var))
2109    }
2110}
2111
2112impl Rule for SsrfRule {
2113    fn metadata(&self) -> &RuleMetadata {
2114        &self.metadata
2115    }
2116
2117    fn evaluate(
2118        &self,
2119        package: &MirPackage,
2120        inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
2121    ) -> Vec<Finding> {
2122        let mut findings = Vec::new();
2123
2124        for function in &package.functions {
2125            if function.name.contains("mir_extractor")
2126                || function.name.contains("mir-extractor")
2127                || function.name.contains("__")
2128                || function.name.contains("test_")
2129                || function.name == "detect_toolchain"
2130            {
2131                continue;
2132            }
2133
2134            let body_str = function.body.join("\n");
2135            let has_http_client = Self::HTTP_SINKS.iter().any(|s| body_str.contains(s))
2136                || body_str.contains("reqwest")
2137                || body_str.contains("ureq")
2138                || body_str.contains("hyper");
2139
2140            if !has_http_client {
2141                continue;
2142            }
2143
2144            let untrusted_vars = self.track_untrusted_vars(&function.body);
2145
2146            if untrusted_vars.is_empty() {
2147                continue;
2148            }
2149
2150            if self.has_ssrf_sanitization(&function.body) {
2151                continue;
2152            }
2153
2154            let unsafe_ops = self.find_unsafe_http_operations(&function.body, &untrusted_vars);
2155
2156            if !unsafe_ops.is_empty() {
2157                findings.push(Finding {
2158                    rule_id: self.metadata.id.clone(),
2159                    rule_name: self.metadata.name.clone(),
2160                    severity: Severity::High,
2161                    message: format!(
2162                        "Server-Side Request Forgery (SSRF) vulnerability in `{}`. \
2163                        User-controlled input is used as an HTTP request URL without \
2164                        validation. Attackers could access internal services, cloud \
2165                        metadata (169.254.169.254), or scan internal networks. Validate \
2166                        URLs against an allowlist of permitted hosts.",
2167                        function.name
2168                    ),
2169                    function: function.name.clone(),
2170                    function_signature: function.signature.clone(),
2171                    evidence: unsafe_ops.into_iter().take(3).collect(),
2172                    span: function.span.clone(),
2173                    confidence: Confidence::Medium,
2174                    cwe_ids: Vec::new(),
2175                    fix_suggestion: None,
2176                    code_snippet: None,
2177                    exploitability: Exploitability::default(),
2178                    exploitability_score: Exploitability::default().score(),
2179                ..Default::default()
2180                });
2181            }
2182        }
2183
2184        // Inter-procedural analysis (use shared analysis if available)
2185        if let Some(analysis) = inter_analysis {
2186            let flows = analysis.detect_inter_procedural_flows(package);
2187
2188            let mut reported_functions: HashSet<String> =
2189                findings.iter().map(|f| f.function.clone()).collect();
2190
2191            for flow in flows {
2192                if flow.sink_type != "http" {
2193                    continue;
2194                }
2195
2196                let is_internal = flow.sink_function.contains("mir_extractor")
2197                    || flow.sink_function.contains("__")
2198                    || flow.source_function.contains("mir_extractor");
2199                if is_internal {
2200                    continue;
2201                }
2202
2203                if reported_functions.contains(&flow.sink_function) {
2204                    continue;
2205                }
2206
2207                if flow.sanitized {
2208                    continue;
2209                }
2210
2211                let sink_func = package
2212                    .functions
2213                    .iter()
2214                    .find(|f| f.name == flow.sink_function);
2215
2216                let span = sink_func.map(|f| f.span.clone()).unwrap_or_default();
2217                let signature = sink_func.map(|f| f.signature.clone()).unwrap_or_default();
2218
2219                findings.push(Finding {
2220                    rule_id: self.metadata.id.clone(),
2221                    rule_name: self.metadata.name.clone(),
2222                    severity: Severity::High,
2223                    message: format!(
2224                        "Inter-procedural SSRF: untrusted input from `{}` \
2225                        flows through {} to HTTP request in `{}`. Validate \
2226                        URLs against an allowlist before making requests.",
2227                        flow.source_function,
2228                        if flow.call_chain.len() > 2 {
2229                            format!("{} function calls", flow.call_chain.len() - 1)
2230                        } else {
2231                            "helper function".to_string()
2232                        },
2233                        flow.sink_function
2234                    ),
2235                    function: flow.sink_function.clone(),
2236                    function_signature: signature,
2237                    evidence: vec![flow.describe()],
2238                    span,
2239                    ..Default::default()
2240                });
2241
2242                reported_functions.insert(flow.sink_function.clone());
2243            }
2244        }
2245
2246        findings
2247    }
2248}
2249
2250// ============================================================================
2251// RUSTCOLA087 - SqlInjectionRule
2252// ============================================================================
2253
2254pub struct SqlInjectionRule {
2255    metadata: RuleMetadata,
2256}
2257
2258impl SqlInjectionRule {
2259    pub fn new() -> Self {
2260        Self {
2261            metadata: RuleMetadata {
2262                id: "RUSTCOLA087".to_string(),
2263                name: "sql-injection".to_string(),
2264                short_description: "Untrusted input used in SQL query construction".to_string(),
2265                full_description: "Detects when user-controlled input is concatenated or \
2266                    formatted directly into SQL query strings instead of using parameterized \
2267                    queries. This allows attackers to modify query logic, bypass authentication, \
2268                    or extract/modify sensitive data. Use prepared statements with bind \
2269                    parameters (?, $1, :name) instead of string interpolation."
2270                    .to_string(),
2271                help_uri: Some("https://owasp.org/www-community/attacks/SQL_Injection".to_string()),
2272                default_severity: Severity::High,
2273                origin: RuleOrigin::BuiltIn,
2274                cwe_ids: Vec::new(),
2275                fix_suggestion: None,
2276                exploitability: Exploitability::default(),
2277            },
2278        }
2279    }
2280
2281    const SQL_STATEMENT_PATTERNS: &'static [&'static str] = &[
2282        "SELECT ",
2283        "SELECT\t",
2284        "SELECT\n",
2285        " FROM ",
2286        "INSERT INTO",
2287        "INSERT  INTO",
2288        "UPDATE ",
2289        "UPDATE\t",
2290        " SET ",
2291        "DELETE FROM",
2292        "DELETE  FROM",
2293        "DROP TABLE",
2294        "DROP DATABASE",
2295        "DROP INDEX",
2296        "DROP VIEW",
2297        "CREATE TABLE",
2298        "CREATE DATABASE",
2299        "CREATE INDEX",
2300        "CREATE VIEW",
2301        "ALTER TABLE",
2302        "ALTER DATABASE",
2303        "TRUNCATE TABLE",
2304        " WHERE ",
2305        " ORDER BY",
2306        " GROUP BY",
2307        " HAVING ",
2308        " JOIN ",
2309        " LEFT JOIN",
2310        " RIGHT JOIN",
2311        " INNER JOIN",
2312        " OUTER JOIN",
2313        " UNION ",
2314        " UNION ALL",
2315        " VALUES",
2316        " VALUES(",
2317        "?)",
2318        "?, ",
2319        " ? ",
2320        "$1",
2321        "$2",
2322        "$3",
2323    ];
2324
2325    const SQL_SINKS: &'static [&'static str] = &[
2326        "format_args!",
2327        "format!",
2328        "String::push_str",
2329        "str::to_string",
2330        "+",
2331        "execute(",
2332        "query(",
2333        "query_as(",
2334        "sql_query(",
2335        "prepare(",
2336        "execute_batch(",
2337        "query_row(",
2338        "query_map(",
2339        "raw_query(",
2340        "raw_sql(",
2341        "sqlx::query",
2342        "sqlx::query_as",
2343        "sqlx::query_scalar",
2344        "diesel::sql_query",
2345        "diesel::delete",
2346        "diesel::insert_into",
2347        "diesel::update",
2348        "rusqlite::execute",
2349        "Connection::execute",
2350        "Connection::query_row",
2351        "Statement::execute",
2352    ];
2353
2354    const UNTRUSTED_SOURCES: &'static [&'static str] = &[
2355        "env::var(",
2356        "env::var_os(",
2357        "std::env::var(",
2358        "std::env::var_os(",
2359        " = var(",
2360        " = var::",
2361        "env::args()",
2362        "std::env::args()",
2363        " = args(",
2364        "Args>::next(",
2365        " = stdin()",
2366        "Stdin::lock(",
2367        "BufRead>::read_line(",
2368        "read_line(move",
2369        "io::stdin()",
2370        "Request",
2371        "Form",
2372        "Query",
2373        "Json",
2374        "Path",
2375    ];
2376
2377    // v1.0.1: Actual SQL execution sinks (not just string building)
2378    // Only flag SQL injection if these execution functions are present
2379    const SQL_EXECUTION_SINKS: &'static [&'static str] = &[
2380        "execute(",
2381        "query(",
2382        "query_as(",
2383        "query_one(",
2384        "query_row(",
2385        "query_map(",
2386        "prepare(",
2387        "sql_query(",
2388        "execute_batch(",
2389        "raw_query(",
2390        "Connection::execute",
2391        "Client::query",
2392        "sqlx::query",
2393        "diesel::sql_query",
2394        "rusqlite::execute",
2395        "tokio_postgres::query",
2396        "Statement::execute",
2397        "Transaction::execute",
2398        "Pool::execute",
2399    ];
2400
2401    // v1.0.1: Patterns indicating non-SQL contexts (logs, errors, CLI help)
2402    // Strings containing SQL keywords in these contexts are false positives
2403    const NON_SQL_CONTEXTS: &'static [&'static str] = &[
2404        // Logging macros
2405        "error!",
2406        "warn!",
2407        "info!",
2408        "debug!",
2409        "trace!",
2410        "tracing::",
2411        "log::",
2412        "eprintln!",
2413        "println!",
2414        // Error handling
2415        "anyhow::Context",
2416        ".context(",
2417        "Error::new",
2418        "bail!",
2419        "thiserror",
2420        "snafu",
2421        // CLI/help text patterns
2422        "--help",
2423        "Usage:",
2424        "USAGE:",
2425        "[OPTIONS]",
2426        "[ARGS]",
2427        "Examples",
2428        "[env:",
2429        "[default:",
2430        // Database terminology in non-SQL contexts
2431        "catalog",
2432        "persist",
2433        "snapshot",
2434        "compaction",
2435        "partition",
2436        // Error message patterns
2437        "failed to",
2438        "unable to",
2439        "could not",
2440        "unexpected error",
2441    ];
2442
2443    const SANITIZERS: &'static [&'static str] = &[
2444        " ? ",
2445        "?)",
2446        "?, ",
2447        "$1",
2448        "$2",
2449        ":name",
2450        ":username",
2451        ":id",
2452        ".bind(",
2453        "bind_value(",
2454        "bind::<",
2455        "QueryBuilder",
2456        "filter(",
2457        ".eq(",
2458        ".ne(",
2459        ".gt(",
2460        ".lt(",
2461        "parse::<i",
2462        "parse::<u",
2463        "parse::<f",
2464        "i32::from_str",
2465        "i64::from_str",
2466        "u32::from_str",
2467        "u64::from_str",
2468        "::contains(move",
2469        "::contains(copy",
2470        "allowed_",
2471        "whitelist",
2472        "allowlist",
2473        "escape(",
2474        "quote(",
2475        "sanitize",
2476        "replace(",
2477        "replace('",
2478        "::replace::",
2479        "is_alphanumeric",
2480        "chars().all(",
2481        " as Iterator>::all::<",
2482    ];
2483
2484    /// v1.0.1: Check if function body contains an actual SQL execution sink
2485    fn has_sql_execution_sink(&self, body: &[String]) -> bool {
2486        let body_str = body.join("\n");
2487        Self::SQL_EXECUTION_SINKS
2488            .iter()
2489            .any(|sink| body_str.contains(sink))
2490    }
2491
2492    /// v1.0.1: Check if evidence line is in a non-SQL context (log/error/CLI)
2493    fn is_non_sql_context(&self, body: &[String], evidence_line: &str) -> bool {
2494        // Check if evidence line itself contains non-SQL patterns
2495        let evidence_lower = evidence_line.to_lowercase();
2496        for pattern in Self::NON_SQL_CONTEXTS {
2497            if evidence_lower.contains(&pattern.to_lowercase()) {
2498                return true;
2499            }
2500        }
2501
2502        // Check if function body is primarily logging/error handling
2503        let body_str = body.join("\n").to_lowercase();
2504        let log_count = Self::NON_SQL_CONTEXTS
2505            .iter()
2506            .filter(|p| body_str.contains(&p.to_lowercase()))
2507            .count();
2508
2509        // If multiple non-SQL context patterns found, likely not SQL code
2510        log_count >= 2
2511    }
2512
2513    fn track_untrusted_vars(&self, body: &[String]) -> HashSet<String> {
2514        let mut untrusted_vars = HashSet::new();
2515
2516        for line in body {
2517            let trimmed = line.trim();
2518            for source in Self::UNTRUSTED_SOURCES {
2519                if trimmed.contains(source) {
2520                    if let Some(target) = self.extract_assignment_target(trimmed) {
2521                        untrusted_vars.insert(target);
2522                    }
2523                }
2524            }
2525        }
2526
2527        let mut changed = true;
2528        let mut iterations = 0;
2529
2530        while changed && iterations < 20 {
2531            changed = false;
2532            iterations += 1;
2533
2534            for line in body {
2535                let trimmed = line.trim();
2536                if !trimmed.contains(" = ") {
2537                    continue;
2538                }
2539
2540                if let Some(target) = self.extract_assignment_target(trimmed) {
2541                    for untrusted in untrusted_vars.clone() {
2542                        if self.contains_var(trimmed, &untrusted) {
2543                            if !untrusted_vars.contains(&target) {
2544                                untrusted_vars.insert(target.clone());
2545                                changed = true;
2546                            }
2547                        }
2548                    }
2549                }
2550            }
2551        }
2552
2553        untrusted_vars
2554    }
2555
2556    fn has_sql_sanitization(&self, body: &[String]) -> bool {
2557        let body_str = body.join("\n");
2558        for sanitizer in Self::SANITIZERS {
2559            if body_str.contains(sanitizer) {
2560                return true;
2561            }
2562        }
2563        false
2564    }
2565
2566    fn find_unsafe_sql_operations(
2567        &self,
2568        body: &[String],
2569        untrusted_vars: &HashSet<String>,
2570    ) -> Vec<String> {
2571        let mut evidence = Vec::new();
2572
2573        let has_sql_const = body.iter().any(|line| {
2574            if !line.contains("const ") && !line.contains("[const ") {
2575                return false;
2576            }
2577            let line_upper = line.to_uppercase();
2578            Self::SQL_STATEMENT_PATTERNS
2579                .iter()
2580                .any(|pattern| line_upper.contains(pattern))
2581        });
2582
2583        let has_promoted_sql_ref = body.iter().any(|line| {
2584            line.contains("::promoted[")
2585                && body.iter().any(|other| {
2586                    if !other.contains("[const ") && !other.contains(" = [const ") {
2587                        return false;
2588                    }
2589                    let other_upper = other.to_uppercase();
2590                    Self::SQL_STATEMENT_PATTERNS
2591                        .iter()
2592                        .any(|pattern| other_upper.contains(pattern))
2593                })
2594        });
2595
2596        if !has_sql_const && !has_promoted_sql_ref {
2597            return evidence;
2598        }
2599
2600        let has_tainted_format = body.iter().any(|line| {
2601            let trimmed = line.trim();
2602            let is_format_related = trimmed.contains("fmt::Arguments")
2603                || trimmed.contains("fmt::rt::Argument")
2604                || trimmed.contains("Arguments::new")
2605                || trimmed.contains("Argument::new")
2606                || trimmed.contains("core::fmt::")
2607                || trimmed.contains("format_args");
2608
2609            if is_format_related {
2610                for var in untrusted_vars {
2611                    if self.contains_var(trimmed, var) {
2612                        return true;
2613                    }
2614                }
2615            }
2616            false
2617        });
2618
2619        if has_tainted_format {
2620            for line in body {
2621                if !line.contains("const ") && !line.contains("[const ") {
2622                    continue;
2623                }
2624                let line_upper = line.to_uppercase();
2625                if Self::SQL_STATEMENT_PATTERNS
2626                    .iter()
2627                    .any(|pattern| line_upper.contains(pattern))
2628                {
2629                    evidence.push(line.trim().to_string());
2630                }
2631            }
2632        }
2633
2634        evidence
2635    }
2636
2637    fn extract_assignment_target(&self, line: &str) -> Option<String> {
2638        let parts: Vec<&str> = line.split('=').collect();
2639        if parts.len() >= 2 {
2640            let target = parts[0].trim();
2641            if target.starts_with('_') && target.chars().skip(1).all(|c| c.is_ascii_digit()) {
2642                return Some(target.to_string());
2643            }
2644            if let Some(var) = target.split_whitespace().find(|s| s.starts_with('_')) {
2645                let var_clean = var.trim_end_matches(':');
2646                if var_clean.starts_with('_') {
2647                    return Some(var_clean.to_string());
2648                }
2649            }
2650        }
2651        None
2652    }
2653
2654    fn contains_var(&self, line: &str, var: &str) -> bool {
2655        line.contains(&format!("move {}", var))
2656            || line.contains(&format!("copy {}", var))
2657            || line.contains(&format!("&{}", var))
2658            || line.contains(&format!("({})", var))
2659            || line.contains(&format!("{},", var))
2660            || line.contains(&format!(" {} ", var))
2661            || line.contains(&format!("[{}]", var))
2662    }
2663
2664    fn extract_function_params(&self, body: &[String]) -> HashSet<String> {
2665        let mut params = HashSet::new();
2666        for line in body {
2667            let trimmed = line.trim();
2668            if trimmed.starts_with("debug ") && trimmed.contains(" => _") {
2669                if let Some(start) = trimmed.find(" => _") {
2670                    let after = &trimmed[start + 5..];
2671                    let var: String = after
2672                        .chars()
2673                        .take_while(|c| c.is_ascii_digit() || *c == '_')
2674                        .collect();
2675                    if !var.is_empty() && var != "0" {
2676                        params.insert(format!("_{}", var.trim_start_matches('_')));
2677                    }
2678                }
2679            }
2680        }
2681        params
2682    }
2683
2684    fn propagate_taint(&self, body: &[String], untrusted_vars: &mut HashSet<String>) {
2685        let mut changed = true;
2686        let mut iterations = 0;
2687
2688        while changed && iterations < 20 {
2689            changed = false;
2690            iterations += 1;
2691
2692            for line in body {
2693                let trimmed = line.trim();
2694                if !trimmed.contains(" = ") {
2695                    continue;
2696                }
2697
2698                if let Some(target) = self.extract_assignment_target(trimmed) {
2699                    for untrusted in untrusted_vars.clone() {
2700                        if self.contains_var(trimmed, &untrusted) {
2701                            if !untrusted_vars.contains(&target) {
2702                                untrusted_vars.insert(target.clone());
2703                                changed = true;
2704                            }
2705                        }
2706                    }
2707                }
2708            }
2709        }
2710    }
2711}
2712
2713impl Rule for SqlInjectionRule {
2714    fn metadata(&self) -> &RuleMetadata {
2715        &self.metadata
2716    }
2717
2718    fn evaluate(
2719        &self,
2720        package: &MirPackage,
2721        inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
2722    ) -> Vec<Finding> {
2723        let mut findings = Vec::new();
2724
2725        // Build tainted return functions map
2726        let mut tainted_return_functions: HashSet<String> = HashSet::new();
2727
2728        for function in &package.functions {
2729            let has_source = function
2730                .body
2731                .iter()
2732                .any(|line| Self::UNTRUSTED_SOURCES.iter().any(|src| line.contains(src)));
2733
2734            if has_source {
2735                let has_sql_const = function.body.iter().any(|line| {
2736                    if !line.contains("const ") && !line.contains("[const ") {
2737                        return false;
2738                    }
2739                    let upper = line.to_uppercase();
2740                    Self::SQL_STATEMENT_PATTERNS
2741                        .iter()
2742                        .any(|pattern| upper.contains(pattern))
2743                });
2744
2745                if !has_sql_const {
2746                    tainted_return_functions.insert(function.name.clone());
2747                }
2748            }
2749        }
2750
2751        for function in &package.functions {
2752            if function.name.contains("mir_extractor")
2753                || function.name.contains("mir-extractor")
2754                || function.name.contains("__")
2755                || function.name.contains("test_")
2756            {
2757                continue;
2758            }
2759
2760            // v1.0.1: Skip if no actual SQL execution sink exists
2761            // This prevents false positives from log messages containing SQL keywords
2762            if !self.has_sql_execution_sink(&function.body) {
2763                continue;
2764            }
2765
2766            let mut untrusted_vars = self.track_untrusted_vars(&function.body);
2767
2768            // Add taint from called functions
2769            for line in &function.body {
2770                let trimmed = line.trim();
2771                if trimmed.contains(" = ") {
2772                    for tainted_fn in &tainted_return_functions {
2773                        let fn_name = tainted_fn.split("::").last().unwrap_or(tainted_fn);
2774                        if trimmed.contains(&format!("= {}()", fn_name)) {
2775                            if let Some(target) = self.extract_assignment_target(trimmed) {
2776                                untrusted_vars.insert(target);
2777                            }
2778                        }
2779                    }
2780                }
2781            }
2782
2783            // Check function parameters if no other sources
2784            if untrusted_vars.is_empty() {
2785                let params = self.extract_function_params(&function.body);
2786                if !params.is_empty() {
2787                    let mut param_vars = params.clone();
2788                    self.propagate_taint(&function.body, &mut param_vars);
2789
2790                    let has_sql_const = function.body.iter().any(|line| {
2791                        if !line.contains("const ") && !line.contains("[const ") {
2792                            return false;
2793                        }
2794                        let upper = line.to_uppercase();
2795                        Self::SQL_STATEMENT_PATTERNS
2796                            .iter()
2797                            .any(|pattern| upper.contains(pattern))
2798                    });
2799
2800                    if has_sql_const {
2801                        let has_param_in_format = function.body.iter().any(|line| {
2802                            let trimmed = line.trim();
2803                            let is_format_related = trimmed.contains("fmt::Arguments")
2804                                || trimmed.contains("Argument::")
2805                                || trimmed.contains("format_args")
2806                                || trimmed.contains("core::fmt::")
2807                                || trimmed.contains("new_display")
2808                                || trimmed.contains("new_debug");
2809
2810                            is_format_related
2811                                && param_vars.iter().any(|v| self.contains_var(trimmed, v))
2812                        });
2813
2814                        if has_param_in_format {
2815                            untrusted_vars = param_vars;
2816                        }
2817                    }
2818                }
2819            }
2820
2821            if untrusted_vars.is_empty() {
2822                continue;
2823            }
2824
2825            if self.has_sql_sanitization(&function.body) {
2826                continue;
2827            }
2828
2829            let unsafe_ops = self.find_unsafe_sql_operations(&function.body, &untrusted_vars);
2830
2831            // v1.0.1: Filter out evidence from non-SQL contexts (logs, errors, CLI help)
2832            let filtered_ops: Vec<String> = unsafe_ops
2833                .into_iter()
2834                .filter(|op| !self.is_non_sql_context(&function.body, op))
2835                .collect();
2836
2837            if !filtered_ops.is_empty() {
2838                findings.push(Finding {
2839                    rule_id: self.metadata.id.clone(),
2840                    rule_name: self.metadata.name.clone(),
2841                    severity: Severity::High,
2842                    message: format!(
2843                        "SQL injection vulnerability in `{}`. Untrusted input is used \
2844                        in SQL query construction without parameterization. Use prepared \
2845                        statements with bind parameters (?, $1, :name) instead of string \
2846                        formatting or concatenation.",
2847                        function.name
2848                    ),
2849                    function: function.name.clone(),
2850                    function_signature: function.signature.clone(),
2851                    evidence: filtered_ops.into_iter().take(3).collect(),
2852                    span: function.span.clone(),
2853                    confidence: Confidence::Medium,
2854                    cwe_ids: Vec::new(),
2855                    fix_suggestion: None,
2856                    code_snippet: None,
2857                    exploitability: Exploitability::default(),
2858                    exploitability_score: Exploitability::default().score(),
2859                ..Default::default()
2860                });
2861            }
2862        }
2863
2864        // Inter-procedural analysis (use shared analysis if available)
2865        if let Some(analysis) = inter_analysis {
2866            let flows = analysis.detect_inter_procedural_flows(package);
2867
2868            let mut reported_functions: HashSet<String> =
2869                findings.iter().map(|f| f.function.clone()).collect();
2870
2871            for flow in flows {
2872                if flow.sink_type != "sql" {
2873                    continue;
2874                }
2875
2876                let is_internal = flow.sink_function.contains("mir_extractor")
2877                    || flow.sink_function.contains("__")
2878                    || flow.source_function.contains("mir_extractor");
2879                if is_internal {
2880                    continue;
2881                }
2882
2883                if reported_functions.contains(&flow.sink_function) {
2884                    continue;
2885                }
2886
2887                if flow.sanitized {
2888                    continue;
2889                }
2890
2891                let sink_func = package
2892                    .functions
2893                    .iter()
2894                    .find(|f| f.name == flow.sink_function);
2895
2896                let span = sink_func.map(|f| f.span.clone()).unwrap_or_default();
2897                let signature = sink_func.map(|f| f.signature.clone()).unwrap_or_default();
2898
2899                findings.push(Finding {
2900                    rule_id: self.metadata.id.clone(),
2901                    rule_name: self.metadata.name.clone(),
2902                    severity: Severity::High,
2903                    message: format!(
2904                        "Inter-procedural SQL injection: untrusted input from `{}` \
2905                        flows through {} to SQL query in `{}`. Use parameterized \
2906                        queries to prevent SQL injection.",
2907                        flow.source_function,
2908                        if flow.call_chain.len() > 2 {
2909                            format!("{} function calls", flow.call_chain.len() - 1)
2910                        } else {
2911                            "helper function".to_string()
2912                        },
2913                        flow.sink_function
2914                    ),
2915                    function: flow.sink_function.clone(),
2916                    function_signature: signature,
2917                    evidence: vec![flow.describe()],
2918                    span,
2919                    ..Default::default()
2920                });
2921
2922                reported_functions.insert(flow.sink_function.clone());
2923            }
2924        }
2925
2926        findings
2927    }
2928}
2929
2930// ============================================================================
2931// RUSTCOLA098 - InterProceduralCommandInjectionRule
2932// ============================================================================
2933
/// Rule RUSTCOLA098: reports command injection where untrusted input reaches
/// command execution through helper functions or closure captures.
pub struct InterProceduralCommandInjectionRule {
    /// Static rule description handed out by `Rule::metadata`.
    metadata: RuleMetadata,
}
2937
2938impl InterProceduralCommandInjectionRule {
2939    pub fn new() -> Self {
2940        Self {
2941            metadata: RuleMetadata {
2942                id: "RUSTCOLA098".to_string(),
2943                name: "interprocedural-command-injection".to_string(),
2944                short_description: "Inter-procedural command injection".to_string(),
2945                full_description: "Untrusted input flows through helper functions to \
2946                    command execution without sanitization. Attackers can inject shell \
2947                    metacharacters to execute arbitrary commands. Validate input against \
2948                    an allowlist or use APIs that don't invoke a shell."
2949                    .to_string(),
2950                help_uri: Some(
2951                    "https://owasp.org/www-community/attacks/Command_Injection".to_string(),
2952                ),
2953                default_severity: Severity::High,
2954                origin: RuleOrigin::BuiltIn,
2955                cwe_ids: Vec::new(),
2956                fix_suggestion: None,
2957                exploitability: Exploitability::default(),
2958            },
2959        }
2960    }
2961}
2962
2963impl Rule for InterProceduralCommandInjectionRule {
2964    fn metadata(&self) -> &RuleMetadata {
2965        &self.metadata
2966    }
2967
2968    fn evaluate(
2969        &self,
2970        package: &MirPackage,
2971        inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
2972    ) -> Vec<Finding> {
2973        let mut findings = Vec::new();
2974
2975        // Use shared interprocedural analysis if available
2976        if let Some(analysis) = inter_analysis {
2977            let flows = analysis.detect_inter_procedural_flows(package);
2978
2979            let mut reported_functions: HashSet<String> = HashSet::new();
2980
2981            for flow in flows {
2982                if !flow.sink_type.contains("command") {
2983                    continue;
2984                }
2985
2986                let is_internal = flow.sink_function.contains("mir_extractor")
2987                    || flow.sink_function.contains("mir-extractor")
2988                    || flow.sink_function.contains("__")
2989                    || flow.source_function.contains("mir_extractor")
2990                    || flow.source_function.contains("mir-extractor");
2991                if is_internal {
2992                    continue;
2993                }
2994
2995                if flow.sink_function.contains("test") && flow.sink_function.contains("::") {
2996                    if !flow.sink_function.starts_with("test_") {
2997                        continue;
2998                    }
2999                }
3000
3001                if reported_functions.contains(&flow.sink_function) {
3002                    continue;
3003                }
3004
3005                if flow.sanitized {
3006                    continue;
3007                }
3008
3009                let sink_func = package
3010                    .functions
3011                    .iter()
3012                    .find(|f| f.name == flow.sink_function);
3013
3014                let span = sink_func.map(|f| f.span.clone()).unwrap_or_default();
3015                let signature = sink_func.map(|f| f.signature.clone()).unwrap_or_default();
3016
3017                findings.push(Finding {
3018                    rule_id: self.metadata.id.clone(),
3019                    rule_name: self.metadata.name.clone(),
3020                    severity: Severity::High,
3021                    message: format!(
3022                        "Inter-procedural command injection: untrusted input from `{}` \
3023                        flows through {} to command execution in `{}`. \
3024                        Attackers can inject shell metacharacters. \
3025                        Validate against an allowlist or avoid shell invocation.",
3026                        flow.source_function,
3027                        if flow.call_chain.len() > 2 {
3028                            format!("{} function calls", flow.call_chain.len() - 1)
3029                        } else {
3030                            "helper function".to_string()
3031                        },
3032                        flow.sink_function
3033                    ),
3034                    function: flow.sink_function.clone(),
3035                    function_signature: signature,
3036                    evidence: vec![flow.describe()],
3037                    span,
3038                    ..Default::default()
3039                });
3040
3041                reported_functions.insert(flow.sink_function.clone());
3042            }
3043
3044            // Closure capture detection
3045            for closure in analysis.closure_registry.get_all_closures() {
3046                if reported_functions.contains(&closure.name) {
3047                    continue;
3048                }
3049
3050                let parent_func = package
3051                    .functions
3052                    .iter()
3053                    .find(|f| f.name == closure.parent_function);
3054
3055                let closure_func = package.functions.iter().find(|f| f.name == closure.name);
3056
3057                if let Some(closure_fn) = closure_func {
3058                    let parent_has_source = if let Some(parent) = parent_func {
3059                        parent.body.iter().any(|line| {
3060                            line.contains("args()")
3061                                || line.contains("env::args")
3062                                || line.contains("env::var")
3063                                || line.contains("std::env::")
3064                                || line.contains("= args")
3065                                || line.contains("var(")
3066                        })
3067                    } else {
3068                        closure_fn.body.iter().any(|line| {
3069                            let line_lower = line.to_lowercase();
3070                            (line.contains("debug ") && line.contains("(*((*_1)"))
3071                                && (line_lower.contains("tainted")
3072                                    || line_lower.contains("user")
3073                                    || line_lower.contains("input")
3074                                    || line_lower.contains("cmd")
3075                                    || line_lower.contains("arg")
3076                                    || line_lower.contains("command"))
3077                        })
3078                    };
3079
3080                    let closure_has_sink = closure_fn.body.iter().any(|line| {
3081                        line.contains("Command::new")
3082                            || line.contains("Command::")
3083                            || line.contains("::spawn")
3084                            || line.contains("::output")
3085                            || line.contains("process::Command")
3086                    });
3087
3088                    let has_captures = !closure.captured_vars.is_empty()
3089                        || closure_fn
3090                            .body
3091                            .iter()
3092                            .any(|line| line.contains("debug ") && line.contains("(*((*_1)"));
3093
3094                    if parent_has_source && closure_has_sink && has_captures {
3095                        findings.push(Finding {
3096                            rule_id: self.metadata.id.clone(),
3097                            rule_name: self.metadata.name.clone(),
3098                            severity: Severity::High,
3099                            message: format!(
3100                                "Closure captures tainted data: `{}` captures untrusted input \
3101                                from parent function `{}` and passes it to command execution. \
3102                                Attackers can inject shell metacharacters. \
3103                                Validate input or avoid shell invocation.",
3104                                closure.name, closure.parent_function
3105                            ),
3106                            function: closure.name.clone(),
3107                            function_signature: closure_fn.signature.clone(),
3108                            evidence: vec![
3109                                format!(
3110                                    "Parent function {} contains taint source",
3111                                    closure.parent_function
3112                                ),
3113                                format!("Closure captures variable(s) from parent"),
3114                                "Closure body contains command execution".to_string(),
3115                            ],
3116                            span: closure_fn.span.clone(),
3117                            ..Default::default()
3118                        });
3119
3120                        reported_functions.insert(closure.name.clone());
3121                    }
3122                }
3123            }
3124
3125            // Direct closure scan fallback
3126            for function in &package.functions {
3127                if !function.name.contains("::{closure#") {
3128                    continue;
3129                }
3130
3131                if reported_functions.contains(&function.name) {
3132                    continue;
3133                }
3134
3135                let body_str = function.body.join("\n");
3136
3137                let has_command_sink = body_str.contains("Command::")
3138                    || body_str.contains("::spawn")
3139                    || body_str.contains("::output");
3140
3141                if !has_command_sink {
3142                    continue;
3143                }
3144
3145                let has_tainted_capture = body_str.lines().any(|line| {
3146                    if !line.contains("debug ") || !line.contains("(*((*_1)") {
3147                        return false;
3148                    }
3149                    let line_lower = line.to_lowercase();
3150                    line_lower.contains("tainted")
3151                        || line_lower.contains("user")
3152                        || line_lower.contains("input")
3153                        || line_lower.contains("cmd")
3154                        || line_lower.contains("arg")
3155                        || line_lower.contains("command")
3156                });
3157
3158                if has_tainted_capture {
3159                    let parent_name = if let Some(pos) = function.name.find("::{closure#") {
3160                        function.name[..pos].to_string()
3161                    } else {
3162                        "unknown_parent".to_string()
3163                    };
3164
3165                    findings.push(Finding {
3166                        rule_id: self.metadata.id.clone(),
3167                        rule_name: self.metadata.name.clone(),
3168                        severity: Severity::High,
3169                        message: format!(
3170                            "Closure captures tainted data: `{}` captures untrusted input \
3171                            from parent function `{}` and passes it to command execution. \
3172                            Attackers can inject shell metacharacters. \
3173                            Validate input or avoid shell invocation.",
3174                            function.name, parent_name
3175                        ),
3176                        function: function.name.clone(),
3177                        function_signature: function.signature.clone(),
3178                        evidence: vec![
3179                            format!("Closure captures variable(s) named with taint indicators"),
3180                            "Closure body contains command execution".to_string(),
3181                        ],
3182                        span: function.span.clone(),
3183                        confidence: Confidence::Medium,
3184                        cwe_ids: Vec::new(),
3185                        fix_suggestion: None,
3186                        code_snippet: None,
3187                        exploitability: Exploitability::default(),
3188                        exploitability_score: Exploitability::default().score(),
3189                    ..Default::default()
3190                    });
3191
3192                    reported_functions.insert(function.name.clone());
3193                }
3194            }
3195        }
3196
3197        findings
3198    }
3199}