1use std::collections::{HashMap, HashSet};
15
16use crate::dataflow::taint::TaintAnalysis;
17use crate::rules::utils::{command_rule_should_skip, INPUT_SOURCE_PATTERNS, LOG_SINK_PATTERNS};
18use crate::{
19 detect_command_invocations, extract_span_from_mir_line, Confidence, Exploitability, Finding,
20 MirPackage, Rule, RuleMetadata, RuleOrigin, Severity,
21};
22
/// Rule `RUSTCOLA006` (`untrusted-env-input`): reports unsanitized taint flows
/// produced by `TaintAnalysis` for each function of a package.
pub struct UntrustedEnvInputRule {
    /// Static descriptive metadata (id, name, default severity, ...) for this rule.
    metadata: RuleMetadata,
}
30
31impl UntrustedEnvInputRule {
32 pub fn new() -> Self {
33 Self {
34 metadata: RuleMetadata {
35 id: "RUSTCOLA006".to_string(),
36 name: "untrusted-env-input".to_string(),
37 short_description: "Reads environment-provided input".to_string(),
38 full_description: "Highlights reads from environment variables or command-line arguments which should be validated before use.".to_string(),
39 help_uri: None,
40 default_severity: Severity::Medium,
41 origin: RuleOrigin::BuiltIn,
42 cwe_ids: Vec::new(),
43 fix_suggestion: None,
44 exploitability: Exploitability::default(),
45 },
46 }
47 }
48}
49
50impl Rule for UntrustedEnvInputRule {
51 fn metadata(&self) -> &RuleMetadata {
52 &self.metadata
53 }
54
55 fn evaluate(
56 &self,
57 package: &MirPackage,
58 _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
59 ) -> Vec<Finding> {
60 let taint_analysis = TaintAnalysis::new();
61 let mut findings = Vec::new();
62
63 for function in &package.functions {
64 let (_tainted_vars, flows) = taint_analysis.analyze(function);
65
66 for flow in flows {
67 if !flow.sanitized {
68 let sink_span = extract_span_from_mir_line(&flow.sink.sink_line);
69 let span = sink_span.or(function.span.clone());
70
71 let finding =
72 flow.to_finding(&self.metadata, &function.name, &function.signature, span);
73 findings.push(finding);
74 }
75 }
76 }
77
78 findings
79 }
80}
81
/// Rule `RUSTCOLA007` (`process-command-execution`): reports process command
/// invocations detected in a package's functions.
pub struct CommandInjectionRiskRule {
    /// Static descriptive metadata (id, name, default severity, ...) for this rule.
    metadata: RuleMetadata,
}
89
90impl CommandInjectionRiskRule {
91 pub fn new() -> Self {
92 Self {
93 metadata: RuleMetadata {
94 id: "RUSTCOLA007".to_string(),
95 name: "process-command-execution".to_string(),
96 short_description: "Spawns external commands".to_string(),
97 full_description: "Detects uses of std::process::Command which should carefully sanitize inputs to avoid command injection.".to_string(),
98 help_uri: None,
99 default_severity: Severity::High,
100 origin: RuleOrigin::BuiltIn,
101 cwe_ids: Vec::new(),
102 fix_suggestion: None,
103 exploitability: Exploitability::default(),
104 },
105 }
106 }
107}
108
109impl Rule for CommandInjectionRiskRule {
110 fn metadata(&self) -> &RuleMetadata {
111 &self.metadata
112 }
113
114 fn evaluate(
115 &self,
116 package: &MirPackage,
117 _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
118 ) -> Vec<Finding> {
119 let mut findings = Vec::new();
120
121 for function in &package.functions {
122 if command_rule_should_skip(function, package) {
123 continue;
124 }
125
126 let invocations = detect_command_invocations(function);
127 if invocations.is_empty() {
128 continue;
129 }
130
131 for invocation in invocations {
132 let mut evidence = vec![invocation.command_line.clone()];
133 if !invocation.tainted_args.is_empty() {
134 evidence.push(format!(
135 "tainted arguments: {}",
136 invocation.tainted_args.join(", ")
137 ));
138 }
139
140 let (severity, message) = if invocation.tainted_args.is_empty() {
141 (
142 Severity::Medium,
143 format!(
144 "Process command execution detected in `{}`; review argument construction",
145 function.name
146 ),
147 )
148 } else {
149 (
150 Severity::High,
151 format!(
152 "Potential command injection: tainted arguments reach Command::arg in `{}`",
153 function.name
154 ),
155 )
156 };
157
158 findings.push(Finding {
159 rule_id: self.metadata.id.clone(),
160 rule_name: self.metadata.name.clone(),
161 severity,
162 message,
163 function: function.name.clone(),
164 function_signature: function.signature.clone(),
165 evidence,
166 span: function.span.clone(),
167 confidence: Confidence::Medium,
168 cwe_ids: Vec::new(),
169 fix_suggestion: None,
170 code_snippet: None,
171 exploitability: Exploitability::default(),
172 exploitability_score: Exploitability::default().score(),
173 ..Default::default()
174 });
175 }
176 }
177
178 findings
179 }
180}
181
/// Rule `RUSTCOLA031` (`command-arg-concatenation`): reports command
/// construction lines that closely follow string concatenation/formatting lines.
pub struct CommandArgConcatenationRule {
    /// Static descriptive metadata (id, name, default severity, ...) for this rule.
    metadata: RuleMetadata,
}
189
190impl CommandArgConcatenationRule {
191 pub fn new() -> Self {
192 Self {
193 metadata: RuleMetadata {
194 id: "RUSTCOLA031".to_string(),
195 name: "command-arg-concatenation".to_string(),
196 short_description: "Command built with string concatenation or formatting".to_string(),
197 full_description: "Detects Command::new or Command::arg calls that use format!, format_args!, concat!, or string concatenation operators, which can enable command injection if user input is involved.".to_string(),
198 help_uri: Some("https://cwe.mitre.org/data/definitions/78.html".to_string()),
199 default_severity: Severity::High,
200 origin: RuleOrigin::BuiltIn,
201 cwe_ids: Vec::new(),
202 fix_suggestion: None,
203 exploitability: Exploitability::default(),
204 },
205 }
206 }
207
208 fn concatenation_patterns() -> &'static [&'static str] {
209 &[
210 "format!",
211 "format_args!",
212 "concat!",
213 "std::format",
214 "core::format",
215 "alloc::format",
216 "String::from",
217 "+ &str",
218 "+ String",
219 ]
220 }
221
222 fn command_construction_patterns() -> &'static [&'static str] {
223 &["Command::new(", "Command::arg(", "Command::args("]
224 }
225}
226
227impl Rule for CommandArgConcatenationRule {
228 fn metadata(&self) -> &RuleMetadata {
229 &self.metadata
230 }
231
232 fn evaluate(
233 &self,
234 package: &MirPackage,
235 _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
236 ) -> Vec<Finding> {
237 if package.crate_name == "mir-extractor" {
238 return Vec::new();
239 }
240
241 let mut findings = Vec::new();
242
243 for function in &package.functions {
244 let mut concat_lines: Vec<(usize, String)> = Vec::new();
245 let mut command_lines: Vec<(usize, String)> = Vec::new();
246
247 for (idx, line) in function.body.iter().enumerate() {
249 let trimmed = line.trim();
250
251 for pattern in Self::concatenation_patterns() {
253 if trimmed.contains(pattern) {
254 concat_lines.push((idx, trimmed.to_string()));
255 break;
256 }
257 }
258
259 for pattern in Self::command_construction_patterns() {
261 if trimmed.contains(pattern) {
262 command_lines.push((idx, trimmed.to_string()));
263 break;
264 }
265 }
266 }
267
268 for (cmd_idx, cmd_line) in &command_lines {
270 let relevant_concat: Vec<&String> = concat_lines
272 .iter()
273 .filter(|(concat_idx, _)| concat_idx < cmd_idx && cmd_idx - concat_idx < 10)
274 .map(|(_, line)| line)
275 .collect();
276
277 if relevant_concat.is_empty() {
278 continue;
279 }
280
281 let mut evidence = vec![cmd_line.clone()];
282 evidence.extend(relevant_concat.iter().map(|s| (*s).clone()));
283
284 findings.push(Finding {
285 rule_id: self.metadata.id.clone(),
286 rule_name: self.metadata.name.clone(),
287 severity: self.metadata.default_severity,
288 message: format!(
289 "Command argument uses string concatenation in `{}`, potential injection risk",
290 function.name
291 ),
292 function: function.name.clone(),
293 function_signature: function.signature.clone(),
294 evidence,
295 span: function.span.clone(),
296 confidence: Confidence::Medium,
297 cwe_ids: Vec::new(),
298 fix_suggestion: None,
299 code_snippet: None,
300 exploitability: Exploitability::default(),
301 exploitability_score: Exploitability::default().score(),
302 ..Default::default()
303 });
304 }
305 }
306
307 findings
308 }
309}
310
/// Rule `RUSTCOLA076` (`log-injection`): reports functions in which untrusted
/// input reaches a logging sink without newline sanitization.
pub struct LogInjectionRule {
    /// Static descriptive metadata (id, name, default severity, ...) for this rule.
    metadata: RuleMetadata,
}
318
319impl LogInjectionRule {
320 pub fn new() -> Self {
321 Self {
322 metadata: RuleMetadata {
323 id: "RUSTCOLA076".to_string(),
324 name: "log-injection".to_string(),
325 short_description: "Untrusted input may enable log injection".to_string(),
326 full_description: "Detects environment variables or command-line arguments \
327 that flow to logging functions without newline sanitization. Attackers can \
328 inject newline characters to forge log entries, evade detection, or corrupt \
329 log analysis. Sanitize by replacing or escaping \\n, \\r characters, or use \
330 structured logging formats (JSON) that properly escape special characters."
331 .to_string(),
332 help_uri: Some("https://cwe.mitre.org/data/definitions/117.html".to_string()),
333 default_severity: Severity::Medium,
334 origin: RuleOrigin::BuiltIn,
335 cwe_ids: Vec::new(),
336 fix_suggestion: None,
337 exploitability: Exploitability::default(),
338 },
339 }
340 }
341
342 fn newline_sanitizer_patterns() -> &'static [&'static str] {
344 &[
345 "::replace", "::trim(", "::trim_end(",
348 "::trim_matches(",
349 "escape_", "::lines(", "::split(", "::parse::<", ]
354 }
355
356 fn track_untrusted_vars(body: &[String]) -> HashSet<String> {
358 let mut untrusted_vars = HashSet::new();
359
360 for line in body {
361 let trimmed = line.trim();
362
363 let is_source = INPUT_SOURCE_PATTERNS.iter().any(|p| trimmed.contains(p));
365
366 if is_source {
367 if let Some(eq_pos) = trimmed.find(" = ") {
369 let target = trimmed[..eq_pos].trim();
370 if let Some(var) = target
371 .split(|c: char| !c.is_alphanumeric() && c != '_')
372 .find(|s| s.starts_with('_'))
373 {
374 untrusted_vars.insert(var.to_string());
375 }
376 }
377 }
378
379 if trimmed.contains(" = ") && !is_source {
381 if let Some(eq_pos) = trimmed.find(" = ") {
382 let target = trimmed[..eq_pos].trim();
383 let source = trimmed[eq_pos + 3..].trim();
384
385 let uses_untrusted =
387 untrusted_vars.iter().any(|v| Self::contains_var(source, v));
388
389 if uses_untrusted {
390 let has_sanitizer = Self::newline_sanitizer_patterns()
392 .iter()
393 .any(|p| source.contains(p));
394
395 if !has_sanitizer {
396 if let Some(target_var) = target
398 .split(|c: char| !c.is_alphanumeric() && c != '_')
399 .find(|s| s.starts_with('_'))
400 {
401 untrusted_vars.insert(target_var.to_string());
402 }
403 }
404 }
405 }
406 }
407 }
408
409 untrusted_vars
410 }
411
412 fn contains_var(line: &str, var: &str) -> bool {
414 for (idx, _) in line.match_indices(var) {
415 let after_pos = idx + var.len();
416 if after_pos >= line.len() {
417 return true;
418 }
419 let next_char = line[after_pos..].chars().next().unwrap();
420 if !next_char.is_ascii_digit() {
421 return true;
422 }
423 }
424 false
425 }
426
427 fn find_log_injections(body: &[String], untrusted_vars: &HashSet<String>) -> Vec<String> {
429 let mut evidence = Vec::new();
430
431 for line in body {
432 let trimmed = line.trim();
433
434 let is_log_sink = LOG_SINK_PATTERNS.iter().any(|p| trimmed.contains(p));
436
437 if is_log_sink {
438 for var in untrusted_vars {
440 if Self::contains_var(trimmed, var) {
441 evidence.push(trimmed.to_string());
442 break;
443 }
444 }
445 }
446 }
447
448 evidence
449 }
450
451 fn find_logging_helpers(package: &MirPackage) -> HashSet<String> {
453 let mut helpers = HashSet::new();
454
455 for function in &package.functions {
456 if function.name.contains("{closure") {
458 continue;
459 }
460
461 let has_param = function.body.iter().any(|line| {
463 let trimmed = line.trim();
464 trimmed.starts_with("debug ") && trimmed.contains(" => _1")
465 });
466
467 if !has_param {
468 continue;
469 }
470
471 let mut param_vars: HashSet<String> = HashSet::new();
473 param_vars.insert("_1".to_string());
474
475 for line in &function.body {
477 let trimmed = line.trim();
478 if let Some(eq_pos) = trimmed.find(" = ") {
479 let target = trimmed[..eq_pos].trim();
480 let source = trimmed[eq_pos + 3..].trim();
481
482 let uses_param = param_vars.iter().any(|v| Self::contains_var(source, v));
483 if uses_param {
484 if let Some(target_var) = target
485 .split(|c: char| !c.is_alphanumeric() && c != '_')
486 .find(|s| s.starts_with('_'))
487 {
488 param_vars.insert(target_var.to_string());
489 }
490 }
491 }
492 }
493
494 for line in &function.body {
496 let trimmed = line.trim();
497 let is_log_sink = LOG_SINK_PATTERNS.iter().any(|p| trimmed.contains(p));
498 if is_log_sink {
499 for var in ¶m_vars {
500 if Self::contains_var(trimmed, var) {
501 helpers.insert(function.name.clone());
502 break;
503 }
504 }
505 }
506 }
507 }
508
509 helpers
510 }
511
512 fn find_helper_log_injections(
514 body: &[String],
515 untrusted_vars: &HashSet<String>,
516 logging_helpers: &HashSet<String>,
517 ) -> Vec<String> {
518 let mut evidence = Vec::new();
519
520 for line in body {
521 let trimmed = line.trim();
522
523 for helper in logging_helpers {
525 let helper_name = helper.split("::").last().unwrap_or(helper);
526 if trimmed.contains(&format!("{}(", helper_name)) {
527 for var in untrusted_vars {
528 if Self::contains_var(trimmed, var) {
529 evidence.push(trimmed.to_string());
530 break;
531 }
532 }
533 }
534 }
535 }
536
537 evidence
538 }
539}
540
541impl Rule for LogInjectionRule {
542 fn metadata(&self) -> &RuleMetadata {
543 &self.metadata
544 }
545
546 fn evaluate(
547 &self,
548 package: &MirPackage,
549 _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
550 ) -> Vec<Finding> {
551 let mut findings = Vec::new();
552
553 let logging_helpers = Self::find_logging_helpers(package);
554
555 for function in &package.functions {
556 if function.name.contains("mir_extractor") || function.name.contains("mir-extractor") {
557 continue;
558 }
559
560 let untrusted_vars = Self::track_untrusted_vars(&function.body);
561
562 if untrusted_vars.is_empty() {
563 continue;
564 }
565
566 let mut injections = Self::find_log_injections(&function.body, &untrusted_vars);
567 let helper_injections =
568 Self::find_helper_log_injections(&function.body, &untrusted_vars, &logging_helpers);
569 injections.extend(helper_injections);
570
571 if !injections.is_empty() {
572 findings.push(Finding {
573 rule_id: self.metadata.id.clone(),
574 rule_name: self.metadata.name.clone(),
575 severity: self.metadata.default_severity,
576 message: format!(
577 "Untrusted input flows to logging in `{}` without newline sanitization. \
578 Attackers may inject newlines to forge log entries.",
579 function.name
580 ),
581 function: function.name.clone(),
582 function_signature: function.signature.clone(),
583 evidence: injections.into_iter().take(3).collect(),
584 span: function.span.clone(),
585 confidence: Confidence::Medium,
586 cwe_ids: Vec::new(),
587 fix_suggestion: None,
588 code_snippet: None,
589 exploitability: Exploitability::default(),
590 exploitability_score: Exploitability::default().score(),
591 ..Default::default()
592 });
593 }
594 }
595
596 findings
597 }
598}
599
/// Rule `RUSTCOLA079` (`regex-injection`): reports functions in which untrusted
/// input reaches a regex construction call.
pub struct RegexInjectionRule {
    /// Static descriptive metadata (id, name, default severity, ...) for this rule.
    metadata: RuleMetadata,
}
607
608impl RegexInjectionRule {
609 pub fn new() -> Self {
610 Self {
611 metadata: RuleMetadata {
612 id: "RUSTCOLA079".to_string(),
613 name: "regex-injection".to_string(),
614 short_description: "Untrusted input used to construct regex pattern".to_string(),
615 full_description: "Detects environment variables, command-line arguments, or other \
616 untrusted input flowing to Regex::new(), RegexBuilder::new(), or regex! macro \
617 without sanitization. Attackers can craft malicious patterns causing catastrophic \
618 backtracking (ReDoS), consuming excessive CPU and causing denial of service. \
619 Validate regex patterns, use timeouts, limit pattern complexity, or use \
620 regex crates with ReDoS protection (e.g., `regex` crate's default is safe, \
621 but user-controlled patterns can still match unexpectedly)."
622 .to_string(),
623 help_uri: Some("https://cwe.mitre.org/data/definitions/1333.html".to_string()),
624 default_severity: Severity::High,
625 origin: RuleOrigin::BuiltIn,
626 cwe_ids: Vec::new(),
627 fix_suggestion: None,
628 exploitability: Exploitability::default(),
629 },
630 }
631 }
632
    /// Returns the shared `INPUT_SOURCE_PATTERNS` list used to recognise lines
    /// that read untrusted input.
    fn input_source_patterns() -> &'static [&'static str] {
        INPUT_SOURCE_PATTERNS
    }
637
638 fn sanitizer_patterns() -> &'static [&'static str] {
640 &[
641 "escape(", "is_match(", "validate", "sanitize",
645 "whitelist",
646 "allowlist",
647 "allowed_pattern",
648 "safe_pattern",
649 ]
650 }
651
652 fn regex_sink_patterns() -> &'static [&'static str] {
654 &[
655 "Regex::new", "RegexBuilder::new", "RegexSet::new", "regex!(",
659 "Regex::from_str",
660 "RegexBuilder::from_str",
661 "RegexBuilder::build", ]
663 }
664
665 fn has_validation_guard(body: &[String], untrusted_vars: &HashSet<String>) -> bool {
667 let validation_funcs = ["validate", "sanitize", "is_valid", "check_pattern"];
668 let mut validation_result_var: Option<String> = None;
669
670 for line in body {
671 let trimmed = line.trim();
672
673 for validator in &validation_funcs {
674 if trimmed.to_lowercase().contains(validator) {
675 for var in untrusted_vars {
676 if Self::contains_var(trimmed, var) {
677 if let Some(eq_pos) = trimmed.find(" = ") {
678 let lhs = trimmed[..eq_pos].trim();
679 if let Some(result_var) = lhs
680 .split(|c: char| !c.is_alphanumeric() && c != '_')
681 .find(|s| s.starts_with('_'))
682 {
683 validation_result_var = Some(result_var.to_string());
684 }
685 }
686 }
687 }
688 }
689 }
690
691 if let Some(ref result_var) = validation_result_var {
692 if trimmed.contains("switchInt") && Self::contains_var(trimmed, result_var) {
693 return true;
694 }
695 }
696 }
697
698 false
699 }
700
701 fn contains_var(line: &str, var: &str) -> bool {
702 for (idx, _) in line.match_indices(var) {
703 let after_pos = idx + var.len();
704 if after_pos >= line.len() {
705 return true;
706 }
707 let next_char = line[after_pos..].chars().next().unwrap();
708 if !next_char.is_ascii_digit() {
709 return true;
710 }
711 }
712 false
713 }
714
715 fn track_untrusted_vars(body: &[String]) -> HashSet<String> {
716 let mut untrusted_vars = HashSet::new();
717 let source_patterns = Self::input_source_patterns();
718 let sanitizer_patterns = Self::sanitizer_patterns();
719
720 let mut ref_aliases: HashMap<String, String> = HashMap::new();
721 for line in body {
722 let trimmed = line.trim();
723 if let Some(eq_pos) = trimmed.find(" = &") {
724 let lhs = trimmed[..eq_pos].trim();
725 let rhs = &trimmed[eq_pos + 3..].trim();
726 let rhs_clean = rhs.trim_start_matches("mut ");
727
728 if let Some(lhs_var) = lhs
729 .split(|c: char| !c.is_alphanumeric() && c != '_')
730 .find(|s| s.starts_with('_'))
731 {
732 if let Some(rhs_var) = rhs_clean
733 .split(|c: char| !c.is_alphanumeric() && c != '_')
734 .find(|s| s.starts_with('_'))
735 {
736 ref_aliases.insert(lhs_var.to_string(), rhs_var.to_string());
737 }
738 }
739 }
740 }
741
742 for line in body {
743 let trimmed = line.trim();
744 let is_source = source_patterns.iter().any(|p| trimmed.contains(p));
745
746 if is_source {
747 if let Some(eq_pos) = trimmed.find(" = ") {
748 let target = trimmed[..eq_pos].trim();
749 if let Some(var) = target
750 .split(|c: char| !c.is_alphanumeric() && c != '_')
751 .find(|s| s.starts_with('_'))
752 {
753 untrusted_vars.insert(var.to_string());
754 }
755 }
756
757 if trimmed.contains("read_line(") {
758 if let Some(start) = trimmed.find("read_line(") {
759 let after = &trimmed[start..];
760 if let Some(copy_pos) = after.rfind("copy _") {
761 let var_start = &after[copy_pos + 5..];
762 if let Some(end) =
763 var_start.find(|c: char| !c.is_alphanumeric() && c != '_')
764 {
765 let var = &var_start[..end];
766 if var.starts_with('_') {
767 untrusted_vars.insert(var.to_string());
768 if let Some(aliased) = ref_aliases.get(var) {
769 untrusted_vars.insert(aliased.clone());
770 }
771 }
772 }
773 }
774 }
775 }
776 }
777 }
778
779 let mut changed = true;
780 while changed {
781 changed = false;
782 for line in body {
783 let trimmed = line.trim();
784
785 if trimmed.contains(" = ") {
786 if let Some(eq_pos) = trimmed.find(" = ") {
787 let target = trimmed[..eq_pos].trim();
788 let source = trimmed[eq_pos + 3..].trim();
789
790 let uses_untrusted =
791 untrusted_vars.iter().any(|v| Self::contains_var(source, v));
792
793 if uses_untrusted {
794 let has_sanitizer = sanitizer_patterns
795 .iter()
796 .any(|p| source.to_lowercase().contains(&p.to_lowercase()));
797
798 if !has_sanitizer {
799 if let Some(target_var) = target
800 .split(|c: char| !c.is_alphanumeric() && c != '_')
801 .find(|s| s.starts_with('_'))
802 {
803 if !untrusted_vars.contains(target_var) {
804 untrusted_vars.insert(target_var.to_string());
805 changed = true;
806 }
807 }
808 }
809 }
810 }
811 }
812 }
813 }
814
815 untrusted_vars
816 }
817
818 fn propagate_taint_in_body(body: &[String], untrusted_vars: &mut HashSet<String>) {
819 let sanitizer_patterns = Self::sanitizer_patterns();
820
821 let mut changed = true;
822 while changed {
823 changed = false;
824 for line in body {
825 let trimmed = line.trim();
826
827 if trimmed.contains(" = ") {
828 if let Some(eq_pos) = trimmed.find(" = ") {
829 let target = trimmed[..eq_pos].trim();
830 let source = trimmed[eq_pos + 3..].trim();
831
832 let uses_untrusted =
833 untrusted_vars.iter().any(|v| Self::contains_var(source, v));
834
835 if uses_untrusted {
836 let has_sanitizer = sanitizer_patterns
837 .iter()
838 .any(|p| source.to_lowercase().contains(&p.to_lowercase()));
839
840 if !has_sanitizer {
841 if let Some(target_var) = target
842 .split(|c: char| !c.is_alphanumeric() && c != '_')
843 .find(|s| s.starts_with('_'))
844 {
845 if !untrusted_vars.contains(target_var) {
846 untrusted_vars.insert(target_var.to_string());
847 changed = true;
848 }
849 }
850 }
851 }
852 }
853 }
854 }
855 }
856 }
857
858 fn find_regex_injections(body: &[String], untrusted_vars: &HashSet<String>) -> Vec<String> {
859 let mut evidence = Vec::new();
860 let regex_sinks = Self::regex_sink_patterns();
861
862 for line in body {
863 let trimmed = line.trim();
864 let is_regex_sink = regex_sinks.iter().any(|p| trimmed.contains(p));
865
866 if is_regex_sink {
867 for var in untrusted_vars {
868 if Self::contains_var(trimmed, var) {
869 evidence.push(trimmed.to_string());
870 break;
871 }
872 }
873 }
874 }
875
876 evidence
877 }
878}
879
880impl Rule for RegexInjectionRule {
881 fn metadata(&self) -> &RuleMetadata {
882 &self.metadata
883 }
884
885 fn evaluate(
886 &self,
887 package: &MirPackage,
888 _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
889 ) -> Vec<Finding> {
890 let mut findings = Vec::new();
891
892 let mut tainted_closures: HashSet<String> = HashSet::new();
893
894 for function in &package.functions {
895 if function.name.contains("{closure") {
896 continue;
897 }
898
899 let untrusted_vars = Self::track_untrusted_vars(&function.body);
900 if untrusted_vars.is_empty() {
901 continue;
902 }
903
904 let combinator_patterns = [
905 "and_then",
906 "map(",
907 "filter(",
908 "filter_map(",
909 "unwrap_or_else(",
910 ];
911 for line in &function.body {
912 let trimmed = line.trim();
913 for pattern in &combinator_patterns {
914 if trimmed.contains(pattern) {
915 for var in &untrusted_vars {
916 if Self::contains_var(trimmed, var) {
917 tainted_closures.insert(function.name.clone());
918 break;
919 }
920 }
921 }
922 }
923 }
924 }
925
926 for function in &package.functions {
927 if function.name.contains("mir_extractor") || function.name.contains("mir-extractor") {
928 continue;
929 }
930
931 let is_closure = function.name.contains("{closure");
932 let mut untrusted_vars = if is_closure {
933 let parent_name = function.name.split("::{closure").next().unwrap_or("");
934 if tainted_closures.contains(parent_name) {
935 let mut vars = HashSet::new();
936 for line in &function.body {
937 let trimmed = line.trim();
938 if trimmed.starts_with("debug ") && trimmed.contains(" => _") {
939 if let Some(var) = trimmed.split(" => _").nth(1) {
940 let var = var.trim_end_matches(';');
941 vars.insert(format!("_{}", var));
942 }
943 }
944 }
945 if vars.is_empty() {
946 vars.insert("_2".to_string());
947 }
948 vars
949 } else {
950 HashSet::new()
951 }
952 } else {
953 Self::track_untrusted_vars(&function.body)
954 };
955
956 if is_closure && !untrusted_vars.is_empty() {
957 Self::propagate_taint_in_body(&function.body, &mut untrusted_vars);
958 }
959
960 if untrusted_vars.is_empty() {
961 continue;
962 }
963
964 if Self::has_validation_guard(&function.body, &untrusted_vars) {
965 continue;
966 }
967
968 let injections = Self::find_regex_injections(&function.body, &untrusted_vars);
969
970 if !injections.is_empty() {
971 let report_name = if is_closure {
972 function
973 .name
974 .split("::{closure")
975 .next()
976 .unwrap_or(&function.name)
977 .to_string()
978 } else {
979 function.name.clone()
980 };
981
982 findings.push(Finding {
983 rule_id: self.metadata.id.clone(),
984 rule_name: self.metadata.name.clone(),
985 severity: self.metadata.default_severity,
986 message: format!(
987 "Untrusted input flows to regex construction in `{}`. \
988 Attackers may craft patterns causing ReDoS (catastrophic backtracking) \
989 or unexpected matches. Use regex::escape() for literal matching or \
990 validate patterns against an allowlist.",
991 report_name
992 ),
993 function: function.name.clone(),
994 function_signature: function.signature.clone(),
995 evidence: injections.into_iter().take(3).collect(),
996 span: function.span.clone(),
997 confidence: Confidence::Medium,
998 cwe_ids: Vec::new(),
999 fix_suggestion: None,
1000 code_snippet: None,
1001 exploitability: Exploitability::default(),
1002 exploitability_score: Exploitability::default().score(),
1003 ..Default::default()
1004 });
1005 }
1006 }
1007
1008 findings
1009 }
1010}
1011
/// Rule `RUSTCOLA080` (`unchecked-indexing`): concerns untrusted input used as
/// an array or slice index without a bounds check.
pub struct UncheckedIndexRule {
    /// Static descriptive metadata (id, name, default severity, ...) for this rule.
    metadata: RuleMetadata,
}
1019
1020impl UncheckedIndexRule {
1021 pub fn new() -> Self {
1022 Self {
1023 metadata: RuleMetadata {
1024 id: "RUSTCOLA080".to_string(),
1025 name: "unchecked-indexing".to_string(),
1026 short_description: "Untrusted input used as array index without bounds check"
1027 .to_string(),
1028 full_description: "Detects array or slice indexing operations where the index \
1029 originates from untrusted sources (environment variables, command-line \
1030 arguments, file contents, network input) without bounds validation. Direct \
1031 indexing with [] can panic if the index is out of bounds, causing denial of \
1032 service. Use .get() for safe access that returns Option, or validate the \
1033 index against the array length before indexing."
1034 .to_string(),
1035 help_uri: Some("https://cwe.mitre.org/data/definitions/129.html".to_string()),
1036 default_severity: Severity::Medium,
1037 origin: RuleOrigin::BuiltIn,
1038 cwe_ids: Vec::new(),
1039 fix_suggestion: None,
1040 exploitability: Exploitability::default(),
1041 },
1042 }
1043 }
1044
    /// Returns the shared `INPUT_SOURCE_PATTERNS` list used to recognise lines
    /// that read untrusted input.
    fn input_source_patterns() -> &'static [&'static str] {
        INPUT_SOURCE_PATTERNS
    }
1048
1049 fn contains_var(line: &str, var: &str) -> bool {
1050 for (idx, _) in line.match_indices(var) {
1051 let after_pos = idx + var.len();
1052 if after_pos >= line.len() {
1053 return true;
1054 }
1055 let next_char = line[after_pos..].chars().next().unwrap();
1056 if !next_char.is_ascii_digit() {
1057 return true;
1058 }
1059 }
1060 false
1061 }
1062
1063 fn track_untrusted_indices(
1064 body: &[String],
1065 tainted_return_funcs: &HashSet<String>,
1066 ) -> HashSet<String> {
1067 let mut untrusted_vars = HashSet::new();
1068 let source_patterns = Self::input_source_patterns();
1069
1070 let mut mut_refs: HashMap<String, String> = HashMap::new();
1071 for line in body {
1072 let trimmed = line.trim();
1073 if trimmed.contains("= &mut _") {
1074 if let Some(eq_pos) = trimmed.find(" = ") {
1075 let target = trimmed[..eq_pos].trim();
1076 let source = trimmed[eq_pos + 3..].trim();
1077 if let Some(target_var) = target
1078 .split(|c: char| !c.is_alphanumeric() && c != '_')
1079 .find(|s| s.starts_with('_'))
1080 {
1081 if let Some(src_start) = source.find('_') {
1082 let src_var: String = source[src_start..]
1083 .chars()
1084 .take_while(|c| c.is_alphanumeric() || *c == '_')
1085 .collect();
1086 if !src_var.is_empty() {
1087 mut_refs.insert(target_var.to_string(), src_var);
1088 }
1089 }
1090 }
1091 }
1092 }
1093 }
1094
1095 for line in body {
1096 let trimmed = line.trim();
1097 let is_source = source_patterns.iter().any(|p| trimmed.contains(p));
1098
1099 if is_source {
1100 if let Some(eq_pos) = trimmed.find(" = ") {
1101 let target = trimmed[..eq_pos].trim();
1102 if let Some(var) = target
1103 .split(|c: char| !c.is_alphanumeric() && c != '_')
1104 .find(|s| s.starts_with('_'))
1105 {
1106 untrusted_vars.insert(var.to_string());
1107 }
1108 }
1109
1110 if trimmed.contains("read_line") {
1111 for (ref_var, target_var) in &mut_refs {
1112 if trimmed.contains(ref_var) {
1113 untrusted_vars.insert(target_var.clone());
1114 }
1115 }
1116 }
1117 }
1118
1119 if !tainted_return_funcs.is_empty() {
1120 if let Some(eq_pos) = trimmed.find(" = ") {
1121 let source = trimmed[eq_pos + 3..].trim();
1122 for func_name in tainted_return_funcs {
1123 let short_name = func_name.split("::").last().unwrap_or(func_name);
1124 if source.contains(&format!("{}(", short_name))
1125 || source.contains(&format!("{}::", short_name))
1126 {
1127 let target = trimmed[..eq_pos].trim();
1128 if let Some(var) = target
1129 .split(|c: char| !c.is_alphanumeric() && c != '_')
1130 .find(|s| s.starts_with('_'))
1131 {
1132 untrusted_vars.insert(var.to_string());
1133 }
1134 }
1135 }
1136 }
1137 }
1138 }
1139
1140 let mut changed = true;
1141 while changed {
1142 changed = false;
1143 for line in body {
1144 let trimmed = line.trim();
1145 let uses_untrusted = untrusted_vars
1146 .iter()
1147 .any(|v| Self::contains_var(trimmed, v));
1148
1149 if !uses_untrusted {
1150 continue;
1151 }
1152
1153 if let Some(eq_pos) = trimmed.find(" = ") {
1154 let target = trimmed[..eq_pos].trim();
1155 if let Some(target_var) = target
1156 .split(|c: char| !c.is_alphanumeric() && c != '_')
1157 .find(|s| s.starts_with('_'))
1158 {
1159 if !untrusted_vars.contains(target_var) {
1160 let dominated_by_untrusted = trimmed.contains("::parse")
1161 || trimmed.contains("parse::")
1162 || trimmed.contains("from_str")
1163 || trimmed.contains("::unwrap(")
1164 || trimmed.contains("::expect(")
1165 || {
1166 let source = trimmed[eq_pos + 3..].trim();
1167 untrusted_vars.iter().any(|v| Self::contains_var(source, v))
1168 };
1169
1170 if dominated_by_untrusted {
1171 untrusted_vars.insert(target_var.to_string());
1172 changed = true;
1173 }
1174 }
1175 }
1176 }
1177 }
1178 }
1179
1180 untrusted_vars
1181 }
1182
1183 fn has_bounds_validation(body: &[String], untrusted_vars: &HashSet<String>) -> bool {
1184 let mut comparison_vars: HashSet<String> = HashSet::new();
1185
1186 for line in body {
1187 let trimmed = line.trim();
1188
1189 if trimmed.contains("::get(")
1190 || trimmed.contains("::get_mut(")
1191 || trimmed.contains("::get::<")
1192 {
1193 continue;
1194 }
1195
1196 if trimmed.contains(".len()") || trimmed.contains("::len(") {
1197 for var in untrusted_vars {
1198 if Self::contains_var(trimmed, var) {
1199 return true;
1200 }
1201 }
1202 }
1203
1204 if (trimmed.contains("::min(") || trimmed.contains("::max("))
1205 && (trimmed.contains("len") || trimmed.contains("_"))
1206 {
1207 for var in untrusted_vars {
1208 if Self::contains_var(trimmed, var) {
1209 return true;
1210 }
1211 }
1212 }
1213
1214 let has_comparison = trimmed.contains("Lt(")
1215 || trimmed.contains("Le(")
1216 || trimmed.contains("Gt(")
1217 || trimmed.contains("Ge(");
1218 if has_comparison {
1219 for var in untrusted_vars {
1220 if Self::contains_var(trimmed, var) {
1221 if let Some(eq_pos) = trimmed.find(" = ") {
1222 let target = trimmed[..eq_pos].trim();
1223 if let Some(target_var) = target
1224 .split(|c: char| !c.is_alphanumeric() && c != '_')
1225 .find(|s| s.starts_with('_'))
1226 {
1227 comparison_vars.insert(target_var.to_string());
1228 }
1229 }
1230 }
1231 }
1232 }
1233
1234 if trimmed.contains("switchInt(") {
1235 for comp_var in &comparison_vars {
1236 if Self::contains_var(trimmed, comp_var) {
1237 return true;
1238 }
1239 }
1240 }
1241 }
1242
1243 false
1244 }
1245
1246 fn find_unsafe_indexing(body: &[String], untrusted_vars: &HashSet<String>) -> Vec<String> {
1247 let mut evidence = Vec::new();
1248
1249 for line in body {
1250 let trimmed = line.trim();
1251
1252 if trimmed.contains("::index(") || trimmed.contains("::index_mut(") {
1253 if trimmed.contains("::get(") || trimmed.contains("::get_mut(") {
1254 continue;
1255 }
1256
1257 if let Some(idx_start) = trimmed.find("::index") {
1258 let after_index = &trimmed[idx_start..];
1259 if let Some(comma_pos) = after_index.find(", ") {
1260 let index_arg = &after_index[comma_pos + 2..];
1261 for var in untrusted_vars {
1262 if Self::contains_var(index_arg, var) {
1263 evidence.push(trimmed.to_string());
1264 break;
1265 }
1266 }
1267 }
1268 }
1269 }
1270
1271 if trimmed.contains('[') && trimmed.contains(']') {
1272 if trimmed.contains("= [") {
1273 continue;
1274 }
1275
1276 if trimmed.contains("let ") || trimmed.contains("::get") {
1277 continue;
1278 }
1279
1280 if let Some(bracket_start) = trimmed.find('[') {
1281 if let Some(bracket_end) = trimmed[bracket_start..].find(']') {
1282 let index_content =
1283 &trimmed[bracket_start + 1..bracket_start + bracket_end];
1284
1285 for var in untrusted_vars {
1286 if Self::contains_var(index_content, var) {
1287 evidence.push(trimmed.to_string());
1288 break;
1289 }
1290 }
1291 }
1292 }
1293 }
1294 }
1295
1296 evidence
1297 }
1298
1299 fn find_tainted_return_functions(package: &MirPackage) -> HashSet<String> {
1300 let mut tainted_funcs = HashSet::new();
1301 let source_patterns = Self::input_source_patterns();
1302
1303 for function in &package.functions {
1304 if function.name.contains("mir_extractor")
1305 || function.name.contains("mir-extractor")
1306 || function.name.contains("__")
1307 {
1308 continue;
1309 }
1310
1311 let has_source = function
1312 .body
1313 .iter()
1314 .any(|line| source_patterns.iter().any(|p| line.contains(p)));
1315
1316 if !has_source {
1317 continue;
1318 }
1319
1320 let empty_set = HashSet::new();
1321 let tainted = Self::track_untrusted_indices(&function.body, &empty_set);
1322
1323 let returns_tainted = function.body.iter().any(|line| {
1324 let trimmed = line.trim();
1325 if trimmed.starts_with("_0 = ") || trimmed.starts_with("_0 =") {
1326 tainted.iter().any(|v| Self::contains_var(trimmed, v))
1327 } else {
1328 false
1329 }
1330 });
1331
1332 if returns_tainted {
1333 tainted_funcs.insert(function.name.clone());
1334 }
1335 }
1336
1337 tainted_funcs
1338 }
1339}
1340
1341impl Rule for UncheckedIndexRule {
1342 fn metadata(&self) -> &RuleMetadata {
1343 &self.metadata
1344 }
1345
1346 fn evaluate(
1347 &self,
1348 package: &MirPackage,
1349 _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
1350 ) -> Vec<Finding> {
1351 let mut findings = Vec::new();
1352
1353 let tainted_return_funcs = Self::find_tainted_return_functions(package);
1354
1355 for function in &package.functions {
1356 if function.name.contains("mir_extractor")
1357 || function.name.contains("mir-extractor")
1358 || function.name.contains("__")
1359 {
1360 continue;
1361 }
1362
1363 let untrusted_vars =
1364 Self::track_untrusted_indices(&function.body, &tainted_return_funcs);
1365
1366 if untrusted_vars.is_empty() {
1367 continue;
1368 }
1369
1370 if Self::has_bounds_validation(&function.body, &untrusted_vars) {
1371 continue;
1372 }
1373
1374 let unsafe_indexing = Self::find_unsafe_indexing(&function.body, &untrusted_vars);
1375
1376 if !unsafe_indexing.is_empty() {
1377 findings.push(Finding {
1378 rule_id: self.metadata.id.clone(),
1379 rule_name: self.metadata.name.clone(),
1380 severity: self.metadata.default_severity,
1381 message: format!(
1382 "Untrusted input used as array index in `{}` without bounds checking. \
1383 This can cause panic if index is out of bounds. Use .get() for safe \
1384 access or validate index < array.len() before indexing.",
1385 function.name
1386 ),
1387 function: function.name.clone(),
1388 function_signature: function.signature.clone(),
1389 evidence: unsafe_indexing.into_iter().take(3).collect(),
1390 span: function.span.clone(),
1391 confidence: Confidence::Medium,
1392 cwe_ids: Vec::new(),
1393 fix_suggestion: None,
1394 code_snippet: None,
1395 exploitability: Exploitability::default(),
1396 exploitability_score: Exploitability::default().score(),
1397 ..Default::default()
1398 });
1399 }
1400 }
1401
1402 findings
1403 }
1404}
1405
/// Registers the injection-related rules listed below with `engine`.
///
/// Each rule is constructed with its `new()` constructor, boxed, and handed to
/// `engine.register_rule` in the order shown.
pub fn register_injection_rules(engine: &mut crate::RuleEngine) {
    engine.register_rule(Box::new(UntrustedEnvInputRule::new()));
    engine.register_rule(Box::new(CommandInjectionRiskRule::new()));
    engine.register_rule(Box::new(CommandArgConcatenationRule::new()));
    engine.register_rule(Box::new(LogInjectionRule::new()));
    engine.register_rule(Box::new(RegexInjectionRule::new()));
    engine.register_rule(Box::new(UncheckedIndexRule::new()));
    engine.register_rule(Box::new(PathTraversalRule::new()));
    engine.register_rule(Box::new(SsrfRule::new()));
    engine.register_rule(Box::new(SqlInjectionRule::new()));
    engine.register_rule(Box::new(InterProceduralCommandInjectionRule::new()));
}
1419
/// Rule `RUSTCOLA086` (`path-traversal`): flags untrusted input that reaches a
/// filesystem operation without validation.
pub struct PathTraversalRule {
    /// Static rule metadata, populated by `new()` and returned by `Rule::metadata`.
    metadata: RuleMetadata,
}
1427
1428impl PathTraversalRule {
1429 pub fn new() -> Self {
1430 Self {
1431 metadata: RuleMetadata {
1432 id: "RUSTCOLA086".to_string(),
1433 name: "path-traversal".to_string(),
1434 short_description: "Untrusted input used in filesystem path".to_string(),
1435 full_description: "Detects when user-controlled input flows to filesystem \
1436 operations without proper validation. Attackers can use path traversal \
1437 sequences like '../' or absolute paths to access files outside intended \
1438 directories. Use canonicalize() + starts_with() validation, or strip \
1439 dangerous path components before use."
1440 .to_string(),
1441 help_uri: Some(
1442 "https://owasp.org/www-community/attacks/Path_Traversal".to_string(),
1443 ),
1444 default_severity: Severity::High,
1445 origin: RuleOrigin::BuiltIn,
1446 cwe_ids: Vec::new(),
1447 fix_suggestion: None,
1448 exploitability: Exploitability::default(),
1449 },
1450 }
1451 }
1452
1453 const FS_SINKS: &'static [&'static str] = &[
1454 "fs::read_to_string",
1455 "fs::read",
1456 "File::open",
1457 "std::fs::read_to_string",
1458 "std::fs::read",
1459 "std::fs::File::open",
1460 "OpenOptions::open",
1461 "read_to_string(",
1462 "read_to_string::<",
1463 "fs::write",
1464 "fs::create_dir",
1465 "fs::create_dir_all",
1466 "std::fs::write",
1467 "std::fs::create_dir",
1468 "std::fs::create_dir_all",
1469 "File::create",
1470 "std::fs::File::create",
1471 "create_dir_all::<",
1472 "create_dir::<",
1473 "fs::remove_file",
1474 "fs::remove_dir",
1475 "fs::remove_dir_all",
1476 "std::fs::remove_file",
1477 "std::fs::remove_dir",
1478 "std::fs::remove_dir_all",
1479 "remove_file::<",
1480 "remove_dir::<",
1481 "remove_dir_all::<",
1482 "fs::copy",
1483 "fs::rename",
1484 "std::fs::copy",
1485 "std::fs::rename",
1486 "copy::<",
1487 "rename::<",
1488 "Path::join",
1489 "PathBuf::push",
1490 "PathBuf::join",
1491 ];
1492
1493 const UNTRUSTED_SOURCES: &'static [&'static str] = &[
1494 "env::var(",
1495 "env::var_os(",
1496 "std::env::var(",
1497 "std::env::var_os(",
1498 " = var(",
1499 " = var::",
1500 "env::args()",
1501 "std::env::args()",
1502 " = args(",
1503 "Args>::next(",
1504 " = stdin()",
1505 "Stdin::lock(",
1506 "BufRead>::read_line(",
1507 "read_line(move",
1508 "io::stdin()",
1509 ];
1510
1511 const SANITIZERS: &'static [&'static str] = &[
1512 "canonicalize(",
1513 "starts_with(",
1514 "strip_prefix(",
1515 "is_relative(",
1516 "is_absolute(",
1517 "::contains(move",
1518 "::contains(copy",
1519 "slice::<impl",
1520 "String::replace",
1521 "str::replace",
1522 ".filter(",
1523 "chars().all(",
1524 "is_alphanumeric",
1525 "validate",
1526 "sanitize",
1527 "check_path",
1528 "is_safe",
1529 "safe_join",
1530 ];
1531
1532 fn track_untrusted_paths(&self, body: &[String]) -> HashSet<String> {
1533 let mut untrusted_vars = HashSet::new();
1534
1535 for line in body {
1536 let trimmed = line.trim();
1537 for source in Self::UNTRUSTED_SOURCES {
1538 if trimmed.contains(source) {
1539 if let Some(target) = self.extract_assignment_target(trimmed) {
1540 untrusted_vars.insert(target);
1541 }
1542 }
1543 }
1544 }
1545
1546 let mut changed = true;
1547 let mut iterations = 0;
1548
1549 while changed && iterations < 20 {
1550 changed = false;
1551 iterations += 1;
1552
1553 for line in body {
1554 let trimmed = line.trim();
1555 if !trimmed.contains(" = ") {
1556 continue;
1557 }
1558
1559 if let Some(target) = self.extract_assignment_target(trimmed) {
1560 for untrusted in untrusted_vars.clone() {
1561 if self.contains_var(trimmed, &untrusted) {
1562 if !untrusted_vars.contains(&target) {
1563 untrusted_vars.insert(target.clone());
1564 changed = true;
1565 }
1566 }
1567 }
1568 }
1569 }
1570 }
1571
1572 for line in body {
1574 if line.contains("read_line(") {
1575 if let Some(buffer_ref) = Self::extract_read_line_buffer(line) {
1576 if let Some(actual_var) = Self::resolve_reference(body, &buffer_ref) {
1577 untrusted_vars.insert(actual_var);
1578 } else {
1579 untrusted_vars.insert(buffer_ref);
1580 }
1581 }
1582 }
1583 }
1584
1585 untrusted_vars
1586 }
1587
1588 fn resolve_reference(body: &[String], ref_var: &str) -> Option<String> {
1589 for line in body {
1590 let trimmed = line.trim();
1591 if trimmed.starts_with(ref_var) && trimmed.contains(" = &") {
1592 if let Some(amp_idx) = trimmed.find('&') {
1593 let after_amp = &trimmed[amp_idx + 1..];
1594 let target = if after_amp.starts_with("mut ") {
1595 after_amp[4..].trim_end_matches(';')
1596 } else {
1597 after_amp.trim_end_matches(';')
1598 };
1599 let target = target.trim();
1600 if target.starts_with('_') {
1601 return Some(target.to_string());
1602 }
1603 }
1604 }
1605 }
1606 None
1607 }
1608
1609 fn extract_read_line_buffer(line: &str) -> Option<String> {
1610 if let Some(idx) = line.find("read_line(") {
1611 let after = &line[idx..];
1612 if let Some(comma_idx) = after.find(',') {
1613 let second_arg = &after[comma_idx + 1..];
1614 for word in second_arg.split_whitespace() {
1615 let clean = word.trim_matches(|c| c == ')' || c == '(' || c == '&');
1616 if clean.starts_with('_') && clean.len() > 1 {
1617 return Some(clean.to_string());
1618 }
1619 }
1620 }
1621 }
1622 None
1623 }
1624
1625 fn has_path_sanitization(&self, body: &[String], _untrusted_vars: &HashSet<String>) -> bool {
1626 let body_str = body.join("\n");
1627
1628 for sanitizer in Self::SANITIZERS {
1629 if body_str.contains(sanitizer) {
1630 return true;
1631 }
1632 }
1633
1634 if body_str.contains("switchInt(") {
1635 if body_str.contains("contains(")
1636 || body_str.contains("starts_with(")
1637 || body_str.contains("is_relative()")
1638 || body_str.contains("strip_prefix(")
1639 {
1640 return true;
1641 }
1642 }
1643
1644 if body_str.contains("Err(")
1645 && (body_str.contains("Permission")
1646 || body_str.contains("Invalid")
1647 || body_str.contains("traversal")
1648 || body_str.contains("not in allow"))
1649 {
1650 return true;
1651 }
1652
1653 false
1654 }
1655
1656 fn find_unsafe_fs_operations(
1657 &self,
1658 body: &[String],
1659 untrusted_vars: &HashSet<String>,
1660 ) -> Vec<String> {
1661 let mut evidence = Vec::new();
1662
1663 for line in body {
1664 let trimmed = line.trim();
1665 for sink in Self::FS_SINKS {
1666 if trimmed.contains(sink) {
1667 for var in untrusted_vars {
1668 if trimmed.contains(&format!("move {}", var))
1669 || trimmed.contains(&format!("copy {}", var))
1670 || trimmed.contains(&format!("&{}", var))
1671 || trimmed.contains(&format!("({}", var))
1672 {
1673 evidence.push(trimmed.to_string());
1674 break;
1675 }
1676 }
1677 }
1678 }
1679 }
1680
1681 evidence
1682 }
1683
1684 fn extract_assignment_target(&self, line: &str) -> Option<String> {
1685 let parts: Vec<&str> = line.split('=').collect();
1686 if parts.len() >= 2 {
1687 let target = parts[0].trim();
1688 if target.starts_with('_') && target.chars().skip(1).all(|c| c.is_ascii_digit()) {
1689 return Some(target.to_string());
1690 }
1691 if let Some(var) = target.split_whitespace().find(|s| s.starts_with('_')) {
1692 let var_clean = var.trim_end_matches(':');
1693 if var_clean.starts_with('_') {
1694 return Some(var_clean.to_string());
1695 }
1696 }
1697 }
1698 None
1699 }
1700
1701 fn contains_var(&self, line: &str, var: &str) -> bool {
1702 line.contains(&format!("move {}", var))
1703 || line.contains(&format!("copy {}", var))
1704 || line.contains(&format!("&{}", var))
1705 || line.contains(&format!("({})", var))
1706 || line.contains(&format!("{},", var))
1707 || line.contains(&format!(" {} ", var))
1708 || line.contains(&format!("[{}]", var))
1709 }
1710}
1711
1712impl Rule for PathTraversalRule {
1713 fn metadata(&self) -> &RuleMetadata {
1714 &self.metadata
1715 }
1716
1717 fn evaluate(
1718 &self,
1719 package: &MirPackage,
1720 inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
1721 ) -> Vec<Finding> {
1722 let mut findings = Vec::new();
1723
1724 for function in &package.functions {
1725 if function.name.contains("mir_extractor")
1726 || function.name.contains("mir-extractor")
1727 || function.name.contains("__")
1728 || function.name.contains("test_")
1729 || function.name.contains("detect_rustup")
1730 || function.name.contains("find_rust_toolchain")
1731 || function.name.contains("detect_toolchain")
1732 || function.name.contains("find_cargo_cola_workspace")
1733 {
1734 continue;
1735 }
1736
1737 let untrusted_vars = self.track_untrusted_paths(&function.body);
1738
1739 if untrusted_vars.is_empty() {
1740 continue;
1741 }
1742
1743 if self.has_path_sanitization(&function.body, &untrusted_vars) {
1744 continue;
1745 }
1746
1747 let unsafe_ops = self.find_unsafe_fs_operations(&function.body, &untrusted_vars);
1748
1749 if !unsafe_ops.is_empty() {
1750 let severity = if unsafe_ops.iter().any(|op| {
1751 op.contains("remove")
1752 || op.contains("write")
1753 || op.contains("create")
1754 || op.contains("rename")
1755 }) {
1756 Severity::High
1757 } else {
1758 Severity::Medium
1759 };
1760
1761 findings.push(Finding {
1762 rule_id: self.metadata.id.clone(),
1763 rule_name: self.metadata.name.clone(),
1764 severity,
1765 message: format!(
1766 "Untrusted input used in filesystem path in `{}`. \
1767 User-controlled paths can enable access to files outside \
1768 intended directories using '../' sequences or absolute paths. \
1769 Use canonicalize() + starts_with() validation, or sanitize \
1770 path input to remove dangerous components.",
1771 function.name
1772 ),
1773 function: function.name.clone(),
1774 function_signature: function.signature.clone(),
1775 evidence: unsafe_ops.into_iter().take(3).collect(),
1776 span: function.span.clone(),
1777 confidence: Confidence::Medium,
1778 cwe_ids: Vec::new(),
1779 fix_suggestion: None,
1780 code_snippet: None,
1781 exploitability: Exploitability::default(),
1782 exploitability_score: Exploitability::default().score(),
1783 ..Default::default()
1784 });
1785 }
1786 }
1787
1788 if let Some(analysis) = inter_analysis {
1790 let flows = analysis.detect_inter_procedural_flows(package);
1791
1792 let mut reported_functions: HashSet<String> =
1793 findings.iter().map(|f| f.function.clone()).collect();
1794
1795 for flow in flows {
1796 if flow.sink_type != "filesystem" {
1797 continue;
1798 }
1799
1800 let is_internal = flow.sink_function.contains("mir_extractor")
1801 || flow.sink_function.contains("mir-extractor")
1802 || flow.sink_function.contains("cache_envelope")
1803 || flow.sink_function.contains("detect_toolchain")
1804 || flow.sink_function.contains("extract_artifacts")
1805 || flow.sink_function.contains("__")
1806 || flow.source_function.contains("mir_extractor")
1807 || flow.source_function.contains("mir-extractor")
1808 || flow.source_function.contains("cache_envelope")
1809 || flow.source_function.contains("fingerprint")
1810 || flow.source_function.contains("toolchain");
1811 if is_internal {
1812 continue;
1813 }
1814
1815 if reported_functions.contains(&flow.sink_function) {
1816 continue;
1817 }
1818
1819 if flow.sanitized {
1820 continue;
1821 }
1822
1823 let sink_func = package
1824 .functions
1825 .iter()
1826 .find(|f| f.name == flow.sink_function);
1827
1828 let span = sink_func.map(|f| f.span.clone()).unwrap_or_default();
1829 let signature = sink_func.map(|f| f.signature.clone()).unwrap_or_default();
1830
1831 findings.push(Finding {
1832 rule_id: self.metadata.id.clone(),
1833 rule_name: self.metadata.name.clone(),
1834 severity: Severity::High,
1835 message: format!(
1836 "Inter-procedural path traversal: untrusted input from `{}` \
1837 flows through {} to filesystem operation in `{}`. \
1838 User-controlled paths can enable access to files outside \
1839 intended directories.",
1840 flow.source_function,
1841 if flow.call_chain.len() > 2 {
1842 format!("{} function calls", flow.call_chain.len() - 1)
1843 } else {
1844 "helper function".to_string()
1845 },
1846 flow.sink_function
1847 ),
1848 function: flow.sink_function.clone(),
1849 function_signature: signature,
1850 evidence: vec![flow.describe()],
1851 span,
1852 ..Default::default()
1853 });
1854
1855 reported_functions.insert(flow.sink_function.clone());
1856 }
1857 }
1858
1859 findings
1860 }
1861}
1862
/// Rule `RUSTCOLA088` (`server-side-request-forgery`): flags untrusted input
/// used as the URL of an HTTP request.
pub struct SsrfRule {
    /// Static rule metadata, populated by `new()` and returned by `Rule::metadata`.
    metadata: RuleMetadata,
}
1870
1871impl SsrfRule {
1872 pub fn new() -> Self {
1873 Self {
1874 metadata: RuleMetadata {
1875 id: "RUSTCOLA088".to_string(),
1876 name: "server-side-request-forgery".to_string(),
1877 short_description: "Untrusted input used as HTTP request URL".to_string(),
1878 full_description: "Detects when user-controlled input is used directly as \
1879 an HTTP request URL without validation. This enables attackers to make \
1880 the server send requests to arbitrary destinations, potentially accessing \
1881 internal services (localhost, cloud metadata at 169.254.169.254), scanning \
1882 internal networks, or exfiltrating data. Validate URLs against an allowlist \
1883 of permitted hosts and schemes before making requests."
1884 .to_string(),
1885 help_uri: Some(
1886 "https://owasp.org/www-community/attacks/Server_Side_Request_Forgery"
1887 .to_string(),
1888 ),
1889 default_severity: Severity::High,
1890 origin: RuleOrigin::BuiltIn,
1891 cwe_ids: Vec::new(),
1892 fix_suggestion: None,
1893 exploitability: Exploitability::default(),
1894 },
1895 }
1896 }
1897
1898 const HTTP_SINKS: &'static [&'static str] = &[
1902 "reqwest::blocking::get",
1904 "reqwest::get",
1905 "blocking::get",
1906 "Client>::get",
1907 "Client>::post",
1908 "Client>::put",
1909 "Client>::delete",
1910 "Client>::patch",
1911 "Client>::head",
1912 "RequestBuilder>::send",
1913 "ureq::get",
1915 "ureq::post",
1916 "ureq::put",
1917 "ureq::delete",
1918 "ureq::request",
1919 "Agent>::get",
1920 "Agent>::post",
1921 "Request>::call",
1922 "hyper::Client",
1924 "get::<&String>",
1926 "get::<&str>",
1927 "post::<&String>",
1928 "post::<&str>",
1929 ];
1930
1931 const UNTRUSTED_SOURCES: &'static [&'static str] = &[
1934 "env::var(",
1935 "env::var_os(",
1936 "std::env::var(",
1937 "std::env::var_os(",
1938 " = var(",
1939 " = var::",
1940 "var::<&str>",
1941 "var_os::<",
1942 "env::args()",
1943 "std::env::args()",
1944 " = args()",
1945 "Args>::next(",
1946 "args().collect",
1947 " = stdin()",
1948 "Stdin::lock(",
1949 "Stdin>::lock",
1950 "BufRead>::read_line(",
1951 "read_line(move",
1952 "io::stdin()",
1953 "Lines>::next(",
1954 "fs::read_to_string(",
1955 "read_to_string(move",
1956 "read_to_string::",
1957 "BufReader>::read",
1958 "Read>::read",
1959 "axum::extract::Query",
1961 "axum::extract::Path",
1962 "axum::extract::Form",
1963 "axum::Json",
1964 "actix_web::web::Query",
1965 "actix_web::web::Path",
1966 "actix_web::web::Form",
1967 "actix_web::web::Json",
1968 "body::to_bytes",
1970 "hyper::body::to_bytes",
1971 "BodyExt>::collect",
1972 ];
1973
1974 const SANITIZERS: &'static [&'static str] = &[
1975 "Url::parse(",
1976 "url::Url::parse(",
1977 "Uri::from_str(",
1978 "host_str(",
1979 "scheme(",
1980 "starts_with(",
1981 "ends_with(",
1982 "contains(",
1983 "allowed",
1984 "whitelist",
1985 "allowlist",
1986 "trusted",
1987 "permitted",
1988 "localhost",
1989 "127.0.0.1",
1990 "169.254.169.254",
1991 "192.168.",
1992 "10.",
1993 "172.",
1994 ".internal",
1995 "== \"https\"",
1996 "== \"http\"",
1997 "is_alphanumeric",
1998 "chars().all(",
1999 " as Iterator>::all::<",
2000 "Eq>::eq::<",
2001 "PartialEq>::eq::<",
2002 "match ",
2003 "Some(\"",
2004 ];
2005
2006 fn track_untrusted_vars(&self, body: &[String]) -> HashSet<String> {
2007 let mut untrusted_vars = HashSet::new();
2008
2009 for line in body {
2010 let trimmed = line.trim();
2011 for source in Self::UNTRUSTED_SOURCES {
2012 if trimmed.contains(source) {
2013 if let Some(target) = self.extract_assignment_target(trimmed) {
2014 untrusted_vars.insert(target);
2015 }
2016 }
2017 }
2018 }
2019
2020 let mut changed = true;
2021 let mut iterations = 0;
2022
2023 while changed && iterations < 20 {
2024 changed = false;
2025 iterations += 1;
2026
2027 for line in body {
2028 let trimmed = line.trim();
2029 if !trimmed.contains(" = ") {
2030 continue;
2031 }
2032
2033 if let Some(target) = self.extract_assignment_target(trimmed) {
2034 for untrusted in untrusted_vars.clone() {
2035 if self.contains_var(trimmed, &untrusted) {
2036 if !untrusted_vars.contains(&target) {
2037 untrusted_vars.insert(target.clone());
2038 changed = true;
2039 }
2040 }
2041 }
2042 }
2043 }
2044 }
2045
2046 untrusted_vars
2047 }
2048
2049 fn has_ssrf_sanitization(&self, body: &[String]) -> bool {
2050 let body_str = body.join("\n");
2051 for sanitizer in Self::SANITIZERS {
2052 if body_str.contains(sanitizer) {
2053 return true;
2054 }
2055 }
2056 false
2057 }
2058
2059 fn find_unsafe_http_operations(
2060 &self,
2061 body: &[String],
2062 untrusted_vars: &HashSet<String>,
2063 ) -> Vec<String> {
2064 let mut evidence = Vec::new();
2065
2066 for line in body {
2067 let trimmed = line.trim();
2068 for sink in Self::HTTP_SINKS {
2069 if trimmed.contains(sink) {
2070 for var in untrusted_vars {
2071 if self.contains_var(trimmed, var) {
2072 evidence.push(trimmed.to_string());
2073 break;
2074 }
2075 }
2076 }
2077 }
2078 }
2079
2080 evidence
2081 }
2082
2083 fn extract_assignment_target(&self, line: &str) -> Option<String> {
2084 let parts: Vec<&str> = line.split('=').collect();
2085 if parts.len() >= 2 {
2086 let target = parts[0].trim();
2087 if target.starts_with('_') && target.chars().skip(1).all(|c| c.is_ascii_digit()) {
2088 return Some(target.to_string());
2089 }
2090 if let Some(var) = target.split_whitespace().find(|s| s.starts_with('_')) {
2091 let var_clean = var.trim_end_matches(':');
2092 if var_clean.starts_with('_') {
2093 return Some(var_clean.to_string());
2094 }
2095 }
2096 }
2097 None
2098 }
2099
2100 fn contains_var(&self, line: &str, var: &str) -> bool {
2101 line.contains(&format!("move {}", var))
2102 || line.contains(&format!("copy {}", var))
2103 || line.contains(&format!("&{}", var))
2104 || line.contains(&format!("({})", var))
2105 || line.contains(&format!("{},", var))
2106 || line.contains(&format!(" {} ", var))
2107 || line.contains(&format!("[{}]", var))
2108 || line.contains(&format!("(({} as", var))
2109 }
2110}
2111
2112impl Rule for SsrfRule {
2113 fn metadata(&self) -> &RuleMetadata {
2114 &self.metadata
2115 }
2116
2117 fn evaluate(
2118 &self,
2119 package: &MirPackage,
2120 inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
2121 ) -> Vec<Finding> {
2122 let mut findings = Vec::new();
2123
2124 for function in &package.functions {
2125 if function.name.contains("mir_extractor")
2126 || function.name.contains("mir-extractor")
2127 || function.name.contains("__")
2128 || function.name.contains("test_")
2129 || function.name == "detect_toolchain"
2130 {
2131 continue;
2132 }
2133
2134 let body_str = function.body.join("\n");
2135 let has_http_client = Self::HTTP_SINKS.iter().any(|s| body_str.contains(s))
2136 || body_str.contains("reqwest")
2137 || body_str.contains("ureq")
2138 || body_str.contains("hyper");
2139
2140 if !has_http_client {
2141 continue;
2142 }
2143
2144 let untrusted_vars = self.track_untrusted_vars(&function.body);
2145
2146 if untrusted_vars.is_empty() {
2147 continue;
2148 }
2149
2150 if self.has_ssrf_sanitization(&function.body) {
2151 continue;
2152 }
2153
2154 let unsafe_ops = self.find_unsafe_http_operations(&function.body, &untrusted_vars);
2155
2156 if !unsafe_ops.is_empty() {
2157 findings.push(Finding {
2158 rule_id: self.metadata.id.clone(),
2159 rule_name: self.metadata.name.clone(),
2160 severity: Severity::High,
2161 message: format!(
2162 "Server-Side Request Forgery (SSRF) vulnerability in `{}`. \
2163 User-controlled input is used as an HTTP request URL without \
2164 validation. Attackers could access internal services, cloud \
2165 metadata (169.254.169.254), or scan internal networks. Validate \
2166 URLs against an allowlist of permitted hosts.",
2167 function.name
2168 ),
2169 function: function.name.clone(),
2170 function_signature: function.signature.clone(),
2171 evidence: unsafe_ops.into_iter().take(3).collect(),
2172 span: function.span.clone(),
2173 confidence: Confidence::Medium,
2174 cwe_ids: Vec::new(),
2175 fix_suggestion: None,
2176 code_snippet: None,
2177 exploitability: Exploitability::default(),
2178 exploitability_score: Exploitability::default().score(),
2179 ..Default::default()
2180 });
2181 }
2182 }
2183
2184 if let Some(analysis) = inter_analysis {
2186 let flows = analysis.detect_inter_procedural_flows(package);
2187
2188 let mut reported_functions: HashSet<String> =
2189 findings.iter().map(|f| f.function.clone()).collect();
2190
2191 for flow in flows {
2192 if flow.sink_type != "http" {
2193 continue;
2194 }
2195
2196 let is_internal = flow.sink_function.contains("mir_extractor")
2197 || flow.sink_function.contains("__")
2198 || flow.source_function.contains("mir_extractor");
2199 if is_internal {
2200 continue;
2201 }
2202
2203 if reported_functions.contains(&flow.sink_function) {
2204 continue;
2205 }
2206
2207 if flow.sanitized {
2208 continue;
2209 }
2210
2211 let sink_func = package
2212 .functions
2213 .iter()
2214 .find(|f| f.name == flow.sink_function);
2215
2216 let span = sink_func.map(|f| f.span.clone()).unwrap_or_default();
2217 let signature = sink_func.map(|f| f.signature.clone()).unwrap_or_default();
2218
2219 findings.push(Finding {
2220 rule_id: self.metadata.id.clone(),
2221 rule_name: self.metadata.name.clone(),
2222 severity: Severity::High,
2223 message: format!(
2224 "Inter-procedural SSRF: untrusted input from `{}` \
2225 flows through {} to HTTP request in `{}`. Validate \
2226 URLs against an allowlist before making requests.",
2227 flow.source_function,
2228 if flow.call_chain.len() > 2 {
2229 format!("{} function calls", flow.call_chain.len() - 1)
2230 } else {
2231 "helper function".to_string()
2232 },
2233 flow.sink_function
2234 ),
2235 function: flow.sink_function.clone(),
2236 function_signature: signature,
2237 evidence: vec![flow.describe()],
2238 span,
2239 ..Default::default()
2240 });
2241
2242 reported_functions.insert(flow.sink_function.clone());
2243 }
2244 }
2245
2246 findings
2247 }
2248}
2249
/// Rule `RUSTCOLA087` (`sql-injection`): flags untrusted input used while
/// constructing SQL query strings.
pub struct SqlInjectionRule {
    /// Static rule metadata, populated by `new()`.
    metadata: RuleMetadata,
}
2257
2258impl SqlInjectionRule {
2259 pub fn new() -> Self {
2260 Self {
2261 metadata: RuleMetadata {
2262 id: "RUSTCOLA087".to_string(),
2263 name: "sql-injection".to_string(),
2264 short_description: "Untrusted input used in SQL query construction".to_string(),
2265 full_description: "Detects when user-controlled input is concatenated or \
2266 formatted directly into SQL query strings instead of using parameterized \
2267 queries. This allows attackers to modify query logic, bypass authentication, \
2268 or extract/modify sensitive data. Use prepared statements with bind \
2269 parameters (?, $1, :name) instead of string interpolation."
2270 .to_string(),
2271 help_uri: Some("https://owasp.org/www-community/attacks/SQL_Injection".to_string()),
2272 default_severity: Severity::High,
2273 origin: RuleOrigin::BuiltIn,
2274 cwe_ids: Vec::new(),
2275 fix_suggestion: None,
2276 exploitability: Exploitability::default(),
2277 },
2278 }
2279 }
2280
/// Substrings characteristic of SQL text: statement keywords, clauses and
/// bind-parameter placeholders.
///
/// NOTE(review): `"INSERT INTO"` and `"DELETE FROM"` each appear twice. This is
/// harmless for substring matching, but one of each pair may have been meant
/// as a whitespace variant — confirm against the intended list.
const SQL_STATEMENT_PATTERNS: &'static [&'static str] = &[
    // Statement keywords.
    "SELECT ",
    "SELECT\t",
    "SELECT\n",
    " FROM ",
    "INSERT INTO",
    "INSERT INTO",
    "UPDATE ",
    "UPDATE\t",
    " SET ",
    "DELETE FROM",
    "DELETE FROM",
    "DROP TABLE",
    "DROP DATABASE",
    "DROP INDEX",
    "DROP VIEW",
    "CREATE TABLE",
    "CREATE DATABASE",
    "CREATE INDEX",
    "CREATE VIEW",
    "ALTER TABLE",
    "ALTER DATABASE",
    "TRUNCATE TABLE",
    // Clauses.
    " WHERE ",
    " ORDER BY",
    " GROUP BY",
    " HAVING ",
    " JOIN ",
    " LEFT JOIN",
    " RIGHT JOIN",
    " INNER JOIN",
    " OUTER JOIN",
    " UNION ",
    " UNION ALL",
    " VALUES",
    " VALUES(",
    // Bind-parameter placeholders.
    "?)",
    "?, ",
    " ? ",
    "$1",
    "$2",
    "$3",
];
2324
/// Substrings for string-building operations and database API calls.
///
/// NOTE(review): the single-character entry `"+"` matches very broadly; how
/// this list is consumed is not visible from this part of the file.
const SQL_SINKS: &'static [&'static str] = &[
    // String construction.
    "format_args!",
    "format!",
    "String::push_str",
    "str::to_string",
    "+",
    // Generic query/execute method names.
    "execute(",
    "query(",
    "query_as(",
    "sql_query(",
    "prepare(",
    "execute_batch(",
    "query_row(",
    "query_map(",
    "raw_query(",
    "raw_sql(",
    // Library-qualified entry points.
    "sqlx::query",
    "sqlx::query_as",
    "sqlx::query_scalar",
    "diesel::sql_query",
    "diesel::delete",
    "diesel::insert_into",
    "diesel::update",
    "rusqlite::execute",
    "Connection::execute",
    "Connection::query_row",
    "Statement::execute",
];
2353
/// Substrings marking a statement as reading untrusted input; used by
/// `track_untrusted_vars` to seed the taint set.
///
/// NOTE(review): the trailing bare words (`Request`, `Form`, `Query`, `Json`,
/// `Path`) match any statement containing them, including unrelated type
/// names — confirm this breadth is intended.
const UNTRUSTED_SOURCES: &'static [&'static str] = &[
    // Environment variables.
    "env::var(",
    "env::var_os(",
    "std::env::var(",
    "std::env::var_os(",
    " = var(",
    " = var::",
    // Command-line arguments.
    "env::args()",
    "std::env::args()",
    " = args(",
    "Args>::next(",
    // Standard input.
    " = stdin()",
    "Stdin::lock(",
    "BufRead>::read_line(",
    "read_line(move",
    "io::stdin()",
    // Presumably web-framework request types and extractors.
    "Request",
    "Form",
    "Query",
    "Json",
    "Path",
];
2376
/// Substrings identifying a call that executes or prepares SQL; consulted by
/// `has_sql_execution_sink`.
const SQL_EXECUTION_SINKS: &'static [&'static str] = &[
    // Generic method names.
    "execute(",
    "query(",
    "query_as(",
    "query_one(",
    "query_row(",
    "query_map(",
    "prepare(",
    "sql_query(",
    "execute_batch(",
    "raw_query(",
    // Type- or crate-qualified calls.
    "Connection::execute",
    "Client::query",
    "sqlx::query",
    "diesel::sql_query",
    "rusqlite::execute",
    "tokio_postgres::query",
    "Statement::execute",
    "Transaction::execute",
    "Pool::execute",
];
2400
/// Substrings suggesting that SQL-looking text belongs to logging, error
/// reporting or CLI help output rather than to a query; consulted
/// case-insensitively by `is_non_sql_context`.
const NON_SQL_CONTEXTS: &'static [&'static str] = &[
    // Logging and printing.
    "error!",
    "warn!",
    "info!",
    "debug!",
    "trace!",
    "tracing::",
    "log::",
    "eprintln!",
    "println!",
    // Error construction and context.
    "anyhow::Context",
    ".context(",
    "Error::new",
    "bail!",
    "thiserror",
    "snafu",
    // Command-line help text.
    "--help",
    "Usage:",
    "USAGE:",
    "[OPTIONS]",
    "[ARGS]",
    "Examples",
    "[env:",
    "[default:",
    // NOTE(review): presumably storage-engine vocabulary seen in false
    // positives — confirm the origin of these five entries.
    "catalog",
    "persist",
    "snapshot",
    "compaction",
    "partition",
    // Error-message phrasing.
    "failed to",
    "unable to",
    "could not",
    "unexpected error",
];
2442
/// Substrings whose presence in a function body is taken as evidence of safe
/// query construction; scanned by `has_sql_sanitization`.
///
/// NOTE(review): short entries such as `$1`, `:id`, `filter(` and `replace(`
/// match broadly — confirm that whole-body matching is the intended precision.
const SANITIZERS: &'static [&'static str] = &[
    // Bind-parameter placeholders.
    " ? ",
    "?)",
    "?, ",
    "$1",
    "$2",
    ":name",
    ":username",
    ":id",
    // Binding and query-builder APIs.
    ".bind(",
    "bind_value(",
    "bind::<",
    "QueryBuilder",
    "filter(",
    ".eq(",
    ".ne(",
    ".gt(",
    ".lt(",
    // Numeric parsing.
    "parse::<i",
    "parse::<u",
    "parse::<f",
    "i32::from_str",
    "i64::from_str",
    "u32::from_str",
    "u64::from_str",
    // Allow-list style checks.
    "::contains(move",
    "::contains(copy",
    "allowed_",
    "whitelist",
    "allowlist",
    // Escaping and character filtering.
    "escape(",
    "quote(",
    "sanitize",
    "replace(",
    "replace('",
    "::replace::",
    "is_alphanumeric",
    "chars().all(",
    " as Iterator>::all::<",
];
2483
2484 fn has_sql_execution_sink(&self, body: &[String]) -> bool {
2486 let body_str = body.join("\n");
2487 Self::SQL_EXECUTION_SINKS
2488 .iter()
2489 .any(|sink| body_str.contains(sink))
2490 }
2491
2492 fn is_non_sql_context(&self, body: &[String], evidence_line: &str) -> bool {
2494 let evidence_lower = evidence_line.to_lowercase();
2496 for pattern in Self::NON_SQL_CONTEXTS {
2497 if evidence_lower.contains(&pattern.to_lowercase()) {
2498 return true;
2499 }
2500 }
2501
2502 let body_str = body.join("\n").to_lowercase();
2504 let log_count = Self::NON_SQL_CONTEXTS
2505 .iter()
2506 .filter(|p| body_str.contains(&p.to_lowercase()))
2507 .count();
2508
2509 log_count >= 2
2511 }
2512
2513 fn track_untrusted_vars(&self, body: &[String]) -> HashSet<String> {
2514 let mut untrusted_vars = HashSet::new();
2515
2516 for line in body {
2517 let trimmed = line.trim();
2518 for source in Self::UNTRUSTED_SOURCES {
2519 if trimmed.contains(source) {
2520 if let Some(target) = self.extract_assignment_target(trimmed) {
2521 untrusted_vars.insert(target);
2522 }
2523 }
2524 }
2525 }
2526
2527 let mut changed = true;
2528 let mut iterations = 0;
2529
2530 while changed && iterations < 20 {
2531 changed = false;
2532 iterations += 1;
2533
2534 for line in body {
2535 let trimmed = line.trim();
2536 if !trimmed.contains(" = ") {
2537 continue;
2538 }
2539
2540 if let Some(target) = self.extract_assignment_target(trimmed) {
2541 for untrusted in untrusted_vars.clone() {
2542 if self.contains_var(trimmed, &untrusted) {
2543 if !untrusted_vars.contains(&target) {
2544 untrusted_vars.insert(target.clone());
2545 changed = true;
2546 }
2547 }
2548 }
2549 }
2550 }
2551 }
2552
2553 untrusted_vars
2554 }
2555
2556 fn has_sql_sanitization(&self, body: &[String]) -> bool {
2557 let body_str = body.join("\n");
2558 for sanitizer in Self::SANITIZERS {
2559 if body_str.contains(sanitizer) {
2560 return true;
2561 }
2562 }
2563 false
2564 }
2565
2566 fn find_unsafe_sql_operations(
2567 &self,
2568 body: &[String],
2569 untrusted_vars: &HashSet<String>,
2570 ) -> Vec<String> {
2571 let mut evidence = Vec::new();
2572
2573 let has_sql_const = body.iter().any(|line| {
2574 if !line.contains("const ") && !line.contains("[const ") {
2575 return false;
2576 }
2577 let line_upper = line.to_uppercase();
2578 Self::SQL_STATEMENT_PATTERNS
2579 .iter()
2580 .any(|pattern| line_upper.contains(pattern))
2581 });
2582
2583 let has_promoted_sql_ref = body.iter().any(|line| {
2584 line.contains("::promoted[")
2585 && body.iter().any(|other| {
2586 if !other.contains("[const ") && !other.contains(" = [const ") {
2587 return false;
2588 }
2589 let other_upper = other.to_uppercase();
2590 Self::SQL_STATEMENT_PATTERNS
2591 .iter()
2592 .any(|pattern| other_upper.contains(pattern))
2593 })
2594 });
2595
2596 if !has_sql_const && !has_promoted_sql_ref {
2597 return evidence;
2598 }
2599
2600 let has_tainted_format = body.iter().any(|line| {
2601 let trimmed = line.trim();
2602 let is_format_related = trimmed.contains("fmt::Arguments")
2603 || trimmed.contains("fmt::rt::Argument")
2604 || trimmed.contains("Arguments::new")
2605 || trimmed.contains("Argument::new")
2606 || trimmed.contains("core::fmt::")
2607 || trimmed.contains("format_args");
2608
2609 if is_format_related {
2610 for var in untrusted_vars {
2611 if self.contains_var(trimmed, var) {
2612 return true;
2613 }
2614 }
2615 }
2616 false
2617 });
2618
2619 if has_tainted_format {
2620 for line in body {
2621 if !line.contains("const ") && !line.contains("[const ") {
2622 continue;
2623 }
2624 let line_upper = line.to_uppercase();
2625 if Self::SQL_STATEMENT_PATTERNS
2626 .iter()
2627 .any(|pattern| line_upper.contains(pattern))
2628 {
2629 evidence.push(line.trim().to_string());
2630 }
2631 }
2632 }
2633
2634 evidence
2635 }
2636
2637 fn extract_assignment_target(&self, line: &str) -> Option<String> {
2638 let parts: Vec<&str> = line.split('=').collect();
2639 if parts.len() >= 2 {
2640 let target = parts[0].trim();
2641 if target.starts_with('_') && target.chars().skip(1).all(|c| c.is_ascii_digit()) {
2642 return Some(target.to_string());
2643 }
2644 if let Some(var) = target.split_whitespace().find(|s| s.starts_with('_')) {
2645 let var_clean = var.trim_end_matches(':');
2646 if var_clean.starts_with('_') {
2647 return Some(var_clean.to_string());
2648 }
2649 }
2650 }
2651 None
2652 }
2653
2654 fn contains_var(&self, line: &str, var: &str) -> bool {
2655 line.contains(&format!("move {}", var))
2656 || line.contains(&format!("copy {}", var))
2657 || line.contains(&format!("&{}", var))
2658 || line.contains(&format!("({})", var))
2659 || line.contains(&format!("{},", var))
2660 || line.contains(&format!(" {} ", var))
2661 || line.contains(&format!("[{}]", var))
2662 }
2663
2664 fn extract_function_params(&self, body: &[String]) -> HashSet<String> {
2665 let mut params = HashSet::new();
2666 for line in body {
2667 let trimmed = line.trim();
2668 if trimmed.starts_with("debug ") && trimmed.contains(" => _") {
2669 if let Some(start) = trimmed.find(" => _") {
2670 let after = &trimmed[start + 5..];
2671 let var: String = after
2672 .chars()
2673 .take_while(|c| c.is_ascii_digit() || *c == '_')
2674 .collect();
2675 if !var.is_empty() && var != "0" {
2676 params.insert(format!("_{}", var.trim_start_matches('_')));
2677 }
2678 }
2679 }
2680 }
2681 params
2682 }
2683
2684 fn propagate_taint(&self, body: &[String], untrusted_vars: &mut HashSet<String>) {
2685 let mut changed = true;
2686 let mut iterations = 0;
2687
2688 while changed && iterations < 20 {
2689 changed = false;
2690 iterations += 1;
2691
2692 for line in body {
2693 let trimmed = line.trim();
2694 if !trimmed.contains(" = ") {
2695 continue;
2696 }
2697
2698 if let Some(target) = self.extract_assignment_target(trimmed) {
2699 for untrusted in untrusted_vars.clone() {
2700 if self.contains_var(trimmed, &untrusted) {
2701 if !untrusted_vars.contains(&target) {
2702 untrusted_vars.insert(target.clone());
2703 changed = true;
2704 }
2705 }
2706 }
2707 }
2708 }
2709 }
2710 }
2711}
2712
2713impl Rule for SqlInjectionRule {
2714 fn metadata(&self) -> &RuleMetadata {
2715 &self.metadata
2716 }
2717
2718 fn evaluate(
2719 &self,
2720 package: &MirPackage,
2721 inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
2722 ) -> Vec<Finding> {
2723 let mut findings = Vec::new();
2724
2725 let mut tainted_return_functions: HashSet<String> = HashSet::new();
2727
2728 for function in &package.functions {
2729 let has_source = function
2730 .body
2731 .iter()
2732 .any(|line| Self::UNTRUSTED_SOURCES.iter().any(|src| line.contains(src)));
2733
2734 if has_source {
2735 let has_sql_const = function.body.iter().any(|line| {
2736 if !line.contains("const ") && !line.contains("[const ") {
2737 return false;
2738 }
2739 let upper = line.to_uppercase();
2740 Self::SQL_STATEMENT_PATTERNS
2741 .iter()
2742 .any(|pattern| upper.contains(pattern))
2743 });
2744
2745 if !has_sql_const {
2746 tainted_return_functions.insert(function.name.clone());
2747 }
2748 }
2749 }
2750
2751 for function in &package.functions {
2752 if function.name.contains("mir_extractor")
2753 || function.name.contains("mir-extractor")
2754 || function.name.contains("__")
2755 || function.name.contains("test_")
2756 {
2757 continue;
2758 }
2759
2760 if !self.has_sql_execution_sink(&function.body) {
2763 continue;
2764 }
2765
2766 let mut untrusted_vars = self.track_untrusted_vars(&function.body);
2767
2768 for line in &function.body {
2770 let trimmed = line.trim();
2771 if trimmed.contains(" = ") {
2772 for tainted_fn in &tainted_return_functions {
2773 let fn_name = tainted_fn.split("::").last().unwrap_or(tainted_fn);
2774 if trimmed.contains(&format!("= {}()", fn_name)) {
2775 if let Some(target) = self.extract_assignment_target(trimmed) {
2776 untrusted_vars.insert(target);
2777 }
2778 }
2779 }
2780 }
2781 }
2782
2783 if untrusted_vars.is_empty() {
2785 let params = self.extract_function_params(&function.body);
2786 if !params.is_empty() {
2787 let mut param_vars = params.clone();
2788 self.propagate_taint(&function.body, &mut param_vars);
2789
2790 let has_sql_const = function.body.iter().any(|line| {
2791 if !line.contains("const ") && !line.contains("[const ") {
2792 return false;
2793 }
2794 let upper = line.to_uppercase();
2795 Self::SQL_STATEMENT_PATTERNS
2796 .iter()
2797 .any(|pattern| upper.contains(pattern))
2798 });
2799
2800 if has_sql_const {
2801 let has_param_in_format = function.body.iter().any(|line| {
2802 let trimmed = line.trim();
2803 let is_format_related = trimmed.contains("fmt::Arguments")
2804 || trimmed.contains("Argument::")
2805 || trimmed.contains("format_args")
2806 || trimmed.contains("core::fmt::")
2807 || trimmed.contains("new_display")
2808 || trimmed.contains("new_debug");
2809
2810 is_format_related
2811 && param_vars.iter().any(|v| self.contains_var(trimmed, v))
2812 });
2813
2814 if has_param_in_format {
2815 untrusted_vars = param_vars;
2816 }
2817 }
2818 }
2819 }
2820
2821 if untrusted_vars.is_empty() {
2822 continue;
2823 }
2824
2825 if self.has_sql_sanitization(&function.body) {
2826 continue;
2827 }
2828
2829 let unsafe_ops = self.find_unsafe_sql_operations(&function.body, &untrusted_vars);
2830
2831 let filtered_ops: Vec<String> = unsafe_ops
2833 .into_iter()
2834 .filter(|op| !self.is_non_sql_context(&function.body, op))
2835 .collect();
2836
2837 if !filtered_ops.is_empty() {
2838 findings.push(Finding {
2839 rule_id: self.metadata.id.clone(),
2840 rule_name: self.metadata.name.clone(),
2841 severity: Severity::High,
2842 message: format!(
2843 "SQL injection vulnerability in `{}`. Untrusted input is used \
2844 in SQL query construction without parameterization. Use prepared \
2845 statements with bind parameters (?, $1, :name) instead of string \
2846 formatting or concatenation.",
2847 function.name
2848 ),
2849 function: function.name.clone(),
2850 function_signature: function.signature.clone(),
2851 evidence: filtered_ops.into_iter().take(3).collect(),
2852 span: function.span.clone(),
2853 confidence: Confidence::Medium,
2854 cwe_ids: Vec::new(),
2855 fix_suggestion: None,
2856 code_snippet: None,
2857 exploitability: Exploitability::default(),
2858 exploitability_score: Exploitability::default().score(),
2859 ..Default::default()
2860 });
2861 }
2862 }
2863
2864 if let Some(analysis) = inter_analysis {
2866 let flows = analysis.detect_inter_procedural_flows(package);
2867
2868 let mut reported_functions: HashSet<String> =
2869 findings.iter().map(|f| f.function.clone()).collect();
2870
2871 for flow in flows {
2872 if flow.sink_type != "sql" {
2873 continue;
2874 }
2875
2876 let is_internal = flow.sink_function.contains("mir_extractor")
2877 || flow.sink_function.contains("__")
2878 || flow.source_function.contains("mir_extractor");
2879 if is_internal {
2880 continue;
2881 }
2882
2883 if reported_functions.contains(&flow.sink_function) {
2884 continue;
2885 }
2886
2887 if flow.sanitized {
2888 continue;
2889 }
2890
2891 let sink_func = package
2892 .functions
2893 .iter()
2894 .find(|f| f.name == flow.sink_function);
2895
2896 let span = sink_func.map(|f| f.span.clone()).unwrap_or_default();
2897 let signature = sink_func.map(|f| f.signature.clone()).unwrap_or_default();
2898
2899 findings.push(Finding {
2900 rule_id: self.metadata.id.clone(),
2901 rule_name: self.metadata.name.clone(),
2902 severity: Severity::High,
2903 message: format!(
2904 "Inter-procedural SQL injection: untrusted input from `{}` \
2905 flows through {} to SQL query in `{}`. Use parameterized \
2906 queries to prevent SQL injection.",
2907 flow.source_function,
2908 if flow.call_chain.len() > 2 {
2909 format!("{} function calls", flow.call_chain.len() - 1)
2910 } else {
2911 "helper function".to_string()
2912 },
2913 flow.sink_function
2914 ),
2915 function: flow.sink_function.clone(),
2916 function_signature: signature,
2917 evidence: vec![flow.describe()],
2918 span,
2919 ..Default::default()
2920 });
2921
2922 reported_functions.insert(flow.sink_function.clone());
2923 }
2924 }
2925
2926 findings
2927 }
2928}
2929
/// Rule that reports untrusted input reaching command execution through
/// helper functions or closures (rule id `RUSTCOLA098`).
pub struct InterProceduralCommandInjectionRule {
    // Static rule description, handed out by `Rule::metadata`.
    metadata: RuleMetadata,
}
2937
2938impl InterProceduralCommandInjectionRule {
2939 pub fn new() -> Self {
2940 Self {
2941 metadata: RuleMetadata {
2942 id: "RUSTCOLA098".to_string(),
2943 name: "interprocedural-command-injection".to_string(),
2944 short_description: "Inter-procedural command injection".to_string(),
2945 full_description: "Untrusted input flows through helper functions to \
2946 command execution without sanitization. Attackers can inject shell \
2947 metacharacters to execute arbitrary commands. Validate input against \
2948 an allowlist or use APIs that don't invoke a shell."
2949 .to_string(),
2950 help_uri: Some(
2951 "https://owasp.org/www-community/attacks/Command_Injection".to_string(),
2952 ),
2953 default_severity: Severity::High,
2954 origin: RuleOrigin::BuiltIn,
2955 cwe_ids: Vec::new(),
2956 fix_suggestion: None,
2957 exploitability: Exploitability::default(),
2958 },
2959 }
2960 }
2961}
2962
2963impl Rule for InterProceduralCommandInjectionRule {
2964 fn metadata(&self) -> &RuleMetadata {
2965 &self.metadata
2966 }
2967
2968 fn evaluate(
2969 &self,
2970 package: &MirPackage,
2971 inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
2972 ) -> Vec<Finding> {
2973 let mut findings = Vec::new();
2974
2975 if let Some(analysis) = inter_analysis {
2977 let flows = analysis.detect_inter_procedural_flows(package);
2978
2979 let mut reported_functions: HashSet<String> = HashSet::new();
2980
2981 for flow in flows {
2982 if !flow.sink_type.contains("command") {
2983 continue;
2984 }
2985
2986 let is_internal = flow.sink_function.contains("mir_extractor")
2987 || flow.sink_function.contains("mir-extractor")
2988 || flow.sink_function.contains("__")
2989 || flow.source_function.contains("mir_extractor")
2990 || flow.source_function.contains("mir-extractor");
2991 if is_internal {
2992 continue;
2993 }
2994
2995 if flow.sink_function.contains("test") && flow.sink_function.contains("::") {
2996 if !flow.sink_function.starts_with("test_") {
2997 continue;
2998 }
2999 }
3000
3001 if reported_functions.contains(&flow.sink_function) {
3002 continue;
3003 }
3004
3005 if flow.sanitized {
3006 continue;
3007 }
3008
3009 let sink_func = package
3010 .functions
3011 .iter()
3012 .find(|f| f.name == flow.sink_function);
3013
3014 let span = sink_func.map(|f| f.span.clone()).unwrap_or_default();
3015 let signature = sink_func.map(|f| f.signature.clone()).unwrap_or_default();
3016
3017 findings.push(Finding {
3018 rule_id: self.metadata.id.clone(),
3019 rule_name: self.metadata.name.clone(),
3020 severity: Severity::High,
3021 message: format!(
3022 "Inter-procedural command injection: untrusted input from `{}` \
3023 flows through {} to command execution in `{}`. \
3024 Attackers can inject shell metacharacters. \
3025 Validate against an allowlist or avoid shell invocation.",
3026 flow.source_function,
3027 if flow.call_chain.len() > 2 {
3028 format!("{} function calls", flow.call_chain.len() - 1)
3029 } else {
3030 "helper function".to_string()
3031 },
3032 flow.sink_function
3033 ),
3034 function: flow.sink_function.clone(),
3035 function_signature: signature,
3036 evidence: vec![flow.describe()],
3037 span,
3038 ..Default::default()
3039 });
3040
3041 reported_functions.insert(flow.sink_function.clone());
3042 }
3043
3044 for closure in analysis.closure_registry.get_all_closures() {
3046 if reported_functions.contains(&closure.name) {
3047 continue;
3048 }
3049
3050 let parent_func = package
3051 .functions
3052 .iter()
3053 .find(|f| f.name == closure.parent_function);
3054
3055 let closure_func = package.functions.iter().find(|f| f.name == closure.name);
3056
3057 if let Some(closure_fn) = closure_func {
3058 let parent_has_source = if let Some(parent) = parent_func {
3059 parent.body.iter().any(|line| {
3060 line.contains("args()")
3061 || line.contains("env::args")
3062 || line.contains("env::var")
3063 || line.contains("std::env::")
3064 || line.contains("= args")
3065 || line.contains("var(")
3066 })
3067 } else {
3068 closure_fn.body.iter().any(|line| {
3069 let line_lower = line.to_lowercase();
3070 (line.contains("debug ") && line.contains("(*((*_1)"))
3071 && (line_lower.contains("tainted")
3072 || line_lower.contains("user")
3073 || line_lower.contains("input")
3074 || line_lower.contains("cmd")
3075 || line_lower.contains("arg")
3076 || line_lower.contains("command"))
3077 })
3078 };
3079
3080 let closure_has_sink = closure_fn.body.iter().any(|line| {
3081 line.contains("Command::new")
3082 || line.contains("Command::")
3083 || line.contains("::spawn")
3084 || line.contains("::output")
3085 || line.contains("process::Command")
3086 });
3087
3088 let has_captures = !closure.captured_vars.is_empty()
3089 || closure_fn
3090 .body
3091 .iter()
3092 .any(|line| line.contains("debug ") && line.contains("(*((*_1)"));
3093
3094 if parent_has_source && closure_has_sink && has_captures {
3095 findings.push(Finding {
3096 rule_id: self.metadata.id.clone(),
3097 rule_name: self.metadata.name.clone(),
3098 severity: Severity::High,
3099 message: format!(
3100 "Closure captures tainted data: `{}` captures untrusted input \
3101 from parent function `{}` and passes it to command execution. \
3102 Attackers can inject shell metacharacters. \
3103 Validate input or avoid shell invocation.",
3104 closure.name, closure.parent_function
3105 ),
3106 function: closure.name.clone(),
3107 function_signature: closure_fn.signature.clone(),
3108 evidence: vec![
3109 format!(
3110 "Parent function {} contains taint source",
3111 closure.parent_function
3112 ),
3113 format!("Closure captures variable(s) from parent"),
3114 "Closure body contains command execution".to_string(),
3115 ],
3116 span: closure_fn.span.clone(),
3117 ..Default::default()
3118 });
3119
3120 reported_functions.insert(closure.name.clone());
3121 }
3122 }
3123 }
3124
3125 for function in &package.functions {
3127 if !function.name.contains("::{closure#") {
3128 continue;
3129 }
3130
3131 if reported_functions.contains(&function.name) {
3132 continue;
3133 }
3134
3135 let body_str = function.body.join("\n");
3136
3137 let has_command_sink = body_str.contains("Command::")
3138 || body_str.contains("::spawn")
3139 || body_str.contains("::output");
3140
3141 if !has_command_sink {
3142 continue;
3143 }
3144
3145 let has_tainted_capture = body_str.lines().any(|line| {
3146 if !line.contains("debug ") || !line.contains("(*((*_1)") {
3147 return false;
3148 }
3149 let line_lower = line.to_lowercase();
3150 line_lower.contains("tainted")
3151 || line_lower.contains("user")
3152 || line_lower.contains("input")
3153 || line_lower.contains("cmd")
3154 || line_lower.contains("arg")
3155 || line_lower.contains("command")
3156 });
3157
3158 if has_tainted_capture {
3159 let parent_name = if let Some(pos) = function.name.find("::{closure#") {
3160 function.name[..pos].to_string()
3161 } else {
3162 "unknown_parent".to_string()
3163 };
3164
3165 findings.push(Finding {
3166 rule_id: self.metadata.id.clone(),
3167 rule_name: self.metadata.name.clone(),
3168 severity: Severity::High,
3169 message: format!(
3170 "Closure captures tainted data: `{}` captures untrusted input \
3171 from parent function `{}` and passes it to command execution. \
3172 Attackers can inject shell metacharacters. \
3173 Validate input or avoid shell invocation.",
3174 function.name, parent_name
3175 ),
3176 function: function.name.clone(),
3177 function_signature: function.signature.clone(),
3178 evidence: vec![
3179 format!("Closure captures variable(s) named with taint indicators"),
3180 "Closure body contains command execution".to_string(),
3181 ],
3182 span: function.span.clone(),
3183 confidence: Confidence::Medium,
3184 cwe_ids: Vec::new(),
3185 fix_suggestion: None,
3186 code_snippet: None,
3187 exploitability: Exploitability::default(),
3188 exploitability_score: Exploitability::default().score(),
3189 ..Default::default()
3190 });
3191
3192 reported_functions.insert(function.name.clone());
3193 }
3194 }
3195 }
3196
3197 findings
3198 }
3199}