1#![allow(dead_code)]
14
15use std::collections::HashSet;
16use std::ffi::OsStr;
17use std::fs;
18use std::path::Path;
19use walkdir::WalkDir;
20
21use super::utils::filter_entry;
22use crate::{
23 Confidence, Exploitability, Finding, MirFunction, MirPackage, Rule, RuleMetadata, RuleOrigin,
24 Severity,
25};
26
/// Substrings that mark a body line as reading external input.
///
/// `DivisionByUntrustedRule::track_untrusted_numerics` treats any line that
/// contains one of these fragments as an input source and taints the local
/// assigned on that line.
const INPUT_SOURCE_PATTERNS: &[&str] = &[
    "= var::<",
    "= var(",
    "var_os(",
    "::args(",
    "args_os(",
    "::nth(",
    "read_line(",
    "read_to_string(",
];
38
39pub struct CleartextEnvVarRule {
46 metadata: RuleMetadata,
47}
48
49impl CleartextEnvVarRule {
50 pub fn new() -> Self {
51 Self {
52 metadata: RuleMetadata {
53 id: "RUSTCOLA065".to_string(),
54 name: "cleartext-env-var".to_string(),
55 short_description: "Sensitive data in environment variable".to_string(),
56 full_description: "Detects sensitive data (passwords, secrets, tokens, keys) \
57 being stored in environment variables via std::env::set_var. Environment \
58 variables can be read by child processes, logged, and are often visible \
59 in /proc filesystem on Linux. Consider using dedicated secret management \
60 solutions instead."
61 .to_string(),
62 help_uri: Some("https://cwe.mitre.org/data/definitions/526.html".to_string()),
63 default_severity: Severity::High,
64 origin: RuleOrigin::BuiltIn,
65 cwe_ids: Vec::new(),
66 fix_suggestion: None,
67 exploitability: Exploitability::default(),
68 },
69 }
70 }
71
72 const SENSITIVE_PATTERNS: &'static [&'static str] = &[
73 "password",
74 "passwd",
75 "pwd",
76 "secret",
77 "token",
78 "apikey",
79 "api_key",
80 "auth",
81 "credential",
82 "cred",
83 "private_key",
84 "privatekey",
85 "access_key",
86 "secret_key",
87 ];
88
89 fn looks_like_sensitive_env_set(&self, function: &MirFunction) -> bool {
90 for line in &function.body {
91 if line.contains("set_var") {
93 let line_lower = line.to_lowercase();
95 for pattern in Self::SENSITIVE_PATTERNS {
96 if line_lower.contains(pattern) {
97 return true;
98 }
99 }
100 }
101 }
102 false
103 }
104}
105
106impl Rule for CleartextEnvVarRule {
107 fn metadata(&self) -> &RuleMetadata {
108 &self.metadata
109 }
110
111 fn evaluate(
112 &self,
113 package: &MirPackage,
114 _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
115 ) -> Vec<Finding> {
116 let mut findings = Vec::new();
117
118 for function in &package.functions {
119 if self.looks_like_sensitive_env_set(function) {
120 let mut evidence = Vec::new();
121 for line in &function.body {
122 if line.contains("set_var") {
123 evidence.push(line.trim().to_string());
124 if evidence.len() >= 3 {
125 break;
126 }
127 }
128 }
129
130 findings.push(Finding {
131 rule_id: self.metadata.id.clone(),
132 rule_name: self.metadata.name.clone(),
133 severity: self.metadata.default_severity,
134 message: format!(
135 "Sensitive data stored in environment variable in `{}`. \
136 Environment variables are inherited by child processes and \
137 may be logged. Use dedicated secret management instead.",
138 function.name
139 ),
140 function: function.name.clone(),
141 function_signature: function.signature.clone(),
142 evidence,
143 span: function.span.clone(),
144 confidence: Confidence::Medium,
145 cwe_ids: Vec::new(),
146 fix_suggestion: None,
147 code_snippet: None,
148 exploitability: Exploitability::default(),
149 exploitability_score: Exploitability::default().score(),
150 ..Default::default()
151 });
152 }
153 }
154
155 findings
156 }
157}
158
159pub struct EnvVarLiteralRule {
165 metadata: RuleMetadata,
166}
167
168impl EnvVarLiteralRule {
169 pub fn new() -> Self {
170 Self {
171 metadata: RuleMetadata {
172 id: "RUSTCOLA047".to_string(),
173 name: "env-var-literal".to_string(),
174 short_description: "Hardcoded environment variable name".to_string(),
175 full_description: "Detects string literals passed directly to std::env::var(). \
176 Hardcoded environment variable names can leak configuration expectations \
177 and make it harder to configure applications in different environments. \
178 Consider using constants or configuration structs."
179 .to_string(),
180 help_uri: None,
181 default_severity: Severity::Low,
182 origin: RuleOrigin::BuiltIn,
183 cwe_ids: Vec::new(),
184 fix_suggestion: None,
185 exploitability: Exploitability::default(),
186 },
187 }
188 }
189
190 fn has_env_var_literal(&self, function: &MirFunction) -> bool {
191 let body_str = function.body.join("\n");
192
193 body_str.contains("env::var") && body_str.contains("const \"")
196 }
197}
198
199impl Rule for EnvVarLiteralRule {
200 fn metadata(&self) -> &RuleMetadata {
201 &self.metadata
202 }
203
204 fn evaluate(
205 &self,
206 package: &MirPackage,
207 _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
208 ) -> Vec<Finding> {
209 let mut findings = Vec::new();
210
211 for function in &package.functions {
212 if self.has_env_var_literal(function) {
213 let mut evidence = Vec::new();
214 for line in &function.body {
215 if (line.contains("env::var") || line.contains("var::<"))
216 && line.contains("const \"")
217 {
218 evidence.push(line.trim().to_string());
219 if evidence.len() >= 3 {
220 break;
221 }
222 }
223 }
224
225 findings.push(Finding {
226 rule_id: self.metadata.id.clone(),
227 rule_name: self.metadata.name.clone(),
228 severity: self.metadata.default_severity,
229 message: "Hardcoded environment variable name detected. Consider using \
230 constants or configuration structs for better maintainability."
231 .to_string(),
232 function: function.name.clone(),
233 function_signature: function.signature.clone(),
234 evidence,
235 span: function.span.clone(),
236 confidence: Confidence::Medium,
237 cwe_ids: Vec::new(),
238 fix_suggestion: None,
239 code_snippet: None,
240 exploitability: Exploitability::default(),
241 exploitability_score: Exploitability::default().score(),
242 ..Default::default()
243 });
244 }
245 }
246
247 findings
248 }
249}
250
251pub struct InvisibleUnicodeRule {
257 metadata: RuleMetadata,
258}
259
260impl InvisibleUnicodeRule {
261 pub fn new() -> Self {
262 Self {
263 metadata: RuleMetadata {
264 id: "RUSTCOLA048".to_string(),
265 name: "invisible-unicode".to_string(),
266 short_description: "Invisible Unicode characters in source".to_string(),
267 full_description: "Detects invisible Unicode characters in source code. \
268 These can be used to create Trojan Source attacks where code appears \
269 benign but executes differently. Includes zero-width characters, \
270 bidirectional overrides, and other invisible control characters."
271 .to_string(),
272 help_uri: Some("https://trojansource.codes/".to_string()),
273 default_severity: Severity::High,
274 origin: RuleOrigin::BuiltIn,
275 cwe_ids: Vec::new(),
276 fix_suggestion: None,
277 exploitability: Exploitability::default(),
278 },
279 }
280 }
281
282 const INVISIBLE_CHARS: &'static [char] = &[
283 '\u{200B}', '\u{200C}', '\u{200D}', '\u{FEFF}', '\u{2060}', '\u{202A}', '\u{202B}', '\u{202C}', '\u{202D}', '\u{202E}', '\u{2066}', '\u{2067}', '\u{2068}', '\u{2069}', ];
298
299 fn has_invisible_chars(&self, function: &MirFunction) -> bool {
300 let body_str = function.body.join("\n");
301 for &c in Self::INVISIBLE_CHARS {
302 if body_str.contains(c) {
303 return true;
304 }
305 }
306 false
307 }
308}
309
310impl Rule for InvisibleUnicodeRule {
311 fn metadata(&self) -> &RuleMetadata {
312 &self.metadata
313 }
314
315 fn evaluate(
316 &self,
317 package: &MirPackage,
318 _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
319 ) -> Vec<Finding> {
320 let mut findings = Vec::new();
321
322 for function in &package.functions {
323 if self.has_invisible_chars(function) {
324 let mut evidence = Vec::new();
325 for line in &function.body {
326 let has_invisible = Self::INVISIBLE_CHARS.iter().any(|&c| line.contains(c));
327 if has_invisible {
328 evidence.push(line.trim().to_string());
329 if evidence.len() >= 3 {
330 break;
331 }
332 }
333 }
334
335 findings.push(Finding {
336 rule_id: self.metadata.id.clone(),
337 rule_name: self.metadata.name.clone(),
338 severity: self.metadata.default_severity,
339 message: format!(
340 "Invisible Unicode characters detected in `{}`. These may be \
341 Trojan Source attacks where code appears benign but executes \
342 differently. Remove or replace with visible equivalents.",
343 function.name
344 ),
345 function: function.name.clone(),
346 function_signature: function.signature.clone(),
347 evidence,
348 span: function.span.clone(),
349 confidence: Confidence::Medium,
350 cwe_ids: Vec::new(),
351 fix_suggestion: None,
352 code_snippet: None,
353 exploitability: Exploitability::default(),
354 exploitability_score: Exploitability::default().score(),
355 ..Default::default()
356 });
357 }
358 }
359
360 findings
361 }
362}
363
364pub struct UntrimmedStdinRule {
370 metadata: RuleMetadata,
371}
372
373impl UntrimmedStdinRule {
374 pub fn new() -> Self {
375 Self {
376 metadata: RuleMetadata {
377 id: "RUSTCOLA053".to_string(),
378 name: "untrimmed-stdin".to_string(),
379 short_description: "Stdin input not trimmed".to_string(),
380 full_description: "Detects stdin().read_line() usage without subsequent \
381 trim() call. read_line() includes the trailing newline which can cause \
382 subtle bugs in file paths, passwords, or comparisons. Always call \
383 .trim() or .trim_end() on stdin input."
384 .to_string(),
385 help_uri: None,
386 default_severity: Severity::Low,
387 origin: RuleOrigin::BuiltIn,
388 cwe_ids: Vec::new(),
389 fix_suggestion: None,
390 exploitability: Exploitability::default(),
391 },
392 }
393 }
394
395 fn has_untrimmed_stdin(&self, function: &MirFunction) -> bool {
396 let body_str = function.body.join("\n");
397
398 let has_read_line = body_str.contains("stdin")
400 && (body_str.contains("read_line") || body_str.contains("BufRead"));
401
402 if !has_read_line {
403 return false;
404 }
405
406 let has_trim = body_str.contains("trim") || body_str.contains("trim_end");
408
409 !has_trim
410 }
411}
412
413impl Rule for UntrimmedStdinRule {
414 fn metadata(&self) -> &RuleMetadata {
415 &self.metadata
416 }
417
418 fn evaluate(
419 &self,
420 package: &MirPackage,
421 _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
422 ) -> Vec<Finding> {
423 let mut findings = Vec::new();
424
425 for function in &package.functions {
426 if self.has_untrimmed_stdin(function) {
427 let mut evidence = Vec::new();
428 for line in &function.body {
429 if line.contains("stdin") || line.contains("read_line") {
430 evidence.push(line.trim().to_string());
431 if evidence.len() >= 3 {
432 break;
433 }
434 }
435 }
436
437 findings.push(Finding {
438 rule_id: self.metadata.id.clone(),
439 rule_name: self.metadata.name.clone(),
440 severity: self.metadata.default_severity,
441 message: "Stdin read_line() without trim(). The trailing newline can \
442 cause bugs in paths, passwords, or comparisons. Call .trim() on input."
443 .to_string(),
444 function: function.name.clone(),
445 function_signature: function.signature.clone(),
446 evidence,
447 span: function.span.clone(),
448 confidence: Confidence::Medium,
449 cwe_ids: Vec::new(),
450 fix_suggestion: None,
451 code_snippet: None,
452 exploitability: Exploitability::default(),
453 exploitability_score: Exploitability::default().score(),
454 ..Default::default()
455 });
456 }
457 }
458
459 findings
460 }
461}
462
463pub struct InfiniteIteratorRule {
469 metadata: RuleMetadata,
470}
471
472impl InfiniteIteratorRule {
473 pub fn new() -> Self {
474 Self {
475 metadata: RuleMetadata {
476 id: "RUSTCOLA054".to_string(),
477 name: "infinite-iterator".to_string(),
478 short_description: "Infinite iterator without termination".to_string(),
479 full_description: "Detects infinite iterators (std::iter::repeat, cycle, \
480 repeat_with) without termination methods (take, take_while, any, find, \
481 position, zip). Consuming an infinite iterator without bounds leads to \
482 infinite loops or memory exhaustion."
483 .to_string(),
484 help_uri: None,
485 default_severity: Severity::High,
486 origin: RuleOrigin::BuiltIn,
487 cwe_ids: Vec::new(),
488 fix_suggestion: None,
489 exploitability: Exploitability::default(),
490 },
491 }
492 }
493
494 fn looks_like_infinite_iterator(&self, function: &MirFunction) -> bool {
495 let body_str = function.body.join("\n");
496
497 if function.name.contains("mir_extractor") || function.name.contains("mir-extractor") {
499 return false;
500 }
501
502 if function.name.contains("::new")
504 || body_str.contains("const \"iter::repeat")
505 || body_str.contains("const \"std::iter::repeat")
506 {
507 return false;
508 }
509
510 let has_repeat = body_str.contains("std::iter::repeat")
512 || body_str.contains("core::iter::repeat")
513 || body_str.contains("Repeat<");
514 let has_cycle = body_str.contains("::cycle") || body_str.contains("Cycle<");
515 let has_repeat_with = body_str.contains("std::iter::repeat_with")
516 || body_str.contains("core::iter::repeat_with")
517 || body_str.contains("repeat_with::<")
518 || body_str.contains("RepeatWith<");
519
520 if !has_repeat && !has_cycle && !has_repeat_with {
521 return false;
522 }
523
524 let has_take = body_str.contains("::take(") || body_str.contains(">::take::<");
526 let has_take_while =
527 body_str.contains("::take_while") || body_str.contains(">::take_while::<");
528 let has_any = body_str.contains("::any(") || body_str.contains(">::any::<");
529 let has_find = body_str.contains("::find(") || body_str.contains(">::find::<");
530 let has_position = body_str.contains("::position") || body_str.contains(">::position::<");
531 let has_zip = body_str.contains("::zip");
532 let has_nth = body_str.contains("::nth(") || body_str.contains(">::nth::<");
533
534 let return_count = body_str.matches("return;").count();
536 let has_early_return = return_count > 1;
537
538 !has_take
540 && !has_take_while
541 && !has_any
542 && !has_find
543 && !has_position
544 && !has_zip
545 && !has_nth
546 && !has_early_return
547 }
548}
549
550impl Rule for InfiniteIteratorRule {
551 fn metadata(&self) -> &RuleMetadata {
552 &self.metadata
553 }
554
555 fn evaluate(
556 &self,
557 package: &MirPackage,
558 _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
559 ) -> Vec<Finding> {
560 let mut findings = Vec::new();
561
562 for function in &package.functions {
563 if self.looks_like_infinite_iterator(function) {
564 let mut evidence = Vec::new();
565 for line in &function.body {
566 if line.contains("std::iter::repeat")
567 || line.contains("core::iter::repeat")
568 || line.contains("::cycle")
569 || line.contains("repeat_with")
570 {
571 evidence.push(line.trim().to_string());
572 if evidence.len() >= 3 {
573 break;
574 }
575 }
576 }
577
578 findings.push(Finding {
579 rule_id: self.metadata.id.clone(),
580 rule_name: self.metadata.name.clone(),
581 severity: self.metadata.default_severity,
582 message: "Infinite iterator (repeat, cycle, or repeat_with) without \
583 termination method (take, take_while, any, find, position, zip). \
584 This can cause unbounded loops leading to DoS."
585 .to_string(),
586 function: function.name.clone(),
587 function_signature: function.signature.clone(),
588 evidence,
589 span: function.span.clone(),
590 ..Default::default()
591 });
592 }
593 }
594
595 findings
596 }
597}
598
599pub struct DivisionByUntrustedRule {
605 metadata: RuleMetadata,
606}
607
608impl DivisionByUntrustedRule {
609 pub fn new() -> Self {
610 Self {
611 metadata: RuleMetadata {
612 id: "RUSTCOLA077".to_string(),
613 name: "division-by-untrusted".to_string(),
614 short_description: "Division by untrusted input without validation".to_string(),
615 full_description: "Division or modulo operations use untrusted input as \
616 the denominator without checking for zero. If the input is zero, this \
617 causes a panic (DoS). Use checked_div/checked_rem or validate the \
618 denominator before the operation."
619 .to_string(),
620 help_uri: Some("https://cwe.mitre.org/data/definitions/369.html".to_string()),
621 default_severity: Severity::Medium,
622 origin: RuleOrigin::BuiltIn,
623 cwe_ids: Vec::new(),
624 fix_suggestion: None,
625 exploitability: Exploitability::default(),
626 },
627 }
628 }
629
630 const DIVISION_PATTERNS: &'static [&'static str] = &[
631 "Div(", "Rem(", "div(", "rem(", " / ", " % ", ];
635
636 const ZERO_CHECK_PATTERNS: &'static [&'static str] = &[
637 "checked_div",
638 "checked_rem",
639 "saturating_div",
640 "wrapping_div",
641 "!= 0",
642 "!= 0_",
643 "> 0",
644 ">= 1",
645 "is_zero",
646 "NonZero",
647 ];
648
649 fn track_untrusted_numerics(body: &[String]) -> HashSet<String> {
651 let mut untrusted_vars = HashSet::new();
652
653 for line in body {
654 let trimmed = line.trim();
655
656 let is_source = INPUT_SOURCE_PATTERNS.iter().any(|p| trimmed.contains(p));
657 if is_source {
658 if let Some(eq_pos) = trimmed.find(" = ") {
659 let target = trimmed[..eq_pos].trim();
660 if let Some(var) = target
661 .split(|c: char| !c.is_alphanumeric() && c != '_')
662 .find(|s| s.starts_with('_'))
663 {
664 untrusted_vars.insert(var.to_string());
665 }
666 }
667 }
668
669 if trimmed.contains("::parse::") {
671 let uses_untrusted = untrusted_vars.iter().any(|v| trimmed.contains(v));
672 if uses_untrusted {
673 if let Some(eq_pos) = trimmed.find(" = ") {
674 let target = trimmed[..eq_pos].trim();
675 if let Some(var) = target
676 .split(|c: char| !c.is_alphanumeric() && c != '_')
677 .find(|s| s.starts_with('_'))
678 {
679 untrusted_vars.insert(var.to_string());
680 }
681 }
682 }
683 }
684
685 if trimmed.contains(" = ") && !is_source {
687 if let Some(eq_pos) = trimmed.find(" = ") {
688 let target = trimmed[..eq_pos].trim();
689 let source = trimmed[eq_pos + 3..].trim();
690
691 let uses_untrusted = untrusted_vars.iter().any(|v| source.contains(v));
692 if uses_untrusted {
693 if let Some(target_var) = target
694 .split(|c: char| !c.is_alphanumeric() && c != '_')
695 .find(|s| s.starts_with('_'))
696 {
697 untrusted_vars.insert(target_var.to_string());
698 }
699 }
700 }
701 }
702 }
703
704 untrusted_vars
705 }
706
707 fn has_zero_validation(body: &[String], untrusted_vars: &HashSet<String>) -> bool {
708 for line in body {
709 let trimmed = line.trim();
710 let has_check = Self::ZERO_CHECK_PATTERNS
711 .iter()
712 .any(|p| trimmed.contains(p));
713 if has_check {
714 for var in untrusted_vars {
715 if trimmed.contains(var) {
716 return true;
717 }
718 }
719 }
720 }
721 false
722 }
723
724 fn find_unsafe_divisions(body: &[String], untrusted_vars: &HashSet<String>) -> Vec<String> {
725 let mut evidence = Vec::new();
726
727 for line in body {
728 let trimmed = line.trim();
729 let is_division = Self::DIVISION_PATTERNS.iter().any(|p| trimmed.contains(p));
730 if is_division {
731 for var in untrusted_vars {
732 if trimmed.contains(var) {
733 evidence.push(trimmed.to_string());
734 break;
735 }
736 }
737 }
738 }
739
740 evidence
741 }
742}
743
744impl Rule for DivisionByUntrustedRule {
745 fn metadata(&self) -> &RuleMetadata {
746 &self.metadata
747 }
748
749 fn evaluate(
750 &self,
751 package: &MirPackage,
752 _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
753 ) -> Vec<Finding> {
754 let mut findings = Vec::new();
755
756 for function in &package.functions {
757 if function.name.contains("mir_extractor") || function.name.contains("mir-extractor") {
758 continue;
759 }
760
761 let untrusted_vars = Self::track_untrusted_numerics(&function.body);
762 if untrusted_vars.is_empty() {
763 continue;
764 }
765
766 if Self::has_zero_validation(&function.body, &untrusted_vars) {
767 continue;
768 }
769
770 let unsafe_divs = Self::find_unsafe_divisions(&function.body, &untrusted_vars);
771 if !unsafe_divs.is_empty() {
772 findings.push(Finding {
773 rule_id: self.metadata.id.clone(),
774 rule_name: self.metadata.name.clone(),
775 severity: self.metadata.default_severity,
776 message: format!(
777 "Division in `{}` uses untrusted input as denominator without \
778 zero validation. Use checked_div/checked_rem or validate != 0.",
779 function.name
780 ),
781 function: function.name.clone(),
782 function_signature: function.signature.clone(),
783 evidence: unsafe_divs.into_iter().take(3).collect(),
784 span: function.span.clone(),
785 confidence: Confidence::Medium,
786 cwe_ids: Vec::new(),
787 fix_suggestion: None,
788 code_snippet: None,
789 exploitability: Exploitability::default(),
790 exploitability_score: Exploitability::default().score(),
791 ..Default::default()
792 });
793 }
794 }
795
796 findings
797 }
798}
799
800pub struct InsecureYamlDeserializationRule {
806 metadata: RuleMetadata,
807}
808
809impl InsecureYamlDeserializationRule {
810 pub fn new() -> Self {
811 Self {
812 metadata: RuleMetadata {
813 id: "RUSTCOLA089".to_string(),
814 name: "insecure-yaml-deserialization".to_string(),
815 short_description: "Untrusted input in YAML deserialization".to_string(),
816 full_description: "User-controlled input is passed to serde_yaml \
817 deserialization functions without validation. Attackers can craft \
818 malicious YAML using anchors/aliases for exponential expansion \
819 (billion laughs), deeply nested structures, or unexpected type \
820 coercion to cause denial of service or unexpected behavior."
821 .to_string(),
822 help_uri: Some(
823 "https://owasp.org/www-project-web-security-testing-guide/".to_string(),
824 ),
825 default_severity: Severity::Medium,
826 origin: RuleOrigin::BuiltIn,
827 cwe_ids: Vec::new(),
828 fix_suggestion: None,
829 exploitability: Exploitability::default(),
830 },
831 }
832 }
833
834 const YAML_SINKS: &'static [&'static str] = &[
835 "serde_yaml::from_str",
836 "serde_yaml::from_slice",
837 "serde_yaml::from_reader",
838 "serde_yaml::from_str::",
839 "serde_yaml::from_slice::",
840 "serde_yaml::from_reader::",
841 ];
842
843 const UNTRUSTED_SOURCES: &'static [&'static str] = &[
844 "env::var",
845 "env::var_os",
846 "std::env::var",
847 "var::<",
848 "var_os::<",
849 "env::args",
850 "std::env::args",
851 "args::<",
852 "= args()",
853 "Args>",
854 "stdin",
855 "Stdin",
856 "read_to_string",
857 "read_to_end",
858 "BufRead::read_line",
859 "TcpStream",
860 "::connect(",
861 ];
862
863 const SANITIZERS: &'static [&'static str] = &[
864 r#"contains("&")"#,
865 r#"contains("*")"#,
866 ".len()",
867 "len() >",
868 "len() <",
869 "serde_json::from_str", "validate",
871 "sanitize",
872 "allowlist",
873 ];
874
875 fn track_untrusted_vars(&self, function: &MirFunction) -> HashSet<String> {
876 let mut tainted: HashSet<String> = HashSet::new();
877
878 for line in &function.body {
879 for source in Self::UNTRUSTED_SOURCES {
880 if line.contains(source) {
881 if let Some(var) = self.extract_assigned_var(line) {
882 tainted.insert(var);
883 }
884 }
885 }
886
887 if line.contains(" = ") {
889 if let Some((dest, src_part)) = line.split_once(" = ") {
890 let dest_var = dest.trim().to_string();
891 for tvar in tainted.clone() {
892 if self.contains_var(src_part, &tvar) {
893 tainted.insert(dest_var.clone());
894 break;
895 }
896 }
897 }
898 }
899 }
900
901 tainted
902 }
903
904 fn extract_assigned_var(&self, line: &str) -> Option<String> {
905 let line = line.trim();
906 if let Some(eq_pos) = line.find(" = ") {
907 let lhs = line[..eq_pos].trim();
908 if lhs.starts_with('_') && lhs.chars().skip(1).all(|c| c.is_ascii_digit()) {
909 return Some(lhs.to_string());
910 }
911 if lhs.starts_with("(*_") {
912 if let Some(end) = lhs.find(')') {
913 return Some(lhs[2..end].to_string());
914 }
915 }
916 }
917 None
918 }
919
920 fn contains_var(&self, text: &str, var: &str) -> bool {
921 if text.contains(var) {
922 return true;
923 }
924 let var_num = var.trim_start_matches('_');
925 text.contains(&format!("move _{}", var_num))
926 || text.contains(&format!("copy _{}", var_num))
927 || text.contains(&format!("&_{}", var_num))
928 || text.contains(&format!("(*_{})", var_num))
929 }
930
931 fn find_unsafe_yaml_operations(
932 &self,
933 function: &MirFunction,
934 tainted: &HashSet<String>,
935 ) -> Vec<String> {
936 let mut evidence = Vec::new();
937
938 for line in &function.body {
940 for sanitizer in Self::SANITIZERS {
941 if line.contains(sanitizer) {
942 return evidence; }
944 }
945 }
946
947 for line in &function.body {
949 for sink in Self::YAML_SINKS {
950 if line.contains(sink) {
951 for tvar in tainted {
952 if self.contains_var(line, tvar) {
953 evidence.push(line.trim().to_string());
954 break;
955 }
956 }
957 }
958 }
959 }
960
961 evidence
962 }
963}
964
965impl Rule for InsecureYamlDeserializationRule {
966 fn metadata(&self) -> &RuleMetadata {
967 &self.metadata
968 }
969
970 fn evaluate(
971 &self,
972 package: &MirPackage,
973 inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
974 ) -> Vec<Finding> {
975 let mut findings = Vec::new();
976
977 for function in &package.functions {
978 if function.name.contains("test") {
979 continue;
980 }
981
982 let tainted = self.track_untrusted_vars(function);
983 if tainted.is_empty() {
984 continue;
985 }
986
987 let unsafe_ops = self.find_unsafe_yaml_operations(function, &tainted);
988 if !unsafe_ops.is_empty() {
989 findings.push(Finding {
990 rule_id: self.metadata.id.clone(),
991 rule_name: self.metadata.name.clone(),
992 severity: Severity::Medium,
993 message: format!(
994 "Insecure YAML deserialization in `{}`. User-controlled input \
995 passed to serde_yaml without validation. Malicious YAML can use \
996 anchors/aliases for billion laughs attacks.",
997 function.name
998 ),
999 function: function.name.clone(),
1000 function_signature: function.signature.clone(),
1001 evidence: unsafe_ops.into_iter().take(3).collect(),
1002 span: function.span.clone(),
1003 confidence: Confidence::Medium,
1004 cwe_ids: Vec::new(),
1005 fix_suggestion: None,
1006 code_snippet: None,
1007 exploitability: Exploitability::default(),
1008 exploitability_score: Exploitability::default().score(),
1009 ..Default::default()
1010 });
1011 }
1012 }
1013
1014 if let Some(analysis) = inter_analysis {
1016 let flows = analysis.detect_inter_procedural_flows(package);
1017 let mut reported_functions: HashSet<String> =
1018 findings.iter().map(|f| f.function.clone()).collect();
1019
1020 for flow in flows {
1021 if flow.sink_type != "yaml" {
1022 continue;
1023 }
1024 if flow.sink_function.contains("mir_extractor") || flow.sanitized {
1025 continue;
1026 }
1027 if reported_functions.contains(&flow.sink_function) {
1028 continue;
1029 }
1030
1031 let sink_func = package
1032 .functions
1033 .iter()
1034 .find(|f| f.name == flow.sink_function);
1035
1036 findings.push(Finding {
1037 rule_id: self.metadata.id.clone(),
1038 rule_name: self.metadata.name.clone(),
1039 severity: Severity::Medium,
1040 message: format!(
1041 "Inter-procedural YAML injection: untrusted input from `{}` \
1042 flows to YAML deserialization in `{}`.",
1043 flow.source_function, flow.sink_function
1044 ),
1045 function: flow.sink_function.clone(),
1046 function_signature: sink_func.map(|f| f.signature.clone()).unwrap_or_default(),
1047 evidence: vec![flow.describe()],
1048 span: sink_func.map(|f| f.span.clone()).unwrap_or_default(),
1049 ..Default::default()
1050 });
1051 reported_functions.insert(flow.sink_function);
1052 }
1053 }
1054
1055 findings
1056 }
1057}
1058
1059pub struct UnboundedReadRule {
1065 metadata: RuleMetadata,
1066}
1067
1068impl UnboundedReadRule {
1069 pub fn new() -> Self {
1070 Self {
1071 metadata: RuleMetadata {
1072 id: "RUSTCOLA090".to_string(),
1073 name: "unbounded-read-to-end".to_string(),
1074 short_description: "Unbounded read on untrusted source".to_string(),
1075 full_description: "read_to_end() or read_to_string() is called on an \
1076 untrusted source (network stream, stdin, user-controlled file) without \
1077 size limits. Attackers can send arbitrarily large payloads to exhaust \
1078 server memory. Use .take(max_size) to limit bytes read."
1079 .to_string(),
1080 help_uri: Some("https://cwe.mitre.org/data/definitions/400.html".to_string()),
1081 default_severity: Severity::Medium,
1082 origin: RuleOrigin::BuiltIn,
1083 cwe_ids: Vec::new(),
1084 fix_suggestion: None,
1085 exploitability: Exploitability::default(),
1086 },
1087 }
1088 }
1089
1090 const UNTRUSTED_SOURCES: &'static [&'static str] = &[
1091 "TcpStream::connect",
1092 "TcpListener::accept",
1093 "UnixStream::connect",
1094 "::connect(",
1095 "::accept(",
1096 "<TcpStream",
1097 "<UnixStream",
1098 "io::stdin",
1099 "stdin()",
1100 "Stdin",
1101 "env::var",
1102 "env::args",
1103 "var::<",
1104 "args::<",
1105 "Args>",
1106 "File::open",
1107 ];
1108
1109 const UNBOUNDED_SINKS: &'static [&'static str] = &[
1110 "read_to_end",
1111 "read_to_string",
1112 "Read>::read_to_end",
1113 "Read>::read_to_string",
1114 ];
1115
1116 const SAFE_PATTERNS: &'static [&'static str] = &[
1117 ".take(",
1118 "take(",
1119 "metadata(",
1120 ".len()",
1121 "MAX_SIZE",
1122 "max_size",
1123 "limit",
1124 "chunk",
1125 ];
1126
1127 fn has_untrusted_source(&self, function: &MirFunction) -> bool {
1128 for line in &function.body {
1129 for source in Self::UNTRUSTED_SOURCES {
1130 if line.contains(source) {
1131 return true;
1132 }
1133 }
1134 }
1135 false
1136 }
1137
1138 fn has_safe_limit(&self, function: &MirFunction) -> bool {
1139 for line in &function.body {
1140 for pattern in Self::SAFE_PATTERNS {
1141 if line.to_lowercase().contains(&pattern.to_lowercase()) {
1142 return true;
1143 }
1144 }
1145 }
1146 false
1147 }
1148
1149 fn find_unbounded_reads(&self, function: &MirFunction) -> Vec<String> {
1150 let mut evidence = Vec::new();
1151 for line in &function.body {
1152 for sink in Self::UNBOUNDED_SINKS {
1153 if line.contains(sink) {
1154 evidence.push(line.trim().to_string());
1155 }
1156 }
1157 }
1158 evidence
1159 }
1160}
1161
1162impl Rule for UnboundedReadRule {
1163 fn metadata(&self) -> &RuleMetadata {
1164 &self.metadata
1165 }
1166
1167 fn evaluate(
1168 &self,
1169 package: &MirPackage,
1170 _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
1171 ) -> Vec<Finding> {
1172 let mut findings = Vec::new();
1173
1174 for function in &package.functions {
1175 if function.name.contains("test") {
1176 continue;
1177 }
1178 if !self.has_untrusted_source(function) {
1179 continue;
1180 }
1181 if self.has_safe_limit(function) {
1182 continue;
1183 }
1184
1185 let unbounded_reads = self.find_unbounded_reads(function);
1186 if !unbounded_reads.is_empty() {
1187 let body_str = function.body.join("\n");
1188 let severity = if body_str.contains("TcpStream") || body_str.contains("UnixStream")
1189 {
1190 Severity::High
1191 } else {
1192 Severity::Medium
1193 };
1194
1195 findings.push(Finding {
1196 rule_id: self.metadata.id.clone(),
1197 rule_name: self.metadata.name.clone(),
1198 severity,
1199 message: format!(
1200 "Unbounded read in `{}`. read_to_end()/read_to_string() without \
1201 size limits. Use .take(max_bytes) to limit the read size.",
1202 function.name
1203 ),
1204 function: function.name.clone(),
1205 function_signature: function.signature.clone(),
1206 evidence: unbounded_reads.into_iter().take(3).collect(),
1207 span: function.span.clone(),
1208 confidence: Confidence::Medium,
1209 cwe_ids: Vec::new(),
1210 fix_suggestion: None,
1211 code_snippet: None,
1212 exploitability: Exploitability::default(),
1213 exploitability_score: Exploitability::default().score(),
1214 ..Default::default()
1215 });
1216 }
1217 }
1218
1219 findings
1220 }
1221}
1222
/// Rule RUSTCOLA091: "Untrusted input in JSON/TOML deserialization".
pub struct InsecureJsonTomlDeserializationRule {
    /// Static descriptor for this rule, populated in `new`.
    metadata: RuleMetadata,
}
1231
1232impl InsecureJsonTomlDeserializationRule {
1233 pub fn new() -> Self {
1234 Self {
1235 metadata: RuleMetadata {
1236 id: "RUSTCOLA091".to_string(),
1237 name: "insecure-json-toml-deserialization".to_string(),
1238 short_description: "Untrusted input in JSON/TOML deserialization".to_string(),
1239 full_description: "User-controlled input is passed to serde_json or toml \
1240 deserialization functions without validation. Attackers can craft \
1241 deeply nested structures to cause stack overflow, or very large \
1242 payloads to cause memory exhaustion."
1243 .to_string(),
1244 help_uri: Some(
1245 "https://owasp.org/www-project-web-security-testing-guide/".to_string(),
1246 ),
1247 default_severity: Severity::Medium,
1248 origin: RuleOrigin::BuiltIn,
1249 cwe_ids: Vec::new(),
1250 fix_suggestion: None,
1251 exploitability: Exploitability::default(),
1252 },
1253 }
1254 }
1255
    /// Substrings that mark a MIR line as a JSON/TOML deserialization call.
    ///
    /// `find_unsafe_operations` tests lines with `str::contains`, so the
    /// entries ending in `::` are also matched by their shorter prefixes.
    const SINKS: &'static [&'static str] = &[
        "serde_json::from_str",
        "serde_json::from_slice",
        "serde_json::from_reader",
        "serde_json::from_str::",
        "serde_json::from_slice::",
        "serde_json::from_reader::",
        "toml::from_str",
        "toml::de::from_str",
    ];
1266
    /// Substrings that mark a MIR line as producing untrusted data
    /// (environment variables, command-line arguments, stdin, file reads and
    /// network streams).
    ///
    /// `track_untrusted_vars` tests lines with `str::contains`, so any line
    /// that merely mentions one of these fragments counts as a source.
    const UNTRUSTED_SOURCES: &'static [&'static str] = &[
        "env::var",
        "env::var_os",
        "std::env::var",
        "var::<",
        "var_os::<",
        "env::args",
        "std::env::args",
        "args::<",
        "= args()",
        "Args>",
        "stdin",
        "Stdin",
        "read_to_string",
        "read_to_end",
        "File::open",
        "TcpStream",
        "::connect(",
    ];
1286
1287 fn track_untrusted_vars(&self, function: &MirFunction) -> HashSet<String> {
1288 let mut tainted: HashSet<String> = HashSet::new();
1289
1290 for line in &function.body {
1291 for source in Self::UNTRUSTED_SOURCES {
1292 if line.contains(source) {
1293 if let Some(var) = self.extract_assigned_var(line) {
1294 tainted.insert(var);
1295 }
1296 }
1297 }
1298
1299 if line.contains(" = ") {
1300 if let Some((dest, src_part)) = line.split_once(" = ") {
1301 let dest_var = dest.trim().to_string();
1302 for tvar in tainted.clone() {
1303 if self.contains_var(src_part, &tvar) {
1304 tainted.insert(dest_var.clone());
1305 break;
1306 }
1307 }
1308 }
1309 }
1310 }
1311
1312 tainted
1313 }
1314
1315 fn extract_assigned_var(&self, line: &str) -> Option<String> {
1316 let line = line.trim();
1317 if let Some(eq_pos) = line.find(" = ") {
1318 let lhs = line[..eq_pos].trim();
1319 if lhs.starts_with('_') && lhs.chars().skip(1).all(|c| c.is_ascii_digit()) {
1320 return Some(lhs.to_string());
1321 }
1322 }
1323 None
1324 }
1325
1326 fn contains_var(&self, text: &str, var: &str) -> bool {
1327 if text.contains(var) {
1328 return true;
1329 }
1330 let var_num = var.trim_start_matches('_');
1331 text.contains(&format!("move _{}", var_num)) || text.contains(&format!("copy _{}", var_num))
1332 }
1333
1334 fn has_size_limit_check(&self, function: &MirFunction, tainted: &HashSet<String>) -> bool {
1335 let mut len_result_vars: HashSet<String> = HashSet::new();
1336
1337 for line in &function.body {
1338 let is_string_len = (line.contains("String::len(") || line.contains("str::len("))
1339 && !line.contains("Vec<");
1340
1341 if is_string_len {
1342 for tvar in tainted {
1343 if self.contains_var(line, tvar) {
1344 if let Some(var) = self.extract_assigned_var(line) {
1345 len_result_vars.insert(var);
1346 }
1347 }
1348 }
1349 }
1350
1351 if line.contains("Gt(")
1352 || line.contains("Lt(")
1353 || line.contains("Ge(")
1354 || line.contains("Le(")
1355 {
1356 for len_var in &len_result_vars {
1357 if self.contains_var(line, len_var) {
1358 return true;
1359 }
1360 }
1361 }
1362 }
1363
1364 false
1365 }
1366
1367 fn find_unsafe_operations(
1368 &self,
1369 function: &MirFunction,
1370 tainted: &HashSet<String>,
1371 ) -> Vec<String> {
1372 let mut unsafe_ops = Vec::new();
1373
1374 if self.has_size_limit_check(function, tainted) {
1375 return unsafe_ops;
1376 }
1377
1378 for line in &function.body {
1379 let is_sink = Self::SINKS.iter().any(|sink| line.contains(sink));
1380 if !is_sink {
1381 continue;
1382 }
1383
1384 let taint_flows = tainted.iter().any(|t| self.contains_var(line, t));
1385 if taint_flows {
1386 unsafe_ops.push(line.trim().to_string());
1387 }
1388 }
1389
1390 unsafe_ops
1391 }
1392}
1393
1394impl Rule for InsecureJsonTomlDeserializationRule {
1395 fn metadata(&self) -> &RuleMetadata {
1396 &self.metadata
1397 }
1398
1399 fn evaluate(
1400 &self,
1401 package: &MirPackage,
1402 _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
1403 ) -> Vec<Finding> {
1404 let mut findings = Vec::new();
1405
1406 for function in &package.functions {
1407 if function.name.contains("test") {
1408 continue;
1409 }
1410
1411 let tainted = self.track_untrusted_vars(function);
1412 if tainted.is_empty() {
1413 continue;
1414 }
1415
1416 let unsafe_ops = self.find_unsafe_operations(function, &tainted);
1417 if !unsafe_ops.is_empty() {
1418 let is_toml = unsafe_ops.iter().any(|op| op.contains("toml::"));
1419 let format_name = if is_toml { "TOML" } else { "JSON" };
1420
1421 findings.push(Finding {
1422 rule_id: self.metadata.id.clone(),
1423 rule_name: self.metadata.name.clone(),
1424 severity: Severity::Medium,
1425 message: format!(
1426 "Insecure {} deserialization in `{}`. User-controlled input \
1427 passed without validation. Deeply nested structures can cause \
1428 stack overflow.",
1429 format_name, function.name
1430 ),
1431 function: function.name.clone(),
1432 function_signature: function.signature.clone(),
1433 evidence: unsafe_ops.into_iter().take(3).collect(),
1434 span: function.span.clone(),
1435 confidence: Confidence::Medium,
1436 cwe_ids: Vec::new(),
1437 fix_suggestion: None,
1438 code_snippet: None,
1439 exploitability: Exploitability::default(),
1440 exploitability_score: Exploitability::default().score(),
1441 ..Default::default()
1442 });
1443 }
1444 }
1445
1446 findings
1447 }
1448}
1449
/// Rule `RUSTCOLA081`: reports serde `serialize_*` calls whose constant length
/// argument disagrees with the number of field/element calls found in the
/// same function body.
pub struct SerdeLengthMismatchRule {
    /// Static description of the rule, returned by `Rule::metadata`.
    metadata: RuleMetadata,
}
1459
1460impl SerdeLengthMismatchRule {
1461 pub fn new() -> Self {
1462 Self {
1463 metadata: RuleMetadata {
1464 id: "RUSTCOLA081".to_string(),
1465 name: "serde-length-mismatch".to_string(),
1466 short_description: "Serde serialize_* length mismatch".to_string(),
1467 full_description: "Detects when the declared field/element count in \
1468 serialize_struct/serialize_tuple/etc doesn't match the actual number \
1469 of serialize_field/serialize_element calls. This mismatch can cause \
1470 deserialization failures, data corruption, or panics in binary formats \
1471 like bincode, postcard, or MessagePack that rely on precise length hints."
1472 .to_string(),
1473 default_severity: Severity::Medium,
1474 origin: RuleOrigin::BuiltIn,
1475 cwe_ids: Vec::new(),
1476 fix_suggestion: None,
1477 help_uri: None,
1478 exploitability: Exploitability::default(),
1479 },
1480 }
1481 }
1482
1483 fn find_serializer_declarations(body: &[String]) -> Vec<(String, String, usize, String)> {
1484 let mut declarations = Vec::new();
1485
1486 let mut var_values: std::collections::HashMap<String, usize> =
1487 std::collections::HashMap::new();
1488 for line in body {
1489 let trimmed = line.trim();
1490 if trimmed.contains("Option::<usize>::Some(const ") {
1491 if let Some(eq_pos) = trimmed.find(" = ") {
1492 let var_name = trimmed[..eq_pos].trim().to_string();
1493 if let Some(start) = trimmed.find("Some(const ") {
1494 let after = &trimmed[start + 11..];
1495 if let Some(end) = after.find("_usize") {
1496 if let Ok(val) = after[..end].trim().parse::<usize>() {
1497 var_values.insert(var_name, val);
1498 }
1499 }
1500 }
1501 }
1502 }
1503 }
1504
1505 for line in body {
1506 let trimmed = line.trim();
1507
1508 if trimmed.contains("serialize_struct(")
1509 && !trimmed.contains("serialize_struct_variant")
1510 {
1511 if let Some(decl) = Self::extract_struct_declaration(trimmed) {
1512 declarations.push(("struct".to_string(), decl.0, decl.1, trimmed.to_string()));
1513 }
1514 }
1515
1516 if trimmed.contains("serialize_tuple(")
1517 && !trimmed.contains("serialize_tuple_struct")
1518 && !trimmed.contains("serialize_tuple_variant")
1519 {
1520 if let Some(len) = Self::extract_tuple_length(trimmed) {
1521 declarations.push((
1522 "tuple".to_string(),
1523 "".to_string(),
1524 len,
1525 trimmed.to_string(),
1526 ));
1527 }
1528 }
1529
1530 if trimmed.contains("serialize_tuple_struct(") {
1531 if let Some(decl) = Self::extract_struct_declaration(trimmed) {
1532 declarations.push((
1533 "tuple_struct".to_string(),
1534 decl.0,
1535 decl.1,
1536 trimmed.to_string(),
1537 ));
1538 }
1539 }
1540
1541 if trimmed.contains("serialize_seq(") {
1542 if let Some(len) = Self::extract_seq_length(trimmed) {
1543 declarations.push((
1544 "seq".to_string(),
1545 "".to_string(),
1546 len,
1547 trimmed.to_string(),
1548 ));
1549 } else if let Some(len) = Self::extract_seq_length_from_var(trimmed, &var_values) {
1550 declarations.push((
1551 "seq".to_string(),
1552 "".to_string(),
1553 len,
1554 trimmed.to_string(),
1555 ));
1556 }
1557 }
1558
1559 if trimmed.contains("serialize_map(") {
1560 if let Some(len) = Self::extract_map_length(trimmed) {
1561 declarations.push((
1562 "map".to_string(),
1563 "".to_string(),
1564 len,
1565 trimmed.to_string(),
1566 ));
1567 } else if let Some(len) = Self::extract_map_length_from_var(trimmed, &var_values) {
1568 declarations.push((
1569 "map".to_string(),
1570 "".to_string(),
1571 len,
1572 trimmed.to_string(),
1573 ));
1574 }
1575 }
1576 }
1577
1578 declarations
1579 }
1580
1581 fn extract_struct_declaration(line: &str) -> Option<(String, usize)> {
1582 let name_start = line.find("const \"")? + 7;
1583 let name_end = line[name_start..].find("\"")? + name_start;
1584 let name = line[name_start..name_end].to_string();
1585
1586 let after_name = &line[name_end..];
1587 if let Some(const_pos) = after_name.find("const ") {
1588 let len_start = const_pos + 6;
1589 let len_str = &after_name[len_start..];
1590 if let Some(usize_pos) = len_str.find("_usize") {
1591 let num_str = &len_str[..usize_pos];
1592 if let Ok(len) = num_str.trim().parse::<usize>() {
1593 return Some((name, len));
1594 }
1595 }
1596 }
1597
1598 None
1599 }
1600
1601 fn extract_tuple_length(line: &str) -> Option<usize> {
1602 if let Some(const_pos) = line.rfind("const ") {
1603 let after_const = &line[const_pos + 6..];
1604 if let Some(usize_pos) = after_const.find("_usize") {
1605 let num_str = &after_const[..usize_pos];
1606 if let Ok(len) = num_str.trim().parse::<usize>() {
1607 return Some(len);
1608 }
1609 }
1610 }
1611 None
1612 }
1613
1614 fn extract_seq_length(line: &str) -> Option<usize> {
1615 if line.contains("Option::<usize>::None") || line.contains("None::<usize>") {
1616 return None;
1617 }
1618
1619 if let Some(const_pos) = line.rfind("const ") {
1620 let after_const = &line[const_pos + 6..];
1621 if let Some(usize_pos) = after_const.find("_usize") {
1622 let num_str = &after_const[..usize_pos];
1623 if let Ok(len) = num_str.trim().parse::<usize>() {
1624 return Some(len);
1625 }
1626 }
1627 }
1628
1629 None
1630 }
1631
    /// Reads an inline constant length from a `serialize_map` call line.
    ///
    /// Delegates to `extract_seq_length`, so the same parsing applies.
    fn extract_map_length(line: &str) -> Option<usize> {
        Self::extract_seq_length(line)
    }
1635
1636 fn extract_seq_length_from_var(
1637 line: &str,
1638 var_values: &std::collections::HashMap<String, usize>,
1639 ) -> Option<usize> {
1640 if let Some(paren_start) = line.find("serialize_seq(") {
1641 let after = &line[paren_start..];
1642 for (var, val) in var_values {
1643 if after.contains(&format!("move {}", var))
1644 || after.contains(&format!(", {})", var))
1645 {
1646 return Some(*val);
1647 }
1648 }
1649 }
1650 None
1651 }
1652
1653 fn extract_map_length_from_var(
1654 line: &str,
1655 var_values: &std::collections::HashMap<String, usize>,
1656 ) -> Option<usize> {
1657 if let Some(paren_start) = line.find("serialize_map(") {
1658 let after = &line[paren_start..];
1659 for (var, val) in var_values {
1660 if after.contains(&format!("move {}", var))
1661 || after.contains(&format!(", {})", var))
1662 {
1663 return Some(*val);
1664 }
1665 }
1666 }
1667 None
1668 }
1669
1670 fn count_serialize_fields(body: &[String]) -> usize {
1671 body.iter()
1672 .filter(|line| {
1673 let trimmed = line.trim();
1674 trimmed.contains("SerializeStruct>::serialize_field")
1675 || trimmed.contains("SerializeStructVariant>::serialize_field")
1676 })
1677 .count()
1678 }
1679
1680 fn count_serialize_elements(body: &[String]) -> usize {
1681 body.iter()
1682 .filter(|line| {
1683 let trimmed = line.trim();
1684 trimmed.contains("SerializeTuple>::serialize_element")
1685 || trimmed.contains("SerializeTupleStruct>::serialize_field")
1686 })
1687 .count()
1688 }
1689
1690 fn count_seq_elements(body: &[String]) -> usize {
1691 body.iter()
1692 .filter(|line| {
1693 let trimmed = line.trim();
1694 trimmed.contains("SerializeSeq>::serialize_element")
1695 })
1696 .count()
1697 }
1698
1699 fn count_map_entries(body: &[String]) -> usize {
1700 body.iter()
1701 .filter(|line| {
1702 let trimmed = line.trim();
1703 trimmed.contains("SerializeMap>::serialize_entry")
1704 || trimmed.contains("SerializeMap>::serialize_key")
1705 })
1706 .count()
1707 }
1708
1709 fn has_loop_serialization(body: &[String]) -> bool {
1710 let body_str = body.join("\n");
1711
1712 body_str.contains("switchInt")
1713 && (body_str.contains("IntoIterator")
1714 || body_str.contains("Iterator>::next")
1715 || body_str.contains("Range"))
1716 }
1717}
1718
1719impl Rule for SerdeLengthMismatchRule {
1720 fn metadata(&self) -> &RuleMetadata {
1721 &self.metadata
1722 }
1723
1724 fn evaluate(
1725 &self,
1726 package: &MirPackage,
1727 _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
1728 ) -> Vec<Finding> {
1729 let mut findings = Vec::new();
1730
1731 for function in &package.functions {
1732 if !function.name.contains("serialize") && !function.signature.contains("Serialize") {
1733 continue;
1734 }
1735
1736 let declarations = Self::find_serializer_declarations(&function.body);
1737
1738 if declarations.is_empty() {
1739 continue;
1740 }
1741
1742 for (ser_type, name, declared_len, decl_line) in &declarations {
1743 let has_loop = Self::has_loop_serialization(&function.body);
1744
1745 let actual_count = match ser_type.as_str() {
1746 "struct" => Self::count_serialize_fields(&function.body),
1747 "tuple" | "tuple_struct" => Self::count_serialize_elements(&function.body),
1748 "seq" => {
1749 if has_loop {
1750 usize::MAX
1751 } else {
1752 Self::count_seq_elements(&function.body)
1753 }
1754 }
1755 "map" => {
1756 if has_loop {
1757 usize::MAX
1758 } else {
1759 Self::count_map_entries(&function.body)
1760 }
1761 }
1762 _ => continue,
1763 };
1764
1765 if actual_count == usize::MAX {
1766 let type_desc = match ser_type.as_str() {
1767 "seq" => "sequence",
1768 "map" => "map",
1769 _ => "collection",
1770 };
1771
1772 let name_info = if name.is_empty() {
1773 String::new()
1774 } else {
1775 format!(" for `{}`", name)
1776 };
1777
1778 findings.push(Finding {
1779 rule_id: self.metadata.id.clone(),
1780 rule_name: self.metadata.name.clone(),
1781 severity: self.metadata.default_severity,
1782 message: format!(
1783 "Serde serialize_{}{} declares constant length {} but uses loop-based serialization. \
1784 The hardcoded length hint will likely not match the actual number of {} entries. \
1785 Use `None` for dynamic-length collections or use `self.{}.len()` instead.",
1786 ser_type, name_info, declared_len, type_desc,
1787 if ser_type == "seq" { "data" } else { "items" }
1788 ),
1789 function: function.name.clone(),
1790 function_signature: function.signature.clone(),
1791 evidence: vec![decl_line.clone()],
1792 span: function.span.clone(),
1793 confidence: Confidence::Medium,
1794 cwe_ids: Vec::new(),
1795 fix_suggestion: None,
1796 code_snippet: None,
1797 exploitability: Exploitability::default(),
1798 exploitability_score: Exploitability::default().score(),
1799 ..Default::default()
1800 });
1801 continue;
1802 }
1803
1804 if actual_count != *declared_len {
1805 let type_desc = match ser_type.as_str() {
1806 "struct" => "struct fields",
1807 "tuple" | "tuple_struct" => "tuple elements",
1808 "seq" => "sequence elements",
1809 "map" => "map entries",
1810 _ => "elements",
1811 };
1812
1813 let name_info = if name.is_empty() {
1814 String::new()
1815 } else {
1816 format!(" for `{}`", name)
1817 };
1818
1819 findings.push(Finding {
1820 rule_id: self.metadata.id.clone(),
1821 rule_name: self.metadata.name.clone(),
1822 severity: self.metadata.default_severity,
1823 message: format!(
1824 "Serde serialize_{}{} declares {} {} but actually serializes {}. \
1825 This mismatch can cause deserialization failures in binary formats. \
1826 Update the length argument to match the actual count.",
1827 ser_type, name_info, declared_len, type_desc, actual_count
1828 ),
1829 function: function.name.clone(),
1830 function_signature: function.signature.clone(),
1831 evidence: vec![decl_line.clone()],
1832 span: function.span.clone(),
1833 confidence: Confidence::Medium,
1834 cwe_ids: Vec::new(),
1835 fix_suggestion: None,
1836 code_snippet: None,
1837 exploitability: Exploitability::default(),
1838 exploitability_score: Exploitability::default().score(),
1839 ..Default::default()
1840 });
1841 }
1842 }
1843 }
1844
1845 findings
1846 }
1847}
1848
/// Rule `RUSTCOLA106`: scans the crate's `.rs` source files for plain `*`
/// multiplications by time-unit conversion constants that are not written
/// with `checked_mul` / `saturating_mul` / `overflowing_mul` / `wrapping_mul`.
pub struct UncheckedTimestampMultiplicationRule {
    /// Static description of the rule, returned by `Rule::metadata`.
    metadata: RuleMetadata,
}
1861
1862impl UncheckedTimestampMultiplicationRule {
1863 pub fn new() -> Self {
1864 Self {
1865 metadata: RuleMetadata {
1866 id: "RUSTCOLA106".to_string(),
1867 name: "unchecked-timestamp-multiplication".to_string(),
1868 short_description: "Unchecked multiplication in timestamp conversion".to_string(),
1869 full_description: "Detects unchecked multiplication when converting time units. \
1870 Conversions like seconds to nanoseconds (multiply by 1_000_000_000) can \
1871 overflow for large values. Use checked_mul() or saturating_mul() to handle \
1872 overflow correctly. Pattern found in InfluxDB research."
1873 .to_string(),
1874 help_uri: None,
1875 default_severity: Severity::Medium,
1876 origin: RuleOrigin::BuiltIn,
1877 cwe_ids: Vec::new(),
1878 fix_suggestion: None,
1879 exploitability: Exploitability::default(),
1880 },
1881 }
1882 }
1883
1884 fn time_multipliers() -> &'static [(&'static str, &'static str)] {
1886 &[
1887 ("1_000_000_000", "seconds to nanoseconds"),
1888 ("1000000000", "seconds to nanoseconds"),
1889 ("1_000_000", "seconds to microseconds or millis to nanos"),
1890 ("1000000", "seconds to microseconds or millis to nanos"),
1891 ("1_000", "seconds to milliseconds or millis to micros"),
1892 ("86_400", "days to seconds"),
1893 ("86400", "days to seconds"),
1894 ("3_600", "hours to seconds"),
1895 ("3600", "hours to seconds"),
1896 ]
1897 }
1898}
1899
1900impl Rule for UncheckedTimestampMultiplicationRule {
1901 fn metadata(&self) -> &RuleMetadata {
1902 &self.metadata
1903 }
1904
1905 fn evaluate(
1906 &self,
1907 package: &MirPackage,
1908 _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
1909 ) -> Vec<Finding> {
1910 if package.crate_name == "mir-extractor" {
1911 return Vec::new();
1912 }
1913
1914 let mut findings = Vec::new();
1915 let crate_root = Path::new(&package.crate_root);
1916
1917 if !crate_root.exists() {
1918 return findings;
1919 }
1920
1921 for entry in WalkDir::new(crate_root)
1922 .into_iter()
1923 .filter_entry(|e| filter_entry(e))
1924 {
1925 let entry = match entry {
1926 Ok(e) => e,
1927 Err(_) => continue,
1928 };
1929
1930 if !entry.file_type().is_file() {
1931 continue;
1932 }
1933
1934 let path = entry.path();
1935 if path.extension() != Some(OsStr::new("rs")) {
1936 continue;
1937 }
1938
1939 let rel_path = path
1940 .strip_prefix(crate_root)
1941 .unwrap_or(path)
1942 .to_string_lossy()
1943 .replace('\\', "/");
1944
1945 let content = match fs::read_to_string(path) {
1946 Ok(c) => c,
1947 Err(_) => continue,
1948 };
1949
1950 let lines: Vec<&str> = content.lines().collect();
1951
1952 for (idx, line) in lines.iter().enumerate() {
1953 let trimmed = line.trim();
1954
1955 if trimmed.starts_with("//") {
1957 continue;
1958 }
1959
1960 if trimmed.contains("checked_mul")
1962 || trimmed.contains("saturating_mul")
1963 || trimmed.contains("overflowing_mul")
1964 || trimmed.contains("wrapping_mul")
1965 {
1966 continue;
1967 }
1968
1969 for (multiplier, conversion_type) in Self::time_multipliers() {
1971 if trimmed.contains(multiplier) && trimmed.contains('*') {
1973 let is_time_context = trimmed.contains("sec")
1975 || trimmed.contains("time")
1976 || trimmed.contains("nano")
1977 || trimmed.contains("micro")
1978 || trimmed.contains("milli")
1979 || trimmed.contains("duration")
1980 || trimmed.contains("timestamp")
1981 || trimmed.contains("epoch");
1982
1983 let fn_context = lines[..idx].iter().rev().take(15).any(|l| {
1985 l.contains("fn ")
1986 && (l.contains("time")
1987 || l.contains("sec")
1988 || l.contains("nano")
1989 || l.contains("duration")
1990 || l.contains("timestamp")
1991 || l.contains("to_"))
1992 });
1993
1994 if is_time_context || fn_context {
1995 let location = format!("{}:{}", rel_path, idx + 1);
1996
1997 findings.push(Finding {
1998 rule_id: self.metadata.id.clone(),
1999 rule_name: self.metadata.name.clone(),
2000 severity: self.metadata.default_severity,
2001 message: format!(
2002 "Unchecked multiplication by {} ({}). \
2003 This can overflow for large values. Use checked_mul() \
2004 or saturating_mul() for safe conversion.",
2005 multiplier, conversion_type
2006 ),
2007 function: location,
2008 function_signature: String::new(),
2009 evidence: vec![trimmed.to_string()],
2010 span: None,
2011 ..Default::default()
2012 });
2013 }
2014 }
2015 }
2016 }
2017 }
2018
2019 findings
2020 }
2021}
2022
2023pub fn register_input_rules(engine: &mut crate::RuleEngine) {
2029 engine.register_rule(Box::new(CleartextEnvVarRule::new()));
2030 engine.register_rule(Box::new(EnvVarLiteralRule::new()));
2031 engine.register_rule(Box::new(InvisibleUnicodeRule::new()));
2032 engine.register_rule(Box::new(UntrimmedStdinRule::new()));
2033 engine.register_rule(Box::new(InfiniteIteratorRule::new()));
2034 engine.register_rule(Box::new(DivisionByUntrustedRule::new()));
2035 engine.register_rule(Box::new(InsecureYamlDeserializationRule::new()));
2036 engine.register_rule(Box::new(UnboundedReadRule::new()));
2037 engine.register_rule(Box::new(InsecureJsonTomlDeserializationRule::new()));
2038 engine.register_rule(Box::new(SerdeLengthMismatchRule::new()));
2039 engine.register_rule(Box::new(UncheckedTimestampMultiplicationRule::new()));
2040}