mir_extractor/rules/
input.rs

1//! Input validation rules.
2//!
3//! Rules detecting input validation issues:
4//! - Environment variable handling (RUSTCOLA065, RUSTCOLA047)
5//! - Untrimmed stdin input (RUSTCOLA053)
6//! - Invisible Unicode detection (RUSTCOLA048)
7//! - Infinite iterators (RUSTCOLA054)
8//! - Unsafe deserialization (RUSTCOLA089, RUSTCOLA091)
9//! - Unbounded reads (RUSTCOLA090)
10//! - Division by untrusted input (RUSTCOLA077)
11//! - Unchecked timestamp multiplication (RUSTCOLA106)
12
13#![allow(dead_code)]
14
15use std::collections::HashSet;
16use std::ffi::OsStr;
17use std::fs;
18use std::path::Path;
19use walkdir::WalkDir;
20
21use super::utils::filter_entry;
22use crate::{
23    Confidence, Exploitability, Finding, MirFunction, MirPackage, Rule, RuleMetadata, RuleOrigin,
24    Severity,
25};
26
/// Substrings that mark a MIR line as reading external (untrusted) input.
///
/// Shared input source patterns used by multiple rules. Matching is plain
/// substring search, so each entry is written the way the call appears in
/// the MIR text.
const INPUT_SOURCE_PATTERNS: &[&str] = &[
    "= var::<",        // env::var::<T> - generic call (MIR format)
    "= var(",          // env::var - standard call
    "var_os(",         // env::var_os
    "::args(",         // env::args
    "args_os(",        // env::args_os
    "::nth(",          // iterator nth (often on args)
    "read_line(",      // stdin
    "read_to_string(", // file/stdin reads
];
38
39// ============================================================================
40// RUSTCOLA065: Cleartext Sensitive Data in Environment Variables
41// ============================================================================
42
43/// Detects sensitive data (passwords, secrets, keys) stored via env::set_var.
44/// Environment variables can be read by child processes and are often logged.
45pub struct CleartextEnvVarRule {
46    metadata: RuleMetadata,
47}
48
49impl CleartextEnvVarRule {
50    pub fn new() -> Self {
51        Self {
52            metadata: RuleMetadata {
53                id: "RUSTCOLA065".to_string(),
54                name: "cleartext-env-var".to_string(),
55                short_description: "Sensitive data in environment variable".to_string(),
56                full_description: "Detects sensitive data (passwords, secrets, tokens, keys) \
57                    being stored in environment variables via std::env::set_var. Environment \
58                    variables can be read by child processes, logged, and are often visible \
59                    in /proc filesystem on Linux. Consider using dedicated secret management \
60                    solutions instead."
61                    .to_string(),
62                help_uri: Some("https://cwe.mitre.org/data/definitions/526.html".to_string()),
63                default_severity: Severity::High,
64                origin: RuleOrigin::BuiltIn,
65                cwe_ids: Vec::new(),
66                fix_suggestion: None,
67                exploitability: Exploitability::default(),
68            },
69        }
70    }
71
72    const SENSITIVE_PATTERNS: &'static [&'static str] = &[
73        "password",
74        "passwd",
75        "pwd",
76        "secret",
77        "token",
78        "apikey",
79        "api_key",
80        "auth",
81        "credential",
82        "cred",
83        "private_key",
84        "privatekey",
85        "access_key",
86        "secret_key",
87    ];
88
89    fn looks_like_sensitive_env_set(&self, function: &MirFunction) -> bool {
90        for line in &function.body {
91            // Look for set_var calls
92            if line.contains("set_var") {
93                // Check if the variable name contains sensitive patterns
94                let line_lower = line.to_lowercase();
95                for pattern in Self::SENSITIVE_PATTERNS {
96                    if line_lower.contains(pattern) {
97                        return true;
98                    }
99                }
100            }
101        }
102        false
103    }
104}
105
106impl Rule for CleartextEnvVarRule {
107    fn metadata(&self) -> &RuleMetadata {
108        &self.metadata
109    }
110
111    fn evaluate(
112        &self,
113        package: &MirPackage,
114        _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
115    ) -> Vec<Finding> {
116        let mut findings = Vec::new();
117
118        for function in &package.functions {
119            if self.looks_like_sensitive_env_set(function) {
120                let mut evidence = Vec::new();
121                for line in &function.body {
122                    if line.contains("set_var") {
123                        evidence.push(line.trim().to_string());
124                        if evidence.len() >= 3 {
125                            break;
126                        }
127                    }
128                }
129
130                findings.push(Finding {
131                    rule_id: self.metadata.id.clone(),
132                    rule_name: self.metadata.name.clone(),
133                    severity: self.metadata.default_severity,
134                    message: format!(
135                        "Sensitive data stored in environment variable in `{}`. \
136                        Environment variables are inherited by child processes and \
137                        may be logged. Use dedicated secret management instead.",
138                        function.name
139                    ),
140                    function: function.name.clone(),
141                    function_signature: function.signature.clone(),
142                    evidence,
143                    span: function.span.clone(),
144                    confidence: Confidence::Medium,
145                    cwe_ids: Vec::new(),
146                    fix_suggestion: None,
147                    code_snippet: None,
148                    exploitability: Exploitability::default(),
149                    exploitability_score: Exploitability::default().score(),
150                    ..Default::default()
151                });
152            }
153        }
154
155        findings
156    }
157}
158
159// ============================================================================
160// RUSTCOLA047: Environment Variable Literal Names
161// ============================================================================
162
163/// Detects string literals passed to env::var() - potential config leakage.
164pub struct EnvVarLiteralRule {
165    metadata: RuleMetadata,
166}
167
168impl EnvVarLiteralRule {
169    pub fn new() -> Self {
170        Self {
171            metadata: RuleMetadata {
172                id: "RUSTCOLA047".to_string(),
173                name: "env-var-literal".to_string(),
174                short_description: "Hardcoded environment variable name".to_string(),
175                full_description: "Detects string literals passed directly to std::env::var(). \
176                    Hardcoded environment variable names can leak configuration expectations \
177                    and make it harder to configure applications in different environments. \
178                    Consider using constants or configuration structs."
179                    .to_string(),
180                help_uri: None,
181                default_severity: Severity::Low,
182                origin: RuleOrigin::BuiltIn,
183                cwe_ids: Vec::new(),
184                fix_suggestion: None,
185                exploitability: Exploitability::default(),
186            },
187        }
188    }
189
190    fn has_env_var_literal(&self, function: &MirFunction) -> bool {
191        let body_str = function.body.join("\n");
192
193        // Look for env::var with a const string argument
194        // MIR shows: var::<&str>(const "VAR_NAME")
195        body_str.contains("env::var") && body_str.contains("const \"")
196    }
197}
198
199impl Rule for EnvVarLiteralRule {
200    fn metadata(&self) -> &RuleMetadata {
201        &self.metadata
202    }
203
204    fn evaluate(
205        &self,
206        package: &MirPackage,
207        _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
208    ) -> Vec<Finding> {
209        let mut findings = Vec::new();
210
211        for function in &package.functions {
212            if self.has_env_var_literal(function) {
213                let mut evidence = Vec::new();
214                for line in &function.body {
215                    if (line.contains("env::var") || line.contains("var::<"))
216                        && line.contains("const \"")
217                    {
218                        evidence.push(line.trim().to_string());
219                        if evidence.len() >= 3 {
220                            break;
221                        }
222                    }
223                }
224
225                findings.push(Finding {
226                    rule_id: self.metadata.id.clone(),
227                    rule_name: self.metadata.name.clone(),
228                    severity: self.metadata.default_severity,
229                    message: "Hardcoded environment variable name detected. Consider using \
230                        constants or configuration structs for better maintainability."
231                        .to_string(),
232                    function: function.name.clone(),
233                    function_signature: function.signature.clone(),
234                    evidence,
235                    span: function.span.clone(),
236                    confidence: Confidence::Medium,
237                    cwe_ids: Vec::new(),
238                    fix_suggestion: None,
239                    code_snippet: None,
240                    exploitability: Exploitability::default(),
241                    exploitability_score: Exploitability::default().score(),
242                    ..Default::default()
243                });
244            }
245        }
246
247        findings
248    }
249}
250
251// ============================================================================
252// RUSTCOLA048: Invisible Unicode Characters
253// ============================================================================
254
255/// Detects invisible Unicode characters in source code.
256pub struct InvisibleUnicodeRule {
257    metadata: RuleMetadata,
258}
259
260impl InvisibleUnicodeRule {
261    pub fn new() -> Self {
262        Self {
263            metadata: RuleMetadata {
264                id: "RUSTCOLA048".to_string(),
265                name: "invisible-unicode".to_string(),
266                short_description: "Invisible Unicode characters in source".to_string(),
267                full_description: "Detects invisible Unicode characters in source code. \
268                    These can be used to create Trojan Source attacks where code appears \
269                    benign but executes differently. Includes zero-width characters, \
270                    bidirectional overrides, and other invisible control characters."
271                    .to_string(),
272                help_uri: Some("https://trojansource.codes/".to_string()),
273                default_severity: Severity::High,
274                origin: RuleOrigin::BuiltIn,
275                cwe_ids: Vec::new(),
276                fix_suggestion: None,
277                exploitability: Exploitability::default(),
278            },
279        }
280    }
281
282    const INVISIBLE_CHARS: &'static [char] = &[
283        '\u{200B}', // Zero-width space
284        '\u{200C}', // Zero-width non-joiner
285        '\u{200D}', // Zero-width joiner
286        '\u{FEFF}', // Byte order mark
287        '\u{2060}', // Word joiner
288        '\u{202A}', // Left-to-right embedding
289        '\u{202B}', // Right-to-left embedding
290        '\u{202C}', // Pop directional formatting
291        '\u{202D}', // Left-to-right override
292        '\u{202E}', // Right-to-left override
293        '\u{2066}', // Left-to-right isolate
294        '\u{2067}', // Right-to-left isolate
295        '\u{2068}', // First strong isolate
296        '\u{2069}', // Pop directional isolate
297    ];
298
299    fn has_invisible_chars(&self, function: &MirFunction) -> bool {
300        let body_str = function.body.join("\n");
301        for &c in Self::INVISIBLE_CHARS {
302            if body_str.contains(c) {
303                return true;
304            }
305        }
306        false
307    }
308}
309
310impl Rule for InvisibleUnicodeRule {
311    fn metadata(&self) -> &RuleMetadata {
312        &self.metadata
313    }
314
315    fn evaluate(
316        &self,
317        package: &MirPackage,
318        _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
319    ) -> Vec<Finding> {
320        let mut findings = Vec::new();
321
322        for function in &package.functions {
323            if self.has_invisible_chars(function) {
324                let mut evidence = Vec::new();
325                for line in &function.body {
326                    let has_invisible = Self::INVISIBLE_CHARS.iter().any(|&c| line.contains(c));
327                    if has_invisible {
328                        evidence.push(line.trim().to_string());
329                        if evidence.len() >= 3 {
330                            break;
331                        }
332                    }
333                }
334
335                findings.push(Finding {
336                    rule_id: self.metadata.id.clone(),
337                    rule_name: self.metadata.name.clone(),
338                    severity: self.metadata.default_severity,
339                    message: format!(
340                        "Invisible Unicode characters detected in `{}`. These may be \
341                        Trojan Source attacks where code appears benign but executes \
342                        differently. Remove or replace with visible equivalents.",
343                        function.name
344                    ),
345                    function: function.name.clone(),
346                    function_signature: function.signature.clone(),
347                    evidence,
348                    span: function.span.clone(),
349                    confidence: Confidence::Medium,
350                    cwe_ids: Vec::new(),
351                    fix_suggestion: None,
352                    code_snippet: None,
353                    exploitability: Exploitability::default(),
354                    exploitability_score: Exploitability::default().score(),
355                    ..Default::default()
356                });
357            }
358        }
359
360        findings
361    }
362}
363
364// ============================================================================
365// RUSTCOLA053: Untrimmed Stdin Input
366// ============================================================================
367
368/// Detects stdin input used without trimming trailing newlines.
369pub struct UntrimmedStdinRule {
370    metadata: RuleMetadata,
371}
372
373impl UntrimmedStdinRule {
374    pub fn new() -> Self {
375        Self {
376            metadata: RuleMetadata {
377                id: "RUSTCOLA053".to_string(),
378                name: "untrimmed-stdin".to_string(),
379                short_description: "Stdin input not trimmed".to_string(),
380                full_description: "Detects stdin().read_line() usage without subsequent \
381                    trim() call. read_line() includes the trailing newline which can cause \
382                    subtle bugs in file paths, passwords, or comparisons. Always call \
383                    .trim() or .trim_end() on stdin input."
384                    .to_string(),
385                help_uri: None,
386                default_severity: Severity::Low,
387                origin: RuleOrigin::BuiltIn,
388                cwe_ids: Vec::new(),
389                fix_suggestion: None,
390                exploitability: Exploitability::default(),
391            },
392        }
393    }
394
395    fn has_untrimmed_stdin(&self, function: &MirFunction) -> bool {
396        let body_str = function.body.join("\n");
397
398        // Check for stdin read_line
399        let has_read_line = body_str.contains("stdin")
400            && (body_str.contains("read_line") || body_str.contains("BufRead"));
401
402        if !has_read_line {
403            return false;
404        }
405
406        // Check for trim calls
407        let has_trim = body_str.contains("trim") || body_str.contains("trim_end");
408
409        !has_trim
410    }
411}
412
413impl Rule for UntrimmedStdinRule {
414    fn metadata(&self) -> &RuleMetadata {
415        &self.metadata
416    }
417
418    fn evaluate(
419        &self,
420        package: &MirPackage,
421        _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
422    ) -> Vec<Finding> {
423        let mut findings = Vec::new();
424
425        for function in &package.functions {
426            if self.has_untrimmed_stdin(function) {
427                let mut evidence = Vec::new();
428                for line in &function.body {
429                    if line.contains("stdin") || line.contains("read_line") {
430                        evidence.push(line.trim().to_string());
431                        if evidence.len() >= 3 {
432                            break;
433                        }
434                    }
435                }
436
437                findings.push(Finding {
438                    rule_id: self.metadata.id.clone(),
439                    rule_name: self.metadata.name.clone(),
440                    severity: self.metadata.default_severity,
441                    message: "Stdin read_line() without trim(). The trailing newline can \
442                        cause bugs in paths, passwords, or comparisons. Call .trim() on input."
443                        .to_string(),
444                    function: function.name.clone(),
445                    function_signature: function.signature.clone(),
446                    evidence,
447                    span: function.span.clone(),
448                    confidence: Confidence::Medium,
449                    cwe_ids: Vec::new(),
450                    fix_suggestion: None,
451                    code_snippet: None,
452                    exploitability: Exploitability::default(),
453                    exploitability_score: Exploitability::default().score(),
454                    ..Default::default()
455                });
456            }
457        }
458
459        findings
460    }
461}
462
463// ============================================================================
464// RUSTCOLA054: Infinite Iterator Detection
465// ============================================================================
466
467/// Detects infinite iterators (repeat, cycle, repeat_with) without termination.
468pub struct InfiniteIteratorRule {
469    metadata: RuleMetadata,
470}
471
472impl InfiniteIteratorRule {
473    pub fn new() -> Self {
474        Self {
475            metadata: RuleMetadata {
476                id: "RUSTCOLA054".to_string(),
477                name: "infinite-iterator".to_string(),
478                short_description: "Infinite iterator without termination".to_string(),
479                full_description: "Detects infinite iterators (std::iter::repeat, cycle, \
480                    repeat_with) without termination methods (take, take_while, any, find, \
481                    position, zip). Consuming an infinite iterator without bounds leads to \
482                    infinite loops or memory exhaustion."
483                    .to_string(),
484                help_uri: None,
485                default_severity: Severity::High,
486                origin: RuleOrigin::BuiltIn,
487                cwe_ids: Vec::new(),
488                fix_suggestion: None,
489                exploitability: Exploitability::default(),
490            },
491        }
492    }
493
494    fn looks_like_infinite_iterator(&self, function: &MirFunction) -> bool {
495        let body_str = function.body.join("\n");
496
497        // Skip if function name contains "mir_extractor" or is infrastructure
498        if function.name.contains("mir_extractor") || function.name.contains("mir-extractor") {
499            return false;
500        }
501
502        // Skip functions that are just defining string constants
503        if function.name.contains("::new")
504            || body_str.contains("const \"iter::repeat")
505            || body_str.contains("const \"std::iter::repeat")
506        {
507            return false;
508        }
509
510        // Check for infinite iterator constructors
511        let has_repeat = body_str.contains("std::iter::repeat")
512            || body_str.contains("core::iter::repeat")
513            || body_str.contains("Repeat<");
514        let has_cycle = body_str.contains("::cycle") || body_str.contains("Cycle<");
515        let has_repeat_with = body_str.contains("std::iter::repeat_with")
516            || body_str.contains("core::iter::repeat_with")
517            || body_str.contains("repeat_with::<")
518            || body_str.contains("RepeatWith<");
519
520        if !has_repeat && !has_cycle && !has_repeat_with {
521            return false;
522        }
523
524        // Check if there are termination methods
525        let has_take = body_str.contains("::take(") || body_str.contains(">::take::<");
526        let has_take_while =
527            body_str.contains("::take_while") || body_str.contains(">::take_while::<");
528        let has_any = body_str.contains("::any(") || body_str.contains(">::any::<");
529        let has_find = body_str.contains("::find(") || body_str.contains(">::find::<");
530        let has_position = body_str.contains("::position") || body_str.contains(">::position::<");
531        let has_zip = body_str.contains("::zip");
532        let has_nth = body_str.contains("::nth(") || body_str.contains(">::nth::<");
533
534        // Check for early return (break in loop)
535        let return_count = body_str.matches("return;").count();
536        let has_early_return = return_count > 1;
537
538        // Flag if we have infinite iterator but no termination
539        !has_take
540            && !has_take_while
541            && !has_any
542            && !has_find
543            && !has_position
544            && !has_zip
545            && !has_nth
546            && !has_early_return
547    }
548}
549
550impl Rule for InfiniteIteratorRule {
551    fn metadata(&self) -> &RuleMetadata {
552        &self.metadata
553    }
554
555    fn evaluate(
556        &self,
557        package: &MirPackage,
558        _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
559    ) -> Vec<Finding> {
560        let mut findings = Vec::new();
561
562        for function in &package.functions {
563            if self.looks_like_infinite_iterator(function) {
564                let mut evidence = Vec::new();
565                for line in &function.body {
566                    if line.contains("std::iter::repeat")
567                        || line.contains("core::iter::repeat")
568                        || line.contains("::cycle")
569                        || line.contains("repeat_with")
570                    {
571                        evidence.push(line.trim().to_string());
572                        if evidence.len() >= 3 {
573                            break;
574                        }
575                    }
576                }
577
578                findings.push(Finding {
579                    rule_id: self.metadata.id.clone(),
580                    rule_name: self.metadata.name.clone(),
581                    severity: self.metadata.default_severity,
582                    message: "Infinite iterator (repeat, cycle, or repeat_with) without \
583                        termination method (take, take_while, any, find, position, zip). \
584                        This can cause unbounded loops leading to DoS."
585                        .to_string(),
586                    function: function.name.clone(),
587                    function_signature: function.signature.clone(),
588                    evidence,
589                    span: function.span.clone(),
590                    ..Default::default()
591                });
592            }
593        }
594
595        findings
596    }
597}
598
599// ============================================================================
600// RUSTCOLA077: Division by Untrusted Input
601// ============================================================================
602
603/// Detects division operations using untrusted input as denominator without validation.
604pub struct DivisionByUntrustedRule {
605    metadata: RuleMetadata,
606}
607
608impl DivisionByUntrustedRule {
609    pub fn new() -> Self {
610        Self {
611            metadata: RuleMetadata {
612                id: "RUSTCOLA077".to_string(),
613                name: "division-by-untrusted".to_string(),
614                short_description: "Division by untrusted input without validation".to_string(),
615                full_description: "Division or modulo operations use untrusted input as \
616                    the denominator without checking for zero. If the input is zero, this \
617                    causes a panic (DoS). Use checked_div/checked_rem or validate the \
618                    denominator before the operation."
619                    .to_string(),
620                help_uri: Some("https://cwe.mitre.org/data/definitions/369.html".to_string()),
621                default_severity: Severity::Medium,
622                origin: RuleOrigin::BuiltIn,
623                cwe_ids: Vec::new(),
624                fix_suggestion: None,
625                exploitability: Exploitability::default(),
626            },
627        }
628    }
629
630    const DIVISION_PATTERNS: &'static [&'static str] = &[
631        "Div(", "Rem(", // MIR binary ops
632        "div(", "rem(", // Method calls
633        " / ", " % ", // Source patterns
634    ];
635
636    const ZERO_CHECK_PATTERNS: &'static [&'static str] = &[
637        "checked_div",
638        "checked_rem",
639        "saturating_div",
640        "wrapping_div",
641        "!= 0",
642        "!= 0_",
643        "> 0",
644        ">= 1",
645        "is_zero",
646        "NonZero",
647    ];
648
649    /// Track untrusted numeric variables
650    fn track_untrusted_numerics(body: &[String]) -> HashSet<String> {
651        let mut untrusted_vars = HashSet::new();
652
653        for line in body {
654            let trimmed = line.trim();
655
656            let is_source = INPUT_SOURCE_PATTERNS.iter().any(|p| trimmed.contains(p));
657            if is_source {
658                if let Some(eq_pos) = trimmed.find(" = ") {
659                    let target = trimmed[..eq_pos].trim();
660                    if let Some(var) = target
661                        .split(|c: char| !c.is_alphanumeric() && c != '_')
662                        .find(|s| s.starts_with('_'))
663                    {
664                        untrusted_vars.insert(var.to_string());
665                    }
666                }
667            }
668
669            // Track .parse() results from untrusted data
670            if trimmed.contains("::parse::") {
671                let uses_untrusted = untrusted_vars.iter().any(|v| trimmed.contains(v));
672                if uses_untrusted {
673                    if let Some(eq_pos) = trimmed.find(" = ") {
674                        let target = trimmed[..eq_pos].trim();
675                        if let Some(var) = target
676                            .split(|c: char| !c.is_alphanumeric() && c != '_')
677                            .find(|s| s.starts_with('_'))
678                        {
679                            untrusted_vars.insert(var.to_string());
680                        }
681                    }
682                }
683            }
684
685            // Propagate through assignments
686            if trimmed.contains(" = ") && !is_source {
687                if let Some(eq_pos) = trimmed.find(" = ") {
688                    let target = trimmed[..eq_pos].trim();
689                    let source = trimmed[eq_pos + 3..].trim();
690
691                    let uses_untrusted = untrusted_vars.iter().any(|v| source.contains(v));
692                    if uses_untrusted {
693                        if let Some(target_var) = target
694                            .split(|c: char| !c.is_alphanumeric() && c != '_')
695                            .find(|s| s.starts_with('_'))
696                        {
697                            untrusted_vars.insert(target_var.to_string());
698                        }
699                    }
700                }
701            }
702        }
703
704        untrusted_vars
705    }
706
707    fn has_zero_validation(body: &[String], untrusted_vars: &HashSet<String>) -> bool {
708        for line in body {
709            let trimmed = line.trim();
710            let has_check = Self::ZERO_CHECK_PATTERNS
711                .iter()
712                .any(|p| trimmed.contains(p));
713            if has_check {
714                for var in untrusted_vars {
715                    if trimmed.contains(var) {
716                        return true;
717                    }
718                }
719            }
720        }
721        false
722    }
723
724    fn find_unsafe_divisions(body: &[String], untrusted_vars: &HashSet<String>) -> Vec<String> {
725        let mut evidence = Vec::new();
726
727        for line in body {
728            let trimmed = line.trim();
729            let is_division = Self::DIVISION_PATTERNS.iter().any(|p| trimmed.contains(p));
730            if is_division {
731                for var in untrusted_vars {
732                    if trimmed.contains(var) {
733                        evidence.push(trimmed.to_string());
734                        break;
735                    }
736                }
737            }
738        }
739
740        evidence
741    }
742}
743
744impl Rule for DivisionByUntrustedRule {
745    fn metadata(&self) -> &RuleMetadata {
746        &self.metadata
747    }
748
749    fn evaluate(
750        &self,
751        package: &MirPackage,
752        _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
753    ) -> Vec<Finding> {
754        let mut findings = Vec::new();
755
756        for function in &package.functions {
757            if function.name.contains("mir_extractor") || function.name.contains("mir-extractor") {
758                continue;
759            }
760
761            let untrusted_vars = Self::track_untrusted_numerics(&function.body);
762            if untrusted_vars.is_empty() {
763                continue;
764            }
765
766            if Self::has_zero_validation(&function.body, &untrusted_vars) {
767                continue;
768            }
769
770            let unsafe_divs = Self::find_unsafe_divisions(&function.body, &untrusted_vars);
771            if !unsafe_divs.is_empty() {
772                findings.push(Finding {
773                    rule_id: self.metadata.id.clone(),
774                    rule_name: self.metadata.name.clone(),
775                    severity: self.metadata.default_severity,
776                    message: format!(
777                        "Division in `{}` uses untrusted input as denominator without \
778                        zero validation. Use checked_div/checked_rem or validate != 0.",
779                        function.name
780                    ),
781                    function: function.name.clone(),
782                    function_signature: function.signature.clone(),
783                    evidence: unsafe_divs.into_iter().take(3).collect(),
784                    span: function.span.clone(),
785                    confidence: Confidence::Medium,
786                    cwe_ids: Vec::new(),
787                    fix_suggestion: None,
788                    code_snippet: None,
789                    exploitability: Exploitability::default(),
790                    exploitability_score: Exploitability::default().score(),
791                    ..Default::default()
792                });
793            }
794        }
795
796        findings
797    }
798}
799
800// ============================================================================
801// RUSTCOLA089: Insecure YAML Deserialization
802// ============================================================================
803
/// Detects untrusted input passed to YAML deserialization functions.
///
/// Registered under rule id `RUSTCOLA089` (see `new`). Detection is a textual
/// heuristic over MIR body lines rather than a full data-flow analysis.
pub struct InsecureYamlDeserializationRule {
    /// Static description of the rule (id, name, severity, ...), handed out by
    /// `Rule::metadata`.
    metadata: RuleMetadata,
}
808
809impl InsecureYamlDeserializationRule {
810    pub fn new() -> Self {
811        Self {
812            metadata: RuleMetadata {
813                id: "RUSTCOLA089".to_string(),
814                name: "insecure-yaml-deserialization".to_string(),
815                short_description: "Untrusted input in YAML deserialization".to_string(),
816                full_description: "User-controlled input is passed to serde_yaml \
817                    deserialization functions without validation. Attackers can craft \
818                    malicious YAML using anchors/aliases for exponential expansion \
819                    (billion laughs), deeply nested structures, or unexpected type \
820                    coercion to cause denial of service or unexpected behavior."
821                    .to_string(),
822                help_uri: Some(
823                    "https://owasp.org/www-project-web-security-testing-guide/".to_string(),
824                ),
825                default_severity: Severity::Medium,
826                origin: RuleOrigin::BuiltIn,
827                cwe_ids: Vec::new(),
828                fix_suggestion: None,
829                exploitability: Exploitability::default(),
830            },
831        }
832    }
833
834    const YAML_SINKS: &'static [&'static str] = &[
835        "serde_yaml::from_str",
836        "serde_yaml::from_slice",
837        "serde_yaml::from_reader",
838        "serde_yaml::from_str::",
839        "serde_yaml::from_slice::",
840        "serde_yaml::from_reader::",
841    ];
842
843    const UNTRUSTED_SOURCES: &'static [&'static str] = &[
844        "env::var",
845        "env::var_os",
846        "std::env::var",
847        "var::<",
848        "var_os::<",
849        "env::args",
850        "std::env::args",
851        "args::<",
852        "= args()",
853        "Args>",
854        "stdin",
855        "Stdin",
856        "read_to_string",
857        "read_to_end",
858        "BufRead::read_line",
859        "TcpStream",
860        "::connect(",
861    ];
862
863    const SANITIZERS: &'static [&'static str] = &[
864        r#"contains("&")"#,
865        r#"contains("*")"#,
866        ".len()",
867        "len() >",
868        "len() <",
869        "serde_json::from_str", // JSON is safer alternative
870        "validate",
871        "sanitize",
872        "allowlist",
873    ];
874
875    fn track_untrusted_vars(&self, function: &MirFunction) -> HashSet<String> {
876        let mut tainted: HashSet<String> = HashSet::new();
877
878        for line in &function.body {
879            for source in Self::UNTRUSTED_SOURCES {
880                if line.contains(source) {
881                    if let Some(var) = self.extract_assigned_var(line) {
882                        tainted.insert(var);
883                    }
884                }
885            }
886
887            // Taint propagation
888            if line.contains(" = ") {
889                if let Some((dest, src_part)) = line.split_once(" = ") {
890                    let dest_var = dest.trim().to_string();
891                    for tvar in tainted.clone() {
892                        if self.contains_var(src_part, &tvar) {
893                            tainted.insert(dest_var.clone());
894                            break;
895                        }
896                    }
897                }
898            }
899        }
900
901        tainted
902    }
903
904    fn extract_assigned_var(&self, line: &str) -> Option<String> {
905        let line = line.trim();
906        if let Some(eq_pos) = line.find(" = ") {
907            let lhs = line[..eq_pos].trim();
908            if lhs.starts_with('_') && lhs.chars().skip(1).all(|c| c.is_ascii_digit()) {
909                return Some(lhs.to_string());
910            }
911            if lhs.starts_with("(*_") {
912                if let Some(end) = lhs.find(')') {
913                    return Some(lhs[2..end].to_string());
914                }
915            }
916        }
917        None
918    }
919
920    fn contains_var(&self, text: &str, var: &str) -> bool {
921        if text.contains(var) {
922            return true;
923        }
924        let var_num = var.trim_start_matches('_');
925        text.contains(&format!("move _{}", var_num))
926            || text.contains(&format!("copy _{}", var_num))
927            || text.contains(&format!("&_{}", var_num))
928            || text.contains(&format!("(*_{})", var_num))
929    }
930
931    fn find_unsafe_yaml_operations(
932        &self,
933        function: &MirFunction,
934        tainted: &HashSet<String>,
935    ) -> Vec<String> {
936        let mut evidence = Vec::new();
937
938        // Check for sanitization
939        for line in &function.body {
940            for sanitizer in Self::SANITIZERS {
941                if line.contains(sanitizer) {
942                    return evidence; // Has sanitization, no finding
943                }
944            }
945        }
946
947        // Look for YAML sinks with tainted arguments
948        for line in &function.body {
949            for sink in Self::YAML_SINKS {
950                if line.contains(sink) {
951                    for tvar in tainted {
952                        if self.contains_var(line, tvar) {
953                            evidence.push(line.trim().to_string());
954                            break;
955                        }
956                    }
957                }
958            }
959        }
960
961        evidence
962    }
963}
964
965impl Rule for InsecureYamlDeserializationRule {
966    fn metadata(&self) -> &RuleMetadata {
967        &self.metadata
968    }
969
970    fn evaluate(
971        &self,
972        package: &MirPackage,
973        inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
974    ) -> Vec<Finding> {
975        let mut findings = Vec::new();
976
977        for function in &package.functions {
978            if function.name.contains("test") {
979                continue;
980            }
981
982            let tainted = self.track_untrusted_vars(function);
983            if tainted.is_empty() {
984                continue;
985            }
986
987            let unsafe_ops = self.find_unsafe_yaml_operations(function, &tainted);
988            if !unsafe_ops.is_empty() {
989                findings.push(Finding {
990                    rule_id: self.metadata.id.clone(),
991                    rule_name: self.metadata.name.clone(),
992                    severity: Severity::Medium,
993                    message: format!(
994                        "Insecure YAML deserialization in `{}`. User-controlled input \
995                        passed to serde_yaml without validation. Malicious YAML can use \
996                        anchors/aliases for billion laughs attacks.",
997                        function.name
998                    ),
999                    function: function.name.clone(),
1000                    function_signature: function.signature.clone(),
1001                    evidence: unsafe_ops.into_iter().take(3).collect(),
1002                    span: function.span.clone(),
1003                    confidence: Confidence::Medium,
1004                    cwe_ids: Vec::new(),
1005                    fix_suggestion: None,
1006                    code_snippet: None,
1007                    exploitability: Exploitability::default(),
1008                    exploitability_score: Exploitability::default().score(),
1009                    ..Default::default()
1010                });
1011            }
1012        }
1013
1014        // Inter-procedural analysis (use shared analysis if available)
1015        if let Some(analysis) = inter_analysis {
1016            let flows = analysis.detect_inter_procedural_flows(package);
1017            let mut reported_functions: HashSet<String> =
1018                findings.iter().map(|f| f.function.clone()).collect();
1019
1020            for flow in flows {
1021                if flow.sink_type != "yaml" {
1022                    continue;
1023                }
1024                if flow.sink_function.contains("mir_extractor") || flow.sanitized {
1025                    continue;
1026                }
1027                if reported_functions.contains(&flow.sink_function) {
1028                    continue;
1029                }
1030
1031                let sink_func = package
1032                    .functions
1033                    .iter()
1034                    .find(|f| f.name == flow.sink_function);
1035
1036                findings.push(Finding {
1037                    rule_id: self.metadata.id.clone(),
1038                    rule_name: self.metadata.name.clone(),
1039                    severity: Severity::Medium,
1040                    message: format!(
1041                        "Inter-procedural YAML injection: untrusted input from `{}` \
1042                        flows to YAML deserialization in `{}`.",
1043                        flow.source_function, flow.sink_function
1044                    ),
1045                    function: flow.sink_function.clone(),
1046                    function_signature: sink_func.map(|f| f.signature.clone()).unwrap_or_default(),
1047                    evidence: vec![flow.describe()],
1048                    span: sink_func.map(|f| f.span.clone()).unwrap_or_default(),
1049                    ..Default::default()
1050                });
1051                reported_functions.insert(flow.sink_function);
1052            }
1053        }
1054
1055        findings
1056    }
1057}
1058
1059// ============================================================================
1060// RUSTCOLA090: Unbounded Read Operations
1061// ============================================================================
1062
/// Detects read_to_end/read_to_string on untrusted sources without size limits.
///
/// Registered under rule id `RUSTCOLA090` (see `new`). The check is
/// function-wide: it looks for source, sink and limit patterns anywhere in the
/// MIR body rather than tracking which value flows into which read.
pub struct UnboundedReadRule {
    /// Static description of the rule (id, name, severity, ...), handed out by
    /// `Rule::metadata`.
    metadata: RuleMetadata,
}
1067
1068impl UnboundedReadRule {
1069    pub fn new() -> Self {
1070        Self {
1071            metadata: RuleMetadata {
1072                id: "RUSTCOLA090".to_string(),
1073                name: "unbounded-read-to-end".to_string(),
1074                short_description: "Unbounded read on untrusted source".to_string(),
1075                full_description: "read_to_end() or read_to_string() is called on an \
1076                    untrusted source (network stream, stdin, user-controlled file) without \
1077                    size limits. Attackers can send arbitrarily large payloads to exhaust \
1078                    server memory. Use .take(max_size) to limit bytes read."
1079                    .to_string(),
1080                help_uri: Some("https://cwe.mitre.org/data/definitions/400.html".to_string()),
1081                default_severity: Severity::Medium,
1082                origin: RuleOrigin::BuiltIn,
1083                cwe_ids: Vec::new(),
1084                fix_suggestion: None,
1085                exploitability: Exploitability::default(),
1086            },
1087        }
1088    }
1089
1090    const UNTRUSTED_SOURCES: &'static [&'static str] = &[
1091        "TcpStream::connect",
1092        "TcpListener::accept",
1093        "UnixStream::connect",
1094        "::connect(",
1095        "::accept(",
1096        "<TcpStream",
1097        "<UnixStream",
1098        "io::stdin",
1099        "stdin()",
1100        "Stdin",
1101        "env::var",
1102        "env::args",
1103        "var::<",
1104        "args::<",
1105        "Args>",
1106        "File::open",
1107    ];
1108
1109    const UNBOUNDED_SINKS: &'static [&'static str] = &[
1110        "read_to_end",
1111        "read_to_string",
1112        "Read>::read_to_end",
1113        "Read>::read_to_string",
1114    ];
1115
1116    const SAFE_PATTERNS: &'static [&'static str] = &[
1117        ".take(",
1118        "take(",
1119        "metadata(",
1120        ".len()",
1121        "MAX_SIZE",
1122        "max_size",
1123        "limit",
1124        "chunk",
1125    ];
1126
1127    fn has_untrusted_source(&self, function: &MirFunction) -> bool {
1128        for line in &function.body {
1129            for source in Self::UNTRUSTED_SOURCES {
1130                if line.contains(source) {
1131                    return true;
1132                }
1133            }
1134        }
1135        false
1136    }
1137
1138    fn has_safe_limit(&self, function: &MirFunction) -> bool {
1139        for line in &function.body {
1140            for pattern in Self::SAFE_PATTERNS {
1141                if line.to_lowercase().contains(&pattern.to_lowercase()) {
1142                    return true;
1143                }
1144            }
1145        }
1146        false
1147    }
1148
1149    fn find_unbounded_reads(&self, function: &MirFunction) -> Vec<String> {
1150        let mut evidence = Vec::new();
1151        for line in &function.body {
1152            for sink in Self::UNBOUNDED_SINKS {
1153                if line.contains(sink) {
1154                    evidence.push(line.trim().to_string());
1155                }
1156            }
1157        }
1158        evidence
1159    }
1160}
1161
1162impl Rule for UnboundedReadRule {
1163    fn metadata(&self) -> &RuleMetadata {
1164        &self.metadata
1165    }
1166
1167    fn evaluate(
1168        &self,
1169        package: &MirPackage,
1170        _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
1171    ) -> Vec<Finding> {
1172        let mut findings = Vec::new();
1173
1174        for function in &package.functions {
1175            if function.name.contains("test") {
1176                continue;
1177            }
1178            if !self.has_untrusted_source(function) {
1179                continue;
1180            }
1181            if self.has_safe_limit(function) {
1182                continue;
1183            }
1184
1185            let unbounded_reads = self.find_unbounded_reads(function);
1186            if !unbounded_reads.is_empty() {
1187                let body_str = function.body.join("\n");
1188                let severity = if body_str.contains("TcpStream") || body_str.contains("UnixStream")
1189                {
1190                    Severity::High
1191                } else {
1192                    Severity::Medium
1193                };
1194
1195                findings.push(Finding {
1196                    rule_id: self.metadata.id.clone(),
1197                    rule_name: self.metadata.name.clone(),
1198                    severity,
1199                    message: format!(
1200                        "Unbounded read in `{}`. read_to_end()/read_to_string() without \
1201                        size limits. Use .take(max_bytes) to limit the read size.",
1202                        function.name
1203                    ),
1204                    function: function.name.clone(),
1205                    function_signature: function.signature.clone(),
1206                    evidence: unbounded_reads.into_iter().take(3).collect(),
1207                    span: function.span.clone(),
1208                    confidence: Confidence::Medium,
1209                    cwe_ids: Vec::new(),
1210                    fix_suggestion: None,
1211                    code_snippet: None,
1212                    exploitability: Exploitability::default(),
1213                    exploitability_score: Exploitability::default().score(),
1214                    ..Default::default()
1215                });
1216            }
1217        }
1218
1219        findings
1220    }
1221}
1222
1223// ============================================================================
1224// RUSTCOLA091: Insecure JSON/TOML Deserialization
1225// ============================================================================
1226
/// Detects untrusted input passed to JSON/TOML deserialization functions.
///
/// Registered under rule id `RUSTCOLA091` (see `new`). Detection is a textual
/// heuristic over MIR body lines rather than a full data-flow analysis.
pub struct InsecureJsonTomlDeserializationRule {
    /// Static description of the rule (id, name, severity, ...), handed out by
    /// `Rule::metadata`.
    metadata: RuleMetadata,
}
1231
1232impl InsecureJsonTomlDeserializationRule {
1233    pub fn new() -> Self {
1234        Self {
1235            metadata: RuleMetadata {
1236                id: "RUSTCOLA091".to_string(),
1237                name: "insecure-json-toml-deserialization".to_string(),
1238                short_description: "Untrusted input in JSON/TOML deserialization".to_string(),
1239                full_description: "User-controlled input is passed to serde_json or toml \
1240                    deserialization functions without validation. Attackers can craft \
1241                    deeply nested structures to cause stack overflow, or very large \
1242                    payloads to cause memory exhaustion."
1243                    .to_string(),
1244                help_uri: Some(
1245                    "https://owasp.org/www-project-web-security-testing-guide/".to_string(),
1246                ),
1247                default_severity: Severity::Medium,
1248                origin: RuleOrigin::BuiltIn,
1249                cwe_ids: Vec::new(),
1250                fix_suggestion: None,
1251                exploitability: Exploitability::default(),
1252            },
1253        }
1254    }
1255
1256    const SINKS: &'static [&'static str] = &[
1257        "serde_json::from_str",
1258        "serde_json::from_slice",
1259        "serde_json::from_reader",
1260        "serde_json::from_str::",
1261        "serde_json::from_slice::",
1262        "serde_json::from_reader::",
1263        "toml::from_str",
1264        "toml::de::from_str",
1265    ];
1266
1267    const UNTRUSTED_SOURCES: &'static [&'static str] = &[
1268        "env::var",
1269        "env::var_os",
1270        "std::env::var",
1271        "var::<",
1272        "var_os::<",
1273        "env::args",
1274        "std::env::args",
1275        "args::<",
1276        "= args()",
1277        "Args>",
1278        "stdin",
1279        "Stdin",
1280        "read_to_string",
1281        "read_to_end",
1282        "File::open",
1283        "TcpStream",
1284        "::connect(",
1285    ];
1286
1287    fn track_untrusted_vars(&self, function: &MirFunction) -> HashSet<String> {
1288        let mut tainted: HashSet<String> = HashSet::new();
1289
1290        for line in &function.body {
1291            for source in Self::UNTRUSTED_SOURCES {
1292                if line.contains(source) {
1293                    if let Some(var) = self.extract_assigned_var(line) {
1294                        tainted.insert(var);
1295                    }
1296                }
1297            }
1298
1299            if line.contains(" = ") {
1300                if let Some((dest, src_part)) = line.split_once(" = ") {
1301                    let dest_var = dest.trim().to_string();
1302                    for tvar in tainted.clone() {
1303                        if self.contains_var(src_part, &tvar) {
1304                            tainted.insert(dest_var.clone());
1305                            break;
1306                        }
1307                    }
1308                }
1309            }
1310        }
1311
1312        tainted
1313    }
1314
1315    fn extract_assigned_var(&self, line: &str) -> Option<String> {
1316        let line = line.trim();
1317        if let Some(eq_pos) = line.find(" = ") {
1318            let lhs = line[..eq_pos].trim();
1319            if lhs.starts_with('_') && lhs.chars().skip(1).all(|c| c.is_ascii_digit()) {
1320                return Some(lhs.to_string());
1321            }
1322        }
1323        None
1324    }
1325
1326    fn contains_var(&self, text: &str, var: &str) -> bool {
1327        if text.contains(var) {
1328            return true;
1329        }
1330        let var_num = var.trim_start_matches('_');
1331        text.contains(&format!("move _{}", var_num)) || text.contains(&format!("copy _{}", var_num))
1332    }
1333
1334    fn has_size_limit_check(&self, function: &MirFunction, tainted: &HashSet<String>) -> bool {
1335        let mut len_result_vars: HashSet<String> = HashSet::new();
1336
1337        for line in &function.body {
1338            let is_string_len = (line.contains("String::len(") || line.contains("str::len("))
1339                && !line.contains("Vec<");
1340
1341            if is_string_len {
1342                for tvar in tainted {
1343                    if self.contains_var(line, tvar) {
1344                        if let Some(var) = self.extract_assigned_var(line) {
1345                            len_result_vars.insert(var);
1346                        }
1347                    }
1348                }
1349            }
1350
1351            if line.contains("Gt(")
1352                || line.contains("Lt(")
1353                || line.contains("Ge(")
1354                || line.contains("Le(")
1355            {
1356                for len_var in &len_result_vars {
1357                    if self.contains_var(line, len_var) {
1358                        return true;
1359                    }
1360                }
1361            }
1362        }
1363
1364        false
1365    }
1366
1367    fn find_unsafe_operations(
1368        &self,
1369        function: &MirFunction,
1370        tainted: &HashSet<String>,
1371    ) -> Vec<String> {
1372        let mut unsafe_ops = Vec::new();
1373
1374        if self.has_size_limit_check(function, tainted) {
1375            return unsafe_ops;
1376        }
1377
1378        for line in &function.body {
1379            let is_sink = Self::SINKS.iter().any(|sink| line.contains(sink));
1380            if !is_sink {
1381                continue;
1382            }
1383
1384            let taint_flows = tainted.iter().any(|t| self.contains_var(line, t));
1385            if taint_flows {
1386                unsafe_ops.push(line.trim().to_string());
1387            }
1388        }
1389
1390        unsafe_ops
1391    }
1392}
1393
1394impl Rule for InsecureJsonTomlDeserializationRule {
1395    fn metadata(&self) -> &RuleMetadata {
1396        &self.metadata
1397    }
1398
1399    fn evaluate(
1400        &self,
1401        package: &MirPackage,
1402        _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
1403    ) -> Vec<Finding> {
1404        let mut findings = Vec::new();
1405
1406        for function in &package.functions {
1407            if function.name.contains("test") {
1408                continue;
1409            }
1410
1411            let tainted = self.track_untrusted_vars(function);
1412            if tainted.is_empty() {
1413                continue;
1414            }
1415
1416            let unsafe_ops = self.find_unsafe_operations(function, &tainted);
1417            if !unsafe_ops.is_empty() {
1418                let is_toml = unsafe_ops.iter().any(|op| op.contains("toml::"));
1419                let format_name = if is_toml { "TOML" } else { "JSON" };
1420
1421                findings.push(Finding {
1422                    rule_id: self.metadata.id.clone(),
1423                    rule_name: self.metadata.name.clone(),
1424                    severity: Severity::Medium,
1425                    message: format!(
1426                        "Insecure {} deserialization in `{}`. User-controlled input \
1427                        passed without validation. Deeply nested structures can cause \
1428                        stack overflow.",
1429                        format_name, function.name
1430                    ),
1431                    function: function.name.clone(),
1432                    function_signature: function.signature.clone(),
1433                    evidence: unsafe_ops.into_iter().take(3).collect(),
1434                    span: function.span.clone(),
1435                    confidence: Confidence::Medium,
1436                    cwe_ids: Vec::new(),
1437                    fix_suggestion: None,
1438                    code_snippet: None,
1439                    exploitability: Exploitability::default(),
1440                    exploitability_score: Exploitability::default().score(),
1441                    ..Default::default()
1442                });
1443            }
1444        }
1445
1446        findings
1447    }
1448}
1449
1450// ============================================================================
1451// RUSTCOLA081: Serde serialize_* length mismatch
1452// ============================================================================
1453
/// Detects when the declared length argument to serialize_struct/serialize_tuple/etc
/// doesn't match the actual number of serialize_field/serialize_element calls.
///
/// Registered under rule id `RUSTCOLA081` (see `new`).
pub struct SerdeLengthMismatchRule {
    /// Static description of the rule (id, name, severity, ...).
    metadata: RuleMetadata,
}
1459
1460impl SerdeLengthMismatchRule {
1461    pub fn new() -> Self {
1462        Self {
1463            metadata: RuleMetadata {
1464                id: "RUSTCOLA081".to_string(),
1465                name: "serde-length-mismatch".to_string(),
1466                short_description: "Serde serialize_* length mismatch".to_string(),
1467                full_description: "Detects when the declared field/element count in \
1468                    serialize_struct/serialize_tuple/etc doesn't match the actual number \
1469                    of serialize_field/serialize_element calls. This mismatch can cause \
1470                    deserialization failures, data corruption, or panics in binary formats \
1471                    like bincode, postcard, or MessagePack that rely on precise length hints."
1472                    .to_string(),
1473                default_severity: Severity::Medium,
1474                origin: RuleOrigin::BuiltIn,
1475                cwe_ids: Vec::new(),
1476                fix_suggestion: None,
1477                help_uri: None,
1478                exploitability: Exploitability::default(),
1479            },
1480        }
1481    }
1482
1483    fn find_serializer_declarations(body: &[String]) -> Vec<(String, String, usize, String)> {
1484        let mut declarations = Vec::new();
1485
1486        let mut var_values: std::collections::HashMap<String, usize> =
1487            std::collections::HashMap::new();
1488        for line in body {
1489            let trimmed = line.trim();
1490            if trimmed.contains("Option::<usize>::Some(const ") {
1491                if let Some(eq_pos) = trimmed.find(" = ") {
1492                    let var_name = trimmed[..eq_pos].trim().to_string();
1493                    if let Some(start) = trimmed.find("Some(const ") {
1494                        let after = &trimmed[start + 11..];
1495                        if let Some(end) = after.find("_usize") {
1496                            if let Ok(val) = after[..end].trim().parse::<usize>() {
1497                                var_values.insert(var_name, val);
1498                            }
1499                        }
1500                    }
1501                }
1502            }
1503        }
1504
1505        for line in body {
1506            let trimmed = line.trim();
1507
1508            if trimmed.contains("serialize_struct(")
1509                && !trimmed.contains("serialize_struct_variant")
1510            {
1511                if let Some(decl) = Self::extract_struct_declaration(trimmed) {
1512                    declarations.push(("struct".to_string(), decl.0, decl.1, trimmed.to_string()));
1513                }
1514            }
1515
1516            if trimmed.contains("serialize_tuple(")
1517                && !trimmed.contains("serialize_tuple_struct")
1518                && !trimmed.contains("serialize_tuple_variant")
1519            {
1520                if let Some(len) = Self::extract_tuple_length(trimmed) {
1521                    declarations.push((
1522                        "tuple".to_string(),
1523                        "".to_string(),
1524                        len,
1525                        trimmed.to_string(),
1526                    ));
1527                }
1528            }
1529
1530            if trimmed.contains("serialize_tuple_struct(") {
1531                if let Some(decl) = Self::extract_struct_declaration(trimmed) {
1532                    declarations.push((
1533                        "tuple_struct".to_string(),
1534                        decl.0,
1535                        decl.1,
1536                        trimmed.to_string(),
1537                    ));
1538                }
1539            }
1540
1541            if trimmed.contains("serialize_seq(") {
1542                if let Some(len) = Self::extract_seq_length(trimmed) {
1543                    declarations.push((
1544                        "seq".to_string(),
1545                        "".to_string(),
1546                        len,
1547                        trimmed.to_string(),
1548                    ));
1549                } else if let Some(len) = Self::extract_seq_length_from_var(trimmed, &var_values) {
1550                    declarations.push((
1551                        "seq".to_string(),
1552                        "".to_string(),
1553                        len,
1554                        trimmed.to_string(),
1555                    ));
1556                }
1557            }
1558
1559            if trimmed.contains("serialize_map(") {
1560                if let Some(len) = Self::extract_map_length(trimmed) {
1561                    declarations.push((
1562                        "map".to_string(),
1563                        "".to_string(),
1564                        len,
1565                        trimmed.to_string(),
1566                    ));
1567                } else if let Some(len) = Self::extract_map_length_from_var(trimmed, &var_values) {
1568                    declarations.push((
1569                        "map".to_string(),
1570                        "".to_string(),
1571                        len,
1572                        trimmed.to_string(),
1573                    ));
1574                }
1575            }
1576        }
1577
1578        declarations
1579    }
1580
1581    fn extract_struct_declaration(line: &str) -> Option<(String, usize)> {
1582        let name_start = line.find("const \"")? + 7;
1583        let name_end = line[name_start..].find("\"")? + name_start;
1584        let name = line[name_start..name_end].to_string();
1585
1586        let after_name = &line[name_end..];
1587        if let Some(const_pos) = after_name.find("const ") {
1588            let len_start = const_pos + 6;
1589            let len_str = &after_name[len_start..];
1590            if let Some(usize_pos) = len_str.find("_usize") {
1591                let num_str = &len_str[..usize_pos];
1592                if let Ok(len) = num_str.trim().parse::<usize>() {
1593                    return Some((name, len));
1594                }
1595            }
1596        }
1597
1598        None
1599    }
1600
1601    fn extract_tuple_length(line: &str) -> Option<usize> {
1602        if let Some(const_pos) = line.rfind("const ") {
1603            let after_const = &line[const_pos + 6..];
1604            if let Some(usize_pos) = after_const.find("_usize") {
1605                let num_str = &after_const[..usize_pos];
1606                if let Ok(len) = num_str.trim().parse::<usize>() {
1607                    return Some(len);
1608                }
1609            }
1610        }
1611        None
1612    }
1613
1614    fn extract_seq_length(line: &str) -> Option<usize> {
1615        if line.contains("Option::<usize>::None") || line.contains("None::<usize>") {
1616            return None;
1617        }
1618
1619        if let Some(const_pos) = line.rfind("const ") {
1620            let after_const = &line[const_pos + 6..];
1621            if let Some(usize_pos) = after_const.find("_usize") {
1622                let num_str = &after_const[..usize_pos];
1623                if let Ok(len) = num_str.trim().parse::<usize>() {
1624                    return Some(len);
1625                }
1626            }
1627        }
1628
1629        None
1630    }
1631
1632    fn extract_map_length(line: &str) -> Option<usize> {
1633        Self::extract_seq_length(line)
1634    }
1635
1636    fn extract_seq_length_from_var(
1637        line: &str,
1638        var_values: &std::collections::HashMap<String, usize>,
1639    ) -> Option<usize> {
1640        if let Some(paren_start) = line.find("serialize_seq(") {
1641            let after = &line[paren_start..];
1642            for (var, val) in var_values {
1643                if after.contains(&format!("move {}", var))
1644                    || after.contains(&format!(", {})", var))
1645                {
1646                    return Some(*val);
1647                }
1648            }
1649        }
1650        None
1651    }
1652
1653    fn extract_map_length_from_var(
1654        line: &str,
1655        var_values: &std::collections::HashMap<String, usize>,
1656    ) -> Option<usize> {
1657        if let Some(paren_start) = line.find("serialize_map(") {
1658            let after = &line[paren_start..];
1659            for (var, val) in var_values {
1660                if after.contains(&format!("move {}", var))
1661                    || after.contains(&format!(", {})", var))
1662                {
1663                    return Some(*val);
1664                }
1665            }
1666        }
1667        None
1668    }
1669
1670    fn count_serialize_fields(body: &[String]) -> usize {
1671        body.iter()
1672            .filter(|line| {
1673                let trimmed = line.trim();
1674                trimmed.contains("SerializeStruct>::serialize_field")
1675                    || trimmed.contains("SerializeStructVariant>::serialize_field")
1676            })
1677            .count()
1678    }
1679
1680    fn count_serialize_elements(body: &[String]) -> usize {
1681        body.iter()
1682            .filter(|line| {
1683                let trimmed = line.trim();
1684                trimmed.contains("SerializeTuple>::serialize_element")
1685                    || trimmed.contains("SerializeTupleStruct>::serialize_field")
1686            })
1687            .count()
1688    }
1689
1690    fn count_seq_elements(body: &[String]) -> usize {
1691        body.iter()
1692            .filter(|line| {
1693                let trimmed = line.trim();
1694                trimmed.contains("SerializeSeq>::serialize_element")
1695            })
1696            .count()
1697    }
1698
1699    fn count_map_entries(body: &[String]) -> usize {
1700        body.iter()
1701            .filter(|line| {
1702                let trimmed = line.trim();
1703                trimmed.contains("SerializeMap>::serialize_entry")
1704                    || trimmed.contains("SerializeMap>::serialize_key")
1705            })
1706            .count()
1707    }
1708
1709    fn has_loop_serialization(body: &[String]) -> bool {
1710        let body_str = body.join("\n");
1711
1712        body_str.contains("switchInt")
1713            && (body_str.contains("IntoIterator")
1714                || body_str.contains("Iterator>::next")
1715                || body_str.contains("Range"))
1716    }
1717}
1718
1719impl Rule for SerdeLengthMismatchRule {
1720    fn metadata(&self) -> &RuleMetadata {
1721        &self.metadata
1722    }
1723
1724    fn evaluate(
1725        &self,
1726        package: &MirPackage,
1727        _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
1728    ) -> Vec<Finding> {
1729        let mut findings = Vec::new();
1730
1731        for function in &package.functions {
1732            if !function.name.contains("serialize") && !function.signature.contains("Serialize") {
1733                continue;
1734            }
1735
1736            let declarations = Self::find_serializer_declarations(&function.body);
1737
1738            if declarations.is_empty() {
1739                continue;
1740            }
1741
1742            for (ser_type, name, declared_len, decl_line) in &declarations {
1743                let has_loop = Self::has_loop_serialization(&function.body);
1744
1745                let actual_count = match ser_type.as_str() {
1746                    "struct" => Self::count_serialize_fields(&function.body),
1747                    "tuple" | "tuple_struct" => Self::count_serialize_elements(&function.body),
1748                    "seq" => {
1749                        if has_loop {
1750                            usize::MAX
1751                        } else {
1752                            Self::count_seq_elements(&function.body)
1753                        }
1754                    }
1755                    "map" => {
1756                        if has_loop {
1757                            usize::MAX
1758                        } else {
1759                            Self::count_map_entries(&function.body)
1760                        }
1761                    }
1762                    _ => continue,
1763                };
1764
1765                if actual_count == usize::MAX {
1766                    let type_desc = match ser_type.as_str() {
1767                        "seq" => "sequence",
1768                        "map" => "map",
1769                        _ => "collection",
1770                    };
1771
1772                    let name_info = if name.is_empty() {
1773                        String::new()
1774                    } else {
1775                        format!(" for `{}`", name)
1776                    };
1777
1778                    findings.push(Finding {
1779                        rule_id: self.metadata.id.clone(),
1780                        rule_name: self.metadata.name.clone(),
1781                        severity: self.metadata.default_severity,
1782                        message: format!(
1783                            "Serde serialize_{}{} declares constant length {} but uses loop-based serialization. \
1784                            The hardcoded length hint will likely not match the actual number of {} entries. \
1785                            Use `None` for dynamic-length collections or use `self.{}.len()` instead.",
1786                            ser_type, name_info, declared_len, type_desc,
1787                            if ser_type == "seq" { "data" } else { "items" }
1788                        ),
1789                        function: function.name.clone(),
1790                        function_signature: function.signature.clone(),
1791                        evidence: vec![decl_line.clone()],
1792                        span: function.span.clone(),
1793                    confidence: Confidence::Medium,
1794                    cwe_ids: Vec::new(),
1795                    fix_suggestion: None,
1796                    code_snippet: None,
1797                exploitability: Exploitability::default(),
1798                exploitability_score: Exploitability::default().score(),
1799                ..Default::default()
1800                    });
1801                    continue;
1802                }
1803
1804                if actual_count != *declared_len {
1805                    let type_desc = match ser_type.as_str() {
1806                        "struct" => "struct fields",
1807                        "tuple" | "tuple_struct" => "tuple elements",
1808                        "seq" => "sequence elements",
1809                        "map" => "map entries",
1810                        _ => "elements",
1811                    };
1812
1813                    let name_info = if name.is_empty() {
1814                        String::new()
1815                    } else {
1816                        format!(" for `{}`", name)
1817                    };
1818
1819                    findings.push(Finding {
1820                        rule_id: self.metadata.id.clone(),
1821                        rule_name: self.metadata.name.clone(),
1822                        severity: self.metadata.default_severity,
1823                        message: format!(
1824                            "Serde serialize_{}{} declares {} {} but actually serializes {}. \
1825                            This mismatch can cause deserialization failures in binary formats. \
1826                            Update the length argument to match the actual count.",
1827                            ser_type, name_info, declared_len, type_desc, actual_count
1828                        ),
1829                        function: function.name.clone(),
1830                        function_signature: function.signature.clone(),
1831                        evidence: vec![decl_line.clone()],
1832                        span: function.span.clone(),
1833                        confidence: Confidence::Medium,
1834                        cwe_ids: Vec::new(),
1835                        fix_suggestion: None,
1836                        code_snippet: None,
1837                        exploitability: Exploitability::default(),
1838                        exploitability_score: Exploitability::default().score(),
1839                        ..Default::default()
1840                    });
1841                }
1842            }
1843        }
1844
1845        findings
1846    }
1847}
1848
1849// ============================================================================
1850// RUSTCOLA106: Unchecked Timestamp Multiplication Rule
1851// ============================================================================
1852
1853/// Detects unchecked multiplication when converting time units (seconds to nanos, etc.).
1854///
1855/// Time unit conversions often involve multiplying by large constants (1_000_000_000 for
1856/// seconds to nanoseconds). Without overflow checks, this can silently wrap around,
1857/// causing incorrect timestamps.
1858pub struct UncheckedTimestampMultiplicationRule {
1859    metadata: RuleMetadata,
1860}
1861
1862impl UncheckedTimestampMultiplicationRule {
1863    pub fn new() -> Self {
1864        Self {
1865            metadata: RuleMetadata {
1866                id: "RUSTCOLA106".to_string(),
1867                name: "unchecked-timestamp-multiplication".to_string(),
1868                short_description: "Unchecked multiplication in timestamp conversion".to_string(),
1869                full_description: "Detects unchecked multiplication when converting time units. \
1870                    Conversions like seconds to nanoseconds (multiply by 1_000_000_000) can \
1871                    overflow for large values. Use checked_mul() or saturating_mul() to handle \
1872                    overflow correctly. Pattern found in InfluxDB research."
1873                    .to_string(),
1874                help_uri: None,
1875                default_severity: Severity::Medium,
1876                origin: RuleOrigin::BuiltIn,
1877                cwe_ids: Vec::new(),
1878                fix_suggestion: None,
1879                exploitability: Exploitability::default(),
1880            },
1881        }
1882    }
1883
1884    /// Large multipliers that indicate time unit conversion
1885    fn time_multipliers() -> &'static [(&'static str, &'static str)] {
1886        &[
1887            ("1_000_000_000", "seconds to nanoseconds"),
1888            ("1000000000", "seconds to nanoseconds"),
1889            ("1_000_000", "seconds to microseconds or millis to nanos"),
1890            ("1000000", "seconds to microseconds or millis to nanos"),
1891            ("1_000", "seconds to milliseconds or millis to micros"),
1892            ("86_400", "days to seconds"),
1893            ("86400", "days to seconds"),
1894            ("3_600", "hours to seconds"),
1895            ("3600", "hours to seconds"),
1896        ]
1897    }
1898}
1899
1900impl Rule for UncheckedTimestampMultiplicationRule {
1901    fn metadata(&self) -> &RuleMetadata {
1902        &self.metadata
1903    }
1904
1905    fn evaluate(
1906        &self,
1907        package: &MirPackage,
1908        _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
1909    ) -> Vec<Finding> {
1910        if package.crate_name == "mir-extractor" {
1911            return Vec::new();
1912        }
1913
1914        let mut findings = Vec::new();
1915        let crate_root = Path::new(&package.crate_root);
1916
1917        if !crate_root.exists() {
1918            return findings;
1919        }
1920
1921        for entry in WalkDir::new(crate_root)
1922            .into_iter()
1923            .filter_entry(|e| filter_entry(e))
1924        {
1925            let entry = match entry {
1926                Ok(e) => e,
1927                Err(_) => continue,
1928            };
1929
1930            if !entry.file_type().is_file() {
1931                continue;
1932            }
1933
1934            let path = entry.path();
1935            if path.extension() != Some(OsStr::new("rs")) {
1936                continue;
1937            }
1938
1939            let rel_path = path
1940                .strip_prefix(crate_root)
1941                .unwrap_or(path)
1942                .to_string_lossy()
1943                .replace('\\', "/");
1944
1945            let content = match fs::read_to_string(path) {
1946                Ok(c) => c,
1947                Err(_) => continue,
1948            };
1949
1950            let lines: Vec<&str> = content.lines().collect();
1951
1952            for (idx, line) in lines.iter().enumerate() {
1953                let trimmed = line.trim();
1954
1955                // Skip comments
1956                if trimmed.starts_with("//") {
1957                    continue;
1958                }
1959
1960                // Skip if already using checked/saturating operations
1961                if trimmed.contains("checked_mul")
1962                    || trimmed.contains("saturating_mul")
1963                    || trimmed.contains("overflowing_mul")
1964                    || trimmed.contains("wrapping_mul")
1965                {
1966                    continue;
1967                }
1968
1969                // Check for unchecked multiplication with time constants
1970                for (multiplier, conversion_type) in Self::time_multipliers() {
1971                    // Look for patterns like: value * 1_000_000_000 or 1_000_000_000 * value
1972                    if trimmed.contains(multiplier) && trimmed.contains('*') {
1973                        // Additional check: is this likely a timestamp context?
1974                        let is_time_context = trimmed.contains("sec")
1975                            || trimmed.contains("time")
1976                            || trimmed.contains("nano")
1977                            || trimmed.contains("micro")
1978                            || trimmed.contains("milli")
1979                            || trimmed.contains("duration")
1980                            || trimmed.contains("timestamp")
1981                            || trimmed.contains("epoch");
1982
1983                        // Also flag if function name suggests time handling
1984                        let fn_context = lines[..idx].iter().rev().take(15).any(|l| {
1985                            l.contains("fn ")
1986                                && (l.contains("time")
1987                                    || l.contains("sec")
1988                                    || l.contains("nano")
1989                                    || l.contains("duration")
1990                                    || l.contains("timestamp")
1991                                    || l.contains("to_"))
1992                        });
1993
1994                        if is_time_context || fn_context {
1995                            let location = format!("{}:{}", rel_path, idx + 1);
1996
1997                            findings.push(Finding {
1998                                rule_id: self.metadata.id.clone(),
1999                                rule_name: self.metadata.name.clone(),
2000                                severity: self.metadata.default_severity,
2001                                message: format!(
2002                                    "Unchecked multiplication by {} ({}). \
2003                                    This can overflow for large values. Use checked_mul() \
2004                                    or saturating_mul() for safe conversion.",
2005                                    multiplier, conversion_type
2006                                ),
2007                                function: location,
2008                                function_signature: String::new(),
2009                                evidence: vec![trimmed.to_string()],
2010                                span: None,
2011                                ..Default::default()
2012                            });
2013                        }
2014                    }
2015                }
2016            }
2017        }
2018
2019        findings
2020    }
2021}
2022
2023// ============================================================================
2024// Registration
2025// ============================================================================
2026
2027/// Register all input validation rules with the rule engine.
2028pub fn register_input_rules(engine: &mut crate::RuleEngine) {
2029    engine.register_rule(Box::new(CleartextEnvVarRule::new()));
2030    engine.register_rule(Box::new(EnvVarLiteralRule::new()));
2031    engine.register_rule(Box::new(InvisibleUnicodeRule::new()));
2032    engine.register_rule(Box::new(UntrimmedStdinRule::new()));
2033    engine.register_rule(Box::new(InfiniteIteratorRule::new()));
2034    engine.register_rule(Box::new(DivisionByUntrustedRule::new()));
2035    engine.register_rule(Box::new(InsecureYamlDeserializationRule::new()));
2036    engine.register_rule(Box::new(UnboundedReadRule::new()));
2037    engine.register_rule(Box::new(InsecureJsonTomlDeserializationRule::new()));
2038    engine.register_rule(Box::new(SerdeLengthMismatchRule::new()));
2039    engine.register_rule(Box::new(UncheckedTimestampMultiplicationRule::new()));
2040}
mir_extractor/rules/input.rs

mir_extractor/rules/
input.rs