Skip to main content

mir_extractor/rules/
ffi.rs

1//! FFI safety rules.
2//!
//! Rules detecting FFI-related security issues:
//! - Allocator mismatch between Rust and C (RUSTCOLA017)
//! - Unsafe CString pointer usage (RUSTCOLA036)
//! - Packed field references (RUSTCOLA035)
//! - `#[ctor]`/`#[dtor]` functions invoking std APIs (RUSTCOLA059)
//! - FFI buffer leaks (RUSTCOLA016)
//! - FFI pointer returns (RUSTCOLA073)
9
10use super::filter_entry;
11use super::utils::{strip_string_literals, StringLiteralState};
12use crate::{
13    Confidence, Exploitability, Finding, MirPackage, Rule, RuleMetadata, RuleOrigin, Severity,
14};
15use std::collections::{HashMap, HashSet};
16use std::ffi::OsStr;
17use std::fs;
18use std::path::Path;
19use walkdir::WalkDir;
20
21// ============================================================================
22// RUSTCOLA017: Allocator Mismatch FFI Rule
23// ============================================================================
24
/// Rule RUSTCOLA017: detects functions that mix Rust and foreign allocation APIs.
///
/// Examples are freeing a `Box`/`CString` allocation with `free`, or wrapping a
/// `malloc`/`calloc`/`realloc` pointer with `Box::from_raw` or `CString::from_raw`.
pub struct AllocatorMismatchFfiRule {
    /// Static description of the rule (id, name, severity, help link).
    metadata: RuleMetadata,
}
29
30impl AllocatorMismatchFfiRule {
31    pub fn new() -> Self {
32        Self {
33            metadata: RuleMetadata {
34                id: "RUSTCOLA017".to_string(),
35                name: "allocator-mismatch".to_string(),
36                short_description: "Mixed allocator/deallocator usage".to_string(),
37                full_description: "Detects functions that mix Rust and foreign allocation APIs, \
38                    such as freeing Box/CString allocations with libc::free or wrapping \
39                    libc::malloc pointers with Box::from_raw."
40                    .to_string(),
41                help_uri: Some(
42                    "https://doc.rust-lang.org/std/boxed/struct.Box.html#method.from_raw"
43                        .to_string(),
44                ),
45                default_severity: Severity::High,
46                origin: RuleOrigin::BuiltIn,
47                cwe_ids: Vec::new(),
48                fix_suggestion: None,
49                exploitability: Exploitability::default(),
50            },
51        }
52    }
53}
54
55impl Rule for AllocatorMismatchFfiRule {
56    fn metadata(&self) -> &RuleMetadata {
57        &self.metadata
58    }
59
60    fn cache_key(&self) -> String {
61        format!("{}:v1", self.metadata.id)
62    }
63
64    fn evaluate(
65        &self,
66        package: &MirPackage,
67        _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
68    ) -> Vec<Finding> {
69        let mut findings = Vec::new();
70
71        for function in &package.functions {
72            // Track Rust-allocated pointers (Box::into_raw, CString::into_raw)
73            let mut rust_allocated_vars = Vec::new();
74
75            // Track C-allocated pointers (malloc, calloc, realloc)
76            let mut c_allocated_vars = Vec::new();
77
78            // Track variable aliases (e.g., _4 = copy _2)
79            let mut var_aliases: HashMap<String, String> = HashMap::new();
80
81            for (idx, line) in function.body.iter().enumerate() {
82                // Track variable aliases: "_4 = copy _2" or "_4 = move _2"
83                if (line.contains(" = copy ") || line.contains(" = move "))
84                    && line.trim().starts_with('_')
85                {
86                    let parts: Vec<&str> = line.split('=').collect();
87                    if parts.len() >= 2 {
88                        let lhs = parts[0].trim();
89                        let rhs = parts[1].trim();
90                        if let Some(src_var) = rhs.split_whitespace().nth(1) {
91                            if src_var.starts_with('_') {
92                                var_aliases.insert(lhs.to_string(), src_var.to_string());
93                            }
94                        }
95                    }
96                }
97
98                // Detect Rust allocations: Box::into_raw, CString::into_raw
99                if (line.contains("Box::") && line.contains("::into_raw")
100                    || line.contains("CString::") && line.contains("::into_raw"))
101                    && line.contains(" = ")
102                {
103                    if let Some(var_name) = line.trim().split('=').next() {
104                        let var = var_name.trim().to_string();
105                        rust_allocated_vars.push((var.clone(), idx, line.trim().to_string()));
106                    }
107                }
108
109                // Detect C allocations: malloc, calloc, realloc
110                if (line.contains("malloc(")
111                    || line.contains("calloc(")
112                    || line.contains("realloc("))
113                    && line.contains(" = ")
114                {
115                    if let Some(var_name) = line.trim().split('=').next() {
116                        let var = var_name.trim().to_string();
117                        c_allocated_vars.push((var.clone(), idx, line.trim().to_string()));
118                    }
119                }
120
121                // Check for libc::free on Rust-allocated pointers
122                if line.contains("free(") {
123                    for (rust_var, alloc_idx, alloc_line) in &rust_allocated_vars {
124                        let mut is_freed = line.contains(rust_var);
125
126                        for (alias, original) in &var_aliases {
127                            if original == rust_var && line.contains(alias) {
128                                is_freed = true;
129                                break;
130                            }
131                        }
132
133                        if is_freed && idx > *alloc_idx && idx < alloc_idx + 50 {
134                            findings.push(Finding {
135                                rule_id: self.metadata.id.clone(),
136                                rule_name: self.metadata.name.clone(),
137                                severity: self.metadata.default_severity,
138                                message: format!(
139                                    "Rust-allocated pointer freed with libc::free in `{}`",
140                                    function.name
141                                ),
142                                function: function.name.clone(),
143                                function_signature: function.signature.clone(),
144                                evidence: vec![
145                                    format!("Rust allocation: {}", alloc_line),
146                                    format!("C deallocation: {}", line.trim()),
147                                ],
148                                span: function.span.clone(),
149                                confidence: Confidence::Medium,
150                                cwe_ids: Vec::new(),
151                                fix_suggestion: None,
152                                code_snippet: None,
153                                exploitability: Exploitability::default(),
154                                exploitability_score: Exploitability::default().score(),
155                            ..Default::default()
156                            });
157                        }
158                    }
159                }
160
161                // Check for Box::from_raw on C-allocated pointers
162                if line.contains("Box::") && line.contains("::from_raw(") {
163                    for (c_var, alloc_idx, alloc_line) in &c_allocated_vars {
164                        let mut is_converted = line.contains(c_var);
165
166                        for (alias, original) in &var_aliases {
167                            if original == c_var && line.contains(alias) {
168                                is_converted = true;
169                                break;
170                            }
171                        }
172
173                        if is_converted && idx > *alloc_idx && idx < alloc_idx + 50 {
174                            findings.push(Finding {
175                                rule_id: self.metadata.id.clone(),
176                                rule_name: self.metadata.name.clone(),
177                                severity: self.metadata.default_severity,
178                                message: format!(
179                                    "C-allocated pointer converted to Box::from_raw in `{}`",
180                                    function.name
181                                ),
182                                function: function.name.clone(),
183                                function_signature: function.signature.clone(),
184                                evidence: vec![
185                                    format!("C allocation: {}", alloc_line),
186                                    format!("Rust deallocation: {}", line.trim()),
187                                ],
188                                span: function.span.clone(),
189                                confidence: Confidence::Medium,
190                                cwe_ids: Vec::new(),
191                                fix_suggestion: None,
192                                code_snippet: None,
193                                exploitability: Exploitability::default(),
194                                exploitability_score: Exploitability::default().score(),
195                            ..Default::default()
196                            });
197                        }
198                    }
199                }
200
201                // Check for CString::from_raw on C-allocated strings
202                if line.contains("CString::") && line.contains("::from_raw(") {
203                    for (c_var, alloc_idx, alloc_line) in &c_allocated_vars {
204                        let mut is_converted = line.contains(c_var);
205
206                        for (alias, original) in &var_aliases {
207                            if original == c_var && line.contains(alias) {
208                                is_converted = true;
209                                break;
210                            }
211                        }
212
213                        if is_converted && idx > *alloc_idx && idx < alloc_idx + 50 {
214                            findings.push(Finding {
215                                rule_id: self.metadata.id.clone(),
216                                rule_name: self.metadata.name.clone(),
217                                severity: self.metadata.default_severity,
218                                message: format!(
219                                    "C-allocated pointer converted to CString::from_raw in `{}`",
220                                    function.name
221                                ),
222                                function: function.name.clone(),
223                                function_signature: function.signature.clone(),
224                                evidence: vec![
225                                    format!("C allocation: {}", alloc_line),
226                                    format!("Rust deallocation: {}", line.trim()),
227                                ],
228                                span: function.span.clone(),
229                                confidence: Confidence::Medium,
230                                cwe_ids: Vec::new(),
231                                fix_suggestion: None,
232                                code_snippet: None,
233                                exploitability: Exploitability::default(),
234                                exploitability_score: Exploitability::default().score(),
235                            ..Default::default()
236                            });
237                        }
238                    }
239                }
240            }
241        }
242
243        findings
244    }
245}
246
247// ============================================================================
248// RUSTCOLA073: Unsafe FFI Pointer Return Rule
249// ============================================================================
250
/// Rule RUSTCOLA073: detects `extern "C"` / `extern "system"` functions that
/// return raw pointers without safety documentation.
pub struct UnsafeFfiPointerReturnRule {
    /// Static description of the rule (id, name, severity, help link).
    metadata: RuleMetadata,
}
255
256impl UnsafeFfiPointerReturnRule {
257    pub fn new() -> Self {
258        Self {
259            metadata: RuleMetadata {
260                id: "RUSTCOLA073".to_string(),
261                name: "unsafe-ffi-pointer-return".to_string(),
262                short_description: "FFI function returns raw pointer without safety invariants".to_string(),
263                full_description: "Detects extern \"C\" functions that return raw pointers (*const T or *mut T). \
264                    These functions expose memory that must be managed correctly by callers, but the Rust \
265                    type system cannot enforce this across FFI boundaries. Functions returning raw pointers \
266                    should document ownership semantics (who frees the memory), lifetime requirements, \
267                    and validity invariants.".to_string(),
268                help_uri: Some("https://doc.rust-lang.org/nomicon/ffi.html".to_string()),
269                default_severity: Severity::Medium,
270                origin: RuleOrigin::BuiltIn,
271                cwe_ids: Vec::new(),
272                fix_suggestion: None,
273                exploitability: Exploitability::default(),
274            },
275        }
276    }
277
278    /// Check if a function signature indicates an extern "C" function returning a pointer
279    fn is_ffi_returning_pointer(signature: &str, body: &[String]) -> Option<String> {
280        if !signature.contains("extern \"C\"") && !signature.contains("extern \"system\"") {
281            return None;
282        }
283
284        if let Some(arrow_pos) = signature.find("->") {
285            let return_type = signature[arrow_pos + 2..].trim();
286            if return_type.starts_with("*const") || return_type.starts_with("*mut") {
287                let has_safety_doc = body.iter().any(|line| {
288                    let lower = line.to_lowercase();
289                    lower.contains("safety:")
290                        || lower.contains("# safety")
291                        || lower.contains("invariant")
292                        || lower.contains("ownership")
293                        || lower.contains("caller must")
294                        || lower.contains("must be freed")
295                });
296
297                if !has_safety_doc {
298                    return Some(return_type.to_string());
299                }
300            }
301        }
302
303        None
304    }
305}
306
307impl Rule for UnsafeFfiPointerReturnRule {
308    fn metadata(&self) -> &RuleMetadata {
309        &self.metadata
310    }
311
312    fn evaluate(
313        &self,
314        package: &MirPackage,
315        _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
316    ) -> Vec<Finding> {
317        let mut findings = Vec::new();
318
319        for function in &package.functions {
320            if let Some(return_type) =
321                Self::is_ffi_returning_pointer(&function.signature, &function.body)
322            {
323                findings.push(Finding {
324                    rule_id: self.metadata.id.clone(),
325                    rule_name: self.metadata.name.clone(),
326                    severity: self.metadata.default_severity,
327                    message: format!(
328                        "extern \"C\" function `{}` returns raw pointer `{}` without documented safety invariants.",
329                        function.name,
330                        return_type
331                    ),
332                    function: function.name.clone(),
333                    function_signature: function.signature.clone(),
334                    evidence: vec![
335                        format!("Returns: {}", return_type),
336                        "No safety documentation found".to_string(),
337                    ],
338                    span: function.span.clone(),
339                    confidence: Confidence::Medium,
340                    cwe_ids: Vec::new(),
341                    fix_suggestion: None,
342                    code_snippet: None,
343                exploitability: Exploitability::default(),
344                exploitability_score: Exploitability::default().score(),
345                ..Default::default()
346                });
347            }
348        }
349
350        findings
351    }
352}
353
354// ============================================================================
355// RUSTCOLA035: Packed Field Reference Rule
356// ============================================================================
357
/// Rule RUSTCOLA035: detects taking references to fields of `#[repr(packed)]`
/// structs (undefined behavior).
pub struct PackedFieldReferenceRule {
    /// Static description of the rule (id, name, severity, help link).
    metadata: RuleMetadata,
}
362
363impl PackedFieldReferenceRule {
364    pub fn new() -> Self {
365        Self {
366            metadata: RuleMetadata {
367                id: "RUSTCOLA035".to_string(),
368                name: "repr-packed-field-reference".to_string(),
369                short_description: "Reference to packed struct field".to_string(),
370                full_description: "Detects taking references to fields of #[repr(packed)] structs. \
371                    Creating references to packed struct fields creates unaligned references, which \
372                    is undefined behavior in Rust. Use ptr::addr_of! or ptr::addr_of_mut! instead.".to_string(),
373                help_uri: Some("https://doc.rust-lang.org/nomicon/other-reprs.html#reprpacked".to_string()),
374                default_severity: Severity::High,
375                origin: RuleOrigin::BuiltIn,
376                cwe_ids: Vec::new(),
377                fix_suggestion: None,
378                exploitability: Exploitability::default(),
379            },
380        }
381    }
382}
383
384impl Rule for PackedFieldReferenceRule {
385    fn metadata(&self) -> &RuleMetadata {
386        &self.metadata
387    }
388
389    fn evaluate(
390        &self,
391        package: &MirPackage,
392        _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
393    ) -> Vec<Finding> {
394        if package.crate_name == "mir-extractor" {
395            return Vec::new();
396        }
397
398        let mut findings = Vec::new();
399        let crate_root = Path::new(&package.crate_root);
400
401        if !crate_root.exists() {
402            return findings;
403        }
404
405        // First pass: identify packed structs
406        let mut packed_structs = HashSet::new();
407
408        for entry in WalkDir::new(crate_root)
409            .into_iter()
410            .filter_entry(|e| filter_entry(e))
411        {
412            let entry = match entry {
413                Ok(e) => e,
414                Err(_) => continue,
415            };
416
417            if !entry.file_type().is_file() {
418                continue;
419            }
420
421            let path = entry.path();
422            if path.extension() != Some(OsStr::new("rs")) {
423                continue;
424            }
425
426            let content = match fs::read_to_string(path) {
427                Ok(c) => c,
428                Err(_) => continue,
429            };
430
431            let lines: Vec<&str> = content.lines().collect();
432
433            for (idx, line) in lines.iter().enumerate() {
434                let trimmed = line.trim();
435
436                if trimmed.starts_with("#[repr(packed") {
437                    for j in (idx + 1).min(lines.len())..lines.len() {
438                        let struct_line = lines[j].trim();
439                        if struct_line.starts_with("struct ")
440                            || struct_line.starts_with("pub struct ")
441                        {
442                            let after_struct = if struct_line.starts_with("pub struct ") {
443                                &struct_line[11..]
444                            } else {
445                                &struct_line[7..]
446                            };
447
448                            if let Some(name_end) =
449                                after_struct.find(|c: char| !c.is_alphanumeric() && c != '_')
450                            {
451                                let struct_name = &after_struct[..name_end];
452                                packed_structs.insert(struct_name.to_string());
453                            }
454                            break;
455                        }
456                    }
457                }
458            }
459        }
460
461        // Second pass: look for references to packed struct fields
462        for entry in WalkDir::new(crate_root)
463            .into_iter()
464            .filter_entry(|e| filter_entry(e))
465        {
466            let entry = match entry {
467                Ok(e) => e,
468                Err(_) => continue,
469            };
470
471            if !entry.file_type().is_file() {
472                continue;
473            }
474
475            let path = entry.path();
476            if path.extension() != Some(OsStr::new("rs")) {
477                continue;
478            }
479
480            let rel_path = path
481                .strip_prefix(crate_root)
482                .unwrap_or(path)
483                .to_string_lossy()
484                .replace('\\', "/");
485
486            let content = match fs::read_to_string(path) {
487                Ok(c) => c,
488                Err(_) => continue,
489            };
490
491            let lines: Vec<&str> = content.lines().collect();
492
493            for (idx, line) in lines.iter().enumerate() {
494                let trimmed = line.trim();
495
496                for struct_name in &packed_structs {
497                    if (trimmed.contains(&format!("&{}", struct_name.to_lowercase()))
498                        || trimmed.contains(&format!("&mut {}", struct_name.to_lowercase()))
499                        || trimmed.contains("&self.")
500                        || trimmed.contains("&mut self."))
501                        && trimmed.contains('.')
502                        && !trimmed.contains("ptr::addr_of")
503                    {
504                        let location = format!("{}:{}", rel_path, idx + 1);
505
506                        findings.push(Finding {
507                            rule_id: self.metadata.id.clone(),
508                            rule_name: self.metadata.name.clone(),
509                            severity: self.metadata.default_severity,
510                            message: format!(
511                                "Potential reference to packed struct field (possibly {})",
512                                struct_name
513                            ),
514                            function: location.clone(),
515                            function_signature: String::new(),
516                            evidence: vec![trimmed.to_string()],
517                            span: None,
518                            ..Default::default()
519                        });
520                    }
521                }
522            }
523        }
524
525        findings
526    }
527}
528
529// ============================================================================
530// RUSTCOLA036: Unsafe CString Pointer Rule
531// ============================================================================
532
/// Rule RUSTCOLA036: detects `CString::new(...).unwrap().as_ptr()` patterns
/// where the `CString` temporary is dropped immediately, creating a dangling
/// pointer.
pub struct UnsafeCStringPointerRule {
    /// Static description of the rule (id, name, severity, help link).
    metadata: RuleMetadata,
}
538
539impl UnsafeCStringPointerRule {
540    pub fn new() -> Self {
541        Self {
542            metadata: RuleMetadata {
543                id: "RUSTCOLA036".to_string(),
544                name: "unsafe-cstring-pointer".to_string(),
545                short_description: "Unsafe CString pointer from temporary".to_string(),
546                full_description: "Detects patterns like CString::new(...).unwrap().as_ptr() where \
547                    the CString is a temporary that gets dropped immediately, leaving a dangling pointer. \
548                    The pointer must outlive the CString it came from. Store the CString in a variable \
549                    to extend its lifetime.".to_string(),
550                help_uri: Some("https://www.jetbrains.com/help/inspectopedia/RsCStringPointer.html".to_string()),
551                default_severity: Severity::High,
552                origin: RuleOrigin::BuiltIn,
553                cwe_ids: Vec::new(),
554                fix_suggestion: None,
555                exploitability: Exploitability::default(),
556            },
557        }
558    }
559
560    fn is_cstring_temp_pattern(line: &str) -> bool {
561        if !line.contains("CString::new") || !line.contains(".as_ptr()") {
562            return false;
563        }
564
565        let has_intermediate_method = line.contains(".unwrap()")
566            || line.contains(".expect(")
567            || line.contains(".unwrap_or")
568            || line.contains("?");
569
570        let looks_temporary = has_intermediate_method && !line.contains("let ");
571        let direct_chain = line.contains("CString::new(") && line.contains(").as_ptr()");
572
573        looks_temporary || direct_chain
574    }
575}
576
577impl Rule for UnsafeCStringPointerRule {
578    fn metadata(&self) -> &RuleMetadata {
579        &self.metadata
580    }
581
582    fn evaluate(
583        &self,
584        package: &MirPackage,
585        _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
586    ) -> Vec<Finding> {
587        if package.crate_name == "mir-extractor" {
588            return Vec::new();
589        }
590
591        let mut findings = Vec::new();
592        let crate_root = Path::new(&package.crate_root);
593
594        if !crate_root.exists() {
595            return findings;
596        }
597
598        for entry in WalkDir::new(crate_root)
599            .into_iter()
600            .filter_entry(|e| filter_entry(e))
601        {
602            let entry = match entry {
603                Ok(e) => e,
604                Err(_) => continue,
605            };
606
607            if !entry.file_type().is_file() {
608                continue;
609            }
610
611            let path = entry.path();
612            if path.extension() != Some(OsStr::new("rs")) {
613                continue;
614            }
615
616            let rel_path = path
617                .strip_prefix(crate_root)
618                .unwrap_or(path)
619                .to_string_lossy()
620                .replace('\\', "/");
621
622            let content = match fs::read_to_string(path) {
623                Ok(c) => c,
624                Err(_) => continue,
625            };
626
627            let lines: Vec<&str> = content.lines().collect();
628
629            for (idx, line) in lines.iter().enumerate() {
630                let trimmed = line.trim();
631
632                if Self::is_cstring_temp_pattern(trimmed) {
633                    let location = format!("{}:{}", rel_path, idx + 1);
634
635                    findings.push(Finding::new(
636                        self.metadata.id.clone(),
637                        self.metadata.name.clone(),
638                        self.metadata.default_severity,
639                        "CString temporary used with as_ptr() creates dangling pointer".to_string(),
640                        location,
641                        String::new(),
642                        vec![trimmed.to_string()],
643                        None,
644                    ));
645                }
646            }
647        }
648
649        findings
650    }
651}
652
653// ============================================================================
654// RUSTCOLA059: Ctor/Dtor Std API Rule
655// ============================================================================
656
/// Rule RUSTCOLA059: detects functions annotated with `#[ctor]` or `#[dtor]`
/// that call `std::` APIs.
pub struct CtorDtorStdApiRule {
    /// Static description of the rule (id, name, severity, help link).
    metadata: RuleMetadata,
}
661
662impl CtorDtorStdApiRule {
663    pub fn new() -> Self {
664        Self {
665            metadata: RuleMetadata {
666                id: "RUSTCOLA059".to_string(),
667                name: "ctor-dtor-std-api".to_string(),
668                short_description: "#[ctor]/#[dtor] invoking std APIs".to_string(),
669                full_description: "Detects functions annotated with #[ctor] or #[dtor] that call std:: APIs. Code running in constructors/destructors (before main or during program teardown) can cause initialization ordering issues, deadlocks, or undefined behavior when calling standard library functions that expect a fully initialized runtime. Mirrors CodeQL rust/ctor-initialization.".to_string(),
670                help_uri: Some("https://docs.rs/ctor/latest/ctor/".to_string()),
671                default_severity: Severity::Medium,
672                origin: RuleOrigin::BuiltIn,
673                cwe_ids: Vec::new(),
674                fix_suggestion: None,
675                exploitability: Exploitability::default(),
676            },
677        }
678    }
679
680    fn looks_like_ctor_dtor_with_std_calls(&self, function: &crate::MirFunction) -> bool {
681        let name = &function.name;
682
683        // Exclude the rule implementation itself
684        if name.contains("CtorDtorStdApiRule")
685            || name.contains("looks_like_ctor_dtor_with_std_calls")
686        {
687            return false;
688        }
689
690        // Heuristic: Look for functions that start with ctor_ or dtor_
691        // These are likely annotated with #[ctor] or #[dtor]
692        // Note: This won't catch all cases (e.g., different naming), but is a reasonable heuristic
693        let looks_like_ctor_dtor_name = name.starts_with("ctor_") || name.starts_with("dtor_");
694
695        if !looks_like_ctor_dtor_name {
696            return false;
697        }
698
699        // Check for std:: API calls or common std patterns in the body or signature
700        let has_std_refs = function.body.iter().any(|line| {
701            line.contains("std::") || line.contains("_print(") // println!/print! desugars to _print
702        }) || function.signature.contains("std::");
703
704        has_std_refs
705    }
706}
707
708impl Rule for CtorDtorStdApiRule {
709    fn metadata(&self) -> &RuleMetadata {
710        &self.metadata
711    }
712
713    fn evaluate(
714        &self,
715        package: &MirPackage,
716        _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
717    ) -> Vec<Finding> {
718        let mut findings = Vec::new();
719
720        for function in &package.functions {
721            if self.looks_like_ctor_dtor_with_std_calls(function) {
722                // Collect evidence of std:: calls or _print
723                let mut evidence = vec![];
724                for line in &function.body {
725                    if line.contains("std::") || line.contains("_print(") {
726                        evidence.push(line.clone());
727                        if evidence.len() >= 3 {
728                            break;
729                        }
730                    }
731                }
732
733                if !evidence.is_empty() {
734                    findings.push(Finding {
735                        rule_id: self.metadata.id.clone(),
736                        rule_name: self.metadata.name.clone(),
737                        severity: self.metadata.default_severity,
738                        message: "Constructor/destructor function calls std library APIs. Code running before main() or during program teardown can cause initialization issues, deadlocks, or undefined behavior.".to_string(),
739                        function: function.name.clone(),
740                        function_signature: function.signature.clone(),
741                        evidence,
742                        span: function.span.clone(),
743                    ..Default::default()
744                    });
745                }
746            }
747        }
748
749        findings
750    }
751}
752
753// ============================================================================
754// RUSTCOLA016: FFI Buffer Leak Rule
755// ============================================================================
756
757/// Detects extern functions that hand out raw pointers or heap buffers and contain
758/// early-return code paths, risking leaks or dangling pointers when cleanup is skipped.
759pub struct FfiBufferLeakRule {
760    metadata: RuleMetadata,
761}
762
763impl FfiBufferLeakRule {
764    pub fn new() -> Self {
765        Self {
766            metadata: RuleMetadata {
767                id: "RUSTCOLA016".to_string(),
768                name: "ffi-buffer-leak-early-return".to_string(),
769                short_description: "FFI buffer escapes with early return".to_string(),
770                full_description: "Detects extern functions that hand out raw pointers or heap buffers and contain early-return code paths, risking leaks or dangling pointers when cleanup is skipped.".to_string(),
771                help_uri: None,
772                default_severity: Severity::High,
773                origin: RuleOrigin::BuiltIn,
774                cwe_ids: Vec::new(),
775                fix_suggestion: None,
776                exploitability: Exploitability::default(),
777            },
778        }
779    }
780
781    fn pointer_escape_patterns() -> &'static [&'static str] {
782        &[
783            "Box::into_raw",
784            "Vec::into_raw_parts",
785            "Vec::with_capacity",
786            "CString::into_raw",
787            ".as_mut_ptr()",
788            ".as_ptr()",
789        ]
790    }
791
792    fn captures_early_exit(line: &str, position: usize, last_index: usize) -> bool {
793        let trimmed = line.trim();
794        if trimmed.is_empty() {
795            return false;
796        }
797
798        if trimmed.contains('?') {
799            return true;
800        }
801
802        if trimmed.contains("return Err") {
803            return true;
804        }
805
806        if (trimmed.starts_with("return ") || trimmed.contains(" return ")) && position < last_index
807        {
808            return true;
809        }
810
811        false
812    }
813
814    fn is_pointer_escape(line: &str) -> bool {
815        Self::pointer_escape_patterns()
816            .iter()
817            .any(|needle| line.contains(needle))
818    }
819}
820
821impl Rule for FfiBufferLeakRule {
822    fn metadata(&self) -> &RuleMetadata {
823        &self.metadata
824    }
825
826    fn evaluate(
827        &self,
828        package: &MirPackage,
829        _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
830    ) -> Vec<Finding> {
831        let mut findings = Vec::new();
832        let crate_root = Path::new(&package.crate_root);
833
834        if !crate_root.exists() {
835            return findings;
836        }
837
838        for entry in WalkDir::new(crate_root)
839            .into_iter()
840            .filter_entry(|e| filter_entry(e))
841        {
842            let entry = match entry {
843                Ok(e) => e,
844                Err(_) => continue,
845            };
846
847            if !entry.file_type().is_file() {
848                continue;
849            }
850
851            if entry.path().extension().and_then(OsStr::to_str) != Some("rs") {
852                continue;
853            }
854
855            let Ok(source) = fs::read_to_string(entry.path()) else {
856                continue;
857            };
858
859            let rel_path = entry
860                .path()
861                .strip_prefix(crate_root)
862                .unwrap_or_else(|_| entry.path())
863                .to_string_lossy()
864                .replace('\\', "/");
865
866            let lines: Vec<&str> = source.lines().collect();
867            let mut idx = 0usize;
868            let mut string_state = StringLiteralState::default();
869            let mut pending_no_mangle: Option<usize> = None;
870            let mut pending_extern: Option<usize> = None;
871
872            while idx < lines.len() {
873                let raw_line = lines[idx];
874                let (sanitized_line, state_after_line) =
875                    strip_string_literals(string_state, raw_line);
876                let trimmed = sanitized_line.trim();
877                let trimmed_original = raw_line.trim();
878
879                if trimmed.starts_with("#[no_mangle") {
880                    pending_no_mangle = Some(idx);
881                    string_state = state_after_line;
882                    idx += 1;
883                    continue;
884                }
885
886                if trimmed.contains("extern \"C\"") && !trimmed.contains("fn ") {
887                    pending_extern = Some(idx);
888                    string_state = state_after_line;
889                    idx += 1;
890                    continue;
891                }
892
893                let mut is_ffi_fn = false;
894                let mut start_idx = idx;
895
896                if trimmed.contains("extern \"C\"") && trimmed.contains("fn ") {
897                    is_ffi_fn = true;
898                } else if pending_extern.is_some() && trimmed.contains("fn ") {
899                    is_ffi_fn = true;
900                    start_idx = pending_extern.unwrap();
901                } else if pending_no_mangle.is_some() && trimmed.contains("fn ") {
902                    is_ffi_fn = true;
903                    start_idx = pending_no_mangle.unwrap();
904                }
905
906                if !is_ffi_fn {
907                    if !trimmed.is_empty() && !trimmed.starts_with("#[") {
908                        pending_no_mangle = None;
909                        pending_extern = None;
910                    }
911                    string_state = state_after_line;
912                    idx += 1;
913                    continue;
914                }
915
916                let mut block_lines: Vec<String> = Vec::new();
917                let mut sanitized_block: Vec<String> = Vec::new();
918                if start_idx < idx {
919                    for attr_idx in start_idx..idx {
920                        let attr_line = lines[attr_idx].trim();
921                        if !attr_line.is_empty() {
922                            block_lines.push(attr_line.to_string());
923                            sanitized_block.push(attr_line.to_string());
924                        }
925                    }
926                }
927
928                if !trimmed_original.is_empty() {
929                    block_lines.push(trimmed_original.to_string());
930                    sanitized_block.push(trimmed.to_string());
931                }
932
933                let mut brace_balance: i32 = 0;
934                let mut body_started = false;
935                let mut j = idx;
936                let mut current_state = state_after_line;
937                let mut current_sanitized = sanitized_line;
938
939                loop {
940                    let trimmed_sanitized = current_sanitized.trim();
941                    let opens = current_sanitized.chars().filter(|c| *c == '{').count() as i32;
942                    let closes = current_sanitized.chars().filter(|c| *c == '}').count() as i32;
943                    brace_balance += opens;
944                    if brace_balance > 0 {
945                        body_started = true;
946                    }
947                    brace_balance -= closes;
948
949                    let body_done = if body_started && brace_balance <= 0 {
950                        true
951                    } else if !body_started && trimmed_sanitized.ends_with(';') {
952                        true
953                    } else {
954                        false
955                    };
956
957                    if body_done {
958                        j += 1;
959                        break;
960                    }
961
962                    j += 1;
963                    if j >= lines.len() {
964                        break;
965                    }
966
967                    let next_line = lines[j];
968                    let (next_sanitized, next_state) =
969                        strip_string_literals(current_state, next_line);
970                    current_state = next_state;
971
972                    let trimmed_original_next = next_line.trim();
973                    if !trimmed_original_next.is_empty() {
974                        block_lines.push(trimmed_original_next.to_string());
975                        sanitized_block.push(next_sanitized.trim().to_string());
976                    }
977
978                    current_sanitized = next_sanitized;
979                }
980
981                let signature_line = block_lines
982                    .iter()
983                    .find(|line| line.contains("fn "))
984                    .cloned()
985                    .unwrap_or_else(|| block_lines.first().cloned().unwrap_or_default());
986
987                let last_index = sanitized_block
988                    .iter()
989                    .rposition(|line| !line.trim().is_empty())
990                    .unwrap_or(0);
991
992                let pointer_lines: Vec<String> = block_lines
993                    .iter()
994                    .zip(sanitized_block.iter())
995                    .filter_map(|(line, sanitized)| {
996                        if Self::is_pointer_escape(sanitized) {
997                            Some(line.clone())
998                        } else {
999                            None
1000                        }
1001                    })
1002                    .collect();
1003
1004                let early_lines: Vec<(usize, String)> = sanitized_block
1005                    .iter()
1006                    .enumerate()
1007                    .filter_map(|(pos, sanitized)| {
1008                        if Self::captures_early_exit(sanitized, pos, last_index) {
1009                            Some((pos, block_lines[pos].clone()))
1010                        } else {
1011                            None
1012                        }
1013                    })
1014                    .collect();
1015
1016                if !pointer_lines.is_empty() && !early_lines.is_empty() {
1017                    let mut evidence = Vec::new();
1018                    let mut seen = HashSet::new();
1019
1020                    for line in pointer_lines
1021                        .iter()
1022                        .chain(early_lines.iter().map(|(_, l)| l))
1023                    {
1024                        if seen.insert(line.clone()) {
1025                            evidence.push(line.clone());
1026                        }
1027                    }
1028
1029                    let location = format!("{}:{}", rel_path, start_idx + 1);
1030                    findings.push(Finding {
1031                        rule_id: self.metadata.id.clone(),
1032                        rule_name: self.metadata.name.clone(),
1033                        severity: self.metadata.default_severity,
1034                        message: "Potential FFI buffer leak due to early return before cleanup"
1035                            .to_string(),
1036                        function: location,
1037                        function_signature: signature_line,
1038                        evidence,
1039                        span: None,
1040                        ..Default::default()
1041                    });
1042                }
1043
1044                pending_no_mangle = None;
1045                pending_extern = None;
1046                string_state = current_state;
1047                idx = j;
1048            }
1049        }
1050
1051        findings
1052    }
1053}
1054
1055// ============================================================================
1056// RUSTCOLA116: Panic in FFI Boundary Rule
1057// ============================================================================
1058
1059/// Detects potential panics in extern "C" functions which cause undefined behavior.
1060///
1061/// Unwinding across FFI boundaries (from Rust into C code) is undefined behavior.
1062/// This rule detects panic-prone operations inside `extern "C"` functions.
1063pub struct PanicInFfiBoundaryRule {
1064    metadata: RuleMetadata,
1065}
1066
1067impl PanicInFfiBoundaryRule {
1068    pub fn new() -> Self {
1069        Self {
1070            metadata: RuleMetadata {
1071                id: "RUSTCOLA116".to_string(),
1072                name: "panic-in-ffi-boundary".to_string(),
1073                short_description: "Potential panic in extern \"C\" function".to_string(),
1074                full_description: "Detects potential panics in extern \"C\" functions. Unwinding \
1075                    across FFI boundaries is undefined behavior in Rust. Operations like unwrap(), \
1076                    expect(), panic!(), assert!(), and indexing can all panic. Use catch_unwind \
1077                    or return error codes instead."
1078                    .to_string(),
1079                help_uri: Some(
1080                    "https://doc.rust-lang.org/nomicon/ffi.html#ffi-and-panics".to_string(),
1081                ),
1082                default_severity: Severity::High,
1083                origin: RuleOrigin::BuiltIn,
1084                cwe_ids: Vec::new(),
1085                fix_suggestion: None,
1086                exploitability: Exploitability::default(),
1087            },
1088        }
1089    }
1090
1091    /// Patterns that can cause panics
1092    fn panic_patterns() -> &'static [(&'static str, &'static str)] {
1093        &[
1094            (".unwrap()", "unwrap() can panic on None/Err"),
1095            (".expect(", "expect() can panic on None/Err"),
1096            ("panic!", "explicit panic"),
1097            ("unreachable!", "unreachable! panics if reached"),
1098            ("unimplemented!", "unimplemented! always panics"),
1099            ("todo!", "todo! always panics"),
1100            ("assert!", "assert! panics on false"),
1101            ("assert_eq!", "assert_eq! panics on mismatch"),
1102            ("assert_ne!", "assert_ne! panics on match"),
1103            ("debug_assert!", "debug_assert! panics in debug builds"),
1104            ("[", "array/slice indexing can panic on out-of-bounds"),
1105        ]
1106    }
1107}
1108
1109impl Rule for PanicInFfiBoundaryRule {
1110    fn metadata(&self) -> &RuleMetadata {
1111        &self.metadata
1112    }
1113
1114    fn evaluate(
1115        &self,
1116        package: &MirPackage,
1117        _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
1118    ) -> Vec<Finding> {
1119        if package.crate_name == "mir-extractor" {
1120            return Vec::new();
1121        }
1122
1123        let mut findings = Vec::new();
1124        let crate_root = Path::new(&package.crate_root);
1125
1126        if !crate_root.exists() {
1127            return findings;
1128        }
1129
1130        for entry in WalkDir::new(crate_root)
1131            .into_iter()
1132            .filter_entry(|e| filter_entry(e))
1133        {
1134            let entry = match entry {
1135                Ok(e) => e,
1136                Err(_) => continue,
1137            };
1138
1139            if !entry.file_type().is_file() {
1140                continue;
1141            }
1142
1143            let path = entry.path();
1144            if path.extension() != Some(OsStr::new("rs")) {
1145                continue;
1146            }
1147
1148            let rel_path = path
1149                .strip_prefix(crate_root)
1150                .unwrap_or(path)
1151                .to_string_lossy()
1152                .replace('\\', "/");
1153
1154            let content = match fs::read_to_string(path) {
1155                Ok(c) => c,
1156                Err(_) => continue,
1157            };
1158
1159            let lines: Vec<&str> = content.lines().collect();
1160            let mut in_extern_c_fn = false;
1161            let mut extern_fn_start = 0;
1162            let mut extern_fn_name = String::new();
1163            let mut brace_depth = 0;
1164
1165            for (idx, line) in lines.iter().enumerate() {
1166                let trimmed = line.trim();
1167
1168                // Skip comments
1169                if trimmed.starts_with("//") {
1170                    continue;
1171                }
1172
1173                // Detect extern "C" fn or #[no_mangle] pub extern "C" fn
1174                if (trimmed.contains("extern \"C\"") || trimmed.contains("extern \"system\""))
1175                    && trimmed.contains("fn ")
1176                {
1177                    in_extern_c_fn = true;
1178                    extern_fn_start = idx;
1179                    brace_depth = 0;
1180
1181                    // Extract function name
1182                    if let Some(fn_pos) = trimmed.find("fn ") {
1183                        let after_fn = &trimmed[fn_pos + 3..];
1184                        extern_fn_name = after_fn
1185                            .split(|c: char| c == '(' || c == '<' || c.is_whitespace())
1186                            .next()
1187                            .unwrap_or("")
1188                            .to_string();
1189                    }
1190                }
1191
1192                if in_extern_c_fn {
1193                    brace_depth += trimmed.chars().filter(|&c| c == '{').count() as i32;
1194                    brace_depth -= trimmed.chars().filter(|&c| c == '}').count() as i32;
1195
1196                    // Check for panic-prone patterns
1197                    for (pattern, reason) in Self::panic_patterns() {
1198                        // Special handling for indexing - only flag if it looks like array access
1199                        if *pattern == "[" {
1200                            // Look for variable[index] pattern but not slice declarations
1201                            if trimmed.contains('[')
1202                                && trimmed.contains(']')
1203                                && !trimmed.contains("&[")
1204                                && !trimmed.contains(": [")
1205                                && !trimmed.contains("-> [")
1206                                && !trimmed.starts_with("let ")
1207                                && !trimmed.starts_with("const ")
1208                                && !trimmed.starts_with("static ")
1209                            {
1210                                // Check if it's an actual indexing operation
1211                                let has_index_op = trimmed
1212                                    .chars()
1213                                    .zip(trimmed.chars().skip(1))
1214                                    .any(|(a, b)| a.is_alphanumeric() && b == '[');
1215
1216                                if has_index_op {
1217                                    let location = format!("{}:{}", rel_path, idx + 1);
1218
1219                                    findings.push(Finding {
1220                                        rule_id: self.metadata.id.clone(),
1221                                        rule_name: self.metadata.name.clone(),
1222                                        severity: Severity::Medium, // Lower for indexing
1223                                        message: format!(
1224                                            "Potential panic in extern \"C\" fn `{}`: {}. \
1225                                            Consider using .get() with bounds checking.",
1226                                            extern_fn_name, reason
1227                                        ),
1228                                        function: location,
1229                                        function_signature: String::new(),
1230                                        evidence: vec![trimmed.to_string()],
1231                                        span: None,
1232                                        ..Default::default()
1233                                    });
1234                                }
1235                            }
1236                        } else if trimmed.contains(pattern) {
1237                            let location = format!("{}:{}", rel_path, idx + 1);
1238
1239                            findings.push(Finding {
1240                                rule_id: self.metadata.id.clone(),
1241                                rule_name: self.metadata.name.clone(),
1242                                severity: self.metadata.default_severity,
1243                                message: format!(
1244                                    "Potential panic in extern \"C\" fn `{}`: {}. \
1245                                    Unwinding across FFI boundaries is undefined behavior. \
1246                                    Use catch_unwind or return error codes.",
1247                                    extern_fn_name, reason
1248                                ),
1249                                function: location,
1250                                function_signature: String::new(),
1251                                evidence: vec![trimmed.to_string()],
1252                                span: None,
1253                                ..Default::default()
1254                            });
1255                        }
1256                    }
1257
1258                    // End of function
1259                    if brace_depth <= 0 && idx > extern_fn_start {
1260                        in_extern_c_fn = false;
1261                    }
1262                }
1263            }
1264        }
1265
1266        findings
1267    }
1268}
1269
1270// ============================================================================
1271// RUSTCOLA107: Embedded Interpreter Usage Rule
1272// ============================================================================
1273
1274/// Detects usage of embedded interpreters which create code injection attack surfaces.
1275///
1276/// Embedded interpreters like PyO3, rlua, v8, deno_core can execute arbitrary code
1277/// if not properly sandboxed. This rule flags their usage for security review.
1278pub struct EmbeddedInterpreterUsageRule {
1279    metadata: RuleMetadata,
1280}
1281
1282impl EmbeddedInterpreterUsageRule {
1283    pub fn new() -> Self {
1284        Self {
1285            metadata: RuleMetadata {
1286                id: "RUSTCOLA107".to_string(),
1287                name: "embedded-interpreter-usage".to_string(),
1288                short_description: "Embedded interpreter creates code injection surface"
1289                    .to_string(),
1290                full_description: "Detects usage of embedded interpreters like PyO3 (Python), \
1291                    rlua/mlua (Lua), rusty_v8/deno_core (JavaScript). These create potential \
1292                    code injection attack surfaces if user input reaches the interpreter. \
1293                    Ensure proper sandboxing and input validation."
1294                    .to_string(),
1295                help_uri: None,
1296                default_severity: Severity::Medium,
1297                origin: RuleOrigin::BuiltIn,
1298                cwe_ids: Vec::new(),
1299                fix_suggestion: None,
1300                exploitability: Exploitability::default(),
1301            },
1302        }
1303    }
1304
1305    /// Interpreter crates and their initialization patterns
1306    fn interpreter_patterns() -> &'static [(&'static str, &'static str, &'static str)] {
1307        &[
1308            ("pyo3", "Python::with_gil", "Python interpreter (PyO3)"),
1309            ("pyo3", "Python::acquire_gil", "Python interpreter (PyO3)"),
1310            (
1311                "pyo3",
1312                "prepare_freethreaded_python",
1313                "Python interpreter (PyO3)",
1314            ),
1315            ("rlua", "Lua::new", "Lua interpreter (rlua)"),
1316            ("mlua", "Lua::new", "Lua interpreter (mlua)"),
1317            ("rusty_v8", "v8::Isolate", "V8 JavaScript engine"),
1318            ("deno_core", "JsRuntime::new", "Deno JavaScript runtime"),
1319            ("rhai", "Engine::new", "Rhai scripting engine"),
1320            ("rquickjs", "Context::new", "QuickJS runtime"),
1321            ("wasmer", "Instance::new", "WebAssembly runtime (Wasmer)"),
1322            (
1323                "wasmtime",
1324                "Instance::new",
1325                "WebAssembly runtime (Wasmtime)",
1326            ),
1327        ]
1328    }
1329}
1330
1331impl Rule for EmbeddedInterpreterUsageRule {
1332    fn metadata(&self) -> &RuleMetadata {
1333        &self.metadata
1334    }
1335
1336    fn evaluate(
1337        &self,
1338        package: &MirPackage,
1339        _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
1340    ) -> Vec<Finding> {
1341        if package.crate_name == "mir-extractor" {
1342            return Vec::new();
1343        }
1344
1345        let mut findings = Vec::new();
1346        let crate_root = Path::new(&package.crate_root);
1347
1348        if !crate_root.exists() {
1349            return findings;
1350        }
1351
1352        for entry in WalkDir::new(crate_root)
1353            .into_iter()
1354            .filter_entry(|e| filter_entry(e))
1355        {
1356            let entry = match entry {
1357                Ok(e) => e,
1358                Err(_) => continue,
1359            };
1360
1361            if !entry.file_type().is_file() {
1362                continue;
1363            }
1364
1365            let path = entry.path();
1366            if path.extension() != Some(OsStr::new("rs")) {
1367                continue;
1368            }
1369
1370            let rel_path = path
1371                .strip_prefix(crate_root)
1372                .unwrap_or(path)
1373                .to_string_lossy()
1374                .replace('\\', "/");
1375
1376            let content = match fs::read_to_string(path) {
1377                Ok(c) => c,
1378                Err(_) => continue,
1379            };
1380
1381            // Quick check: does file use any interpreter crate?
1382            let mut relevant_crates: Vec<&str> = Vec::new();
1383            for (crate_name, _, _) in Self::interpreter_patterns() {
1384                if content.contains(crate_name) && !relevant_crates.contains(crate_name) {
1385                    relevant_crates.push(crate_name);
1386                }
1387            }
1388
1389            if relevant_crates.is_empty() {
1390                continue;
1391            }
1392
1393            let lines: Vec<&str> = content.lines().collect();
1394
1395            for (idx, line) in lines.iter().enumerate() {
1396                let trimmed = line.trim();
1397
1398                // Skip comments
1399                if trimmed.starts_with("//") {
1400                    continue;
1401                }
1402
1403                for (crate_name, pattern, description) in Self::interpreter_patterns() {
1404                    if relevant_crates.contains(crate_name) && trimmed.contains(pattern) {
1405                        let location = format!("{}:{}", rel_path, idx + 1);
1406
1407                        findings.push(Finding {
1408                            rule_id: self.metadata.id.clone(),
1409                            rule_name: self.metadata.name.clone(),
1410                            severity: self.metadata.default_severity,
1411                            message: format!(
1412                                "{} detected. Embedded interpreters can execute arbitrary code. \
1413                                Ensure user input is validated before evaluation and consider \
1414                                sandboxing the interpreter context.",
1415                                description
1416                            ),
1417                            function: location,
1418                            function_signature: String::new(),
1419                            evidence: vec![trimmed.to_string()],
1420                            span: None,
1421                            ..Default::default()
1422                        });
1423                    }
1424                }
1425            }
1426        }
1427
1428        findings
1429    }
1430}
1431
1432// ============================================================================
1433// RUSTCOLA103: WASM Linear Memory OOB Rule
1434// ============================================================================
1435
1436/// Detects patterns in WASM-targeted code that may cause linear memory
1437/// out-of-bounds access.
1438///
1439/// In WebAssembly, memory is a contiguous linear array. Unchecked pointer
1440/// arithmetic or slice creation from raw pointers can access arbitrary memory.
1441pub struct WasmLinearMemoryOobRule {
1442    metadata: RuleMetadata,
1443}
1444
1445impl WasmLinearMemoryOobRule {
1446    pub fn new() -> Self {
1447        Self {
1448            metadata: RuleMetadata {
1449                id: "RUSTCOLA103".to_string(),
1450                name: "wasm-linear-memory-oob".to_string(),
1451                short_description: "WASM linear memory out-of-bounds risk".to_string(),
1452                full_description: "Detects patterns in WASM-targeted code that may allow \
1453                    out-of-bounds access to linear memory. In WASM, memory is a contiguous \
1454                    array and unchecked pointer operations can access arbitrary memory. \
1455                    Use bounds checking or safe abstractions like wasm-bindgen."
1456                    .to_string(),
1457                help_uri: Some("https://webassembly.org/docs/security/".to_string()),
1458                default_severity: Severity::High,
1459                origin: RuleOrigin::BuiltIn,
1460                cwe_ids: Vec::new(),
1461                fix_suggestion: None,
1462                exploitability: Exploitability::default(),
1463            },
1464        }
1465    }
1466
1467    /// Patterns indicating WASM memory operations
1468    fn wasm_memory_patterns() -> &'static [(&'static str, &'static str)] {
1469        &[
1470            // Raw pointer operations in WASM exports
1471            (
1472                "slice::from_raw_parts",
1473                "Creating slice from raw pointer without bounds check",
1474            ),
1475            (
1476                "slice::from_raw_parts_mut",
1477                "Creating mutable slice from raw pointer without bounds check",
1478            ),
1479            (
1480                "std::ptr::read",
1481                "Reading from raw pointer without bounds check",
1482            ),
1483            (
1484                "std::ptr::write",
1485                "Writing to raw pointer without bounds check",
1486            ),
1487            ("ptr::read", "Reading from raw pointer"),
1488            ("ptr::write", "Writing to raw pointer"),
1489            ("ptr::copy", "Copying via raw pointer"),
1490            ("ptr::copy_nonoverlapping", "Copying via raw pointer"),
1491            // Pointer arithmetic
1492            (".offset(", "Pointer offset without bounds validation"),
1493            (".add(", "Pointer addition without bounds validation"),
1494            (".sub(", "Pointer subtraction without bounds validation"),
1495        ]
1496    }
1497
1498    /// WASM-specific attributes and patterns
1499    fn wasm_export_indicators() -> &'static [&'static str] {
1500        &[
1501            "#[no_mangle]",
1502            "#[wasm_bindgen]",
1503            "extern \"C\"",
1504            "#[export_name",
1505        ]
1506    }
1507}
1508
1509impl Rule for WasmLinearMemoryOobRule {
1510    fn metadata(&self) -> &RuleMetadata {
1511        &self.metadata
1512    }
1513
1514    fn evaluate(
1515        &self,
1516        package: &MirPackage,
1517        _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
1518    ) -> Vec<Finding> {
1519        let mut findings = Vec::new();
1520        let crate_root = Path::new(&package.crate_root);
1521
1522        if !crate_root.exists() {
1523            return findings;
1524        }
1525
1526        for entry in WalkDir::new(crate_root)
1527            .into_iter()
1528            .filter_entry(|e| filter_entry(e))
1529        {
1530            let entry = match entry {
1531                Ok(e) => e,
1532                Err(_) => continue,
1533            };
1534
1535            if !entry.file_type().is_file() {
1536                continue;
1537            }
1538
1539            let path = entry.path();
1540            if path.extension() != Some(OsStr::new("rs")) {
1541                continue;
1542            }
1543
1544            let rel_path = path
1545                .strip_prefix(crate_root)
1546                .unwrap_or(path)
1547                .to_string_lossy()
1548                .replace('\\', "/");
1549
1550            let content = match fs::read_to_string(path) {
1551                Ok(c) => c,
1552                Err(_) => continue,
1553            };
1554
1555            // Quick check: is this likely WASM code?
1556            let is_wasm_target = content.contains("wasm_bindgen")
1557                || content.contains("wasm32")
1558                || content.contains("#[no_mangle]")
1559                || package.crate_name.contains("wasm");
1560
1561            if !is_wasm_target {
1562                continue;
1563            }
1564
1565            let lines: Vec<&str> = content.lines().collect();
1566            let mut in_wasm_export = false;
1567            let mut export_fn_name = String::new();
1568
1569            for (idx, line) in lines.iter().enumerate() {
1570                let trimmed = line.trim();
1571
1572                // Skip comments
1573                if trimmed.starts_with("//") {
1574                    continue;
1575                }
1576
1577                // Track WASM export functions
1578                for indicator in Self::wasm_export_indicators() {
1579                    if trimmed.contains(indicator) {
1580                        in_wasm_export = true;
1581                    }
1582                }
1583
1584                // Extract function name if we're at a function definition
1585                if in_wasm_export
1586                    && (trimmed.starts_with("pub fn ")
1587                        || trimmed.starts_with("pub unsafe fn ")
1588                        || trimmed.starts_with("fn ")
1589                        || trimmed.starts_with("unsafe fn "))
1590                {
1591                    if let Some(fn_pos) = trimmed.find("fn ") {
1592                        let after_fn = &trimmed[fn_pos + 3..];
1593                        export_fn_name = after_fn
1594                            .split(|c| c == '(' || c == '<')
1595                            .next()
1596                            .unwrap_or("")
1597                            .trim()
1598                            .to_string();
1599                    }
1600                }
1601
1602                // Reset on function end (simplified)
1603                if trimmed == "}" && in_wasm_export && !export_fn_name.is_empty() {
1604                    // Could track brace depth for accuracy
1605                }
1606
1607                // Check for dangerous memory patterns in WASM exports
1608                if in_wasm_export {
1609                    for (pattern, description) in Self::wasm_memory_patterns() {
1610                        if trimmed.contains(pattern) {
1611                            // Check if there's bounds checking nearby
1612                            let has_bounds_check = lines
1613                                [idx.saturating_sub(3)..=(idx + 1).min(lines.len() - 1)]
1614                                .iter()
1615                                .any(|l| {
1616                                    l.contains("if ")
1617                                        && (l.contains(" < ")
1618                                            || l.contains(" <= ")
1619                                            || l.contains(".len()")
1620                                            || l.contains("bounds"))
1621                                });
1622
1623                            if !has_bounds_check {
1624                                let location = format!("{}:{}", rel_path, idx + 1);
1625
1626                                findings.push(Finding {
1627                                    rule_id: self.metadata.id.clone(),
1628                                    rule_name: self.metadata.name.clone(),
1629                                    severity: self.metadata.default_severity,
1630                                    message: format!(
1631                                        "Potential WASM linear memory OOB in export '{}': {}. \
1632                                        In WebAssembly, this can access arbitrary memory. \
1633                                        Add bounds checking or use wasm-bindgen's safe abstractions.",
1634                                        export_fn_name, description
1635                                    ),
1636                                    function: location,
1637                                    function_signature: String::new(),
1638                                    evidence: vec![trimmed.to_string()],
1639                                    span: None,
1640                    ..Default::default()
1641                                });
1642                            }
1643                        }
1644                    }
1645                }
1646            }
1647        }
1648
1649        findings
1650    }
1651}
1652
1653// ============================================================================
1654// RUSTCOLA126: WASM Host Function Trust Assumptions
1655// ============================================================================
1656
1657/// Detects untrusted data from WASM host functions used without validation.
1658/// Host-provided data in WebAssembly should be treated as untrusted input.
1659pub struct WasmHostFunctionTrustRule {
1660    metadata: RuleMetadata,
1661}
1662
1663impl WasmHostFunctionTrustRule {
1664    pub fn new() -> Self {
1665        Self {
1666            metadata: RuleMetadata {
1667                id: "RUSTCOLA126".to_string(),
1668                name: "wasm-host-function-trust".to_string(),
1669                short_description: "Untrusted data from WASM host functions".to_string(),
1670                full_description: "Detects patterns where data received from WebAssembly host functions \
1671                    (wasmtime, wasmer, wasm-bindgen imports) is used without validation. Host-provided \
1672                    data should be treated as untrusted input since the host environment may be compromised \
1673                    or malicious.".to_string(),
1674                help_uri: Some("https://docs.rs/wasmtime/latest/wasmtime/".to_string()),
1675                default_severity: Severity::Medium,
1676                origin: RuleOrigin::BuiltIn,
1677                cwe_ids: Vec::new(),
1678                fix_suggestion: None,
1679                exploitability: Exploitability::default(),
1680            },
1681        }
1682    }
1683
1684    fn host_import_patterns() -> Vec<(&'static str, &'static str)> {
1685        vec![
1686            ("extern \"C\"", "C FFI import - host-provided function"),
1687            ("#[wasm_bindgen]", "wasm-bindgen import from host"),
1688            ("import_func!", "wasmer import macro"),
1689            ("Func::wrap", "wasmtime host function wrap"),
1690            ("Linker::func_wrap", "wasmtime linker import"),
1691            ("imports!", "wasmer imports macro"),
1692            ("Instance::new", "WASM instance with imports"),
1693        ]
1694    }
1695
1696    fn dangerous_usages() -> Vec<&'static str> {
1697        vec![
1698            "from_raw_parts",
1699            "from_utf8_unchecked",
1700            "transmute",
1701            "as_ptr",
1702            "offset(",
1703            "add(",
1704            "slice::from_raw_parts",
1705            "str::from_utf8_unchecked",
1706            "CStr::from_ptr",
1707        ]
1708    }
1709}
1710
1711impl Rule for WasmHostFunctionTrustRule {
1712    fn metadata(&self) -> &RuleMetadata {
1713        &self.metadata
1714    }
1715
1716    fn evaluate(
1717        &self,
1718        package: &MirPackage,
1719        _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
1720    ) -> Vec<Finding> {
1721        let mut findings = Vec::new();
1722        let crate_root = Path::new(&package.crate_root);
1723
1724        // Skip if not WASM-related crate
1725        if !package.crate_name.contains("wasm")
1726            && !package.crate_name.contains("plugin")
1727            && !package.crate_name.contains("runtime")
1728        {
1729            // Still scan but don't require wasm in name
1730        }
1731
1732        for entry in walkdir::WalkDir::new(crate_root)
1733            .into_iter()
1734            .filter_map(Result::ok)
1735            .filter(|e| e.file_type().is_file())
1736        {
1737            let path = entry.path();
1738            if path.extension() != Some(OsStr::new("rs")) {
1739                continue;
1740            }
1741
1742            let rel_path = path
1743                .strip_prefix(crate_root)
1744                .unwrap_or(path)
1745                .to_string_lossy()
1746                .replace('\\', "/");
1747
1748            let content = match fs::read_to_string(path) {
1749                Ok(c) => c,
1750                Err(_) => continue,
1751            };
1752
1753            // Check for WASM host function patterns
1754            let has_wasm_imports = Self::host_import_patterns()
1755                .iter()
1756                .any(|(p, _)| content.contains(p));
1757
1758            if !has_wasm_imports {
1759                continue;
1760            }
1761
1762            let lines: Vec<&str> = content.lines().collect();
1763
1764            for (idx, line) in lines.iter().enumerate() {
1765                let trimmed = line.trim();
1766
1767                // Skip comments
1768                if trimmed.starts_with("//") || trimmed.starts_with("/*") {
1769                    continue;
1770                }
1771
1772                // Look for dangerous usage of potentially host-provided data
1773                for dangerous in Self::dangerous_usages() {
1774                    if trimmed.contains(dangerous) {
1775                        // Check context for host function calls nearby
1776                        let context_start = idx.saturating_sub(10);
1777                        let context_end = (idx + 5).min(lines.len());
1778                        let context = &lines[context_start..context_end];
1779
1780                        let has_host_import = Self::host_import_patterns()
1781                            .iter()
1782                            .any(|(p, _)| context.iter().any(|l| l.contains(p)));
1783
1784                        // Check if there's validation
1785                        let has_validation = context.iter().any(|l| {
1786                            l.contains("if ")
1787                                || l.contains("match ")
1788                                || l.contains("validate")
1789                                || l.contains("check")
1790                                || l.contains(".is_ok()")
1791                                || l.contains(".is_err()")
1792                                || l.contains("?.")
1793                                || l.contains("try!")
1794                        });
1795
1796                        if has_host_import && !has_validation {
1797                            let location = format!("{}:{}", rel_path, idx + 1);
1798
1799                            findings.push(Finding {
1800                                rule_id: self.metadata.id.clone(),
1801                                rule_name: self.metadata.name.clone(),
1802                                severity: self.metadata.default_severity,
1803                                message: format!(
1804                                    "Potentially untrusted host data used in '{}' without validation. \
1805                                    Data from WASM host functions should be validated before use in \
1806                                    unsafe operations.",
1807                                    dangerous
1808                                ),
1809                                function: location,
1810                                function_signature: String::new(),
1811                                evidence: vec![trimmed.to_string()],
1812                                span: None,
1813                    ..Default::default()
1814                            });
1815                        }
1816                    }
1817                }
1818            }
1819        }
1820
1821        findings
1822    }
1823}
1824
1825// ============================================================================
1826// RUSTCOLA127: WASM Component Model Capability Leaks
1827// ============================================================================
1828
1829/// Detects capability leaks in WASM component model exports.
1830/// Resources and capabilities should not leak to untrusted guests.
1831pub struct WasmCapabilityLeakRule {
1832    metadata: RuleMetadata,
1833}
1834
1835impl WasmCapabilityLeakRule {
1836    pub fn new() -> Self {
1837        Self {
1838            metadata: RuleMetadata {
1839                id: "RUSTCOLA127".to_string(),
1840                name: "wasm-capability-leak".to_string(),
1841                short_description: "WASM component model capability leak".to_string(),
1842                full_description:
1843                    "Detects patterns where sensitive capabilities (filesystem access, \
1844                    network sockets, environment variables) may leak to WebAssembly guest modules \
1845                    through component model exports or WASI permissions. Apply principle of least \
1846                    privilege to guest capabilities."
1847                        .to_string(),
1848                help_uri: Some("https://component-model.bytecodealliance.org/".to_string()),
1849                default_severity: Severity::High,
1850                origin: RuleOrigin::BuiltIn,
1851                cwe_ids: Vec::new(),
1852                fix_suggestion: None,
1853                exploitability: Exploitability::default(),
1854            },
1855        }
1856    }
1857
1858    fn capability_patterns() -> Vec<(&'static str, &'static str)> {
1859        vec![
1860            (
1861                "WasiCtxBuilder::new().inherit_stdio()",
1862                "Inherits all stdio - may leak sensitive output",
1863            ),
1864            (
1865                "inherit_env()",
1866                "Inherits environment variables - may leak secrets",
1867            ),
1868            (
1869                "inherit_network()",
1870                "Inherits network access - may allow exfiltration",
1871            ),
1872            (
1873                "inherit_args()",
1874                "Inherits command line args - may leak secrets",
1875            ),
1876            (
1877                "preopened_dir",
1878                "Preopen directory access - verify scope is minimal",
1879            ),
1880            (
1881                "allow_ip_name_lookup",
1882                "Allows DNS lookups - potential for exfiltration",
1883            ),
1884            ("allow_udp", "Allows UDP sockets"),
1885            ("allow_tcp", "Allows TCP connections"),
1886            (
1887                ".ctx_builder().build()",
1888                "Check WasiCtx configuration for minimal privileges",
1889            ),
1890        ]
1891    }
1892
1893    fn sensitive_exports() -> Vec<&'static str> {
1894        vec![
1895            "std::fs::",
1896            "std::net::",
1897            "std::process::",
1898            "std::env::",
1899            "tokio::fs::",
1900            "tokio::net::",
1901            "async_std::fs::",
1902            "async_std::net::",
1903        ]
1904    }
1905}
1906
1907impl Rule for WasmCapabilityLeakRule {
1908    fn metadata(&self) -> &RuleMetadata {
1909        &self.metadata
1910    }
1911
1912    fn evaluate(
1913        &self,
1914        package: &MirPackage,
1915        _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
1916    ) -> Vec<Finding> {
1917        let mut findings = Vec::new();
1918        let crate_root = Path::new(&package.crate_root);
1919
1920        for entry in walkdir::WalkDir::new(crate_root)
1921            .into_iter()
1922            .filter_map(Result::ok)
1923            .filter(|e| e.file_type().is_file())
1924        {
1925            let path = entry.path();
1926            if path.extension() != Some(OsStr::new("rs")) {
1927                continue;
1928            }
1929
1930            let rel_path = path
1931                .strip_prefix(crate_root)
1932                .unwrap_or(path)
1933                .to_string_lossy()
1934                .replace('\\', "/");
1935
1936            let content = match fs::read_to_string(path) {
1937                Ok(c) => c,
1938                Err(_) => continue,
1939            };
1940
1941            // Check for WASM runtime usage
1942            let is_wasm_host = content.contains("wasmtime")
1943                || content.contains("wasmer")
1944                || content.contains("WasiCtx")
1945                || content.contains("wasi_common");
1946
1947            if !is_wasm_host {
1948                continue;
1949            }
1950
1951            let lines: Vec<&str> = content.lines().collect();
1952
1953            for (idx, line) in lines.iter().enumerate() {
1954                let trimmed = line.trim();
1955
1956                // Skip comments
1957                if trimmed.starts_with("//") || trimmed.starts_with("/*") {
1958                    continue;
1959                }
1960
1961                // Check for overly permissive capability patterns
1962                for (pattern, description) in Self::capability_patterns() {
1963                    if trimmed.contains(pattern) {
1964                        let location = format!("{}:{}", rel_path, idx + 1);
1965
1966                        findings.push(Finding {
1967                            rule_id: self.metadata.id.clone(),
1968                            rule_name: self.metadata.name.clone(),
1969                            severity: self.metadata.default_severity,
1970                            message: format!(
1971                                "Potential capability leak to WASM guest: {}. \
1972                                Apply principle of least privilege - only grant necessary capabilities.",
1973                                description
1974                            ),
1975                            function: location.clone(),
1976                            function_signature: String::new(),
1977                            evidence: vec![trimmed.to_string()],
1978                            span: None,
1979                    ..Default::default()
1980                        });
1981                    }
1982                }
1983
1984                // Check for exports of sensitive APIs
1985                for sensitive in Self::sensitive_exports() {
1986                    if trimmed.contains(sensitive)
1987                        && (trimmed.contains("Linker::")
1988                            || trimmed.contains("func_wrap")
1989                            || trimmed.contains("define(")
1990                            || trimmed.contains("export("))
1991                    {
1992                        let location = format!("{}:{}", rel_path, idx + 1);
1993
1994                        findings.push(Finding {
1995                            rule_id: self.metadata.id.clone(),
1996                            rule_name: self.metadata.name.clone(),
1997                            severity: Severity::High,
1998                            message: format!(
1999                                "Exporting sensitive capability '{}' to WASM guest. \
2000                                Verify this is intentional and properly sandboxed.",
2001                                sensitive
2002                            ),
2003                            function: location,
2004                            function_signature: String::new(),
2005                            evidence: vec![trimmed.to_string()],
2006                            span: None,
2007                            ..Default::default()
2008                        });
2009                    }
2010                }
2011            }
2012        }
2013
2014        findings
2015    }
2016}
2017
2018// ============================================================================
2019// Registration
2020// ============================================================================
2021
2022/// Register all FFI rules with the rule engine.
2023pub fn register_ffi_rules(engine: &mut crate::RuleEngine) {
2024    engine.register_rule(Box::new(AllocatorMismatchFfiRule::new()));
2025    engine.register_rule(Box::new(UnsafeFfiPointerReturnRule::new()));
2026    engine.register_rule(Box::new(PackedFieldReferenceRule::new()));
2027    engine.register_rule(Box::new(UnsafeCStringPointerRule::new()));
2028    engine.register_rule(Box::new(CtorDtorStdApiRule::new()));
2029    engine.register_rule(Box::new(FfiBufferLeakRule::new()));
2030    engine.register_rule(Box::new(PanicInFfiBoundaryRule::new()));
2031    engine.register_rule(Box::new(EmbeddedInterpreterUsageRule::new()));
2032    engine.register_rule(Box::new(WasmLinearMemoryOobRule::new()));
2033    engine.register_rule(Box::new(WasmHostFunctionTrustRule::new()));
2034    engine.register_rule(Box::new(WasmCapabilityLeakRule::new()));
2035}