1use super::filter_entry;
11use super::utils::{strip_string_literals, StringLiteralState};
12use crate::{
13 Confidence, Exploitability, Finding, MirPackage, Rule, RuleMetadata, RuleOrigin, Severity,
14};
15use std::collections::{HashMap, HashSet};
16use std::ffi::OsStr;
17use std::fs;
18use std::path::Path;
19use walkdir::WalkDir;
20
21pub struct AllocatorMismatchFfiRule {
27 metadata: RuleMetadata,
28}
29
30impl AllocatorMismatchFfiRule {
31 pub fn new() -> Self {
32 Self {
33 metadata: RuleMetadata {
34 id: "RUSTCOLA017".to_string(),
35 name: "allocator-mismatch".to_string(),
36 short_description: "Mixed allocator/deallocator usage".to_string(),
37 full_description: "Detects functions that mix Rust and foreign allocation APIs, \
38 such as freeing Box/CString allocations with libc::free or wrapping \
39 libc::malloc pointers with Box::from_raw."
40 .to_string(),
41 help_uri: Some(
42 "https://doc.rust-lang.org/std/boxed/struct.Box.html#method.from_raw"
43 .to_string(),
44 ),
45 default_severity: Severity::High,
46 origin: RuleOrigin::BuiltIn,
47 cwe_ids: Vec::new(),
48 fix_suggestion: None,
49 exploitability: Exploitability::default(),
50 },
51 }
52 }
53}
54
55impl Rule for AllocatorMismatchFfiRule {
56 fn metadata(&self) -> &RuleMetadata {
57 &self.metadata
58 }
59
60 fn cache_key(&self) -> String {
61 format!("{}:v1", self.metadata.id)
62 }
63
64 fn evaluate(
65 &self,
66 package: &MirPackage,
67 _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
68 ) -> Vec<Finding> {
69 let mut findings = Vec::new();
70
71 for function in &package.functions {
72 let mut rust_allocated_vars = Vec::new();
74
75 let mut c_allocated_vars = Vec::new();
77
78 let mut var_aliases: HashMap<String, String> = HashMap::new();
80
81 for (idx, line) in function.body.iter().enumerate() {
82 if (line.contains(" = copy ") || line.contains(" = move "))
84 && line.trim().starts_with('_')
85 {
86 let parts: Vec<&str> = line.split('=').collect();
87 if parts.len() >= 2 {
88 let lhs = parts[0].trim();
89 let rhs = parts[1].trim();
90 if let Some(src_var) = rhs.split_whitespace().nth(1) {
91 if src_var.starts_with('_') {
92 var_aliases.insert(lhs.to_string(), src_var.to_string());
93 }
94 }
95 }
96 }
97
98 if (line.contains("Box::") && line.contains("::into_raw")
100 || line.contains("CString::") && line.contains("::into_raw"))
101 && line.contains(" = ")
102 {
103 if let Some(var_name) = line.trim().split('=').next() {
104 let var = var_name.trim().to_string();
105 rust_allocated_vars.push((var.clone(), idx, line.trim().to_string()));
106 }
107 }
108
109 if (line.contains("malloc(")
111 || line.contains("calloc(")
112 || line.contains("realloc("))
113 && line.contains(" = ")
114 {
115 if let Some(var_name) = line.trim().split('=').next() {
116 let var = var_name.trim().to_string();
117 c_allocated_vars.push((var.clone(), idx, line.trim().to_string()));
118 }
119 }
120
121 if line.contains("free(") {
123 for (rust_var, alloc_idx, alloc_line) in &rust_allocated_vars {
124 let mut is_freed = line.contains(rust_var);
125
126 for (alias, original) in &var_aliases {
127 if original == rust_var && line.contains(alias) {
128 is_freed = true;
129 break;
130 }
131 }
132
133 if is_freed && idx > *alloc_idx && idx < alloc_idx + 50 {
134 findings.push(Finding {
135 rule_id: self.metadata.id.clone(),
136 rule_name: self.metadata.name.clone(),
137 severity: self.metadata.default_severity,
138 message: format!(
139 "Rust-allocated pointer freed with libc::free in `{}`",
140 function.name
141 ),
142 function: function.name.clone(),
143 function_signature: function.signature.clone(),
144 evidence: vec![
145 format!("Rust allocation: {}", alloc_line),
146 format!("C deallocation: {}", line.trim()),
147 ],
148 span: function.span.clone(),
149 confidence: Confidence::Medium,
150 cwe_ids: Vec::new(),
151 fix_suggestion: None,
152 code_snippet: None,
153 exploitability: Exploitability::default(),
154 exploitability_score: Exploitability::default().score(),
155 ..Default::default()
156 });
157 }
158 }
159 }
160
161 if line.contains("Box::") && line.contains("::from_raw(") {
163 for (c_var, alloc_idx, alloc_line) in &c_allocated_vars {
164 let mut is_converted = line.contains(c_var);
165
166 for (alias, original) in &var_aliases {
167 if original == c_var && line.contains(alias) {
168 is_converted = true;
169 break;
170 }
171 }
172
173 if is_converted && idx > *alloc_idx && idx < alloc_idx + 50 {
174 findings.push(Finding {
175 rule_id: self.metadata.id.clone(),
176 rule_name: self.metadata.name.clone(),
177 severity: self.metadata.default_severity,
178 message: format!(
179 "C-allocated pointer converted to Box::from_raw in `{}`",
180 function.name
181 ),
182 function: function.name.clone(),
183 function_signature: function.signature.clone(),
184 evidence: vec![
185 format!("C allocation: {}", alloc_line),
186 format!("Rust deallocation: {}", line.trim()),
187 ],
188 span: function.span.clone(),
189 confidence: Confidence::Medium,
190 cwe_ids: Vec::new(),
191 fix_suggestion: None,
192 code_snippet: None,
193 exploitability: Exploitability::default(),
194 exploitability_score: Exploitability::default().score(),
195 ..Default::default()
196 });
197 }
198 }
199 }
200
201 if line.contains("CString::") && line.contains("::from_raw(") {
203 for (c_var, alloc_idx, alloc_line) in &c_allocated_vars {
204 let mut is_converted = line.contains(c_var);
205
206 for (alias, original) in &var_aliases {
207 if original == c_var && line.contains(alias) {
208 is_converted = true;
209 break;
210 }
211 }
212
213 if is_converted && idx > *alloc_idx && idx < alloc_idx + 50 {
214 findings.push(Finding {
215 rule_id: self.metadata.id.clone(),
216 rule_name: self.metadata.name.clone(),
217 severity: self.metadata.default_severity,
218 message: format!(
219 "C-allocated pointer converted to CString::from_raw in `{}`",
220 function.name
221 ),
222 function: function.name.clone(),
223 function_signature: function.signature.clone(),
224 evidence: vec![
225 format!("C allocation: {}", alloc_line),
226 format!("Rust deallocation: {}", line.trim()),
227 ],
228 span: function.span.clone(),
229 confidence: Confidence::Medium,
230 cwe_ids: Vec::new(),
231 fix_suggestion: None,
232 code_snippet: None,
233 exploitability: Exploitability::default(),
234 exploitability_score: Exploitability::default().score(),
235 ..Default::default()
236 });
237 }
238 }
239 }
240 }
241 }
242
243 findings
244 }
245}
246
247pub struct UnsafeFfiPointerReturnRule {
253 metadata: RuleMetadata,
254}
255
256impl UnsafeFfiPointerReturnRule {
257 pub fn new() -> Self {
258 Self {
259 metadata: RuleMetadata {
260 id: "RUSTCOLA073".to_string(),
261 name: "unsafe-ffi-pointer-return".to_string(),
262 short_description: "FFI function returns raw pointer without safety invariants".to_string(),
263 full_description: "Detects extern \"C\" functions that return raw pointers (*const T or *mut T). \
264 These functions expose memory that must be managed correctly by callers, but the Rust \
265 type system cannot enforce this across FFI boundaries. Functions returning raw pointers \
266 should document ownership semantics (who frees the memory), lifetime requirements, \
267 and validity invariants.".to_string(),
268 help_uri: Some("https://doc.rust-lang.org/nomicon/ffi.html".to_string()),
269 default_severity: Severity::Medium,
270 origin: RuleOrigin::BuiltIn,
271 cwe_ids: Vec::new(),
272 fix_suggestion: None,
273 exploitability: Exploitability::default(),
274 },
275 }
276 }
277
278 fn is_ffi_returning_pointer(signature: &str, body: &[String]) -> Option<String> {
280 if !signature.contains("extern \"C\"") && !signature.contains("extern \"system\"") {
281 return None;
282 }
283
284 if let Some(arrow_pos) = signature.find("->") {
285 let return_type = signature[arrow_pos + 2..].trim();
286 if return_type.starts_with("*const") || return_type.starts_with("*mut") {
287 let has_safety_doc = body.iter().any(|line| {
288 let lower = line.to_lowercase();
289 lower.contains("safety:")
290 || lower.contains("# safety")
291 || lower.contains("invariant")
292 || lower.contains("ownership")
293 || lower.contains("caller must")
294 || lower.contains("must be freed")
295 });
296
297 if !has_safety_doc {
298 return Some(return_type.to_string());
299 }
300 }
301 }
302
303 None
304 }
305}
306
307impl Rule for UnsafeFfiPointerReturnRule {
308 fn metadata(&self) -> &RuleMetadata {
309 &self.metadata
310 }
311
312 fn evaluate(
313 &self,
314 package: &MirPackage,
315 _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
316 ) -> Vec<Finding> {
317 let mut findings = Vec::new();
318
319 for function in &package.functions {
320 if let Some(return_type) =
321 Self::is_ffi_returning_pointer(&function.signature, &function.body)
322 {
323 findings.push(Finding {
324 rule_id: self.metadata.id.clone(),
325 rule_name: self.metadata.name.clone(),
326 severity: self.metadata.default_severity,
327 message: format!(
328 "extern \"C\" function `{}` returns raw pointer `{}` without documented safety invariants.",
329 function.name,
330 return_type
331 ),
332 function: function.name.clone(),
333 function_signature: function.signature.clone(),
334 evidence: vec![
335 format!("Returns: {}", return_type),
336 "No safety documentation found".to_string(),
337 ],
338 span: function.span.clone(),
339 confidence: Confidence::Medium,
340 cwe_ids: Vec::new(),
341 fix_suggestion: None,
342 code_snippet: None,
343 exploitability: Exploitability::default(),
344 exploitability_score: Exploitability::default().score(),
345 ..Default::default()
346 });
347 }
348 }
349
350 findings
351 }
352}
353
354pub struct PackedFieldReferenceRule {
360 metadata: RuleMetadata,
361}
362
363impl PackedFieldReferenceRule {
364 pub fn new() -> Self {
365 Self {
366 metadata: RuleMetadata {
367 id: "RUSTCOLA035".to_string(),
368 name: "repr-packed-field-reference".to_string(),
369 short_description: "Reference to packed struct field".to_string(),
370 full_description: "Detects taking references to fields of #[repr(packed)] structs. \
371 Creating references to packed struct fields creates unaligned references, which \
372 is undefined behavior in Rust. Use ptr::addr_of! or ptr::addr_of_mut! instead.".to_string(),
373 help_uri: Some("https://doc.rust-lang.org/nomicon/other-reprs.html#reprpacked".to_string()),
374 default_severity: Severity::High,
375 origin: RuleOrigin::BuiltIn,
376 cwe_ids: Vec::new(),
377 fix_suggestion: None,
378 exploitability: Exploitability::default(),
379 },
380 }
381 }
382}
383
384impl Rule for PackedFieldReferenceRule {
385 fn metadata(&self) -> &RuleMetadata {
386 &self.metadata
387 }
388
389 fn evaluate(
390 &self,
391 package: &MirPackage,
392 _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
393 ) -> Vec<Finding> {
394 if package.crate_name == "mir-extractor" {
395 return Vec::new();
396 }
397
398 let mut findings = Vec::new();
399 let crate_root = Path::new(&package.crate_root);
400
401 if !crate_root.exists() {
402 return findings;
403 }
404
405 let mut packed_structs = HashSet::new();
407
408 for entry in WalkDir::new(crate_root)
409 .into_iter()
410 .filter_entry(|e| filter_entry(e))
411 {
412 let entry = match entry {
413 Ok(e) => e,
414 Err(_) => continue,
415 };
416
417 if !entry.file_type().is_file() {
418 continue;
419 }
420
421 let path = entry.path();
422 if path.extension() != Some(OsStr::new("rs")) {
423 continue;
424 }
425
426 let content = match fs::read_to_string(path) {
427 Ok(c) => c,
428 Err(_) => continue,
429 };
430
431 let lines: Vec<&str> = content.lines().collect();
432
433 for (idx, line) in lines.iter().enumerate() {
434 let trimmed = line.trim();
435
436 if trimmed.starts_with("#[repr(packed") {
437 for j in (idx + 1).min(lines.len())..lines.len() {
438 let struct_line = lines[j].trim();
439 if struct_line.starts_with("struct ")
440 || struct_line.starts_with("pub struct ")
441 {
442 let after_struct = if struct_line.starts_with("pub struct ") {
443 &struct_line[11..]
444 } else {
445 &struct_line[7..]
446 };
447
448 if let Some(name_end) =
449 after_struct.find(|c: char| !c.is_alphanumeric() && c != '_')
450 {
451 let struct_name = &after_struct[..name_end];
452 packed_structs.insert(struct_name.to_string());
453 }
454 break;
455 }
456 }
457 }
458 }
459 }
460
461 for entry in WalkDir::new(crate_root)
463 .into_iter()
464 .filter_entry(|e| filter_entry(e))
465 {
466 let entry = match entry {
467 Ok(e) => e,
468 Err(_) => continue,
469 };
470
471 if !entry.file_type().is_file() {
472 continue;
473 }
474
475 let path = entry.path();
476 if path.extension() != Some(OsStr::new("rs")) {
477 continue;
478 }
479
480 let rel_path = path
481 .strip_prefix(crate_root)
482 .unwrap_or(path)
483 .to_string_lossy()
484 .replace('\\', "/");
485
486 let content = match fs::read_to_string(path) {
487 Ok(c) => c,
488 Err(_) => continue,
489 };
490
491 let lines: Vec<&str> = content.lines().collect();
492
493 for (idx, line) in lines.iter().enumerate() {
494 let trimmed = line.trim();
495
496 for struct_name in &packed_structs {
497 if (trimmed.contains(&format!("&{}", struct_name.to_lowercase()))
498 || trimmed.contains(&format!("&mut {}", struct_name.to_lowercase()))
499 || trimmed.contains("&self.")
500 || trimmed.contains("&mut self."))
501 && trimmed.contains('.')
502 && !trimmed.contains("ptr::addr_of")
503 {
504 let location = format!("{}:{}", rel_path, idx + 1);
505
506 findings.push(Finding {
507 rule_id: self.metadata.id.clone(),
508 rule_name: self.metadata.name.clone(),
509 severity: self.metadata.default_severity,
510 message: format!(
511 "Potential reference to packed struct field (possibly {})",
512 struct_name
513 ),
514 function: location.clone(),
515 function_signature: String::new(),
516 evidence: vec![trimmed.to_string()],
517 span: None,
518 ..Default::default()
519 });
520 }
521 }
522 }
523 }
524
525 findings
526 }
527}
528
529pub struct UnsafeCStringPointerRule {
536 metadata: RuleMetadata,
537}
538
539impl UnsafeCStringPointerRule {
540 pub fn new() -> Self {
541 Self {
542 metadata: RuleMetadata {
543 id: "RUSTCOLA036".to_string(),
544 name: "unsafe-cstring-pointer".to_string(),
545 short_description: "Unsafe CString pointer from temporary".to_string(),
546 full_description: "Detects patterns like CString::new(...).unwrap().as_ptr() where \
547 the CString is a temporary that gets dropped immediately, leaving a dangling pointer. \
548 The pointer must outlive the CString it came from. Store the CString in a variable \
549 to extend its lifetime.".to_string(),
550 help_uri: Some("https://www.jetbrains.com/help/inspectopedia/RsCStringPointer.html".to_string()),
551 default_severity: Severity::High,
552 origin: RuleOrigin::BuiltIn,
553 cwe_ids: Vec::new(),
554 fix_suggestion: None,
555 exploitability: Exploitability::default(),
556 },
557 }
558 }
559
560 fn is_cstring_temp_pattern(line: &str) -> bool {
561 if !line.contains("CString::new") || !line.contains(".as_ptr()") {
562 return false;
563 }
564
565 let has_intermediate_method = line.contains(".unwrap()")
566 || line.contains(".expect(")
567 || line.contains(".unwrap_or")
568 || line.contains("?");
569
570 let looks_temporary = has_intermediate_method && !line.contains("let ");
571 let direct_chain = line.contains("CString::new(") && line.contains(").as_ptr()");
572
573 looks_temporary || direct_chain
574 }
575}
576
577impl Rule for UnsafeCStringPointerRule {
578 fn metadata(&self) -> &RuleMetadata {
579 &self.metadata
580 }
581
582 fn evaluate(
583 &self,
584 package: &MirPackage,
585 _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
586 ) -> Vec<Finding> {
587 if package.crate_name == "mir-extractor" {
588 return Vec::new();
589 }
590
591 let mut findings = Vec::new();
592 let crate_root = Path::new(&package.crate_root);
593
594 if !crate_root.exists() {
595 return findings;
596 }
597
598 for entry in WalkDir::new(crate_root)
599 .into_iter()
600 .filter_entry(|e| filter_entry(e))
601 {
602 let entry = match entry {
603 Ok(e) => e,
604 Err(_) => continue,
605 };
606
607 if !entry.file_type().is_file() {
608 continue;
609 }
610
611 let path = entry.path();
612 if path.extension() != Some(OsStr::new("rs")) {
613 continue;
614 }
615
616 let rel_path = path
617 .strip_prefix(crate_root)
618 .unwrap_or(path)
619 .to_string_lossy()
620 .replace('\\', "/");
621
622 let content = match fs::read_to_string(path) {
623 Ok(c) => c,
624 Err(_) => continue,
625 };
626
627 let lines: Vec<&str> = content.lines().collect();
628
629 for (idx, line) in lines.iter().enumerate() {
630 let trimmed = line.trim();
631
632 if Self::is_cstring_temp_pattern(trimmed) {
633 let location = format!("{}:{}", rel_path, idx + 1);
634
635 findings.push(Finding::new(
636 self.metadata.id.clone(),
637 self.metadata.name.clone(),
638 self.metadata.default_severity,
639 "CString temporary used with as_ptr() creates dangling pointer".to_string(),
640 location,
641 String::new(),
642 vec![trimmed.to_string()],
643 None,
644 ));
645 }
646 }
647 }
648
649 findings
650 }
651}
652
653pub struct CtorDtorStdApiRule {
659 metadata: RuleMetadata,
660}
661
662impl CtorDtorStdApiRule {
663 pub fn new() -> Self {
664 Self {
665 metadata: RuleMetadata {
666 id: "RUSTCOLA059".to_string(),
667 name: "ctor-dtor-std-api".to_string(),
668 short_description: "#[ctor]/#[dtor] invoking std APIs".to_string(),
669 full_description: "Detects functions annotated with #[ctor] or #[dtor] that call std:: APIs. Code running in constructors/destructors (before main or during program teardown) can cause initialization ordering issues, deadlocks, or undefined behavior when calling standard library functions that expect a fully initialized runtime. Mirrors CodeQL rust/ctor-initialization.".to_string(),
670 help_uri: Some("https://docs.rs/ctor/latest/ctor/".to_string()),
671 default_severity: Severity::Medium,
672 origin: RuleOrigin::BuiltIn,
673 cwe_ids: Vec::new(),
674 fix_suggestion: None,
675 exploitability: Exploitability::default(),
676 },
677 }
678 }
679
680 fn looks_like_ctor_dtor_with_std_calls(&self, function: &crate::MirFunction) -> bool {
681 let name = &function.name;
682
683 if name.contains("CtorDtorStdApiRule")
685 || name.contains("looks_like_ctor_dtor_with_std_calls")
686 {
687 return false;
688 }
689
690 let looks_like_ctor_dtor_name = name.starts_with("ctor_") || name.starts_with("dtor_");
694
695 if !looks_like_ctor_dtor_name {
696 return false;
697 }
698
699 let has_std_refs = function.body.iter().any(|line| {
701 line.contains("std::") || line.contains("_print(") }) || function.signature.contains("std::");
703
704 has_std_refs
705 }
706}
707
708impl Rule for CtorDtorStdApiRule {
709 fn metadata(&self) -> &RuleMetadata {
710 &self.metadata
711 }
712
713 fn evaluate(
714 &self,
715 package: &MirPackage,
716 _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
717 ) -> Vec<Finding> {
718 let mut findings = Vec::new();
719
720 for function in &package.functions {
721 if self.looks_like_ctor_dtor_with_std_calls(function) {
722 let mut evidence = vec![];
724 for line in &function.body {
725 if line.contains("std::") || line.contains("_print(") {
726 evidence.push(line.clone());
727 if evidence.len() >= 3 {
728 break;
729 }
730 }
731 }
732
733 if !evidence.is_empty() {
734 findings.push(Finding {
735 rule_id: self.metadata.id.clone(),
736 rule_name: self.metadata.name.clone(),
737 severity: self.metadata.default_severity,
738 message: "Constructor/destructor function calls std library APIs. Code running before main() or during program teardown can cause initialization issues, deadlocks, or undefined behavior.".to_string(),
739 function: function.name.clone(),
740 function_signature: function.signature.clone(),
741 evidence,
742 span: function.span.clone(),
743 ..Default::default()
744 });
745 }
746 }
747 }
748
749 findings
750 }
751}
752
753pub struct FfiBufferLeakRule {
760 metadata: RuleMetadata,
761}
762
763impl FfiBufferLeakRule {
764 pub fn new() -> Self {
765 Self {
766 metadata: RuleMetadata {
767 id: "RUSTCOLA016".to_string(),
768 name: "ffi-buffer-leak-early-return".to_string(),
769 short_description: "FFI buffer escapes with early return".to_string(),
770 full_description: "Detects extern functions that hand out raw pointers or heap buffers and contain early-return code paths, risking leaks or dangling pointers when cleanup is skipped.".to_string(),
771 help_uri: None,
772 default_severity: Severity::High,
773 origin: RuleOrigin::BuiltIn,
774 cwe_ids: Vec::new(),
775 fix_suggestion: None,
776 exploitability: Exploitability::default(),
777 },
778 }
779 }
780
781 fn pointer_escape_patterns() -> &'static [&'static str] {
782 &[
783 "Box::into_raw",
784 "Vec::into_raw_parts",
785 "Vec::with_capacity",
786 "CString::into_raw",
787 ".as_mut_ptr()",
788 ".as_ptr()",
789 ]
790 }
791
792 fn captures_early_exit(line: &str, position: usize, last_index: usize) -> bool {
793 let trimmed = line.trim();
794 if trimmed.is_empty() {
795 return false;
796 }
797
798 if trimmed.contains('?') {
799 return true;
800 }
801
802 if trimmed.contains("return Err") {
803 return true;
804 }
805
806 if (trimmed.starts_with("return ") || trimmed.contains(" return ")) && position < last_index
807 {
808 return true;
809 }
810
811 false
812 }
813
814 fn is_pointer_escape(line: &str) -> bool {
815 Self::pointer_escape_patterns()
816 .iter()
817 .any(|needle| line.contains(needle))
818 }
819}
820
821impl Rule for FfiBufferLeakRule {
822 fn metadata(&self) -> &RuleMetadata {
823 &self.metadata
824 }
825
826 fn evaluate(
827 &self,
828 package: &MirPackage,
829 _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
830 ) -> Vec<Finding> {
831 let mut findings = Vec::new();
832 let crate_root = Path::new(&package.crate_root);
833
834 if !crate_root.exists() {
835 return findings;
836 }
837
838 for entry in WalkDir::new(crate_root)
839 .into_iter()
840 .filter_entry(|e| filter_entry(e))
841 {
842 let entry = match entry {
843 Ok(e) => e,
844 Err(_) => continue,
845 };
846
847 if !entry.file_type().is_file() {
848 continue;
849 }
850
851 if entry.path().extension().and_then(OsStr::to_str) != Some("rs") {
852 continue;
853 }
854
855 let Ok(source) = fs::read_to_string(entry.path()) else {
856 continue;
857 };
858
859 let rel_path = entry
860 .path()
861 .strip_prefix(crate_root)
862 .unwrap_or_else(|_| entry.path())
863 .to_string_lossy()
864 .replace('\\', "/");
865
866 let lines: Vec<&str> = source.lines().collect();
867 let mut idx = 0usize;
868 let mut string_state = StringLiteralState::default();
869 let mut pending_no_mangle: Option<usize> = None;
870 let mut pending_extern: Option<usize> = None;
871
872 while idx < lines.len() {
873 let raw_line = lines[idx];
874 let (sanitized_line, state_after_line) =
875 strip_string_literals(string_state, raw_line);
876 let trimmed = sanitized_line.trim();
877 let trimmed_original = raw_line.trim();
878
879 if trimmed.starts_with("#[no_mangle") {
880 pending_no_mangle = Some(idx);
881 string_state = state_after_line;
882 idx += 1;
883 continue;
884 }
885
886 if trimmed.contains("extern \"C\"") && !trimmed.contains("fn ") {
887 pending_extern = Some(idx);
888 string_state = state_after_line;
889 idx += 1;
890 continue;
891 }
892
893 let mut is_ffi_fn = false;
894 let mut start_idx = idx;
895
896 if trimmed.contains("extern \"C\"") && trimmed.contains("fn ") {
897 is_ffi_fn = true;
898 } else if pending_extern.is_some() && trimmed.contains("fn ") {
899 is_ffi_fn = true;
900 start_idx = pending_extern.unwrap();
901 } else if pending_no_mangle.is_some() && trimmed.contains("fn ") {
902 is_ffi_fn = true;
903 start_idx = pending_no_mangle.unwrap();
904 }
905
906 if !is_ffi_fn {
907 if !trimmed.is_empty() && !trimmed.starts_with("#[") {
908 pending_no_mangle = None;
909 pending_extern = None;
910 }
911 string_state = state_after_line;
912 idx += 1;
913 continue;
914 }
915
916 let mut block_lines: Vec<String> = Vec::new();
917 let mut sanitized_block: Vec<String> = Vec::new();
918 if start_idx < idx {
919 for attr_idx in start_idx..idx {
920 let attr_line = lines[attr_idx].trim();
921 if !attr_line.is_empty() {
922 block_lines.push(attr_line.to_string());
923 sanitized_block.push(attr_line.to_string());
924 }
925 }
926 }
927
928 if !trimmed_original.is_empty() {
929 block_lines.push(trimmed_original.to_string());
930 sanitized_block.push(trimmed.to_string());
931 }
932
933 let mut brace_balance: i32 = 0;
934 let mut body_started = false;
935 let mut j = idx;
936 let mut current_state = state_after_line;
937 let mut current_sanitized = sanitized_line;
938
939 loop {
940 let trimmed_sanitized = current_sanitized.trim();
941 let opens = current_sanitized.chars().filter(|c| *c == '{').count() as i32;
942 let closes = current_sanitized.chars().filter(|c| *c == '}').count() as i32;
943 brace_balance += opens;
944 if brace_balance > 0 {
945 body_started = true;
946 }
947 brace_balance -= closes;
948
949 let body_done = if body_started && brace_balance <= 0 {
950 true
951 } else if !body_started && trimmed_sanitized.ends_with(';') {
952 true
953 } else {
954 false
955 };
956
957 if body_done {
958 j += 1;
959 break;
960 }
961
962 j += 1;
963 if j >= lines.len() {
964 break;
965 }
966
967 let next_line = lines[j];
968 let (next_sanitized, next_state) =
969 strip_string_literals(current_state, next_line);
970 current_state = next_state;
971
972 let trimmed_original_next = next_line.trim();
973 if !trimmed_original_next.is_empty() {
974 block_lines.push(trimmed_original_next.to_string());
975 sanitized_block.push(next_sanitized.trim().to_string());
976 }
977
978 current_sanitized = next_sanitized;
979 }
980
981 let signature_line = block_lines
982 .iter()
983 .find(|line| line.contains("fn "))
984 .cloned()
985 .unwrap_or_else(|| block_lines.first().cloned().unwrap_or_default());
986
987 let last_index = sanitized_block
988 .iter()
989 .rposition(|line| !line.trim().is_empty())
990 .unwrap_or(0);
991
992 let pointer_lines: Vec<String> = block_lines
993 .iter()
994 .zip(sanitized_block.iter())
995 .filter_map(|(line, sanitized)| {
996 if Self::is_pointer_escape(sanitized) {
997 Some(line.clone())
998 } else {
999 None
1000 }
1001 })
1002 .collect();
1003
1004 let early_lines: Vec<(usize, String)> = sanitized_block
1005 .iter()
1006 .enumerate()
1007 .filter_map(|(pos, sanitized)| {
1008 if Self::captures_early_exit(sanitized, pos, last_index) {
1009 Some((pos, block_lines[pos].clone()))
1010 } else {
1011 None
1012 }
1013 })
1014 .collect();
1015
1016 if !pointer_lines.is_empty() && !early_lines.is_empty() {
1017 let mut evidence = Vec::new();
1018 let mut seen = HashSet::new();
1019
1020 for line in pointer_lines
1021 .iter()
1022 .chain(early_lines.iter().map(|(_, l)| l))
1023 {
1024 if seen.insert(line.clone()) {
1025 evidence.push(line.clone());
1026 }
1027 }
1028
1029 let location = format!("{}:{}", rel_path, start_idx + 1);
1030 findings.push(Finding {
1031 rule_id: self.metadata.id.clone(),
1032 rule_name: self.metadata.name.clone(),
1033 severity: self.metadata.default_severity,
1034 message: "Potential FFI buffer leak due to early return before cleanup"
1035 .to_string(),
1036 function: location,
1037 function_signature: signature_line,
1038 evidence,
1039 span: None,
1040 ..Default::default()
1041 });
1042 }
1043
1044 pending_no_mangle = None;
1045 pending_extern = None;
1046 string_state = current_state;
1047 idx = j;
1048 }
1049 }
1050
1051 findings
1052 }
1053}
1054
1055pub struct PanicInFfiBoundaryRule {
1064 metadata: RuleMetadata,
1065}
1066
1067impl PanicInFfiBoundaryRule {
1068 pub fn new() -> Self {
1069 Self {
1070 metadata: RuleMetadata {
1071 id: "RUSTCOLA116".to_string(),
1072 name: "panic-in-ffi-boundary".to_string(),
1073 short_description: "Potential panic in extern \"C\" function".to_string(),
1074 full_description: "Detects potential panics in extern \"C\" functions. Unwinding \
1075 across FFI boundaries is undefined behavior in Rust. Operations like unwrap(), \
1076 expect(), panic!(), assert!(), and indexing can all panic. Use catch_unwind \
1077 or return error codes instead."
1078 .to_string(),
1079 help_uri: Some(
1080 "https://doc.rust-lang.org/nomicon/ffi.html#ffi-and-panics".to_string(),
1081 ),
1082 default_severity: Severity::High,
1083 origin: RuleOrigin::BuiltIn,
1084 cwe_ids: Vec::new(),
1085 fix_suggestion: None,
1086 exploitability: Exploitability::default(),
1087 },
1088 }
1089 }
1090
1091 fn panic_patterns() -> &'static [(&'static str, &'static str)] {
1093 &[
1094 (".unwrap()", "unwrap() can panic on None/Err"),
1095 (".expect(", "expect() can panic on None/Err"),
1096 ("panic!", "explicit panic"),
1097 ("unreachable!", "unreachable! panics if reached"),
1098 ("unimplemented!", "unimplemented! always panics"),
1099 ("todo!", "todo! always panics"),
1100 ("assert!", "assert! panics on false"),
1101 ("assert_eq!", "assert_eq! panics on mismatch"),
1102 ("assert_ne!", "assert_ne! panics on match"),
1103 ("debug_assert!", "debug_assert! panics in debug builds"),
1104 ("[", "array/slice indexing can panic on out-of-bounds"),
1105 ]
1106 }
1107}
1108
1109impl Rule for PanicInFfiBoundaryRule {
1110 fn metadata(&self) -> &RuleMetadata {
1111 &self.metadata
1112 }
1113
1114 fn evaluate(
1115 &self,
1116 package: &MirPackage,
1117 _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
1118 ) -> Vec<Finding> {
1119 if package.crate_name == "mir-extractor" {
1120 return Vec::new();
1121 }
1122
1123 let mut findings = Vec::new();
1124 let crate_root = Path::new(&package.crate_root);
1125
1126 if !crate_root.exists() {
1127 return findings;
1128 }
1129
1130 for entry in WalkDir::new(crate_root)
1131 .into_iter()
1132 .filter_entry(|e| filter_entry(e))
1133 {
1134 let entry = match entry {
1135 Ok(e) => e,
1136 Err(_) => continue,
1137 };
1138
1139 if !entry.file_type().is_file() {
1140 continue;
1141 }
1142
1143 let path = entry.path();
1144 if path.extension() != Some(OsStr::new("rs")) {
1145 continue;
1146 }
1147
1148 let rel_path = path
1149 .strip_prefix(crate_root)
1150 .unwrap_or(path)
1151 .to_string_lossy()
1152 .replace('\\', "/");
1153
1154 let content = match fs::read_to_string(path) {
1155 Ok(c) => c,
1156 Err(_) => continue,
1157 };
1158
1159 let lines: Vec<&str> = content.lines().collect();
1160 let mut in_extern_c_fn = false;
1161 let mut extern_fn_start = 0;
1162 let mut extern_fn_name = String::new();
1163 let mut brace_depth = 0;
1164
1165 for (idx, line) in lines.iter().enumerate() {
1166 let trimmed = line.trim();
1167
1168 if trimmed.starts_with("//") {
1170 continue;
1171 }
1172
1173 if (trimmed.contains("extern \"C\"") || trimmed.contains("extern \"system\""))
1175 && trimmed.contains("fn ")
1176 {
1177 in_extern_c_fn = true;
1178 extern_fn_start = idx;
1179 brace_depth = 0;
1180
1181 if let Some(fn_pos) = trimmed.find("fn ") {
1183 let after_fn = &trimmed[fn_pos + 3..];
1184 extern_fn_name = after_fn
1185 .split(|c: char| c == '(' || c == '<' || c.is_whitespace())
1186 .next()
1187 .unwrap_or("")
1188 .to_string();
1189 }
1190 }
1191
1192 if in_extern_c_fn {
1193 brace_depth += trimmed.chars().filter(|&c| c == '{').count() as i32;
1194 brace_depth -= trimmed.chars().filter(|&c| c == '}').count() as i32;
1195
1196 for (pattern, reason) in Self::panic_patterns() {
1198 if *pattern == "[" {
1200 if trimmed.contains('[')
1202 && trimmed.contains(']')
1203 && !trimmed.contains("&[")
1204 && !trimmed.contains(": [")
1205 && !trimmed.contains("-> [")
1206 && !trimmed.starts_with("let ")
1207 && !trimmed.starts_with("const ")
1208 && !trimmed.starts_with("static ")
1209 {
1210 let has_index_op = trimmed
1212 .chars()
1213 .zip(trimmed.chars().skip(1))
1214 .any(|(a, b)| a.is_alphanumeric() && b == '[');
1215
1216 if has_index_op {
1217 let location = format!("{}:{}", rel_path, idx + 1);
1218
1219 findings.push(Finding {
1220 rule_id: self.metadata.id.clone(),
1221 rule_name: self.metadata.name.clone(),
1222 severity: Severity::Medium, message: format!(
1224 "Potential panic in extern \"C\" fn `{}`: {}. \
1225 Consider using .get() with bounds checking.",
1226 extern_fn_name, reason
1227 ),
1228 function: location,
1229 function_signature: String::new(),
1230 evidence: vec![trimmed.to_string()],
1231 span: None,
1232 ..Default::default()
1233 });
1234 }
1235 }
1236 } else if trimmed.contains(pattern) {
1237 let location = format!("{}:{}", rel_path, idx + 1);
1238
1239 findings.push(Finding {
1240 rule_id: self.metadata.id.clone(),
1241 rule_name: self.metadata.name.clone(),
1242 severity: self.metadata.default_severity,
1243 message: format!(
1244 "Potential panic in extern \"C\" fn `{}`: {}. \
1245 Unwinding across FFI boundaries is undefined behavior. \
1246 Use catch_unwind or return error codes.",
1247 extern_fn_name, reason
1248 ),
1249 function: location,
1250 function_signature: String::new(),
1251 evidence: vec![trimmed.to_string()],
1252 span: None,
1253 ..Default::default()
1254 });
1255 }
1256 }
1257
1258 if brace_depth <= 0 && idx > extern_fn_start {
1260 in_extern_c_fn = false;
1261 }
1262 }
1263 }
1264 }
1265
1266 findings
1267 }
1268}
1269
1270pub struct EmbeddedInterpreterUsageRule {
1279 metadata: RuleMetadata,
1280}
1281
1282impl EmbeddedInterpreterUsageRule {
1283 pub fn new() -> Self {
1284 Self {
1285 metadata: RuleMetadata {
1286 id: "RUSTCOLA107".to_string(),
1287 name: "embedded-interpreter-usage".to_string(),
1288 short_description: "Embedded interpreter creates code injection surface"
1289 .to_string(),
1290 full_description: "Detects usage of embedded interpreters like PyO3 (Python), \
1291 rlua/mlua (Lua), rusty_v8/deno_core (JavaScript). These create potential \
1292 code injection attack surfaces if user input reaches the interpreter. \
1293 Ensure proper sandboxing and input validation."
1294 .to_string(),
1295 help_uri: None,
1296 default_severity: Severity::Medium,
1297 origin: RuleOrigin::BuiltIn,
1298 cwe_ids: Vec::new(),
1299 fix_suggestion: None,
1300 exploitability: Exploitability::default(),
1301 },
1302 }
1303 }
1304
1305 fn interpreter_patterns() -> &'static [(&'static str, &'static str, &'static str)] {
1307 &[
1308 ("pyo3", "Python::with_gil", "Python interpreter (PyO3)"),
1309 ("pyo3", "Python::acquire_gil", "Python interpreter (PyO3)"),
1310 (
1311 "pyo3",
1312 "prepare_freethreaded_python",
1313 "Python interpreter (PyO3)",
1314 ),
1315 ("rlua", "Lua::new", "Lua interpreter (rlua)"),
1316 ("mlua", "Lua::new", "Lua interpreter (mlua)"),
1317 ("rusty_v8", "v8::Isolate", "V8 JavaScript engine"),
1318 ("deno_core", "JsRuntime::new", "Deno JavaScript runtime"),
1319 ("rhai", "Engine::new", "Rhai scripting engine"),
1320 ("rquickjs", "Context::new", "QuickJS runtime"),
1321 ("wasmer", "Instance::new", "WebAssembly runtime (Wasmer)"),
1322 (
1323 "wasmtime",
1324 "Instance::new",
1325 "WebAssembly runtime (Wasmtime)",
1326 ),
1327 ]
1328 }
1329}
1330
1331impl Rule for EmbeddedInterpreterUsageRule {
1332 fn metadata(&self) -> &RuleMetadata {
1333 &self.metadata
1334 }
1335
1336 fn evaluate(
1337 &self,
1338 package: &MirPackage,
1339 _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
1340 ) -> Vec<Finding> {
1341 if package.crate_name == "mir-extractor" {
1342 return Vec::new();
1343 }
1344
1345 let mut findings = Vec::new();
1346 let crate_root = Path::new(&package.crate_root);
1347
1348 if !crate_root.exists() {
1349 return findings;
1350 }
1351
1352 for entry in WalkDir::new(crate_root)
1353 .into_iter()
1354 .filter_entry(|e| filter_entry(e))
1355 {
1356 let entry = match entry {
1357 Ok(e) => e,
1358 Err(_) => continue,
1359 };
1360
1361 if !entry.file_type().is_file() {
1362 continue;
1363 }
1364
1365 let path = entry.path();
1366 if path.extension() != Some(OsStr::new("rs")) {
1367 continue;
1368 }
1369
1370 let rel_path = path
1371 .strip_prefix(crate_root)
1372 .unwrap_or(path)
1373 .to_string_lossy()
1374 .replace('\\', "/");
1375
1376 let content = match fs::read_to_string(path) {
1377 Ok(c) => c,
1378 Err(_) => continue,
1379 };
1380
1381 let mut relevant_crates: Vec<&str> = Vec::new();
1383 for (crate_name, _, _) in Self::interpreter_patterns() {
1384 if content.contains(crate_name) && !relevant_crates.contains(crate_name) {
1385 relevant_crates.push(crate_name);
1386 }
1387 }
1388
1389 if relevant_crates.is_empty() {
1390 continue;
1391 }
1392
1393 let lines: Vec<&str> = content.lines().collect();
1394
1395 for (idx, line) in lines.iter().enumerate() {
1396 let trimmed = line.trim();
1397
1398 if trimmed.starts_with("//") {
1400 continue;
1401 }
1402
1403 for (crate_name, pattern, description) in Self::interpreter_patterns() {
1404 if relevant_crates.contains(crate_name) && trimmed.contains(pattern) {
1405 let location = format!("{}:{}", rel_path, idx + 1);
1406
1407 findings.push(Finding {
1408 rule_id: self.metadata.id.clone(),
1409 rule_name: self.metadata.name.clone(),
1410 severity: self.metadata.default_severity,
1411 message: format!(
1412 "{} detected. Embedded interpreters can execute arbitrary code. \
1413 Ensure user input is validated before evaluation and consider \
1414 sandboxing the interpreter context.",
1415 description
1416 ),
1417 function: location,
1418 function_signature: String::new(),
1419 evidence: vec![trimmed.to_string()],
1420 span: None,
1421 ..Default::default()
1422 });
1423 }
1424 }
1425 }
1426 }
1427
1428 findings
1429 }
1430}
1431
1432pub struct WasmLinearMemoryOobRule {
1442 metadata: RuleMetadata,
1443}
1444
1445impl WasmLinearMemoryOobRule {
1446 pub fn new() -> Self {
1447 Self {
1448 metadata: RuleMetadata {
1449 id: "RUSTCOLA103".to_string(),
1450 name: "wasm-linear-memory-oob".to_string(),
1451 short_description: "WASM linear memory out-of-bounds risk".to_string(),
1452 full_description: "Detects patterns in WASM-targeted code that may allow \
1453 out-of-bounds access to linear memory. In WASM, memory is a contiguous \
1454 array and unchecked pointer operations can access arbitrary memory. \
1455 Use bounds checking or safe abstractions like wasm-bindgen."
1456 .to_string(),
1457 help_uri: Some("https://webassembly.org/docs/security/".to_string()),
1458 default_severity: Severity::High,
1459 origin: RuleOrigin::BuiltIn,
1460 cwe_ids: Vec::new(),
1461 fix_suggestion: None,
1462 exploitability: Exploitability::default(),
1463 },
1464 }
1465 }
1466
1467 fn wasm_memory_patterns() -> &'static [(&'static str, &'static str)] {
1469 &[
1470 (
1472 "slice::from_raw_parts",
1473 "Creating slice from raw pointer without bounds check",
1474 ),
1475 (
1476 "slice::from_raw_parts_mut",
1477 "Creating mutable slice from raw pointer without bounds check",
1478 ),
1479 (
1480 "std::ptr::read",
1481 "Reading from raw pointer without bounds check",
1482 ),
1483 (
1484 "std::ptr::write",
1485 "Writing to raw pointer without bounds check",
1486 ),
1487 ("ptr::read", "Reading from raw pointer"),
1488 ("ptr::write", "Writing to raw pointer"),
1489 ("ptr::copy", "Copying via raw pointer"),
1490 ("ptr::copy_nonoverlapping", "Copying via raw pointer"),
1491 (".offset(", "Pointer offset without bounds validation"),
1493 (".add(", "Pointer addition without bounds validation"),
1494 (".sub(", "Pointer subtraction without bounds validation"),
1495 ]
1496 }
1497
1498 fn wasm_export_indicators() -> &'static [&'static str] {
1500 &[
1501 "#[no_mangle]",
1502 "#[wasm_bindgen]",
1503 "extern \"C\"",
1504 "#[export_name",
1505 ]
1506 }
1507}
1508
1509impl Rule for WasmLinearMemoryOobRule {
1510 fn metadata(&self) -> &RuleMetadata {
1511 &self.metadata
1512 }
1513
1514 fn evaluate(
1515 &self,
1516 package: &MirPackage,
1517 _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
1518 ) -> Vec<Finding> {
1519 let mut findings = Vec::new();
1520 let crate_root = Path::new(&package.crate_root);
1521
1522 if !crate_root.exists() {
1523 return findings;
1524 }
1525
1526 for entry in WalkDir::new(crate_root)
1527 .into_iter()
1528 .filter_entry(|e| filter_entry(e))
1529 {
1530 let entry = match entry {
1531 Ok(e) => e,
1532 Err(_) => continue,
1533 };
1534
1535 if !entry.file_type().is_file() {
1536 continue;
1537 }
1538
1539 let path = entry.path();
1540 if path.extension() != Some(OsStr::new("rs")) {
1541 continue;
1542 }
1543
1544 let rel_path = path
1545 .strip_prefix(crate_root)
1546 .unwrap_or(path)
1547 .to_string_lossy()
1548 .replace('\\', "/");
1549
1550 let content = match fs::read_to_string(path) {
1551 Ok(c) => c,
1552 Err(_) => continue,
1553 };
1554
1555 let is_wasm_target = content.contains("wasm_bindgen")
1557 || content.contains("wasm32")
1558 || content.contains("#[no_mangle]")
1559 || package.crate_name.contains("wasm");
1560
1561 if !is_wasm_target {
1562 continue;
1563 }
1564
1565 let lines: Vec<&str> = content.lines().collect();
1566 let mut in_wasm_export = false;
1567 let mut export_fn_name = String::new();
1568
1569 for (idx, line) in lines.iter().enumerate() {
1570 let trimmed = line.trim();
1571
1572 if trimmed.starts_with("//") {
1574 continue;
1575 }
1576
1577 for indicator in Self::wasm_export_indicators() {
1579 if trimmed.contains(indicator) {
1580 in_wasm_export = true;
1581 }
1582 }
1583
1584 if in_wasm_export
1586 && (trimmed.starts_with("pub fn ")
1587 || trimmed.starts_with("pub unsafe fn ")
1588 || trimmed.starts_with("fn ")
1589 || trimmed.starts_with("unsafe fn "))
1590 {
1591 if let Some(fn_pos) = trimmed.find("fn ") {
1592 let after_fn = &trimmed[fn_pos + 3..];
1593 export_fn_name = after_fn
1594 .split(|c| c == '(' || c == '<')
1595 .next()
1596 .unwrap_or("")
1597 .trim()
1598 .to_string();
1599 }
1600 }
1601
1602 if trimmed == "}" && in_wasm_export && !export_fn_name.is_empty() {
1604 }
1606
1607 if in_wasm_export {
1609 for (pattern, description) in Self::wasm_memory_patterns() {
1610 if trimmed.contains(pattern) {
1611 let has_bounds_check = lines
1613 [idx.saturating_sub(3)..=(idx + 1).min(lines.len() - 1)]
1614 .iter()
1615 .any(|l| {
1616 l.contains("if ")
1617 && (l.contains(" < ")
1618 || l.contains(" <= ")
1619 || l.contains(".len()")
1620 || l.contains("bounds"))
1621 });
1622
1623 if !has_bounds_check {
1624 let location = format!("{}:{}", rel_path, idx + 1);
1625
1626 findings.push(Finding {
1627 rule_id: self.metadata.id.clone(),
1628 rule_name: self.metadata.name.clone(),
1629 severity: self.metadata.default_severity,
1630 message: format!(
1631 "Potential WASM linear memory OOB in export '{}': {}. \
1632 In WebAssembly, this can access arbitrary memory. \
1633 Add bounds checking or use wasm-bindgen's safe abstractions.",
1634 export_fn_name, description
1635 ),
1636 function: location,
1637 function_signature: String::new(),
1638 evidence: vec![trimmed.to_string()],
1639 span: None,
1640 ..Default::default()
1641 });
1642 }
1643 }
1644 }
1645 }
1646 }
1647 }
1648
1649 findings
1650 }
1651}
1652
1653pub struct WasmHostFunctionTrustRule {
1660 metadata: RuleMetadata,
1661}
1662
1663impl WasmHostFunctionTrustRule {
1664 pub fn new() -> Self {
1665 Self {
1666 metadata: RuleMetadata {
1667 id: "RUSTCOLA126".to_string(),
1668 name: "wasm-host-function-trust".to_string(),
1669 short_description: "Untrusted data from WASM host functions".to_string(),
1670 full_description: "Detects patterns where data received from WebAssembly host functions \
1671 (wasmtime, wasmer, wasm-bindgen imports) is used without validation. Host-provided \
1672 data should be treated as untrusted input since the host environment may be compromised \
1673 or malicious.".to_string(),
1674 help_uri: Some("https://docs.rs/wasmtime/latest/wasmtime/".to_string()),
1675 default_severity: Severity::Medium,
1676 origin: RuleOrigin::BuiltIn,
1677 cwe_ids: Vec::new(),
1678 fix_suggestion: None,
1679 exploitability: Exploitability::default(),
1680 },
1681 }
1682 }
1683
1684 fn host_import_patterns() -> Vec<(&'static str, &'static str)> {
1685 vec![
1686 ("extern \"C\"", "C FFI import - host-provided function"),
1687 ("#[wasm_bindgen]", "wasm-bindgen import from host"),
1688 ("import_func!", "wasmer import macro"),
1689 ("Func::wrap", "wasmtime host function wrap"),
1690 ("Linker::func_wrap", "wasmtime linker import"),
1691 ("imports!", "wasmer imports macro"),
1692 ("Instance::new", "WASM instance with imports"),
1693 ]
1694 }
1695
1696 fn dangerous_usages() -> Vec<&'static str> {
1697 vec![
1698 "from_raw_parts",
1699 "from_utf8_unchecked",
1700 "transmute",
1701 "as_ptr",
1702 "offset(",
1703 "add(",
1704 "slice::from_raw_parts",
1705 "str::from_utf8_unchecked",
1706 "CStr::from_ptr",
1707 ]
1708 }
1709}
1710
1711impl Rule for WasmHostFunctionTrustRule {
1712 fn metadata(&self) -> &RuleMetadata {
1713 &self.metadata
1714 }
1715
1716 fn evaluate(
1717 &self,
1718 package: &MirPackage,
1719 _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
1720 ) -> Vec<Finding> {
1721 let mut findings = Vec::new();
1722 let crate_root = Path::new(&package.crate_root);
1723
1724 if !package.crate_name.contains("wasm")
1726 && !package.crate_name.contains("plugin")
1727 && !package.crate_name.contains("runtime")
1728 {
1729 }
1731
1732 for entry in walkdir::WalkDir::new(crate_root)
1733 .into_iter()
1734 .filter_map(Result::ok)
1735 .filter(|e| e.file_type().is_file())
1736 {
1737 let path = entry.path();
1738 if path.extension() != Some(OsStr::new("rs")) {
1739 continue;
1740 }
1741
1742 let rel_path = path
1743 .strip_prefix(crate_root)
1744 .unwrap_or(path)
1745 .to_string_lossy()
1746 .replace('\\', "/");
1747
1748 let content = match fs::read_to_string(path) {
1749 Ok(c) => c,
1750 Err(_) => continue,
1751 };
1752
1753 let has_wasm_imports = Self::host_import_patterns()
1755 .iter()
1756 .any(|(p, _)| content.contains(p));
1757
1758 if !has_wasm_imports {
1759 continue;
1760 }
1761
1762 let lines: Vec<&str> = content.lines().collect();
1763
1764 for (idx, line) in lines.iter().enumerate() {
1765 let trimmed = line.trim();
1766
1767 if trimmed.starts_with("//") || trimmed.starts_with("/*") {
1769 continue;
1770 }
1771
1772 for dangerous in Self::dangerous_usages() {
1774 if trimmed.contains(dangerous) {
1775 let context_start = idx.saturating_sub(10);
1777 let context_end = (idx + 5).min(lines.len());
1778 let context = &lines[context_start..context_end];
1779
1780 let has_host_import = Self::host_import_patterns()
1781 .iter()
1782 .any(|(p, _)| context.iter().any(|l| l.contains(p)));
1783
1784 let has_validation = context.iter().any(|l| {
1786 l.contains("if ")
1787 || l.contains("match ")
1788 || l.contains("validate")
1789 || l.contains("check")
1790 || l.contains(".is_ok()")
1791 || l.contains(".is_err()")
1792 || l.contains("?.")
1793 || l.contains("try!")
1794 });
1795
1796 if has_host_import && !has_validation {
1797 let location = format!("{}:{}", rel_path, idx + 1);
1798
1799 findings.push(Finding {
1800 rule_id: self.metadata.id.clone(),
1801 rule_name: self.metadata.name.clone(),
1802 severity: self.metadata.default_severity,
1803 message: format!(
1804 "Potentially untrusted host data used in '{}' without validation. \
1805 Data from WASM host functions should be validated before use in \
1806 unsafe operations.",
1807 dangerous
1808 ),
1809 function: location,
1810 function_signature: String::new(),
1811 evidence: vec![trimmed.to_string()],
1812 span: None,
1813 ..Default::default()
1814 });
1815 }
1816 }
1817 }
1818 }
1819 }
1820
1821 findings
1822 }
1823}
1824
1825pub struct WasmCapabilityLeakRule {
1832 metadata: RuleMetadata,
1833}
1834
1835impl WasmCapabilityLeakRule {
1836 pub fn new() -> Self {
1837 Self {
1838 metadata: RuleMetadata {
1839 id: "RUSTCOLA127".to_string(),
1840 name: "wasm-capability-leak".to_string(),
1841 short_description: "WASM component model capability leak".to_string(),
1842 full_description:
1843 "Detects patterns where sensitive capabilities (filesystem access, \
1844 network sockets, environment variables) may leak to WebAssembly guest modules \
1845 through component model exports or WASI permissions. Apply principle of least \
1846 privilege to guest capabilities."
1847 .to_string(),
1848 help_uri: Some("https://component-model.bytecodealliance.org/".to_string()),
1849 default_severity: Severity::High,
1850 origin: RuleOrigin::BuiltIn,
1851 cwe_ids: Vec::new(),
1852 fix_suggestion: None,
1853 exploitability: Exploitability::default(),
1854 },
1855 }
1856 }
1857
1858 fn capability_patterns() -> Vec<(&'static str, &'static str)> {
1859 vec![
1860 (
1861 "WasiCtxBuilder::new().inherit_stdio()",
1862 "Inherits all stdio - may leak sensitive output",
1863 ),
1864 (
1865 "inherit_env()",
1866 "Inherits environment variables - may leak secrets",
1867 ),
1868 (
1869 "inherit_network()",
1870 "Inherits network access - may allow exfiltration",
1871 ),
1872 (
1873 "inherit_args()",
1874 "Inherits command line args - may leak secrets",
1875 ),
1876 (
1877 "preopened_dir",
1878 "Preopen directory access - verify scope is minimal",
1879 ),
1880 (
1881 "allow_ip_name_lookup",
1882 "Allows DNS lookups - potential for exfiltration",
1883 ),
1884 ("allow_udp", "Allows UDP sockets"),
1885 ("allow_tcp", "Allows TCP connections"),
1886 (
1887 ".ctx_builder().build()",
1888 "Check WasiCtx configuration for minimal privileges",
1889 ),
1890 ]
1891 }
1892
1893 fn sensitive_exports() -> Vec<&'static str> {
1894 vec![
1895 "std::fs::",
1896 "std::net::",
1897 "std::process::",
1898 "std::env::",
1899 "tokio::fs::",
1900 "tokio::net::",
1901 "async_std::fs::",
1902 "async_std::net::",
1903 ]
1904 }
1905}
1906
1907impl Rule for WasmCapabilityLeakRule {
1908 fn metadata(&self) -> &RuleMetadata {
1909 &self.metadata
1910 }
1911
1912 fn evaluate(
1913 &self,
1914 package: &MirPackage,
1915 _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
1916 ) -> Vec<Finding> {
1917 let mut findings = Vec::new();
1918 let crate_root = Path::new(&package.crate_root);
1919
1920 for entry in walkdir::WalkDir::new(crate_root)
1921 .into_iter()
1922 .filter_map(Result::ok)
1923 .filter(|e| e.file_type().is_file())
1924 {
1925 let path = entry.path();
1926 if path.extension() != Some(OsStr::new("rs")) {
1927 continue;
1928 }
1929
1930 let rel_path = path
1931 .strip_prefix(crate_root)
1932 .unwrap_or(path)
1933 .to_string_lossy()
1934 .replace('\\', "/");
1935
1936 let content = match fs::read_to_string(path) {
1937 Ok(c) => c,
1938 Err(_) => continue,
1939 };
1940
1941 let is_wasm_host = content.contains("wasmtime")
1943 || content.contains("wasmer")
1944 || content.contains("WasiCtx")
1945 || content.contains("wasi_common");
1946
1947 if !is_wasm_host {
1948 continue;
1949 }
1950
1951 let lines: Vec<&str> = content.lines().collect();
1952
1953 for (idx, line) in lines.iter().enumerate() {
1954 let trimmed = line.trim();
1955
1956 if trimmed.starts_with("//") || trimmed.starts_with("/*") {
1958 continue;
1959 }
1960
1961 for (pattern, description) in Self::capability_patterns() {
1963 if trimmed.contains(pattern) {
1964 let location = format!("{}:{}", rel_path, idx + 1);
1965
1966 findings.push(Finding {
1967 rule_id: self.metadata.id.clone(),
1968 rule_name: self.metadata.name.clone(),
1969 severity: self.metadata.default_severity,
1970 message: format!(
1971 "Potential capability leak to WASM guest: {}. \
1972 Apply principle of least privilege - only grant necessary capabilities.",
1973 description
1974 ),
1975 function: location.clone(),
1976 function_signature: String::new(),
1977 evidence: vec![trimmed.to_string()],
1978 span: None,
1979 ..Default::default()
1980 });
1981 }
1982 }
1983
1984 for sensitive in Self::sensitive_exports() {
1986 if trimmed.contains(sensitive)
1987 && (trimmed.contains("Linker::")
1988 || trimmed.contains("func_wrap")
1989 || trimmed.contains("define(")
1990 || trimmed.contains("export("))
1991 {
1992 let location = format!("{}:{}", rel_path, idx + 1);
1993
1994 findings.push(Finding {
1995 rule_id: self.metadata.id.clone(),
1996 rule_name: self.metadata.name.clone(),
1997 severity: Severity::High,
1998 message: format!(
1999 "Exporting sensitive capability '{}' to WASM guest. \
2000 Verify this is intentional and properly sandboxed.",
2001 sensitive
2002 ),
2003 function: location,
2004 function_signature: String::new(),
2005 evidence: vec![trimmed.to_string()],
2006 span: None,
2007 ..Default::default()
2008 });
2009 }
2010 }
2011 }
2012 }
2013
2014 findings
2015 }
2016}
2017
2018pub fn register_ffi_rules(engine: &mut crate::RuleEngine) {
2024 engine.register_rule(Box::new(AllocatorMismatchFfiRule::new()));
2025 engine.register_rule(Box::new(UnsafeFfiPointerReturnRule::new()));
2026 engine.register_rule(Box::new(PackedFieldReferenceRule::new()));
2027 engine.register_rule(Box::new(UnsafeCStringPointerRule::new()));
2028 engine.register_rule(Box::new(CtorDtorStdApiRule::new()));
2029 engine.register_rule(Box::new(FfiBufferLeakRule::new()));
2030 engine.register_rule(Box::new(PanicInFfiBoundaryRule::new()));
2031 engine.register_rule(Box::new(EmbeddedInterpreterUsageRule::new()));
2032 engine.register_rule(Box::new(WasmLinearMemoryOobRule::new()));
2033 engine.register_rule(Box::new(WasmHostFunctionTrustRule::new()));
2034 engine.register_rule(Box::new(WasmCapabilityLeakRule::new()));
2035}