// mir_extractor/lib.rs

1#![cfg_attr(feature = "hir-driver", feature(rustc_private))]
2// Some helper functions are used only by subsets of rules.
3#![allow(dead_code)]
4
5#[cfg(feature = "hir-driver")]
6extern crate rustc_ast;
7#[cfg(feature = "hir-driver")]
8extern crate rustc_driver;
9#[cfg(feature = "hir-driver")]
10extern crate rustc_hir;
11#[cfg(feature = "hir-driver")]
12extern crate rustc_infer;
13#[cfg(feature = "hir-driver")]
14extern crate rustc_interface;
15#[cfg(feature = "hir-driver")]
16extern crate rustc_middle;
17#[cfg(feature = "hir-driver")]
18extern crate rustc_session;
19#[cfg(feature = "hir-driver")]
20extern crate rustc_span;
21#[cfg(feature = "hir-driver")]
22extern crate rustc_trait_selection;
23
24use anyhow::{anyhow, Context, Result};
25use serde::{Deserialize, Serialize};
26use serde_json::json;
27use sha2::{Digest, Sha256};
28use std::collections::{HashMap, HashSet};
29use std::ffi::OsStr;
30use std::fs::{self, File};
31use std::io::{Read, Write};
32use std::path::{Path, PathBuf};
33use std::process::Command;
34use std::time::{SystemTime, UNIX_EPOCH};
35use walkdir::{DirEntry, WalkDir};
36
37pub mod dataflow;
38#[cfg(feature = "hir-driver")]
39mod hir;
40#[cfg(feature = "hir-driver")]
41mod hir_query;
42pub mod interprocedural;
43pub mod memory_profiler;
44mod prototypes;
45pub mod rules;
46#[cfg(feature = "hir-driver")]
47mod type_analyzer;
48
49pub use dataflow::{Assignment, MirDataflow};
50#[cfg(feature = "hir-driver")]
51pub use hir::{
52    capture_hir, capture_root_from_env, collect_crate_snapshot, target_spec_from_env,
53    HirFunctionBody, HirIndex, HirItem, HirPackage, HirTargetSpec, HirTypeMetadata,
54};
55#[cfg(feature = "hir-driver")]
56pub use hir_query::HirQuery;
57pub use prototypes::{
58    detect_broadcast_unsync_payloads, detect_command_invocations,
59    detect_content_length_allocations, detect_openssl_verify_none, detect_truncating_len_casts,
60    detect_unbounded_allocations, BroadcastUnsyncUsage, CommandInvocation, ContentLengthAllocation,
61    LengthTruncationCast, OpensslVerifyNoneInvocation,
62};
63#[cfg(feature = "hir-driver")]
64pub use type_analyzer::{CacheStats, TypeAnalyzer};
65
66#[cfg(feature = "hir-driver")]
67pub const HIR_CAPTURE_ICE_LOG_PREFIX: &str = "rust-cola: rustc ICE while capturing HIR";
68
/// Severity levels for security findings (CVSS-aligned).
///
/// - Critical: Exploitable remotely without authentication, leads to full system compromise
/// - High: Serious vulnerability, likely exploitable
/// - Medium: Moderate risk, requires specific conditions
/// - Low: Minor issue, limited impact
///
/// Variants are declared from least to most severe, so the derived `Ord`
/// compares them as `Low < Medium < High < Critical`. Serialized as the
/// lowercase variant name (e.g. `"critical"`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Severity {
    Low,
    Medium,
    High,
    Critical,
}
82
83impl Severity {
84    pub fn sarif_level(&self) -> &'static str {
85        match self {
86            Severity::Low => "note",
87            Severity::Medium => "warning",
88            Severity::High => "error",
89            Severity::Critical => "error",
90        }
91    }
92
93    /// Get a human-readable label with emoji
94    pub fn label(&self) -> &'static str {
95        match self {
96            Severity::Low => "🟢 Low",
97            Severity::Medium => "🟡 Medium",
98            Severity::High => "🟠 High",
99            Severity::Critical => "🔴 Critical",
100        }
101    }
102
103    /// Get CVSS score range for this severity
104    pub fn cvss_range(&self) -> &'static str {
105        match self {
106            Severity::Low => "0.1-3.9",
107            Severity::Medium => "4.0-6.9",
108            Severity::High => "7.0-8.9",
109            Severity::Critical => "9.0-10.0",
110        }
111    }
112}
113
114/// Confidence level for analysis findings
115/// - High: Strong evidence, low false positive likelihood
116/// - Medium: Moderate evidence, may require manual review
117/// - Low: Weak evidence, higher false positive likelihood
118#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
119#[serde(rename_all = "lowercase")]
120pub enum Confidence {
121    Low,
122    Medium,
123    High,
124}
125
126impl Confidence {
127    pub fn label(&self) -> &'static str {
128        match self {
129            Confidence::Low => "Low",
130            Confidence::Medium => "Medium",
131            Confidence::High => "High",
132        }
133    }
134}
135
136impl Default for Confidence {
137    fn default() -> Self {
138        Confidence::Medium
139    }
140}
141
/// Code context classification for findings.
///
/// Used to categorize where a finding was detected, enabling filtering without data loss.
/// Serialized as the kebab-case variant name; the default is `Production`.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum CodeContext {
    /// Normal production code (default)
    #[default]
    Production,
    /// Test code: #[test], tests/, #[cfg(test)] modules
    Test,
    /// Example code: examples/ directory
    Example,
    /// Benchmark code: benches/ directory, #[bench]
    Benchmark,
    /// Generated code: build.rs output, proc macros (future)
    Generated,
}
159
160impl CodeContext {
161    /// Human-readable label for this context
162    pub fn label(&self) -> &'static str {
163        match self {
164            CodeContext::Production => "production",
165            CodeContext::Test => "test",
166            CodeContext::Example => "example",
167            CodeContext::Benchmark => "benchmark",
168            CodeContext::Generated => "generated",
169        }
170    }
171
172    /// Whether this context is typically excluded from primary analysis
173    pub fn is_non_production(&self) -> bool {
174        !matches!(self, CodeContext::Production)
175    }
176
177    /// SARIF suppression justification for this context
178    pub fn suppression_justification(&self) -> Option<&'static str> {
179        match self {
180            CodeContext::Production => None,
181            CodeContext::Test => Some("Finding is in test code"),
182            CodeContext::Example => Some("Finding is in example code"),
183            CodeContext::Benchmark => Some("Finding is in benchmark code"),
184            CodeContext::Generated => Some("Finding is in generated code"),
185        }
186    }
187}
188
189// ============================================================================
190// CVSS-like Exploitability Metrics
191// ============================================================================
192
/// Attack vector - how the vulnerability can be exploited.
///
/// Serialized as the lowercase variant name; the default is `Local`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "lowercase")]
pub enum AttackVector {
    /// Exploitable over the network (e.g., via HTTP request)
    Network,
    /// Requires adjacent network access (e.g., local network)
    Adjacent,
    /// Requires local access to the system
    #[default]
    Local,
    /// Requires physical access to the device
    Physical,
}
207
208impl AttackVector {
209    /// CVSS v3.1 score contribution (0.0 - 0.85)
210    pub fn score(&self) -> f32 {
211        match self {
212            AttackVector::Network => 0.85,
213            AttackVector::Adjacent => 0.62,
214            AttackVector::Local => 0.55,
215            AttackVector::Physical => 0.20,
216        }
217    }
218
219    pub fn label(&self) -> &'static str {
220        match self {
221            AttackVector::Network => "Network",
222            AttackVector::Adjacent => "Adjacent",
223            AttackVector::Local => "Local",
224            AttackVector::Physical => "Physical",
225        }
226    }
227}
228
/// Attack complexity - conditions beyond attacker's control.
///
/// Serialized as the lowercase variant name; the default is `Low`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "lowercase")]
pub enum AttackComplexity {
    /// No special conditions required
    #[default]
    Low,
    /// Requires specific conditions (race condition, specific config, etc.)
    High,
}
239
240impl AttackComplexity {
241    /// CVSS v3.1 score contribution
242    pub fn score(&self) -> f32 {
243        match self {
244            AttackComplexity::Low => 0.77,
245            AttackComplexity::High => 0.44,
246        }
247    }
248
249    pub fn label(&self) -> &'static str {
250        match self {
251            AttackComplexity::Low => "Low",
252            AttackComplexity::High => "High",
253        }
254    }
255}
256
/// Privileges required to exploit the vulnerability.
///
/// Serialized as the lowercase variant name; the default is `None`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "lowercase")]
pub enum PrivilegesRequired {
    /// No privileges required (unauthenticated)
    #[default]
    None,
    /// Low privileges (normal user)
    Low,
    /// High privileges (admin/root)
    High,
}
269
270impl PrivilegesRequired {
271    /// CVSS v3.1 score contribution
272    pub fn score(&self) -> f32 {
273        match self {
274            PrivilegesRequired::None => 0.85,
275            PrivilegesRequired::Low => 0.62,
276            PrivilegesRequired::High => 0.27,
277        }
278    }
279
280    pub fn label(&self) -> &'static str {
281        match self {
282            PrivilegesRequired::None => "None",
283            PrivilegesRequired::Low => "Low",
284            PrivilegesRequired::High => "High",
285        }
286    }
287}
288
/// Whether user interaction is required.
///
/// Serialized as the lowercase variant name; the default is `None`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "lowercase")]
pub enum UserInteraction {
    /// No user interaction required
    #[default]
    None,
    /// Requires user to perform some action (click link, open file)
    Required,
}
299
300impl UserInteraction {
301    /// CVSS v3.1 score contribution
302    pub fn score(&self) -> f32 {
303        match self {
304            UserInteraction::None => 0.85,
305            UserInteraction::Required => 0.62,
306        }
307    }
308
309    pub fn label(&self) -> &'static str {
310        match self {
311            UserInteraction::None => "None",
312            UserInteraction::Required => "Required",
313        }
314    }
315}
316
/// Exploitability metrics bundle (CVSS-like).
///
/// The derived `Default` combines each field's own default:
/// `Local` / `Low` / `None` / `None`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct Exploitability {
    /// How the vulnerability can be reached.
    pub attack_vector: AttackVector,
    /// Conditions beyond the attacker's control.
    pub attack_complexity: AttackComplexity,
    /// Privileges the attacker needs.
    pub privileges_required: PrivilegesRequired,
    /// Whether a user has to take part in the attack.
    pub user_interaction: UserInteraction,
}
325
326impl Exploitability {
327    /// Create with all factors specified
328    pub fn new(
329        attack_vector: AttackVector,
330        attack_complexity: AttackComplexity,
331        privileges_required: PrivilegesRequired,
332        user_interaction: UserInteraction,
333    ) -> Self {
334        Self {
335            attack_vector,
336            attack_complexity,
337            privileges_required,
338            user_interaction,
339        }
340    }
341
342    /// Network attack, low complexity, no auth, no user interaction (worst case)
343    pub fn network_unauthenticated() -> Self {
344        Self {
345            attack_vector: AttackVector::Network,
346            attack_complexity: AttackComplexity::Low,
347            privileges_required: PrivilegesRequired::None,
348            user_interaction: UserInteraction::None,
349        }
350    }
351
352    /// Local attack requiring some privileges
353    pub fn local_privileged() -> Self {
354        Self {
355            attack_vector: AttackVector::Local,
356            attack_complexity: AttackComplexity::Low,
357            privileges_required: PrivilegesRequired::Low,
358            user_interaction: UserInteraction::None,
359        }
360    }
361
362    /// Compute exploitability sub-score (0.0 - 3.9)
363    /// Based on CVSS v3.1 formula: 8.22 × AV × AC × PR × UI
364    pub fn score(&self) -> f32 {
365        8.22 * self.attack_vector.score()
366            * self.attack_complexity.score()
367            * self.privileges_required.score()
368            * self.user_interaction.score()
369    }
370
371    /// Get a human-readable summary
372    pub fn summary(&self) -> String {
373        format!(
374            "AV:{}/AC:{}/PR:{}/UI:{}",
375            match self.attack_vector {
376                AttackVector::Network => "N",
377                AttackVector::Adjacent => "A",
378                AttackVector::Local => "L",
379                AttackVector::Physical => "P",
380            },
381            match self.attack_complexity {
382                AttackComplexity::Low => "L",
383                AttackComplexity::High => "H",
384            },
385            match self.privileges_required {
386                PrivilegesRequired::None => "N",
387                PrivilegesRequired::Low => "L",
388                PrivilegesRequired::High => "H",
389            },
390            match self.user_interaction {
391                UserInteraction::None => "N",
392                UserInteraction::Required => "R",
393            }
394        )
395    }
396}
397
/// Where a rule definition comes from.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub enum RuleOrigin {
    /// Rule shipped with the analyzer itself.
    BuiltIn,
    /// Rule loaded from a rule pack; `source` identifies the pack.
    RulePack { source: String },
    /// Rule provided as WebAssembly; `module` identifies the module.
    Wasm { module: String },
}
404
405impl RuleOrigin {
406    pub fn label(&self) -> String {
407        match self {
408            RuleOrigin::BuiltIn => "built-in".to_string(),
409            RuleOrigin::RulePack { source } => format!("rulepack:{source}"),
410            RuleOrigin::Wasm { module } => format!("wasm:{module}"),
411        }
412    }
413}
414
/// Descriptive metadata attached to a rule.
///
/// `cwe_ids`, `fix_suggestion` and `exploitability` fall back to their
/// defaults when absent from the serialized form; empty `cwe_ids` and a
/// `None` `fix_suggestion` are omitted when serializing.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct RuleMetadata {
    pub id: String,
    pub name: String,
    pub short_description: String,
    pub full_description: String,
    pub help_uri: Option<String>,
    pub default_severity: Severity,
    pub origin: RuleOrigin,
    /// CWE (Common Weakness Enumeration) identifiers
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub cwe_ids: Vec<String>,
    /// Fix suggestion template
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub fix_suggestion: Option<String>,
    /// CVSS-like exploitability metrics for this rule
    #[serde(default)]
    pub exploitability: Exploitability,
}
434
/// A region of a source file, given as start and end line/column positions.
///
/// NOTE(review): whether lines and columns are 0- or 1-based, and whether the
/// end position is inclusive, is not established in this part of the file —
/// confirm against the code that produces spans.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct SourceSpan {
    pub file: String,
    pub start_line: u32,
    pub start_column: u32,
    pub end_line: u32,
    pub end_column: u32,
}
443
/// A single result reported by a rule.
///
/// Fields marked `#[serde(default)]` fall back to their type's default when
/// absent from the serialized form (so a missing `exploitability_score`
/// deserializes to `0.0`, not to a recomputed score). Empty `cwe_ids` and
/// `None` optionals are omitted when serializing.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct Finding {
    pub rule_id: String,
    pub rule_name: String,
    pub severity: Severity,
    /// Confidence level of this specific finding
    #[serde(default)]
    pub confidence: Confidence,
    pub message: String,
    pub function: String,
    pub function_signature: String,
    pub evidence: Vec<String>,
    pub span: Option<SourceSpan>,
    /// CWE identifiers for this finding
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub cwe_ids: Vec<String>,
    /// Actionable fix suggestion
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub fix_suggestion: Option<String>,
    /// Code snippet showing the vulnerable code
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub code_snippet: Option<String>,
    /// CVSS-like exploitability metrics
    #[serde(default)]
    pub exploitability: Exploitability,
    /// Computed exploitability score (0.0 - 3.9)
    #[serde(default)]
    pub exploitability_score: f32,
    /// Code context where this finding was detected (production, test, example, etc.)
    #[serde(default)]
    pub code_context: CodeContext,
    /// Reason for filtering if this finding was excluded from primary results
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub filter_reason: Option<String>,
}
479
480impl Finding {
481    /// Create a new finding with default confidence and optional fields
482    pub fn new(
483        rule_id: impl Into<String>,
484        rule_name: impl Into<String>,
485        severity: Severity,
486        message: impl Into<String>,
487        function: impl Into<String>,
488        function_signature: impl Into<String>,
489        evidence: Vec<String>,
490        span: Option<SourceSpan>,
491    ) -> Self {
492        let exploitability = Exploitability::default();
493        let exploitability_score = exploitability.score();
494        Self {
495            rule_id: rule_id.into(),
496            rule_name: rule_name.into(),
497            severity,
498            confidence: Confidence::Medium,
499            message: message.into(),
500            function: function.into(),
501            function_signature: function_signature.into(),
502            evidence,
503            span,
504            cwe_ids: Vec::new(),
505            fix_suggestion: None,
506            code_snippet: None,
507            exploitability,
508            exploitability_score,
509            code_context: CodeContext::Production,
510            filter_reason: None,
511        }
512    }
513
514    /// Set confidence level
515    pub fn with_confidence(mut self, confidence: Confidence) -> Self {
516        self.confidence = confidence;
517        self
518    }
519
520    /// Set CWE identifiers
521    pub fn with_cwe(mut self, cwe_ids: Vec<String>) -> Self {
522        self.cwe_ids = cwe_ids;
523        self
524    }
525
526    /// Set fix suggestion
527    pub fn with_fix(mut self, fix: impl Into<String>) -> Self {
528        self.fix_suggestion = Some(fix.into());
529        self
530    }
531
532    /// Set code snippet
533    pub fn with_snippet(mut self, snippet: impl Into<String>) -> Self {
534        self.code_snippet = Some(snippet.into());
535        self
536    }
537
538    /// Set exploitability metrics and compute score
539    pub fn with_exploitability(mut self, exploitability: Exploitability) -> Self {
540        self.exploitability = exploitability;
541        self.exploitability_score = exploitability.score();
542        self
543    }
544
545    /// Set code context (production, test, example, benchmark)
546    pub fn with_code_context(mut self, context: CodeContext) -> Self {
547        self.code_context = context;
548        self
549    }
550
551    /// Mark this finding as filtered with a reason
552    pub fn with_filter_reason(mut self, reason: impl Into<String>) -> Self {
553        self.filter_reason = Some(reason.into());
554        self
555    }
556
557    /// Returns true if this finding is from non-production code
558    pub fn is_non_production(&self) -> bool {
559        self.code_context.is_non_production()
560    }
561}
562
563impl Default for Finding {
564    fn default() -> Self {
565        let exploitability = Exploitability::default();
566        Self {
567            rule_id: String::new(),
568            rule_name: String::new(),
569            severity: Severity::Medium,
570            confidence: Confidence::Medium,
571            message: String::new(),
572            function: String::new(),
573            function_signature: String::new(),
574            evidence: Vec::new(),
575            span: None,
576            cwe_ids: Vec::new(),
577            fix_suggestion: None,
578            code_snippet: None,
579            exploitability_score: exploitability.score(),
580            exploitability,
581            code_context: CodeContext::Production,
582            filter_reason: None,
583        }
584    }
585}
586
/// HIR-derived metadata attached to a MIR function.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct MirFunctionHirMetadata {
    /// Def-path hash of the function, stored as text.
    pub def_path_hash: String,
    /// Optional signature text; omitted from the serialized form when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub signature: Option<String>,
}
593
594#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
595pub struct MirFunction {
596    pub name: String,
597    pub signature: String,
598    pub body: Vec<String>,
599    pub span: Option<SourceSpan>,
600    #[serde(default, skip_serializing_if = "Option::is_none")]
601    pub hir: Option<MirFunctionHirMetadata>,
602}
603
604impl Default for MirFunction {
605    fn default() -> Self {
606        Self {
607            name: String::new(),
608            signature: String::new(),
609            body: Vec::new(),
610            span: None,
611            hir: None,
612        }
613    }
614}
615
616impl MirFunction {
617    /// v1.0.1: Returns true if this function is test code that should be excluded from analysis.
618    ///
619    /// Detects test code based on:
620    /// - File path patterns (tests/, examples/, benches/)
621    /// - Function name patterns (test_, ::tests::, ::mock_)
622    /// - Test attributes (#[test], #[cfg(test)])
623    pub fn is_test_code(&self) -> bool {
624        // Check file path patterns from span
625        if let Some(ref span) = self.span {
626            let path_lower = span.file.to_lowercase();
627            let path_patterns = [
628                "/tests/",
629                "/test/",
630                "_test.rs",
631                "_tests.rs",
632                "/benches/",
633                "/bench/",
634                "/examples/",
635                "/example/",
636            ];
637
638            if path_patterns.iter().any(|p| path_lower.contains(p)) {
639                return true;
640            }
641        }
642
643        // Check function name patterns
644        let name_lower = self.name.to_lowercase();
645        let name_patterns = [
646            "::tests::",
647            "::test_",
648            "test_",
649            "::mock_",
650            "::fake_",
651            "::stub_",
652            "_test::",
653            "::benches::",
654            "::bench_",
655        ];
656
657        if name_patterns.iter().any(|p| name_lower.contains(p)) {
658            return true;
659        }
660
661        // Check for test attributes in body (from MIR metadata)
662        let body_str = self.body.join("\n").to_lowercase();
663        let attr_patterns = [
664            "#[test]",
665            "#[cfg(test)]",
666            "#[tokio::test]",
667            "#[async_std::test]",
668            "#[ignore]",
669            "proptest!",
670            "quickcheck!",
671        ];
672
673        if attr_patterns.iter().any(|p| body_str.contains(&p.to_lowercase())) {
674            return true;
675        }
676
677        false
678    }
679
680    /// v1.0.1: Returns true if this function is example code
681    pub fn is_example_code(&self) -> bool {
682        if let Some(ref span) = self.span {
683            let path_lower = span.file.to_lowercase();
684            return path_lower.contains("/examples/") || path_lower.contains("/example/");
685        }
686        false
687    }
688
689    /// v1.0.1: Returns true if this function is benchmark code
690    pub fn is_bench_code(&self) -> bool {
691        if let Some(ref span) = self.span {
692            let path_lower = span.file.to_lowercase();
693            return path_lower.contains("/benches/") || path_lower.contains("/bench/");
694        }
695        self.name.to_lowercase().contains("::bench_")
696    }
697}
698
/// The MIR functions extracted for one crate.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct MirPackage {
    /// Name of the crate.
    pub crate_name: String,
    /// Root of the crate, stored as text.
    pub crate_root: String,
    /// Functions extracted from the crate.
    pub functions: Vec<MirFunction>,
}
705
706// ============================================================================
707// Source-Level Analysis Infrastructure (Tier 2)
708// ============================================================================
709
/// Represents parsed source code for a Rust file
#[derive(Clone, Debug)]
pub struct SourceFile {
    /// Path the file was read from.
    pub path: PathBuf,
    /// Full text of the file.
    pub content: String,
    /// Parsed syntax tree; `None` when `syn` could not parse the content.
    pub syntax_tree: Option<syn::File>,
}
717
718impl SourceFile {
719    pub fn from_path(path: impl AsRef<Path>) -> Result<Self> {
720        let path = path.as_ref().to_path_buf();
721        let content = fs::read_to_string(&path)
722            .with_context(|| format!("Failed to read source file: {}", path.display()))?;
723
724        let syntax_tree = syn::parse_file(&content).ok();
725
726        Ok(Self {
727            path,
728            content,
729            syntax_tree,
730        })
731    }
732
733    /// Get all source files in a crate recursively
734    pub fn collect_crate_sources(crate_root: impl AsRef<Path>) -> Result<Vec<Self>> {
735        let mut sources = Vec::new();
736        let crate_root = crate_root.as_ref();
737
738        for entry in WalkDir::new(crate_root)
739            .into_iter()
740            .filter_entry(|e| {
741                // Skip target, .git, and hidden directories
742                let file_name = e.file_name().to_string_lossy();
743                !file_name.starts_with('.') && file_name != "target"
744            })
745            .filter_map(|e| e.ok())
746        {
747            if entry.file_type().is_file() {
748                if let Some(ext) = entry.path().extension() {
749                    if ext == "rs" {
750                        if let Ok(source) = Self::from_path(entry.path()) {
751                            sources.push(source);
752                        }
753                    }
754                }
755            }
756        }
757
758        Ok(sources)
759    }
760}
761
/// Package with both MIR and source-level information
#[derive(Clone, Debug)]
pub struct EnrichedPackage {
    /// MIR extracted for the crate.
    pub mir: MirPackage,
    /// Source files collected from the crate root.
    pub sources: Vec<SourceFile>,
}
768
769impl EnrichedPackage {
770    pub fn new(mir: MirPackage, crate_root: impl AsRef<Path>) -> Result<Self> {
771        let sources = SourceFile::collect_crate_sources(crate_root)?;
772        Ok(Self { mir, sources })
773    }
774}
775
776// ============================================================================
777// End Source-Level Analysis Infrastructure
778// ============================================================================
779
/// Everything produced by one extraction run.
#[derive(Clone, Debug)]
pub struct ExtractionArtifacts {
    /// Extracted MIR package.
    pub mir: MirPackage,
    /// Captured HIR package; the field only exists with the `hir-driver` feature.
    #[cfg(feature = "hir-driver")]
    pub hir: Option<HirPackage>,
}
786
/// Outcome of an analysis run: findings plus rule metadata.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct AnalysisResult {
    /// Findings reported by the rules.
    pub findings: Vec<Finding>,
    /// Rule metadata included with the result.
    pub rules: Vec<RuleMetadata>,
}
792
793pub trait Rule: Send + Sync {
794    fn metadata(&self) -> &RuleMetadata;
795
796    /// Evaluate this rule against a MIR package.
797    ///
798    /// The optional `inter_analysis` parameter provides shared interprocedural
799    /// analysis (call graph, function summaries) for rules that need cross-function
800    /// taint tracking. Rules that don't need it should ignore the parameter.
801    fn evaluate(
802        &self,
803        package: &MirPackage,
804        inter_analysis: Option<&interprocedural::InterProceduralAnalysis>,
805    ) -> Vec<Finding>;
806
807    fn cache_key(&self) -> String {
808        serde_json::to_string(self.metadata()).unwrap_or_default()
809    }
810}
811
/// Return the trimmed text of every line that contains at least one of
/// `patterns` (case-sensitive substring match), in input order.
fn collect_matches(lines: &[String], patterns: &[&str]) -> Vec<String> {
    let mut hits = Vec::new();
    for line in lines {
        let is_hit = patterns.iter().any(|needle| line.contains(*needle));
        if is_hit {
            hits.push(line.trim().to_string());
        }
    }
    hits
}
819
/// Case-insensitive variant of `collect_matches`: return the trimmed,
/// original-case text of every line that contains at least one of `patterns`
/// once both sides are lower-cased.
#[allow(dead_code)]
fn collect_case_insensitive_matches(lines: &[String], patterns: &[&str]) -> Vec<String> {
    // Lower-case the patterns once instead of once per line.
    let needles: Vec<String> = patterns
        .iter()
        .map(|pattern| pattern.to_lowercase())
        .collect();

    let mut hits = Vec::new();
    for line in lines {
        let haystack = line.to_lowercase();
        if needles.iter().any(|needle| haystack.contains(needle.as_str())) {
            hits.push(line.trim().to_string());
        }
    }
    hits
}
835
/// Extract the value of every `0o…` octal literal found in `line`.
///
/// After each `0o` prefix, octal digits are collected and `_` separators are
/// skipped until any other character (or the end of the line) is reached.
/// Prefixes with no digits and literals that do not fit in a `u32` are
/// ignored. Values are returned in order of appearance.
fn extract_octal_literals(line: &str) -> Vec<u32> {
    let mut values = Vec::new();
    let mut search_start = 0;

    while let Some(relative_idx) = line[search_start..].find("0o") {
        let digits_start = search_start + relative_idx + 2;
        let mut digits = String::new();
        let mut consumed = 0;

        for (byte_idx, ch) in line[digits_start..].char_indices() {
            match ch {
                '0'..='7' => digits.push(ch),
                '_' => {}
                _ => break,
            }
            consumed = byte_idx + ch.len_utf8();
        }

        // An empty digit string and an overflowing literal both yield `Err`.
        if let Ok(value) = u32::from_str_radix(&digits, 8) {
            values.push(value);
        }

        // Resume exactly where scanning stopped. This offset is always a char
        // boundary no greater than `line.len()`, so the next slice cannot
        // panic. Skipping one extra byte here used to overshoot the end of
        // the line when a literal was its last token, or land inside a
        // multi-byte character. The character at this position is never an
        // octal digit, so no `0o` prefix can be missed.
        search_start = digits_start + consumed;
    }

    values
}
870
871fn line_has_world_writable_mode(line: &str) -> bool {
872    let contains_mode_call = [
873        "set_mode(",
874        ".mode(",
875        "::mode(",
876        "from_mode(",
877        "::from_mode(",
878    ]
879    .iter()
880    .any(|pattern| line.contains(pattern));
881
882    if !contains_mode_call && !line.contains("GENERIC_ALL") {
883        return false;
884    }
885
886    if line.contains("GENERIC_ALL") {
887        return true;
888    }
889
890    extract_octal_literals(line)
891        .into_iter()
892        .any(|value| (value & 0o022) != 0)
893}
894
895// =============================================================================
896// LEGACY RULE IMPLEMENTATIONS
897// These rules have been migrated to the rules/ module but kept here temporarily
898// to avoid breaking changes. They will be removed in a future cleanup.
899// =============================================================================
/// Detect an `md5` path segment in `line`, ignoring case.
///
/// An occurrence of `md5` counts when, skipping any whitespace, it is directly
/// preceded or directly followed by `::` (e.g. `md5::compute`, `crypto::Md5`,
/// `md5 :: new`). Plain identifiers such as `md5_sum` do not count.
#[allow(dead_code)]
fn line_contains_md5_usage(line: &str) -> bool {
    let lower = line.to_lowercase();

    lower.match_indices("md5").any(|(idx, token)| {
        let before = lower[..idx].trim_end();
        let after = lower[idx + token.len()..].trim_start();
        before.ends_with("::") || after.starts_with("::")
    })
}
929
/// Detect a `sha1` path segment in `line`, ignoring case: the text `sha1::`
/// or `::sha1` must appear.
fn line_contains_sha1_usage(line: &str) -> bool {
    let lower = line.to_lowercase();
    ["sha1::", "::sha1"]
        .iter()
        .any(|needle| lower.contains(needle))
}
934
/// Detect use of additional weak or non-cryptographic hashes (RIPEMD, CRC,
/// Adler) in `line`, ignoring case.
///
/// Lines that look like string constants or hex dumps are rejected first,
/// since they are often error messages or debug output and not real usage.
fn line_contains_weak_hash_extended(line: &str) -> bool {
    // Every marker is specific enough (path separator or bit-width suffix) to
    // avoid matching arbitrary words that merely contain "crc" or "adler".
    const WEAK_HASH_MARKERS: [&str; 17] = [
        // RIPEMD family (all variants are deprecated)
        "ripemd::",
        "::ripemd",
        "ripemd128",
        "ripemd160",
        "ripemd256",
        "ripemd320",
        // CRC family (non-cryptographic checksums)
        "crc::",
        "::crc",
        "crc32",
        "crc_32",
        "crc16",
        "crc_16",
        "crc64",
        "crc_64",
        // Adler32 (non-cryptographic checksum)
        "adler::",
        "::adler",
        "adler32",
    ];

    let lower = line.to_lowercase();

    // Const string operands, including MIR assignments such as
    // `_1 = [const "error message with adler32 or crc"]`.
    let is_const_string = lower.contains("const \"");
    // Hex dumps (MIR allocator debug output)
    let is_hex_dump = lower.starts_with("0x") || (lower.contains("0x") && lower.contains("│"));
    if is_const_string || is_hex_dump {
        return false;
    }

    WEAK_HASH_MARKERS
        .iter()
        .any(|marker| lower.contains(marker))
}
987
/// Heuristically reports whether `line` transmutes a null/zero value.
///
/// Matching is case-insensitive. The line must mention `transmute`; lines
/// containing `(Transmute)` are rejected because that is how internal
/// compiler transmute casts (e.g. `Unique<T>` → `NonNull<T>`) are shown,
/// as opposed to user-written transmute calls. A remaining line is reported
/// when it transmutes a zero constant or mentions `null` anywhere.
fn looks_like_null_pointer_transmute(line: &str) -> bool {
    let text = line.to_lowercase();
    let mentions = |needle: &str| text.contains(needle);

    if !mentions("transmute") || mentions("(transmute)") {
        return false;
    }

    // Pattern 1: transmute(0) / transmute(0usize)
    mentions("transmute(const 0")
        || mentions("transmute(0_")
        // Pattern 2: transmute(std::ptr::null()) / transmute(std::ptr::null_mut())
        || mentions("std::ptr::null")
        || mentions("::ptr::null")
        // Pattern 3: any other mention of "null" alongside the transmute
        || mentions("null")
}
1021
/// Heuristically reports whether `line` performs pointer arithmetic on a
/// zero-sized type (ZST).
///
/// Matching is case-insensitive and purely textual. The line must mention one
/// of the pointer arithmetic method names, and then either
/// * contain one of the known ZST markers (unit pointers, `PhantomData`,
///   `PhantomPinned`, `[(); 0]` pointers, `zst` naming, or a conventional
///   empty/unit/marker type name), or
/// * contain `<impl *const T>` / `<impl *mut T>` where the text `T` contains
///   `empty`, `unit`, `marker` or `zst`. Only the first occurrence of each
///   prefix is inspected.
fn looks_like_zst_pointer_arithmetic(line: &str) -> bool {
    // Pointer arithmetic methods to detect.
    const ARITHMETIC_METHODS: &[&str] = &[
        "offset",
        "add",
        "sub",
        "wrapping_offset",
        "wrapping_add",
        "wrapping_sub",
        "offset_from",
    ];

    // Substrings that mark the pointee as zero-sized.
    const ZST_MARKERS: &[&str] = &[
        // Unit type pointers
        "*const ()",
        "*mut ()",
        // Std marker types, bare and fully qualified
        "phantomdata",
        "phantompinned",
        "std::marker::phantomdata",
        "::marker::phantomdata",
        "core::marker::phantomdata",
        "std::marker::phantompinned",
        "::marker::phantompinned",
        "core::marker::phantompinned",
        // Empty array pointers
        "*const [(); 0]",
        "*mut [(); 0]",
        // ZST status indicated in naming: ptr_zst, zst_ptr, ...
        "_zst",
        "zst_",
        // Naming conventions for user-defined empty types
        "emptystruct",
        "emptyenum",
        "emptytype",
        "empty_struct",
        "empty_enum",
        "empty_type",
        "unitstruct",
        "unitenum",
        "unittype",
        "unit_struct",
        "unit_enum",
        "unit_type",
        "markerstruct",
        "markerenum",
        "markertype",
        "marker_struct",
        "marker_enum",
        "marker_type",
        "zststruct",
        "zstenum",
        "zsttype",
        "zst_struct",
        "zst_enum",
        "zst_type",
    ];

    // Pointer impl prefixes, as in `const_ptr::<impl *const TypeName>::method`.
    const IMPL_PREFIXES: &[&str] = &["<impl *const ", "<impl *mut "];
    const ZST_NAME_HINTS: &[&str] = &["empty", "unit", "marker", "zst"];

    let text = line.to_lowercase();

    if !ARITHMETIC_METHODS.iter().any(|method| text.contains(*method)) {
        return false;
    }

    if ZST_MARKERS.iter().any(|marker| text.contains(*marker)) {
        return true;
    }

    // Extract the pointee type text between the impl prefix and the next `>`
    // and check it against the ZST naming hints.
    IMPL_PREFIXES.iter().any(|prefix| {
        text.find(*prefix)
            .map(|at| &text[at + prefix.len()..])
            .and_then(|rest| rest.find('>').map(|end| &rest[..end]))
            .map_or(false, |type_name| {
                ZST_NAME_HINTS.iter().any(|hint| type_name.contains(*hint))
            })
    })
}
1155
/// Heuristically reports whether `line` sets an environment variable whose
/// name looks sensitive.
///
/// Matching is case-insensitive. All three conditions must hold: the line
/// mentions `set_var`, it mentions `std::env` or `::env::` (in MIR the call
/// appears as `std::env::set_var::<&str, &str>`), and it contains one of the
/// sensitive name fragments anywhere on the line.
#[allow(dead_code)]
fn looks_like_cleartext_env_var(line: &str) -> bool {
    // Sensitive environment variable name patterns.
    const SENSITIVE_NAMES: &[&str] = &[
        "password",
        "passwd",
        "pwd",
        "secret",
        "token",
        "api_key",
        "apikey",
        "auth",
        "private_key",
        "privatekey",
        "jwt",
        "access_token",
        "refresh_token",
        "bearer",
        "credential",
        "db_password",
        "database_password",
    ];

    let text = line.to_lowercase();

    let calls_set_var = text.contains("set_var");
    let in_env_module = text.contains("std::env") || text.contains("::env::");

    calls_set_var
        && in_env_module
        && SENSITIVE_NAMES.iter().any(|name| text.contains(*name))
}
1195
1196fn command_rule_should_skip(function: &MirFunction, package: &MirPackage) -> bool {
1197    if package.crate_name == "mir-extractor" {
1198        matches!(
1199            function.name.as_str(),
1200            "detect_rustc_version"
1201                | "run_cargo_rustc"
1202                | "discover_rustc_targets"
1203                | "detect_crate_name"
1204        )
1205    } else {
1206        false
1207    }
1208}
1209
/// Reports whether `needle` occurs in `text` as a whole word, ignoring case.
///
/// Words are maximal runs of alphanumeric characters and `_`; everything else
/// separates words. An empty `needle` never matches.
fn text_contains_word_case_insensitive(text: &str, needle: &str) -> bool {
    if needle.is_empty() {
        return false;
    }

    let is_separator = |c: char| !c.is_alphanumeric() && c != '_';
    let wanted = needle.to_lowercase();
    let haystack = text.to_lowercase();

    haystack.split(is_separator).any(|word| word == wanted)
}
1220
/// Returns `line` with `//` line comments and `/* ... */` block comments
/// removed.
///
/// `in_block_comment` carries block-comment state across calls: pass `true`
/// when a previous line opened a block comment that has not been closed yet.
/// On return it is `true` if this line ends inside a block comment.
///
/// Text outside comments is copied as `&str` slices, so multi-byte UTF-8
/// characters are preserved. Comment markers are matched textually; string
/// literals are not tracked, so a `//` or `/*` inside a literal is treated as
/// a comment start.
fn strip_comments(line: &str, in_block_comment: &mut bool) -> String {
    let mut result = String::with_capacity(line.len());
    let mut rest = line;

    while !rest.is_empty() {
        if *in_block_comment {
            // Skip everything up to and including the closing `*/`.
            match rest.find("*/") {
                Some(end) => {
                    *in_block_comment = false;
                    rest = &rest[end + 2..];
                }
                None => break,
            }
            continue;
        }

        // Find the first `/` that starts a comment; `true` marks a line comment.
        // A lone `/` (e.g. division) is ordinary text and is skipped over.
        let opener = rest.match_indices('/').find_map(|(at, _)| {
            match rest.as_bytes().get(at + 1) {
                Some(b'/') => Some((at, true)),
                Some(b'*') => Some((at, false)),
                _ => None,
            }
        });

        match opener {
            None => {
                result.push_str(rest);
                break;
            }
            Some((at, true)) => {
                // Line comment: the rest of the line is dropped.
                result.push_str(&rest[..at]);
                break;
            }
            Some((at, false)) => {
                // Both marker bytes are ASCII, so `at + 2` is a char boundary.
                result.push_str(&rest[..at]);
                *in_block_comment = true;
                rest = &rest[at + 2..];
            }
        }
    }

    result
}
1254
1255#[derive(Debug, Clone, Deserialize)]
1256pub struct SuppressionRule {
1257    pub rule_id: String,
1258    pub file: Option<String>,     // Glob pattern
1259    pub function: Option<String>, // Function name pattern
1260    pub reason: Option<String>,
1261}
1262
1263pub struct RuleEngine {
1264    rules: Vec<Box<dyn Rule>>,
1265    pub suppressions: Vec<SuppressionRule>,
1266    pub ipa_config: interprocedural::IpaConfig,
1267}
1268
1269impl RuleEngine {
1270    pub fn new() -> Self {
1271        Self {
1272            rules: Vec::new(),
1273            suppressions: Vec::new(),
1274            ipa_config: interprocedural::IpaConfig::default(),
1275        }
1276    }
1277
1278    pub fn with_builtin_rules() -> Self {
1279        let mut engine = RuleEngine::new();
1280        register_builtin_rules(&mut engine);
1281        engine
1282    }
1283
1284    /// Set the inter-procedural analysis configuration
1285    pub fn set_ipa_config(&mut self, config: interprocedural::IpaConfig) {
1286        self.ipa_config = config;
1287    }
1288
1289    pub fn register_rule(&mut self, rule: Box<dyn Rule>) {
1290        self.rules.push(rule);
1291    }
1292
1293    pub fn run(&self, package: &MirPackage) -> AnalysisResult {
1294        let mut findings = Vec::new();
1295        let mut rules = Vec::new();
1296
1297        memory_profiler::checkpoint_with_context("RuleEngine::run start", &package.crate_name);
1298
1299        // Create shared interprocedural analysis once for all rules that need it.
1300        // This avoids creating 5+ separate instances (one per injection rule),
1301        // reducing memory usage significantly for large codebases.
1302        //
1303        // Memory usage is bounded by the limits in IpaConfig.
1304        //
1305        // For crates exceeding the threshold, IPA is skipped but intra-procedural
1306        // analysis still runs for all rules.
1307
1308        let inter_analysis = if package.functions.len() <= self.ipa_config.max_functions_for_ipa {
1309            memory_profiler::checkpoint("IPA: Starting analysis");
1310            let _scope = memory_profiler::MemoryScope::new("IPA analysis");
1311
1312            interprocedural::InterProceduralAnalysis::with_config(package, self.ipa_config.clone())
1313                .and_then(|mut analysis| {
1314                    memory_profiler::checkpoint("IPA: Call graph built");
1315                    analysis.analyze(package)?;
1316                    memory_profiler::checkpoint("IPA: Analysis complete");
1317                    Ok(analysis)
1318                })
1319                .ok()
1320        } else {
1321            eprintln!(
1322                "Note: Skipping interprocedural analysis for {} ({} functions > {} threshold)",
1323                package.crate_name,
1324                package.functions.len(),
1325                self.ipa_config.max_functions_for_ipa
1326            );
1327            None
1328        };
1329
1330        memory_profiler::checkpoint("Starting rule evaluation");
1331
1332        for (i, rule) in self.rules.iter().enumerate() {
1333            let metadata = rule.metadata().clone();
1334            let rule_id = metadata.id.clone();
1335            rules.push(metadata.clone());
1336
1337            // Profile EVERY rule to catch memory explosions
1338            if memory_profiler::is_enabled() {
1339                memory_profiler::checkpoint_with_context(
1340                    "Rule BEFORE",
1341                    &format!("{}/{} {}", i, self.rules.len(), rule_id),
1342                );
1343            }
1344
1345            let new_findings = rule.evaluate(package, inter_analysis.as_ref());
1346
1347            if memory_profiler::is_enabled() {
1348                memory_profiler::checkpoint_with_context(
1349                    "Rule AFTER",
1350                    &format!(
1351                        "{}/{} {} (+{} findings)",
1352                        i,
1353                        self.rules.len(),
1354                        rule_id,
1355                        new_findings.len()
1356                    ),
1357                );
1358            }
1359
1360            findings.extend(new_findings);
1361        }
1362
1363        memory_profiler::checkpoint("RuleEngine::run complete");
1364
1365        AnalysisResult { findings, rules }
1366    }
1367
1368    pub fn rule_metadata(&self) -> Vec<RuleMetadata> {
1369        self.rules
1370            .iter()
1371            .map(|rule| rule.metadata().clone())
1372            .collect()
1373    }
1374
1375    pub fn cache_fingerprint(&self) -> String {
1376        let mut hasher = Sha256::new();
1377        for rule in &self.rules {
1378            hasher.update(rule.cache_key().as_bytes());
1379            hasher.update(&[0u8]);
1380        }
1381        hex::encode(hasher.finalize())
1382    }
1383
1384    pub fn load_rulepack<P: AsRef<Path>>(&mut self, path: P) -> Result<()> {
1385        let path = path.as_ref();
1386        let mut file =
1387            File::open(path).with_context(|| format!("open rulepack {}", path.display()))?;
1388        let mut contents = Vec::new();
1389        file.read_to_end(&mut contents)?;
1390        self.load_rulepack_from_reader(&contents[..], &path.display().to_string())
1391    }
1392
1393    pub fn load_rulepack_from_reader<R: Read>(
1394        &mut self,
1395        mut reader: R,
1396        origin: &str,
1397    ) -> Result<()> {
1398        let mut buf = Vec::new();
1399        reader.read_to_end(&mut buf)?;
1400        let document: RulePackDocument =
1401            serde_yaml::from_slice(&buf).context("parse rulepack YAML")?;
1402
1403        for rule_config in document.rules {
1404            let declarative = DeclarativeRule::new(rule_config, origin.to_string());
1405            self.register_rule(Box::new(declarative));
1406        }
1407
1408        self.suppressions.extend(document.suppressions);
1409
1410        Ok(())
1411    }
1412
1413    pub fn load_wasm_module<P: AsRef<Path>>(&mut self, path: P) -> Result<()> {
1414        let path = path.as_ref();
1415        fs::read(path).with_context(|| format!("read wasm module {}", path.display()))?;
1416        let module = path.to_string_lossy().to_string();
1417        self.register_rule(Box::new(WasmRulePlaceholder::from_path(path, module)));
1418        Ok(())
1419    }
1420}
1421
1422#[derive(Debug, Deserialize, Default)]
1423struct RulePackDocument {
1424    #[serde(default)]
1425    rules: Vec<RulePackRuleConfig>,
1426    #[serde(default)]
1427    suppressions: Vec<SuppressionRule>,
1428}
1429
1430#[derive(Debug, Deserialize)]
1431struct RulePackRuleConfig {
1432    id: String,
1433    #[serde(default)]
1434    name: Option<String>,
1435    #[serde(default)]
1436    short_description: Option<String>,
1437    #[serde(default)]
1438    full_description: Option<String>,
1439    #[serde(default)]
1440    help_uri: Option<String>,
1441    #[serde(default)]
1442    severity: Option<Severity>,
1443    #[serde(default)]
1444    severity_override: Option<Severity>,
1445    #[serde(default)]
1446    message: Option<String>,
1447    #[serde(default)]
1448    function_name_contains_any: Vec<String>,
1449    #[serde(default)]
1450    function_name_contains_all: Vec<String>,
1451    #[serde(default)]
1452    body_contains_any: Vec<String>,
1453    #[serde(default)]
1454    body_contains_all: Vec<String>,
1455}
1456
// =============================================================================
// Symbol constants for rule detection (used by tests)
// =============================================================================
// Multi-part symbols are assembled with `concat!`; presumably this keeps the
// full symbol text out of this file's own source — TODO confirm.
const VEC_SET_LEN_SYMBOL: &str = concat!("Vec", "::", "set", "_len");
const MAYBE_UNINIT_TYPE_SYMBOL: &str = concat!("Maybe", "Uninit");
const MAYBE_UNINIT_ASSUME_INIT_SYMBOL: &str = concat!("assume", "_init");
// Single-word symbols need no concatenation.
const MEM_MODULE_SYMBOL: &str = "mem";
const MEM_UNINITIALIZED_SYMBOL: &str = "uninitialized";
const MEM_ZEROED_SYMBOL: &str = "zeroed";
const DANGER_ACCEPT_INVALID_CERTS_SYMBOL: &str = concat!("danger", "_accept", "_invalid", "_certs");
const DANGER_ACCEPT_INVALID_HOSTNAMES_SYMBOL: &str =
    concat!("danger", "_accept", "_invalid", "_hostnames");
1469
1470struct DeclarativeRule {
1471    metadata: RuleMetadata,
1472    message: Option<String>,
1473    severity_override: Option<Severity>,
1474    function_name_contains_any: Vec<String>,
1475    function_name_contains_all: Vec<String>,
1476    body_contains_any: Vec<String>,
1477    body_contains_all: Vec<String>,
1478}
1479
1480impl DeclarativeRule {
1481    fn new(config: RulePackRuleConfig, origin: String) -> Self {
1482        let default_name = config.id.clone();
1483        let short_description = config
1484            .short_description
1485            .clone()
1486            .unwrap_or_else(|| config.id.clone());
1487        let full_description = config
1488            .full_description
1489            .clone()
1490            .unwrap_or_else(|| format!("Rule {} loaded from {}", config.id, origin));
1491
1492        let metadata = RuleMetadata {
1493            id: config.id,
1494            name: config.name.unwrap_or(default_name),
1495            short_description,
1496            full_description,
1497            help_uri: config.help_uri,
1498            default_severity: config.severity.unwrap_or(Severity::Medium),
1499            origin: RuleOrigin::RulePack { source: origin },
1500            cwe_ids: Vec::new(),
1501            fix_suggestion: None,
1502            exploitability: Exploitability::default(),
1503        };
1504
1505        Self {
1506            metadata,
1507            message: config.message,
1508            severity_override: config.severity_override,
1509            function_name_contains_any: config.function_name_contains_any,
1510            function_name_contains_all: config.function_name_contains_all,
1511            body_contains_any: config.body_contains_any,
1512            body_contains_all: config.body_contains_all,
1513        }
1514    }
1515
1516    fn matches(&self, function: &MirFunction) -> bool {
1517        if !self.function_name_contains_any.is_empty()
1518            && !self
1519                .function_name_contains_any
1520                .iter()
1521                .any(|needle| function.name.contains(needle))
1522        {
1523            return false;
1524        }
1525
1526        if !self.function_name_contains_all.is_empty()
1527            && !self
1528                .function_name_contains_all
1529                .iter()
1530                .all(|needle| function.name.contains(needle))
1531        {
1532            return false;
1533        }
1534
1535        if !self.body_contains_any.is_empty()
1536            && !self
1537                .body_contains_any
1538                .iter()
1539                .any(|needle| function.body.iter().any(|line| line.contains(needle)))
1540        {
1541            return false;
1542        }
1543
1544        if !self.body_contains_all.is_empty()
1545            && !self
1546                .body_contains_all
1547                .iter()
1548                .all(|needle| function.body.iter().any(|line| line.contains(needle)))
1549        {
1550            return false;
1551        }
1552
1553        true
1554    }
1555
1556    fn gather_evidence(&self, function: &MirFunction) -> Vec<String> {
1557        let mut evidence = Vec::new();
1558
1559        for pattern in self
1560            .body_contains_any
1561            .iter()
1562            .chain(self.body_contains_all.iter())
1563        {
1564            if let Some(line) = function
1565                .body
1566                .iter()
1567                .find(|body_line| body_line.contains(pattern))
1568            {
1569                evidence.push(format!("matched `{pattern}`: {}", line.trim()));
1570            }
1571        }
1572
1573        if evidence.is_empty() {
1574            evidence.push("Rule conditions satisfied".to_string());
1575        }
1576
1577        evidence
1578    }
1579}
1580
1581impl Rule for DeclarativeRule {
1582    fn cache_key(&self) -> String {
1583        let payload = json!({
1584            "metadata": &self.metadata,
1585            "message": &self.message,
1586            "severity_override": &self.severity_override,
1587            "function_name_contains_any": &self.function_name_contains_any,
1588            "function_name_contains_all": &self.function_name_contains_all,
1589            "body_contains_any": &self.body_contains_any,
1590            "body_contains_all": &self.body_contains_all,
1591        });
1592        serde_json::to_string(&payload).unwrap_or_default()
1593    }
1594
1595    fn metadata(&self) -> &RuleMetadata {
1596        &self.metadata
1597    }
1598
1599    fn evaluate(
1600        &self,
1601        package: &MirPackage,
1602        _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
1603    ) -> Vec<Finding> {
1604        let mut findings = Vec::new();
1605        for function in &package.functions {
1606            if !self.matches(function) {
1607                continue;
1608            }
1609
1610            let severity = self
1611                .severity_override
1612                .unwrap_or(self.metadata.default_severity);
1613            let message = self.message.clone().unwrap_or_else(|| {
1614                format!(
1615                    "Rule {} triggered for `{}`",
1616                    self.metadata.id, function.name
1617                )
1618            });
1619            let evidence = self.gather_evidence(function);
1620            let exploitability = self.metadata.exploitability;
1621
1622            findings.push(Finding {
1623                rule_id: self.metadata.id.clone(),
1624                rule_name: self.metadata.name.clone(),
1625                severity,
1626                confidence: Confidence::Medium,
1627                message,
1628                function: function.name.clone(),
1629                function_signature: function.signature.clone(),
1630                evidence,
1631                span: function.span.clone(),
1632                cwe_ids: Vec::new(),
1633                fix_suggestion: None,
1634                code_snippet: None,
1635                exploitability_score: exploitability.score(),
1636                exploitability,
1637                ..Default::default()
1638            });
1639        }
1640
1641        findings
1642    }
1643}
1644
1645struct WasmRulePlaceholder {
1646    metadata: RuleMetadata,
1647}
1648
1649impl WasmRulePlaceholder {
1650    fn from_path(path: &Path, module_utf8: String) -> Self {
1651        let stem = path
1652            .file_stem()
1653            .and_then(|os| os.to_str())
1654            .unwrap_or("wasm-module");
1655
1656        let sanitized = module_utf8
1657            .replace('\\', "::")
1658            .replace('/', "::")
1659            .replace(':', "-");
1660
1661        let metadata = RuleMetadata {
1662            id: format!("WASM-STUB-{}", sanitized),
1663            name: format!("wasm::{stem}"),
1664            short_description: format!("Placeholder rule metadata for {stem}.wasm"),
1665            full_description: format!(
1666                "Rust-cola detected WASM module '{}' but execution is not implemented yet. This placeholder keeps metadata discoverable for future analysis runs.",
1667                module_utf8
1668            ),
1669            help_uri: None,
1670            default_severity: Severity::Low,
1671            origin: RuleOrigin::Wasm { module: module_utf8 },
1672            cwe_ids: Vec::new(),
1673            fix_suggestion: None,
1674            exploitability: Exploitability::default(),
1675        };
1676
1677        Self { metadata }
1678    }
1679}
1680
1681impl Rule for WasmRulePlaceholder {
1682    fn metadata(&self) -> &RuleMetadata {
1683        &self.metadata
1684    }
1685
1686    fn evaluate(
1687        &self,
1688        _package: &MirPackage,
1689        _inter_analysis: Option<&crate::interprocedural::InterProceduralAnalysis>,
1690    ) -> Vec<Finding> {
1691        Vec::new()
1692    }
1693}
1694
1695// =============================================================================
1696// RUSTCOLA084: TLS verification disabled in custom clients
1697// =============================================================================
1698
1699/// Detects disabled TLS certificate verification across multiple HTTP/TLS libraries.
1700/// This extends RUSTCOLA012 (reqwest) to cover native-tls, rustls, hyper-tls, and others.
1701///
1702/// Detects:
1703/// - native-tls: danger_accept_invalid_certs(true), danger_accept_invalid_hostnames(true)
1704/// - rustls: .dangerous() + custom verifier, DangerousClientConfigBuilder
1705/// - reqwest: danger_accept_invalid_certs(true), danger_accept_invalid_hostnames(true)
1706/// - hyper-tls: native-tls connector with verification disabled
1707fn register_builtin_rules(engine: &mut RuleEngine) {
1708    // Register rules from categorized modules
1709    rules::register_crypto_rules(engine);
1710    rules::register_memory_rules(engine);
1711    rules::register_concurrency_rules(engine);
1712    rules::register_ffi_rules(engine);
1713    rules::register_input_rules(engine);
1714    rules::register_resource_rules(engine);
1715    rules::register_code_quality_rules(engine);
1716    rules::register_web_rules(engine);
1717    rules::register_supply_chain_rules(engine);
1718    rules::register_injection_rules(engine);
1719    // Advanced dataflow-based rules (migrated from mir-advanced-rules)
1720    rules::register_advanced_memory_rules(engine);
1721    rules::register_advanced_input_rules(engine);
1722    rules::register_advanced_async_rules(engine);
1723}
1724
/// Settings controlling the on-disk extraction cache.
#[derive(Clone, Debug)]
pub struct CacheConfig {
    /// When `false`, extraction always runs and the cache is not consulted.
    pub enabled: bool,
    /// Directory holding the cache files; created if it does not exist.
    pub directory: PathBuf,
    /// When `true`, an existing cache file for the crate is removed before
    /// the cache lookup.
    pub clear: bool,
}
1731
/// Options controlling HIR capture during extraction.
#[cfg(feature = "hir-driver")]
#[derive(Clone, Debug)]
pub struct HirOptions {
    /// Whether HIR should be captured at all.
    pub capture: bool,
    /// Whether HIR stored in the cache may be reused.
    pub cache: bool,
}

#[cfg(feature = "hir-driver")]
impl Default for HirOptions {
    /// Capture and caching are both enabled by default.
    fn default() -> Self {
        HirOptions {
            capture: true,
            cache: true,
        }
    }
}
1748
1749#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
1750pub struct FunctionFingerprint {
1751    pub name: String,
1752    pub signature: String,
1753    pub hash: String,
1754    #[serde(default, skip_serializing_if = "Option::is_none")]
1755    pub hir_def_path_hash: Option<String>,
1756}
1757
1758#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
1759pub struct CacheMetadata {
1760    pub crate_fingerprint: String,
1761    pub created_timestamp: u64,
1762    pub function_fingerprints: Vec<FunctionFingerprint>,
1763}
1764
1765#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
1766struct CachedAnalysisEntry {
1767    engine_fingerprint: String,
1768    findings: Vec<Finding>,
1769    rules: Vec<RuleMetadata>,
1770}
1771
1772#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
1773pub enum CacheMissReason {
1774    NotFound,
1775    Cleared,
1776    Invalid(String),
1777}
1778
1779#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
1780pub enum CacheStatus {
1781    Hit(CacheMetadata),
1782    Miss {
1783        metadata: CacheMetadata,
1784        reason: CacheMissReason,
1785    },
1786    Disabled,
1787}
1788
1789#[derive(Serialize, Deserialize)]
1790struct CacheEnvelope {
1791    version: u32,
1792    crate_fingerprint: String,
1793    rustc_version: String,
1794    created_timestamp: u64,
1795    function_fingerprints: Vec<FunctionFingerprint>,
1796    #[serde(default)]
1797    analysis_cache: Vec<CachedAnalysisEntry>,
1798    mir: MirPackage,
1799    #[cfg(feature = "hir-driver")]
1800    #[serde(default, skip_serializing_if = "Option::is_none")]
1801    hir: Option<HirPackage>,
1802}
1803
1804const CACHE_VERSION: u32 = 3;
1805
1806pub fn extract_with_cache(
1807    crate_path: &Path,
1808    cache: &CacheConfig,
1809) -> Result<(MirPackage, CacheStatus)> {
1810    #[cfg(feature = "hir-driver")]
1811    let hir_options = HirOptions::default();
1812
1813    let (artifacts, status) = extract_artifacts_with_cache(
1814        crate_path,
1815        cache,
1816        #[cfg(feature = "hir-driver")]
1817        &hir_options,
1818        || {
1819            extract_artifacts(
1820                crate_path,
1821                #[cfg(feature = "hir-driver")]
1822                &hir_options,
1823            )
1824        },
1825    )?;
1826    Ok((artifacts.mir, status))
1827}
1828
/// Extracts all artifacts for the crate at `crate_path` through the cache,
/// using the default HIR options.
#[cfg(feature = "hir-driver")]
pub fn extract_with_cache_full(
    crate_path: &Path,
    cache: &CacheConfig,
) -> Result<(ExtractionArtifacts, CacheStatus)> {
    extract_with_cache_full_opts(crate_path, cache, &HirOptions::default())
}
1837
/// Extracts all artifacts for the crate at `crate_path` through the cache,
/// using the given HIR options.
#[cfg(feature = "hir-driver")]
pub fn extract_with_cache_full_opts(
    crate_path: &Path,
    cache: &CacheConfig,
    hir_options: &HirOptions,
) -> Result<(ExtractionArtifacts, CacheStatus)> {
    // Extraction to run when the cache cannot supply the artifacts.
    let run_extraction = || extract_artifacts(crate_path, hir_options);
    extract_artifacts_with_cache(crate_path, cache, hir_options, run_extraction)
}
1848
1849pub fn extract_artifacts_with_cache<F>(
1850    crate_path: &Path,
1851    cache: &CacheConfig,
1852    #[cfg(feature = "hir-driver")] hir_options: &HirOptions,
1853    extractor: F,
1854) -> Result<(ExtractionArtifacts, CacheStatus)>
1855where
1856    F: FnOnce() -> Result<ExtractionArtifacts>,
1857{
1858    if !cache.enabled {
1859        let package = extractor()?;
1860        return Ok((package, CacheStatus::Disabled));
1861    }
1862
1863    fs::create_dir_all(&cache.directory).context("create cache directory")?;
1864    let canonical_crate =
1865        fs::canonicalize(crate_path).context("canonicalize crate path for cache")?;
1866    let rustc_version = detect_rustc_version();
1867    let crate_fingerprint = compute_crate_fingerprint(&canonical_crate, &rustc_version)?;
1868    let cache_file = cache
1869        .directory
1870        .join(format!("{crate_fingerprint}.cola-cache.json"));
1871
1872    let mut miss_reason = CacheMissReason::NotFound;
1873
1874    if cache.clear && cache_file.exists() {
1875        fs::remove_file(&cache_file).ok();
1876        miss_reason = CacheMissReason::Cleared;
1877    }
1878
1879    if cache_file.exists() {
1880        match read_cache_envelope(&cache_file)? {
1881            Some(envelope) => {
1882                if envelope.version == CACHE_VERSION
1883                    && envelope.crate_fingerprint == crate_fingerprint
1884                    && envelope.rustc_version == rustc_version
1885                {
1886                    let metadata = CacheMetadata {
1887                        crate_fingerprint,
1888                        created_timestamp: envelope.created_timestamp,
1889                        function_fingerprints: envelope.function_fingerprints.clone(),
1890                    };
1891                    #[allow(unused_mut)]
1892                    let mut artifacts = ExtractionArtifacts {
1893                        mir: envelope.mir.clone(),
1894                        #[cfg(feature = "hir-driver")]
1895                        hir: if hir_options.cache && hir_options.capture {
1896                            envelope.hir.clone()
1897                        } else {
1898                            None
1899                        },
1900                    };
1901
1902                    #[cfg(feature = "hir-driver")]
1903                    if hir_options.capture && (!hir_options.cache || artifacts.hir.is_none()) {
1904                        match hir::capture_hir(&canonical_crate) {
1905                            Ok(fresh_hir) => {
1906                                attach_hir_metadata_to_mir(&mut artifacts.mir, &fresh_hir);
1907                                artifacts.hir = Some(fresh_hir);
1908                            }
1909                            Err(err) => {
1910                                eprintln!(
1911                                    "rust-cola: failed to refresh HIR for {}: {err:?}",
1912                                    canonical_crate.display()
1913                                );
1914                            }
1915                        }
1916                    }
1917
1918                    return Ok((artifacts, CacheStatus::Hit(metadata)));
1919                } else {
1920                    miss_reason = CacheMissReason::Invalid("fingerprint mismatch".to_string());
1921                    fs::remove_file(&cache_file).ok();
1922                }
1923            }
1924            None => {
1925                miss_reason = CacheMissReason::Invalid("corrupt cache entry".to_string());
1926                fs::remove_file(&cache_file).ok();
1927            }
1928        }
1929    }
1930
1931    let artifacts = extractor()?;
1932    let function_fingerprints = compute_function_fingerprints(&artifacts.mir);
1933    let metadata = CacheMetadata {
1934        crate_fingerprint: crate_fingerprint.clone(),
1935        created_timestamp: current_timestamp(),
1936        function_fingerprints: function_fingerprints.clone(),
1937    };
1938
1939    let envelope = CacheEnvelope {
1940        version: CACHE_VERSION,
1941        crate_fingerprint,
1942        rustc_version,
1943        created_timestamp: metadata.created_timestamp,
1944        function_fingerprints,
1945        analysis_cache: Vec::new(),
1946        mir: artifacts.mir.clone(),
1947        #[cfg(feature = "hir-driver")]
1948        hir: if hir_options.cache && hir_options.capture {
1949            artifacts.hir.clone()
1950        } else {
1951            None
1952        },
1953    };
1954
1955    if let Err(err) = write_cache_envelope(&cache_file, &envelope) {
1956        eprintln!(
1957            "rust-cola: failed to persist cache at {}: {err}",
1958            cache_file.display()
1959        );
1960    }
1961
1962    Ok((
1963        artifacts,
1964        CacheStatus::Miss {
1965            metadata,
1966            reason: miss_reason,
1967        },
1968    ))
1969}
1970
1971fn read_cache_envelope(path: &Path) -> Result<Option<CacheEnvelope>> {
1972    let data = match fs::read(path) {
1973        Ok(bytes) => bytes,
1974        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
1975        Err(err) => return Err(err).context("read cache file"),
1976    };
1977
1978    let envelope: CacheEnvelope = match serde_json::from_slice(&data) {
1979        Ok(env) => env,
1980        Err(_) => return Ok(None),
1981    };
1982
1983    Ok(Some(envelope))
1984}
1985
1986fn write_cache_envelope(path: &Path, envelope: &CacheEnvelope) -> Result<()> {
1987    if let Some(parent) = path.parent() {
1988        fs::create_dir_all(parent).context("create cache parent directory")?;
1989    }
1990    let mut file = File::create(path).context("create cache file")?;
1991    serde_json::to_writer_pretty(&mut file, envelope).context("write cache envelope")?;
1992    file.write_all(b"\n").ok();
1993    Ok(())
1994}
1995
1996fn cache_entry_path_for(cache: &CacheConfig, fingerprint: &str) -> PathBuf {
1997    cache
1998        .directory
1999        .join(format!("{fingerprint}.cola-cache.json"))
2000}
2001
2002fn cache_fingerprint_from_status(status: &CacheStatus) -> Option<&str> {
2003    match status {
2004        CacheStatus::Hit(meta) => Some(meta.crate_fingerprint.as_str()),
2005        CacheStatus::Miss { metadata, .. } => Some(metadata.crate_fingerprint.as_str()),
2006        CacheStatus::Disabled => None,
2007    }
2008}
2009
2010pub fn load_cached_analysis(
2011    cache: &CacheConfig,
2012    status: &CacheStatus,
2013    engine: &RuleEngine,
2014) -> Result<Option<AnalysisResult>> {
2015    if !cache.enabled {
2016        return Ok(None);
2017    }
2018
2019    let Some(fingerprint) = cache_fingerprint_from_status(status) else {
2020        return Ok(None);
2021    };
2022
2023    load_cached_analysis_for_fingerprint(cache, fingerprint, engine)
2024}
2025
2026pub fn store_cached_analysis(
2027    cache: &CacheConfig,
2028    status: &CacheStatus,
2029    engine: &RuleEngine,
2030    analysis: &AnalysisResult,
2031) -> Result<()> {
2032    if !cache.enabled {
2033        return Ok(());
2034    }
2035
2036    let Some(fingerprint) = cache_fingerprint_from_status(status) else {
2037        return Ok(());
2038    };
2039
2040    store_cached_analysis_for_fingerprint(cache, fingerprint, engine, analysis)
2041}
2042
2043fn load_cached_analysis_for_fingerprint(
2044    cache: &CacheConfig,
2045    fingerprint: &str,
2046    engine: &RuleEngine,
2047) -> Result<Option<AnalysisResult>> {
2048    let path = cache_entry_path_for(cache, fingerprint);
2049    let envelope = match read_cache_envelope(&path)? {
2050        Some(env) => env,
2051        None => return Ok(None),
2052    };
2053
2054    let engine_fp = engine.cache_fingerprint();
2055    if let Some(entry) = envelope
2056        .analysis_cache
2057        .iter()
2058        .find(|entry| entry.engine_fingerprint == engine_fp)
2059    {
2060        let expected_rules = engine.rule_metadata();
2061        if entry.rules != expected_rules {
2062            return Ok(None);
2063        }
2064
2065        return Ok(Some(AnalysisResult {
2066            findings: entry.findings.clone(),
2067            rules: entry.rules.clone(),
2068        }));
2069    }
2070
2071    Ok(None)
2072}
2073
2074fn store_cached_analysis_for_fingerprint(
2075    cache: &CacheConfig,
2076    fingerprint: &str,
2077    engine: &RuleEngine,
2078    analysis: &AnalysisResult,
2079) -> Result<()> {
2080    let path = cache_entry_path_for(cache, fingerprint);
2081    let mut envelope = match read_cache_envelope(&path)? {
2082        Some(env) => env,
2083        None => return Ok(()),
2084    };
2085
2086    let engine_fp = engine.cache_fingerprint();
2087    let entry = CachedAnalysisEntry {
2088        engine_fingerprint: engine_fp.clone(),
2089        findings: analysis.findings.clone(),
2090        rules: analysis.rules.clone(),
2091    };
2092
2093    if let Some(existing) = envelope
2094        .analysis_cache
2095        .iter_mut()
2096        .find(|existing| existing.engine_fingerprint == engine_fp)
2097    {
2098        *existing = entry;
2099    } else {
2100        envelope.analysis_cache.push(entry);
2101    }
2102
2103    write_cache_envelope(&path, &envelope)
2104}
2105
2106fn compute_function_fingerprints(package: &MirPackage) -> Vec<FunctionFingerprint> {
2107    package
2108        .functions
2109        .iter()
2110        .map(|function| {
2111            let mut hasher = Sha256::new();
2112            hasher.update(function.name.as_bytes());
2113            hasher.update(&[0u8]);
2114            hasher.update(function.signature.as_bytes());
2115            hasher.update(&[0u8]);
2116            if let Some(hir) = &function.hir {
2117                hasher.update(hir.def_path_hash.as_bytes());
2118                hasher.update(&[0u8]);
2119            }
2120            for line in &function.body {
2121                hasher.update(line.as_bytes());
2122                hasher.update(&[0u8]);
2123            }
2124            FunctionFingerprint {
2125                name: function.name.clone(),
2126                signature: function.signature.clone(),
2127                hash: hex::encode(hasher.finalize()),
2128                hir_def_path_hash: function.hir.as_ref().map(|hir| hir.def_path_hash.clone()),
2129            }
2130        })
2131        .collect()
2132}
2133
2134fn compute_crate_fingerprint(crate_path: &Path, rustc_version: &str) -> Result<String> {
2135    let mut hasher = Sha256::new();
2136    hasher.update(rustc_version.as_bytes());
2137    hasher.update(&[0u8]);
2138
2139    let mut files_to_hash: Vec<PathBuf> = Vec::new();
2140
2141    for entry in WalkDir::new(crate_path)
2142        .into_iter()
2143        .filter_entry(|e| filter_entry(e))
2144    {
2145        let entry = entry.context("walk crate directory")?;
2146        if !entry.file_type().is_file() {
2147            continue;
2148        }
2149
2150        if should_hash_file(entry.path()) {
2151            files_to_hash.push(entry.into_path());
2152        }
2153    }
2154
2155    files_to_hash.sort();
2156
2157    for path in files_to_hash {
2158        let rel = path.strip_prefix(crate_path).unwrap_or(&path);
2159        hasher.update(rel.to_string_lossy().as_bytes());
2160        hasher.update(&[0u8]);
2161        let contents =
2162            fs::read(&path).with_context(|| format!("read source file {}", path.display()))?;
2163        hasher.update(&contents);
2164        hasher.update(&[0u8]);
2165    }
2166
2167    Ok(hex::encode(hasher.finalize()))
2168}
2169
2170fn filter_entry(entry: &DirEntry) -> bool {
2171    if entry.depth() == 0 {
2172        return true;
2173    }
2174
2175    let name = entry.file_name().to_string_lossy();
2176    if entry.file_type().is_dir()
2177        && matches!(
2178            name.as_ref(),
2179            "target" | ".git" | ".cola-cache" | "out" | "node_modules"
2180        )
2181    {
2182        return false;
2183    }
2184    true
2185}
2186
/// Returns `true` when `path` should contribute to the crate fingerprint:
/// files with an `rs`, `toml` or `lock` extension, or files named exactly
/// `Cargo.toml` / `Cargo.lock`.
fn should_hash_file(path: &Path) -> bool {
    let extension = path.extension().and_then(OsStr::to_str);
    if matches!(extension, Some("rs" | "toml" | "lock")) {
        return true;
    }

    let file_name = path.file_name().and_then(OsStr::to_str);
    file_name == Some("Cargo.toml") || file_name == Some("Cargo.lock")
}
2199
/// Returns the trimmed output of `rustc --version`.
///
/// This never fails: when the command cannot be spawned, exits unsuccessfully
/// or prints non-UTF-8 output, a descriptive placeholder string is returned
/// instead.
fn detect_rustc_version() -> String {
    let output = match Command::new("rustc").arg("--version").output() {
        Ok(output) => output,
        Err(err) => return format!("rustc version: error {err}"),
    };

    if !output.status.success() {
        return format!("rustc version: status {}", output.status);
    }

    match String::from_utf8(output.stdout) {
        Ok(text) => text.trim().to_string(),
        Err(_) => "rustc version: utf8 error".to_string(),
    }
}
2210
/// Returns `path` if it exists on disk.
///
/// On Windows, a path without an extension that does not exist is retried
/// with an `.exe` extension. Returns `None` when nothing exists.
fn ensure_executable(path: PathBuf) -> Option<PathBuf> {
    if path.exists() {
        return Some(path);
    }

    let try_exe_suffix = cfg!(windows) && path.extension().is_none();
    if !try_exe_suffix {
        return None;
    }

    let with_exe = path.with_extension("exe");
    with_exe.exists().then_some(with_exe)
}
2226
2227pub(crate) fn detect_cargo_binary() -> Option<PathBuf> {
2228    if let Some(path) = std::env::var_os("CARGO").map(PathBuf::from) {
2229        if let Some(resolved) = ensure_executable(path) {
2230            return Some(resolved);
2231        }
2232    }
2233
2234    let rustup_home = std::env::var_os("RUSTUP_HOME")
2235        .map(PathBuf::from)
2236        .or_else(|| std::env::var_os("HOME").map(|home| PathBuf::from(home).join(".rustup")));
2237
2238    if let (Some(home), Some(toolchain)) = (rustup_home, std::env::var_os("RUSTUP_TOOLCHAIN")) {
2239        let candidate = PathBuf::from(&home)
2240            .join("toolchains")
2241            .join(toolchain)
2242            .join("bin")
2243            .join("cargo");
2244        if let Some(resolved) = ensure_executable(candidate) {
2245            return Some(resolved);
2246        }
2247    }
2248
2249    if let Some(home) = std::env::var_os("CARGO_HOME").map(PathBuf::from) {
2250        let candidate = home.join("bin").join("cargo");
2251        if let Some(resolved) = ensure_executable(candidate) {
2252            return Some(resolved);
2253        }
2254    }
2255
2256    if let Some(home) = std::env::var_os("HOME").map(PathBuf::from) {
2257        let candidate = home.join(".cargo").join("bin").join("cargo");
2258        if let Some(resolved) = ensure_executable(candidate) {
2259            return Some(resolved);
2260        }
2261    }
2262
2263    if Command::new("cargo").arg("--version").output().is_ok() {
2264        return Some(PathBuf::from("cargo"));
2265    }
2266
2267    None
2268}
2269
2270#[allow(dead_code)]
2271fn detect_rustup_path() -> Option<PathBuf> {
2272    if let Some(path) = std::env::var_os("RUSTUP").map(PathBuf::from) {
2273        if let Some(resolved) = ensure_executable(path) {
2274            return Some(resolved);
2275        }
2276    }
2277
2278    if let Some(home) = std::env::var_os("CARGO_HOME").map(PathBuf::from) {
2279        let candidate = home.join("bin").join("rustup");
2280        if let Some(resolved) = ensure_executable(candidate) {
2281            return Some(resolved);
2282        }
2283    }
2284
2285    if let Some(home) = std::env::var_os("HOME").map(PathBuf::from) {
2286        let candidate = home.join(".cargo").join("bin").join("rustup");
2287        if let Some(resolved) = ensure_executable(candidate) {
2288            return Some(resolved);
2289        }
2290    }
2291
2292    if Command::new("rustup").arg("--version").output().is_ok() {
2293        return Some(PathBuf::from("rustup"));
2294    }
2295
2296    None
2297}
2298
/// Searches for a rustup toolchain file.
///
/// Starting from `CARGO_MANIFEST_DIR` (if set) and then from the current
/// directory, each directory and all of its ancestors are checked for
/// `rust-toolchain.toml` first and `rust-toolchain` second. The first file
/// found is returned.
#[allow(dead_code)]
fn find_rust_toolchain_file() -> Option<PathBuf> {
    let mut roots = Vec::new();
    if let Ok(dir) = std::env::var("CARGO_MANIFEST_DIR") {
        roots.push(PathBuf::from(dir));
    }
    if let Ok(current) = std::env::current_dir() {
        roots.push(current);
    }

    roots.iter().find_map(|root| {
        // `ancestors` yields the directory itself followed by each parent.
        root.ancestors().find_map(|dir| {
            let toml_candidate = dir.join("rust-toolchain.toml");
            if toml_candidate.exists() {
                return Some(toml_candidate);
            }

            let plain_candidate = dir.join("rust-toolchain");
            plain_candidate.exists().then_some(plain_candidate)
        })
    })
}
2329
2330#[allow(dead_code)]
2331fn detect_toolchain() -> String {
2332    if let Ok(toolchain) = std::env::var("RUSTUP_TOOLCHAIN") {
2333        if !toolchain.is_empty() {
2334            return toolchain;
2335        }
2336    }
2337
2338    if let Ok(toolchain) = std::env::var("RUST_TOOLCHAIN") {
2339        if !toolchain.is_empty() {
2340            return toolchain;
2341        }
2342    }
2343
2344    if let Some(path) = find_rust_toolchain_file() {
2345        if let Ok(contents) = fs::read_to_string(&path) {
2346            if path
2347                .extension()
2348                .and_then(|ext| ext.to_str())
2349                .map(|ext| ext.eq_ignore_ascii_case("toml"))
2350                .unwrap_or(false)
2351            {
2352                if let Ok(doc) = toml::from_str::<toml::Value>(&contents) {
2353                    if let Some(channel) = doc
2354                        .get("toolchain")
2355                        .and_then(|table| table.get("channel"))
2356                        .and_then(|val| val.as_str())
2357                    {
2358                        return channel.to_string();
2359                    }
2360                }
2361            } else {
2362                for line in contents.lines() {
2363                    let trimmed = line.trim();
2364                    if !trimmed.is_empty() && !trimmed.starts_with('#') {
2365                        return trimmed.to_string();
2366                    }
2367                }
2368            }
2369        }
2370    }
2371
2372    "nightly-2025-09-14".to_string()
2373}
2374
/// The nightly toolchain version required for MIR extraction.
///
/// `build_cargo_nightly_command` passes this name to `rustup run` so that
/// cargo is invoked with exactly this toolchain.
const REQUIRED_NIGHTLY_TOOLCHAIN: &str = "nightly-2025-09-14";
2377
2378fn build_cargo_command() -> Command {
2379    // Use cargo directly for metadata operations
2380    // Let the target project's toolchain handle basic cargo commands
2381    if let Some(cargo_path) = detect_cargo_binary() {
2382        Command::new(cargo_path)
2383    } else {
2384        Command::new("cargo")
2385    }
2386}
2387
2388/// Build a cargo command that forces nightly toolchain for MIR extraction.
2389/// MIR extraction requires -Zunpretty=mir which is a nightly-only feature.
2390fn build_cargo_nightly_command() -> Command {
2391    // Use `rustup run <toolchain> cargo` to invoke cargo with a specific toolchain.
2392    // This is more reliable than `cargo +toolchain` which only works when cargo
2393    // is invoked through rustup's cargo shim (not a direct path to cargo binary).
2394    let mut cmd = Command::new("rustup");
2395    cmd.arg("run");
2396    cmd.arg(REQUIRED_NIGHTLY_TOOLCHAIN);
2397    cmd.arg("cargo");
2398    cmd
2399}
2400
2401fn load_cargo_metadata(crate_path: &Path, no_deps: bool) -> Result<cargo_metadata::Metadata> {
2402    let canonical = fs::canonicalize(crate_path).unwrap_or_else(|_| crate_path.to_path_buf());
2403    let mut cmd = build_cargo_command();
2404    cmd.arg("metadata");
2405    cmd.args(["--format-version", "1"]);
2406    if no_deps {
2407        cmd.arg("--no-deps");
2408    }
2409
2410    let debug_metadata = std::env::var_os("RUST_COLA_DEBUG_METADATA").is_some();
2411
2412    if canonical.is_file() {
2413        if debug_metadata {
2414            eprintln!(
2415                "metadata canonical manifest {:?} (file?)",
2416                canonical.display()
2417            );
2418        }
2419        cmd.arg("--manifest-path");
2420        cmd.arg(&canonical);
2421    } else {
2422        let manifest_path = canonical.join("Cargo.toml");
2423        if debug_metadata {
2424            eprintln!(
2425                "metadata manifest candidate {:?} exists? {}",
2426                manifest_path.display(),
2427                manifest_path.exists()
2428            );
2429        }
2430        if manifest_path.exists() {
2431            cmd.arg("--manifest-path");
2432            cmd.arg(&manifest_path);
2433        } else {
2434            if debug_metadata {
2435                eprintln!(
2436                    "metadata falling back to current_dir {:?}",
2437                    canonical.display()
2438                );
2439            }
2440            cmd.current_dir(&canonical);
2441        }
2442    }
2443
2444    if debug_metadata {
2445        let program = cmd.get_program().to_owned();
2446        let args: Vec<String> = cmd
2447            .get_args()
2448            .map(|arg| arg.to_string_lossy().into_owned())
2449            .collect();
2450        eprintln!("cargo metadata command: {:?} {:?}", program, args);
2451    }
2452
2453    let output = cmd
2454        .output()
2455        .with_context(|| format!("run cargo metadata for {}", canonical.display()))?;
2456
2457    if !output.status.success() {
2458        let stderr = String::from_utf8_lossy(&output.stderr);
2459        return Err(anyhow!(
2460            "cargo metadata failed for {}: {}",
2461            canonical.display(),
2462            stderr.trim()
2463        ));
2464    }
2465
2466    serde_json::from_slice::<cargo_metadata::Metadata>(&output.stdout).with_context(|| {
2467        format!(
2468            "parse cargo metadata JSON produced for {}",
2469            canonical.display()
2470        )
2471    })
2472}
2473
/// Returns the current time as whole seconds since the Unix epoch, or `0` when
/// the system clock reports a time before the epoch.
fn current_timestamp() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
2480
/// A cargo target that MIR can be extracted from.
#[derive(Clone, Debug)]
pub(crate) enum RustcTarget {
    /// The package's library target (`--lib`).
    Lib,
    /// A binary target, identified by name (`--bin <name>`).
    Bin(String),
}

impl RustcTarget {
    /// Human-readable form of the cargo selection flag, used in error messages.
    fn description(&self) -> String {
        if let RustcTarget::Bin(name) = self {
            format!("--bin {name}")
        } else {
            String::from("--lib")
        }
    }

    /// Appends the cargo arguments that select this target to `cmd`.
    fn apply_to(&self, cmd: &mut Command) {
        match self {
            RustcTarget::Lib => cmd.arg("--lib"),
            RustcTarget::Bin(name) => cmd.arg("--bin").arg(name),
        };
    }
}
2506
2507pub(crate) fn discover_rustc_targets(crate_path: &Path) -> Result<Vec<RustcTarget>> {
2508    if std::env::var_os("RUST_COLA_DEBUG_METADATA").is_some() {
2509        eprintln!(
2510            "discover_rustc_targets crate_path {:?}",
2511            crate_path.display()
2512        );
2513    }
2514    let manifest_path = crate_path.join("Cargo.toml");
2515    let metadata = load_cargo_metadata(crate_path, true)
2516        .with_context(|| format!("query cargo metadata for {}", crate_path.display()))?;
2517    let manifest_canonical = fs::canonicalize(&manifest_path).unwrap_or(manifest_path.clone());
2518    let package = if let Some(pkg) = metadata.root_package() {
2519        pkg.clone()
2520    } else {
2521        metadata
2522            .packages
2523            .iter()
2524            .find(|pkg| {
2525                fs::canonicalize(pkg.manifest_path.as_std_path())
2526                    .map(|path| path == manifest_canonical)
2527                    .unwrap_or_else(|_| {
2528                        pkg.manifest_path.as_std_path() == manifest_canonical.as_path()
2529                    })
2530            })
2531            .cloned()
2532            .ok_or_else(|| anyhow!("no package metadata found for {}", crate_path.display()))?
2533    };
2534
2535    let mut targets = Vec::new();
2536    let mut skipped: Vec<String> = Vec::new();
2537    for target in &package.targets {
2538        if !target.required_features.is_empty() {
2539            skipped.push(target.name.clone());
2540            continue;
2541        }
2542
2543        if target
2544            .kind
2545            .iter()
2546            .any(|kind| kind == "lib" || kind == "proc-macro")
2547        {
2548            targets.push(RustcTarget::Lib);
2549        }
2550
2551        if target.kind.iter().any(|kind| kind == "bin") {
2552            targets.push(RustcTarget::Bin(target.name.clone()));
2553        }
2554    }
2555
2556    if targets.is_empty() {
2557        if !skipped.is_empty() {
2558            return Err(anyhow!(
2559                "package {} has no lib or bin targets enabled without additional features (skipped targets: {})",
2560                package.name,
2561                skipped.join(", ")
2562            ));
2563        }
2564
2565        return Err(anyhow!(
2566            "package {} has no lib or bin targets; cannot extract MIR",
2567            package.name
2568        ));
2569    }
2570
2571    Ok(targets)
2572}
2573
2574fn run_cargo_rustc(crate_path: &Path, target: &RustcTarget) -> Result<String> {
2575    // Use nightly toolchain for MIR extraction (-Zunpretty=mir is nightly-only)
2576    let mut cmd = build_cargo_nightly_command();
2577    cmd.current_dir(crate_path);
2578    cmd.arg("rustc");
2579    target.apply_to(&mut cmd);
2580    cmd.args(["--", "-Zunpretty=mir", "-Zmir-include-spans"]);
2581
2582    let output = cmd
2583        .output()
2584        .with_context(|| format!("run `cargo rustc {}`", target.description()))?;
2585
2586    if !output.status.success() {
2587        return Err(anyhow!(
2588            "cargo rustc failed for {}: {}",
2589            target.description(),
2590            String::from_utf8_lossy(&output.stderr)
2591        ));
2592    }
2593
2594    let stdout =
2595        String::from_utf8(output.stdout).context("decode MIR output to UTF-8 for target")?;
2596    Ok(stdout)
2597}
2598
2599pub fn extract(crate_path: &Path) -> Result<MirPackage> {
2600    let canonical_crate_path = fs::canonicalize(crate_path).context("canonicalize crate path")?;
2601    let targets = discover_rustc_targets(&canonical_crate_path)?;
2602    let crate_root = canonical_crate_path
2603        .to_str()
2604        .ok_or_else(|| anyhow!("crate path is not valid UTF-8"))?
2605        .to_string();
2606
2607    let crate_name = detect_crate_name(&canonical_crate_path).unwrap_or_else(|| {
2608        canonical_crate_path
2609            .file_name()
2610            .and_then(|os| os.to_str())
2611            .unwrap_or("unknown")
2612            .to_string()
2613    });
2614
2615    let mut functions = Vec::new();
2616    let mut seen = HashSet::new();
2617
2618    for target in targets {
2619        let stdout = run_cargo_rustc(&canonical_crate_path, &target)?;
2620        for function in parse_mir_dump(&stdout) {
2621            if seen.insert((function.name.clone(), function.signature.clone())) {
2622                functions.push(function);
2623            }
2624        }
2625    }
2626
2627    Ok(MirPackage {
2628        crate_name,
2629        crate_root,
2630        functions,
2631    })
2632}
2633
2634fn extract_artifacts(
2635    crate_path: &Path,
2636    #[cfg(feature = "hir-driver")] hir_options: &HirOptions,
2637) -> Result<ExtractionArtifacts> {
2638    #[allow(unused_mut)]
2639    let mut mir = extract(crate_path)?;
2640
2641    #[cfg(feature = "hir-driver")]
2642    let hir = if hir_options.capture {
2643        match hir::capture_hir(crate_path) {
2644            Ok(hir) => Some(hir),
2645            Err(err) => {
2646                log_hir_capture_error(crate_path, &err);
2647                None
2648            }
2649        }
2650    } else {
2651        None
2652    };
2653
2654    #[cfg(feature = "hir-driver")]
2655    if let Some(hir_package) = &hir {
2656        attach_hir_metadata_to_mir(&mut mir, hir_package);
2657    }
2658
2659    Ok(ExtractionArtifacts {
2660        mir,
2661        #[cfg(feature = "hir-driver")]
2662        hir,
2663    })
2664}
2665
/// Attaches HIR metadata (and, when known, the item span) to every MIR
/// function that does not already have HIR metadata.
///
/// A function is matched first by the def path extracted from its signature.
/// If that yields nothing, it is matched by its simple name, but only when
/// exactly one HIR function has that simple name. Each HIR entry is consumed
/// on use, so it is attached to at most one MIR function.
#[cfg(feature = "hir-driver")]
fn attach_hir_metadata_to_mir(mir: &mut MirPackage, hir: &HirPackage) {
    // Spans of HIR items, keyed by def path.
    let span_by_path: HashMap<_, _> = hir
        .items
        .iter()
        .filter_map(|item| {
            item.span
                .as_ref()
                .map(|span| (item.def_path.clone(), span.clone()))
        })
        .collect();

    let mut metadata_by_path = HashMap::with_capacity(hir.functions.len());
    let mut paths_by_simple_name: HashMap<String, Vec<String>> = HashMap::new();
    for body in &hir.functions {
        // An empty signature is recorded as "no signature".
        let signature = (!body.signature.is_empty()).then(|| body.signature.clone());
        metadata_by_path.insert(
            body.def_path.clone(),
            MirFunctionHirMetadata {
                def_path_hash: body.def_path_hash.clone(),
                signature,
            },
        );

        if let Some(simple) = body.def_path.rsplit("::").next() {
            paths_by_simple_name
                .entry(simple.to_string())
                .or_default()
                .push(body.def_path.clone());
        }
    }

    for function in mir.functions.iter_mut().filter(|f| f.hir.is_none()) {
        // First choice: the def path embedded in the MIR signature.
        let by_signature = extract_def_path_from_signature(&function.signature)
            .and_then(|path| metadata_by_path.remove(&path).map(|meta| (path, meta)));

        // Second choice: an unambiguous simple-name match.
        let resolved = by_signature.or_else(|| {
            let candidates = paths_by_simple_name.get(function.name.as_str())?;
            let [only] = candidates.as_slice() else {
                return None;
            };
            metadata_by_path
                .remove(only)
                .map(|meta| (only.clone(), meta))
        });

        if let Some((def_path, meta)) = resolved {
            function.hir = Some(meta);
            if let Some(span) = span_by_path.get(&def_path) {
                function.span = Some(span.clone());
            }
        }
    }
}
2732
/// Logs a HIR capture failure for `crate_path` to stderr.
///
/// Errors that are a `HirCaptureError` get a kind-specific message followed by
/// the (truncated) rustc stderr; any other error is printed in debug form.
#[cfg(feature = "hir-driver")]
fn log_hir_capture_error(crate_path: &Path, err: &anyhow::Error) {
    use crate::hir::{HirCaptureError, HirCaptureErrorKind};

    let Some(hir_err) = err.downcast_ref::<HirCaptureError>() else {
        eprintln!(
            "rust-cola: failed to capture HIR for {}: {err:?}",
            crate_path.display()
        );
        return;
    };

    match hir_err.kind() {
        HirCaptureErrorKind::RustcIce => {
            eprintln!(
                "{} for {} (status {:?})",
                HIR_CAPTURE_ICE_LOG_PREFIX,
                crate_path.display(),
                hir_err.status()
            );
            let diagnostic = hir_err.primary_diagnostic();
            if !diagnostic.is_empty() {
                eprintln!("rust-cola: rustc ICE diagnostic: {}", diagnostic);
            }
        }
        HirCaptureErrorKind::CommandFailed => {
            eprintln!(
                "rust-cola: cargo rustc failed while capturing HIR for {} (status {:?}): {}",
                crate_path.display(),
                hir_err.status(),
                hir_err.primary_diagnostic()
            );
        }
    }

    // Both kinds finish by echoing the captured rustc stderr.
    emit_truncated_rustc_stderr(hir_err.stderr());
}
2769
/// Prints at most the first 20 lines of `stderr` to stderr, each prefixed with
/// its line index, followed by a truncation notice when more lines exist.
///
/// Prints nothing when `stderr` is empty or whitespace-only.
#[cfg(feature = "hir-driver")]
fn emit_truncated_rustc_stderr(stderr: &str) {
    const MAX_LINES: usize = 20;
    if stderr.trim().is_empty() {
        return;
    }

    let total_lines = stderr.lines().count();

    for (idx, line) in stderr.lines().take(MAX_LINES).enumerate() {
        // Blank lines are printed without the trailing space after the colon.
        if !line.trim().is_empty() {
            eprintln!("rust-cola: rustc stderr[{idx}]: {}", line);
        } else {
            eprintln!("rust-cola: rustc stderr[{idx}]:");
        }
    }

    if total_lines > MAX_LINES {
        eprintln!(
            "rust-cola: rustc stderr truncated to {MAX_LINES} lines ({} total lines, {} bytes).",
            total_lines,
            stderr.len()
        );
        eprintln!("rust-cola: rerun with `RUST_BACKTRACE=1` for more detail.");
    }
}
2797
2798pub fn write_mir_json(path: impl AsRef<Path>, package: &MirPackage) -> Result<()> {
2799    if let Some(parent) = path.as_ref().parent() {
2800        fs::create_dir_all(parent).context("create parent directories for MIR JSON")?;
2801    }
2802    let mut file = File::create(path.as_ref()).context("create MIR JSON file")?;
2803    serde_json::to_writer_pretty(&mut file, package).context("serialize MIR package to JSON")?;
2804    file.write_all(b"\n").ok();
2805    Ok(())
2806}
2807
/// Writes `package` to `path` as pretty-printed JSON followed by a newline.
///
/// Missing parent directories of `path` are created first, and an existing
/// file at `path` is truncated.
///
/// # Errors
///
/// Returns an error when the parent directories or the file cannot be
/// created, when serialization fails, or when the buffered output cannot be
/// written and flushed to disk.
#[cfg(feature = "hir-driver")]
pub fn write_hir_json(path: impl AsRef<Path>, package: &HirPackage) -> Result<()> {
    let path = path.as_ref();
    if let Some(parent) = path.parent() {
        fs::create_dir_all(parent).context("create parent directories for HIR JSON")?;
    }
    let file = File::create(path).context("create HIR JSON file")?;
    // Buffer the output so the serializer's many small writes do not each hit
    // the raw, unbuffered file handle.
    let mut writer = std::io::BufWriter::new(file);
    serde_json::to_writer_pretty(&mut writer, package).context("serialize HIR package to JSON")?;
    writer
        .write_all(b"\n")
        .context("write trailing newline to HIR JSON file")?;
    // Flush explicitly: a flush that happens on drop cannot report its error.
    writer.flush().context("flush HIR JSON file")?;
    Ok(())
}
2818
2819pub fn write_findings_json(path: impl AsRef<Path>, findings: &[Finding]) -> Result<()> {
2820    if let Some(parent) = path.as_ref().parent() {
2821        fs::create_dir_all(parent).context("create parent directories for findings JSON")?;
2822    }
2823    let mut file = File::create(path.as_ref()).context("create findings JSON file")?;
2824    serde_json::to_writer_pretty(&mut file, findings).context("serialize findings to JSON")?;
2825    file.write_all(b"\n").ok();
2826    Ok(())
2827}
2828
2829pub fn write_sarif_json(path: impl AsRef<Path>, sarif: &serde_json::Value) -> Result<()> {
2830    if let Some(parent) = path.as_ref().parent() {
2831        fs::create_dir_all(parent).context("create parent directories for SARIF JSON")?;
2832    }
2833    let mut file = File::create(path.as_ref()).context("create SARIF file")?;
2834    serde_json::to_writer_pretty(&mut file, sarif).context("serialize SARIF report")?;
2835    file.write_all(b"\n").ok();
2836    Ok(())
2837}
2838
2839pub fn analyze(package: &MirPackage) -> AnalysisResult {
2840    RuleEngine::with_builtin_rules().run(package)
2841}
2842
2843pub fn analyze_with_engine(engine: &RuleEngine, package: &MirPackage) -> AnalysisResult {
2844    engine.run(package)
2845}
2846
/// Pulls the source-file path out of a function name that embeds a location,
/// such as `<impl at src/lib.rs:42:5: 42:27>::method`.
///
/// The path is the text between the first `" at "` and the first `:` that
/// follows it; a `::` must occur somewhere after the marker. Backslashes are
/// converted to forward slashes and a leading `<crate_name>/` is removed.
///
/// Returns `None` when the marker or the `::` is missing, or when the
/// extracted path is empty.
fn derive_relative_source_path(crate_name: &str, function_name: &str) -> Option<String> {
    const AT_MARKER: &str = " at ";

    let (_, after_marker) = function_name.split_once(AT_MARKER)?;
    let (location, _) = after_marker.split_once("::")?;

    // `location` looks like `path:line:col: line:col>`; keep only the path.
    let raw_path = location.split(':').next()?.trim();
    if raw_path.is_empty() {
        return None;
    }

    let forward_slashed = raw_path.replace('\\', "/");
    let crate_prefix = format!("{}/", crate_name);
    let relative = forward_slashed
        .strip_prefix(&crate_prefix)
        .unwrap_or(forward_slashed.as_str());

    Some(relative.to_string())
}
2866
/// Converts a filesystem path into a `file://` URI string.
///
/// The path is rendered lossily as UTF-8. Every byte that is not allowed
/// verbatim in a URI path segment (for example a space, `#`, `?`, `%` or any
/// non-ASCII byte) is percent-encoded, so the result stays a well-formed URI
/// for paths containing such characters.
///
/// On Windows, backslashes become forward slashes, the verbatim prefix
/// `\\?\` is dropped, and UNC paths (`\\server\share\...`, also in their
/// `\\?\UNC\` form) put the server name in the authority position.
fn file_uri_from_path(path: &Path) -> String {
    /// Percent-encodes everything except RFC 3986 `pchar` characters and `/`.
    fn percent_encode_path(raw: &str) -> String {
        const HEX: &[u8; 16] = b"0123456789ABCDEF";
        let mut encoded = String::with_capacity(raw.len());
        for &byte in raw.as_bytes() {
            let keep = byte.is_ascii_alphanumeric()
                || matches!(
                    byte,
                    b'-' | b'.'
                        | b'_'
                        | b'~'
                        | b'/'
                        | b':'
                        | b'@'
                        | b'!'
                        | b'$'
                        | b'&'
                        | b'\''
                        | b'('
                        | b')'
                        | b'*'
                        | b'+'
                        | b','
                        | b';'
                        | b'='
                );
            if keep {
                encoded.push(char::from(byte));
            } else {
                encoded.push('%');
                encoded.push(char::from(HEX[usize::from(byte >> 4)]));
                encoded.push(char::from(HEX[usize::from(byte & 0x0F)]));
            }
        }
        encoded
    }

    #[cfg(windows)]
    {
        let mut owned = path.to_string_lossy().into_owned();

        // Verbatim UNC form: `\\?\UNC\server\share\...`.
        if let Some(stripped) = owned.strip_prefix("\\\\?\\UNC\\") {
            let normalized = stripped.replace('\\', "/");
            return format!("file://{}", percent_encode_path(&normalized));
        }

        // Verbatim local form: `\\?\C:\...`.
        if let Some(stripped) = owned.strip_prefix("\\\\?\\") {
            owned = stripped.to_string();
        }

        // Plain UNC form: `\\server\share\...`.
        if let Some(stripped) = owned.strip_prefix("\\\\") {
            let normalized = stripped.replace('\\', "/");
            return format!("file://{}", percent_encode_path(&normalized));
        }

        let mut normalized = owned.replace('\\', "/");
        // Drive-letter paths need a leading slash so the authority stays empty.
        if !normalized.starts_with('/') {
            normalized.insert(0, '/');
        }
        format!("file://{}", percent_encode_path(&normalized))
    }
    #[cfg(not(windows))]
    {
        format!("file://{}", percent_encode_path(&path.to_string_lossy()))
    }
}
2897
2898fn artifact_uri_for(package: &MirPackage, function_name: &str) -> String {
2899    let crate_root = PathBuf::from(&package.crate_root);
2900    if let Some(relative) = derive_relative_source_path(&package.crate_name, function_name) {
2901        let mut segments: Vec<&str> = relative
2902            .split('/')
2903            .filter(|segment| !segment.is_empty())
2904            .collect();
2905
2906        if let Some(first) = segments.first().copied() {
2907            let crate_dir = crate_root
2908                .file_name()
2909                .map(|os| os.to_string_lossy().to_string());
2910            let normalized_first = first.replace('_', "-").to_lowercase();
2911            let crate_name_normalized = package.crate_name.replace('_', "-").to_lowercase();
2912            let crate_dir_normalized = crate_dir
2913                .as_deref()
2914                .map(|dir| dir.replace('_', "-").to_lowercase());
2915
2916            let drop_first = crate_dir_normalized
2917                .as_ref()
2918                .map(|dir| dir == &normalized_first)
2919                .unwrap_or(false)
2920                || normalized_first == crate_name_normalized
2921                || normalized_first == package.crate_name.replace('-', "_").to_lowercase();
2922
2923            if drop_first {
2924                segments.remove(0);
2925            }
2926        }
2927
2928        let mut path = crate_root.clone();
2929        for segment in segments {
2930            path.push(segment);
2931        }
2932        return file_uri_from_path(&path);
2933    }
2934
2935    // Fallback: try to extract path from function name patterns like "src/lib.rs:15" or "build.rs:10"
2936    // This handles source-level rules that use location-style function names
2937    if function_name.contains(':') && !function_name.contains("::") {
2938        // Pattern: "file.rs:line" or "path/to/file.rs:line"
2939        if let Some(colon_pos) = function_name.rfind(':') {
2940            let path_part = &function_name[..colon_pos];
2941            if path_part.ends_with(".rs") {
2942                let file_path = crate_root.join(path_part);
2943                return file_uri_from_path(&file_path);
2944            }
2945        }
2946    }
2947
2948    // Final fallback: use src/lib.rs if it exists, otherwise src/main.rs
2949    // GitHub Code Scanning requires a file path, not a directory
2950    let lib_rs = crate_root.join("src/lib.rs");
2951    if lib_rs.exists() {
2952        return file_uri_from_path(&lib_rs);
2953    }
2954    let main_rs = crate_root.join("src/main.rs");
2955    if main_rs.exists() {
2956        return file_uri_from_path(&main_rs);
2957    }
2958
2959    // Last resort: append src/lib.rs even if it doesn't exist
2960    file_uri_from_path(&crate_root.join("src/lib.rs"))
2961}
2962
/// Extract a code snippet from a source file for SARIF output.
///
/// Returns the lines from `start_line` to `end_line` (1-indexed, inclusive)
/// joined with `\n`. `file_path` is used as-is when absolute and is resolved
/// against `crate_root` otherwise. A `start_line` of 0 is treated like 1, and
/// an `end_line` past the end of the file is clamped to the last line.
///
/// Returns `None` when the file cannot be read as UTF-8 text, or when the
/// requested range selects no lines. That includes a range starting past the
/// end of the file and an inverted range (`end_line < start_line`), which
/// must not panic because spans are parsed from external text.
fn extract_snippet(
    crate_root: &Path,
    file_path: &str,
    start_line: u32,
    end_line: u32,
) -> Option<String> {
    // Resolve file path - may be relative to crate root or absolute
    let requested = Path::new(file_path);
    let path = if requested.is_absolute() {
        requested.to_path_buf()
    } else {
        crate_root.join(requested)
    };

    let content = fs::read_to_string(&path).ok()?;

    // Convert the 1-indexed inclusive range into "skip N, take M"; the
    // saturating subtraction turns an inverted range into an empty selection.
    let first = start_line.saturating_sub(1) as usize;
    let wanted = (end_line as usize).saturating_sub(first);

    let snippet_lines: Vec<&str> = content.lines().skip(first).take(wanted).collect();

    if snippet_lines.is_empty() {
        None
    } else {
        Some(snippet_lines.join("\n"))
    }
}
2999
3000pub fn sarif_report(package: &MirPackage, analysis: &AnalysisResult) -> serde_json::Value {
3001    let crate_root = Path::new(&package.crate_root);
3002    let rule_index: HashMap<&str, &RuleMetadata> = analysis
3003        .rules
3004        .iter()
3005        .map(|meta| (meta.id.as_str(), meta))
3006        .collect();
3007
3008    let results: Vec<_> = analysis
3009        .findings
3010        .iter()
3011        .map(|finding| {
3012            let rule_meta = rule_index.get(finding.rule_id.as_str());
3013            let origin = rule_meta
3014                .map(|meta| meta.origin.label())
3015                .unwrap_or_else(|| "unknown".to_string());
3016
3017            let mut region = serde_json::Map::new();
3018            region.insert(
3019                "message".to_string(),
3020                json!({"text": finding.function_signature.clone()}),
3021            );
3022
3023            let artifact_uri = if let Some(span) = &finding.span {
3024                region.insert("startLine".to_string(), json!(span.start_line));
3025                region.insert("startColumn".to_string(), json!(span.start_column));
3026                region.insert("endLine".to_string(), json!(span.end_line));
3027                region.insert("endColumn".to_string(), json!(span.end_column));
3028
3029                // Extract code snippet for SARIF output
3030                if let Some(snippet_text) =
3031                    extract_snippet(crate_root, &span.file, span.start_line, span.end_line)
3032                {
3033                    region.insert("snippet".to_string(), json!({"text": snippet_text}));
3034                }
3035
3036                let path = Path::new(&span.file);
3037                file_uri_from_path(path)
3038            } else {
3039                artifact_uri_for(package, &finding.function)
3040            };
3041
3042            json!({
3043                "ruleId": finding.rule_id,
3044                "level": finding.severity.sarif_level(),
3045                "message": {"text": finding.message},
3046                "locations": [
3047                    {
3048                        "physicalLocation": {
3049                            "artifactLocation": {
3050                                "uri": artifact_uri,
3051                            },
3052                            "region": serde_json::Value::Object(region)
3053                        },
3054                        "logicalLocations": [
3055                            {
3056                                "fullyQualifiedName": finding.function,
3057                                "decoratedName": finding.function_signature,
3058                            }
3059                        ]
3060                    }
3061                ],
3062                "suppressions": if finding.code_context.is_non_production() {
3063                    json!([{
3064                        "kind": "inSource",
3065                        "status": "underReview",
3066                        "justification": finding.code_context.suppression_justification()
3067                    }])
3068                } else {
3069                    json!([])
3070                },
3071                "properties": {
3072                    "ruleName": finding.rule_name,
3073                    "origin": origin,
3074                    "evidence": finding.evidence,
3075                    "codeContext": finding.code_context.label(),
3076                    "filterReason": finding.filter_reason,
3077                }
3078            })
3079        })
3080        .collect();
3081
3082    let rules: Vec<_> = analysis
3083        .rules
3084        .iter()
3085        .map(|rule| {
3086            let mut value = json!({
3087                "id": rule.id,
3088                "name": rule.name,
3089                "shortDescription": {"text": rule.short_description},
3090                "fullDescription": {"text": rule.full_description},
3091                "helpUri": rule.help_uri,
3092                "defaultConfiguration": {
3093                    "level": rule.default_severity.sarif_level()
3094                },
3095                "properties": {
3096                    "origin": rule.origin.label()
3097                }
3098            });
3099
3100            if rule.help_uri.is_none() {
3101                if let Some(obj) = value.as_object_mut() {
3102                    obj.remove("helpUri");
3103                }
3104            }
3105
3106            value
3107        })
3108        .collect();
3109
3110    json!({
3111        "$schema": "https://json.schemastore.org/sarif-2.1.0.json",
3112        "version": "2.1.0",
3113        "runs": [
3114            {
3115                "tool": {
3116                    "driver": {
3117                        "name": "rust-cola",
3118                        "informationUri": "https://github.com/your-org/rust-cola",
3119                        "version": env!("CARGO_PKG_VERSION"),
3120                        "rules": rules,
3121                    }
3122                },
3123                "results": results,
3124                "invocations": [
3125                    {
3126                        "workingDirectory": {
3127                            "uri": file_uri_from_path(Path::new(&package.crate_root)),
3128                        },
3129                        "executionSuccessful": true
3130                    }
3131                ],
3132                "artifacts": [
3133                    {
3134                        "location": {
3135                            "uri": file_uri_from_path(Path::new(&package.crate_root))
3136                        },
3137                        "description": {
3138                            "text": format!("Crate {} analyzed via MIR", package.crate_name)
3139                        }
3140                    }
3141                ]
3142            }
3143        ]
3144    })
3145}
3146
3147fn parse_mir_dump(input: &str) -> Vec<MirFunction> {
3148    let mut functions = Vec::new();
3149    let mut current_signature: Option<String> = None;
3150    let mut current_body: Vec<String> = Vec::new();
3151
3152    for line in input.lines() {
3153        if line.trim_start().starts_with("fn ") {
3154            if let Some(sig) = current_signature.take() {
3155                functions.push(MirFunction::from_parts(
3156                    sig,
3157                    std::mem::take(&mut current_body),
3158                ));
3159            }
3160            current_signature = Some(line.trim().to_string());
3161        } else if current_signature.is_some() {
3162            current_body.push(line.to_string());
3163        }
3164    }
3165
3166    if let Some(sig) = current_signature {
3167        functions.push(MirFunction::from_parts(sig, current_body));
3168    }
3169
3170    functions
3171}
3172
3173impl MirFunction {
3174    fn from_parts(signature: String, mut body: Vec<String>) -> MirFunction {
3175        trim_trailing_blanks(&mut body);
3176        let name = extract_name(&signature).unwrap_or_else(|| "unknown".to_string());
3177        let span = extract_span(&signature);
3178        MirFunction {
3179            name,
3180            signature,
3181            body,
3182            span,
3183            hir: None,
3184        }
3185    }
3186}
3187
/// Returns the text between a leading `fn ` and the first `(` of `signature`,
/// trimmed of surrounding whitespace.
///
/// Leading whitespace before `fn ` is ignored. Returns `None` when the
/// signature does not start with `fn `; when there is no `(`, everything
/// after `fn ` is returned.
fn extract_name(signature: &str) -> Option<String> {
    let after_keyword = signature.trim_start().strip_prefix("fn ")?;
    let name = match after_keyword.find('(') {
        Some(paren) => &after_keyword[..paren],
        None => after_keyword,
    };
    Some(name.trim().to_string())
}
3195
/// Extracts the definition path from a function signature.
///
/// The path is the text after the first `fn ` (which may be preceded by
/// qualifiers such as `unsafe extern "C"`), cut at the first `" at "` if one
/// is present and then at the first `(`, with surrounding whitespace
/// removed. Returns `None` when there is no `fn ` or the path is empty.
#[cfg_attr(not(feature = "hir-driver"), allow(dead_code))]
fn extract_def_path_from_signature(signature: &str) -> Option<String> {
    const KEYWORD: &str = "fn ";

    let (_, after_fn) = signature.trim_start().split_once(KEYWORD)?;

    // Discard a trailing location such as ` at src/lib.rs:1:1`.
    let before_location = match after_fn.split_once(" at ") {
        Some((head, _location)) => head,
        None => after_fn,
    };

    let before_params = match before_location.find('(') {
        Some(paren) => &before_location[..paren],
        None => before_location,
    };

    Some(before_params.trim())
        .filter(|path| !path.is_empty())
        .map(str::to_string)
}
3211
3212pub fn extract_span_from_mir_line(line: &str) -> Option<SourceSpan> {
3213    // Example: ... // scope 0 at src/lib.rs:4:15: 4:35
3214    if let Some(idx) = line.rfind("// scope ") {
3215        let comment = &line[idx..];
3216        if let Some(at_idx) = comment.find(" at ") {
3217            let location = comment[at_idx + 4..].trim();
3218            // location is like "src/lib.rs:4:15: 4:35"
3219
3220            // Parse backwards: end_column, end_line, start_column, start_line, file
3221            // Format: file:start_line:start_column: end_line:end_column
3222
3223            if let Some((rest, end_column_str)) = location.rsplit_once(':') {
3224                if let Ok(end_column) = end_column_str.trim().parse::<u32>() {
3225                    if let Some((rest, end_line_str)) = rest.rsplit_once(':') {
3226                        if let Ok(end_line) = end_line_str.trim().parse::<u32>() {
3227                            if let Some((rest, start_column_str)) = rest.rsplit_once(':') {
3228                                if let Ok(start_column) = start_column_str.trim().parse::<u32>() {
3229                                    if let Some((file_path, start_line_str)) = rest.rsplit_once(':')
3230                                    {
3231                                        if let Ok(start_line) = start_line_str.trim().parse::<u32>()
3232                                        {
3233                                            return Some(SourceSpan {
3234                                                file: file_path.trim().replace('\\', "/"),
3235                                                start_line,
3236                                                start_column,
3237                                                end_line,
3238                                                end_column,
3239                                            });
3240                                        }
3241                                    }
3242                                }
3243                            }
3244                        }
3245                    }
3246                }
3247            }
3248        }
3249    }
3250    None
3251}
3252
3253fn extract_span(signature: &str) -> Option<SourceSpan> {
3254    const MARKER: &str = " at ";
3255    let marker_idx = signature.find(MARKER)? + MARKER.len();
3256    let after_marker = &signature[marker_idx..];
3257    let location_end = after_marker.find('>')?;
3258    let location = after_marker[..location_end].trim();
3259
3260    let (before_end_column, end_column_str) = location.rsplit_once(':')?;
3261    let end_column = end_column_str.trim().parse().ok()?;
3262
3263    let (before_end_line, end_line_str) = before_end_column.rsplit_once(':')?;
3264    let end_line = end_line_str.trim().parse().ok()?;
3265
3266    let (before_start_column, start_column_str) = before_end_line.rsplit_once(':')?;
3267    let start_column = start_column_str.trim().parse().ok()?;
3268
3269    let (path_str, start_line_str) = before_start_column.rsplit_once(':')?;
3270    let start_line = start_line_str.trim().parse().ok()?;
3271
3272    Some(SourceSpan {
3273        file: path_str.trim().replace('\\', "/"),
3274        start_line,
3275        start_column,
3276        end_line,
3277        end_column,
3278    })
3279}
3280
/// Removes every empty or whitespace-only string from the end of `lines`,
/// leaving blank entries that precede the last non-blank one untouched.
fn trim_trailing_blanks(lines: &mut Vec<String>) {
    // Keep everything up to and including the last non-blank entry.
    let keep = lines
        .iter()
        .rposition(|entry| !entry.trim().is_empty())
        .map_or(0, |last_content| last_content + 1);
    lines.truncate(keep);
}
3286
3287pub(crate) fn detect_crate_name(crate_path: &Path) -> Option<String> {
3288    let canonical_crate = fs::canonicalize(crate_path)
3289        .ok()
3290        .unwrap_or_else(|| crate_path.to_path_buf());
3291    if std::env::var_os("RUST_COLA_DEBUG_METADATA").is_some() {
3292        eprintln!(
3293            "detect_crate_name crate_path {:?} canonical {:?}",
3294            crate_path.display(),
3295            canonical_crate.display()
3296        );
3297    }
3298    let manifest_path = if canonical_crate.is_file() {
3299        canonical_crate.clone()
3300    } else {
3301        canonical_crate.join("Cargo.toml")
3302    };
3303
3304    let canonical_manifest = fs::canonicalize(&manifest_path).ok();
3305
3306    let metadata = load_cargo_metadata(&canonical_crate, true).ok()?;
3307
3308    if let Some(target_manifest) = canonical_manifest {
3309        if let Some(pkg) = metadata.packages.iter().find(|pkg| {
3310            let pkg_manifest = pkg.manifest_path.clone().into_std_path_buf();
3311            fs::canonicalize(pkg_manifest)
3312                .ok()
3313                .map(|path| path == target_manifest)
3314                .unwrap_or(false)
3315        }) {
3316            return Some(pkg.name.clone());
3317        }
3318    }
3319
3320    metadata
3321        .packages
3322        .iter()
3323        .find(|pkg| {
3324            let parent = pkg
3325                .manifest_path
3326                .clone()
3327                .into_std_path_buf()
3328                .parent()
3329                .map(|p| p.to_path_buf());
3330            parent == Some(canonical_crate.clone())
3331        })
3332        .map(|pkg| pkg.name.clone())
3333        .or_else(|| metadata.root_package().map(|pkg| pkg.name.clone()))
3334        .or_else(|| metadata.packages.first().map(|pkg| pkg.name.clone()))
3335}
3336
3337#[cfg(test)]
3338mod tests {
3339    use super::*;
3340    use crate::rules::memory::{
3341        BoxIntoRawRule, MemForgetGuardRule, NonNullNewUncheckedRule, StaticMutGlobalRule,
3342    };
3343    use crate::rules::resource::{
3344        HardcodedHomePathRule, PermissionsSetReadonlyFalseRule, WorldWritableModeRule,
3345    };
3346    use crate::rules::utils::{strip_string_literals, StringLiteralState};
3347    use crate::rules::web::{NonHttpsUrlRule, OpensslVerifyNoneRule};
3348    use crate::rules::{CommandInjectionRiskRule, UntrustedEnvInputRule};
3349    use std::io::Cursor;
3350    use std::path::Path;
3351    use std::sync::atomic::{AtomicUsize, Ordering};
3352    use std::sync::Arc;
3353    use tempfile::tempdir;
3354
3355    const LENGTH_TRUNCATION_CAST_INTTOINT_SYMBOL: &str = concat!("Int", "To", "Int");
3356    const LENGTH_TRUNCATION_CAST_WRITE_SYMBOL: &str = concat!("write", "_u16");
3357    const UNBOUNDED_ALLOCATION_WITH_CAPACITY_SYMBOL: &str = concat!("with", "_capacity");
3358
3359    fn make_vec_set_len_line(indent: &str) -> String {
3360        let mut line = String::with_capacity(indent.len() + 48);
3361        line.push_str(indent);
3362        line.push_str("Vec::<i32>::");
3363        line.push_str("set_len");
3364        line.push_str("((*_1), const 4_usize);");
3365        line
3366    }
3367
3368    fn make_maybe_uninit_assume_init_line(indent: &str) -> String {
3369        let mut line = String::with_capacity(indent.len() + 66);
3370        line.push_str(indent);
3371        line.push_str("_7 = core::mem::");
3372        line.push_str(MAYBE_UNINIT_TYPE_SYMBOL);
3373        line.push_str("::<i32>::");
3374        line.push_str(MAYBE_UNINIT_ASSUME_INIT_SYMBOL);
3375        line.push_str("(move _6);");
3376        line
3377    }
3378
3379    fn make_mem_uninitialized_line(indent: &str) -> String {
3380        let mut line = String::with_capacity(indent.len() + 48);
3381        line.push_str(indent);
3382        line.push_str("_8 = std::");
3383        line.push_str(MEM_MODULE_SYMBOL);
3384        line.push_str("::");
3385        line.push_str(MEM_UNINITIALIZED_SYMBOL);
3386        line.push_str("::<i32>();");
3387        line
3388    }
3389
3390    fn make_length_truncation_cast_lines(indent: &str) -> Vec<String> {
3391        let mut lines = Vec::with_capacity(4);
3392
3393        let mut line = String::with_capacity(indent.len() + 24);
3394        line.push_str(indent);
3395        line.push_str("debug payload_len => _1;");
3396        lines.push(line);
3397
3398        let mut line = String::with_capacity(indent.len() + 16);
3399        line.push_str(indent);
3400        line.push_str("_2 = copy _1;");
3401        lines.push(line);
3402
3403        let mut line = String::with_capacity(indent.len() + 36);
3404        line.push_str(indent);
3405        line.push_str("_3 = move _2 as i32 (");
3406        line.push_str(LENGTH_TRUNCATION_CAST_INTTOINT_SYMBOL);
3407        line.push_str(");");
3408        lines.push(line);
3409
3410        let mut line = String::with_capacity(indent.len() + 88);
3411        line.push_str(indent);
3412        line.push_str("_4 = byteorder::WriteBytesExt::");
3413        line.push_str(LENGTH_TRUNCATION_CAST_WRITE_SYMBOL);
3414        line.push_str("::<byteorder::BigEndian>(move _0, move _3);");
3415        lines.push(line);
3416
3417        lines
3418    }
3419
3420    fn make_unbounded_allocation_lines(indent: &str, debug_ident: &str) -> Vec<String> {
3421        let mut lines = Vec::with_capacity(3);
3422
3423        let mut debug_line = String::with_capacity(indent.len() + debug_ident.len() + 16);
3424        debug_line.push_str(indent);
3425        debug_line.push_str("debug ");
3426        debug_line.push_str(debug_ident);
3427        debug_line.push_str(" => _1;");
3428        lines.push(debug_line);
3429
3430        let mut copy_line = String::with_capacity(indent.len() + 16);
3431        copy_line.push_str(indent);
3432        copy_line.push_str("_2 = copy _1;");
3433        lines.push(copy_line);
3434
3435        let mut alloc_line = String::with_capacity(indent.len() + 64);
3436        alloc_line.push_str(indent);
3437        alloc_line.push_str("_3 = Vec::<u8>::");
3438        alloc_line.push_str(UNBOUNDED_ALLOCATION_WITH_CAPACITY_SYMBOL);
3439        alloc_line.push_str("(move _2);");
3440        lines.push(alloc_line);
3441
3442        lines
3443    }
3444
3445    fn make_danger_accept_invalid_certs_line(indent: &str) -> String {
3446        let mut line = String::with_capacity(indent.len() + 86);
3447        line.push_str(indent);
3448        line.push_str("_10 = reqwest::ClientBuilder::");
3449        line.push_str(DANGER_ACCEPT_INVALID_CERTS_SYMBOL);
3450        line.push_str("(move _1, const true);");
3451        line
3452    }
3453
3454    #[test]
3455    fn extracts_def_path_from_signature_examples() {
3456        assert_eq!(
3457            super::extract_def_path_from_signature("fn crate::module::demo(_1: i32)").as_deref(),
3458            Some("crate::module::demo")
3459        );
3460
3461        assert_eq!(
3462            super::extract_def_path_from_signature(
3463                "unsafe extern \"C\" fn foo::bar::baz(_1: i32) -> i32",
3464            )
3465            .as_deref(),
3466            Some("foo::bar::baz")
3467        );
3468
3469        assert_eq!(
3470            super::extract_def_path_from_signature("no function signature here"),
3471            None
3472        );
3473    }
3474
3475    #[test]
3476    fn parse_extracts_functions() {
3477        let input = r#"
3478fn foo() -> () {
3479    bb0: {
3480        _0 = ();
3481        return;
3482    }
3483}
3484
3485fn bar(_1: i32) -> i32 {
3486    bb0: {
3487        _0 = _1;
3488        return;
3489    }
3490}
3491"#;
3492
3493        let functions = parse_mir_dump(input);
3494        assert_eq!(functions.len(), 2);
3495        assert_eq!(functions[0].name, "foo");
3496        assert_eq!(functions[1].name, "bar");
3497    }
3498
3499    #[test]
3500    fn parse_extracts_function_spans() {
3501        let input = r#"
3502fn <impl at C:\\workspace\\demo\\src\\lib.rs:42:5: 42:27>::vec_set_len(_1: &mut Vec<u8>) -> () {
3503    bb0: {
3504        _0 = ();
3505        return;
3506    }
3507}
3508"#;
3509
3510        let functions = parse_mir_dump(input);
3511        assert_eq!(functions.len(), 1);
3512        let span = functions[0].span.as_ref().expect("missing span");
3513        let normalized_path = span
3514            .file
3515            .split(|c| c == '/' || c == '\\')
3516            .filter(|segment| !segment.is_empty())
3517            .collect::<Vec<_>>()
3518            .join("/");
3519        assert!(
3520            normalized_path.ends_with("workspace/demo/src/lib.rs"),
3521            "unexpected file: {}",
3522            span.file
3523        );
3524        assert_eq!(span.start_line, 42);
3525        assert_eq!(span.start_column, 5);
3526        assert_eq!(span.end_line, 42);
3527        assert_eq!(span.end_column, 27);
3528    }
3529
3530    #[test]
3531    fn rule_finding_carries_function_span() {
3532        let input = r#"
3533fn <impl at C:\\workspace\\demo\\src\\lib.rs:40:1: 40:32>::vec_set_len(_1: &mut Vec<u8>) -> () {
3534    bb0: {
3535        _2 = Vec::<u8>::set_len(move _1, const 4_usize);
3536        return;
3537    }
3538}
3539"#;
3540
3541        let functions = parse_mir_dump(input);
3542        assert_eq!(functions.len(), 1);
3543
3544        let package = MirPackage {
3545            crate_name: "demo".to_string(),
3546            crate_root: "C:/workspace/demo".to_string(),
3547            functions,
3548        };
3549
3550        let engine = RuleEngine::with_builtin_rules();
3551        let analysis = engine.run(&package);
3552        let finding = analysis
3553            .findings
3554            .iter()
3555            .find(|finding| finding.rule_id == "RUSTCOLA008")
3556            .expect("vec-set-len finding not emitted");
3557
3558        let span = finding.span.as_ref().expect("finding missing span");
3559        let normalized_path = span
3560            .file
3561            .split(|c| c == '/' || c == '\\')
3562            .filter(|segment| !segment.is_empty())
3563            .collect::<Vec<_>>()
3564            .join("/");
3565        assert!(
3566            normalized_path.ends_with("workspace/demo/src/lib.rs"),
3567            "unexpected file: {}",
3568            span.file
3569        );
3570        assert_eq!(span.start_line, 40);
3571        assert_eq!(span.start_column, 1);
3572        assert_eq!(span.end_line, 40);
3573        assert_eq!(span.end_column, 32);
3574    }
3575
3576    #[test]
3577    fn sarif_report_includes_span_region() {
3578        let span = SourceSpan {
3579            file: "/workspace/demo/src/lib.rs".to_string(),
3580            start_line: 12,
3581            start_column: 5,
3582            end_line: 12,
3583            end_column: 18,
3584        };
3585
3586        let package = MirPackage {
3587            crate_name: "demo".to_string(),
3588            crate_root: "/workspace/demo".to_string(),
3589            functions: Vec::new(),
3590        };
3591
3592        let rule = RuleMetadata {
3593            id: "TEST001".to_string(),
3594            name: "demo-rule".to_string(),
3595            short_description: "demo description".to_string(),
3596            full_description: "demo full description".to_string(),
3597            help_uri: None,
3598            default_severity: Severity::Medium,
3599            origin: RuleOrigin::BuiltIn,
3600            cwe_ids: Vec::new(),
3601            fix_suggestion: None,
3602            exploitability: Exploitability::default(),
3603        };
3604
3605        let exploitability = Exploitability::default();
3606        let finding = Finding {
3607            rule_id: rule.id.clone(),
3608            rule_name: rule.name.clone(),
3609            severity: rule.default_severity,
3610            confidence: Confidence::High,
3611            message: "Something happened".to_string(),
3612            function: "demo::example".to_string(),
3613            function_signature: "fn demo::example()".to_string(),
3614            evidence: vec![],
3615            span: Some(span.clone()),
3616            cwe_ids: Vec::new(),
3617            fix_suggestion: None,
3618            code_snippet: None,
3619            exploitability_score: exploitability.score(),
3620            exploitability,
3621            ..Default::default()
3622        };
3623
3624        let analysis = AnalysisResult {
3625            findings: vec![finding],
3626            rules: vec![rule],
3627        };
3628
3629        let sarif = sarif_report(&package, &analysis);
3630        let result = &sarif["runs"][0]["results"][0];
3631        let region = &result["locations"][0]["physicalLocation"]["region"];
3632
3633        assert_eq!(region["startLine"], json!(span.start_line));
3634        assert_eq!(region["startColumn"], json!(span.start_column));
3635        assert_eq!(region["endLine"], json!(span.end_line));
3636        assert_eq!(region["endColumn"], json!(span.end_column));
3637
3638        let artifact_uri = result["locations"][0]["physicalLocation"]["artifactLocation"]["uri"]
3639            .as_str()
3640            .expect("uri missing");
3641        let expected_uri = file_uri_from_path(Path::new(&span.file));
3642        assert_eq!(artifact_uri, expected_uri);
3643    }
3644
3645    #[test]
3646    fn sarif_report_includes_code_snippet() {
3647        use std::io::Write;
3648
3649        // Create a temp directory with a source file
3650        let temp_dir = tempfile::tempdir().expect("create temp dir");
3651        let src_dir = temp_dir.path().join("src");
3652        std::fs::create_dir_all(&src_dir).expect("create src dir");
3653
3654        let source_code = r#"fn main() {
3655    let password = "secret123";
3656    println!("{}", password);
3657}
3658"#;
3659        let lib_rs = src_dir.join("lib.rs");
3660        let mut file = std::fs::File::create(&lib_rs).expect("create lib.rs");
3661        file.write_all(source_code.as_bytes())
3662            .expect("write source");
3663
3664        let span = SourceSpan {
3665            file: "src/lib.rs".to_string(),
3666            start_line: 2,
3667            start_column: 5,
3668            end_line: 2,
3669            end_column: 30,
3670        };
3671
3672        let package = MirPackage {
3673            crate_name: "demo".to_string(),
3674            crate_root: temp_dir.path().to_string_lossy().to_string(),
3675            functions: Vec::new(),
3676        };
3677
3678        let rule = RuleMetadata {
3679            id: "TEST002".to_string(),
3680            name: "hardcoded-secret".to_string(),
3681            short_description: "Hardcoded secret".to_string(),
3682            full_description: "Hardcoded secret detected".to_string(),
3683            help_uri: None,
3684            default_severity: Severity::High,
3685            origin: RuleOrigin::BuiltIn,
3686            cwe_ids: Vec::new(),
3687            fix_suggestion: None,
3688            exploitability: Exploitability::default(),
3689        };
3690
3691        let exploitability = Exploitability::default();
3692        let finding = Finding {
3693            rule_id: rule.id.clone(),
3694            rule_name: rule.name.clone(),
3695            severity: rule.default_severity,
3696            confidence: Confidence::High,
3697            message: "Hardcoded password detected".to_string(),
3698            function: "demo::main".to_string(),
3699            function_signature: "fn demo::main()".to_string(),
3700            evidence: vec![],
3701            span: Some(span),
3702            cwe_ids: Vec::new(),
3703            fix_suggestion: None,
3704            code_snippet: None,
3705            exploitability_score: exploitability.score(),
3706            exploitability,
3707            ..Default::default()
3708        };
3709
3710        let analysis = AnalysisResult {
3711            findings: vec![finding],
3712            rules: vec![rule],
3713        };
3714
3715        let sarif = sarif_report(&package, &analysis);
3716        let result = &sarif["runs"][0]["results"][0];
3717        let region = &result["locations"][0]["physicalLocation"]["region"];
3718
3719        // Verify snippet is included
3720        let snippet = &region["snippet"]["text"];
3721        assert!(snippet.is_string(), "snippet.text should be a string");
3722        let snippet_text = snippet.as_str().unwrap();
3723        assert!(
3724            snippet_text.contains("password"),
3725            "snippet should contain the code: {}",
3726            snippet_text
3727        );
3728    }
3729
3730    #[cfg(windows)]
3731    #[test]
3732    fn file_uri_from_path_strips_extended_prefix() {
3733        let uri =
3734            super::file_uri_from_path(Path::new(r"\\?\C:\workspace\mir-extractor\src\lib.rs"));
3735        assert!(
3736            uri.starts_with("file:///C:/workspace/mir-extractor/src/lib.rs"),
3737            "unexpected uri: {uri}"
3738        );
3739        assert!(!uri.contains("//?/"), "extended prefix remained: {uri}");
3740    }
3741
3742    #[cfg(windows)]
3743    #[test]
3744    fn artifact_uri_for_avoids_duplicate_crate_folder() {
3745        let package = MirPackage {
3746            crate_name: "mir-extractor".to_string(),
3747            crate_root: r"\\?\C:\workspace\mir-extractor".to_string(),
3748            functions: Vec::new(),
3749        };
3750
3751        let uri = super::artifact_uri_for(
3752            &package,
3753            "fn <impl at mir-extractor\\src\\lib.rs:10:1: 10:2>::example()",
3754        );
3755
3756        assert!(
3757            uri.starts_with("file:///C:/workspace/mir-extractor/src/lib.rs"),
3758            "unexpected uri: {uri}"
3759        );
3760        assert!(
3761            !uri.contains("mir-extractor/mir-extractor"),
3762            "duplicate crate segment detected: {uri}"
3763        );
3764    }
3765
3766    #[test]
3767    fn rulepack_matches_body_contains() {
3768        let yaml = r#"
3769rules:
3770  - id: ORG001
3771    name: no-into-raw
3772    severity: high
3773    message: Detected into_raw usage
3774    body_contains_any:
3775      - "into_raw"
3776"#;
3777
3778        let mut engine = RuleEngine::with_builtin_rules();
3779        engine
3780            .load_rulepack_from_reader(Cursor::new(yaml), "inline")
3781            .expect("load inline rulepack");
3782
3783        let package = MirPackage {
3784            crate_name: "demo".to_string(),
3785            crate_root: ".".to_string(),
3786            functions: vec![MirFunction {
3787                name: "ffi_create".to_string(),
3788                signature: "fn ffi_create()".to_string(),
3789                body: vec!["_0 = Box::into_raw(move _1);".to_string()],
3790                span: None,
3791                ..Default::default()
3792            }],
3793        };
3794
3795        let analysis = engine.run(&package);
3796        assert!(analysis
3797            .findings
3798            .iter()
3799            .any(|f| f.rule_id == "ORG001" && f.severity == Severity::High));
3800        assert!(analysis.rules.iter().any(|meta| meta.id == "ORG001"));
3801    }
3802
3803    #[test]
3804    fn builtin_security_rules_fire() {
3805        let engine = RuleEngine::with_builtin_rules();
3806        let package = MirPackage {
3807            crate_name: "security".to_string(),
3808            crate_root: ".".to_string(),
3809            functions: vec![
3810                MirFunction {
3811                    name: "unsafe_helper".to_string(),
3812                    signature: "unsafe fn unsafe_helper()".to_string(),
3813                    body: vec!["unsafe { core::ptr::read(_1); }".to_string()],
3814                    span: None,
3815                ..Default::default()
3816                },
3817                MirFunction {
3818                    name: "md5_hash".to_string(),
3819                    signature: "fn md5_hash()".to_string(),
3820                    body: vec!["_2 = md5::Md5::new();".to_string()],
3821                    span: None,
3822                ..Default::default()
3823                },
3824                MirFunction {
3825                    name: "sha1_hash".to_string(),
3826                    signature: "fn sha1_hash()".to_string(),
3827                    body: vec!["_3 = sha1::Sha1::new();".to_string()],
3828                    span: None,
3829                ..Default::default()
3830                },
3831                MirFunction {
3832                    name: "env_usage".to_string(),
3833                    signature: "fn env_usage()".to_string(),
3834                    // Complete taint flow: env::var (source) -> Command::arg (sink)
3835                    body: vec![
3836                        "_4 = std::env::var(move _1) -> [return: bb1, unwind: bb2];".to_string(),
3837                        "_5 = Command::arg::<&str>(move _6, move _4) -> [return: bb3, unwind: bb4];".to_string(),
3838                    ],
3839                    span: None,
3840                ..Default::default()
3841                },
3842                MirFunction {
3843                    name: "command_spawn".to_string(),
3844                    signature: "fn command_spawn()".to_string(),
3845                    body: vec!["_5 = std::process::Command::new(_1);".to_string()],
3846                    span: None,
3847                ..Default::default()
3848                },
3849                MirFunction {
3850                    name: "vec_set_len".to_string(),
3851                    signature: "fn vec_set_len(v: &mut Vec<i32>)".to_string(),
3852                    body: vec![make_vec_set_len_line("")],
3853                    span: None,
3854                ..Default::default()
3855                },
3856                MirFunction {
3857                    name: "maybe_uninit".to_string(),
3858                    signature: "fn maybe_uninit()".to_string(),
3859                    body: vec![make_maybe_uninit_assume_init_line("")],
3860                    span: None,
3861                ..Default::default()
3862                },
3863                MirFunction {
3864                    name: "deprecated_mem".to_string(),
3865                    signature: "fn deprecated_mem()".to_string(),
3866                    body: vec![make_mem_uninitialized_line("")],
3867                    span: None,
3868                ..Default::default()
3869                },
3870                MirFunction {
3871                    name: "http_url".to_string(),
3872                    signature: "fn http_url()".to_string(),
3873                    body: vec!["_9 = const \"http://example.com\";".to_string()],
3874                    span: None,
3875                ..Default::default()
3876                },
3877                MirFunction {
3878                    name: "dangerous_tls".to_string(),
3879                    signature: "fn dangerous_tls(builder: reqwest::ClientBuilder)".to_string(),
3880                    body: vec![make_danger_accept_invalid_certs_line("")],
3881                    span: None,
3882                ..Default::default()
3883                },
3884                MirFunction {
3885                    name: "openssl_none".to_string(),
3886                    signature: "fn openssl_none(ctx: &mut SslContextBuilder)".to_string(),
3887                    body: vec!["openssl::ssl::SslContextBuilder::set_verify((*_1), openssl::ssl::SslVerifyMode::NONE);".to_string()],
3888                    span: None,
3889                ..Default::default()
3890                },
3891                MirFunction {
3892                    name: "home_path_literal".to_string(),
3893                    signature: "fn home_path_literal()".to_string(),
3894                    body: vec!["_11 = const \"/home/alice/.ssh/id_rsa\";".to_string()],
3895                    span: None,
3896                ..Default::default()
3897                },
3898                MirFunction {
3899                    name: "static_mut_global".to_string(),
3900                    signature: "fn static_mut_global()".to_string(),
3901                    body: vec!["    static mut GLOBAL: i32 = 0;".to_string()],
3902                    span: None,
3903                ..Default::default()
3904                },
3905                MirFunction {
3906                    name: "set_readonly_false".to_string(),
3907                    signature: "fn set_readonly_false(perm: &mut std::fs::Permissions)".to_string(),
3908                    body: vec!["    std::fs::Permissions::set_readonly(move _1, const false);".to_string()],
3909                    span: None,
3910                ..Default::default()
3911                },
3912                MirFunction {
3913                    name: "world_writable_mode".to_string(),
3914                    signature: "fn world_writable_mode(opts: &mut std::fs::OpenOptions)".to_string(),
3915                    body: vec!["    std::os::unix::fs::OpenOptionsExt::mode(move _1, const 0o777);".to_string()],
3916                    span: None,
3917                ..Default::default()
3918                },
3919                MirFunction {
3920                    name: "forget_guard".to_string(),
3921                    signature: "fn forget_guard(mutex: &std::sync::Mutex<i32>)".to_string(),
3922                    body: vec![
3923                        "    _1 = std::sync::Mutex::lock(move _0) -> [return: bb1, unwind: bb2];".to_string(),
3924                        "    _2 = core::result::Result::<std::sync::MutexGuard<'_, i32>, _>::unwrap(move _1);".to_string(),
3925                        "    std::mem::forget(move _2);".to_string(),
3926                    ],
3927                    span: None,
3928                ..Default::default()
3929                },
3930                MirFunction {
3931                    name: "nonnull_unchecked".to_string(),
3932                    signature: "fn nonnull_unchecked(ptr: *mut u8)".to_string(),
3933                    body: vec!["    _0 = core::ptr::NonNull::<u8>::new_unchecked(_1);".to_string()],
3934                    span: None,
3935                ..Default::default()
3936                },
3937                MirFunction {
3938                    name: "content_length_allocation".to_string(),
3939                    signature: "fn content_length_allocation(resp: reqwest::Response)".to_string(),
3940                    body: vec![
3941                        "    _1 = reqwest::Response::content_length(move _0);".to_string(),
3942                        "    _2 = copy _1;".to_string(),
3943                        "    _3 = Vec::<u8>::with_capacity(move _2);".to_string(),
3944                    ],
3945                    span: None,
3946                ..Default::default()
3947                },
3948                MirFunction {
3949                    name: "length_truncation_cast".to_string(),
3950                    signature: "fn length_truncation_cast(len: usize)".to_string(),
3951                    body: {
3952                        let mut body = Vec::with_capacity(6);
3953                        body.push("fn length_truncation_cast(len: usize) {".to_string());
3954                        body.extend(make_length_truncation_cast_lines("    "));
3955                        body.push("}".to_string());
3956                        body
3957                    },
3958                    span: None,
3959                ..Default::default()
3960                },
3961                MirFunction {
3962                    name: "unbounded_allocation".to_string(),
3963                    signature: "fn unbounded_allocation(len: usize)".to_string(),
3964                    body: make_unbounded_allocation_lines("    ", "len"),
3965                    span: None,
3966                ..Default::default()
3967                },
3968                MirFunction {
3969                    name: "broadcast_unsync".to_string(),
3970                    signature: "fn broadcast_unsync()".to_string(),
3971                    body: vec![
3972                        "    _5 = tokio::sync::broadcast::channel::<std::rc::Rc<String>>(const 16_usize);".to_string(),
3973                    ],
3974                    span: None,
3975                ..Default::default()
3976                },
3977            ],
3978        };
3979
3980        let analysis = engine.run(&package);
3981
3982        let triggered: Vec<_> = analysis
3983            .findings
3984            .iter()
3985            .map(|f| f.rule_id.as_str())
3986            .collect();
3987        assert!(
3988            triggered.contains(&"RUSTCOLA003"),
3989            "expected unsafe rule to fire"
3990        );
3991        assert!(
3992            triggered.contains(&"RUSTCOLA004"),
3993            "expected md5 rule to fire"
3994        );
3995        assert!(
3996            triggered.contains(&"RUSTCOLA005"),
3997            "expected sha1 rule to fire"
3998        );
3999        assert!(
4000            triggered.contains(&"RUSTCOLA006"),
4001            "expected env rule to fire"
4002        );
4003        assert!(
4004            triggered.contains(&"RUSTCOLA007"),
4005            "expected command rule to fire"
4006        );
4007        assert!(
4008            triggered.contains(&"RUSTCOLA008"),
4009            "expected {} rule to fire",
4010            VEC_SET_LEN_SYMBOL
4011        );
4012        assert!(
4013            triggered.contains(&"RUSTCOLA009"),
4014            "expected MaybeUninit rule to fire"
4015        );
4016        assert!(
4017            triggered.contains(&"RUSTCOLA010"),
4018            "expected {}::{} rule to fire",
4019            MEM_MODULE_SYMBOL,
4020            MEM_UNINITIALIZED_SYMBOL
4021        );
4022        assert!(
4023            triggered.contains(&"RUSTCOLA011"),
4024            "expected non-https rule to fire"
4025        );
4026        assert!(
4027            triggered.contains(&"RUSTCOLA012"),
4028            "expected {} rule to fire",
4029            DANGER_ACCEPT_INVALID_CERTS_SYMBOL
4030        );
4031        assert!(
4032            triggered.contains(&"RUSTCOLA013"),
4033            "expected openssl verify none rule to fire"
4034        );
4035        assert!(
4036            triggered.contains(&"RUSTCOLA014"),
4037            "expected hardcoded home path rule to fire"
4038        );
4039        assert!(
4040            triggered.contains(&"RUSTCOLA025"),
4041            "expected static mut global rule to fire"
4042        );
4043        assert!(
4044            triggered.contains(&"RUSTCOLA073"),
4045            "expected NonNull::new_unchecked rule to fire"
4046        );
4047        assert!(
4048            triggered.contains(&"RUSTCOLA078"),
4049            "expected mem::forget guard rule to fire"
4050        );
4051        assert!(
4052            triggered.contains(&"RUSTCOLA028"),
4053            "expected set_readonly(false) rule to fire"
4054        );
4055        assert!(
4056            triggered.contains(&"RUSTCOLA029"),
4057            "expected world-writable mode rule to fire"
4058        );
4059        assert!(
4060            triggered.contains(&"RUSTCOLA021"),
4061            "expected content-length allocation rule to fire"
4062        );
4063        assert!(
4064            triggered.contains(&"RUSTCOLA022"),
4065            "expected length truncation cast rule to fire"
4066        );
4067        assert!(
4068            triggered.contains(&"RUSTCOLA024"),
4069            "expected general unbounded allocation rule to fire"
4070        );
4071        assert!(
4072            triggered.contains(&"RUSTCOLA023"),
4073            "expected broadcast unsync payload rule to fire"
4074        );
4075        let content_length_finding = analysis
4076            .findings
4077            .iter()
4078            .find(|f| f.rule_id == "RUSTCOLA021")
4079            .expect("content-length finding present");
4080        assert!(content_length_finding
4081            .evidence
4082            .iter()
4083            .any(|line| line.contains("with_capacity")));
4084
4085        let truncation_finding = analysis
4086            .findings
4087            .iter()
4088            .find(|f| f.rule_id == "RUSTCOLA022")
4089            .expect("length truncation finding present");
4090        assert!(truncation_finding
4091            .evidence
4092            .iter()
4093            .any(|line| line.contains(LENGTH_TRUNCATION_CAST_INTTOINT_SYMBOL)));
4094        assert!(truncation_finding
4095            .evidence
4096            .iter()
4097            .any(|line| line.contains(LENGTH_TRUNCATION_CAST_WRITE_SYMBOL)));
4098
4099        let broadcast_finding = analysis
4100            .findings
4101            .iter()
4102            .find(|f| f.rule_id == "RUSTCOLA023")
4103            .expect("broadcast finding present");
4104        assert!(broadcast_finding
4105            .evidence
4106            .iter()
4107            .any(|line| line.contains("broadcast::channel")));
4108
4109        let unbounded_finding = analysis
4110            .findings
4111            .iter()
4112            .find(|f| f.rule_id == "RUSTCOLA024")
4113            .expect("unbounded allocation finding present");
4114        assert!(unbounded_finding
4115            .evidence
4116            .iter()
4117            .any(|line| line.contains(UNBOUNDED_ALLOCATION_WITH_CAPACITY_SYMBOL)));
4118
4119        for id in &[
4120            "RUSTCOLA003",
4121            "RUSTCOLA004",
4122            "RUSTCOLA005",
4123            "RUSTCOLA006",
4124            "RUSTCOLA007",
4125            "RUSTCOLA008",
4126            "RUSTCOLA009",
4127            "RUSTCOLA010",
4128            "RUSTCOLA011",
4129            "RUSTCOLA012",
4130            "RUSTCOLA013",
4131            "RUSTCOLA014",
4132            "RUSTCOLA025",
4133            "RUSTCOLA073",
4134            "RUSTCOLA078",
4135            "RUSTCOLA028",
4136            "RUSTCOLA029",
4137            "RUSTCOLA021",
4138            "RUSTCOLA022",
4139            "RUSTCOLA024",
4140            "RUSTCOLA023",
4141        ] {
4142            assert!(analysis.rules.iter().any(|meta| meta.id == *id));
4143        }
4144    }
4145
4146    #[test]
4147    fn box_into_raw_rule_detects_usage() {
4148        let rule = BoxIntoRawRule::new();
4149        let package = MirPackage {
4150            crate_name: "demo".to_string(),
4151            crate_root: ".".to_string(),
4152            functions: vec![MirFunction {
4153                name: "ffi_bridge".to_string(),
4154                signature: "fn ffi_bridge()".to_string(),
4155                body: vec!["    _0 = Box::into_raw(move _1);".to_string()],
4156                span: None,
4157                ..Default::default()
4158            }],
4159        };
4160
4161        let findings = rule.evaluate(&package, None);
4162        assert_eq!(findings.len(), 1);
4163        assert!(findings[0]
4164            .evidence
4165            .iter()
4166            .any(|entry| entry.contains("Box::into_raw")));
4167    }
4168
4169    #[test]
4170    fn box_into_raw_rule_skips_analyzer_crate() {
4171        let rule = BoxIntoRawRule::new();
4172        let package = MirPackage {
4173            crate_name: "mir-extractor".to_string(),
4174            crate_root: ".".to_string(),
4175            functions: vec![MirFunction {
4176                name: "ffi_bridge".to_string(),
4177                signature: "fn ffi_bridge()".to_string(),
4178                body: vec!["    _0 = Box::into_raw(move _1);".to_string()],
4179                span: None,
4180                ..Default::default()
4181            }],
4182        };
4183
4184        let findings = rule.evaluate(&package, None);
4185        assert!(findings.is_empty());
4186    }
4187
4188    #[test]
4189    fn non_https_url_rule_detects_literal() {
4190        let rule = NonHttpsUrlRule::new();
4191        let package = MirPackage {
4192            crate_name: "demo".to_string(),
4193            crate_root: ".".to_string(),
4194            functions: vec![MirFunction {
4195                name: "insecure_url".to_string(),
4196                signature: "fn insecure_url()".to_string(),
4197                body: vec!["    _1 = const \"http://example.com\";".to_string()],
4198                span: None,
4199                ..Default::default()
4200            }],
4201        };
4202
4203        let findings = rule.evaluate(&package, None);
4204        assert_eq!(findings.len(), 1);
4205        assert!(findings[0]
4206            .evidence
4207            .iter()
4208            .any(|entry| entry.contains("http://example.com")));
4209    }
4210
4211    #[test]
4212    fn non_https_url_rule_skips_analyzer_crate() {
4213        let rule = NonHttpsUrlRule::new();
4214        let package = MirPackage {
4215            crate_name: "mir-extractor".to_string(),
4216            crate_root: ".".to_string(),
4217            functions: vec![MirFunction {
4218                name: "document_string".to_string(),
4219                signature: "fn document_string()".to_string(),
4220                body: vec!["    _1 = const \"http://docs\";".to_string()],
4221                span: None,
4222                ..Default::default()
4223            }],
4224        };
4225
4226        let findings = rule.evaluate(&package, None);
4227        assert!(findings.is_empty());
4228    }
4229
4230    #[test]
4231    fn hardcoded_home_path_rule_detects_literal() {
4232        let rule = HardcodedHomePathRule::new();
4233        let package = MirPackage {
4234            crate_name: "demo".to_string(),
4235            crate_root: ".".to_string(),
4236            functions: vec![MirFunction {
4237                name: "store_profile".to_string(),
4238                signature: "fn store_profile()".to_string(),
4239                body: vec!["    _1 = const \"/home/alice/.config\";".to_string()],
4240                span: None,
4241                ..Default::default()
4242            }],
4243        };
4244
4245        let findings = rule.evaluate(&package, None);
4246        assert_eq!(findings.len(), 1);
4247        assert!(findings[0]
4248            .evidence
4249            .iter()
4250            .any(|entry| entry.contains("/home/alice/.config")));
4251    }
4252
4253    #[test]
4254    fn hardcoded_home_path_rule_skips_analyzer_crate() {
4255        let rule = HardcodedHomePathRule::new();
4256        let package = MirPackage {
4257            crate_name: "mir-extractor".to_string(),
4258            crate_root: ".".to_string(),
4259            functions: vec![MirFunction {
4260                name: "document_paths".to_string(),
4261                signature: "fn document_paths()".to_string(),
4262                body: vec!["    _1 = const \"/home/docs\";".to_string()],
4263                span: None,
4264                ..Default::default()
4265            }],
4266        };
4267
4268        let findings = rule.evaluate(&package, None);
4269        assert!(findings.is_empty());
4270    }
4271
4272    #[test]
4273    fn static_mut_global_rule_detects_mutable_static() {
4274        let rule = StaticMutGlobalRule::new();
4275        let package = MirPackage {
4276            crate_name: "demo".to_string(),
4277            crate_root: ".".to_string(),
4278            functions: vec![MirFunction {
4279                name: "global".to_string(),
4280                signature: "fn global()".to_string(),
4281                body: vec!["    static mut COUNTER: usize = 0;".to_string()],
4282                span: None,
4283                ..Default::default()
4284            }],
4285        };
4286
4287        let findings = rule.evaluate(&package, None);
4288        assert_eq!(findings.len(), 1);
4289        assert!(findings[0]
4290            .evidence
4291            .iter()
4292            .any(|entry| entry.contains("static mut")));
4293    }
4294
4295    #[test]
4296    fn static_mut_global_rule_skips_analyzer_crate() {
4297        let rule = StaticMutGlobalRule::new();
4298        let package = MirPackage {
4299            crate_name: "mir-extractor".to_string(),
4300            crate_root: ".".to_string(),
4301            functions: vec![MirFunction {
4302                name: "global".to_string(),
4303                signature: "fn global()".to_string(),
4304                body: vec!["    static mut COUNTER: usize = 0;".to_string()],
4305                span: None,
4306                ..Default::default()
4307            }],
4308        };
4309
4310        let findings = rule.evaluate(&package, None);
4311        assert!(findings.is_empty());
4312    }
4313
4314    #[test]
4315    fn permissions_set_readonly_rule_detects_false() {
4316        let rule = PermissionsSetReadonlyFalseRule::new();
4317        let package = MirPackage {
4318            crate_name: "demo".to_string(),
4319            crate_root: ".".to_string(),
4320            functions: vec![MirFunction {
4321                name: "loosen_permissions".to_string(),
4322                signature: "fn loosen_permissions(perm: &mut std::fs::Permissions)".to_string(),
4323                body: vec![
4324                    "    std::fs::Permissions::set_readonly(move _1, const false);".to_string(),
4325                ],
4326                span: None,
4327                ..Default::default()
4328            }],
4329        };
4330
4331        let findings = rule.evaluate(&package, None);
4332        assert_eq!(findings.len(), 1);
4333        assert!(findings[0]
4334            .evidence
4335            .iter()
4336            .any(|entry| entry.contains("set_readonly")));
4337    }
4338
4339    #[test]
4340    fn permissions_set_readonly_rule_skips_analyzer_crate() {
4341        let rule = PermissionsSetReadonlyFalseRule::new();
4342        let package = MirPackage {
4343            crate_name: "mir-extractor".to_string(),
4344            crate_root: ".".to_string(),
4345            functions: vec![MirFunction {
4346                name: "loosen_permissions".to_string(),
4347                signature: "fn loosen_permissions(perm: &mut std::fs::Permissions)".to_string(),
4348                body: vec![
4349                    "    std::fs::Permissions::set_readonly(move _1, const false);".to_string(),
4350                ],
4351                span: None,
4352                ..Default::default()
4353            }],
4354        };
4355
4356        let findings = rule.evaluate(&package, None);
4357        assert!(findings.is_empty());
4358    }
4359
4360    #[test]
4361    fn permissions_set_readonly_rule_ignores_true() {
4362        let rule = PermissionsSetReadonlyFalseRule::new();
4363        let package = MirPackage {
4364            crate_name: "demo".to_string(),
4365            crate_root: ".".to_string(),
4366            functions: vec![MirFunction {
4367                name: "harden_permissions".to_string(),
4368                signature: "fn harden_permissions(perm: &mut std::fs::Permissions)".to_string(),
4369                body: vec![
4370                    "    std::fs::Permissions::set_readonly(move _1, const true);".to_string(),
4371                ],
4372                span: None,
4373                ..Default::default()
4374            }],
4375        };
4376
4377        let findings = rule.evaluate(&package, None);
4378        assert!(findings.is_empty());
4379    }
4380
4381    #[test]
4382    fn nonnull_rule_detects_new_unchecked() {
4383        let rule = NonNullNewUncheckedRule::new();
4384        let package = MirPackage {
4385            crate_name: "demo".to_string(),
4386            crate_root: ".".to_string(),
4387            functions: vec![MirFunction {
4388                name: "make_nonnull".to_string(),
4389                signature: "fn make_nonnull(ptr: *mut u8)".to_string(),
4390                body: vec!["    _2 = core::ptr::NonNull::<u8>::new_unchecked(_1);".to_string()],
4391                span: None,
4392                ..Default::default()
4393            }],
4394        };
4395
4396        let findings = rule.evaluate(&package, None);
4397        assert_eq!(findings.len(), 1);
4398        assert!(findings[0]
4399            .evidence
4400            .iter()
4401            .any(|entry| entry.contains("NonNull")));
4402    }
4403
4404    #[test]
4405    fn nonnull_rule_skips_analyzer_crate() {
4406        let rule = NonNullNewUncheckedRule::new();
4407        let package = MirPackage {
4408            crate_name: "mir-extractor".to_string(),
4409            crate_root: ".".to_string(),
4410            functions: vec![MirFunction {
4411                name: "make_nonnull".to_string(),
4412                signature: "fn make_nonnull(ptr: *mut u8)".to_string(),
4413                body: vec!["    _2 = core::ptr::NonNull::<u8>::new_unchecked(_1);".to_string()],
4414                span: None,
4415                ..Default::default()
4416            }],
4417        };
4418
4419        let findings = rule.evaluate(&package, None);
4420        assert!(findings.is_empty());
4421    }
4422
4423    #[test]
4424    fn mem_forget_guard_rule_detects_guard_leak() {
4425        let rule = MemForgetGuardRule::new();
4426        let package = MirPackage {
4427            crate_name: "demo".to_string(),
4428            crate_root: ".".to_string(),
4429            functions: vec![MirFunction {
4430                name: "forget_guard".to_string(),
4431                signature: "fn forget_guard(mutex: &std::sync::Mutex<i32>)".to_string(),
4432                body: vec![
4433                    "    _1 = std::sync::Mutex::lock(move _0) -> [return: bb1, unwind: bb2];".to_string(),
4434                    "    _2 = core::result::Result::<std::sync::MutexGuard<'_, i32>, _>::unwrap(move _1);".to_string(),
4435                    "    std::mem::forget(move _2);".to_string(),
4436                ],
4437                span: None,
4438            ..Default::default()
4439            }],
4440        };
4441
4442        let findings = rule.evaluate(&package, None);
4443        assert_eq!(findings.len(), 1);
4444        assert!(findings[0]
4445            .evidence
4446            .iter()
4447            .any(|entry| entry.contains("mem::forget")));
4448        assert!(findings[0]
4449            .evidence
4450            .iter()
4451            .any(|entry| entry.contains("MutexGuard")));
4452    }
4453
4454    #[test]
4455    fn mem_forget_guard_rule_skips_analyzer_crate() {
4456        let rule = MemForgetGuardRule::new();
4457        let package = MirPackage {
4458            crate_name: "mir-extractor".to_string(),
4459            crate_root: ".".to_string(),
4460            functions: vec![MirFunction {
4461                name: "forget_guard".to_string(),
4462                signature: "fn forget_guard(mutex: &std::sync::Mutex<i32>)".to_string(),
4463                body: vec![
4464                    "    _1 = std::sync::Mutex::lock(move _0) -> [return: bb1, unwind: bb2];".to_string(),
4465                    "    _2 = core::result::Result::<std::sync::MutexGuard<'_, i32>, _>::unwrap(move _1);".to_string(),
4466                    "    std::mem::forget(move _2);".to_string(),
4467                ],
4468                span: None,
4469            ..Default::default()
4470            }],
4471        };
4472
4473        let findings = rule.evaluate(&package, None);
4474        assert!(findings.is_empty());
4475    }
4476
4477    #[test]
4478    fn mem_forget_guard_rule_ignores_non_guard_forget() {
4479        let rule = MemForgetGuardRule::new();
4480        let package = MirPackage {
4481            crate_name: "demo".to_string(),
4482            crate_root: ".".to_string(),
4483            functions: vec![MirFunction {
4484                name: "forget_vec".to_string(),
4485                signature: "fn forget_vec(buffer: Vec<u8>)".to_string(),
4486                body: vec!["    std::mem::forget(move _1);".to_string()],
4487                span: None,
4488                ..Default::default()
4489            }],
4490        };
4491
4492        let findings = rule.evaluate(&package, None);
4493        assert!(findings.is_empty());
4494    }
4495
4496    #[test]
4497    fn world_writable_mode_rule_detects_set_mode() {
4498        let rule = WorldWritableModeRule::new();
4499        let package = MirPackage {
4500            crate_name: "demo".to_string(),
4501            crate_root: ".".to_string(),
4502            functions: vec![MirFunction {
4503                name: "make_world_writable".to_string(),
4504                signature: "fn make_world_writable(perm: &mut std::os::unix::fs::PermissionsExt)"
4505                    .to_string(),
4506                body: vec![
4507                    "    std::os::unix::fs::PermissionsExt::set_mode(move _1, const 0o777);"
4508                        .to_string(),
4509                ],
4510                span: None,
4511                ..Default::default()
4512            }],
4513        };
4514
4515        let findings = rule.evaluate(&package, None);
4516        assert_eq!(findings.len(), 1);
4517        assert!(findings[0]
4518            .evidence
4519            .iter()
4520            .any(|entry| entry.contains("0o777")));
4521    }
4522
4523    #[test]
4524    fn world_writable_mode_rule_skips_analyzer_crate() {
4525        let rule = WorldWritableModeRule::new();
4526        let package = MirPackage {
4527            crate_name: "mir-extractor".to_string(),
4528            crate_root: ".".to_string(),
4529            functions: vec![MirFunction {
4530                name: "make_world_writable".to_string(),
4531                signature: "fn make_world_writable(perm: &mut std::os::unix::fs::PermissionsExt)"
4532                    .to_string(),
4533                body: vec![
4534                    "    std::os::unix::fs::PermissionsExt::set_mode(move _1, const 0o777);"
4535                        .to_string(),
4536                ],
4537                span: None,
4538                ..Default::default()
4539            }],
4540        };
4541
4542        let findings = rule.evaluate(&package, None);
4543        assert!(findings.is_empty());
4544    }
4545
4546    #[test]
4547    fn world_writable_mode_rule_ignores_safe_mask() {
4548        let rule = WorldWritableModeRule::new();
4549        let package = MirPackage {
4550            crate_name: "demo".to_string(),
4551            crate_root: ".".to_string(),
4552            functions: vec![MirFunction {
4553                name: "make_restrictive".to_string(),
4554                signature: "fn make_restrictive(perm: &mut std::os::unix::fs::PermissionsExt)"
4555                    .to_string(),
4556                body: vec![
4557                    "    std::os::unix::fs::PermissionsExt::set_mode(move _1, const 0o755);"
4558                        .to_string(),
4559                ],
4560                span: None,
4561                ..Default::default()
4562            }],
4563        };
4564
4565        let findings = rule.evaluate(&package, None);
4566        assert!(findings.is_empty());
4567    }
4568
4569    #[test]
4570    fn command_rule_reports_tainted_arguments_with_high_severity() {
4571        let rule = CommandInjectionRiskRule::new();
4572        let package = MirPackage {
4573            crate_name: "demo".to_string(),
4574            crate_root: ".".to_string(),
4575            functions: vec![MirFunction {
4576                name: "bad".to_string(),
4577                signature: "fn bad()".to_string(),
4578                body: vec![
4579                    "    _1 = std::env::var(const \"USER\");".to_string(),
4580                    "    _2 = std::process::Command::new(const \"/bin/sh\");".to_string(),
4581                    "    _3 = std::process::Command::arg(move _2, move _1);".to_string(),
4582                ],
4583                span: None,
4584                ..Default::default()
4585            }],
4586        };
4587
4588        let findings = rule.evaluate(&package, None);
4589        assert_eq!(findings.len(), 1);
4590        let finding = &findings[0];
4591        assert_eq!(finding.severity, Severity::High);
4592        assert!(finding
4593            .evidence
4594            .iter()
4595            .any(|entry| entry.contains("tainted arguments")));
4596        assert!(finding.message.contains("Potential command injection"));
4597    }
4598
4599    #[test]
4600    fn command_rule_reports_tokio_process_usage() {
4601        let rule = CommandInjectionRiskRule::new();
4602        let package = MirPackage {
4603            crate_name: "tokio-demo".to_string(),
4604            crate_root: ".".to_string(),
4605            functions: vec![MirFunction {
4606                name: "run_async".to_string(),
4607                signature: "fn run_async()".to_string(),
4608                body: vec![
4609                    "    _1 = std::env::var(const \"TARGET\");".to_string(),
4610                    "    _2 = tokio::process::Command::new(const \"/usr/bin/env\");".to_string(),
4611                    "    _3 = tokio::process::Command::arg(move _2, move _1);".to_string(),
4612                ],
4613                span: None,
4614                ..Default::default()
4615            }],
4616        };
4617
4618        let findings = rule.evaluate(&package, None);
4619        assert_eq!(findings.len(), 1);
4620        let finding = &findings[0];
4621        assert_eq!(finding.severity, Severity::High);
4622        assert!(finding
4623            .evidence
4624            .iter()
4625            .any(|entry| entry.contains("tokio::process::Command::new")));
4626    }
4627
4628    #[test]
4629    fn command_rule_reports_constant_arguments_with_medium_severity() {
4630        let rule = CommandInjectionRiskRule::new();
4631        let package = MirPackage {
4632            crate_name: "demo".to_string(),
4633            crate_root: ".".to_string(),
4634            functions: vec![MirFunction {
4635                name: "ok".to_string(),
4636                signature: "fn ok()".to_string(),
4637                body: vec![
4638                    "    _1 = std::process::Command::new(const \"git\");".to_string(),
4639                    "    _2 = std::process::Command::arg(move _1, const \"status\");".to_string(),
4640                ],
4641                span: None,
4642                ..Default::default()
4643            }],
4644        };
4645
4646        let findings = rule.evaluate(&package, None);
4647        assert_eq!(findings.len(), 1);
4648        let finding = &findings[0];
4649        assert_eq!(finding.severity, Severity::Medium);
4650        assert_eq!(finding.evidence.len(), 1);
4651        assert!(finding
4652            .message
4653            .contains("Process command execution detected"));
4654    }
4655
4656    #[test]
4657    fn untrusted_env_rule_detects_env_call() {
4658        let rule = UntrustedEnvInputRule::new();
4659        let package = MirPackage {
4660            crate_name: "demo".to_string(),
4661            crate_root: ".".to_string(),
4662            functions: vec![MirFunction {
4663                name: "read_env".to_string(),
4664                signature: "fn read_env()".to_string(),
4665                // Complete taint flow: env::var (source) -> Command::arg (sink)
4666                body: vec![
4667                    "_1 = std::env::var(move _2) -> [return: bb1, unwind: bb2];".to_string(),
4668                    "_3 = Command::arg::<&str>(move _4, move _1) -> [return: bb3, unwind: bb4];"
4669                        .to_string(),
4670                ],
4671                span: None,
4672                ..Default::default()
4673            }],
4674        };
4675
4676        let findings = rule.evaluate(&package, None);
4677        assert_eq!(findings.len(), 1);
4678        // The finding should contain evidence about the taint flow
4679        assert!(findings[0].message.contains("Tainted data"));
4680    }
4681
4682    #[test]
4683    fn untrusted_env_rule_ignores_string_literal() {
4684        let rule = UntrustedEnvInputRule::new();
4685        let package = MirPackage {
4686            crate_name: "demo".to_string(),
4687            crate_root: ".".to_string(),
4688            functions: vec![MirFunction {
4689                name: "constant".to_string(),
4690                signature: "fn constant()".to_string(),
4691                body: vec!["    const _: &str = \"std::env::var\";".to_string()],
4692                span: None,
4693                ..Default::default()
4694            }],
4695        };
4696
4697        let findings = rule.evaluate(&package, None);
4698        assert!(findings.is_empty());
4699    }
4700
4701    #[test]
4702    fn openssl_rule_reports_none_literal() {
4703        let rule = OpensslVerifyNoneRule::new();
4704        let package = MirPackage {
4705            crate_name: "demo".to_string(),
4706            crate_root: ".".to_string(),
4707            functions: vec![MirFunction {
4708                name: "disable_verify".to_string(),
4709                signature: "fn disable_verify()".to_string(),
4710                body: vec![
4711                    "    _1 = openssl::ssl::SslContextBuilder::set_verify(move _0, openssl::ssl::SslVerifyMode::NONE);"
4712                        .to_string(),
4713                ],
4714                span: None,
4715            ..Default::default()
4716            }],
4717        };
4718
4719        let findings = rule.evaluate(&package, None);
4720        assert_eq!(findings.len(), 1);
4721        assert!(findings[0]
4722            .evidence
4723            .iter()
4724            .any(|line| line.contains("set_verify")));
4725    }
4726
4727    #[test]
4728    fn openssl_rule_reports_empty_mode_variable() {
4729        let rule = OpensslVerifyNoneRule::new();
4730        let package = MirPackage {
4731            crate_name: "demo".to_string(),
4732            crate_root: ".".to_string(),
4733            functions: vec![MirFunction {
4734                name: "disable_verify_var".to_string(),
4735                signature: "fn disable_verify_var()".to_string(),
4736                body: vec![
4737                    "    _1 = openssl::ssl::SslVerifyMode::empty();".to_string(),
4738                    "    _2 = openssl::ssl::SslContextBuilder::set_verify(move _0, move _1);"
4739                        .to_string(),
4740                ],
4741                span: None,
4742                ..Default::default()
4743            }],
4744        };
4745
4746        let findings = rule.evaluate(&package, None);
4747        assert_eq!(findings.len(), 1);
4748        let evidence = &findings[0].evidence;
4749        assert_eq!(evidence.len(), 2);
4750        assert!(evidence
4751            .iter()
4752            .any(|line| line.contains("SslVerifyMode::empty")));
4753    }
4754
4755    #[test]
4756    fn md5_rule_ignores_doc_only_matches() {
4757        let engine = RuleEngine::with_builtin_rules();
4758        let package = MirPackage {
4759            crate_name: "docs".to_string(),
4760            crate_root: ".".to_string(),
4761            functions: vec![MirFunction {
4762                name: "doc_only".to_string(),
4763                signature: "fn doc_only()".to_string(),
4764                body: vec!["const _: &str = \"Detects use of MD5 hashing\";".to_string()],
4765                span: None,
4766                ..Default::default()
4767            }],
4768        };
4769
4770        let analysis = engine.run(&package);
4771
4772        assert!(
4773            !analysis
4774                .findings
4775                .iter()
4776                .any(|f| f.rule_id == "RUSTCOLA004" && f.function == "doc_only"),
4777            "md5 rule should not fire on doc-only strings"
4778        );
4779    }
4780
4781    #[test]
4782    fn sha1_rule_ignores_doc_only_matches() {
4783        let engine = RuleEngine::with_builtin_rules();
4784        let package = MirPackage {
4785            crate_name: "docs".to_string(),
4786            crate_root: ".".to_string(),
4787            functions: vec![MirFunction {
4788                name: "doc_only_sha".to_string(),
4789                signature: "fn doc_only_sha()".to_string(),
4790                body: vec!["const _: &str = \"Usage of SHA-1 hashing\";".to_string()],
4791                span: None,
4792                ..Default::default()
4793            }],
4794        };
4795
4796        let analysis = engine.run(&package);
4797
4798        assert!(
4799            !analysis
4800                .findings
4801                .iter()
4802                .any(|f| f.rule_id == "RUSTCOLA005" && f.function == "doc_only_sha"),
4803            "sha1 rule should not fire on doc-only strings"
4804        );
4805    }
4806
4807    #[test]
4808    fn command_rule_ignores_rustc_detection() {
4809        let engine = RuleEngine::with_builtin_rules();
4810        let package = MirPackage {
4811            crate_name: "mir-extractor".to_string(),
4812            crate_root: ".".to_string(),
4813            functions: vec![MirFunction {
4814                name: "detect_rustc_version".to_string(),
4815                signature: "fn detect_rustc_version()".to_string(),
4816                body: vec!["_0 = std::process::Command::new(const \"rustc\");".to_string()],
4817                span: None,
4818                ..Default::default()
4819            }],
4820        };
4821
4822        let analysis = engine.run(&package);
4823
4824        assert!(
4825            !analysis
4826                .findings
4827                .iter()
4828                .any(|f| f.rule_id == "RUSTCOLA007" && f.function == "detect_rustc_version"),
4829            "command rule should ignore detect_rustc_version helper"
4830        );
4831    }
4832
4833    #[test]
4834    fn command_rule_ignores_discover_targets() {
4835        let engine = RuleEngine::with_builtin_rules();
4836        let package = MirPackage {
4837            crate_name: "mir-extractor".to_string(),
4838            crate_root: ".".to_string(),
4839            functions: vec![MirFunction {
4840                name: "discover_rustc_targets".to_string(),
4841                signature: "fn discover_rustc_targets()".to_string(),
4842                body: vec!["_0 = std::process::Command::new(const \"cargo\");".to_string()],
4843                span: None,
4844                ..Default::default()
4845            }],
4846        };
4847
4848        let analysis = engine.run(&package);
4849
4850        assert!(
4851            !analysis
4852                .findings
4853                .iter()
4854                .any(|f| f.rule_id == "RUSTCOLA007" && f.function == "discover_rustc_targets"),
4855            "command rule should ignore discover_rustc_targets helper"
4856        );
4857    }
4858
4859    #[test]
4860    fn command_rule_ignores_detect_crate_name() {
4861        let engine = RuleEngine::with_builtin_rules();
4862        let package = MirPackage {
4863            crate_name: "mir-extractor".to_string(),
4864            crate_root: ".".to_string(),
4865            functions: vec![MirFunction {
4866                name: "detect_crate_name".to_string(),
4867                signature: "fn detect_crate_name()".to_string(),
4868                body: vec!["_0 = MetadataCommand::new();".to_string()],
4869                span: None,
4870                ..Default::default()
4871            }],
4872        };
4873
4874        let analysis = engine.run(&package);
4875
4876        assert!(
4877            !analysis
4878                .findings
4879                .iter()
4880                .any(|f| f.rule_id == "RUSTCOLA007" && f.function == "detect_crate_name"),
4881            "command rule should ignore detect_crate_name helper"
4882        );
4883    }
4884
4885    #[test]
4886    fn unsafe_send_sync_bounds_rule_detects_missing_generic_bounds() -> Result<()> {
4887        let temp = tempdir().expect("temp dir");
4888        let crate_root = temp.path();
4889
4890        fs::create_dir_all(crate_root.join("src"))?;
4891        fs::write(
4892            crate_root.join("Cargo.toml"),
4893            r#"[package]
4894name = "unsafe-send-sync"
4895version = "0.1.0"
4896edition = "2021"
4897
4898[lib]
4899path = "src/lib.rs"
4900"#,
4901        )?;
4902        fs::write(
4903            crate_root.join("src/lib.rs"),
4904            r#"use std::marker::PhantomData;
4905
4906pub struct Wrapper<T>(PhantomData<T>);
4907
4908unsafe impl<T> Send for Wrapper<T> {}
4909
4910        pub struct Pair<T, U>(PhantomData<(T, U)>);
4911
4912        unsafe impl<T, U: Send> Send for Pair<T, U> {}
4913
4914        pub struct SyncWrapper<T>(PhantomData<T>);
4915
4916        unsafe impl<T> Sync for SyncWrapper<T> {}
4917
4918pub struct SafeWrapper<T>(PhantomData<T>);
4919
4920unsafe impl<T: Send> Send for SafeWrapper<T> {}
4921
4922        pub struct SafePair<T, U>(PhantomData<(T, U)>);
4923
4924        unsafe impl<T: Send, U: Send> Send for SafePair<T, U> {}
4925
4926        pub struct QualifiedSafe<T>(PhantomData<T>);
4927
4928        unsafe impl<T: std::marker::Send> Send for QualifiedSafe<T> {}
4929
4930        pub struct PointerWrapper<T>(PhantomData<*const T>);
4931
4932        unsafe impl<T: Sync> Send for PointerWrapper<T> {}
4933
4934        pub struct WhereSync<T>(PhantomData<T>);
4935
4936        unsafe impl<T> Sync for WhereSync<T>
4937        where
4938            T: Sync,
4939        {}
4940
4941    pub struct SendBoundSync<T>(PhantomData<T>);
4942
4943    unsafe impl<T: Send> Sync for SendBoundSync<T> {}
4944"#,
4945        )?;
4946
4947        let package = MirPackage {
4948            crate_name: "unsafe-send-sync".to_string(),
4949            crate_root: crate_root.to_string_lossy().to_string(),
4950            functions: Vec::new(),
4951        };
4952
4953        let analysis = RuleEngine::with_builtin_rules().run(&package);
4954
4955        let matches: Vec<_> = analysis
4956            .findings
4957            .iter()
4958            .filter(|finding| finding.rule_id == "RUSTCOLA015")
4959            .collect();
4960
4961        assert_eq!(matches.len(), 3, "expected three unsafe Send/Sync findings");
4962
4963        let signatures: Vec<_> = matches
4964            .iter()
4965            .map(|finding| finding.function_signature.clone())
4966            .collect();
4967
4968        assert!(
4969            signatures
4970                .iter()
4971                .any(|sig| sig.contains("unsafe impl<T> Send for Wrapper<T>")),
4972            "missing finding for Wrapper"
4973        );
4974        assert!(
4975            signatures
4976                .iter()
4977                .any(|sig| sig.contains("unsafe impl<T, U: Send> Send for Pair<T, U>")),
4978            "missing finding for Pair"
4979        );
4980        assert!(
4981            signatures
4982                .iter()
4983                .any(|sig| sig.contains("unsafe impl<T> Sync for SyncWrapper<T>")),
4984            "missing finding for SyncWrapper"
4985        );
4986
4987        assert!(
4988            !signatures
4989                .iter()
4990                .any(|sig| sig.contains("unsafe impl<T: Send> Send for SafeWrapper<T>")),
4991            "safe Send impl should not be flagged"
4992        );
4993        assert!(
4994            !signatures
4995                .iter()
4996                .any(|sig| sig.contains("unsafe impl<T: Send, U: Send> Send for SafePair<T, U>")),
4997            "SafePair should not be flagged"
4998        );
4999        assert!(
5000            !signatures
5001                .iter()
5002                .any(|sig| sig.contains("unsafe impl<T> Sync for WhereSync<T>")),
5003            "WhereSync with where clause should not be flagged"
5004        );
5005        assert!(
5006            !signatures
5007                .iter()
5008                .any(|sig| sig
5009                    .contains("unsafe impl<T: std::marker::Send> Send for QualifiedSafe<T>")),
5010            "QualifiedSafe with fully qualified bound should not be flagged"
5011        );
5012        assert!(
5013            !signatures
5014                .iter()
5015                .any(|sig| sig.contains("unsafe impl<T: Sync> Send for PointerWrapper<T>")),
5016            "PointerWrapper requires Sync on T and should not be flagged"
5017        );
5018        assert!(
5019            !signatures
5020                .iter()
5021                .any(|sig| sig.contains("unsafe impl<T: Send> Sync for SendBoundSync<T>")),
5022            "SendBoundSync requires Send on T and should not be flagged"
5023        );
5024
5025        Ok(())
5026    }
5027
5028    #[test]
5029    fn unsafe_send_sync_bounds_rule_ignores_string_literals() -> Result<()> {
5030        let temp = tempdir().expect("temp dir");
5031        let crate_root = temp.path();
5032
5033        fs::create_dir_all(crate_root.join("src"))?;
5034        fs::write(
5035            crate_root.join("Cargo.toml"),
5036            r#"[package]
5037name = "unsafe-send-sync-literals"
5038version = "0.1.0"
5039edition = "2021"
5040
5041[lib]
5042path = "src/lib.rs"
5043"#,
5044        )?;
5045        fs::write(
5046            crate_root.join("src/lib.rs"),
5047            r###"pub fn strings() {
5048    let _ = "unsafe impl<T> Send for Maybe<T> {}";
5049    let _ = r#"unsafe impl<T> Sync for Maybe<T> {}"#;
5050}
5051"###,
5052        )?;
5053
5054        let package = MirPackage {
5055            crate_name: "unsafe-send-sync-literals".to_string(),
5056            crate_root: crate_root.to_string_lossy().to_string(),
5057            functions: Vec::new(),
5058        };
5059
5060        let analysis = RuleEngine::with_builtin_rules().run(&package);
5061
5062        let matches: Vec<_> = analysis
5063            .findings
5064            .iter()
5065            .filter(|finding| finding.rule_id == "RUSTCOLA015")
5066            .collect();
5067
5068        assert!(
5069            matches.is_empty(),
5070            "string literals should not trigger unsafe send/sync findings",
5071        );
5072
5073        Ok(())
5074    }
5075
5076    #[test]
5077    fn strip_string_literals_preserves_lifetimes_and_length() -> Result<()> {
5078        let input = "fn demo<'a>(s: &'a str) -> &'a str { let c = 'x'; s }";
5079        let (sanitized, _) = strip_string_literals(StringLiteralState::default(), input);
5080
5081        assert_eq!(sanitized.len(), input.len());
5082        assert!(sanitized.contains("&'a str"));
5083        assert!(!sanitized.contains("'x'"));
5084
5085        Ok(())
5086    }
5087
5088    #[test]
5089    fn ffi_buffer_leak_rule_flags_early_return() -> Result<()> {
5090        let temp = tempdir().expect("temp dir");
5091        let crate_root = temp.path();
5092
5093        fs::create_dir_all(crate_root.join("src"))?;
5094        fs::write(
5095            crate_root.join("Cargo.toml"),
5096            r#"[package]
5097name = "ffi-buffer-leak"
5098version = "0.1.0"
5099edition = "2021"
5100
5101[lib]
5102path = "src/lib.rs"
5103"#,
5104        )?;
5105        fs::write(
5106            crate_root.join("src/lib.rs"),
5107            r#"#[no_mangle]
5108pub extern "C" fn ffi_allocate(target: *mut *mut u8, len: usize) -> Result<(), &'static str> {
5109    let mut buffer = Vec::with_capacity(len);
5110    let ptr = buffer.as_mut_ptr();
5111
5112    unsafe {
5113        *target = ptr;
5114    }
5115
5116    if len == 0 {
5117        return Err("len must be > 0");
5118    }
5119
5120    std::mem::forget(buffer);
5121    Ok(())
5122}
5123"#,
5124        )?;
5125
5126        let package = MirPackage {
5127            crate_name: "ffi-buffer-leak".to_string(),
5128            crate_root: crate_root.to_string_lossy().to_string(),
5129            functions: Vec::new(),
5130        };
5131
5132        let analysis = RuleEngine::with_builtin_rules().run(&package);
5133        let ffi_findings: Vec<_> = analysis
5134            .findings
5135            .iter()
5136            .filter(|finding| finding.rule_id == "RUSTCOLA016")
5137            .collect();
5138
5139        assert_eq!(
5140            ffi_findings.len(),
5141            1,
5142            "expected single FFI buffer leak finding"
5143        );
5144        let finding = ffi_findings[0];
5145        assert!(finding.function.contains("src/lib.rs"));
5146        assert!(finding
5147            .evidence
5148            .iter()
5149            .any(|line| line.contains("Vec::with_capacity")));
5150        assert!(finding
5151            .evidence
5152            .iter()
5153            .any(|line| line.contains("return Err")));
5154
5155        Ok(())
5156    }
5157
5158    #[test]
5159    fn ffi_buffer_leak_rule_ignores_string_literals() -> Result<()> {
5160        let temp = tempdir().expect("temp dir");
5161        let crate_root = temp.path();
5162
5163        fs::create_dir_all(crate_root.join("src"))?;
5164        fs::write(
5165            crate_root.join("Cargo.toml"),
5166            r#"[package]
5167name = "ffi-buffer-literal"
5168version = "0.1.0"
5169edition = "2021"
5170
5171[lib]
5172path = "src/lib.rs"
5173"#,
5174        )?;
5175        fs::write(
5176            crate_root.join("src/lib.rs"),
5177            r###"pub fn fixtures() {
5178    let _ = r#"
5179#[no_mangle]
5180pub extern "C" fn ffi_allocate(target: *mut *mut u8, len: usize) -> Result<(), &'static str> {
5181    let mut buffer = Vec::with_capacity(len);
5182    if len == 0 {
5183        return Err("len must be > 0");
5184    }
5185    std::mem::forget(buffer);
5186    Ok(())
5187}
5188"#;
5189}
5190"###,
5191        )?;
5192
5193        let package = MirPackage {
5194            crate_name: "ffi-buffer-literal".to_string(),
5195            crate_root: crate_root.to_string_lossy().to_string(),
5196            functions: Vec::new(),
5197        };
5198
5199        let analysis = RuleEngine::with_builtin_rules().run(&package);
5200        let matches: Vec<_> = analysis
5201            .findings
5202            .iter()
5203            .filter(|finding| finding.rule_id == "RUSTCOLA016")
5204            .collect();
5205
5206        assert!(
5207            matches.is_empty(),
5208            "string literal containing FFI example should not trigger RUSTCOLA016",
5209        );
5210
5211        Ok(())
5212    }
5213
5214    #[test]
5215    fn allocator_mismatch_rule_ignores_string_literals() -> Result<()> {
5216        let temp = tempdir().expect("temp dir");
5217        let crate_root = temp.path();
5218
5219        fs::create_dir_all(crate_root.join("src"))?;
5220        fs::write(
5221            crate_root.join("Cargo.toml"),
5222            r#"[package]
5223name = "allocator-mismatch-string-literals"
5224version = "0.1.0"
5225edition = "2021"
5226
5227[lib]
5228path = "src/lib.rs"
5229"#,
5230        )?;
5231        fs::write(
5232            crate_root.join("src/lib.rs"),
5233            r###"pub fn literal_patterns() {
5234    let message = "Box::into_raw should not trigger";
5235    let raw_literal = r#"libc::free mentioned here"#;
5236    let multiline = r#"Vec::from_raw_parts in documentation"#;
5237    let combined = format!("{} {}", message, raw_literal);
5238    drop((multiline, combined));
5239}
5240"###,
5241        )?;
5242
5243        let package = MirPackage {
5244            crate_name: "allocator-mismatch-string-literals".to_string(),
5245            crate_root: crate_root.to_string_lossy().to_string(),
5246            functions: Vec::new(),
5247        };
5248
5249        let analysis = RuleEngine::with_builtin_rules().run(&package);
5250        let findings: Vec<_> = analysis
5251            .findings
5252            .iter()
5253            .filter(|finding| finding.rule_id == "RUSTCOLA017")
5254            .collect();
5255
5256        assert!(
5257            findings.is_empty(),
5258            "string literals referencing allocator names should not trigger RUSTCOLA017"
5259        );
5260
5261        Ok(())
5262    }
5263
5264    #[test]
5265    fn unsafe_usage_rule_detects_unsafe_block() -> Result<()> {
5266        let package = MirPackage {
5267            crate_name: "unsafe-detect".to_string(),
5268            crate_root: ".".to_string(),
5269            functions: vec![MirFunction {
5270                name: "danger".to_string(),
5271                signature: "fn danger()".to_string(),
5272                body: vec![
5273                    "fn danger() {".to_string(),
5274                    "    unsafe { core::ptr::read(_1); }".to_string(),
5275                    "}".to_string(),
5276                ],
5277                span: None,
5278                ..Default::default()
5279            }],
5280        };
5281
5282        let analysis = RuleEngine::with_builtin_rules().run(&package);
5283        let matches: Vec<_> = analysis
5284            .findings
5285            .iter()
5286            .filter(|finding| finding.rule_id == "RUSTCOLA003")
5287            .collect();
5288
5289        assert_eq!(matches.len(), 1, "expected RUSTCOLA003 to fire");
5290        assert!(matches[0]
5291            .evidence
5292            .iter()
5293            .any(|line| line.contains("unsafe")));
5294
5295        Ok(())
5296    }
5297
5298    #[test]
5299    fn unsafe_usage_rule_ignores_string_literals() -> Result<()> {
5300        let package = MirPackage {
5301            crate_name: "unsafe-literal".to_string(),
5302            crate_root: ".".to_string(),
5303            functions: vec![MirFunction {
5304                name: "doc_example".to_string(),
5305                signature: "fn doc_example()".to_string(),
5306                body: vec![
5307                    "fn doc_example() {".to_string(),
5308                    "    _1 = \"This string mentions unsafe code\";".to_string(),
5309                    "}".to_string(),
5310                ],
5311                span: None,
5312                ..Default::default()
5313            }],
5314        };
5315
5316        let analysis = RuleEngine::with_builtin_rules().run(&package);
5317        let matches: Vec<_> = analysis
5318            .findings
5319            .iter()
5320            .filter(|finding| finding.rule_id == "RUSTCOLA003")
5321            .collect();
5322
5323        assert!(
5324            matches.is_empty(),
5325            "string literal mentioning unsafe should not trigger RUSTCOLA003"
5326        );
5327
5328        Ok(())
5329    }
5330
5331    #[test]
5332    fn vec_set_len_rule_detects_usage() -> Result<()> {
5333        let package = MirPackage {
5334            crate_name: "vec-set-len-detect".to_string(),
5335            crate_root: ".".to_string(),
5336            functions: vec![MirFunction {
5337                name: "grow".to_string(),
5338                signature: "fn grow(vec: &mut Vec<i32>)".to_string(),
5339                body: vec![
5340                    "fn grow(vec: &mut Vec<i32>) {".to_string(),
5341                    make_vec_set_len_line("    "),
5342                    "}".to_string(),
5343                ],
5344                span: None,
5345                ..Default::default()
5346            }],
5347        };
5348
5349        let analysis = RuleEngine::with_builtin_rules().run(&package);
5350        let matches: Vec<_> = analysis
5351            .findings
5352            .iter()
5353            .filter(|finding| finding.rule_id == "RUSTCOLA008")
5354            .collect();
5355
5356        assert_eq!(matches.len(), 1, "expected RUSTCOLA008 to fire");
5357        assert!(matches[0]
5358            .evidence
5359            .iter()
5360            .any(|line| line.contains("set_len")));
5361
5362        Ok(())
5363    }
5364
5365    #[test]
5366    fn vec_set_len_rule_ignores_string_literals() -> Result<()> {
5367        let package = MirPackage {
5368            crate_name: "vec-set-len-literal".to_string(),
5369            crate_root: ".".to_string(),
5370            functions: vec![MirFunction {
5371                name: "doc_only".to_string(),
5372                signature: "fn doc_only()".to_string(),
5373                body: vec![
5374                    "fn doc_only() {".to_string(),
5375                    format!("    _1 = \"Documenting {} behavior\";", VEC_SET_LEN_SYMBOL),
5376                    "}".to_string(),
5377                ],
5378                span: None,
5379                ..Default::default()
5380            }],
5381        };
5382
5383        let analysis = RuleEngine::with_builtin_rules().run(&package);
5384        let matches: Vec<_> = analysis
5385            .findings
5386            .iter()
5387            .filter(|finding| finding.rule_id == "RUSTCOLA008")
5388            .collect();
5389
5390        assert!(
5391            matches.is_empty(),
5392            "string literal mentioning {} should not trigger RUSTCOLA008",
5393            VEC_SET_LEN_SYMBOL
5394        );
5395
5396        Ok(())
5397    }
5398
5399    #[test]
5400    fn vec_set_len_rule_ignores_metadata_lines() -> Result<()> {
5401        let package = MirPackage {
5402            crate_name: "vec-set-len-metadata".to_string(),
5403            crate_root: ".".to_string(),
5404            functions: vec![MirFunction {
5405                name: "meta".to_string(),
5406                signature: "fn meta()".to_string(),
5407                body: vec![
5408                    "fn meta() {".to_string(),
5409                    format!(
5410                        "    0x00 │ 56 65 63 3a 3a 73 65 74 5f 6c 65 6e │ {} used in metadata",
5411                        VEC_SET_LEN_SYMBOL
5412                    ),
5413                    "    0x10 │ 20 75 73 65 64 20 69 6e 20 6d 65 74 │  used in metadata"
5414                        .to_string(),
5415                    "}".to_string(),
5416                ],
5417                span: None,
5418                ..Default::default()
5419            }],
5420        };
5421
5422        let analysis = RuleEngine::with_builtin_rules().run(&package);
5423        let matches: Vec<_> = analysis
5424            .findings
5425            .iter()
5426            .filter(|finding| finding.rule_id == "RUSTCOLA008")
5427            .collect();
5428
5429        assert!(
5430            matches.is_empty(),
5431            "metadata-style {} mention without call should not trigger RUSTCOLA008",
5432            VEC_SET_LEN_SYMBOL
5433        );
5434
5435        Ok(())
5436    }
5437
5438    #[test]
5439    fn maybe_uninit_rule_detects_usage() -> Result<()> {
5440        let package = MirPackage {
5441            crate_name: "maybe-uninit-detect".to_string(),
5442            crate_root: ".".to_string(),
5443            functions: vec![MirFunction {
5444                name: "finalize".to_string(),
5445                signature: "fn finalize(buf: &mut core::mem::MaybeUninit<i32>)".to_string(),
5446                body: vec![
5447                    "fn finalize(buf: &mut core::mem::MaybeUninit<i32>) {".to_string(),
5448                    make_maybe_uninit_assume_init_line("    "),
5449                    "}".to_string(),
5450                ],
5451                span: None,
5452                ..Default::default()
5453            }],
5454        };
5455
5456        let analysis = RuleEngine::with_builtin_rules().run(&package);
5457        let matches: Vec<_> = analysis
5458            .findings
5459            .iter()
5460            .filter(|finding| finding.rule_id == "RUSTCOLA009")
5461            .collect();
5462
5463        assert_eq!(matches.len(), 1, "expected RUSTCOLA009 to fire");
5464        assert!(matches[0]
5465            .evidence
5466            .iter()
5467            .any(|line| line.contains(MAYBE_UNINIT_ASSUME_INIT_SYMBOL)));
5468
5469        Ok(())
5470    }
5471
5472    #[test]
5473    fn maybe_uninit_rule_skips_analyzer_crate() -> Result<()> {
5474        let package = MirPackage {
5475            crate_name: "mir-extractor".to_string(),
5476            crate_root: ".".to_string(),
5477            functions: vec![MirFunction {
5478                name: "self_test".to_string(),
5479                signature: "fn self_test(vec: &mut Vec<i32>)".to_string(),
5480                body: vec![
5481                    "fn self_test(vec: &mut Vec<i32>) {".to_string(),
5482                    make_maybe_uninit_assume_init_line("    "),
5483                    "}".to_string(),
5484                ],
5485                span: None,
5486                ..Default::default()
5487            }],
5488        };
5489
5490        let analysis = RuleEngine::with_builtin_rules().run(&package);
5491        let has_maybe_uninit = analysis
5492            .findings
5493            .iter()
5494            .any(|finding| finding.rule_id == "RUSTCOLA009");
5495
5496        assert!(
5497            !has_maybe_uninit,
5498            "{}::{} rule should not flag mir-extractor crate",
5499            MAYBE_UNINIT_TYPE_SYMBOL, MAYBE_UNINIT_ASSUME_INIT_SYMBOL
5500        );
5501
5502        Ok(())
5503    }
5504
5505    #[test]
5506    fn mem_uninit_rule_detects_usage() -> Result<()> {
5507        let package = MirPackage {
5508            crate_name: "mem-uninit-detect".to_string(),
5509            crate_root: ".".to_string(),
5510            functions: vec![MirFunction {
5511                name: "allocate".to_string(),
5512                signature: "fn allocate()".to_string(),
5513                body: vec![
5514                    "fn allocate() {".to_string(),
5515                    make_mem_uninitialized_line("    "),
5516                    "}".to_string(),
5517                ],
5518                span: None,
5519                ..Default::default()
5520            }],
5521        };
5522
5523        let analysis = RuleEngine::with_builtin_rules().run(&package);
5524        let matches: Vec<_> = analysis
5525            .findings
5526            .iter()
5527            .filter(|finding| finding.rule_id == "RUSTCOLA010")
5528            .collect();
5529
5530        assert_eq!(matches.len(), 1, "expected RUSTCOLA010 to fire");
5531        assert!(matches[0]
5532            .evidence
5533            .iter()
5534            .any(|line| line.contains(MEM_UNINITIALIZED_SYMBOL)));
5535
5536        Ok(())
5537    }
5538
5539    #[test]
5540    fn mem_uninit_rule_skips_analyzer_crate() -> Result<()> {
5541        let package = MirPackage {
5542            crate_name: "mir-extractor".to_string(),
5543            crate_root: ".".to_string(),
5544            functions: vec![MirFunction {
5545                name: "self_test".to_string(),
5546                signature: "fn self_test()".to_string(),
5547                body: vec![
5548                    "fn self_test() {".to_string(),
5549                    make_mem_uninitialized_line("    "),
5550                    "}".to_string(),
5551                ],
5552                span: None,
5553                ..Default::default()
5554            }],
5555        };
5556
5557        let analysis = RuleEngine::with_builtin_rules().run(&package);
5558        let has_mem_uninit = analysis
5559            .findings
5560            .iter()
5561            .any(|finding| finding.rule_id == "RUSTCOLA010");
5562
5563        assert!(
5564            !has_mem_uninit,
5565            "{}::{} rule should not flag mir-extractor crate",
5566            MEM_MODULE_SYMBOL, MEM_UNINITIALIZED_SYMBOL
5567        );
5568
5569        Ok(())
5570    }
5571
5572    #[test]
5573    fn danger_accept_invalid_certs_rule_detects_usage() -> Result<()> {
5574        let package = MirPackage {
5575            crate_name: "danger-accept-invalid-certs-detect".to_string(),
5576            crate_root: ".".to_string(),
5577            functions: vec![MirFunction {
5578                name: "configure".to_string(),
5579                signature: "fn configure(builder: reqwest::ClientBuilder)".to_string(),
5580                body: vec![
5581                    "fn configure(builder: reqwest::ClientBuilder) {".to_string(),
5582                    make_danger_accept_invalid_certs_line("    "),
5583                    "}".to_string(),
5584                ],
5585                span: None,
5586                ..Default::default()
5587            }],
5588        };
5589
5590        let analysis = RuleEngine::with_builtin_rules().run(&package);
5591        let matches: Vec<_> = analysis
5592            .findings
5593            .iter()
5594            .filter(|finding| finding.rule_id == "RUSTCOLA012")
5595            .collect();
5596
5597        assert_eq!(matches.len(), 1, "expected RUSTCOLA012 to fire");
5598        assert!(matches[0]
5599            .evidence
5600            .iter()
5601            .any(|line| line.contains(DANGER_ACCEPT_INVALID_CERTS_SYMBOL)));
5602
5603        Ok(())
5604    }
5605
5606    #[test]
5607    fn danger_accept_invalid_certs_rule_detects_rustls_dangerous() -> Result<()> {
5608        let package = MirPackage {
5609            crate_name: "rustls-dangerous-client".to_string(),
5610            crate_root: ".".to_string(),
5611            functions: vec![MirFunction {
5612                name: "dangerous_client".to_string(),
5613                signature: "fn dangerous_client(config: &mut rustls::ClientConfig)".to_string(),
5614                body: vec![
5615                    "fn dangerous_client(config: &mut rustls::ClientConfig) {".to_string(),
5616                    "    _3 = rustls::client::dangerous::DangerousClientConfig::set_certificate_verifier(move _2, move _1);".to_string(),
5617                    "}".to_string(),
5618                ],
5619                span: None,
5620            ..Default::default()
5621            }],
5622        };
5623
5624        let analysis = RuleEngine::with_builtin_rules().run(&package);
5625        let matches: Vec<_> = analysis
5626            .findings
5627            .iter()
5628            .filter(|finding| finding.rule_id == "RUSTCOLA012")
5629            .collect();
5630
5631        assert_eq!(
5632            matches.len(),
5633            1,
5634            "expected RUSTCOLA012 to fire for rustls dangerous usage"
5635        );
5636        assert!(matches[0]
5637            .evidence
5638            .iter()
5639            .any(|line| line.contains("set_certificate_verifier")));
5640
5641        Ok(())
5642    }
5643
5644    #[test]
5645    fn danger_accept_invalid_certs_rule_skips_analyzer_crate() -> Result<()> {
5646        let package = MirPackage {
5647            crate_name: "mir-extractor".to_string(),
5648            crate_root: ".".to_string(),
5649            functions: vec![MirFunction {
5650                name: "self_test".to_string(),
5651                signature: "fn self_test(builder: reqwest::ClientBuilder)".to_string(),
5652                body: vec![
5653                    "fn self_test(builder: reqwest::ClientBuilder) {".to_string(),
5654                    make_danger_accept_invalid_certs_line("    "),
5655                    "}".to_string(),
5656                ],
5657                span: None,
5658                ..Default::default()
5659            }],
5660        };
5661
5662        let analysis = RuleEngine::with_builtin_rules().run(&package);
5663        let has_danger_tls = analysis
5664            .findings
5665            .iter()
5666            .any(|finding| finding.rule_id == "RUSTCOLA012");
5667
5668        assert!(
5669            !has_danger_tls,
5670            "{} rule should not flag mir-extractor crate",
5671            DANGER_ACCEPT_INVALID_CERTS_SYMBOL
5672        );
5673
5674        Ok(())
5675    }
5676
5677    #[test]
5678    fn length_truncation_cast_rule_detects_usage() -> Result<()> {
5679        let package = MirPackage {
5680            crate_name: "length-truncation-detect".to_string(),
5681            crate_root: ".".to_string(),
5682            functions: vec![MirFunction {
5683                name: "encode".to_string(),
5684                signature: "fn encode(len: usize, writer: &mut byteorder::io::Write)".to_string(),
5685                body: {
5686                    let mut body = Vec::with_capacity(6);
5687                    body.push(
5688                        "fn encode(len: usize, writer: &mut byteorder::io::Write) {".to_string(),
5689                    );
5690                    body.extend(make_length_truncation_cast_lines("    "));
5691                    body.push("}".to_string());
5692                    body
5693                },
5694                span: None,
5695                ..Default::default()
5696            }],
5697        };
5698
5699        let analysis = RuleEngine::with_builtin_rules().run(&package);
5700        let matches: Vec<_> = analysis
5701            .findings
5702            .iter()
5703            .filter(|finding| finding.rule_id == "RUSTCOLA022")
5704            .collect();
5705
5706        assert_eq!(matches.len(), 1, "expected RUSTCOLA022 to fire");
5707        let evidence = &matches[0].evidence;
5708        assert!(evidence
5709            .iter()
5710            .any(|line| line.contains(LENGTH_TRUNCATION_CAST_INTTOINT_SYMBOL)));
5711        assert!(evidence
5712            .iter()
5713            .any(|line| line.contains(LENGTH_TRUNCATION_CAST_WRITE_SYMBOL)));
5714
5715        Ok(())
5716    }
5717
5718    #[test]
5719    fn length_truncation_cast_rule_skips_analyzer_crate() -> Result<()> {
5720        let package = MirPackage {
5721            crate_name: "mir-extractor".to_string(),
5722            crate_root: ".".to_string(),
5723            functions: vec![MirFunction {
5724                name: "self_test".to_string(),
5725                signature: "fn self_test(len: usize)".to_string(),
5726                body: {
5727                    let mut body = Vec::with_capacity(6);
5728                    body.push("fn self_test(len: usize) {".to_string());
5729                    body.extend(make_length_truncation_cast_lines("    "));
5730                    body.push("}".to_string());
5731                    body
5732                },
5733                span: None,
5734                ..Default::default()
5735            }],
5736        };
5737
5738        let analysis = RuleEngine::with_builtin_rules().run(&package);
5739        let has_truncation = analysis
5740            .findings
5741            .iter()
5742            .any(|finding| finding.rule_id == "RUSTCOLA022");
5743
5744        assert!(
5745            !has_truncation,
5746            "{} rule should not flag mir-extractor crate",
5747            LENGTH_TRUNCATION_CAST_WRITE_SYMBOL
5748        );
5749
5750        Ok(())
5751    }
5752
5753    #[test]
5754    fn unbounded_allocation_rule_detects_usage() -> Result<()> {
5755        let package = MirPackage {
5756            crate_name: "unbounded-allocation-detect".to_string(),
5757            crate_root: ".".to_string(),
5758            functions: vec![MirFunction {
5759                name: "allocate".to_string(),
5760                signature: "fn allocate(len: usize)".to_string(),
5761                body: {
5762                    let mut body = Vec::with_capacity(5);
5763                    body.push("fn allocate(len: usize) {".to_string());
5764                    body.extend(make_unbounded_allocation_lines("    ", "len"));
5765                    body.push("}".to_string());
5766                    body
5767                },
5768                span: None,
5769                ..Default::default()
5770            }],
5771        };
5772
5773        let analysis = RuleEngine::with_builtin_rules().run(&package);
5774        let matches: Vec<_> = analysis
5775            .findings
5776            .iter()
5777            .filter(|finding| finding.rule_id == "RUSTCOLA024")
5778            .collect();
5779
5780        assert_eq!(matches.len(), 1, "expected RUSTCOLA024 to fire");
5781        let evidence = &matches[0].evidence;
5782        assert!(evidence
5783            .iter()
5784            .any(|line| line.contains(UNBOUNDED_ALLOCATION_WITH_CAPACITY_SYMBOL)));
5785
5786        Ok(())
5787    }
5788
5789    #[test]
5790    fn unbounded_allocation_rule_skips_analyzer_crate() -> Result<()> {
5791        let package = MirPackage {
5792            crate_name: "mir-extractor".to_string(),
5793            crate_root: ".".to_string(),
5794            functions: vec![MirFunction {
5795                name: "self_test".to_string(),
5796                signature: "fn self_test(len: usize)".to_string(),
5797                body: {
5798                    let mut body = Vec::with_capacity(5);
5799                    body.push("fn self_test(len: usize) {".to_string());
5800                    body.extend(make_unbounded_allocation_lines("    ", "len"));
5801                    body.push("}".to_string());
5802                    body
5803                },
5804                span: None,
5805                ..Default::default()
5806            }],
5807        };
5808
5809        let analysis = RuleEngine::with_builtin_rules().run(&package);
5810        let has_unbounded = analysis
5811            .findings
5812            .iter()
5813            .any(|finding| finding.rule_id == "RUSTCOLA024");
5814
5815        assert!(
5816            !has_unbounded,
5817            "{} rule should not flag mir-extractor crate",
5818            UNBOUNDED_ALLOCATION_WITH_CAPACITY_SYMBOL
5819        );
5820
5821        Ok(())
5822    }
5823
5824    #[test]
5825    fn transmute_rule_detects_usage() -> Result<()> {
5826        let package = MirPackage {
5827            crate_name: "transmute-detect".to_string(),
5828            crate_root: ".".to_string(),
5829            functions: vec![MirFunction {
5830                name: "reinterpret".to_string(),
5831                signature: "unsafe fn reinterpret(value: u32) -> i32".to_string(),
5832                body: vec![
5833                    "unsafe fn reinterpret(value: u32) -> i32 {".to_string(),
5834                    "    std::mem::transmute(value)".to_string(),
5835                    "}".to_string(),
5836                ],
5837                span: None,
5838                ..Default::default()
5839            }],
5840        };
5841
5842        let analysis = RuleEngine::with_builtin_rules().run(&package);
5843        let matches: Vec<_> = analysis
5844            .findings
5845            .iter()
5846            .filter(|finding| finding.rule_id == "RUSTCOLA002")
5847            .collect();
5848
5849        assert_eq!(matches.len(), 1, "expected single RUSTCOLA002 finding");
5850        assert!(matches[0]
5851            .evidence
5852            .iter()
5853            .any(|line| line.contains("std::mem::transmute")));
5854
5855        Ok(())
5856    }
5857
5858    #[test]
5859    fn transmute_rule_ignores_string_literals() -> Result<()> {
5860        let package = MirPackage {
5861            crate_name: "transmute-string".to_string(),
5862            crate_root: ".".to_string(),
5863            functions: vec![MirFunction {
5864                name: "describe".to_string(),
5865                signature: "pub fn describe() -> &'static str".to_string(),
5866                body: vec![
5867                    "pub fn describe() -> &'static str {".to_string(),
5868                    "    \"std::mem::transmute should not trigger\"".to_string(),
5869                    "}".to_string(),
5870                ],
5871                span: None,
5872                ..Default::default()
5873            }],
5874        };
5875
5876        let analysis = RuleEngine::with_builtin_rules().run(&package);
5877        let matches: Vec<_> = analysis
5878            .findings
5879            .iter()
5880            .filter(|finding| finding.rule_id == "RUSTCOLA002")
5881            .collect();
5882
5883        assert!(
5884            matches.is_empty(),
5885            "string literal should not trigger transmute rule"
5886        );
5887
5888        Ok(())
5889    }
5890
5891    #[test]
5892    fn allocator_mismatch_rule_detects_mixed_allocators() -> Result<()> {
5893        // Test using mock MIR that represents the allocator mismatch pattern
5894        // This simulates: CString::into_raw() followed by libc::free()
5895        // The free directly uses the variable from into_raw
5896        let package = MirPackage {
5897            crate_name: "allocator-mismatch".to_string(),
5898            crate_root: ".".to_string(),
5899            functions: vec![MirFunction {
5900                name: "bad_mix".to_string(),
5901                signature: "unsafe extern \"C\" fn bad_mix()".to_string(),
5902                body: vec![
5903                    "_1 = CString::new::<&str>(const \"hello\") -> [return: bb1, unwind: bb5];".to_string(),
5904                    "_2 = Result::<CString, NulError>::unwrap(move _1) -> [return: bb2, unwind: bb5];".to_string(),
5905                    "_3 = CString::into_raw(move _2) -> [return: bb3, unwind: bb5];".to_string(),
5906                    "_4 = free(move _3) -> [return: bb4, unwind: bb5];".to_string(),
5907                ],
5908                span: Some(SourceSpan {
5909                    file: "src/lib.rs".to_string(),
5910                    start_line: 4,
5911                    start_column: 1,
5912                    end_line: 8,
5913                    end_column: 1,
5914                }),
5915                ..Default::default()
5916            }],
5917        };
5918
5919        let analysis = RuleEngine::with_builtin_rules().run(&package);
5920        let findings: Vec<_> = analysis
5921            .findings
5922            .iter()
5923            .filter(|finding| finding.rule_id == "RUSTCOLA017")
5924            .collect();
5925
5926        assert_eq!(
5927            findings.len(),
5928            1,
5929            "expected single allocator mismatch finding"
5930        );
5931        let finding = findings[0];
5932        assert!(finding.function.contains("bad_mix"));
5933        assert!(finding
5934            .evidence
5935            .iter()
5936            .any(|line| line.contains("into_raw")));
5937        assert!(finding.evidence.iter().any(|line| line.contains("free")));
5938
5939        Ok(())
5940    }
5941
5942    #[test]
5943    fn rustsec_unsound_dependency_rule_flags_lockfile_matches() -> Result<()> {
5944        let temp = tempdir().expect("temp dir");
5945        let crate_root = temp.path();
5946
5947        fs::create_dir_all(crate_root.join("src"))?;
5948        fs::write(
5949            crate_root.join("Cargo.toml"),
5950            r#"[package]
5951name = "rustsec-unsound"
5952version = "0.1.0"
5953edition = "2021"
5954
5955[lib]
5956path = "src/lib.rs"
5957"#,
5958        )?;
5959        fs::write(crate_root.join("src/lib.rs"), "pub fn noop() {}")?;
5960        fs::write(
5961            crate_root.join("Cargo.lock"),
5962            r#"# This file is automatically @generated by Cargo.
5963[[package]]
5964name = "arrayvec"
5965version = "0.4.10"
5966source = "registry+https://github.com/rust-lang/crates.io-index"
5967
5968[[package]]
5969name = "rustsec-unsound"
5970version = "0.1.0"
5971"#,
5972        )?;
5973
5974        let package = MirPackage {
5975            crate_name: "rustsec-unsound".to_string(),
5976            crate_root: crate_root.to_string_lossy().to_string(),
5977            functions: Vec::new(),
5978        };
5979
5980        let analysis = RuleEngine::with_builtin_rules().run(&package);
5981        let findings: Vec<_> = analysis
5982            .findings
5983            .iter()
5984            .filter(|finding| finding.rule_id == "RUSTCOLA018")
5985            .collect();
5986
5987        assert_eq!(findings.len(), 1, "expected single RustSec unsound finding");
5988        let finding = findings[0];
5989        assert!(finding.function.ends_with("Cargo.lock"));
5990        assert!(finding.message.contains("arrayvec"));
5991        assert!(finding.message.contains("RUSTSEC-2018-0001"));
5992
5993        Ok(())
5994    }
5995
5996    #[test]
5997    fn yanked_crate_rule_flags_yanked_versions() -> Result<()> {
5998        let temp = tempdir().expect("temp dir");
5999        let crate_root = temp.path();
6000
6001        fs::create_dir_all(crate_root.join("src"))?;
6002        fs::write(
6003            crate_root.join("Cargo.toml"),
6004            r#"[package]
6005name = "yanked-dep"
6006version = "0.1.0"
6007edition = "2021"
6008
6009[lib]
6010path = "src/lib.rs"
6011"#,
6012        )?;
6013        fs::write(crate_root.join("src/lib.rs"), "pub fn noop() {}")?;
6014        fs::write(
6015            crate_root.join("Cargo.lock"),
6016            r#"# autogenerated
6017[[package]]
6018name = "memoffset"
6019version = "0.5.6"
6020source = "registry+https://github.com/rust-lang/crates.io-index"
6021
6022[[package]]
6023name = "yanked-dep"
6024version = "0.1.0"
6025"#,
6026        )?;
6027
6028        let package = MirPackage {
6029            crate_name: "yanked-dep".to_string(),
6030            crate_root: crate_root.to_string_lossy().to_string(),
6031            functions: Vec::new(),
6032        };
6033
6034        let analysis = RuleEngine::with_builtin_rules().run(&package);
6035        let findings: Vec<_> = analysis
6036            .findings
6037            .iter()
6038            .filter(|finding| finding.rule_id == "RUSTCOLA019")
6039            .collect();
6040
6041        assert_eq!(findings.len(), 1, "expected single yanked crate finding");
6042        let finding = findings[0];
6043        assert!(finding.message.contains("memoffset"));
6044        assert!(finding.message.contains("0.5.6"));
6045
6046        Ok(())
6047    }
6048
6049    #[test]
6050    fn cargo_auditable_rule_flags_missing_metadata() -> Result<()> {
6051        let temp = tempdir().expect("temp dir");
6052        let crate_root = temp.path();
6053
6054        fs::create_dir_all(crate_root.join("src"))?;
6055        fs::write(
6056            crate_root.join("Cargo.toml"),
6057            r#"[package]
6058name = "auditable-missing"
6059version = "0.1.0"
6060edition = "2021"
6061"#,
6062        )?;
6063        fs::write(
6064            crate_root.join("src/main.rs"),
6065            "fn main() { println!(\"hi\"); }",
6066        )?;
6067
6068        let package = MirPackage {
6069            crate_name: "auditable-missing".to_string(),
6070            crate_root: crate_root.to_string_lossy().to_string(),
6071            functions: Vec::new(),
6072        };
6073
6074        let analysis = RuleEngine::with_builtin_rules().run(&package);
6075        let findings: Vec<_> = analysis
6076            .findings
6077            .iter()
6078            .filter(|finding| finding.rule_id == "RUSTCOLA020")
6079            .collect();
6080
6081        assert_eq!(findings.len(), 1, "expected missing auditable finding");
6082        let finding = findings[0];
6083        assert!(finding.function.contains("Cargo.toml"));
6084        assert!(finding.message.contains("auditable"));
6085
6086        Ok(())
6087    }
6088
6089    #[test]
6090    fn cargo_auditable_rule_respects_skip_metadata() -> Result<()> {
6091        let temp = tempdir().expect("temp dir");
6092        let crate_root = temp.path();
6093
6094        fs::create_dir_all(crate_root.join("src"))?;
6095        fs::write(
6096            crate_root.join("Cargo.toml"),
6097            r#"[package]
6098name = "auditable-skip"
6099version = "0.1.0"
6100edition = "2021"
6101
6102[package.metadata.rust-cola]
6103skip_auditable_check = true
6104"#,
6105        )?;
6106        fs::write(
6107            crate_root.join("src/main.rs"),
6108            "fn main() { println!(\"hi\"); }",
6109        )?;
6110
6111        let package = MirPackage {
6112            crate_name: "auditable-skip".to_string(),
6113            crate_root: crate_root.to_string_lossy().to_string(),
6114            functions: Vec::new(),
6115        };
6116
6117        let analysis = RuleEngine::with_builtin_rules().run(&package);
6118        let finding_exists = analysis
6119            .findings
6120            .iter()
6121            .any(|finding| finding.rule_id == "RUSTCOLA020");
6122
6123        assert!(!finding_exists, "skip metadata should suppress findings");
6124
6125        Ok(())
6126    }
6127
6128    #[test]
6129    fn cargo_auditable_rule_allows_marker_dependency() -> Result<()> {
6130        let temp = tempdir().expect("temp dir");
6131        let crate_root = temp.path();
6132
6133        fs::create_dir_all(crate_root.join("src"))?;
6134        fs::write(
6135            crate_root.join("Cargo.toml"),
6136            r#"[package]
6137name = "auditable-marker"
6138version = "0.1.0"
6139edition = "2021"
6140
6141[dependencies]
6142auditable = "0.1"
6143"#,
6144        )?;
6145        fs::write(
6146            crate_root.join("src/main.rs"),
6147            "fn main() { println!(\"hi\"); }",
6148        )?;
6149
6150        let package = MirPackage {
6151            crate_name: "auditable-marker".to_string(),
6152            crate_root: crate_root.to_string_lossy().to_string(),
6153            functions: Vec::new(),
6154        };
6155
6156        let analysis = RuleEngine::with_builtin_rules().run(&package);
6157        let finding_exists = analysis
6158            .findings
6159            .iter()
6160            .any(|finding| finding.rule_id == "RUSTCOLA020");
6161
6162        assert!(
6163            !finding_exists,
6164            "auditable dependency marker should suppress findings"
6165        );
6166
6167        Ok(())
6168    }
6169
6170    #[test]
6171    fn cargo_auditable_rule_detects_workspace_ci_markers() -> Result<()> {
6172        let temp = tempdir().expect("temp dir");
6173        let workspace_root = temp.path();
6174
6175        fs::create_dir_all(workspace_root.join(".github/workflows"))?;
6176        fs::write(
6177            workspace_root.join(".github/workflows/ci.yml"),
6178            "run: cargo auditable build --release\n",
6179        )?;
6180
6181        let crate_root = workspace_root.join("workspace-bin");
6182        fs::create_dir_all(crate_root.join("src"))?;
6183        fs::write(
6184            crate_root.join("Cargo.toml"),
6185            r#"[package]
6186name = "workspace-bin"
6187version = "0.1.0"
6188edition = "2021"
6189"#,
6190        )?;
6191        fs::write(
6192            crate_root.join("src/main.rs"),
6193            "fn main() { println!(\"hi\"); }",
6194        )?;
6195
6196        let package = MirPackage {
6197            crate_name: "workspace-bin".to_string(),
6198            crate_root: crate_root.to_string_lossy().to_string(),
6199            functions: Vec::new(),
6200        };
6201
6202        let analysis = RuleEngine::with_builtin_rules().run(&package);
6203        let finding_exists = analysis
6204            .findings
6205            .iter()
6206            .any(|finding| finding.rule_id == "RUSTCOLA020");
6207
6208        assert!(
6209            !finding_exists,
6210            "workspace CI markers should suppress cargo auditable warning"
6211        );
6212
6213        Ok(())
6214    }
6215
6216    #[test]
6217    fn wasm_stub_registers_metadata() -> Result<()> {
6218        let temp = tempdir().expect("temp dir");
6219        let wasm_path = temp.path().join("rust_cola_stub.wasm");
6220        fs::write(&wasm_path, b"\0asmstub")?;
6221
6222        let mut engine = RuleEngine::new();
6223        engine.load_wasm_module(&wasm_path)?;
6224
6225        let metadata = engine.rule_metadata();
6226        assert!(metadata
6227            .iter()
6228            .any(|meta| matches!(meta.origin, RuleOrigin::Wasm { .. })));
6229
6230        let package = MirPackage {
6231            crate_name: "demo".to_string(),
6232            crate_root: ".".to_string(),
6233            functions: Vec::new(),
6234        };
6235
6236        let analysis = engine.run(&package);
6237        assert!(analysis.findings.is_empty());
6238
6239        Ok(())
6240    }
6241
6242    #[test]
6243    fn cache_hit_skips_extractor() -> Result<()> {
6244        let temp = tempdir().expect("temp dir");
6245        let crate_root = temp.path();
6246
6247        fs::create_dir_all(crate_root.join("src"))?;
6248        fs::write(
6249            crate_root.join("Cargo.toml"),
6250            r#"[package]
6251name = "cache-demo"
6252version = "0.1.0"
6253edition = "2021"
6254
6255[lib]
6256path = "src/lib.rs"
6257"#,
6258        )?;
6259        fs::write(
6260            crate_root.join("src/lib.rs"),
6261            "pub fn cached() -> i32 { 42 }",
6262        )?;
6263
6264        let cache_temp = tempdir().expect("cache dir");
6265        let cache_config = CacheConfig {
6266            enabled: true,
6267            directory: cache_temp.path().to_path_buf(),
6268            clear: false,
6269        };
6270
6271        let counter = Arc::new(AtomicUsize::new(0));
6272        let base_package = MirPackage {
6273            crate_name: "cache-demo".to_string(),
6274            crate_root: crate_root.to_string_lossy().to_string(),
6275            functions: vec![MirFunction {
6276                name: "cached".to_string(),
6277                signature: "fn cached() -> i32".to_string(),
6278                body: vec!["_0 = const 42_i32;".to_string()],
6279                span: None,
6280                ..Default::default()
6281            }],
6282        };
6283
6284        let counter_clone = counter.clone();
6285        #[cfg(feature = "hir-driver")]
6286        let hir_options = HirOptions::default();
6287
6288        let (first_artifacts, status1) = super::extract_artifacts_with_cache(
6289            crate_root,
6290            &cache_config,
6291            #[cfg(feature = "hir-driver")]
6292            &hir_options,
6293            move || {
6294                counter_clone.fetch_add(1, Ordering::SeqCst);
6295                Ok(ExtractionArtifacts {
6296                    mir: base_package.clone(),
6297                    #[cfg(feature = "hir-driver")]
6298                    hir: None,
6299                })
6300            },
6301        )?;
6302
6303        let first_package = first_artifacts.mir.clone();
6304
6305        assert_eq!(counter.load(Ordering::SeqCst), 1);
6306        match status1 {
6307            CacheStatus::Miss { .. } => {}
6308            _ => panic!("expected first run to miss cache"),
6309        }
6310
6311        let (second_artifacts, status2) = super::extract_artifacts_with_cache(
6312            crate_root,
6313            &cache_config,
6314            #[cfg(feature = "hir-driver")]
6315            &hir_options,
6316            || {
6317                panic!("extractor invoked during cache hit");
6318            },
6319        )?;
6320
6321        let second_package = second_artifacts.mir;
6322
6323        match status2 {
6324            CacheStatus::Hit(meta) => {
6325                assert_eq!(
6326                    meta.function_fingerprints.len(),
6327                    second_package.functions.len()
6328                );
6329            }
6330            _ => panic!("expected second run to hit cache"),
6331        }
6332
6333        assert_eq!(counter.load(Ordering::SeqCst), 1);
6334        assert_eq!(
6335            first_package.functions.len(),
6336            second_package.functions.len()
6337        );
6338
6339        Ok(())
6340    }
6341
6342    #[test]
6343    fn vec_set_len_rule_skips_analyzer_crate() -> Result<()> {
6344        let package = MirPackage {
6345            crate_name: "mir-extractor".to_string(),
6346            crate_root: ".".to_string(),
6347            functions: vec![MirFunction {
6348                name: "self_test".to_string(),
6349                signature: "fn self_test(vec: &mut Vec<i32>)".to_string(),
6350                body: vec![
6351                    "fn self_test(vec: &mut Vec<i32>) {".to_string(),
6352                    make_vec_set_len_line("    "),
6353                    "}".to_string(),
6354                ],
6355                span: None,
6356                ..Default::default()
6357            }],
6358        };
6359
6360        let analysis = RuleEngine::with_builtin_rules().run(&package);
6361        let has_vec_set_len = analysis
6362            .findings
6363            .iter()
6364            .any(|finding| finding.rule_id == "RUSTCOLA008");
6365
6366        assert!(
6367            !has_vec_set_len,
6368            "Vec::set_len rule should not flag mir-extractor crate"
6369        );
6370
6371        Ok(())
6372    }
6373}