// mir_extractor/dataflow/path_sensitive.rs

1#![allow(unused_variables, dead_code, unused_imports)]
2
3//! Path-sensitive taint analysis
4//!
5//! This module analyzes taint flow separately for each execution path through a function's CFG.
6//! This enables detecting vulnerabilities where only some branches lack sanitization.
7
8use super::cfg::{BasicBlock, ControlFlowGraph, Terminator};
9use super::closure::ClosureInfo;
10use super::field::{FieldPath, FieldTaint, FieldTaintMap};
11use super::{DataflowSummary, TaintPropagation};
12use crate::MirFunction;
13use std::collections::HashMap;
14
/// Taint state for a variable.
///
/// `Eq` and `Hash` are derived alongside `PartialEq` (every field is a
/// `String`), so states can be compared exactly and stored in hashed
/// collections such as `HashSet` or used as `HashMap` keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum TaintState {
    /// Variable is clean (not tainted)
    Clean,

    /// Variable is tainted from a source
    Tainted {
        /// Category of the taint source, e.g., "environment", "network", "file"
        source_type: String,
        /// Where the taint came from, e.g., "env::args", "TcpStream::read"
        source_location: String,
    },

    /// Variable was tainted but has been sanitized
    Sanitized {
        /// The sanitizing operation, e.g., "validate_input", "parse::<i32>"
        sanitizer: String,
    },
}
32
33/// Result of analyzing a single path
34#[derive(Debug, Clone)]
35pub struct PathAnalysisResult {
36    /// The execution path (sequence of block IDs)
37    pub path: Vec<String>,
38
39    /// Whether this path reaches a sink with tainted data
40    pub has_vulnerable_sink: bool,
41
42    /// Sink calls found on this path
43    pub sink_calls: Vec<SinkCall>,
44
45    /// Source calls found on this path
46    pub source_calls: Vec<SourceCall>,
47
48    /// Sanitization calls found on this path
49    pub sanitizer_calls: Vec<SanitizerCall>,
50
51    /// Whether the return value (_0) is tainted at the end of the path
52    pub return_tainted: bool,
53
54    /// Final taint state of variables at the end of the path
55    pub final_taint: HashMap<String, TaintState>,
56}
57
/// A call to a sensitive sink observed during path analysis.
#[derive(Debug, Clone)]
pub struct SinkCall {
    /// ID of the basic block in which the call was seen
    pub block_id: String,
    /// Text of the statement that performs the call
    pub statement: String,
    /// Name of the sink that was matched, e.g., "Command::new"
    pub sink_function: String,
    /// Arguments of the call that were found to be tainted
    pub tainted_args: Vec<String>,
}
65
/// A call that introduces taint, observed during path analysis.
#[derive(Debug, Clone)]
pub struct SourceCall {
    /// ID of the basic block in which the call was seen
    pub block_id: String,
    /// Text of the statement that performs the call
    pub statement: String,
    /// The source expression that produced the tainted value
    pub source_function: String,
    /// Place that receives the tainted result
    pub result_var: String,
}
73
/// A sanitizing call observed during path analysis.
#[derive(Debug, Clone)]
pub struct SanitizerCall {
    /// ID of the basic block in which the call was seen
    pub block_id: String,
    /// Text of the statement that performs the call
    pub statement: String,
    /// The sanitizer expression that was matched
    pub sanitizer_function: String,
    /// Variable that was passed through the sanitizer
    pub sanitized_var: String,
}
81
82/// Path-sensitive taint analysis
83pub struct PathSensitiveTaintAnalysis {
84    cfg: ControlFlowGraph,
85    /// Taint state: (block_id, variable) -> TaintState
86    taint_map: HashMap<(String, String), TaintState>,
87}
88
89impl PathSensitiveTaintAnalysis {
90    /// Create a new path-sensitive analysis for the given CFG
91    pub fn new(cfg: ControlFlowGraph) -> Self {
92        Self {
93            cfg,
94            taint_map: HashMap::new(),
95        }
96    }
97
98    /// Analyze all paths through the function
99    pub fn analyze(
100        &mut self,
101        function: &MirFunction,
102        callee_summaries: Option<&HashMap<String, DataflowSummary>>,
103    ) -> PathSensitiveResult {
104        self.analyze_with_initial_taint(function, HashMap::new(), callee_summaries)
105    }
106
107    /// Analyze all paths through a closure function with captured variable taint
108    pub fn analyze_closure(
109        &mut self,
110        function: &MirFunction,
111        closure_info: &ClosureInfo,
112        callee_summaries: Option<&HashMap<String, DataflowSummary>>,
113    ) -> PathSensitiveResult {
114        let initial_taint = self.build_initial_taint_from_captures(closure_info);
115        self.analyze_with_initial_taint(function, initial_taint, callee_summaries)
116    }
117
118    /// Build initial taint state from captured variables
119    fn build_initial_taint_from_captures(
120        &self,
121        closure_info: &ClosureInfo,
122    ) -> HashMap<String, TaintState> {
123        let mut taint = HashMap::new();
124
125        // For each captured variable, if it's tainted, add it to initial taint
126        // The captured variable is accessed via ((*_1).N) where N is the field index
127        for capture in &closure_info.captured_vars {
128            if let super::closure::TaintState::Tainted { source_type, .. } = &capture.taint_state {
129                // Closure environment is always _1 in the closure body
130                // Field access is ((*_1).N) where N is the field index
131                let env_var = format!("((*_1).{})", capture.field_index);
132                taint.insert(
133                    env_var,
134                    TaintState::Tainted {
135                        source_type: source_type.clone(),
136                        source_location: format!("captured from {}", closure_info.parent_function),
137                    },
138                );
139            }
140        }
141
142        taint
143    }
144
145    /// Analyze all paths with given initial taint state
146    pub fn analyze_with_initial_taint(
147        &mut self,
148        function: &MirFunction,
149        initial_taint: HashMap<String, TaintState>,
150        callee_summaries: Option<&HashMap<String, DataflowSummary>>,
151    ) -> PathSensitiveResult {
152        // println!("[DEBUG] Processing function: {}", function.name);
153        let (paths, skipped_due_to_complexity) = self.cfg.get_all_paths();
154
155        // If CFG was too complex for path enumeration, log and return empty result
156        // (e.g., giant async closure with 2000+ basic blocks)
157        if skipped_due_to_complexity {
158            if std::env::var("RUSTCOLA_MEMORY_PROFILE").is_ok() {
159                eprintln!(
160                    "[MEMORY] Skipping path-sensitive analysis for complex CFG: {} blocks, {} branches",
161                    self.cfg.block_count(),
162                    self.cfg.branch_count()
163                );
164            }
165            return PathSensitiveResult {
166                path_results: Vec::new(),
167                has_any_vulnerable_path: false,
168                total_paths: 0,
169            };
170        }
171
172        // Memory optimization: Only keep paths that have findings (vulnerable sinks,
173        // return tainted, or sanitizers). Most paths are clean and don't need to be stored.
174        let mut path_results = Vec::new();
175        let mut has_any_vulnerable_path = false;
176        let total_paths = paths.len();
177
178        for path in paths {
179            // Use field-sensitive analysis by default
180            let result = self.analyze_path_field_sensitive(
181                &path,
182                function,
183                &initial_taint,
184                callee_summaries,
185            );
186
187            if result.has_vulnerable_sink {
188                has_any_vulnerable_path = true;
189            }
190
191            // Only store paths with actual findings to reduce memory
192            if result.has_vulnerable_sink
193                || result.return_tainted
194                || !result.sanitizer_calls.is_empty()
195            {
196                path_results.push(result);
197            }
198        }
199
200        PathSensitiveResult {
201            path_results,
202            has_any_vulnerable_path,
203            total_paths,
204        }
205    }
206
207    /// Analyze a single execution path (field-sensitive version)
208    fn analyze_path_field_sensitive(
209        &mut self,
210        path: &[String],
211        _function: &MirFunction,
212        initial_taint: &HashMap<String, TaintState>,
213        callee_summaries: Option<&HashMap<String, DataflowSummary>>,
214    ) -> PathAnalysisResult {
215        // Initialize field-sensitive taint state
216        let mut field_map = FieldTaintMap::new();
217
218        // Convert initial taint to field map
219        for (var, taint_state) in initial_taint {
220            Self::set_field_taint_state(&mut field_map, var, taint_state);
221        }
222
223        let mut sink_calls = Vec::new();
224        let mut source_calls = Vec::new();
225        let mut sanitizer_calls = Vec::new();
226        let mut alias_map = HashMap::new();
227
228        // Process each block in the path
229        for block_id in path {
230            if let Some(block) = self.cfg.get_block(block_id) {
231                self.process_block_field_sensitive(
232                    block,
233                    &mut field_map,
234                    &mut sink_calls,
235                    &mut source_calls,
236                    &mut sanitizer_calls,
237                    callee_summaries,
238                    &mut alias_map,
239                );
240            }
241        }
242
243        // Determine if this path is vulnerable
244        let has_vulnerable_sink = !sink_calls.is_empty();
245
246        // Check if return value is tainted
247        let return_tainted = matches!(
248            field_map.get_field_taint(&super::field::FieldPath::whole_var("_0".to_string())),
249            super::field::FieldTaint::Tainted { .. }
250        );
251
252        // Extract final taint for parameters
253        let mut final_taint = HashMap::new();
254        for i in 1..=10 {
255            // Check first 10 params
256            let var = format!("_{}", i);
257            let taint = field_map.get_field_taint(&super::field::FieldPath::whole_var(var.clone()));
258            if let super::field::FieldTaint::Tainted {
259                source_type,
260                source_location,
261            } = taint
262            {
263                final_taint.insert(
264                    var,
265                    TaintState::Tainted {
266                        source_type,
267                        source_location,
268                    },
269                );
270            }
271        }
272
273        PathAnalysisResult {
274            path: path.to_vec(),
275            has_vulnerable_sink,
276            sink_calls,
277            source_calls,
278            sanitizer_calls,
279            return_tainted,
280            final_taint,
281        }
282    }
283
284    /// Analyze a single execution path
285    fn analyze_path(
286        &mut self,
287        path: &[String],
288        _function: &MirFunction,
289        initial_taint: &HashMap<String, TaintState>,
290    ) -> PathAnalysisResult {
291        // Initialize taint state for this path with captured variables (if closure)
292        let mut current_taint: HashMap<String, TaintState> = initial_taint.clone();
293
294        // Track taint sources from function parameters
295        // For now, assume param _1 is potentially tainted from env::args
296        // (This is simplified - real implementation would parse function signature)
297
298        let mut sink_calls = Vec::new();
299        let mut source_calls = Vec::new();
300        let mut sanitizer_calls = Vec::new();
301
302        // Process each block in the path
303        for block_id in path {
304            if let Some(block) = self.cfg.get_block(block_id) {
305                self.process_block(
306                    block,
307                    &mut current_taint,
308                    &mut sink_calls,
309                    &mut source_calls,
310                    &mut sanitizer_calls,
311                );
312            }
313        }
314
315        // Determine if this path is vulnerable
316        let has_vulnerable_sink = !sink_calls.is_empty();
317
318        // Check if return value is tainted
319        let return_tainted = matches!(current_taint.get("_0"), Some(TaintState::Tainted { .. }));
320
321        PathAnalysisResult {
322            path: path.to_vec(),
323            has_vulnerable_sink,
324            sink_calls,
325            source_calls,
326            sanitizer_calls,
327            return_tainted,
328            final_taint: current_taint,
329        }
330    }
331
332    /// Process a single basic block (field-sensitive version)
333    fn process_block_field_sensitive(
334        &self,
335        block: &BasicBlock,
336        field_map: &mut FieldTaintMap,
337        sink_calls: &mut Vec<SinkCall>,
338        source_calls: &mut Vec<SourceCall>,
339        sanitizer_calls: &mut Vec<SanitizerCall>,
340        callee_summaries: Option<&HashMap<String, DataflowSummary>>,
341        alias_map: &mut HashMap<String, String>,
342    ) {
343        // Process statements in the block
344        for statement in &block.statements {
345            self.process_statement_field_sensitive(
346                &block.id,
347                statement,
348                field_map,
349                sink_calls,
350                source_calls,
351                sanitizer_calls,
352                callee_summaries,
353                alias_map,
354            );
355        }
356
357        // Process terminator (for function calls)
358        self.process_terminator_field_sensitive(
359            &block.id,
360            &block.statements,
361            &block.terminator,
362            field_map,
363            sink_calls,
364            source_calls,
365            sanitizer_calls,
366            callee_summaries,
367            alias_map,
368        );
369    }
370
371    /// Process a single basic block
372    fn process_block(
373        &self,
374        block: &BasicBlock,
375        current_taint: &mut HashMap<String, TaintState>,
376        sink_calls: &mut Vec<SinkCall>,
377        source_calls: &mut Vec<SourceCall>,
378        sanitizer_calls: &mut Vec<SanitizerCall>,
379    ) {
380        // Process statements in the block
381        for statement in &block.statements {
382            self.process_statement(
383                &block.id,
384                statement,
385                current_taint,
386                sink_calls,
387                source_calls,
388                sanitizer_calls,
389            );
390        }
391
392        // Process terminator (for function calls)
393        self.process_terminator(
394            &block.id,
395            &block.terminator,
396            current_taint,
397            sink_calls,
398            source_calls,
399            sanitizer_calls,
400        );
401    }
402
403    /// Process a statement (assignment, etc.) with field-sensitive analysis
404    fn process_statement_field_sensitive(
405        &self,
406        block_id: &str,
407        statement: &str,
408        field_map: &mut FieldTaintMap,
409        sink_calls: &mut Vec<SinkCall>,
410        source_calls: &mut Vec<SourceCall>,
411        sanitizer_calls: &mut Vec<SanitizerCall>,
412        callee_summaries: Option<&HashMap<String, DataflowSummary>>,
413        alias_map: &mut HashMap<String, String>,
414    ) {
415        use super::field::parser;
416
417        // Handle alias definition: _N = deref_copy (_M.0: ...)
418        if let Some((lhs, rhs)) = Self::parse_assignment(statement) {
419            if rhs.starts_with("deref_copy ") {
420                let source = rhs[11..].trim();
421                // Check if source is a field access like (_1.0: ...)
422                if let Some(field_path) = parser::parse_field_access(source) {
423                    // If it's a field of _1 (the generator), record alias
424                    if field_path.base_var == "_1" {
425                        // lhs is _N
426                        if let Some(lhs_var) = parser::extract_base_var(&lhs) {
427                            alias_map.insert(lhs_var, field_path.to_string());
428                        }
429                    }
430                }
431            }
432        }
433
434        // Apply aliases to statement
435        let mut statement_str = statement.to_string();
436        for (alias, target) in alias_map.iter() {
437            let alias_pattern = alias.as_str();
438            let target_pattern = target.as_str();
439
440            let mut temp_stmt = String::new();
441            let mut pos = 0;
442
443            while let Some(idx) = statement_str[pos..].find(alias_pattern) {
444                let start = pos + idx;
445                let end = start + alias_pattern.len();
446
447                // Check if whole word (followed by non-digit)
448                let is_whole_word = if end < statement_str.len() {
449                    !statement_str.as_bytes()[end].is_ascii_digit()
450                } else {
451                    true
452                };
453
454                temp_stmt.push_str(&statement_str[pos..start]);
455
456                if is_whole_word {
457                    temp_stmt.push_str(target_pattern);
458                } else {
459                    temp_stmt.push_str(alias_pattern);
460                }
461
462                pos = end;
463            }
464            temp_stmt.push_str(&statement_str[pos..]);
465            statement_str = temp_stmt;
466        }
467
468        let statement = statement_str.as_str();
469
470        // Check for sink calls (e.g., "_11 = execute_command(copy _12) -> [...]")
471        if statement.contains("execute_command")
472            || statement.contains("Command::new")
473            || statement.contains("Command::spawn")
474            || statement.contains("Command::arg")
475            || statement.contains("exec")
476        {
477            // This is a sink call - extract the argument
478            if let Some(paren_start) = statement.find('(') {
479                if let Some(paren_end) = statement.find(')') {
480                    let args_str = &statement[paren_start + 1..paren_end];
481
482                    // Extract all arguments (can be multiple, comma-separated)
483                    let mut tainted_args = Vec::new();
484                    for arg in args_str.split(',') {
485                        let arg_trimmed = arg.trim();
486                        // Check if the argument is tainted (field-sensitive)
487                        if Self::is_field_tainted(field_map, arg_trimmed) {
488                            if let Some(arg_var) = parser::extract_base_var(arg_trimmed) {
489                                tainted_args.push(arg_var);
490                            }
491                        }
492                    }
493
494                    // If any argument is tainted, this is a vulnerable sink
495                    if !tainted_args.is_empty() {
496                        let sink_name = if statement.contains("Command::spawn") {
497                            "Command::spawn"
498                        } else if statement.contains("Command::arg") {
499                            "Command::arg"
500                        } else if statement.contains("Command::new") {
501                            "Command::new"
502                        } else {
503                            "execute_command"
504                        };
505
506                        sink_calls.push(SinkCall {
507                            block_id: block_id.to_string(),
508                            statement: statement.to_string(),
509                            sink_function: sink_name.to_string(),
510                            tainted_args,
511                        });
512                    }
513                }
514            }
515        }
516
517        // Parse assignments: _1 = move _2; or (_1.0: Type) = move _2;
518        if let Some((lhs, rhs_raw)) = Self::parse_assignment(statement) {
519            // Strip terminator info (-> [return: ...])
520            let rhs = if let Some(idx) = rhs_raw.find(" -> [") {
521                rhs_raw[..idx].trim().to_string()
522            } else {
523                rhs_raw
524            };
525
526            // Check if LHS is a field access
527            let is_field_write = parser::contains_field_access(&lhs);
528
529            // Check for environment field access (closure captured variables)
530            // Pattern: _7 = deref_copy ((*_1).0: &std::string::String)
531            if let Some(env_field) = Self::extract_env_field_access(&rhs) {
532                // This is accessing a captured variable in a closure
533                let taint_state = Self::get_field_taint_state(field_map, &env_field);
534                Self::set_field_taint_state(field_map, &lhs, &taint_state);
535            }
536            // Propagate taint from RHS to LHS (field-sensitive)
537            else if parser::contains_field_access(&rhs) || Self::extract_variable(&rhs).is_some()
538            {
539                // Get taint from RHS (could be field or variable)
540                let rhs_taint = if parser::contains_field_access(&rhs) {
541                    Self::get_field_taint_state(field_map, &rhs)
542                } else if let Some(var) = Self::extract_variable(&rhs) {
543                    let t = Self::get_field_taint_state(field_map, &var);
544                    t
545                } else {
546                    Self::get_field_taint_state(field_map, &rhs)
547                };
548
549                // Set taint on LHS
550                if is_field_write {
551                    // Writing to a specific field - only that field becomes tainted
552                    Self::set_field_taint_state(field_map, &lhs, &rhs_taint);
553                } else {
554                    // Writing to entire variable - propagate to all fields
555                    Self::set_field_taint_state(field_map, &lhs, &rhs_taint);
556                }
557            }
558
559            // Check for source patterns
560            if Self::is_source_call(&rhs) {
561                let taint = TaintState::Tainted {
562                    source_type: "environment".to_string(),
563                    source_location: rhs.clone(),
564                };
565                Self::set_field_taint_state(field_map, &lhs, &taint);
566
567                source_calls.push(SourceCall {
568                    block_id: block_id.to_string(),
569                    statement: statement.to_string(),
570                    source_function: rhs.clone(),
571                    result_var: lhs.clone(),
572                });
573            }
574
575            // Check for sanitizer patterns
576            if Self::is_sanitizer_call(&rhs) {
577                if let Some(input_var) = Self::extract_variable(&rhs) {
578                    let taint = TaintState::Sanitized {
579                        sanitizer: rhs.clone(),
580                    };
581                    Self::set_field_taint_state(field_map, &lhs, &taint);
582
583                    sanitizer_calls.push(SanitizerCall {
584                        block_id: block_id.to_string(),
585                        statement: statement.to_string(),
586                        sanitizer_function: rhs.clone(),
587                        sanitized_var: input_var,
588                    });
589                }
590            }
591            // Check for closure/coroutine creation
592            else if rhs.starts_with("{closure@") || rhs.starts_with("{coroutine@") {
593                if let Some(summaries) = callee_summaries {
594                    // Extract index (#N)
595                    if let Some(hash_pos) = rhs.find("(#") {
596                        if let Some(close_paren) = rhs[hash_pos..].find(')') {
597                            let index_str = &rhs[hash_pos + 2..hash_pos + close_paren];
598                            if let Ok(index) = index_str.parse::<usize>() {
599                                // Look for a summary ending with ::{closure#N}
600                                let suffix = format!("::{{closure#{}}}", index);
601                                for (name, summary) in summaries {
602                                    if name.ends_with(&suffix) {
603                                        if summary.returns_tainted {
604                                            // println!("[DEBUG] Closure/Coroutine {} returns tainted data, propagating to {}", name, lhs);
605                                            let taint = TaintState::Tainted {
606                                                source_type: "propagated".to_string(),
607                                                source_location: format!("via {}", name),
608                                            };
609                                            Self::set_field_taint_state(field_map, &lhs, &taint);
610                                        } else {
611                                            // println!("[DEBUG] Closure/Coroutine {} found but returns CLEAN", name);
612                                        }
613
614                                        // Check for ParamToSink (closure environment flows to sink)
615                                        let mut has_sink_flow = false;
616                                        for prop in &summary.propagation {
617                                            if let TaintPropagation::ParamToSink {
618                                                param,
619                                                sink_type: _,
620                                            } = prop
621                                            {
622                                                // Param 0 is the closure environment
623                                                if *param == 0 {
624                                                    has_sink_flow = true;
625                                                }
626                                            }
627                                        }
628
629                                        if has_sink_flow {
630                                            // If closure reads from environment and sinks it, we need to check captured vars
631                                            // This is handled in analyze_closure, but here we can flag the closure object
632                                            // println!("[DEBUG] Closure {} has ParamToSink flow", name);
633                                        }
634                                    }
635                                }
636                            }
637                        }
638                    }
639                }
640            }
641            // Check for general function calls that return tainted data
642            else if rhs.ends_with(')') {
643                // Find matching open parenthesis for the function call
644                // We scan backwards to find the parenthesis that balances the last ')'
645                let mut balance = 0;
646                let mut open_paren_pos = None;
647                for (i, c) in rhs.char_indices().rev() {
648                    if c == ')' {
649                        balance += 1;
650                    } else if c == '(' {
651                        balance -= 1;
652                        if balance == 0 {
653                            open_paren_pos = Some(i);
654                            break;
655                        }
656                    }
657                }
658
659                if let Some(paren_pos) = open_paren_pos {
660                    let func_name_full = rhs[..paren_pos].trim();
661                    // Extract short name for heuristic checks
662                    let func_name_short = if let Some(idx) = func_name_full.rfind("::") {
663                        &func_name_full[idx + 2..]
664                    } else {
665                        func_name_full
666                    };
667
668                    // Parse arguments
669                    let mut args = Vec::new();
670                    if let Some(close_paren) = rhs.rfind(')') {
671                        let args_str = &rhs[paren_pos + 1..close_paren];
672                        for arg in args_str.split(',') {
673                            let arg = arg.trim();
674                            if !arg.is_empty() {
675                                args.push(arg);
676                            }
677                        }
678                    }
679
680                    let mut propagated_taint = None;
681
682                    // Check summaries
683                    if let Some(summaries) = callee_summaries {
684                        for (name, summary) in summaries {
685                            // Check if func_name matches summary name
686                            let match_found = name == func_name_full
687                                || name.ends_with(&format!("::{}", func_name_full))
688                                || func_name_full.ends_with(&format!("::{}", name));
689
690                            if match_found {
691                                // Check explicit return taint
692                                if summary.returns_tainted {
693                                    // println!("[DEBUG] Function call {} returns tainted data, propagating to {}", func_name_full, lhs);
694                                    propagated_taint = Some(TaintState::Tainted {
695                                        source_type: "propagated".to_string(),
696                                        source_location: format!("via {}", func_name_full),
697                                    });
698                                }
699
700                                // Check propagation from params
701                                for prop in &summary.propagation {
702                                    if let TaintPropagation::ParamToReturn(param_idx) = prop {
703                                        if let Some(arg_str) = args.get(*param_idx) {
704                                            // Check if argument is tainted
705                                            let is_tainted =
706                                                if parser::contains_field_access(arg_str) {
707                                                    matches!(
708                                                        Self::get_field_taint_state(
709                                                            field_map, arg_str
710                                                        ),
711                                                        TaintState::Tainted { .. }
712                                                    )
713                                                } else if let Some(arg_var) =
714                                                    Self::extract_variable(arg_str)
715                                                {
716                                                    matches!(
717                                                        Self::get_field_taint_state(
718                                                            field_map, &arg_var
719                                                        ),
720                                                        TaintState::Tainted { .. }
721                                                    )
722                                                } else {
723                                                    false
724                                                };
725
726                                            if is_tainted {
727                                                // println!("[DEBUG] Function call {} propagates taint from arg {} to return {}", func_name_full, param_idx, lhs);
728                                                propagated_taint = Some(TaintState::Tainted {
729                                                    source_type: "propagated".to_string(),
730                                                    source_location: format!(
731                                                        "via {}",
732                                                        func_name_full
733                                                    ),
734                                                });
735                                            }
736                                        }
737                                    } else if let TaintPropagation::ParamToParam { from, to } = prop
738                                    {
739                                        // Check if source argument is tainted
740                                        if let Some(src_arg_str) = args.get(*from) {
741                                            let is_tainted =
742                                                if parser::contains_field_access(src_arg_str) {
743                                                    matches!(
744                                                        Self::get_field_taint_state(
745                                                            field_map,
746                                                            src_arg_str
747                                                        ),
748                                                        TaintState::Tainted { .. }
749                                                    )
750                                                } else if let Some(arg_var) =
751                                                    Self::extract_variable(src_arg_str)
752                                                {
753                                                    matches!(
754                                                        Self::get_field_taint_state(
755                                                            field_map, &arg_var
756                                                        ),
757                                                        TaintState::Tainted { .. }
758                                                    )
759                                                } else {
760                                                    false
761                                                };
762
763                                            if is_tainted {
764                                                // Propagate to destination argument
765                                                if let Some(dest_arg_str) = args.get(*to) {
766                                                    // Destination might be "move _1" or "&mut _1"
767                                                    if let Some(dest_var) =
768                                                        Self::extract_variable(dest_arg_str)
769                                                    {
770                                                        // println!("[DEBUG] Function call {} propagates taint from arg {} to arg {}", func_name_full, from, to);
771                                                        let taint = TaintState::Tainted {
772                                                            source_type: "propagated".to_string(),
773                                                            source_location: format!(
774                                                                "via {} (arg {} -> arg {})",
775                                                                func_name_full, from, to
776                                                            ),
777                                                        };
778                                                        Self::set_field_taint_state(
779                                                            field_map, &dest_var, &taint,
780                                                        );
781                                                    }
782                                                }
783                                            }
784                                        }
785                                    }
786                                }
787                            }
788                        }
789                    }
790
791                    // Heuristic: Propagate taint for known methods if no summary exists
792                    if propagated_taint.is_none() {
793                        let heuristic_methods = [
794                            "into_future",
795                            "poll",
796                            "new",
797                            "new_unchecked",
798                            "from",
799                            "deref",
800                            "as_ref",
801                            "clone",
802                        ];
803
804                        if heuristic_methods.iter().any(|m| func_name_short == *m) {
805                            // Propagate from first argument
806                            if let Some(first_arg) = args.first() {
807                                let is_tainted = if parser::contains_field_access(first_arg) {
808                                    matches!(
809                                        Self::get_field_taint_state(field_map, first_arg),
810                                        TaintState::Tainted { .. }
811                                    )
812                                } else if let Some(arg_var) = Self::extract_variable(first_arg) {
813                                    let t = Self::get_field_taint_state(field_map, &arg_var);
814                                    matches!(t, TaintState::Tainted { .. })
815                                } else {
816                                    false
817                                };
818
819                                if is_tainted {
820                                    // println!("[DEBUG] Heuristic: Function call {} propagates taint from arg to return {}", func_name_full, lhs);
821                                    propagated_taint = Some(TaintState::Tainted {
822                                        source_type: "propagated".to_string(),
823                                        source_location: format!("via {}", func_name_full),
824                                    });
825                                } else if func_name_short == "into_future" {
826                                    // println!("[DEBUG] Heuristic arg check failed for arg '{}' (var: {:?})", first_arg, Self::extract_variable(first_arg));
827                                }
828                            } else if func_name_short == "into_future" {
829                                // println!("[DEBUG] Heuristic mismatch: '{}' not in list", func_name_short);
830                            }
831                        }
832                    }
833
834                    // Heuristic: ParamToParam propagation (e.g. push_str)
835                    let param_to_param_methods =
836                        ["push_str", "push", "append", "extend", "insert_str"];
837
838                    if param_to_param_methods.iter().any(|m| func_name_short == *m) {
839                        // Propagate from arg 1 (source) to arg 0 (dest)
840                        // Check if we have at least 2 args
841                        if args.len() >= 2 {
842                            let dest_arg = args[0];
843                            let src_arg = args[1];
844
845                            // Check if src is tainted
846                            let is_src_tainted = if parser::contains_field_access(src_arg) {
847                                matches!(
848                                    Self::get_field_taint_state(field_map, src_arg),
849                                    TaintState::Tainted { .. }
850                                )
851                            } else if let Some(arg_var) = Self::extract_variable(src_arg) {
852                                matches!(
853                                    Self::get_field_taint_state(field_map, &arg_var),
854                                    TaintState::Tainted { .. }
855                                )
856                            } else {
857                                false
858                            };
859
860                            if is_src_tainted {
861                                // Propagate to dest
862                                if let Some(dest_var) = Self::extract_variable(dest_arg) {
863                                    // println!("[DEBUG] Heuristic: Function call {} propagates taint from arg 1 to arg 0 ({})", func_name_full, dest_var);
864                                    let taint = TaintState::Tainted {
865                                        source_type: "propagated".to_string(),
866                                        source_location: format!("via {}", func_name_full),
867                                    };
868                                    Self::set_field_taint_state(field_map, &dest_var, &taint);
869                                }
870                            }
871                        }
872                    }
873
874                    if let Some(taint) = propagated_taint {
875                        Self::set_field_taint_state(field_map, &lhs, &taint);
876                    }
877                }
878            }
879        }
880    }
881
882    /// Process a statement (assignment, etc.)
883    fn process_statement(
884        &self,
885        block_id: &str,
886        statement: &str,
887        current_taint: &mut HashMap<String, TaintState>,
888        sink_calls: &mut Vec<SinkCall>,
889        source_calls: &mut Vec<SourceCall>,
890        sanitizer_calls: &mut Vec<SanitizerCall>,
891    ) {
892        // Check for sink calls (e.g., "_11 = execute_command(copy _12) -> [...]")
893        if statement.contains("execute_command")
894            || statement.contains("Command::new")
895            || statement.contains("Command::spawn")
896            || statement.contains("Command::arg")
897            || statement.contains("exec")
898        {
899            // This is a sink call - extract the argument
900            if let Some(paren_start) = statement.find('(') {
901                if let Some(paren_end) = statement.find(')') {
902                    let args_str = &statement[paren_start + 1..paren_end];
903
904                    // Extract all arguments (can be multiple, comma-separated)
905                    let mut tainted_args = Vec::new();
906                    for arg in args_str.split(',') {
907                        if let Some(arg_var) = Self::extract_variable(arg.trim()) {
908                            // Check if the argument is tainted
909                            if matches!(
910                                current_taint.get(&arg_var),
911                                Some(TaintState::Tainted { .. })
912                            ) {
913                                tainted_args.push(arg_var);
914                            }
915                        }
916                    }
917
918                    // If any argument is tainted, this is a vulnerable sink
919                    if !tainted_args.is_empty() {
920                        let sink_name = if statement.contains("Command::spawn") {
921                            "Command::spawn"
922                        } else if statement.contains("Command::arg") {
923                            "Command::arg"
924                        } else if statement.contains("Command::new") {
925                            "Command::new"
926                        } else {
927                            "execute_command"
928                        };
929
930                        sink_calls.push(SinkCall {
931                            block_id: block_id.to_string(),
932                            statement: statement.to_string(),
933                            sink_function: sink_name.to_string(),
934                            tainted_args,
935                        });
936                    }
937                }
938            }
939        }
940
941        // Parse assignments: _1 = move _2; or _3 = &_1;
942        if let Some((lhs, rhs)) = Self::parse_assignment(statement) {
943            // Check for environment field access (closure captured variables)
944            // Pattern: _7 = deref_copy ((*_1).0: &std::string::String)
945            if let Some(env_field) = Self::extract_env_field_access(&rhs) {
946                // This is accessing a captured variable in a closure
947                // The env_field will be something like "((*_1).0)"
948                if let Some(taint) = current_taint.get(&env_field) {
949                    // Propagate taint from captured variable to the local variable
950                    current_taint.insert(lhs.clone(), taint.clone());
951                }
952            }
953            // Propagate taint from RHS to LHS
954            else if let Some(rhs_var) = Self::extract_variable(&rhs) {
955                if let Some(taint) = current_taint.get(&rhs_var) {
956                    current_taint.insert(lhs.clone(), taint.clone());
957                }
958            }
959
960            // Check for source patterns
961            if Self::is_source_call(&rhs) {
962                current_taint.insert(
963                    lhs.clone(),
964                    TaintState::Tainted {
965                        source_type: "environment".to_string(),
966                        source_location: rhs.clone(),
967                    },
968                );
969
970                source_calls.push(SourceCall {
971                    block_id: block_id.to_string(),
972                    statement: statement.to_string(),
973                    source_function: rhs.clone(),
974                    result_var: lhs.clone(),
975                });
976            }
977
978            // Check for sanitizer patterns
979            if Self::is_sanitizer_call(&rhs) {
980                if let Some(input_var) = Self::extract_variable(&rhs) {
981                    current_taint.insert(
982                        lhs.clone(),
983                        TaintState::Sanitized {
984                            sanitizer: rhs.clone(),
985                        },
986                    );
987
988                    sanitizer_calls.push(SanitizerCall {
989                        block_id: block_id.to_string(),
990                        statement: statement.to_string(),
991                        sanitizer_function: rhs.clone(),
992                        sanitized_var: input_var,
993                    });
994                }
995            }
996        }
997    }
998
999    /// Process a terminator (field-sensitive version)
1000    fn process_terminator_field_sensitive(
1001        &self,
1002        _block_id: &str,
1003        _statements: &[String],
1004        terminator: &Terminator,
1005        _field_map: &mut FieldTaintMap,
1006        _sink_calls: &mut Vec<SinkCall>,
1007        _source_calls: &mut Vec<SourceCall>,
1008        _sanitizer_calls: &mut Vec<SanitizerCall>,
1009        _callee_summaries: Option<&HashMap<String, DataflowSummary>>,
1010        _alias_map: &mut HashMap<String, String>,
1011    ) {
1012        // For Call terminators, we need to look at the preceding statement
1013        // to determine what function is being called and with what arguments
1014        // This is simplified for now - real implementation would parse call syntax
1015
1016        if let Terminator::Call { .. } = terminator {
1017            // Logic for call terminators is currently handled in process_statement_field_sensitive
1018            // which sees the assignment statement corresponding to the call.
1019            // Future improvements could handle calls that are not assignments here.
1020        }
1021    }
1022
1023    /// Process a terminator (mainly for function calls)
1024    fn process_terminator(
1025        &self,
1026        _block_id: &str,
1027        terminator: &Terminator,
1028        _current_taint: &mut HashMap<String, TaintState>,
1029        _sink_calls: &mut Vec<SinkCall>,
1030        _source_calls: &mut Vec<SourceCall>,
1031        _sanitizer_calls: &mut Vec<SanitizerCall>,
1032    ) {
1033        // For Call terminators, we need to look at the preceding statement
1034        // to determine what function is being called and with what arguments
1035        // This is simplified for now - real implementation would parse call syntax
1036
1037        if let Terminator::Call { .. } = terminator {
1038            // Look for sink patterns in the block's statements
1039            // (In real MIR, function calls appear before the Call terminator)
1040            // For now, we'll use a simplified heuristic
1041        }
1042    }
1043
/// Splits a statement of the form `lhs = rhs;` into its two sides.
///
/// The split happens at the first `" = "` (with surrounding spaces). Both sides are
/// trimmed and any trailing semicolons are removed from the right-hand side.
/// Returns `None` when the statement contains no `" = "`.
fn parse_assignment(statement: &str) -> Option<(String, String)> {
    let (target, value) = statement.split_once(" = ")?;
    let value = value.trim().trim_end_matches(';');
    Some((target.trim().to_owned(), value.to_owned()))
}
1054
/// Extracts the name of the local variable (`_N`) an expression refers to.
///
/// Recognised shapes, checked in this order:
/// - operand prefixes `move `, `copy `, `&mut ` and `&`: the variable is the leading
///   run of digits/underscores that follows the prefix (`None` if that run is empty,
///   e.g. for `move (_1.0: T)`);
/// - call-like expressions `f(args)`: the text between the first `(` and the last `)`
///   is examined in turn, unless it contains `:` (which indicates a typed field
///   access rather than an argument list);
/// - a bare variable such as `_1` or `_12.0` (yields `_12`).
///
/// Returns `None` for anything else.
fn extract_variable(expr: &str) -> Option<String> {
    /// Leading run of numeric characters and underscores of `text`, if non-empty.
    fn leading_local(text: &str) -> Option<String> {
        let text = text.trim();
        let end = text
            .find(|c: char| !c.is_numeric() && c != '_')
            .unwrap_or(text.len());
        if end == 0 {
            None
        } else {
            Some(text[..end].to_string())
        }
    }

    // `&mut ` must be tried before the bare `&`.
    const OPERAND_PREFIXES: [&str; 4] = ["move ", "copy ", "&mut ", "&"];

    let mut current = expr;
    loop {
        let expr = current.trim();

        // Handle: move _1, copy _2, &_3, &mut _4
        let after_prefix = OPERAND_PREFIXES
            .iter()
            .find_map(|prefix| expr.strip_prefix(*prefix));
        if let Some(rest) = after_prefix {
            return leading_local(rest);
        }

        // Handle function calls by descending into the argument list:
        // "deref(copy _16)" -> "_16", "<String as Deref>::deref(copy _16)" -> "_16".
        if let (Some(open), Some(close)) = (expr.find('('), expr.rfind(')')) {
            if open < close {
                let inner = &expr[open + 1..close];
                // Field accesses such as "((*_1).0: T)" carry a ':' inside the
                // parentheses; those are not argument lists.
                if !inner.contains(':') {
                    current = inner;
                    continue;
                }
            }
        }

        // Simple variable: _1, _2, etc.
        return if expr.starts_with('_') {
            leading_local(expr)
        } else {
            None
        };
    }
}
1131
/// Recognises a read of a captured-environment field through `_1`.
///
/// Input shape: `deref_copy ((*_1).0: &std::string::String)`.
/// The text from the first `(` up to the first following `:` is taken as the place:
/// - if it contains `(*_1).`, it is returned with a closing `)` appended,
///   e.g. `Some("((*_1).0)")`;
/// - if it has the async-closure form `(_1.N` with a numeric `N`, it is normalised
///   to `((*_1).N)`.
///
/// Returns `None` for every other expression.
fn extract_env_field_access(expr: &str) -> Option<String> {
    let operand = expr.trim().strip_prefix("deref_copy ")?;

    // Place expression: from the opening parenthesis up to the type annotation.
    let open = operand.find('(')?;
    let tail = &operand[open..];
    let colon = tail.find(':')?;
    let place = tail[..colon].trim();

    // Closure form, e.g. "((*_1).0" -> "((*_1).0)"
    if place.contains("(*_1).") {
        return Some(format!("{})", place));
    }

    // Async closure form, e.g. "(_1.0" -> "((*_1).0)"
    place
        .strip_prefix("(_1.")
        .and_then(|index| index.parse::<usize>().ok())
        .map(|index| format!("((*_1).{})", index))
}
1165
/// Reports whether `expr` looks like a call that reads process arguments or
/// environment variables.
///
/// Matching is plain substring containment against the markers below, so the short
/// forms (`args()`, `var(`) also match any other expression containing that text.
fn is_source_call(expr: &str) -> bool {
    const SOURCE_MARKERS: [&str; 6] = [
        "env::args",
        "env::var",
        "std::env::args",
        "std::env::var",
        "args()", // Simplified MIR format
        "var(",   // Simplified MIR format
    ];

    SOURCE_MARKERS.iter().any(|marker| expr.contains(*marker))
}
1175
/// Reports whether `expr` looks like a sanitizing or validating call.
///
/// The check is plain substring containment against a fixed table of markers:
/// - generic validation names (`validate_input`, `sanitize`, `parse::<`, ...)
/// - Actix-web typed extractors (`web::Json`, `web::Path`, `web::Query`, `web::Form`)
/// - Axum typed extractors (`extract::Json`, `extract::Path`, `extract::Query`, ...)
/// - Rocket validation traits (`FromForm`, `FromParam`, `FromData`, `FromFormField`)
/// - HTML, SQL and URL escaping helpers, regex matching, and validation crates
///
/// Because only substrings are compared, short markers such as `clean`, `filter`
/// or `bind` also match unrelated identifiers that happen to contain them.
fn is_sanitizer_call(expr: &str) -> bool {
    const SANITIZER_MARKERS: &[&str] = &[
        // Generic sanitization patterns
        "validate_input",
        "sanitize",
        "parse::<",
        "to_string()",
        "validate",
        "is_valid",
        "is_safe",
        "clean",
        "filter",
        // Actix-web typed extractors (perform validation on deserialization)
        "actix_web::web::Json",
        "web::Json",
        "actix_web::web::Path",
        "web::Path",
        "actix_web::web::Query",
        "web::Query",
        "actix_web::web::Form",
        "web::Form",
        "actix_web::web::Data",
        // Axum typed extractors
        "axum::extract::Json",
        "extract::Json",
        "axum::extract::Path",
        "extract::Path",
        "axum::extract::Query",
        "extract::Query",
        "axum::extract::Form",
        "extract::Form",
        "axum::extract::State",
        // Rocket validation traits
        "rocket::form::FromForm",
        "FromForm",
        "rocket::request::FromParam",
        "FromParam",
        "rocket::data::FromData",
        "FromData",
        "rocket::form::FromFormField",
        "FromFormField",
        // HTML escaping
        "html_escape",
        "encode_safe",
        "encode_text",
        "escape_html",
        "askama", // Askama templates auto-escape
        "tera::escape",
        "maud", // Maud templates auto-escape
        // SQL escaping / parameterization
        "sql_escape",
        "escape_string",
        "quote_literal",
        "bind", // Parameterized queries
        // URL encoding
        "url_encode",
        "urlencoding",
        "percent_encode",
        "form_urlencoded",
        // Regex validation
        "Regex::is_match",
        "regex::is_match",
        "regex_match",
        // Serde deserialization with validation
        "serde_valid",
        "validator::Validate",
        "garde::Validate",
    ];

    SANITIZER_MARKERS.iter().any(|marker| expr.contains(*marker))
}
1251
1252    /// Convert TaintState to FieldTaint
1253    fn taint_state_to_field_taint(taint: &TaintState) -> FieldTaint {
1254        match taint {
1255            TaintState::Clean => FieldTaint::Clean,
1256            TaintState::Tainted {
1257                source_type,
1258                source_location,
1259            } => FieldTaint::Tainted {
1260                source_type: source_type.clone(),
1261                source_location: source_location.clone(),
1262            },
1263            TaintState::Sanitized { sanitizer } => FieldTaint::Sanitized {
1264                sanitizer: sanitizer.clone(),
1265            },
1266        }
1267    }
1268
1269    /// Convert FieldTaint to TaintState
1270    fn field_taint_to_taint_state(taint: &FieldTaint) -> TaintState {
1271        match taint {
1272            FieldTaint::Clean => TaintState::Clean,
1273            FieldTaint::Tainted {
1274                source_type,
1275                source_location,
1276            } => TaintState::Tainted {
1277                source_type: source_type.clone(),
1278                source_location: source_location.clone(),
1279            },
1280            FieldTaint::Sanitized { sanitizer } => TaintState::Sanitized {
1281                sanitizer: sanitizer.clone(),
1282            },
1283            FieldTaint::Unknown => TaintState::Clean, // Conservative: treat unknown as clean
1284        }
1285    }
1286
1287    /// Check if a variable or field is tainted in the field-sensitive map
1288    fn is_field_tainted(field_map: &FieldTaintMap, var_or_field: &str) -> bool {
1289        use super::field::parser;
1290
1291        // Try to parse as field access first
1292        if parser::contains_field_access(var_or_field) {
1293            if let Some(field_path) = parser::parse_field_access(var_or_field) {
1294                return matches!(
1295                    field_map.get_field_taint(&field_path),
1296                    FieldTaint::Tainted { .. }
1297                );
1298            }
1299        }
1300
1301        // Fall back to whole variable check
1302        if let Some(base_var) = parser::extract_base_var(var_or_field) {
1303            let whole_var_path = FieldPath::whole_var(base_var);
1304            return matches!(
1305                field_map.get_field_taint(&whole_var_path),
1306                FieldTaint::Tainted { .. }
1307            ) || field_map.has_tainted_field(&whole_var_path.base_var);
1308        }
1309
1310        false
1311    }
1312
1313    /// Get taint state for a variable or field from the field-sensitive map
1314    fn get_field_taint_state(field_map: &FieldTaintMap, var_or_field: &str) -> TaintState {
1315        use super::field::parser;
1316
1317        // Strip prefixes like &mut, move, copy, etc. to handle MIR expressions
1318        let mut clean_expr = var_or_field.trim();
1319        loop {
1320            if clean_expr.starts_with("&mut ") {
1321                clean_expr = &clean_expr[5..].trim();
1322            } else if clean_expr.starts_with("move ") {
1323                clean_expr = &clean_expr[5..].trim();
1324            } else if clean_expr.starts_with("copy ") {
1325                clean_expr = &clean_expr[5..].trim();
1326            } else if clean_expr.starts_with("&") {
1327                clean_expr = &clean_expr[1..].trim();
1328            } else if clean_expr.starts_with("deref_copy ") {
1329                clean_expr = &clean_expr[11..].trim();
1330            } else {
1331                break;
1332            }
1333        }
1334
1335        // Try to parse as field access first
1336        if parser::contains_field_access(clean_expr) {
1337            if let Some(field_path) = parser::parse_field_access(clean_expr) {
1338                let field_taint = field_map.get_field_taint(&field_path);
1339                return Self::field_taint_to_taint_state(&field_taint);
1340            }
1341        }
1342
1343        // Fall back to whole variable check - use clean_expr, not var_or_field
1344        if let Some(base_var) = parser::extract_base_var(clean_expr) {
1345            let whole_var_path = FieldPath::whole_var(base_var);
1346            let field_taint = field_map.get_field_taint(&whole_var_path);
1347            return Self::field_taint_to_taint_state(&field_taint);
1348        }
1349
1350        TaintState::Clean
1351    }
1352
1353    /// Set taint for a variable or field in the field-sensitive map
1354    fn set_field_taint_state(
1355        field_map: &mut FieldTaintMap,
1356        var_or_field: &str,
1357        taint: &TaintState,
1358    ) {
1359        use super::field::parser;
1360
1361        let field_taint = Self::taint_state_to_field_taint(taint);
1362
1363        // Strip prefixes like &mut, move, copy, etc. to handle MIR expressions
1364        let mut clean_expr = var_or_field.trim();
1365        loop {
1366            if clean_expr.starts_with("&mut ") {
1367                clean_expr = &clean_expr[5..].trim();
1368            } else if clean_expr.starts_with("move ") {
1369                clean_expr = &clean_expr[5..].trim();
1370            } else if clean_expr.starts_with("copy ") {
1371                clean_expr = &clean_expr[5..].trim();
1372            } else if clean_expr.starts_with("&") {
1373                clean_expr = &clean_expr[1..].trim();
1374            } else if clean_expr.starts_with("deref_copy ") {
1375                clean_expr = &clean_expr[11..].trim();
1376            } else {
1377                break;
1378            }
1379        }
1380
1381        // Try to parse as field access first
1382        if parser::contains_field_access(clean_expr) {
1383            if let Some(field_path) = parser::parse_field_access(clean_expr) {
1384                field_map.set_field_taint(field_path, field_taint);
1385                return;
1386            }
1387        }
1388
1389        // Fall back to whole variable - use clean_expr, not var_or_field
1390        if let Some(base_var) = parser::extract_base_var(clean_expr) {
1391            field_map.set_var_taint(&base_var, field_taint);
1392        }
1393    }
1394}
1395
/// Result of path-sensitive analysis: the per-path results together with
/// summary fields describing the analysis as a whole.
#[derive(Debug)]
pub struct PathSensitiveResult {
    /// Results for each path, one [`PathAnalysisResult`] per analyzed path
    pub path_results: Vec<PathAnalysisResult>,

    /// True if at least one path is vulnerable.
    /// NOTE(review): stored by whoever builds this struct; it is not recomputed
    /// from `path_results` here - confirm the producer keeps the two in sync.
    pub has_any_vulnerable_path: bool,

    /// Total number of paths analyzed.
    /// NOTE(review): kept separately from `path_results.len()` - confirm whether
    /// the two can differ.
    pub total_paths: usize,
}
1408
1409impl PathSensitiveResult {
1410    /// Get vulnerable paths
1411    pub fn vulnerable_paths(&self) -> Vec<&PathAnalysisResult> {
1412        self.path_results
1413            .iter()
1414            .filter(|r| r.has_vulnerable_sink)
1415            .collect()
1416    }
1417
1418    /// Get safe paths
1419    pub fn safe_paths(&self) -> Vec<&PathAnalysisResult> {
1420        self.path_results
1421            .iter()
1422            .filter(|r| !r.has_vulnerable_sink)
1423            .collect()
1424    }
1425}
1426
#[cfg(test)]
mod tests {
    //! Unit tests for the string-level helpers of the path-sensitive analysis.
    //!
    //! The recognizer tests are table-driven: every input is checked through a
    //! helper that names the offending string on failure, so a broken pattern
    //! can be identified without counting assertions.

    use super::*;

    /// Assert that every entry in `calls` is classified as a sanitizer call.
    #[track_caller]
    fn assert_sanitizers(calls: &[&str]) {
        for &call in calls {
            assert!(
                PathSensitiveTaintAnalysis::is_sanitizer_call(call),
                "expected `{}` to be recognized as a sanitizer call",
                call
            );
        }
    }

    /// Assert that every entry in `calls` is NOT classified as a sanitizer call.
    #[track_caller]
    fn assert_not_sanitizers(calls: &[&str]) {
        for &call in calls {
            assert!(
                !PathSensitiveTaintAnalysis::is_sanitizer_call(call),
                "expected `{}` not to be recognized as a sanitizer call",
                call
            );
        }
    }

    #[test]
    fn test_parse_assignment() {
        // (statement, expected left-hand side, expected right-hand side)
        let cases = [
            ("_1 = move _2;", "_1", "move _2"),
            ("_3 = &_1;", "_3", "&_1"),
        ];

        for &(statement, lhs, rhs) in &cases {
            assert_eq!(
                PathSensitiveTaintAnalysis::parse_assignment(statement),
                Some((lhs.to_string(), rhs.to_string())),
                "unexpected parse result for `{}`",
                statement
            );
        }
    }

    #[test]
    fn test_extract_variable() {
        // (operand expression, expected variable)
        let cases = [("move _1", "_1"), ("&_2", "_2"), ("&mut _3", "_3")];

        for &(operand, variable) in &cases {
            assert_eq!(
                PathSensitiveTaintAnalysis::extract_variable(operand),
                Some(variable.to_string()),
                "unexpected variable extracted from `{}`",
                operand
            );
        }
    }

    #[test]
    fn test_is_source_call() {
        for &call in &["std::env::args()", "env::var(\"PATH\")"] {
            assert!(
                PathSensitiveTaintAnalysis::is_source_call(call),
                "expected `{}` to be recognized as a source call",
                call
            );
        }

        assert!(
            !PathSensitiveTaintAnalysis::is_source_call("some_function()"),
            "expected `some_function()` not to be recognized as a source call"
        );
    }

    #[test]
    fn test_is_sanitizer_call() {
        assert_sanitizers(&["validate_input(_1)", "parse::<i32>()"]);
        assert_not_sanitizers(&["some_function()"]);
    }

    #[test]
    fn test_actix_web_sanitizers() {
        // Actix-web typed extractors perform validation on deserialization
        assert_sanitizers(&[
            "actix_web::web::Json::extract",
            "web::Json::from_request",
            "actix_web::web::Path::extract",
            "web::Path::from_request",
            "actix_web::web::Query::extract",
            "web::Query::from_request",
            "actix_web::web::Form::extract",
            "web::Form::from_request",
            "actix_web::web::Data::get",
        ]);
    }

    #[test]
    fn test_axum_sanitizers() {
        // Axum typed extractors
        assert_sanitizers(&[
            "axum::extract::Json::from_request",
            "extract::Json::from_request",
            "axum::extract::Path::from_request",
            "extract::Path::from_request",
            "axum::extract::Query::from_request",
            "extract::Query::from_request",
            "axum::extract::Form::from_request",
            "extract::Form::from_request",
            "axum::extract::State::from_request",
        ]);
    }

    #[test]
    fn test_rocket_sanitizers() {
        // Rocket validation traits
        assert_sanitizers(&[
            "rocket::form::FromForm::from_form",
            "FromForm::from_form",
            "rocket::request::FromParam::from_param",
            "FromParam::from_param",
            "rocket::data::FromData::from_data",
            "FromData::from_data",
            "rocket::form::FromFormField::from_value",
            "FromFormField::from_value",
        ]);
    }

    #[test]
    fn test_html_escape_sanitizers() {
        // HTML escaping libraries
        assert_sanitizers(&[
            "html_escape::encode_safe",
            "encode_safe(&input)",
            "encode_text(&input)",
            "escape_html(&input)",
            "askama::Template::render",
            "tera::escape::escape_html",
            "maud::html!",
        ]);
    }

    #[test]
    fn test_sql_escape_sanitizers() {
        // SQL escaping / parameterization
        assert_sanitizers(&[
            "sql_escape(&query)",
            "escape_string(&value)",
            "quote_literal(&value)",
            "query.bind(value)",
        ]);
    }

    #[test]
    fn test_url_encoding_sanitizers() {
        // URL encoding
        assert_sanitizers(&[
            "url_encode(&path)",
            "urlencoding::encode",
            "percent_encode(&input)",
            "form_urlencoded::serialize",
        ]);
    }

    #[test]
    fn test_validation_sanitizers() {
        // Regex and validation libraries
        assert_sanitizers(&[
            "Regex::is_match(&input)",
            "regex::is_match(&input)",
            "regex_match(&pattern, &input)",
            "serde_valid::Validate::validate",
            "validator::Validate::validate",
            "garde::Validate::validate",
        ]);
    }

    #[test]
    fn test_generic_sanitizers() {
        // Generic validation patterns
        assert_sanitizers(&[
            "validate(&input)",
            "is_valid(&input)",
            "is_safe(&input)",
            "clean(&input)",
            "filter(&input)",
            "sanitize(&input)",
        ]);
    }

    #[test]
    fn test_extract_env_field_access() {
        // (operand expression, expected closure-environment field access)
        let cases = [
            // Closure environment field access
            (
                "deref_copy ((*_1).0: &std::string::String)",
                Some("((*_1).0)"),
            ),
            ("deref_copy ((*_1).1: &i32)", Some("((*_1).1)")),
            // Should not match non-environment patterns
            ("move _1", None),
            ("deref_copy _2", None),
        ];

        for &(operand, expected) in &cases {
            assert_eq!(
                PathSensitiveTaintAnalysis::extract_env_field_access(operand),
                expected.map(String::from),
                "unexpected environment field access for `{}`",
                operand
            );
        }
    }

    #[test]
    fn test_field_sensitive_helpers() {
        use super::super::field::FieldTaintMap;

        let mut field_map = FieldTaintMap::new();

        let taint = TaintState::Tainted {
            source_type: "test".to_string(),
            source_location: "test_source".to_string(),
        };

        // Whole-variable taint round-trips through set/get. `_2` is tainted
        // independently of `_1`; no aliasing between variables is exercised.
        for &var in &["_1", "_2"] {
            PathSensitiveTaintAnalysis::set_field_taint_state(&mut field_map, var, &taint);
            assert_eq!(
                PathSensitiveTaintAnalysis::get_field_taint_state(&field_map, var),
                taint,
                "taint set on `{}` was not read back",
                var
            );
        }

        // Field sensitivity: tainting one field is visible on that field...
        PathSensitiveTaintAnalysis::set_field_taint_state(&mut field_map, "_3.0", &taint);
        assert_eq!(
            PathSensitiveTaintAnalysis::get_field_taint_state(&field_map, "_3.0"),
            taint
        );

        // ...and leaves a sibling field of the same variable clean.
        assert_eq!(
            PathSensitiveTaintAnalysis::get_field_taint_state(&field_map, "_3.1"),
            TaintState::Clean
        );
    }
}